pere committed on
Commit
23cc11f
·
1 Parent(s): 5e6f41b
run_distillation_node.py → run_distillation_nodes.py RENAMED
File without changes
run_large_nodes_training.sh ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ TOKENIZERS_PARALLELISM=false python3 run_distillation_nodes.py \
3
+ --model_name_or_path "./nb-distil-large-init" \
4
+ --teacher_model_name_or_path "NbAiLab/nb-whisper-large" \
5
+ --train_dataset_name "NbAiLab/annotated_distil_raw_ncc_speech_v7_compact8_large" \
6
+ --train_dataset_config_name "no" \
7
+ --train_split_name "train" \
8
+ --eval_dataset_name "NbAiLab/annotated_distil_raw_ncc_speech_v7_compact8_large" \
9
+ --eval_dataset_config_name "no" \
10
+ --eval_split_name "validation_norwegian_fleurs" \
11
+ --eval_steps 10 \
12
+ --save_steps 1000 \
13
+ --warmup_steps 100 \
14
+ --learning_rate 0.0001 \
15
+ --lr_scheduler_type "linear" \
16
+ --logging_steps 25 \
17
+ --save_total_limit 1 \
18
+ --max_steps 10000 \
19
+ --wer_threshold 10 \
20
+ --per_device_train_batch_size 16\
21
+ --per_device_eval_batch_size 16 \
22
+ --dataloader_num_workers 32 \
23
+ --dtype "bfloat16" \
24
+ --output_dir "./" \
25
+ --do_train \
26
+ --do_eval \
27
+ --use_scan \
28
+ --gradient_checkpointing \
29
+ --overwrite_output_dir \
30
+ --predict_with_generate \
31
+ --freeze_encoder \
32
+ --streaming \
33
+ --use_auth_token \
34
+ --report_to "wandb" \
35
+ --wandb_project "nb-distil-whisper-large-test3" \
36
+ --hub_model_id "NbAiLab/nb-distil-whisper-large-flax1-no" \
37
+ --push_to_hub
38
+