NbAiLab
/

nb-distil-whisper-large-pytorch-wer10

🇪🇺 Region: EU

Model card · Files and versions · Training metrics · Community

pere committed on Nov 11, 2024

Commit

e8127c4

·

1 Parent(s): adc1547

trying first training

Files changed (1) hide show

run_large_training.sh +6 -5

run_large_training.sh CHANGED Viewed

@@ -1,13 +1,13 @@
 #!/usr/bin/env bash
 accelerate launch run_distillation.py \
-  --model_name_or_path "./previous-36500-epoch1-wer-15-250" \
   --teacher_model_name_or_path "NbAiLab/nb-whisper-large" \
   --train_dataset_name "NbAiLab/annotated_distil_raw_ncc_speech_v7_large" \
   --train_dataset_config_name "" \
   --train_split_name "train" \
-  --eval_dataset_name "NbAiLab/annotated_distil_raw_ncc_speech_v7_large" \
   --eval_dataset_config_name "" \
-  --eval_split_name "validation" \
   --eval_steps 500 \
   --save_steps 1000 \
   --warmup_steps 1000 \
@@ -19,7 +19,7 @@ accelerate launch run_distillation.py \
   --task "transcribe" \
   --logging_steps 200 \
   --save_total_limit 1 \
-  --max_steps 10000 \
   --wer_threshold 5 \
   --per_device_train_batch_size 32 \
   --per_device_eval_batch_size 32 \
@@ -38,6 +38,7 @@ accelerate launch run_distillation.py \
   --freeze_embed_positions \
   --streaming True \
   --wandb_project "nb-distil-whisper-large-pytorch" \
-  --wandb_name "pytorch2_lr3e4_wer5" \
   --push_to_hub

 #!/usr/bin/env bash
 accelerate launch run_distillation.py \
+  --model_name_or_path "./nb-distil-large-init" \
   --teacher_model_name_or_path "NbAiLab/nb-whisper-large" \
   --train_dataset_name "NbAiLab/annotated_distil_raw_ncc_speech_v7_large" \
   --train_dataset_config_name "" \
   --train_split_name "train" \
+  --eval_dataset_name "NbAiLab/ncc_speech_v7" \
   --eval_dataset_config_name "" \
+  --eval_split_name "validation_norwegian_fleurs" \
   --eval_steps 500 \
   --save_steps 1000 \
   --warmup_steps 1000 \
   --task "transcribe" \
   --logging_steps 200 \
   --save_total_limit 1 \
+  --max_steps 100000 \
   --wer_threshold 5 \
   --per_device_train_batch_size 32 \
   --per_device_eval_batch_size 32 \
   --freeze_embed_positions \
   --streaming True \
   --wandb_project "nb-distil-whisper-large-pytorch" \
+  --wandb_name "pytorch_lr3e4_wer10" \
+  --hub_model_id "NbAiLab/nb-distil-whisper-large-pytorch-wer10" \
   --push_to_hub