update
- distil_whisper/__pycache__/__init__.cpython-310.pyc +0 -0
- distil_whisper/__pycache__/layers.cpython-310.pyc +0 -0
- distil_whisper/__pycache__/modeling_flax_whisper.cpython-310.pyc +0 -0
- distil_whisper/__pycache__/partitioner.cpython-310.pyc +0 -0
- distil_whisper/__pycache__/pipeline.cpython-310.pyc +0 -0
- distil_whisper/__pycache__/train_state.cpython-310.pyc +0 -0
- run_distillation.py +2 -0
- run_large_training.sh +2 -2
distil_whisper/__pycache__/__init__.cpython-310.pyc
ADDED: Binary file (423 Bytes)

distil_whisper/__pycache__/layers.cpython-310.pyc
ADDED: Binary file (41.9 kB)

distil_whisper/__pycache__/modeling_flax_whisper.cpython-310.pyc
ADDED: Binary file (54 kB)

distil_whisper/__pycache__/partitioner.cpython-310.pyc
ADDED: Binary file (33.3 kB)

distil_whisper/__pycache__/pipeline.cpython-310.pyc
ADDED: Binary file (16.8 kB)

distil_whisper/__pycache__/train_state.cpython-310.pyc
ADDED: Binary file (4.12 kB)
run_distillation.py
CHANGED
@@ -1523,6 +1523,8 @@ def main():
             for i in range(len(label_str))
         ]
         wer_ortho = 100 * metric.compute(predictions=spaced_pred_str, references=spaced_label_str)
+
+        norm_pred_str, norm_label_str = [], []
 
         # Iterate through all predictions and labels
         for pred, label in zip(pred_str, label_str):
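For context on the run_distillation.py change: the two added lines initialize norm_pred_str and norm_label_str before the loop that follows, which presumably appends one normalized prediction/label pair per iteration; without the initialization, those appends would raise a NameError. A minimal sketch of the pattern, assuming the evaluate WER metric and transformers' BasicTextNormalizer as a stand-in for the script's actual normalizer and filtering (the diff's wer_ortho line uses spaced_* variants; plain strings are used here for brevity):

import evaluate
from transformers.models.whisper.english_normalizer import BasicTextNormalizer

metric = evaluate.load("wer")       # word error rate, as in the diff context
normalizer = BasicTextNormalizer()  # assumption: the script's normalizer may differ

pred_str = ["hello world!", "the cat sat"]
label_str = ["Hello, world.", "The cat sat."]

# Orthographic WER over the un-normalized strings (the wer_ortho context line).
wer_ortho = 100 * metric.compute(predictions=pred_str, references=label_str)

# The commit's addition: both lists must exist before the loop appends to them.
norm_pred_str, norm_label_str = [], []

# Iterate through all predictions and labels (as in the diff), keeping only
# pairs whose normalized reference is non-empty.
for pred, label in zip(pred_str, label_str):
    norm_label = normalizer(label)
    if len(norm_label) > 0:
        norm_pred_str.append(normalizer(pred))
        norm_label_str.append(norm_label)

wer = 100 * metric.compute(predictions=norm_pred_str, references=norm_label_str)
print(f"orthographic WER: {wer_ortho:.2f}, normalized WER: {wer:.2f}")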
run_large_training.sh
CHANGED
@@ -8,7 +8,7 @@ TOKENIZERS_PARALLELISM=false python3 run_distillation.py \
   --eval_dataset_name "NbAiLab/annotated_distil_raw_ncc_speech_v7_compact8_large" \
   --eval_dataset_config_name "no" \
   --eval_split_name "validation_norwegian_fleurs" \
-  --eval_steps
+  --eval_steps 10 \
   --save_steps 1000 \
   --warmup_steps 100 \
   --learning_rate 0.0001 \
@@ -19,7 +19,7 @@ TOKENIZERS_PARALLELISM=false python3 run_distillation.py \
   --wer_threshold 10 \
   --per_device_train_batch_size 16\
   --per_device_eval_batch_size 16 \
-  --dataloader_num_workers
+  --dataloader_num_workers 32 \
   --dtype "bfloat16" \
   --output_dir "./" \
   --do_train \
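For context on the run_large_training.sh change: before this commit, --eval_steps and --dataloader_num_workers were passed without values (and, as extracted, without the trailing backslash that continues the multi-line command, which on its own would split the invocation in two). Any parser that expects an integer value fails when a bare flag is followed directly by another option. A minimal sketch with a plain argparse stand-in (an assumption; the script presumably parses these flags via Hugging Face dataclass arguments, which behave the same way here):

import argparse

# Hypothetical stand-in for the training script's real argument parser.
parser = argparse.ArgumentParser()
parser.add_argument("--eval_steps", type=int, default=500)
parser.add_argument("--dataloader_num_workers", type=int, default=0)

# Post-fix form: every value-taking flag carries a value.
args = parser.parse_args(["--eval_steps", "10", "--dataloader_num_workers", "32"])
print(args.eval_steps, args.dataloader_num_workers)  # -> 10 32

# Pre-fix form: the next token starts with "--", so argparse refuses to use it
# as a value and exits with "argument --eval_steps: expected one argument".
# parser.parse_args(["--eval_steps", "--dataloader_num_workers", "32"])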