pere committed on
Commit
7bd0aec
·
1 Parent(s): 7ea628d
distil_whisper/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (423 Bytes). View file
 
distil_whisper/__pycache__/layers.cpython-310.pyc ADDED
Binary file (41.9 kB). View file
 
distil_whisper/__pycache__/modeling_flax_whisper.cpython-310.pyc ADDED
Binary file (54 kB). View file
 
distil_whisper/__pycache__/partitioner.cpython-310.pyc ADDED
Binary file (33.3 kB). View file
 
distil_whisper/__pycache__/pipeline.cpython-310.pyc ADDED
Binary file (16.8 kB). View file
 
distil_whisper/__pycache__/train_state.cpython-310.pyc ADDED
Binary file (4.12 kB). View file
 
run_distillation.py CHANGED
@@ -1523,6 +1523,8 @@ def main():
1523
  for i in range(len(label_str))
1524
  ]
1525
  wer_ortho = 100 * metric.compute(predictions=spaced_pred_str, references=spaced_label_str)
 
 
1526
 
1527
  # Iterate through all predictions and labels
1528
  for pred, label in zip(pred_str, label_str):
 
1523
  for i in range(len(label_str))
1524
  ]
1525
  wer_ortho = 100 * metric.compute(predictions=spaced_pred_str, references=spaced_label_str)
1526
+
1527
+ norm_pred_str, norm_label_str = [], []
1528
 
1529
  # Iterate through all predictions and labels
1530
  for pred, label in zip(pred_str, label_str):
run_large_training.sh CHANGED
@@ -8,7 +8,7 @@ TOKENIZERS_PARALLELISM=false python3 run_distillation.py \
8
  --eval_dataset_name "NbAiLab/annotated_distil_raw_ncc_speech_v7_compact8_large" \
9
  --eval_dataset_config_name "no" \
10
  --eval_split_name "validation_norwegian_fleurs" \
11
- --eval_steps 1000 \
12
  --save_steps 1000 \
13
  --warmup_steps 100 \
14
  --learning_rate 0.0001 \
@@ -19,7 +19,7 @@ TOKENIZERS_PARALLELISM=false python3 run_distillation.py \
19
  --wer_threshold 10 \
20
  --per_device_train_batch_size 16\
21
  --per_device_eval_batch_size 16 \
22
- --dataloader_num_workers 16 \
23
  --dtype "bfloat16" \
24
  --output_dir "./" \
25
  --do_train \
 
8
  --eval_dataset_name "NbAiLab/annotated_distil_raw_ncc_speech_v7_compact8_large" \
9
  --eval_dataset_config_name "no" \
10
  --eval_split_name "validation_norwegian_fleurs" \
11
+ --eval_steps 10 \
12
  --save_steps 1000 \
13
  --warmup_steps 100 \
14
  --learning_rate 0.0001 \
 
19
  --wer_threshold 10 \
20
  --per_device_train_batch_size 16\
21
  --per_device_eval_batch_size 16 \
22
+ --dataloader_num_workers 32 \
23
  --dtype "bfloat16" \
24
  --output_dir "./" \
25
  --do_train \