update
- distil_whisper/__pycache__/__init__.cpython-310.pyc +0 -0
- distil_whisper/__pycache__/layers.cpython-310.pyc +0 -0
- distil_whisper/__pycache__/modeling_flax_whisper.cpython-310.pyc +0 -0
- distil_whisper/__pycache__/partitioner.cpython-310.pyc +0 -0
- distil_whisper/__pycache__/pipeline.cpython-310.pyc +0 -0
- distil_whisper/__pycache__/train_state.cpython-310.pyc +0 -0
- run_distillation.py +2 -0
- run_large_training.sh +2 -2
distil_whisper/__pycache__/__init__.cpython-310.pyc
ADDED: Binary file (423 Bytes)

distil_whisper/__pycache__/layers.cpython-310.pyc
ADDED: Binary file (41.9 kB)

distil_whisper/__pycache__/modeling_flax_whisper.cpython-310.pyc
ADDED: Binary file (54 kB)

distil_whisper/__pycache__/partitioner.cpython-310.pyc
ADDED: Binary file (33.3 kB)

distil_whisper/__pycache__/pipeline.cpython-310.pyc
ADDED: Binary file (16.8 kB)

distil_whisper/__pycache__/train_state.cpython-310.pyc
ADDED: Binary file (4.12 kB)
run_distillation.py
CHANGED
@@ -1523,6 +1523,8 @@ def main():
             for i in range(len(label_str))
         ]
         wer_ortho = 100 * metric.compute(predictions=spaced_pred_str, references=spaced_label_str)
+
+        norm_pred_str, norm_label_str = [], []
 
         # Iterate through all predictions and labels
         for pred, label in zip(pred_str, label_str):
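For context on the run_distillation.py change: the two added lines initialize norm_pred_str and norm_label_str before the loop that follows, which presumably appends one normalized prediction/label pair per iteration; without the initialization, those appends would raise a NameError. A minimal sketch of the pattern, assuming the evaluate WER metric and transformers' BasicTextNormalizer as a stand-in for the script's actual normalizer and filtering (the diff's wer_ortho line uses spaced_* variants; plain strings are used here for brevity):

import evaluate
from transformers.models.whisper.english_normalizer import BasicTextNormalizer

metric = evaluate.load("wer")       # word error rate, as in the diff context
normalizer = BasicTextNormalizer()  # assumption: the script's normalizer may differ

pred_str = ["hello world!", "the cat sat"]
label_str = ["Hello, world.", "The cat sat."]

# Orthographic WER over the un-normalized strings (the wer_ortho context line).
wer_ortho = 100 * metric.compute(predictions=pred_str, references=label_str)

# The commit's addition: both lists must exist before the loop appends to them.
norm_pred_str, norm_label_str = [], []

# Iterate through all predictions and labels (as in the diff), keeping only
# pairs whose normalized reference is non-empty.
for pred, label in zip(pred_str, label_str):
    norm_label = normalizer(label)
    if len(norm_label) > 0:
        norm_pred_str.append(normalizer(pred))
        norm_label_str.append(norm_label)

wer = 100 * metric.compute(predictions=norm_pred_str, references=norm_label_str)
print(f"orthographic WER: {wer_ortho:.2f}, normalized WER: {wer:.2f}")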
run_large_training.sh
CHANGED
@@ -8,7 +8,7 @@ TOKENIZERS_PARALLELISM=false python3 run_distillation.py \
   --eval_dataset_name "NbAiLab/annotated_distil_raw_ncc_speech_v7_compact8_large" \
   --eval_dataset_config_name "no" \
   --eval_split_name "validation_norwegian_fleurs" \
-  --eval_steps
+  --eval_steps 10 \
   --save_steps 1000 \
   --warmup_steps 100 \
   --learning_rate 0.0001 \
@@ -19,7 +19,7 @@ TOKENIZERS_PARALLELISM=false python3 run_distillation.py \
   --wer_threshold 10 \
   --per_device_train_batch_size 16\
   --per_device_eval_batch_size 16 \
-  --dataloader_num_workers
+  --dataloader_num_workers 32 \
   --dtype "bfloat16" \
   --output_dir "./" \
   --do_train \
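For context on the run_large_training.sh change: before this commit, --eval_steps and --dataloader_num_workers were passed without values (and, as extracted, without the trailing backslash that continues the multi-line command, which on its own would split the invocation in two). Any parser that expects an integer value fails when a bare flag is followed directly by another option. A minimal sketch with a plain argparse stand-in (an assumption; the script presumably parses these flags via Hugging Face dataclass arguments, which behave the same way here):

import argparse

# Hypothetical stand-in for the training script's real argument parser.
parser = argparse.ArgumentParser()
parser.add_argument("--eval_steps", type=int, default=500)
parser.add_argument("--dataloader_num_workers", type=int, default=0)

# Post-fix form: every value-taking flag carries a value.
args = parser.parse_args(["--eval_steps", "10", "--dataloader_num_workers", "32"])
print(args.eval_steps, args.dataloader_num_workers)  # -> 10 32

# Pre-fix form: the next token starts with "--", so argparse refuses to use it
# as a value and exits with "argument --eval_steps: expected one argument".
# parser.parse_args(["--eval_steps", "--dataloader_num_workers", "32"])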