pere committed
Commit 21c1ad0 · 1 Parent(s): e16e8e1
config.json CHANGED
@@ -281,7 +281,7 @@
     50257
   ],
   "torch_dtype": "float32",
-  "transformers_version": "4.46.1",
+  "transformers_version": "4.46.2",
   "use_cache": true,
   "use_weighted_layer_sum": false,
   "vocab_size": 51866
distil_whisper/__pycache__/__init__.cpython-310.pyc CHANGED
Binary files a/distil_whisper/__pycache__/__init__.cpython-310.pyc and b/distil_whisper/__pycache__/__init__.cpython-310.pyc differ
 
distil_whisper/__pycache__/layers.cpython-310.pyc CHANGED
Binary files a/distil_whisper/__pycache__/layers.cpython-310.pyc and b/distil_whisper/__pycache__/layers.cpython-310.pyc differ
 
distil_whisper/__pycache__/modeling_flax_whisper.cpython-310.pyc CHANGED
Binary files a/distil_whisper/__pycache__/modeling_flax_whisper.cpython-310.pyc and b/distil_whisper/__pycache__/modeling_flax_whisper.cpython-310.pyc differ
 
distil_whisper/__pycache__/partitioner.cpython-310.pyc CHANGED
Binary files a/distil_whisper/__pycache__/partitioner.cpython-310.pyc and b/distil_whisper/__pycache__/partitioner.cpython-310.pyc differ
 
distil_whisper/__pycache__/pipeline.cpython-310.pyc CHANGED
Binary files a/distil_whisper/__pycache__/pipeline.cpython-310.pyc and b/distil_whisper/__pycache__/pipeline.cpython-310.pyc differ
 
distil_whisper/__pycache__/train_state.cpython-310.pyc CHANGED
Binary files a/distil_whisper/__pycache__/train_state.cpython-310.pyc and b/distil_whisper/__pycache__/train_state.cpython-310.pyc differ
 
generation_config.json CHANGED
@@ -266,6 +266,6 @@
     "transcribe": 50360,
     "translate": 50359
   },
-  "transformers_version": "4.46.1",
+  "transformers_version": "4.46.2",
   "use_scan": false
 }
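generation_config.json carries the same version bump; the task token ids (transcribe → 50360, translate → 50359) are untouched. A hedged sketch of inspecting both, again assuming the checkpoint directory is the working directory:

from transformers import GenerationConfig

gen_config = GenerationConfig.from_pretrained("./")  # "./" assumes the checkpoint root
print(gen_config.transformers_version)  # expected: "4.46.2"
# Whisper checkpoints usually keep these ids under a "task_to_id" mapping;
# that key name is an assumption, since the hunk above shows only the body.
print(gen_config.task_to_id)  # {'transcribe': 50360, 'translate': 50359}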
run_large_training.sh CHANGED
@@ -11,15 +11,15 @@ TOKENIZERS_PARALLELISM=false python3 run_distillation_nodes.py \
   --eval_steps 500 \
   --save_steps 1000 \
   --warmup_steps 1000 \
-  --learning_rate 0.0001 \
+  --learning_rate 0.0003 \
   --lr_scheduler_type "linear" \
-  --logging_steps 25 \
+  --logging_steps 200 \
   --save_total_limit 1 \
-  --max_steps 100000 \
+  --max_steps 50000 \
   --wer_threshold 10 \
   --per_device_train_batch_size 16\
   --per_device_eval_batch_size 16 \
-  --dataloader_num_workers 32 \
+  --dataloader_num_workers 16 \
   --dtype "bfloat16" \
   --output_dir "./" \
   --do_train \
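Taken together, the run_large_training.sh changes triple the peak learning rate (0.0001 → 0.0003), halve the step budget (100,000 → 50,000), log 8× less often (every 200 steps instead of every 25), and halve the dataloader workers (32 → 16). With --warmup_steps still 1000 and --lr_scheduler_type "linear", the implied schedule is roughly the sketch below; the function name and shape are illustrative assumptions, not code from run_distillation_nodes.py.

def linear_schedule(step, peak_lr=3e-4, warmup_steps=1_000, max_steps=50_000):
    """Linear warmup to peak_lr, then linear decay to zero at max_steps."""
    if step < warmup_steps:
        return peak_lr * step / warmup_steps
    remaining = max(max_steps - step, 0)
    return peak_lr * remaining / (max_steps - warmup_steps)

print(linear_schedule(500))     # mid-warmup: 1.5e-04
print(linear_schedule(1_000))   # peak: 3e-04
print(linear_schedule(50_000))  # end of run: 0.0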