shaobai880824 committed on
Commit
7b0aef3
·
verified ·
1 Parent(s): 81dae4e

Model save

Browse files
README.md CHANGED
@@ -34,20 +34,19 @@ More information needed
34
 
35
  The following hyperparameters were used during training:
36
  - learning_rate: 1e-05
37
- - train_batch_size: 2
38
  - eval_batch_size: 1
39
  - seed: 42
40
- - gradient_accumulation_steps: 8
41
  - total_train_batch_size: 16
42
  - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
43
  - lr_scheduler_type: cosine
44
- - lr_scheduler_warmup_steps: 300
45
- - training_steps: 3000
46
- - mixed_precision_training: Native AMP
47
 
48
  ### Framework versions
49
 
50
  - Transformers 4.53.2
51
  - Pytorch 2.6.0+cu124
52
- - Datasets 4.0.0
53
  - Tokenizers 0.21.2
 
34
 
35
  The following hyperparameters were used during training:
36
  - learning_rate: 1e-05
37
+ - train_batch_size: 1
38
  - eval_batch_size: 1
39
  - seed: 42
40
+ - gradient_accumulation_steps: 16
41
  - total_train_batch_size: 16
42
  - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
43
  - lr_scheduler_type: cosine
44
+ - lr_scheduler_warmup_steps: 150
45
+ - training_steps: 1500
 
46
 
47
  ### Framework versions
48
 
49
  - Transformers 4.53.2
50
  - Pytorch 2.6.0+cu124
51
+ - Datasets 2.14.4
52
  - Tokenizers 0.21.2
config.json CHANGED
@@ -6,7 +6,6 @@
6
  "WhisperForConditionalGeneration"
7
  ],
8
  "attention_dropout": 0.0,
9
- "begin_suppress_tokens": null,
10
  "bos_token_id": 50257,
11
  "classifier_proj_size": 256,
12
  "d_model": 1280,
@@ -21,7 +20,6 @@
21
  "encoder_layerdrop": 0.0,
22
  "encoder_layers": 32,
23
  "eos_token_id": 50257,
24
- "forced_decoder_ids": null,
25
  "init_std": 0.02,
26
  "is_encoder_decoder": true,
27
  "mask_feature_length": 10,
@@ -30,7 +28,6 @@
30
  "mask_time_length": 10,
31
  "mask_time_min_masks": 2,
32
  "mask_time_prob": 0.05,
33
- "max_length": null,
34
  "max_source_positions": 1500,
35
  "max_target_positions": 448,
36
  "median_filter_width": 7,
@@ -39,7 +36,7 @@
39
  "num_mel_bins": 80,
40
  "pad_token_id": 50257,
41
  "scale_embedding": false,
42
- "torch_dtype": "float16",
43
  "transformers_version": "4.53.2",
44
  "use_cache": false,
45
  "use_weighted_layer_sum": false,
 
6
  "WhisperForConditionalGeneration"
7
  ],
8
  "attention_dropout": 0.0,
 
9
  "bos_token_id": 50257,
10
  "classifier_proj_size": 256,
11
  "d_model": 1280,
 
20
  "encoder_layerdrop": 0.0,
21
  "encoder_layers": 32,
22
  "eos_token_id": 50257,
 
23
  "init_std": 0.02,
24
  "is_encoder_decoder": true,
25
  "mask_feature_length": 10,
 
28
  "mask_time_length": 10,
29
  "mask_time_min_masks": 2,
30
  "mask_time_prob": 0.05,
 
31
  "max_source_positions": 1500,
32
  "max_target_positions": 448,
33
  "median_filter_width": 7,
 
36
  "num_mel_bins": 80,
37
  "pad_token_id": 50257,
38
  "scale_embedding": false,
39
+ "torch_dtype": "float32",
40
  "transformers_version": "4.53.2",
41
  "use_cache": false,
42
  "use_weighted_layer_sum": false,
generation_config.json CHANGED
@@ -218,7 +218,96 @@
218
  "pad_token_id": 50257,
219
  "prev_sot_token_id": 50361,
220
  "return_timestamps": false,
221
- "suppress_tokens": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
  "task_to_id": {
223
  "transcribe": 50359,
224
  "translate": 50358
 
218
  "pad_token_id": 50257,
219
  "prev_sot_token_id": 50361,
220
  "return_timestamps": false,
221
+ "suppress_tokens": [
222
+ 1,
223
+ 2,
224
+ 7,
225
+ 8,
226
+ 9,
227
+ 10,
228
+ 14,
229
+ 25,
230
+ 26,
231
+ 27,
232
+ 28,
233
+ 29,
234
+ 31,
235
+ 58,
236
+ 59,
237
+ 60,
238
+ 61,
239
+ 62,
240
+ 63,
241
+ 90,
242
+ 91,
243
+ 92,
244
+ 93,
245
+ 359,
246
+ 503,
247
+ 522,
248
+ 542,
249
+ 873,
250
+ 893,
251
+ 902,
252
+ 918,
253
+ 922,
254
+ 931,
255
+ 1350,
256
+ 1853,
257
+ 1982,
258
+ 2460,
259
+ 2627,
260
+ 3246,
261
+ 3253,
262
+ 3268,
263
+ 3536,
264
+ 3846,
265
+ 3961,
266
+ 4183,
267
+ 4667,
268
+ 6585,
269
+ 6647,
270
+ 7273,
271
+ 9061,
272
+ 9383,
273
+ 10428,
274
+ 10929,
275
+ 11938,
276
+ 12033,
277
+ 12331,
278
+ 12562,
279
+ 13793,
280
+ 14157,
281
+ 14635,
282
+ 15265,
283
+ 15618,
284
+ 16553,
285
+ 16604,
286
+ 18362,
287
+ 18956,
288
+ 20075,
289
+ 21675,
290
+ 22520,
291
+ 26130,
292
+ 26161,
293
+ 26435,
294
+ 28279,
295
+ 29464,
296
+ 31650,
297
+ 32302,
298
+ 32470,
299
+ 36865,
300
+ 42863,
301
+ 47425,
302
+ 49870,
303
+ 50254,
304
+ 50258,
305
+ 50358,
306
+ 50359,
307
+ 50360,
308
+ 50361,
309
+ 50362
310
+ ],
311
  "task_to_id": {
312
  "transcribe": 50359,
313
  "translate": 50358
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3508d59734fe44bab3c9b3f1f413d0d808cdeba5dbf8ce0a6fc76cdd9982e2d
3
+ size 4992706480
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3b6b78a5b694bed18907c4ee943758dfae3d579257411381e7786d6300d0a58
3
+ size 1180663192
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c6038b811fd79b4ce583fbd7f04f9e13dc4b2b623c9c7227cff6927b45c2721
3
  size 5560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:137d0929f18565db19c616bcb7392ee4c71db13d6459f3f5e43b4d22e421d5b2
3
  size 5560