Marxx01 commited on
Commit
142ed3c
·
verified ·
1 Parent(s): 8905aef

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. model.safetensors +1 -1
  2. optimizer.pt +1 -1
  3. rng_state.pth +0 -0
  4. scheduler.pt +0 -0
  5. trainer_state.json +143 -3
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2ccb1201adc7ba3dce45c039a56a518750657ba48b9dc6407631be5a9ec0851
3
  size 1426462208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a594d700ae7ced351be9f9fa205e77a1c04484c30562fc301b8498ded4627f7
3
  size 1426462208
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a11c2381fc62f5a000590233a5f5371a956d9b0994b73d666ec134c93e250cb3
3
  size 2853107898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2409b4f78e63edadb2a0bf9adfb64ff1e7e25ec40d11b7709030002d2cded2e1
3
  size 2853107898
rng_state.pth CHANGED
Binary files a/rng_state.pth and b/rng_state.pth differ
 
scheduler.pt CHANGED
Binary files a/scheduler.pt and b/scheduler.pt differ
 
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.18651009765668713,
5
  "eval_steps": 500,
6
- "global_step": 400000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -287,6 +287,146 @@
287
  "learning_rate": 4.067789892644788e-05,
288
  "loss": 2.6014,
289
  "step": 400000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
  }
291
  ],
292
  "logging_steps": 10000,
@@ -306,7 +446,7 @@
306
  "attributes": {}
307
  }
308
  },
309
- "total_flos": 9.86304678504589e+18,
310
  "train_batch_size": 4,
311
  "trial_name": null,
312
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.2797651464850307,
5
  "eval_steps": 500,
6
+ "global_step": 600000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
287
  "learning_rate": 4.067789892644788e-05,
288
  "loss": 2.6014,
289
  "step": 400000
290
+ },
291
+ {
292
+ "epoch": 0.1911728500981043,
293
+ "grad_norm": 0.8321977257728577,
294
+ "learning_rate": 4.044485455942585e-05,
295
+ "loss": 2.5964,
296
+ "step": 410000
297
+ },
298
+ {
299
+ "epoch": 0.19583560253952148,
300
+ "grad_norm": 1.3375693559646606,
301
+ "learning_rate": 4.021178687864161e-05,
302
+ "loss": 2.5906,
303
+ "step": 420000
304
+ },
305
+ {
306
+ "epoch": 0.20049835498093868,
307
+ "grad_norm": 0.8211286067962646,
308
+ "learning_rate": 3.997874251161958e-05,
309
+ "loss": 2.5866,
310
+ "step": 430000
311
+ },
312
+ {
313
+ "epoch": 0.20516110742235585,
314
+ "grad_norm": 0.7890422940254211,
315
+ "learning_rate": 3.9745698144597546e-05,
316
+ "loss": 2.5817,
317
+ "step": 440000
318
+ },
319
+ {
320
+ "epoch": 0.20982385986377303,
321
+ "grad_norm": 1.0580294132232666,
322
+ "learning_rate": 3.951263046381331e-05,
323
+ "loss": 2.5776,
324
+ "step": 450000
325
+ },
326
+ {
327
+ "epoch": 0.2144866123051902,
328
+ "grad_norm": 1.0666168928146362,
329
+ "learning_rate": 3.927956278302907e-05,
330
+ "loss": 2.5729,
331
+ "step": 460000
332
+ },
333
+ {
334
+ "epoch": 0.21914936474660737,
335
+ "grad_norm": 1.0440067052841187,
336
+ "learning_rate": 3.904651841600704e-05,
337
+ "loss": 2.5748,
338
+ "step": 470000
339
+ },
340
+ {
341
+ "epoch": 0.22381211718802457,
342
+ "grad_norm": 0.8746099472045898,
343
+ "learning_rate": 3.88134507352228e-05,
344
+ "loss": 2.5704,
345
+ "step": 480000
346
+ },
347
+ {
348
+ "epoch": 0.22847486962944175,
349
+ "grad_norm": 0.882897675037384,
350
+ "learning_rate": 3.858040636820078e-05,
351
+ "loss": 2.5623,
352
+ "step": 490000
353
+ },
354
+ {
355
+ "epoch": 0.23313762207085892,
356
+ "grad_norm": 0.8458369970321655,
357
+ "learning_rate": 3.834740862870316e-05,
358
+ "loss": 2.5612,
359
+ "step": 500000
360
+ },
361
+ {
362
+ "epoch": 0.2378003745122761,
363
+ "grad_norm": 0.9579658508300781,
364
+ "learning_rate": 3.811434094791892e-05,
365
+ "loss": 2.5551,
366
+ "step": 510000
367
+ },
368
+ {
369
+ "epoch": 0.24246312695369326,
370
+ "grad_norm": 1.0498722791671753,
371
+ "learning_rate": 3.78813198946591e-05,
372
+ "loss": 2.5502,
373
+ "step": 520000
374
+ },
375
+ {
376
+ "epoch": 0.24712587939511046,
377
+ "grad_norm": 1.032334804534912,
378
+ "learning_rate": 3.764825221387486e-05,
379
+ "loss": 2.5534,
380
+ "step": 530000
381
+ },
382
+ {
383
+ "epoch": 0.2517886318365276,
384
+ "grad_norm": 0.9145790934562683,
385
+ "learning_rate": 3.7415184533090624e-05,
386
+ "loss": 2.547,
387
+ "step": 540000
388
+ },
389
+ {
390
+ "epoch": 0.2564513842779448,
391
+ "grad_norm": 1.0633904933929443,
392
+ "learning_rate": 3.71821634798308e-05,
393
+ "loss": 2.543,
394
+ "step": 550000
395
+ },
396
+ {
397
+ "epoch": 0.261114136719362,
398
+ "grad_norm": 0.9828123450279236,
399
+ "learning_rate": 3.694911911280877e-05,
400
+ "loss": 2.5398,
401
+ "step": 560000
402
+ },
403
+ {
404
+ "epoch": 0.2657768891607792,
405
+ "grad_norm": 0.8735861778259277,
406
+ "learning_rate": 3.671607474578674e-05,
407
+ "loss": 2.5345,
408
+ "step": 570000
409
+ },
410
+ {
411
+ "epoch": 0.27043964160219636,
412
+ "grad_norm": 1.1347264051437378,
413
+ "learning_rate": 3.6483053692526915e-05,
414
+ "loss": 2.5312,
415
+ "step": 580000
416
+ },
417
+ {
418
+ "epoch": 0.27510239404361353,
419
+ "grad_norm": 0.8275557160377502,
420
+ "learning_rate": 3.625000932550489e-05,
421
+ "loss": 2.5299,
422
+ "step": 590000
423
+ },
424
+ {
425
+ "epoch": 0.2797651464850307,
426
+ "grad_norm": 0.8891148567199707,
427
+ "learning_rate": 3.601696495848285e-05,
428
+ "loss": 2.5301,
429
+ "step": 600000
430
  }
431
  ],
432
  "logging_steps": 10000,
 
446
  "attributes": {}
447
  }
448
  },
449
+ "total_flos": 1.4794966500590223e+19,
450
  "train_batch_size": 4,
451
  "trial_name": null,
452
  "trial_params": null