Obeida commited on
Commit
dfc96f0
·
verified ·
1 Parent(s): dd4d51e

Training in progress, step 600, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff65517a5e9f73ab753caee129180a904c7be8f2d1906184ca1d9133e6c17d4e
3
  size 295488936
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a9fe368cada65a1d14e0d43824db3cfa6564a7e9352735addd8e143a521f036
3
  size 295488936
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2945835dfba158e8c92d9a6bbb20ef702ed8937783f2a27f58ae9e0c926e7003
3
  size 591203178
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d891655449340c3c183b53db468d2b34e252acd512740f3ba321650b6e87a4db
3
  size 591203178
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c062f7f375beded48b5337f5a3f3a5cb38807fa3e85dbf3e294c0ab6b627bfc2
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69f50a692634404f2eebb2eab9f456865957578d752987bc52d843ac2a774366
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c10fa2121ab6afd6fed1f0664ee144253e5014a952691dbd6b2d21b27fc1ee76
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a04d66d5cf9f88eb471f3ee880f779ee95d8aa2fa1c7189416b5337797176b58
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.819672131147541,
5
  "eval_steps": 100,
6
- "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -397,6 +397,84 @@
397
  "eval_samples_per_second": 1.092,
398
  "eval_steps_per_second": 1.092,
399
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
400
  }
401
  ],
402
  "logging_steps": 10,
@@ -416,7 +494,7 @@
416
  "attributes": {}
417
  }
418
  },
419
- "total_flos": 9342816307322880.0,
420
  "train_batch_size": 1,
421
  "trial_name": null,
422
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9836065573770492,
5
  "eval_steps": 100,
6
+ "global_step": 600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
397
  "eval_samples_per_second": 1.092,
398
  "eval_steps_per_second": 1.092,
399
  "step": 500
400
+ },
401
+ {
402
+ "epoch": 0.8360655737704918,
403
+ "grad_norm": 0.8413804173469543,
404
+ "learning_rate": 7.965471193905954e-06,
405
+ "loss": 0.114,
406
+ "step": 510
407
+ },
408
+ {
409
+ "epoch": 0.8524590163934426,
410
+ "grad_norm": 0.9656268954277039,
411
+ "learning_rate": 6.4857379484922375e-06,
412
+ "loss": 0.1114,
413
+ "step": 520
414
+ },
415
+ {
416
+ "epoch": 0.8688524590163934,
417
+ "grad_norm": 0.6842993497848511,
418
+ "learning_rate": 5.148456576529081e-06,
419
+ "loss": 0.1265,
420
+ "step": 530
421
+ },
422
+ {
423
+ "epoch": 0.8852459016393442,
424
+ "grad_norm": 1.1006067991256714,
425
+ "learning_rate": 3.958004912496127e-06,
426
+ "loss": 0.1418,
427
+ "step": 540
428
+ },
429
+ {
430
+ "epoch": 0.9016393442622951,
431
+ "grad_norm": 0.8723937273025513,
432
+ "learning_rate": 2.918280117043709e-06,
433
+ "loss": 0.1555,
434
+ "step": 550
435
+ },
436
+ {
437
+ "epoch": 0.9180327868852459,
438
+ "grad_norm": 0.810757577419281,
439
+ "learning_rate": 2.032685918926508e-06,
440
+ "loss": 0.1195,
441
+ "step": 560
442
+ },
443
+ {
444
+ "epoch": 0.9344262295081968,
445
+ "grad_norm": 1.3418116569519043,
446
+ "learning_rate": 1.3041214722768035e-06,
447
+ "loss": 0.1395,
448
+ "step": 570
449
+ },
450
+ {
451
+ "epoch": 0.9508196721311475,
452
+ "grad_norm": 0.7165635228157043,
453
+ "learning_rate": 7.349718656945504e-07,
454
+ "loss": 0.1296,
455
+ "step": 580
456
+ },
457
+ {
458
+ "epoch": 0.9672131147540983,
459
+ "grad_norm": 0.7757364511489868,
460
+ "learning_rate": 3.271003142248652e-07,
461
+ "loss": 0.1411,
462
+ "step": 590
463
+ },
464
+ {
465
+ "epoch": 0.9836065573770492,
466
+ "grad_norm": 0.8983824253082275,
467
+ "learning_rate": 8.184205978370996e-08,
468
+ "loss": 0.1174,
469
+ "step": 600
470
+ },
471
+ {
472
+ "epoch": 0.9836065573770492,
473
+ "eval_loss": 0.12527307868003845,
474
+ "eval_runtime": 62.1952,
475
+ "eval_samples_per_second": 1.093,
476
+ "eval_steps_per_second": 1.093,
477
+ "step": 600
478
  }
479
  ],
480
  "logging_steps": 10,
 
494
  "attributes": {}
495
  }
496
  },
497
+ "total_flos": 1.115979432247296e+16,
498
  "train_batch_size": 1,
499
  "trial_name": null,
500
  "trial_params": null