Training in progress, step 500, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +714 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bbafb981a2d288bfdf7a1f89cc4b62dbc98cdc1cd677ecb903ab1f9a9619dd25
 size 479005064

 version https://git-lfs.github.com/spec/v1
+oid sha256:37e5c19e4276056fe2a42d97b80630a7bf234e6647c2a5eae2d3b842c114f039
 size 479005064

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:189c19b01d2dc2621bf018ac3c73307ea9665095dc9f13ac4a1ce34b221c3229
 size 243802996

 version https://git-lfs.github.com/spec/v1
+oid sha256:dcf792015115532ea39dc0e3f0b579199422a8357ed558246e31fabdaca0f731
 size 243802996

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:47dc72b99375775a7231a5da72866a698ea424411b3309fa35c38dc469701432
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:86faad75af495502c6842f34c5c8a26e4ac90c6e64a473ff1143b8216db293ef
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:505f9225762b105f8ca5168f44d99b2f8467174f4ade85f1cc95f684fbd828e0
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe1d153de177b356f9e3a70d6e4ec979560b0c300994e71ca4cb89afc74c5b3a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.4342938661575317,
-  "best_model_checkpoint": "miner_id_24/checkpoint-400",
-  "epoch": 0.03787699446048956,
   "eval_steps": 100,
-  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2847,6 +2847,714 @@
       "eval_samples_per_second": 21.405,
       "eval_steps_per_second": 5.351,
       "step": 400
     }
   ],
   "logging_steps": 1,
@@ -2870,12 +3578,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.276333740621824e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.4300745725631714,
+  "best_model_checkpoint": "miner_id_24/checkpoint-500",
+  "epoch": 0.04734624307561195,
   "eval_steps": 100,
+  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 21.405,
       "eval_steps_per_second": 5.351,
       "step": 400
+    },
+    {
+      "epoch": 0.037971686946640784,
+      "grad_norm": 0.8284499049186707,
+      "learning_rate": 1.013396731136465e-05,
+      "loss": 1.518,
+      "step": 401
+    },
+    {
+      "epoch": 0.03806637943279201,
+      "grad_norm": 0.8855971097946167,
+      "learning_rate": 9.937309365446973e-06,
+      "loss": 1.5035,
+      "step": 402
+    },
+    {
+      "epoch": 0.03816107191894323,
+      "grad_norm": 0.8813216686248779,
+      "learning_rate": 9.742367571857091e-06,
+      "loss": 1.4783,
+      "step": 403
+    },
+    {
+      "epoch": 0.038255764405094456,
+      "grad_norm": 0.9831085205078125,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 1.4681,
+      "step": 404
+    },
+    {
+      "epoch": 0.03835045689124568,
+      "grad_norm": 0.9731152057647705,
+      "learning_rate": 9.357665770419244e-06,
+      "loss": 1.5385,
+      "step": 405
+    },
+    {
+      "epoch": 0.0384451493773969,
+      "grad_norm": 0.8489096164703369,
+      "learning_rate": 9.167922241916055e-06,
+      "loss": 1.4937,
+      "step": 406
+    },
+    {
+      "epoch": 0.03853984186354813,
+      "grad_norm": 1.0949358940124512,
+      "learning_rate": 8.97992782372432e-06,
+      "loss": 1.6593,
+      "step": 407
+    },
+    {
+      "epoch": 0.03863453434969935,
+      "grad_norm": 0.919141411781311,
+      "learning_rate": 8.793690568899216e-06,
+      "loss": 1.5539,
+      "step": 408
+    },
+    {
+      "epoch": 0.038729226835850575,
+      "grad_norm": 0.7879128456115723,
+      "learning_rate": 8.609218455224893e-06,
+      "loss": 1.4344,
+      "step": 409
+    },
+    {
+      "epoch": 0.0388239193220018,
+      "grad_norm": 0.8573994040489197,
+      "learning_rate": 8.426519384872733e-06,
+      "loss": 1.3365,
+      "step": 410
+    },
+    {
+      "epoch": 0.03891861180815302,
+      "grad_norm": 0.9220760464668274,
+      "learning_rate": 8.245601184062852e-06,
+      "loss": 1.5665,
+      "step": 411
+    },
+    {
+      "epoch": 0.039013304294304246,
+      "grad_norm": 1.032471776008606,
+      "learning_rate": 8.066471602728803e-06,
+      "loss": 1.6067,
+      "step": 412
+    },
+    {
+      "epoch": 0.03910799678045547,
+      "grad_norm": 1.0284119844436646,
+      "learning_rate": 7.889138314185678e-06,
+      "loss": 1.5696,
+      "step": 413
+    },
+    {
+      "epoch": 0.039202689266606694,
+      "grad_norm": 0.9169747233390808,
+      "learning_rate": 7.71360891480134e-06,
+      "loss": 1.4384,
+      "step": 414
+    },
+    {
+      "epoch": 0.03929738175275792,
+      "grad_norm": 0.9162052869796753,
+      "learning_rate": 7.539890923671062e-06,
+      "loss": 1.5036,
+      "step": 415
+    },
+    {
+      "epoch": 0.03939207423890914,
+      "grad_norm": 0.8811019659042358,
+      "learning_rate": 7.367991782295391e-06,
+      "loss": 1.3255,
+      "step": 416
+    },
+    {
+      "epoch": 0.039486766725060365,
+      "grad_norm": 0.8802171349525452,
+      "learning_rate": 7.197918854261432e-06,
+      "loss": 1.4396,
+      "step": 417
+    },
+    {
+      "epoch": 0.03958145921121159,
+      "grad_norm": 0.9827845096588135,
+      "learning_rate": 7.029679424927365e-06,
+      "loss": 1.5017,
+      "step": 418
+    },
+    {
+      "epoch": 0.03967615169736281,
+      "grad_norm": 1.6350576877593994,
+      "learning_rate": 6.863280701110408e-06,
+      "loss": 1.4623,
+      "step": 419
+    },
+    {
+      "epoch": 0.03977084418351404,
+      "grad_norm": 0.9150916337966919,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 1.3007,
+      "step": 420
+    },
+    {
+      "epoch": 0.03986553666966526,
+      "grad_norm": 1.0812715291976929,
+      "learning_rate": 6.536033802742813e-06,
+      "loss": 1.6212,
+      "step": 421
+    },
+    {
+      "epoch": 0.039960229155816485,
+      "grad_norm": 0.908984363079071,
+      "learning_rate": 6.375199646360142e-06,
+      "loss": 1.302,
+      "step": 422
+    },
+    {
+      "epoch": 0.04005492164196771,
+      "grad_norm": 1.0411983728408813,
+      "learning_rate": 6.216234231230012e-06,
+      "loss": 1.3603,
+      "step": 423
+    },
+    {
+      "epoch": 0.04014961412811893,
+      "grad_norm": 0.8334738612174988,
+      "learning_rate": 6.059144366901736e-06,
+      "loss": 1.3487,
+      "step": 424
+    },
+    {
+      "epoch": 0.040244306614270156,
+      "grad_norm": 0.9687286615371704,
+      "learning_rate": 5.903936782582253e-06,
+      "loss": 1.6879,
+      "step": 425
+    },
+    {
+      "epoch": 0.04033899910042138,
+      "grad_norm": 0.9194291234016418,
+      "learning_rate": 5.750618126847912e-06,
+      "loss": 1.4283,
+      "step": 426
+    },
+    {
+      "epoch": 0.040433691586572604,
+      "grad_norm": 0.888853907585144,
+      "learning_rate": 5.599194967359639e-06,
+      "loss": 1.4864,
+      "step": 427
+    },
+    {
+      "epoch": 0.04052838407272383,
+      "grad_norm": 0.9150128960609436,
+      "learning_rate": 5.449673790581611e-06,
+      "loss": 1.4955,
+      "step": 428
+    },
+    {
+      "epoch": 0.04062307655887505,
+      "grad_norm": 0.9649456739425659,
+      "learning_rate": 5.302061001503394e-06,
+      "loss": 1.5217,
+      "step": 429
+    },
+    {
+      "epoch": 0.040717769045026275,
+      "grad_norm": 0.8887634873390198,
+      "learning_rate": 5.156362923365588e-06,
+      "loss": 1.3724,
+      "step": 430
+    },
+    {
+      "epoch": 0.0408124615311775,
+      "grad_norm": 0.9875292181968689,
+      "learning_rate": 5.012585797388936e-06,
+      "loss": 1.5303,
+      "step": 431
+    },
+    {
+      "epoch": 0.04090715401732872,
+      "grad_norm": 0.796764612197876,
+      "learning_rate": 4.87073578250698e-06,
+      "loss": 1.419,
+      "step": 432
+    },
+    {
+      "epoch": 0.04100184650347995,
+      "grad_norm": 0.9389521479606628,
+      "learning_rate": 4.730818955102234e-06,
+      "loss": 1.548,
+      "step": 433
+    },
+    {
+      "epoch": 0.04109653898963117,
+      "grad_norm": 0.8730347752571106,
+      "learning_rate": 4.592841308745932e-06,
+      "loss": 1.4371,
+      "step": 434
+    },
+    {
+      "epoch": 0.041191231475782394,
+      "grad_norm": 0.8850758671760559,
+      "learning_rate": 4.456808753941205e-06,
+      "loss": 1.5498,
+      "step": 435
+    },
+    {
+      "epoch": 0.04128592396193362,
+      "grad_norm": 0.931686282157898,
+      "learning_rate": 4.322727117869951e-06,
+      "loss": 1.4103,
+      "step": 436
+    },
+    {
+      "epoch": 0.04138061644808484,
+      "grad_norm": 0.8667348623275757,
+      "learning_rate": 4.190602144143207e-06,
+      "loss": 1.2572,
+      "step": 437
+    },
+    {
+      "epoch": 0.041475308934236066,
+      "grad_norm": 0.9325525760650635,
+      "learning_rate": 4.06043949255509e-06,
+      "loss": 1.5351,
+      "step": 438
+    },
+    {
+      "epoch": 0.04157000142038729,
+      "grad_norm": 0.9076157808303833,
+      "learning_rate": 3.932244738840379e-06,
+      "loss": 1.4042,
+      "step": 439
+    },
+    {
+      "epoch": 0.041664693906538514,
+      "grad_norm": 0.8434134721755981,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 1.3226,
+      "step": 440
+    },
+    {
+      "epoch": 0.04175938639268974,
+      "grad_norm": 0.8772730231285095,
+      "learning_rate": 3.681780806244095e-06,
+      "loss": 1.48,
+      "step": 441
+    },
+    {
+      "epoch": 0.04185407887884096,
+      "grad_norm": 0.898636519908905,
+      "learning_rate": 3.5595223564037884e-06,
+      "loss": 1.3751,
+      "step": 442
+    },
+    {
+      "epoch": 0.041948771364992185,
+      "grad_norm": 0.9745778441429138,
+      "learning_rate": 3.4392532620598216e-06,
+      "loss": 1.3361,
+      "step": 443
+    },
+    {
+      "epoch": 0.04204346385114341,
+      "grad_norm": 0.9480960965156555,
+      "learning_rate": 3.3209786751399187e-06,
+      "loss": 1.4406,
+      "step": 444
+    },
+    {
+      "epoch": 0.04213815633729463,
+      "grad_norm": 0.9620943665504456,
+      "learning_rate": 3.2047036621337236e-06,
+      "loss": 1.2771,
+      "step": 445
+    },
+    {
+      "epoch": 0.04223284882344586,
+      "grad_norm": 0.9345410466194153,
+      "learning_rate": 3.0904332038757977e-06,
+      "loss": 1.4887,
+      "step": 446
+    },
+    {
+      "epoch": 0.04232754130959708,
+      "grad_norm": 1.3283085823059082,
+      "learning_rate": 2.978172195332263e-06,
+      "loss": 1.2433,
+      "step": 447
+    },
+    {
+      "epoch": 0.042422233795748304,
+      "grad_norm": 0.933115541934967,
+      "learning_rate": 2.8679254453910785e-06,
+      "loss": 1.2802,
+      "step": 448
+    },
+    {
+      "epoch": 0.04251692628189953,
+      "grad_norm": 1.051458716392517,
+      "learning_rate": 2.759697676656098e-06,
+      "loss": 1.2949,
+      "step": 449
+    },
+    {
+      "epoch": 0.04261161876805075,
+      "grad_norm": 0.9788049459457397,
+      "learning_rate": 2.653493525244721e-06,
+      "loss": 1.2153,
+      "step": 450
+    },
+    {
+      "epoch": 0.042706311254201976,
+      "grad_norm": 0.870006263256073,
+      "learning_rate": 2.549317540589308e-06,
+      "loss": 1.574,
+      "step": 451
+    },
+    {
+      "epoch": 0.0428010037403532,
+      "grad_norm": 0.9468023777008057,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 1.4798,
+      "step": 452
+    },
+    {
+      "epoch": 0.04289569622650443,
+      "grad_norm": 0.8150451183319092,
+      "learning_rate": 2.3470678346851518e-06,
+      "loss": 1.396,
+      "step": 453
+    },
+    {
+      "epoch": 0.042990388712655654,
+      "grad_norm": 0.8766146302223206,
+      "learning_rate": 2.2490027771406687e-06,
+      "loss": 1.6015,
+      "step": 454
+    },
+    {
+      "epoch": 0.04308508119880688,
+      "grad_norm": 1.408542275428772,
+      "learning_rate": 2.152983213389559e-06,
+      "loss": 1.4643,
+      "step": 455
+    },
+    {
+      "epoch": 0.0431797736849581,
+      "grad_norm": 0.8867570757865906,
+      "learning_rate": 2.0590132565903476e-06,
+      "loss": 1.451,
+      "step": 456
+    },
+    {
+      "epoch": 0.043274466171109326,
+      "grad_norm": 1.105926275253296,
+      "learning_rate": 1.9670969321032407e-06,
+      "loss": 1.3688,
+      "step": 457
+    },
+    {
+      "epoch": 0.04336915865726055,
+      "grad_norm": 0.8873040080070496,
+      "learning_rate": 1.8772381773176417e-06,
+      "loss": 1.4257,
+      "step": 458
+    },
+    {
+      "epoch": 0.04346385114341177,
+      "grad_norm": 1.0809788703918457,
+      "learning_rate": 1.7894408414835362e-06,
+      "loss": 1.3992,
+      "step": 459
+    },
+    {
+      "epoch": 0.043558543629563,
+      "grad_norm": 0.8864390850067139,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 1.3402,
+      "step": 460
+    },
+    {
+      "epoch": 0.04365323611571422,
+      "grad_norm": 0.9017770290374756,
+      "learning_rate": 1.620045381987012e-06,
+      "loss": 1.3997,
+      "step": 461
+    },
+    {
+      "epoch": 0.043747928601865445,
+      "grad_norm": 0.8999272584915161,
+      "learning_rate": 1.5384545146622852e-06,
+      "loss": 1.3638,
+      "step": 462
+    },
+    {
+      "epoch": 0.04384262108801667,
+      "grad_norm": 0.9070709943771362,
+      "learning_rate": 1.4589395786535953e-06,
+      "loss": 1.4147,
+      "step": 463
+    },
+    {
+      "epoch": 0.04393731357416789,
+      "grad_norm": 1.0017914772033691,
+      "learning_rate": 1.3815039801161721e-06,
+      "loss": 1.3714,
+      "step": 464
+    },
+    {
+      "epoch": 0.044032006060319116,
+      "grad_norm": 0.9015138745307922,
+      "learning_rate": 1.3061510361333185e-06,
+      "loss": 1.49,
+      "step": 465
+    },
+    {
+      "epoch": 0.04412669854647034,
+      "grad_norm": 0.8739427328109741,
+      "learning_rate": 1.232883974574367e-06,
+      "loss": 1.4328,
+      "step": 466
+    },
+    {
+      "epoch": 0.044221391032621564,
+      "grad_norm": 0.9068736433982849,
+      "learning_rate": 1.1617059339563807e-06,
+      "loss": 1.5588,
+      "step": 467
+    },
+    {
+      "epoch": 0.04431608351877279,
+      "grad_norm": 1.0159845352172852,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 1.3826,
+      "step": 468
+    },
+    {
+      "epoch": 0.04441077600492401,
+      "grad_norm": 1.0707426071166992,
+      "learning_rate": 1.0256290220474307e-06,
+      "loss": 1.5956,
+      "step": 469
+    },
+    {
+      "epoch": 0.044505468491075235,
+      "grad_norm": 0.9067130088806152,
+      "learning_rate": 9.607359798384785e-07,
+      "loss": 1.4824,
+      "step": 470
+    },
+    {
+      "epoch": 0.04460016097722646,
+      "grad_norm": 1.027878761291504,
+      "learning_rate": 8.979436164848088e-07,
+      "loss": 1.5338,
+      "step": 471
+    },
+    {
+      "epoch": 0.04469485346337768,
+      "grad_norm": 0.9116301536560059,
+      "learning_rate": 8.372546218022747e-07,
+      "loss": 1.3766,
+      "step": 472
+    },
+    {
+      "epoch": 0.04478954594952891,
+      "grad_norm": 1.078776478767395,
+      "learning_rate": 7.786715955054203e-07,
+      "loss": 1.5215,
+      "step": 473
+    },
+    {
+      "epoch": 0.04488423843568013,
+      "grad_norm": 0.9668835997581482,
+      "learning_rate": 7.221970470961125e-07,
+      "loss": 1.5595,
+      "step": 474
+    },
+    {
+      "epoch": 0.044978930921831355,
+      "grad_norm": 0.927127480506897,
+      "learning_rate": 6.678333957560512e-07,
+      "loss": 1.64,
+      "step": 475
+    },
+    {
+      "epoch": 0.04507362340798258,
+      "grad_norm": 0.9441102147102356,
+      "learning_rate": 6.15582970243117e-07,
+      "loss": 1.5857,
+      "step": 476
+    },
+    {
+      "epoch": 0.0451683158941338,
+      "grad_norm": 0.9293668270111084,
+      "learning_rate": 5.654480087916303e-07,
+      "loss": 1.5782,
+      "step": 477
+    },
+    {
+      "epoch": 0.045263008380285026,
+      "grad_norm": 1.0216238498687744,
+      "learning_rate": 5.174306590164879e-07,
+      "loss": 1.4718,
+      "step": 478
+    },
+    {
+      "epoch": 0.04535770086643625,
+      "grad_norm": 0.7744228839874268,
+      "learning_rate": 4.715329778211375e-07,
+      "loss": 1.1542,
+      "step": 479
+    },
+    {
+      "epoch": 0.045452393352587474,
+      "grad_norm": 0.8553738594055176,
+      "learning_rate": 4.277569313094809e-07,
+      "loss": 1.527,
+      "step": 480
+    },
+    {
+      "epoch": 0.0455470858387387,
+      "grad_norm": 0.8071620464324951,
+      "learning_rate": 3.8610439470164737e-07,
+      "loss": 1.3992,
+      "step": 481
+    },
+    {
+      "epoch": 0.04564177832488992,
+      "grad_norm": 0.9688414931297302,
+      "learning_rate": 3.465771522536854e-07,
+      "loss": 1.6968,
+      "step": 482
+    },
+    {
+      "epoch": 0.045736470811041145,
+      "grad_norm": 1.2488914728164673,
+      "learning_rate": 3.09176897181096e-07,
+      "loss": 1.5449,
+      "step": 483
+    },
+    {
+      "epoch": 0.04583116329719237,
+      "grad_norm": 0.8446553349494934,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 1.3165,
+      "step": 484
+    },
+    {
+      "epoch": 0.04592585578334359,
+      "grad_norm": 1.0321190357208252,
+      "learning_rate": 2.407636663901591e-07,
+      "loss": 1.6571,
+      "step": 485
+    },
+    {
+      "epoch": 0.04602054826949482,
+      "grad_norm": 0.8929604887962341,
+      "learning_rate": 2.0975362126691712e-07,
+      "loss": 1.4237,
+      "step": 486
+    },
+    {
+      "epoch": 0.04611524075564604,
+      "grad_norm": 1.0071064233779907,
+      "learning_rate": 1.8087642458373134e-07,
+      "loss": 1.4537,
+      "step": 487
+    },
+    {
+      "epoch": 0.046209933241797264,
+      "grad_norm": 0.947636604309082,
+      "learning_rate": 1.5413331334360182e-07,
+      "loss": 1.4466,
+      "step": 488
+    },
+    {
+      "epoch": 0.04630462572794849,
+      "grad_norm": 0.9839028716087341,
+      "learning_rate": 1.2952543313240472e-07,
+      "loss": 1.5749,
+      "step": 489
+    },
+    {
+      "epoch": 0.04639931821409971,
+      "grad_norm": 0.9297147989273071,
+      "learning_rate": 1.0705383806982606e-07,
+      "loss": 1.5478,
+      "step": 490
+    },
+    {
+      "epoch": 0.046494010700250936,
+      "grad_norm": 0.9294151663780212,
+      "learning_rate": 8.671949076420882e-08,
+      "loss": 1.4369,
+      "step": 491
+    },
+    {
+      "epoch": 0.04658870318640216,
+      "grad_norm": 1.0332527160644531,
+      "learning_rate": 6.852326227130834e-08,
+      "loss": 1.461,
+      "step": 492
+    },
+    {
+      "epoch": 0.046683395672553384,
+      "grad_norm": 1.4196107387542725,
+      "learning_rate": 5.246593205699424e-08,
+      "loss": 1.4577,
+      "step": 493
+    },
+    {
+      "epoch": 0.04677808815870461,
+      "grad_norm": 0.8845897912979126,
+      "learning_rate": 3.8548187963854956e-08,
+      "loss": 1.1767,
+      "step": 494
+    },
+    {
+      "epoch": 0.04687278064485583,
+      "grad_norm": 1.006624698638916,
+      "learning_rate": 2.6770626181715773e-08,
+      "loss": 1.4136,
+      "step": 495
+    },
+    {
+      "epoch": 0.046967473131007055,
+      "grad_norm": 0.8443537950515747,
+      "learning_rate": 1.7133751222137007e-08,
+      "loss": 1.0819,
+      "step": 496
+    },
+    {
+      "epoch": 0.04706216561715828,
+      "grad_norm": 0.9303872585296631,
+      "learning_rate": 9.637975896759077e-09,
+      "loss": 1.1837,
+      "step": 497
+    },
+    {
+      "epoch": 0.0471568581033095,
+      "grad_norm": 0.9849241375923157,
+      "learning_rate": 4.2836212996499865e-09,
+      "loss": 1.2588,
+      "step": 498
+    },
+    {
+      "epoch": 0.04725155058946073,
+      "grad_norm": 0.9961531162261963,
+      "learning_rate": 1.0709167935385455e-09,
+      "loss": 1.0031,
+      "step": 499
+    },
+    {
+      "epoch": 0.04734624307561195,
+      "grad_norm": 1.51860511302948,
+      "learning_rate": 0.0,
+      "loss": 1.391,
+      "step": 500
+    },
+    {
+      "epoch": 0.04734624307561195,
+      "eval_loss": 1.4300745725631714,
+      "eval_runtime": 829.4124,
+      "eval_samples_per_second": 21.445,
+      "eval_steps_per_second": 5.362,
+      "step": 500
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.84541717577728e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null