Romain-XV committed
Commit d79bd85 · verified · 1 Parent(s): bbe546b

Training in progress, step 200, checkpoint

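The training script itself is not part of this commit, so the exact arguments are unknown. As a rough illustration only, a Trainer configuration consistent with the state recorded below (eval every 50 steps, logging every step, train batch size 4, early stopping with threshold 0.0) might look like the following sketch; the save interval and patience value are guesses inferred from the checkpoint numbers and the stopping behaviour, not facts from the commit.

from transformers import TrainingArguments, EarlyStoppingCallback

# Values marked "from trainer_state.json" appear in the state below; the rest are assumptions.
args = TrainingArguments(
    output_dir="miner_id_24",            # inferred from best_model_checkpoint's parent directory
    per_device_train_batch_size=4,       # from trainer_state.json: train_batch_size
    eval_strategy="steps",
    eval_steps=50,                       # from trainer_state.json: eval_steps
    logging_steps=1,                     # from trainer_state.json: logging_steps
    save_steps=100,                      # assumed from checkpoint-100 / the step-200 checkpoint
    load_best_model_at_end=True,         # required for early stopping on eval_loss
    metric_for_best_model="eval_loss",
    greater_is_better=False,
)

early_stopping = EarlyStoppingCallback(
    early_stopping_patience=2,           # assumed: the patience counter below reaches 2 and training stops
    early_stopping_threshold=0.0,        # from trainer_state.json
)
# These objects would then be passed to Trainer(..., args=args, callbacks=[early_stopping]).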
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:32de89f64bfbdf5257adb9297417907f059af3bb9a4f224da336aef2a6b7bfa8
+oid sha256:267408b82b60dab4b44412e9284bc1f5648d09dec0c8f209b3cf6c1f8a9f63c8
 size 9864
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:206339865b007a0db7499757f0cc0552961328fa94fd512a48b5732e4bb1ebcb
+oid sha256:0c7aed16210dbfe7d4bef5bb06de34a5b5301fc935c146c1b94fd2dbb39fdcb7
 size 24006
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6bca983b309063a996168bc9ba0246dee10aad731d5eafae85ac843af75455c4
+oid sha256:ee42b226fbd651b63bde6fca880bc3d2b1843f2955205d066e9bd7809f09c0a9
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6e9a495185b30e410553401cbf647ae58e45b1f7a5b4cfd1421665ad738e6aa1
+oid sha256:9c1efa6588a3e275e4071dc95251b6e117e499882aa80f6f8f82ea2ac95bdaef
 size 1064
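An LFS pointer like the ones above records only the object's SHA-256 (oid) and byte size, so a downloaded blob can be checked against its pointer with a few lines of Python. A minimal sketch, assuming the checkpoint files have been fetched into a local last-checkpoint/ directory (the path is illustrative):

import hashlib
import os

def matches_lfs_pointer(blob_path, expected_oid, expected_size):
    # Stream the file so large checkpoints do not have to fit in memory.
    digest = hashlib.sha256()
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid and os.path.getsize(blob_path) == expected_size

# oid and size taken from the scheduler.pt pointer in this commit.
print(matches_lfs_pointer(
    "last-checkpoint/scheduler.pt",
    "9c1efa6588a3e275e4071dc95251b6e117e499882aa80f6f8f82ea2ac95bdaef",
    1064,
))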
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
 "best_metric": 11.5,
 "best_model_checkpoint": "miner_id_24/checkpoint-100",
-"epoch": 0.20717337822089862,
+"epoch": 0.41434675644179725,
 "eval_steps": 50,
-"global_step": 100,
+"global_step": 200,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -731,6 +731,722 @@
 "eval_samples_per_second": 238.303,
 "eval_steps_per_second": 59.649,
 "step": 100
734
+ },
735
+ {
736
+ "epoch": 0.2092451120031076,
737
+ "grad_norm": 0.009546121582388878,
738
+ "learning_rate": 0.00018228387237361244,
739
+ "loss": 184.0,
740
+ "step": 101
741
+ },
742
+ {
743
+ "epoch": 0.2113168457853166,
744
+ "grad_norm": 0.010459087789058685,
745
+ "learning_rate": 0.00018190462079662896,
746
+ "loss": 184.0,
747
+ "step": 102
748
+ },
749
+ {
750
+ "epoch": 0.21338857956752558,
751
+ "grad_norm": 0.009236454963684082,
752
+ "learning_rate": 0.00018152175608395814,
753
+ "loss": 184.0,
754
+ "step": 103
755
+ },
756
+ {
757
+ "epoch": 0.21546031334973456,
758
+ "grad_norm": 0.009751472622156143,
759
+ "learning_rate": 0.0001811352951252717,
760
+ "loss": 184.0,
761
+ "step": 104
762
+ },
763
+ {
764
+ "epoch": 0.21753204713194355,
765
+ "grad_norm": 0.01094972062855959,
766
+ "learning_rate": 0.0001807452549688859,
767
+ "loss": 184.0,
768
+ "step": 105
769
+ },
770
+ {
771
+ "epoch": 0.21960378091415253,
772
+ "grad_norm": 0.009608612395823002,
773
+ "learning_rate": 0.0001803516528210096,
774
+ "loss": 184.0,
775
+ "step": 106
776
+ },
777
+ {
778
+ "epoch": 0.22167551469636151,
779
+ "grad_norm": 0.011158065870404243,
780
+ "learning_rate": 0.00017995450604498512,
781
+ "loss": 184.0,
782
+ "step": 107
783
+ },
784
+ {
785
+ "epoch": 0.2237472484785705,
786
+ "grad_norm": 0.011936160735785961,
787
+ "learning_rate": 0.0001795538321605222,
788
+ "loss": 184.0,
789
+ "step": 108
790
+ },
791
+ {
792
+ "epoch": 0.22581898226077948,
793
+ "grad_norm": 0.01062457449734211,
794
+ "learning_rate": 0.00017914964884292544,
795
+ "loss": 184.0,
796
+ "step": 109
797
+ },
798
+ {
799
+ "epoch": 0.22789071604298847,
800
+ "grad_norm": 0.010308627970516682,
801
+ "learning_rate": 0.00017874197392231414,
802
+ "loss": 184.0,
803
+ "step": 110
804
+ },
805
+ {
806
+ "epoch": 0.22996244982519745,
807
+ "grad_norm": 0.009443109855055809,
808
+ "learning_rate": 0.00017833082538283614,
809
+ "loss": 184.0,
810
+ "step": 111
811
+ },
812
+ {
813
+ "epoch": 0.23203418360740644,
814
+ "grad_norm": 0.012800839729607105,
815
+ "learning_rate": 0.00017791622136187422,
816
+ "loss": 184.0,
817
+ "step": 112
818
+ },
819
+ {
820
+ "epoch": 0.23410591738961545,
821
+ "grad_norm": 0.013194529339671135,
822
+ "learning_rate": 0.0001774981801492461,
823
+ "loss": 184.0,
824
+ "step": 113
825
+ },
826
+ {
827
+ "epoch": 0.23617765117182443,
828
+ "grad_norm": 0.013125723227858543,
829
+ "learning_rate": 0.00017707672018639758,
830
+ "loss": 184.0,
831
+ "step": 114
832
+ },
833
+ {
834
+ "epoch": 0.23824938495403342,
835
+ "grad_norm": 0.011432023718953133,
836
+ "learning_rate": 0.000176651860065589,
837
+ "loss": 184.0,
838
+ "step": 115
839
+ },
840
+ {
841
+ "epoch": 0.2403211187362424,
842
+ "grad_norm": 0.00989463273435831,
843
+ "learning_rate": 0.00017622361852907505,
844
+ "loss": 184.0,
845
+ "step": 116
846
+ },
847
+ {
848
+ "epoch": 0.24239285251845138,
849
+ "grad_norm": 0.01200682483613491,
850
+ "learning_rate": 0.000175792014468278,
851
+ "loss": 184.0,
852
+ "step": 117
853
+ },
854
+ {
855
+ "epoch": 0.24446458630066037,
856
+ "grad_norm": 0.010150207206606865,
857
+ "learning_rate": 0.00017535706692295436,
858
+ "loss": 184.0,
859
+ "step": 118
860
+ },
861
+ {
862
+ "epoch": 0.24653632008286935,
863
+ "grad_norm": 0.012173088267445564,
864
+ "learning_rate": 0.0001749187950803549,
865
+ "loss": 184.0,
866
+ "step": 119
867
+ },
868
+ {
869
+ "epoch": 0.24860805386507834,
870
+ "grad_norm": 0.012775926850736141,
871
+ "learning_rate": 0.0001744772182743782,
872
+ "loss": 184.0,
873
+ "step": 120
874
+ },
875
+ {
876
+ "epoch": 0.2506797876472873,
877
+ "grad_norm": 0.01158383209258318,
878
+ "learning_rate": 0.0001740323559847179,
879
+ "loss": 184.0,
880
+ "step": 121
881
+ },
882
+ {
883
+ "epoch": 0.2527515214294963,
884
+ "grad_norm": 0.01167603861540556,
885
+ "learning_rate": 0.0001735842278360032,
886
+ "loss": 184.0,
887
+ "step": 122
888
+ },
889
+ {
890
+ "epoch": 0.2548232552117053,
891
+ "grad_norm": 0.012568404898047447,
892
+ "learning_rate": 0.0001731328535969332,
893
+ "loss": 184.0,
894
+ "step": 123
895
+ },
896
+ {
897
+ "epoch": 0.2568949889939143,
898
+ "grad_norm": 0.01397041417658329,
899
+ "learning_rate": 0.00017267825317940493,
900
+ "loss": 184.0,
901
+ "step": 124
902
+ },
903
+ {
904
+ "epoch": 0.25896672277612326,
905
+ "grad_norm": 0.011861158534884453,
906
+ "learning_rate": 0.00017222044663763484,
907
+ "loss": 184.0,
908
+ "step": 125
909
+ },
910
+ {
911
+ "epoch": 0.26103845655833224,
912
+ "grad_norm": 0.012289059348404408,
913
+ "learning_rate": 0.00017175945416727405,
914
+ "loss": 184.0,
915
+ "step": 126
916
+ },
917
+ {
918
+ "epoch": 0.26311019034054123,
919
+ "grad_norm": 0.013283212669193745,
920
+ "learning_rate": 0.00017129529610451774,
921
+ "loss": 184.0,
922
+ "step": 127
923
+ },
924
+ {
925
+ "epoch": 0.2651819241227502,
926
+ "grad_norm": 0.010840194299817085,
927
+ "learning_rate": 0.00017082799292520768,
928
+ "loss": 184.0,
929
+ "step": 128
930
+ },
931
+ {
932
+ "epoch": 0.2672536579049592,
933
+ "grad_norm": 0.01217116229236126,
934
+ "learning_rate": 0.00017035756524392924,
935
+ "loss": 184.0,
936
+ "step": 129
937
+ },
938
+ {
939
+ "epoch": 0.2693253916871682,
940
+ "grad_norm": 0.014737384393811226,
941
+ "learning_rate": 0.00016988403381310176,
942
+ "loss": 184.0,
943
+ "step": 130
944
+ },
945
+ {
946
+ "epoch": 0.27139712546937717,
947
+ "grad_norm": 0.013226029463112354,
948
+ "learning_rate": 0.0001694074195220634,
949
+ "loss": 184.0,
950
+ "step": 131
951
+ },
952
+ {
953
+ "epoch": 0.27346885925158615,
954
+ "grad_norm": 0.013064621016383171,
955
+ "learning_rate": 0.00016892774339614928,
956
+ "loss": 184.0,
957
+ "step": 132
958
+ },
959
+ {
960
+ "epoch": 0.27554059303379513,
961
+ "grad_norm": 0.013637942261993885,
962
+ "learning_rate": 0.00016844502659576414,
963
+ "loss": 184.0,
964
+ "step": 133
965
+ },
966
+ {
967
+ "epoch": 0.2776123268160041,
968
+ "grad_norm": 0.011332944966852665,
969
+ "learning_rate": 0.0001679592904154489,
970
+ "loss": 184.0,
971
+ "step": 134
972
+ },
973
+ {
974
+ "epoch": 0.2796840605982131,
975
+ "grad_norm": 0.014374660328030586,
976
+ "learning_rate": 0.00016747055628294134,
977
+ "loss": 184.0,
978
+ "step": 135
979
+ },
980
+ {
981
+ "epoch": 0.28175579438042214,
982
+ "grad_norm": 0.010553686879575253,
983
+ "learning_rate": 0.00016697884575823043,
984
+ "loss": 184.0,
985
+ "step": 136
986
+ },
987
+ {
988
+ "epoch": 0.2838275281626311,
989
+ "grad_norm": 0.01155412383377552,
990
+ "learning_rate": 0.00016648418053260585,
991
+ "loss": 184.0,
992
+ "step": 137
993
+ },
994
+ {
995
+ "epoch": 0.2858992619448401,
996
+ "grad_norm": 0.012205103412270546,
997
+ "learning_rate": 0.00016598658242770054,
998
+ "loss": 184.0,
999
+ "step": 138
1000
+ },
1001
+ {
1002
+ "epoch": 0.2879709957270491,
1003
+ "grad_norm": 0.013882125727832317,
1004
+ "learning_rate": 0.00016548607339452853,
1005
+ "loss": 184.0,
1006
+ "step": 139
1007
+ },
1008
+ {
1009
+ "epoch": 0.2900427295092581,
1010
+ "grad_norm": 0.013000452890992165,
1011
+ "learning_rate": 0.00016498267551251616,
1012
+ "loss": 184.0,
1013
+ "step": 140
1014
+ },
1015
+ {
1016
+ "epoch": 0.29211446329146706,
1017
+ "grad_norm": 0.011942530982196331,
1018
+ "learning_rate": 0.0001644764109885284,
1019
+ "loss": 184.0,
1020
+ "step": 141
1021
+ },
1022
+ {
1023
+ "epoch": 0.29418619707367605,
1024
+ "grad_norm": 0.013372701592743397,
1025
+ "learning_rate": 0.00016396730215588915,
1026
+ "loss": 184.0,
1027
+ "step": 142
1028
+ },
1029
+ {
1030
+ "epoch": 0.29625793085588503,
1031
+ "grad_norm": 0.011299816891551018,
1032
+ "learning_rate": 0.00016345537147339579,
1033
+ "loss": 184.0,
1034
+ "step": 143
1035
+ },
1036
+ {
1037
+ "epoch": 0.298329664638094,
1038
+ "grad_norm": 0.013057565316557884,
1039
+ "learning_rate": 0.00016294064152432879,
1040
+ "loss": 184.0,
1041
+ "step": 144
1042
+ },
1043
+ {
1044
+ "epoch": 0.300401398420303,
1045
+ "grad_norm": 0.013032233342528343,
1046
+ "learning_rate": 0.0001624231350154552,
1047
+ "loss": 184.0,
1048
+ "step": 145
1049
+ },
1050
+ {
1051
+ "epoch": 0.302473132202512,
1052
+ "grad_norm": 0.012773418799042702,
1053
+ "learning_rate": 0.00016190287477602718,
1054
+ "loss": 184.0,
1055
+ "step": 146
1056
+ },
1057
+ {
1058
+ "epoch": 0.30454486598472097,
1059
+ "grad_norm": 0.013914387673139572,
1060
+ "learning_rate": 0.00016137988375677467,
1061
+ "loss": 184.0,
1062
+ "step": 147
1063
+ },
1064
+ {
1065
+ "epoch": 0.30661659976692995,
1066
+ "grad_norm": 0.012964308261871338,
1067
+ "learning_rate": 0.00016085418502889316,
1068
+ "loss": 184.0,
1069
+ "step": 148
1070
+ },
1071
+ {
1072
+ "epoch": 0.30868833354913894,
1073
+ "grad_norm": 0.012943675741553307,
1074
+ "learning_rate": 0.00016032580178302583,
1075
+ "loss": 184.0,
1076
+ "step": 149
1077
+ },
1078
+ {
1079
+ "epoch": 0.3107600673313479,
1080
+ "grad_norm": 0.013705256395041943,
1081
+ "learning_rate": 0.00015979475732824048,
1082
+ "loss": 184.0,
1083
+ "step": 150
1084
+ },
1085
+ {
1086
+ "epoch": 0.3107600673313479,
1087
+ "eval_loss": 11.5,
1088
+ "eval_runtime": 6.845,
1089
+ "eval_samples_per_second": 237.545,
1090
+ "eval_steps_per_second": 59.459,
1091
+ "step": 150
1092
+ },
1093
+ {
1094
+ "epoch": 0.3128318011135569,
1095
+ "grad_norm": 0.012948175892233849,
1096
+ "learning_rate": 0.00015926107509100137,
1097
+ "loss": 184.0,
1098
+ "step": 151
1099
+ },
1100
+ {
1101
+ "epoch": 0.3149035348957659,
1102
+ "grad_norm": 0.012896777130663395,
1103
+ "learning_rate": 0.00015872477861413576,
1104
+ "loss": 184.0,
1105
+ "step": 152
1106
+ },
1107
+ {
1108
+ "epoch": 0.3169752686779749,
1109
+ "grad_norm": 0.011514625512063503,
1110
+ "learning_rate": 0.0001581858915557953,
1111
+ "loss": 184.0,
1112
+ "step": 153
1113
+ },
1114
+ {
1115
+ "epoch": 0.31904700246018386,
1116
+ "grad_norm": 0.011680962517857552,
1117
+ "learning_rate": 0.00015764443768841234,
1118
+ "loss": 184.0,
1119
+ "step": 154
1120
+ },
1121
+ {
1122
+ "epoch": 0.32111873624239284,
1123
+ "grad_norm": 0.013176261447370052,
1124
+ "learning_rate": 0.00015710044089765145,
1125
+ "loss": 184.0,
1126
+ "step": 155
1127
+ },
1128
+ {
1129
+ "epoch": 0.32319047002460183,
1130
+ "grad_norm": 0.011944272555410862,
1131
+ "learning_rate": 0.00015655392518135539,
1132
+ "loss": 184.0,
1133
+ "step": 156
1134
+ },
1135
+ {
1136
+ "epoch": 0.3252622038068108,
1137
+ "grad_norm": 0.013128337450325489,
1138
+ "learning_rate": 0.00015600491464848678,
1139
+ "loss": 184.0,
1140
+ "step": 157
1141
+ },
1142
+ {
1143
+ "epoch": 0.3273339375890198,
1144
+ "grad_norm": 0.010075918398797512,
1145
+ "learning_rate": 0.00015545343351806444,
1146
+ "loss": 184.0,
1147
+ "step": 158
1148
+ },
1149
+ {
1150
+ "epoch": 0.3294056713712288,
1151
+ "grad_norm": 0.011055225506424904,
1152
+ "learning_rate": 0.00015489950611809484,
1153
+ "loss": 184.0,
1154
+ "step": 159
1155
+ },
1156
+ {
1157
+ "epoch": 0.33147740515343777,
1158
+ "grad_norm": 0.012351332232356071,
1159
+ "learning_rate": 0.00015434315688449924,
1160
+ "loss": 184.0,
1161
+ "step": 160
1162
+ },
1163
+ {
1164
+ "epoch": 0.33354913893564675,
1165
+ "grad_norm": 0.011134196072816849,
1166
+ "learning_rate": 0.0001537844103600354,
1167
+ "loss": 184.0,
1168
+ "step": 161
1169
+ },
1170
+ {
1171
+ "epoch": 0.33562087271785573,
1172
+ "grad_norm": 0.012073795311152935,
1173
+ "learning_rate": 0.00015322329119321507,
1174
+ "loss": 184.0,
1175
+ "step": 162
1176
+ },
1177
+ {
1178
+ "epoch": 0.3376926065000647,
1179
+ "grad_norm": 0.012623626738786697,
1180
+ "learning_rate": 0.00015265982413721662,
1181
+ "loss": 184.0,
1182
+ "step": 163
1183
+ },
1184
+ {
1185
+ "epoch": 0.3397643402822737,
1186
+ "grad_norm": 0.011703762225806713,
1187
+ "learning_rate": 0.00015209403404879303,
1188
+ "loss": 184.0,
1189
+ "step": 164
1190
+ },
1191
+ {
1192
+ "epoch": 0.3418360740644827,
1193
+ "grad_norm": 0.011324395425617695,
1194
+ "learning_rate": 0.00015152594588717543,
1195
+ "loss": 184.0,
1196
+ "step": 165
1197
+ },
1198
+ {
1199
+ "epoch": 0.3439078078466917,
1200
+ "grad_norm": 0.012606433592736721,
1201
+ "learning_rate": 0.00015095558471297195,
1202
+ "loss": 184.0,
1203
+ "step": 166
1204
+ },
1205
+ {
1206
+ "epoch": 0.3459795416289007,
1207
+ "grad_norm": 0.013505402021110058,
1208
+ "learning_rate": 0.00015038297568706243,
1209
+ "loss": 184.0,
1210
+ "step": 167
1211
+ },
1212
+ {
1213
+ "epoch": 0.3480512754111097,
1214
+ "grad_norm": 0.01219989825040102,
1215
+ "learning_rate": 0.00014980814406948806,
1216
+ "loss": 184.0,
1217
+ "step": 168
1218
+ },
1219
+ {
1220
+ "epoch": 0.3501230091933187,
1221
+ "grad_norm": 0.01135861687362194,
1222
+ "learning_rate": 0.00014923111521833758,
1223
+ "loss": 184.0,
1224
+ "step": 169
1225
+ },
1226
+ {
1227
+ "epoch": 0.35219474297552766,
1228
+ "grad_norm": 0.011894915252923965,
1229
+ "learning_rate": 0.00014865191458862816,
1230
+ "loss": 184.0,
1231
+ "step": 170
1232
+ },
1233
+ {
1234
+ "epoch": 0.35426647675773665,
1235
+ "grad_norm": 0.011499403044581413,
1236
+ "learning_rate": 0.00014807056773118274,
1237
+ "loss": 184.0,
1238
+ "step": 171
1239
+ },
1240
+ {
1241
+ "epoch": 0.35633821053994563,
1242
+ "grad_norm": 0.01057684887200594,
1243
+ "learning_rate": 0.00014748710029150293,
1244
+ "loss": 184.0,
1245
+ "step": 172
1246
+ },
1247
+ {
1248
+ "epoch": 0.3584099443221546,
1249
+ "grad_norm": 0.010864908806979656,
1250
+ "learning_rate": 0.0001469015380086374,
1251
+ "loss": 184.0,
1252
+ "step": 173
1253
+ },
1254
+ {
1255
+ "epoch": 0.3604816781043636,
1256
+ "grad_norm": 0.011844526045024395,
1257
+ "learning_rate": 0.0001463139067140468,
1258
+ "loss": 184.0,
1259
+ "step": 174
1260
+ },
1261
+ {
1262
+ "epoch": 0.3625534118865726,
1263
+ "grad_norm": 0.012101615779101849,
1264
+ "learning_rate": 0.00014572423233046386,
1265
+ "loss": 184.0,
1266
+ "step": 175
1267
+ },
1268
+ {
1269
+ "epoch": 0.36462514566878157,
1270
+ "grad_norm": 0.010460996069014072,
1271
+ "learning_rate": 0.00014513254087075014,
1272
+ "loss": 184.0,
1273
+ "step": 176
1274
+ },
1275
+ {
1276
+ "epoch": 0.36669687945099055,
1277
+ "grad_norm": 0.011071057058870792,
1278
+ "learning_rate": 0.00014453885843674838,
1279
+ "loss": 184.0,
1280
+ "step": 177
1281
+ },
1282
+ {
1283
+ "epoch": 0.36876861323319954,
1284
+ "grad_norm": 0.010831611230969429,
1285
+ "learning_rate": 0.00014394321121813093,
1286
+ "loss": 184.0,
1287
+ "step": 178
1288
+ },
1289
+ {
1290
+ "epoch": 0.3708403470154085,
1291
+ "grad_norm": 0.012190107256174088,
1292
+ "learning_rate": 0.00014334562549124467,
1293
+ "loss": 184.0,
1294
+ "step": 179
1295
+ },
1296
+ {
1297
+ "epoch": 0.3729120807976175,
1298
+ "grad_norm": 0.011053929105401039,
1299
+ "learning_rate": 0.0001427461276179517,
1300
+ "loss": 184.0,
1301
+ "step": 180
1302
+ },
1303
+ {
1304
+ "epoch": 0.3749838145798265,
1305
+ "grad_norm": 0.009843365289270878,
1306
+ "learning_rate": 0.0001421447440444663,
1307
+ "loss": 184.0,
1308
+ "step": 181
1309
+ },
1310
+ {
1311
+ "epoch": 0.3770555483620355,
1312
+ "grad_norm": 0.011174674145877361,
1313
+ "learning_rate": 0.00014154150130018866,
1314
+ "loss": 184.0,
1315
+ "step": 182
1316
+ },
1317
+ {
1318
+ "epoch": 0.37912728214424446,
1319
+ "grad_norm": 0.011116673238575459,
1320
+ "learning_rate": 0.00014093642599653406,
1321
+ "loss": 184.0,
1322
+ "step": 183
1323
+ },
1324
+ {
1325
+ "epoch": 0.38119901592645344,
1326
+ "grad_norm": 0.010997000150382519,
1327
+ "learning_rate": 0.00014032954482575937,
1328
+ "loss": 184.0,
1329
+ "step": 184
1330
+ },
1331
+ {
1332
+ "epoch": 0.38327074970866243,
1333
+ "grad_norm": 0.010498798452317715,
1334
+ "learning_rate": 0.00013972088455978536,
1335
+ "loss": 184.0,
1336
+ "step": 185
1337
+ },
1338
+ {
1339
+ "epoch": 0.3853424834908714,
1340
+ "grad_norm": 0.011176199652254581,
1341
+ "learning_rate": 0.0001391104720490156,
1342
+ "loss": 184.0,
1343
+ "step": 186
1344
+ },
1345
+ {
1346
+ "epoch": 0.3874142172730804,
1347
+ "grad_norm": 0.01269106101244688,
1348
+ "learning_rate": 0.00013849833422115222,
1349
+ "loss": 184.0,
1350
+ "step": 187
1351
+ },
1352
+ {
1353
+ "epoch": 0.3894859510552894,
1354
+ "grad_norm": 0.010353796184062958,
1355
+ "learning_rate": 0.0001378844980800078,
1356
+ "loss": 184.0,
1357
+ "step": 188
1358
+ },
1359
+ {
1360
+ "epoch": 0.39155768483749837,
1361
+ "grad_norm": 0.009553619660437107,
1362
+ "learning_rate": 0.00013726899070431423,
1363
+ "loss": 184.0,
1364
+ "step": 189
1365
+ },
1366
+ {
1367
+ "epoch": 0.39362941861970735,
1368
+ "grad_norm": 0.010300654917955399,
1369
+ "learning_rate": 0.00013665183924652815,
1370
+ "loss": 184.0,
1371
+ "step": 190
1372
+ },
1373
+ {
1374
+ "epoch": 0.39570115240191633,
1375
+ "grad_norm": 0.010640212334692478,
1376
+ "learning_rate": 0.00013603307093163318,
1377
+ "loss": 184.0,
1378
+ "step": 191
1379
+ },
1380
+ {
1381
+ "epoch": 0.3977728861841253,
1382
+ "grad_norm": 0.00991272833198309,
1383
+ "learning_rate": 0.00013541271305593877,
1384
+ "loss": 184.0,
1385
+ "step": 192
1386
+ },
1387
+ {
1388
+ "epoch": 0.3998446199663343,
1389
+ "grad_norm": 0.010035275481641293,
1390
+ "learning_rate": 0.00013479079298587635,
1391
+ "loss": 184.0,
1392
+ "step": 193
1393
+ },
1394
+ {
1395
+ "epoch": 0.4019163537485433,
1396
+ "grad_norm": 0.010469825938344002,
1397
+ "learning_rate": 0.00013416733815679166,
1398
+ "loss": 184.0,
1399
+ "step": 194
1400
+ },
1401
+ {
1402
+ "epoch": 0.40398808753075227,
1403
+ "grad_norm": 0.010510447435081005,
1404
+ "learning_rate": 0.00013354237607173495,
1405
+ "loss": 184.0,
1406
+ "step": 195
1407
+ },
1408
+ {
1409
+ "epoch": 0.4060598213129613,
1410
+ "grad_norm": 0.011126340366899967,
1411
+ "learning_rate": 0.00013291593430024727,
1412
+ "loss": 184.0,
1413
+ "step": 196
1414
+ },
1415
+ {
1416
+ "epoch": 0.4081315550951703,
1417
+ "grad_norm": 0.009824325330555439,
1418
+ "learning_rate": 0.00013228804047714463,
1419
+ "loss": 184.0,
1420
+ "step": 197
1421
+ },
1422
+ {
1423
+ "epoch": 0.4102032888773793,
1424
+ "grad_norm": 0.009497965686023235,
1425
+ "learning_rate": 0.00013165872230129868,
1426
+ "loss": 184.0,
1427
+ "step": 198
1428
+ },
1429
+ {
1430
+ "epoch": 0.41227502265958826,
1431
+ "grad_norm": 0.010396288707852364,
1432
+ "learning_rate": 0.00013102800753441487,
1433
+ "loss": 184.0,
1434
+ "step": 199
1435
+ },
1436
+ {
1437
+ "epoch": 0.41434675644179725,
1438
+ "grad_norm": 0.0102333789691329,
1439
+ "learning_rate": 0.00013039592399980785,
1440
+ "loss": 184.0,
1441
+ "step": 200
1442
+ },
1443
+ {
1444
+ "epoch": 0.41434675644179725,
1445
+ "eval_loss": 11.5,
1446
+ "eval_runtime": 6.8462,
1447
+ "eval_samples_per_second": 237.505,
1448
+ "eval_steps_per_second": 59.449,
1449
+ "step": 200
1450
  }
 ],
 "logging_steps": 1,
@@ -745,7 +1461,7 @@
 "early_stopping_threshold": 0.0
 },
 "attributes": {
-"early_stopping_patience_counter": 0
+"early_stopping_patience_counter": 2
 }
 },
 "TrainerControl": {
@@ -754,12 +1470,12 @@
 "should_evaluate": false,
 "should_log": false,
 "should_save": true,
-"should_training_stop": false
+"should_training_stop": true
 },
 "attributes": {}
 }
 },
-"total_flos": 31916870860800.0,
+"total_flos": 63833741721600.0,
 "train_batch_size": 4,
 "trial_name": null,
 "trial_params": null