{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 349,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.014352350197344816,
"grad_norm": 11.071812629699707,
"learning_rate": 3.6363636363636366e-06,
"loss": 0.7414,
"step": 5
},
{
"epoch": 0.02870470039468963,
"grad_norm": 7.1317901611328125,
"learning_rate": 8.181818181818183e-06,
"loss": 0.7038,
"step": 10
},
{
"epoch": 0.04305705059203445,
"grad_norm": 4.095168113708496,
"learning_rate": 9.998056338091415e-06,
"loss": 0.7009,
"step": 15
},
{
"epoch": 0.05740940078937926,
"grad_norm": 2.938896894454956,
"learning_rate": 9.986183876164412e-06,
"loss": 0.6751,
"step": 20
},
{
"epoch": 0.07176175098672408,
"grad_norm": 3.5328586101531982,
"learning_rate": 9.96354437049027e-06,
"loss": 0.6547,
"step": 25
},
{
"epoch": 0.0861141011840689,
"grad_norm": 2.831934690475464,
"learning_rate": 9.930186708264902e-06,
"loss": 0.6616,
"step": 30
},
{
"epoch": 0.1004664513814137,
"grad_norm": 2.707149028778076,
"learning_rate": 9.88618292120984e-06,
"loss": 0.6828,
"step": 35
},
{
"epoch": 0.11481880157875853,
"grad_norm": 2.4383013248443604,
"learning_rate": 9.831628030028698e-06,
"loss": 0.6486,
"step": 40
},
{
"epoch": 0.12917115177610333,
"grad_norm": 2.7486937046051025,
"learning_rate": 9.76663983922178e-06,
"loss": 0.6592,
"step": 45
},
{
"epoch": 0.14352350197344815,
"grad_norm": 2.4958226680755615,
"learning_rate": 9.691358682701927e-06,
"loss": 0.6616,
"step": 50
},
{
"epoch": 0.15787585217079297,
"grad_norm": 2.9137589931488037,
"learning_rate": 9.605947120760878e-06,
"loss": 0.6479,
"step": 55
},
{
"epoch": 0.1722282023681378,
"grad_norm": 2.6844582557678223,
"learning_rate": 9.510589589040554e-06,
"loss": 0.6677,
"step": 60
},
{
"epoch": 0.1865805525654826,
"grad_norm": 2.127028703689575,
"learning_rate": 9.405492000267228e-06,
"loss": 0.6507,
"step": 65
},
{
"epoch": 0.2009329027628274,
"grad_norm": 2.487703800201416,
"learning_rate": 9.29088129960862e-06,
"loss": 0.6615,
"step": 70
},
{
"epoch": 0.21528525296017223,
"grad_norm": 2.5370140075683594,
"learning_rate": 9.16700497461403e-06,
"loss": 0.6522,
"step": 75
},
{
"epoch": 0.22963760315751705,
"grad_norm": 2.203188896179199,
"learning_rate": 9.034130520795774e-06,
"loss": 0.6521,
"step": 80
},
{
"epoch": 0.24398995335486187,
"grad_norm": 2.402724504470825,
"learning_rate": 8.892544864005899e-06,
"loss": 0.6496,
"step": 85
},
{
"epoch": 0.25834230355220666,
"grad_norm": 2.3624093532562256,
"learning_rate": 8.742553740855507e-06,
"loss": 0.6378,
"step": 90
},
{
"epoch": 0.2726946537495515,
"grad_norm": 2.36360239982605,
"learning_rate": 8.584481038514573e-06,
"loss": 0.6422,
"step": 95
},
{
"epoch": 0.2870470039468963,
"grad_norm": 2.0742623805999756,
"learning_rate": 8.418668095317912e-06,
"loss": 0.6441,
"step": 100
},
{
"epoch": 0.3013993541442411,
"grad_norm": 2.300394058227539,
"learning_rate": 8.245472963687484e-06,
"loss": 0.6333,
"step": 105
},
{
"epoch": 0.31575170434158595,
"grad_norm": 2.1633546352386475,
"learning_rate": 8.065269636962765e-06,
"loss": 0.6393,
"step": 110
},
{
"epoch": 0.33010405453893077,
"grad_norm": 2.291600227355957,
"learning_rate": 7.878447241808634e-06,
"loss": 0.647,
"step": 115
},
{
"epoch": 0.3444564047362756,
"grad_norm": 2.0962560176849365,
"learning_rate": 7.685409197944768e-06,
"loss": 0.6226,
"step": 120
},
{
"epoch": 0.35880875493362036,
"grad_norm": 2.4263839721679688,
"learning_rate": 7.486572347010937e-06,
"loss": 0.6442,
"step": 125
},
{
"epoch": 0.3731611051309652,
"grad_norm": 2.1769137382507324,
"learning_rate": 7.282366052449351e-06,
"loss": 0.6434,
"step": 130
},
{
"epoch": 0.38751345532831,
"grad_norm": 2.181265115737915,
"learning_rate": 7.073231272347714e-06,
"loss": 0.628,
"step": 135
},
{
"epoch": 0.4018658055256548,
"grad_norm": 2.1604084968566895,
"learning_rate": 6.859619607245102e-06,
"loss": 0.6349,
"step": 140
},
{
"epoch": 0.41621815572299964,
"grad_norm": 2.0021893978118896,
"learning_rate": 6.641992324956776e-06,
"loss": 0.6159,
"step": 145
},
{
"epoch": 0.43057050592034446,
"grad_norm": 2.3016304969787598,
"learning_rate": 6.4208193645237314e-06,
"loss": 0.6397,
"step": 150
},
{
"epoch": 0.4449228561176893,
"grad_norm": 2.1002519130706787,
"learning_rate": 6.1965783214377895e-06,
"loss": 0.6268,
"step": 155
},
{
"epoch": 0.4592752063150341,
"grad_norm": 4.018091201782227,
"learning_rate": 5.9697534163335645e-06,
"loss": 0.6302,
"step": 160
},
{
"epoch": 0.4736275565123789,
"grad_norm": 1.9025262594223022,
"learning_rate": 5.740834449374237e-06,
"loss": 0.6163,
"step": 165
},
{
"epoch": 0.48797990670972374,
"grad_norm": 1.9070066213607788,
"learning_rate": 5.510315742589042e-06,
"loss": 0.6243,
"step": 170
},
{
"epoch": 0.5023322569070685,
"grad_norm": 2.002209186553955,
"learning_rate": 5.278695072446342e-06,
"loss": 0.6245,
"step": 175
},
{
"epoch": 0.5166846071044133,
"grad_norm": 1.9884896278381348,
"learning_rate": 5.046472594967279e-06,
"loss": 0.607,
"step": 180
},
{
"epoch": 0.5310369573017582,
"grad_norm": 2.062976837158203,
"learning_rate": 4.814149765701059e-06,
"loss": 0.609,
"step": 185
},
{
"epoch": 0.545389307499103,
"grad_norm": 1.9803415536880493,
"learning_rate": 4.582228256894093e-06,
"loss": 0.6119,
"step": 190
},
{
"epoch": 0.5597416576964478,
"grad_norm": 1.8966432809829712,
"learning_rate": 4.351208874191192e-06,
"loss": 0.6291,
"step": 195
},
{
"epoch": 0.5740940078937926,
"grad_norm": 2.038236618041992,
"learning_rate": 4.121590475208071e-06,
"loss": 0.6255,
"step": 200
},
{
"epoch": 0.5884463580911374,
"grad_norm": 2.072604179382324,
"learning_rate": 3.8938688923104015e-06,
"loss": 0.6269,
"step": 205
},
{
"epoch": 0.6027987082884823,
"grad_norm": 2.125389814376831,
"learning_rate": 3.668535861925509e-06,
"loss": 0.6278,
"step": 210
},
{
"epoch": 0.6171510584858271,
"grad_norm": 2.0525448322296143,
"learning_rate": 3.4460779626987186e-06,
"loss": 0.6,
"step": 215
},
{
"epoch": 0.6315034086831719,
"grad_norm": 1.934063196182251,
"learning_rate": 3.226975564787322e-06,
"loss": 0.5925,
"step": 220
},
{
"epoch": 0.6458557588805167,
"grad_norm": 1.8446972370147705,
"learning_rate": 3.0117017925609802e-06,
"loss": 0.6035,
"step": 225
},
{
"epoch": 0.6602081090778615,
"grad_norm": 2.0047950744628906,
"learning_rate": 2.800721502948506e-06,
"loss": 0.6022,
"step": 230
},
{
"epoch": 0.6745604592752064,
"grad_norm": 1.9434928894042969,
"learning_rate": 2.5944902816371573e-06,
"loss": 0.5966,
"step": 235
},
{
"epoch": 0.6889128094725512,
"grad_norm": 1.9444347620010376,
"learning_rate": 2.3934534592920416e-06,
"loss": 0.5839,
"step": 240
},
{
"epoch": 0.703265159669896,
"grad_norm": 1.9846431016921997,
"learning_rate": 2.1980451499199262e-06,
"loss": 0.6063,
"step": 245
},
{
"epoch": 0.7176175098672407,
"grad_norm": 1.8537969589233398,
"learning_rate": 2.0086873134540626e-06,
"loss": 0.6068,
"step": 250
},
{
"epoch": 0.7319698600645855,
"grad_norm": 1.9193341732025146,
"learning_rate": 1.8257888445842026e-06,
"loss": 0.5948,
"step": 255
},
{
"epoch": 0.7463222102619304,
"grad_norm": 1.9606044292449951,
"learning_rate": 1.6497446897993885e-06,
"loss": 0.5932,
"step": 260
},
{
"epoch": 0.7606745604592752,
"grad_norm": 1.9930768013000488,
"learning_rate": 1.4809349945501422e-06,
"loss": 0.5998,
"step": 265
},
{
"epoch": 0.77502691065662,
"grad_norm": 1.9653593301773071,
"learning_rate": 1.319724282371664e-06,
"loss": 0.6135,
"step": 270
},
{
"epoch": 0.7893792608539648,
"grad_norm": 1.7707090377807617,
"learning_rate": 1.1664606677406025e-06,
"loss": 0.6069,
"step": 275
},
{
"epoch": 0.8037316110513096,
"grad_norm": 1.9296361207962036,
"learning_rate": 1.0214751043651582e-06,
"loss": 0.5974,
"step": 280
},
{
"epoch": 0.8180839612486545,
"grad_norm": 1.89194917678833,
"learning_rate": 8.850806705317183e-07,
"loss": 0.5973,
"step": 285
},
{
"epoch": 0.8324363114459993,
"grad_norm": 1.991350769996643,
"learning_rate": 7.575718930512516e-07,
"loss": 0.5867,
"step": 290
},
{
"epoch": 0.8467886616433441,
"grad_norm": 1.789543867111206,
"learning_rate": 6.392241112653031e-07,
"loss": 0.61,
"step": 295
},
{
"epoch": 0.8611410118406889,
"grad_norm": 1.8926385641098022,
"learning_rate": 5.302928824849335e-07,
"loss": 0.6084,
"step": 300
},
{
"epoch": 0.8754933620380337,
"grad_norm": 1.8285890817642212,
"learning_rate": 4.3101343014651356e-07,
"loss": 0.599,
"step": 305
},
{
"epoch": 0.8898457122353786,
"grad_norm": 1.9200646877288818,
"learning_rate": 3.416001358759635e-07,
"loss": 0.5911,
"step": 310
},
{
"epoch": 0.9041980624327234,
"grad_norm": 1.8193855285644531,
"learning_rate": 2.6224607655831236e-07,
"loss": 0.5886,
"step": 315
},
{
"epoch": 0.9185504126300682,
"grad_norm": 1.7819342613220215,
"learning_rate": 1.9312260741218114e-07,
"loss": 0.5917,
"step": 320
},
{
"epoch": 0.932902762827413,
"grad_norm": 1.8590822219848633,
"learning_rate": 1.3437899196950765e-07,
"loss": 0.5799,
"step": 325
},
{
"epoch": 0.9472551130247578,
"grad_norm": 1.7508701086044312,
"learning_rate": 8.614207975952083e-08,
"loss": 0.6015,
"step": 330
},
{
"epoch": 0.9616074632221027,
"grad_norm": 1.8058841228485107,
"learning_rate": 4.851603239296065e-08,
"loss": 0.5848,
"step": 335
},
{
"epoch": 0.9759598134194475,
"grad_norm": 1.788930892944336,
"learning_rate": 2.158209863804217e-08,
"loss": 0.5879,
"step": 340
},
{
"epoch": 0.9903121636167922,
"grad_norm": 1.9363088607788086,
"learning_rate": 5.398438973845954e-09,
"loss": 0.5836,
"step": 345
}
],
"logging_steps": 5,
"max_steps": 349,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.871785990877872e+18,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}