{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.9809417040358746,
"eval_steps": 500,
"global_step": 712,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.14013452914798205,
"grad_norm": 14.509355545043945,
"learning_rate": 3.2000000000000003e-06,
"loss": 7.1534,
"num_input_tokens_seen": 318240,
"step": 25
},
{
"epoch": 0.2802690582959641,
"grad_norm": 6.642310619354248,
"learning_rate": 6.533333333333334e-06,
"loss": 1.9472,
"num_input_tokens_seen": 636760,
"step": 50
},
{
"epoch": 0.4204035874439462,
"grad_norm": 3.3247640132904053,
"learning_rate": 9.866666666666668e-06,
"loss": 1.5046,
"num_input_tokens_seen": 956520,
"step": 75
},
{
"epoch": 0.5605381165919282,
"grad_norm": 2.3405237197875977,
"learning_rate": 1.3200000000000002e-05,
"loss": 1.3971,
"num_input_tokens_seen": 1275920,
"step": 100
},
{
"epoch": 0.7006726457399103,
"grad_norm": 3.028830051422119,
"learning_rate": 1.6533333333333333e-05,
"loss": 1.3768,
"num_input_tokens_seen": 1595280,
"step": 125
},
{
"epoch": 0.8408071748878924,
"grad_norm": 1.428653597831726,
"learning_rate": 1.9866666666666667e-05,
"loss": 1.3278,
"num_input_tokens_seen": 1914720,
"step": 150
},
{
"epoch": 0.9809417040358744,
"grad_norm": 1.9624038934707642,
"learning_rate": 1.9910139651840497e-05,
"loss": 1.2927,
"num_input_tokens_seen": 2233680,
"step": 175
},
{
"epoch": 1.1177130044843049,
"grad_norm": 1.6478056907653809,
"learning_rate": 1.962720313575358e-05,
"loss": 1.2029,
"num_input_tokens_seen": 2544928,
"step": 200
},
{
"epoch": 1.257847533632287,
"grad_norm": 2.1640427112579346,
"learning_rate": 1.915655103523529e-05,
"loss": 1.2199,
"num_input_tokens_seen": 2864008,
"step": 225
},
{
"epoch": 1.397982062780269,
"grad_norm": 2.563964605331421,
"learning_rate": 1.8507360338956896e-05,
"loss": 1.2112,
"num_input_tokens_seen": 3183248,
"step": 250
},
{
"epoch": 1.5381165919282511,
"grad_norm": 1.9103755950927734,
"learning_rate": 1.7692289262315e-05,
"loss": 1.221,
"num_input_tokens_seen": 3502528,
"step": 275
},
{
"epoch": 1.6782511210762332,
"grad_norm": 2.564152240753174,
"learning_rate": 1.6727230431791816e-05,
"loss": 1.1758,
"num_input_tokens_seen": 3821648,
"step": 300
},
{
"epoch": 1.8183856502242153,
"grad_norm": 1.57261061668396,
"learning_rate": 1.563100100329731e-05,
"loss": 1.1828,
"num_input_tokens_seen": 4140008,
"step": 325
},
{
"epoch": 1.9585201793721974,
"grad_norm": 1.8220309019088745,
"learning_rate": 1.442497575670668e-05,
"loss": 1.1948,
"num_input_tokens_seen": 4458448,
"step": 350
},
{
"epoch": 2.0952914798206277,
"grad_norm": 2.028656005859375,
"learning_rate": 1.313267032068285e-05,
"loss": 1.07,
"num_input_tokens_seen": 4770136,
"step": 375
},
{
"epoch": 2.2354260089686098,
"grad_norm": 3.713899850845337,
"learning_rate": 1.1779282654255685e-05,
"loss": 1.0581,
"num_input_tokens_seen": 5087896,
"step": 400
},
{
"epoch": 2.375560538116592,
"grad_norm": 2.000035285949707,
"learning_rate": 1.0391201725558842e-05,
"loss": 1.048,
"num_input_tokens_seen": 5407296,
"step": 425
},
{
"epoch": 2.515695067264574,
"grad_norm": 1.5742937326431274,
"learning_rate": 8.99549296772945e-06,
"loss": 1.0218,
"num_input_tokens_seen": 5726896,
"step": 450
},
{
"epoch": 2.655829596412556,
"grad_norm": 1.7964842319488525,
"learning_rate": 7.619370544785608e-06,
"loss": 1.0206,
"num_input_tokens_seen": 6046856,
"step": 475
},
{
"epoch": 2.795964125560538,
"grad_norm": 1.5667575597763062,
"learning_rate": 6.289666717481497e-06,
"loss": 1.0277,
"num_input_tokens_seen": 6366216,
"step": 500
},
{
"epoch": 2.93609865470852,
"grad_norm": 1.8910105228424072,
"learning_rate": 5.032308655686011e-06,
"loss": 1.0494,
"num_input_tokens_seen": 6685296,
"step": 525
},
{
"epoch": 3.0728699551569507,
"grad_norm": 1.1843669414520264,
"learning_rate": 3.8718128986350154e-06,
"loss": 0.9487,
"num_input_tokens_seen": 6996704,
"step": 550
},
{
"epoch": 3.213004484304933,
"grad_norm": 1.5657708644866943,
"learning_rate": 2.8308073203011667e-06,
"loss": 0.9103,
"num_input_tokens_seen": 7316144,
"step": 575
},
{
"epoch": 3.353139013452915,
"grad_norm": 1.8331918716430664,
"learning_rate": 1.929589920817806e-06,
"loss": 0.9102,
"num_input_tokens_seen": 7635704,
"step": 600
},
{
"epoch": 3.493273542600897,
"grad_norm": 1.651666283607483,
"learning_rate": 1.1857330468424466e-06,
"loss": 0.9034,
"num_input_tokens_seen": 7955304,
"step": 625
},
{
"epoch": 3.633408071748879,
"grad_norm": 1.1031330823898315,
"learning_rate": 6.137407579511212e-07,
"loss": 0.8949,
"num_input_tokens_seen": 8274744,
"step": 650
},
{
"epoch": 3.773542600896861,
"grad_norm": 3.233630895614624,
"learning_rate": 2.2476601988947965e-07,
"loss": 0.9073,
"num_input_tokens_seen": 8594304,
"step": 675
},
{
"epoch": 3.913677130044843,
"grad_norm": 1.2947144508361816,
"learning_rate": 2.639323897518975e-08,
"loss": 0.8967,
"num_input_tokens_seen": 8913944,
"step": 700
},
{
"epoch": 3.9809417040358746,
"num_input_tokens_seen": 9067184,
"step": 712,
"total_flos": 6.151178707859082e+17,
"train_loss": 1.3522842801019046,
"train_runtime": 6892.301,
"train_samples_per_second": 2.588,
"train_steps_per_second": 0.103
}
],
"logging_steps": 25,
"max_steps": 712,
"num_input_tokens_seen": 9067184,
"num_train_epochs": 4,
"save_steps": 250,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.151178707859082e+17,
"train_batch_size": 5,
"trial_name": null,
"trial_params": null
}