|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.5486284289276808, |
|
"eval_steps": 500, |
|
"global_step": 550, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0113314447592068, |
|
"grad_norm": 4.46875, |
|
"learning_rate": 3.6000000000000003e-06, |
|
"loss": 0.5448, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0226628895184136, |
|
"grad_norm": 0.74609375, |
|
"learning_rate": 7.600000000000001e-06, |
|
"loss": 0.4594, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0339943342776204, |
|
"grad_norm": 0.380859375, |
|
"learning_rate": 1.16e-05, |
|
"loss": 0.3953, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0453257790368272, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 1.5600000000000003e-05, |
|
"loss": 0.3695, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.056657223796033995, |
|
"grad_norm": 0.2314453125, |
|
"learning_rate": 1.9600000000000002e-05, |
|
"loss": 0.3366, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0679886685552408, |
|
"grad_norm": 0.21484375, |
|
"learning_rate": 1.9998642592088543e-05, |
|
"loss": 0.3204, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07932011331444759, |
|
"grad_norm": 0.2060546875, |
|
"learning_rate": 1.9993950790937545e-05, |
|
"loss": 0.3231, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0906515580736544, |
|
"grad_norm": 0.2236328125, |
|
"learning_rate": 1.9985909410557404e-05, |
|
"loss": 0.3127, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.10198300283286119, |
|
"grad_norm": 0.21875, |
|
"learning_rate": 1.9974521146102535e-05, |
|
"loss": 0.3033, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.11331444759206799, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 1.9959789814471278e-05, |
|
"loss": 0.2907, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12464589235127478, |
|
"grad_norm": 0.24609375, |
|
"learning_rate": 1.9941720353026582e-05, |
|
"loss": 0.2941, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.1359773371104816, |
|
"grad_norm": 0.2236328125, |
|
"learning_rate": 1.9920318817941234e-05, |
|
"loss": 0.2844, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.14730878186968838, |
|
"grad_norm": 0.23828125, |
|
"learning_rate": 1.9895592382168036e-05, |
|
"loss": 0.2801, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.15864022662889518, |
|
"grad_norm": 0.2294921875, |
|
"learning_rate": 1.986754933303574e-05, |
|
"loss": 0.2805, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.16997167138810199, |
|
"grad_norm": 0.2177734375, |
|
"learning_rate": 1.983619906947144e-05, |
|
"loss": 0.2706, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1813031161473088, |
|
"grad_norm": 0.240234375, |
|
"learning_rate": 1.980155209885043e-05, |
|
"loss": 0.2756, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.19263456090651557, |
|
"grad_norm": 0.232421875, |
|
"learning_rate": 1.9763620033474552e-05, |
|
"loss": 0.2713, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.20396600566572237, |
|
"grad_norm": 0.2470703125, |
|
"learning_rate": 1.9722415586680204e-05, |
|
"loss": 0.2675, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.21529745042492918, |
|
"grad_norm": 0.236328125, |
|
"learning_rate": 1.9677952568577316e-05, |
|
"loss": 0.2574, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.22662889518413598, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 1.9630245881420764e-05, |
|
"loss": 0.2636, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.23796033994334279, |
|
"grad_norm": 0.26953125, |
|
"learning_rate": 1.957931151461572e-05, |
|
"loss": 0.2614, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.24929178470254956, |
|
"grad_norm": 0.271484375, |
|
"learning_rate": 1.9525166539358608e-05, |
|
"loss": 0.2548, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.26062322946175637, |
|
"grad_norm": 0.2412109375, |
|
"learning_rate": 1.946782910291554e-05, |
|
"loss": 0.2532, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.2719546742209632, |
|
"grad_norm": 0.271484375, |
|
"learning_rate": 1.9407318422540057e-05, |
|
"loss": 0.2545, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.28328611898017, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 1.9343654779032244e-05, |
|
"loss": 0.251, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.2593516209476309, |
|
"grad_norm": 0.29296875, |
|
"learning_rate": 1.944186059309318e-05, |
|
"loss": 0.2362, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.26932668329177056, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 1.93877370638343e-05, |
|
"loss": 0.2377, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.2793017456359102, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 1.9331191872025963e-05, |
|
"loss": 0.2346, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.2892768079800499, |
|
"grad_norm": 0.400390625, |
|
"learning_rate": 1.927223960407727e-05, |
|
"loss": 0.233, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.29925187032418954, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 1.921089546732717e-05, |
|
"loss": 0.23, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.3092269326683292, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 1.9147175286121577e-05, |
|
"loss": 0.2345, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.3192019950124688, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 1.90810954977313e-05, |
|
"loss": 0.2325, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.32917705735660846, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 1.9012673148111908e-05, |
|
"loss": 0.2322, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.33915211970074816, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 1.8941925887506527e-05, |
|
"loss": 0.2257, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.3491271820448878, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 1.8868871965892794e-05, |
|
"loss": 0.2273, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.35910224438902744, |
|
"grad_norm": 0.400390625, |
|
"learning_rate": 1.879353022827509e-05, |
|
"loss": 0.2272, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.3690773067331671, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 1.8715920109823266e-05, |
|
"loss": 0.2223, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.3790523690773067, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 1.8636061630859157e-05, |
|
"loss": 0.2175, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.38902743142144636, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 1.855397539169214e-05, |
|
"loss": 0.2175, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.39900249376558605, |
|
"grad_norm": 0.4140625, |
|
"learning_rate": 1.846968256730504e-05, |
|
"loss": 0.2146, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.4089775561097257, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 1.838320490189187e-05, |
|
"loss": 0.2127, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.41895261845386533, |
|
"grad_norm": 0.35546875, |
|
"learning_rate": 1.8294564703248668e-05, |
|
"loss": 0.207, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.428927680798005, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 1.8203784837018985e-05, |
|
"loss": 0.2025, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.4389027431421446, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 1.8110888720795453e-05, |
|
"loss": 0.203, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.4488778054862843, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 1.8015900318078976e-05, |
|
"loss": 0.2026, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.45885286783042395, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 1.791884413209711e-05, |
|
"loss": 0.194, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.4688279301745636, |
|
"grad_norm": 0.388671875, |
|
"learning_rate": 1.7819745199483196e-05, |
|
"loss": 0.1938, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.47880299251870323, |
|
"grad_norm": 0.380859375, |
|
"learning_rate": 1.7718629083817914e-05, |
|
"loss": 0.1981, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.48877805486284287, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 1.7615521869034887e-05, |
|
"loss": 0.1939, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.49875311720698257, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 1.751045015269207e-05, |
|
"loss": 0.1874, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5087281795511222, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 1.740344103911063e-05, |
|
"loss": 0.1947, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.5187032418952618, |
|
"grad_norm": 0.400390625, |
|
"learning_rate": 1.7294522132383137e-05, |
|
"loss": 0.1927, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.5286783042394015, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 1.7183721529252762e-05, |
|
"loss": 0.1879, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.5386533665835411, |
|
"grad_norm": 0.43359375, |
|
"learning_rate": 1.7071067811865477e-05, |
|
"loss": 0.1899, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.5486284289276808, |
|
"grad_norm": 0.408203125, |
|
"learning_rate": 1.6956590040396975e-05, |
|
"loss": 0.1791, |
|
"step": 550 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2006, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.7337702045947e+18, |
|
"train_batch_size": 25, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|