wangrongsheng
commit from root
dbd399b
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.3704389701796629,
"global_step": 2000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 0.0009999978838190456,
"loss": 2.9794,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 0.0009999915352940948,
"loss": 2.3885,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 0.000999980954478887,
"loss": 2.3057,
"step": 30
},
{
"epoch": 0.01,
"learning_rate": 0.000999966141462985,
"loss": 2.2692,
"step": 40
},
{
"epoch": 0.01,
"learning_rate": 0.000999947096371777,
"loss": 2.2576,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 0.0009999238193664748,
"loss": 2.2388,
"step": 60
},
{
"epoch": 0.01,
"learning_rate": 0.0009998963106441117,
"loss": 2.2523,
"step": 70
},
{
"epoch": 0.01,
"learning_rate": 0.0009998645704375414,
"loss": 2.218,
"step": 80
},
{
"epoch": 0.02,
"learning_rate": 0.000999828599015436,
"loss": 2.2457,
"step": 90
},
{
"epoch": 0.02,
"learning_rate": 0.0009997883966822835,
"loss": 2.198,
"step": 100
},
{
"epoch": 0.02,
"learning_rate": 0.0009997439637783859,
"loss": 2.2013,
"step": 110
},
{
"epoch": 0.02,
"learning_rate": 0.000999695300679855,
"loss": 2.1765,
"step": 120
},
{
"epoch": 0.02,
"learning_rate": 0.0009996424077986109,
"loss": 2.1741,
"step": 130
},
{
"epoch": 0.03,
"learning_rate": 0.000999585285582377,
"loss": 2.1898,
"step": 140
},
{
"epoch": 0.03,
"learning_rate": 0.0009995239345146772,
"loss": 2.1466,
"step": 150
},
{
"epoch": 0.03,
"learning_rate": 0.0009994583551148314,
"loss": 2.1423,
"step": 160
},
{
"epoch": 0.03,
"learning_rate": 0.0009993885479379506,
"loss": 2.1451,
"step": 170
},
{
"epoch": 0.03,
"learning_rate": 0.000999314513574934,
"loss": 2.202,
"step": 180
},
{
"epoch": 0.04,
"learning_rate": 0.0009992362526524616,
"loss": 2.1208,
"step": 190
},
{
"epoch": 0.04,
"learning_rate": 0.0009991537658329906,
"loss": 2.1591,
"step": 200
},
{
"epoch": 0.04,
"learning_rate": 0.000999067053814749,
"loss": 2.1788,
"step": 210
},
{
"epoch": 0.04,
"learning_rate": 0.0009989761173317304,
"loss": 2.147,
"step": 220
},
{
"epoch": 0.04,
"learning_rate": 0.000998880957153687,
"loss": 2.1249,
"step": 230
},
{
"epoch": 0.04,
"learning_rate": 0.000998781574086123,
"loss": 2.165,
"step": 240
},
{
"epoch": 0.05,
"learning_rate": 0.000998677968970289,
"loss": 2.1428,
"step": 250
},
{
"epoch": 0.05,
"learning_rate": 0.0009985701426831735,
"loss": 2.1384,
"step": 260
},
{
"epoch": 0.05,
"learning_rate": 0.0009984580961374964,
"loss": 2.1585,
"step": 270
},
{
"epoch": 0.05,
"learning_rate": 0.0009983418302817008,
"loss": 2.1156,
"step": 280
},
{
"epoch": 0.05,
"learning_rate": 0.0009982213460999448,
"loss": 2.0811,
"step": 290
},
{
"epoch": 0.06,
"learning_rate": 0.000998096644612094,
"loss": 2.1081,
"step": 300
},
{
"epoch": 0.06,
"learning_rate": 0.0009979677268737118,
"loss": 2.1246,
"step": 310
},
{
"epoch": 0.06,
"learning_rate": 0.0009978345939760515,
"loss": 2.1229,
"step": 320
},
{
"epoch": 0.06,
"learning_rate": 0.000997697247046046,
"loss": 2.1033,
"step": 330
},
{
"epoch": 0.06,
"learning_rate": 0.0009975556872462994,
"loss": 2.0931,
"step": 340
},
{
"epoch": 0.06,
"learning_rate": 0.000997409915775076,
"loss": 2.1206,
"step": 350
},
{
"epoch": 0.07,
"learning_rate": 0.0009972599338662915,
"loss": 2.0537,
"step": 360
},
{
"epoch": 0.07,
"learning_rate": 0.0009971057427895012,
"loss": 2.0762,
"step": 370
},
{
"epoch": 0.07,
"learning_rate": 0.0009969473438498897,
"loss": 2.0883,
"step": 380
},
{
"epoch": 0.07,
"learning_rate": 0.0009967847383882604,
"loss": 2.1174,
"step": 390
},
{
"epoch": 0.07,
"learning_rate": 0.0009966179277810239,
"loss": 2.1111,
"step": 400
},
{
"epoch": 0.08,
"learning_rate": 0.0009964469134401855,
"loss": 2.1288,
"step": 410
},
{
"epoch": 0.08,
"learning_rate": 0.0009962716968133346,
"loss": 2.0967,
"step": 420
},
{
"epoch": 0.08,
"learning_rate": 0.0009960922793836318,
"loss": 2.1216,
"step": 430
},
{
"epoch": 0.08,
"learning_rate": 0.0009959086626697955,
"loss": 2.0924,
"step": 440
},
{
"epoch": 0.08,
"learning_rate": 0.0009957208482260908,
"loss": 2.0809,
"step": 450
},
{
"epoch": 0.09,
"learning_rate": 0.0009955288376423152,
"loss": 2.1082,
"step": 460
},
{
"epoch": 0.09,
"learning_rate": 0.0009953326325437852,
"loss": 2.0885,
"step": 470
},
{
"epoch": 0.09,
"learning_rate": 0.0009951322345913224,
"loss": 2.1133,
"step": 480
},
{
"epoch": 0.09,
"learning_rate": 0.0009949276454812408,
"loss": 2.0844,
"step": 490
},
{
"epoch": 0.09,
"learning_rate": 0.00099471886694533,
"loss": 2.0796,
"step": 500
},
{
"epoch": 0.09,
"learning_rate": 0.0009945059007508434,
"loss": 2.1255,
"step": 510
},
{
"epoch": 0.1,
"learning_rate": 0.0009942887487004804,
"loss": 2.0913,
"step": 520
},
{
"epoch": 0.1,
"learning_rate": 0.0009940674126323733,
"loss": 2.1003,
"step": 530
},
{
"epoch": 0.1,
"learning_rate": 0.0009938418944200709,
"loss": 2.0541,
"step": 540
},
{
"epoch": 0.1,
"learning_rate": 0.0009936121959725223,
"loss": 2.0523,
"step": 550
},
{
"epoch": 0.1,
"learning_rate": 0.0009933783192340618,
"loss": 2.1225,
"step": 560
},
{
"epoch": 0.11,
"learning_rate": 0.0009931402661843911,
"loss": 2.0446,
"step": 570
},
{
"epoch": 0.11,
"learning_rate": 0.000992898038838564,
"loss": 2.0921,
"step": 580
},
{
"epoch": 0.11,
"learning_rate": 0.0009926516392469674,
"loss": 2.1081,
"step": 590
},
{
"epoch": 0.11,
"learning_rate": 0.0009924010694953064,
"loss": 2.0734,
"step": 600
},
{
"epoch": 0.11,
"learning_rate": 0.0009921463317045843,
"loss": 2.0652,
"step": 610
},
{
"epoch": 0.11,
"learning_rate": 0.0009918874280310862,
"loss": 2.0818,
"step": 620
},
{
"epoch": 0.12,
"learning_rate": 0.0009916243606663605,
"loss": 2.0776,
"step": 630
},
{
"epoch": 0.12,
"learning_rate": 0.0009913571318371994,
"loss": 2.1025,
"step": 640
},
{
"epoch": 0.12,
"learning_rate": 0.0009910857438056215,
"loss": 2.066,
"step": 650
},
{
"epoch": 0.12,
"learning_rate": 0.0009908101988688512,
"loss": 2.0575,
"step": 660
},
{
"epoch": 0.12,
"learning_rate": 0.0009905304993593008,
"loss": 2.1269,
"step": 670
},
{
"epoch": 0.13,
"learning_rate": 0.0009902466476445486,
"loss": 2.0518,
"step": 680
},
{
"epoch": 0.13,
"learning_rate": 0.0009899586461273218,
"loss": 2.0698,
"step": 690
},
{
"epoch": 0.13,
"learning_rate": 0.000989666497245473,
"loss": 2.0988,
"step": 700
},
{
"epoch": 0.13,
"learning_rate": 0.0009893702034719624,
"loss": 2.0986,
"step": 710
},
{
"epoch": 0.13,
"learning_rate": 0.0009890697673148345,
"loss": 2.0237,
"step": 720
},
{
"epoch": 0.14,
"learning_rate": 0.0009887651913171986,
"loss": 2.0027,
"step": 730
},
{
"epoch": 0.14,
"learning_rate": 0.0009884564780572064,
"loss": 2.0563,
"step": 740
},
{
"epoch": 0.14,
"learning_rate": 0.0009881436301480305,
"loss": 2.0624,
"step": 750
},
{
"epoch": 0.14,
"learning_rate": 0.000987826650237842,
"loss": 2.0926,
"step": 760
},
{
"epoch": 0.14,
"learning_rate": 0.000987505541009788,
"loss": 2.0585,
"step": 770
},
{
"epoch": 0.14,
"learning_rate": 0.0009871803051819696,
"loss": 2.0494,
"step": 780
},
{
"epoch": 0.15,
"learning_rate": 0.0009868509455074183,
"loss": 2.0106,
"step": 790
},
{
"epoch": 0.15,
"learning_rate": 0.0009865174647740729,
"loss": 2.0861,
"step": 800
},
{
"epoch": 0.15,
"learning_rate": 0.0009861798658047556,
"loss": 2.0478,
"step": 810
},
{
"epoch": 0.15,
"learning_rate": 0.0009858381514571484,
"loss": 2.0469,
"step": 820
},
{
"epoch": 0.15,
"learning_rate": 0.000985492324623769,
"loss": 2.0671,
"step": 830
},
{
"epoch": 0.16,
"learning_rate": 0.0009851423882319458,
"loss": 2.0808,
"step": 840
},
{
"epoch": 0.16,
"learning_rate": 0.0009847883452437937,
"loss": 2.0331,
"step": 850
},
{
"epoch": 0.16,
"learning_rate": 0.0009844301986561893,
"loss": 2.0295,
"step": 860
},
{
"epoch": 0.16,
"learning_rate": 0.000984067951500744,
"loss": 2.0873,
"step": 870
},
{
"epoch": 0.16,
"learning_rate": 0.00098370160684378,
"loss": 2.1038,
"step": 880
},
{
"epoch": 0.16,
"learning_rate": 0.0009833311677863042,
"loss": 2.0337,
"step": 890
},
{
"epoch": 0.17,
"learning_rate": 0.0009829566374639801,
"loss": 2.0407,
"step": 900
},
{
"epoch": 0.17,
"learning_rate": 0.0009825780190471042,
"loss": 2.1049,
"step": 910
},
{
"epoch": 0.17,
"learning_rate": 0.000982195315740576,
"loss": 2.0475,
"step": 920
},
{
"epoch": 0.17,
"learning_rate": 0.0009818085307838741,
"loss": 2.0624,
"step": 930
},
{
"epoch": 0.17,
"learning_rate": 0.000981417667451026,
"loss": 2.0714,
"step": 940
},
{
"epoch": 0.18,
"learning_rate": 0.0009810227290505816,
"loss": 2.0947,
"step": 950
},
{
"epoch": 0.18,
"learning_rate": 0.0009806237189255859,
"loss": 2.0591,
"step": 960
},
{
"epoch": 0.18,
"learning_rate": 0.0009802206404535489,
"loss": 2.0301,
"step": 970
},
{
"epoch": 0.18,
"learning_rate": 0.000979813497046419,
"loss": 2.0556,
"step": 980
},
{
"epoch": 0.18,
"learning_rate": 0.0009794022921505523,
"loss": 2.0753,
"step": 990
},
{
"epoch": 0.19,
"learning_rate": 0.000978987029246685,
"loss": 2.0898,
"step": 1000
},
{
"epoch": 0.19,
"learning_rate": 0.0009785677118499029,
"loss": 2.0464,
"step": 1010
},
{
"epoch": 0.19,
"learning_rate": 0.0009781443435096116,
"loss": 2.0828,
"step": 1020
},
{
"epoch": 0.19,
"learning_rate": 0.0009777169278095074,
"loss": 2.1137,
"step": 1030
},
{
"epoch": 0.19,
"learning_rate": 0.0009772854683675462,
"loss": 2.0167,
"step": 1040
},
{
"epoch": 0.19,
"learning_rate": 0.000976849968835913,
"loss": 2.07,
"step": 1050
},
{
"epoch": 0.2,
"learning_rate": 0.0009764104329009909,
"loss": 2.0409,
"step": 1060
},
{
"epoch": 0.2,
"learning_rate": 0.0009759668642833304,
"loss": 2.015,
"step": 1070
},
{
"epoch": 0.2,
"learning_rate": 0.0009755192667376173,
"loss": 2.0175,
"step": 1080
},
{
"epoch": 0.2,
"learning_rate": 0.0009750676440526411,
"loss": 2.0773,
"step": 1090
},
{
"epoch": 0.2,
"learning_rate": 0.0009746120000512632,
"loss": 2.0245,
"step": 1100
},
{
"epoch": 0.21,
"learning_rate": 0.0009741523385903841,
"loss": 2.094,
"step": 1110
},
{
"epoch": 0.21,
"learning_rate": 0.0009736886635609112,
"loss": 2.0506,
"step": 1120
},
{
"epoch": 0.21,
"learning_rate": 0.0009732209788877258,
"loss": 2.0287,
"step": 1130
},
{
"epoch": 0.21,
"learning_rate": 0.0009727492885296489,
"loss": 2.1162,
"step": 1140
},
{
"epoch": 0.21,
"learning_rate": 0.0009722735964794099,
"loss": 2.1096,
"step": 1150
},
{
"epoch": 0.21,
"learning_rate": 0.0009717939067636099,
"loss": 2.0621,
"step": 1160
},
{
"epoch": 0.22,
"learning_rate": 0.0009713102234426903,
"loss": 2.0796,
"step": 1170
},
{
"epoch": 0.22,
"learning_rate": 0.0009708225506108965,
"loss": 2.0565,
"step": 1180
},
{
"epoch": 0.22,
"learning_rate": 0.0009703308923962447,
"loss": 2.0669,
"step": 1190
},
{
"epoch": 0.22,
"learning_rate": 0.0009698352529604857,
"loss": 2.0638,
"step": 1200
},
{
"epoch": 0.22,
"learning_rate": 0.0009693356364990705,
"loss": 2.0358,
"step": 1210
},
{
"epoch": 0.23,
"learning_rate": 0.0009688320472411143,
"loss": 2.0859,
"step": 1220
},
{
"epoch": 0.23,
"learning_rate": 0.0009683244894493613,
"loss": 2.0932,
"step": 1230
},
{
"epoch": 0.23,
"learning_rate": 0.0009678129674201479,
"loss": 2.0129,
"step": 1240
},
{
"epoch": 0.23,
"learning_rate": 0.0009672974854833669,
"loss": 2.055,
"step": 1250
},
{
"epoch": 0.23,
"learning_rate": 0.0009667780480024304,
"loss": 2.0665,
"step": 1260
},
{
"epoch": 0.24,
"learning_rate": 0.0009662546593742334,
"loss": 2.0488,
"step": 1270
},
{
"epoch": 0.24,
"learning_rate": 0.0009657273240291159,
"loss": 2.0543,
"step": 1280
},
{
"epoch": 0.24,
"learning_rate": 0.0009651960464308261,
"loss": 2.0418,
"step": 1290
},
{
"epoch": 0.24,
"learning_rate": 0.0009646608310764819,
"loss": 2.033,
"step": 1300
},
{
"epoch": 0.24,
"learning_rate": 0.0009641216824965338,
"loss": 2.1034,
"step": 1310
},
{
"epoch": 0.24,
"learning_rate": 0.0009635786052547253,
"loss": 2.0866,
"step": 1320
},
{
"epoch": 0.25,
"learning_rate": 0.0009630316039480556,
"loss": 2.0607,
"step": 1330
},
{
"epoch": 0.25,
"learning_rate": 0.0009624806832067394,
"loss": 2.0457,
"step": 1340
},
{
"epoch": 0.25,
"learning_rate": 0.0009619258476941686,
"loss": 2.032,
"step": 1350
},
{
"epoch": 0.25,
"learning_rate": 0.000961367102106873,
"loss": 2.0519,
"step": 1360
},
{
"epoch": 0.25,
"learning_rate": 0.0009608044511744791,
"loss": 2.0449,
"step": 1370
},
{
"epoch": 0.26,
"learning_rate": 0.0009602378996596721,
"loss": 1.9949,
"step": 1380
},
{
"epoch": 0.26,
"learning_rate": 0.0009596674523581539,
"loss": 2.0394,
"step": 1390
},
{
"epoch": 0.26,
"learning_rate": 0.0009590931140986035,
"loss": 2.0386,
"step": 1400
},
{
"epoch": 0.26,
"learning_rate": 0.0009585148897426354,
"loss": 2.0254,
"step": 1410
},
{
"epoch": 0.26,
"learning_rate": 0.0009579327841847593,
"loss": 2.0238,
"step": 1420
},
{
"epoch": 0.26,
"learning_rate": 0.000957346802352338,
"loss": 2.0509,
"step": 1430
},
{
"epoch": 0.27,
"learning_rate": 0.0009567569492055456,
"loss": 2.0004,
"step": 1440
},
{
"epoch": 0.27,
"learning_rate": 0.0009561632297373263,
"loss": 2.0203,
"step": 1450
},
{
"epoch": 0.27,
"learning_rate": 0.0009555656489733513,
"loss": 2.0182,
"step": 1460
},
{
"epoch": 0.27,
"learning_rate": 0.000954964211971977,
"loss": 1.9754,
"step": 1470
},
{
"epoch": 0.27,
"learning_rate": 0.0009543589238242012,
"loss": 2.0374,
"step": 1480
},
{
"epoch": 0.28,
"learning_rate": 0.000953749789653621,
"loss": 2.0367,
"step": 1490
},
{
"epoch": 0.28,
"learning_rate": 0.000953136814616389,
"loss": 2.0866,
"step": 1500
},
{
"epoch": 0.28,
"learning_rate": 0.0009525200039011694,
"loss": 2.0083,
"step": 1510
},
{
"epoch": 0.28,
"learning_rate": 0.0009518993627290948,
"loss": 2.0525,
"step": 1520
},
{
"epoch": 0.28,
"learning_rate": 0.0009512748963537212,
"loss": 2.0636,
"step": 1530
},
{
"epoch": 0.29,
"learning_rate": 0.000950646610060984,
"loss": 2.0522,
"step": 1540
},
{
"epoch": 0.29,
"learning_rate": 0.0009500145091691532,
"loss": 2.05,
"step": 1550
},
{
"epoch": 0.29,
"learning_rate": 0.0009493785990287882,
"loss": 1.9887,
"step": 1560
},
{
"epoch": 0.29,
"learning_rate": 0.0009487388850226926,
"loss": 2.0309,
"step": 1570
},
{
"epoch": 0.29,
"learning_rate": 0.000948095372565869,
"loss": 1.9954,
"step": 1580
},
{
"epoch": 0.29,
"learning_rate": 0.0009474480671054726,
"loss": 2.0078,
"step": 1590
},
{
"epoch": 0.3,
"learning_rate": 0.0009467969741207652,
"loss": 2.0395,
"step": 1600
},
{
"epoch": 0.3,
"learning_rate": 0.0009461420991230693,
"loss": 2.0415,
"step": 1610
},
{
"epoch": 0.3,
"learning_rate": 0.0009454834476557207,
"loss": 2.0308,
"step": 1620
},
{
"epoch": 0.3,
"learning_rate": 0.0009448210252940223,
"loss": 2.0826,
"step": 1630
},
{
"epoch": 0.3,
"learning_rate": 0.0009441548376451963,
"loss": 2.0424,
"step": 1640
},
{
"epoch": 0.31,
"learning_rate": 0.0009434848903483373,
"loss": 2.0125,
"step": 1650
},
{
"epoch": 0.31,
"learning_rate": 0.0009428111890743639,
"loss": 2.0139,
"step": 1660
},
{
"epoch": 0.31,
"learning_rate": 0.0009421337395259717,
"loss": 2.0682,
"step": 1670
},
{
"epoch": 0.31,
"learning_rate": 0.0009414525474375837,
"loss": 2.0577,
"step": 1680
},
{
"epoch": 0.31,
"learning_rate": 0.0009407676185753029,
"loss": 2.0262,
"step": 1690
},
{
"epoch": 0.31,
"learning_rate": 0.0009400789587368632,
"loss": 2.0515,
"step": 1700
},
{
"epoch": 0.32,
"learning_rate": 0.0009393865737515794,
"loss": 2.0398,
"step": 1710
},
{
"epoch": 0.32,
"learning_rate": 0.0009386904694802997,
"loss": 2.0146,
"step": 1720
},
{
"epoch": 0.32,
"learning_rate": 0.0009379906518153543,
"loss": 2.0438,
"step": 1730
},
{
"epoch": 0.32,
"learning_rate": 0.0009372871266805063,
"loss": 2.0377,
"step": 1740
},
{
"epoch": 0.32,
"learning_rate": 0.000936579900030902,
"loss": 2.0789,
"step": 1750
},
{
"epoch": 0.33,
"learning_rate": 0.0009358689778530193,
"loss": 2.0201,
"step": 1760
},
{
"epoch": 0.33,
"learning_rate": 0.0009351543661646185,
"loss": 2.0114,
"step": 1770
},
{
"epoch": 0.33,
"learning_rate": 0.0009344360710146898,
"loss": 2.0242,
"step": 1780
},
{
"epoch": 0.33,
"learning_rate": 0.0009337140984834034,
"loss": 2.0436,
"step": 1790
},
{
"epoch": 0.33,
"learning_rate": 0.0009329884546820572,
"loss": 2.0452,
"step": 1800
},
{
"epoch": 0.34,
"learning_rate": 0.000932259145753026,
"loss": 2.0254,
"step": 1810
},
{
"epoch": 0.34,
"learning_rate": 0.0009315261778697083,
"loss": 2.0409,
"step": 1820
},
{
"epoch": 0.34,
"learning_rate": 0.0009307895572364746,
"loss": 2.0301,
"step": 1830
},
{
"epoch": 0.34,
"learning_rate": 0.0009300492900886154,
"loss": 2.0078,
"step": 1840
},
{
"epoch": 0.34,
"learning_rate": 0.0009293053826922873,
"loss": 1.9851,
"step": 1850
},
{
"epoch": 0.34,
"learning_rate": 0.0009285578413444613,
"loss": 1.9947,
"step": 1860
},
{
"epoch": 0.35,
"learning_rate": 0.0009278066723728682,
"loss": 2.0331,
"step": 1870
},
{
"epoch": 0.35,
"learning_rate": 0.0009270518821359461,
"loss": 2.0058,
"step": 1880
},
{
"epoch": 0.35,
"learning_rate": 0.0009262934770227858,
"loss": 2.05,
"step": 1890
},
{
"epoch": 0.35,
"learning_rate": 0.0009255314634530771,
"loss": 2.0444,
"step": 1900
},
{
"epoch": 0.35,
"learning_rate": 0.0009247658478770543,
"loss": 2.0045,
"step": 1910
},
{
"epoch": 0.36,
"learning_rate": 0.000923996636775442,
"loss": 2.0211,
"step": 1920
},
{
"epoch": 0.36,
"learning_rate": 0.0009232238366593997,
"loss": 2.0124,
"step": 1930
},
{
"epoch": 0.36,
"learning_rate": 0.0009224474540704671,
"loss": 2.0067,
"step": 1940
},
{
"epoch": 0.36,
"learning_rate": 0.0009216674955805079,
"loss": 2.0247,
"step": 1950
},
{
"epoch": 0.36,
"learning_rate": 0.0009208839677916557,
"loss": 2.0314,
"step": 1960
},
{
"epoch": 0.36,
"learning_rate": 0.0009200968773362568,
"loss": 2.067,
"step": 1970
},
{
"epoch": 0.37,
"learning_rate": 0.0009193062308768145,
"loss": 2.0168,
"step": 1980
},
{
"epoch": 0.37,
"learning_rate": 0.0009185120351059326,
"loss": 2.0649,
"step": 1990
},
{
"epoch": 0.37,
"learning_rate": 0.0009177142967462591,
"loss": 2.0208,
"step": 2000
}
],
"max_steps": 10798,
"num_train_epochs": 2,
"total_flos": 1.8983090426321306e+18,
"trial_name": null,
"trial_params": null
}