Vicuna-7B-v1.5-ORPO-SALT / trainer_state.json
chchen's picture
End of training
4963dfc verified
{
"best_metric": 0.9497246742248535,
"best_model_checkpoint": "saves/Vicuna-7B-v1.5/lora/orpo-salt/checkpoint-1500",
"epoch": 2.9969690846635686,
"eval_steps": 500,
"global_step": 1854,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01616488179430188,
"grad_norm": 0.3899887204170227,
"learning_rate": 4.999648198770648e-06,
"logits/chosen": -0.8260404467582703,
"logits/rejected": -0.779380202293396,
"logps/chosen": -1.0734994411468506,
"logps/rejected": -1.2254035472869873,
"loss": 1.146,
"odds_ratio_loss": 0.7249619364738464,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.1073499470949173,
"rewards/margins": 0.01519041694700718,
"rewards/rejected": -0.12254035472869873,
"sft_loss": 1.0734994411468506,
"step": 10
},
{
"epoch": 0.03232976358860376,
"grad_norm": 0.4923989176750183,
"learning_rate": 4.998578646361359e-06,
"logits/chosen": -0.7854002714157104,
"logits/rejected": -0.781389594078064,
"logps/chosen": -1.0866433382034302,
"logps/rejected": -1.2551138401031494,
"loss": 1.1535,
"odds_ratio_loss": 0.668422520160675,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.10866433382034302,
"rewards/margins": 0.016847047954797745,
"rewards/rejected": -0.12551137804985046,
"sft_loss": 1.0866433382034302,
"step": 20
},
{
"epoch": 0.04849464538290564,
"grad_norm": 0.7084988951683044,
"learning_rate": 4.996791614004449e-06,
"logits/chosen": -0.7559419274330139,
"logits/rejected": -0.7485054731369019,
"logps/chosen": -1.0929394960403442,
"logps/rejected": -1.1501963138580322,
"loss": 1.1699,
"odds_ratio_loss": 0.7694913148880005,
"rewards/accuracies": 0.42500001192092896,
"rewards/chosen": -0.10929396003484726,
"rewards/margins": 0.00572569016367197,
"rewards/rejected": -0.11501964181661606,
"sft_loss": 1.0929394960403442,
"step": 30
},
{
"epoch": 0.06465952717720752,
"grad_norm": 0.8286219239234924,
"learning_rate": 4.994287614855618e-06,
"logits/chosen": -0.8193706274032593,
"logits/rejected": -0.7897969484329224,
"logps/chosen": -1.1362740993499756,
"logps/rejected": -1.1394835710525513,
"loss": 1.2171,
"odds_ratio_loss": 0.808376133441925,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": -0.11362739652395248,
"rewards/margins": 0.0003209514543414116,
"rewards/rejected": -0.11394836008548737,
"sft_loss": 1.1362740993499756,
"step": 40
},
{
"epoch": 0.0808244089715094,
"grad_norm": 0.537628173828125,
"learning_rate": 4.991067367951343e-06,
"logits/chosen": -0.7530331015586853,
"logits/rejected": -0.7703112363815308,
"logps/chosen": -1.0968067646026611,
"logps/rejected": -1.1828521490097046,
"loss": 1.1729,
"odds_ratio_loss": 0.7610759735107422,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.10968067497015,
"rewards/margins": 0.00860452838242054,
"rewards/rejected": -0.11828521639108658,
"sft_loss": 1.0968067646026611,
"step": 50
},
{
"epoch": 0.09698929076581128,
"grad_norm": 0.2992643415927887,
"learning_rate": 4.987131798002389e-06,
"logits/chosen": -0.7554941773414612,
"logits/rejected": -0.7805821299552917,
"logps/chosen": -1.120224118232727,
"logps/rejected": -1.1958564519882202,
"loss": 1.2007,
"odds_ratio_loss": 0.804762065410614,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.11202241480350494,
"rewards/margins": 0.007563246879726648,
"rewards/rejected": -0.11958565562963486,
"sft_loss": 1.120224118232727,
"step": 60
},
{
"epoch": 0.11315417256011315,
"grad_norm": 0.5207487940788269,
"learning_rate": 4.982482035128285e-06,
"logits/chosen": -0.7931987643241882,
"logits/rejected": -0.7725004553794861,
"logps/chosen": -1.158760666847229,
"logps/rejected": -1.3085857629776,
"loss": 1.2342,
"odds_ratio_loss": 0.7545939683914185,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.11587607860565186,
"rewards/margins": 0.01498250663280487,
"rewards/rejected": -0.13085858523845673,
"sft_loss": 1.158760666847229,
"step": 70
},
{
"epoch": 0.12931905435441504,
"grad_norm": 0.8179022669792175,
"learning_rate": 4.9771194145328e-06,
"logits/chosen": -0.7553219199180603,
"logits/rejected": -0.7355794906616211,
"logps/chosen": -0.9810718297958374,
"logps/rejected": -1.1142699718475342,
"loss": 1.0496,
"odds_ratio_loss": 0.6851751208305359,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.09810719639062881,
"rewards/margins": 0.013319805264472961,
"rewards/rejected": -0.11142698675394058,
"sft_loss": 0.9810718297958374,
"step": 80
},
{
"epoch": 0.1454839361487169,
"grad_norm": 0.5893221497535706,
"learning_rate": 4.971045476120532e-06,
"logits/chosen": -0.7767540216445923,
"logits/rejected": -0.7691196203231812,
"logps/chosen": -1.0343536138534546,
"logps/rejected": -1.1126210689544678,
"loss": 1.1086,
"odds_ratio_loss": 0.7424803972244263,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": -0.10343535989522934,
"rewards/margins": 0.007826738059520721,
"rewards/rejected": -0.11126209795475006,
"sft_loss": 1.0343536138534546,
"step": 90
},
{
"epoch": 0.1616488179430188,
"grad_norm": 0.3746645748615265,
"learning_rate": 4.964261964054713e-06,
"logits/chosen": -0.749561607837677,
"logits/rejected": -0.7426966428756714,
"logps/chosen": -1.0808948278427124,
"logps/rejected": -1.1608020067214966,
"loss": 1.1637,
"odds_ratio_loss": 0.8280612826347351,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.10808948427438736,
"rewards/margins": 0.007990716025233269,
"rewards/rejected": -0.11608020961284637,
"sft_loss": 1.0808948278427124,
"step": 100
},
{
"epoch": 0.17781369973732067,
"grad_norm": 0.5266828536987305,
"learning_rate": 4.956770826256372e-06,
"logits/chosen": -0.7276872396469116,
"logits/rejected": -0.7239276766777039,
"logps/chosen": -1.0891507863998413,
"logps/rejected": -1.188951015472412,
"loss": 1.1606,
"odds_ratio_loss": 0.7148129940032959,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.10891509056091309,
"rewards/margins": 0.009980013594031334,
"rewards/rejected": -0.11889511346817017,
"sft_loss": 1.0891507863998413,
"step": 110
},
{
"epoch": 0.19397858153162256,
"grad_norm": 0.5117731690406799,
"learning_rate": 4.94857421384497e-06,
"logits/chosen": -0.7153638601303101,
"logits/rejected": -0.7017214894294739,
"logps/chosen": -1.0659247636795044,
"logps/rejected": -1.1995283365249634,
"loss": 1.1411,
"odds_ratio_loss": 0.7518999576568604,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.10659247636795044,
"rewards/margins": 0.013360358774662018,
"rewards/rejected": -0.11995282024145126,
"sft_loss": 1.0659247636795044,
"step": 120
},
{
"epoch": 0.21014346332592443,
"grad_norm": 0.3964090049266815,
"learning_rate": 4.939674480520701e-06,
"logits/chosen": -0.7281032800674438,
"logits/rejected": -0.6757130026817322,
"logps/chosen": -0.9924377202987671,
"logps/rejected": -1.0807675123214722,
"loss": 1.0644,
"odds_ratio_loss": 0.7199574708938599,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.09924378246068954,
"rewards/margins": 0.008832980878651142,
"rewards/rejected": -0.10807675123214722,
"sft_loss": 0.9924377202987671,
"step": 130
},
{
"epoch": 0.2263083451202263,
"grad_norm": 0.31593117117881775,
"learning_rate": 4.930074181888613e-06,
"logits/chosen": -0.6932573914527893,
"logits/rejected": -0.6765223741531372,
"logps/chosen": -1.011648416519165,
"logps/rejected": -1.1101162433624268,
"loss": 1.0811,
"odds_ratio_loss": 0.6949580907821655,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": -0.10116484016180038,
"rewards/margins": 0.009846789762377739,
"rewards/rejected": -0.11101162433624268,
"sft_loss": 1.011648416519165,
"step": 140
},
{
"epoch": 0.2424732269145282,
"grad_norm": 0.7396884560585022,
"learning_rate": 4.91977607472475e-06,
"logits/chosen": -0.6414996981620789,
"logits/rejected": -0.6007689237594604,
"logps/chosen": -1.0180175304412842,
"logps/rejected": -1.0574676990509033,
"loss": 1.0929,
"odds_ratio_loss": 0.748645544052124,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.10180176794528961,
"rewards/margins": 0.00394500233232975,
"rewards/rejected": -0.10574676841497421,
"sft_loss": 1.0180175304412842,
"step": 150
},
{
"epoch": 0.2586381087088301,
"grad_norm": 0.5049052834510803,
"learning_rate": 4.908783116184534e-06,
"logits/chosen": -0.6661972403526306,
"logits/rejected": -0.626873791217804,
"logps/chosen": -0.953465461730957,
"logps/rejected": -1.0835082530975342,
"loss": 1.02,
"odds_ratio_loss": 0.6655644178390503,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.09534655511379242,
"rewards/margins": 0.013004262931644917,
"rewards/rejected": -0.10835081338882446,
"sft_loss": 0.953465461730957,
"step": 160
},
{
"epoch": 0.27480299050313195,
"grad_norm": 0.4969651699066162,
"learning_rate": 4.897098462953598e-06,
"logits/chosen": -0.5929690599441528,
"logits/rejected": -0.6147447824478149,
"logps/chosen": -0.9747630953788757,
"logps/rejected": -1.1718312501907349,
"loss": 1.0464,
"odds_ratio_loss": 0.7164822220802307,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.09747631102800369,
"rewards/margins": 0.019706813618540764,
"rewards/rejected": -0.11718311160802841,
"sft_loss": 0.9747630953788757,
"step": 170
},
{
"epoch": 0.2909678722974338,
"grad_norm": 0.37429389357566833,
"learning_rate": 4.884725470341331e-06,
"logits/chosen": -0.5573834180831909,
"logits/rejected": -0.544479250907898,
"logps/chosen": -0.8867887258529663,
"logps/rejected": -1.1076356172561646,
"loss": 0.9499,
"odds_ratio_loss": 0.6307954788208008,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.08867888152599335,
"rewards/margins": 0.022084690630435944,
"rewards/rejected": -0.1107635647058487,
"sft_loss": 0.8867887258529663,
"step": 180
},
{
"epoch": 0.3071327540917357,
"grad_norm": 1.2109434604644775,
"learning_rate": 4.871667691317377e-06,
"logits/chosen": -0.6222495436668396,
"logits/rejected": -0.6174622774124146,
"logps/chosen": -1.1702499389648438,
"logps/rejected": -1.0528119802474976,
"loss": 1.2649,
"odds_ratio_loss": 0.9465614557266235,
"rewards/accuracies": 0.40625,
"rewards/chosen": -0.1170249953866005,
"rewards/margins": -0.011743778362870216,
"rewards/rejected": -0.10528121143579483,
"sft_loss": 1.1702499389648438,
"step": 190
},
{
"epoch": 0.3232976358860376,
"grad_norm": 1.5371562242507935,
"learning_rate": 4.857928875491392e-06,
"logits/chosen": -0.5464112162590027,
"logits/rejected": -0.5513696670532227,
"logps/chosen": -0.8908155560493469,
"logps/rejected": -1.0076180696487427,
"loss": 0.9612,
"odds_ratio_loss": 0.7040323615074158,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.08908155560493469,
"rewards/margins": 0.011680259369313717,
"rewards/rejected": -0.10076181590557098,
"sft_loss": 0.8908155560493469,
"step": 200
},
{
"epoch": 0.33946251768033947,
"grad_norm": 0.6159927845001221,
"learning_rate": 4.843512968036314e-06,
"logits/chosen": -0.6329461932182312,
"logits/rejected": -0.592659592628479,
"logps/chosen": -0.975503146648407,
"logps/rejected": -0.9970613718032837,
"loss": 1.0514,
"odds_ratio_loss": 0.7591590881347656,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": -0.09755031019449234,
"rewards/margins": 0.00215582805685699,
"rewards/rejected": -0.09970613569021225,
"sft_loss": 0.975503146648407,
"step": 210
},
{
"epoch": 0.35562739947464134,
"grad_norm": 0.3111410439014435,
"learning_rate": 4.828424108555486e-06,
"logits/chosen": -0.5221891403198242,
"logits/rejected": -0.5304391980171204,
"logps/chosen": -1.1862733364105225,
"logps/rejected": -1.2753493785858154,
"loss": 1.2641,
"odds_ratio_loss": 0.7783994674682617,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.11862732470035553,
"rewards/margins": 0.008907611481845379,
"rewards/rejected": -0.12753494083881378,
"sft_loss": 1.1862733364105225,
"step": 220
},
{
"epoch": 0.3717922812689432,
"grad_norm": 0.301698237657547,
"learning_rate": 4.812666629893957e-06,
"logits/chosen": -0.4992770254611969,
"logits/rejected": -0.4967115521430969,
"logps/chosen": -0.9971933364868164,
"logps/rejected": -1.0213407278060913,
"loss": 1.0744,
"odds_ratio_loss": 0.7721298933029175,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -0.09971933811903,
"rewards/margins": 0.0024147380609065294,
"rewards/rejected": -0.1021340861916542,
"sft_loss": 0.9971933364868164,
"step": 230
},
{
"epoch": 0.3879571630632451,
"grad_norm": 0.2967057526111603,
"learning_rate": 4.796245056894273e-06,
"logits/chosen": -0.5198571085929871,
"logits/rejected": -0.4987764358520508,
"logps/chosen": -0.9578666687011719,
"logps/rejected": -1.0644018650054932,
"loss": 1.0315,
"odds_ratio_loss": 0.7367077469825745,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.0957866758108139,
"rewards/margins": 0.01065351627767086,
"rewards/rejected": -0.10644018650054932,
"sft_loss": 0.9578666687011719,
"step": 240
},
{
"epoch": 0.404122044857547,
"grad_norm": 0.336041659116745,
"learning_rate": 4.779164105097148e-06,
"logits/chosen": -0.4748106002807617,
"logits/rejected": -0.44636374711990356,
"logps/chosen": -0.9247462153434753,
"logps/rejected": -1.1018692255020142,
"loss": 0.9923,
"odds_ratio_loss": 0.6758453845977783,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": -0.09247462451457977,
"rewards/margins": 0.017712296918034554,
"rewards/rejected": -0.11018691956996918,
"sft_loss": 0.9247462153434753,
"step": 250
},
{
"epoch": 0.42028692665184886,
"grad_norm": 0.5222122669219971,
"learning_rate": 4.761428679387373e-06,
"logits/chosen": -0.46434497833251953,
"logits/rejected": -0.4350043833255768,
"logps/chosen": -0.8905488848686218,
"logps/rejected": -1.0182609558105469,
"loss": 0.9591,
"odds_ratio_loss": 0.6853379011154175,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.0890548899769783,
"rewards/margins": 0.01277120690792799,
"rewards/rejected": -0.10182609409093857,
"sft_loss": 0.8905488848686218,
"step": 260
},
{
"epoch": 0.4364518084461507,
"grad_norm": 0.5936411023139954,
"learning_rate": 4.7430438725853515e-06,
"logits/chosen": -0.48627519607543945,
"logits/rejected": -0.4379982352256775,
"logps/chosen": -0.9183929562568665,
"logps/rejected": -1.1679961681365967,
"loss": 0.984,
"odds_ratio_loss": 0.6556900143623352,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.09183929860591888,
"rewards/margins": 0.024960322305560112,
"rewards/rejected": -0.11679961532354355,
"sft_loss": 0.9183929562568665,
"step": 270
},
{
"epoch": 0.4526166902404526,
"grad_norm": 0.46239179372787476,
"learning_rate": 4.724014963984669e-06,
"logits/chosen": -0.4012899398803711,
"logits/rejected": -0.411139577627182,
"logps/chosen": -1.008721947669983,
"logps/rejected": -1.2014849185943604,
"loss": 1.0765,
"odds_ratio_loss": 0.6780184507369995,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.10087219625711441,
"rewards/margins": 0.01927630603313446,
"rewards/rejected": -0.12014850229024887,
"sft_loss": 1.008721947669983,
"step": 280
},
{
"epoch": 0.4687815720347545,
"grad_norm": 0.5760877132415771,
"learning_rate": 4.704347417836116e-06,
"logits/chosen": -0.4533885419368744,
"logits/rejected": -0.46080097556114197,
"logps/chosen": -0.9372620582580566,
"logps/rejected": -1.1106752157211304,
"loss": 1.0089,
"odds_ratio_loss": 0.716440737247467,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.09372620284557343,
"rewards/margins": 0.01734132692217827,
"rewards/rejected": -0.1110675185918808,
"sft_loss": 0.9372620582580566,
"step": 290
},
{
"epoch": 0.4849464538290564,
"grad_norm": 0.44260743260383606,
"learning_rate": 4.684046881778603e-06,
"logits/chosen": -0.5344091653823853,
"logits/rejected": -0.49474531412124634,
"logps/chosen": -0.9150590896606445,
"logps/rejected": -1.0017120838165283,
"loss": 0.9833,
"odds_ratio_loss": 0.6827279329299927,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.09150592237710953,
"rewards/margins": 0.008665294386446476,
"rewards/rejected": -0.10017120838165283,
"sft_loss": 0.9150590896606445,
"step": 300
},
{
"epoch": 0.5011113356233583,
"grad_norm": 0.3225099742412567,
"learning_rate": 4.663119185217409e-06,
"logits/chosen": -0.43460625410079956,
"logits/rejected": -0.4127863049507141,
"logps/chosen": -0.8891846537590027,
"logps/rejected": -1.0905497074127197,
"loss": 0.954,
"odds_ratio_loss": 0.6476849913597107,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.08891846239566803,
"rewards/margins": 0.020136509090662003,
"rewards/rejected": -0.10905496776103973,
"sft_loss": 0.8891846537590027,
"step": 310
},
{
"epoch": 0.5172762174176602,
"grad_norm": 0.3512892723083496,
"learning_rate": 4.641570337650232e-06,
"logits/chosen": -0.43388432264328003,
"logits/rejected": -0.39495667815208435,
"logps/chosen": -0.8790934681892395,
"logps/rejected": -0.9963566064834595,
"loss": 0.9498,
"odds_ratio_loss": 0.7069565057754517,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.08790934085845947,
"rewards/margins": 0.011726310476660728,
"rewards/rejected": -0.09963564574718475,
"sft_loss": 0.8790934681892395,
"step": 320
},
{
"epoch": 0.533441099211962,
"grad_norm": 0.3520517349243164,
"learning_rate": 4.61940652694154e-06,
"logits/chosen": -0.45831650495529175,
"logits/rejected": -0.4600452780723572,
"logps/chosen": -0.9612126350402832,
"logps/rejected": -1.0601940155029297,
"loss": 1.0373,
"odds_ratio_loss": 0.7606214880943298,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.09612125903367996,
"rewards/margins": 0.00989813357591629,
"rewards/rejected": -0.10601940006017685,
"sft_loss": 0.9612126350402832,
"step": 330
},
{
"epoch": 0.5496059810062639,
"grad_norm": 0.42445889115333557,
"learning_rate": 4.596634117545689e-06,
"logits/chosen": -0.3920242190361023,
"logits/rejected": -0.41387075185775757,
"logps/chosen": -0.9238036274909973,
"logps/rejected": -1.0761339664459229,
"loss": 0.9917,
"odds_ratio_loss": 0.6789978742599487,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.09238035976886749,
"rewards/margins": 0.015233027748763561,
"rewards/rejected": -0.10761336982250214,
"sft_loss": 0.9238036274909973,
"step": 340
},
{
"epoch": 0.5657708628005658,
"grad_norm": 0.3377890884876251,
"learning_rate": 4.573259648679335e-06,
"logits/chosen": -0.39150765538215637,
"logits/rejected": -0.4451742172241211,
"logps/chosen": -0.9269700050354004,
"logps/rejected": -1.077823281288147,
"loss": 0.9987,
"odds_ratio_loss": 0.7173791527748108,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": -0.09269699454307556,
"rewards/margins": 0.01508533675223589,
"rewards/rejected": -0.10778234153985977,
"sft_loss": 0.9269700050354004,
"step": 350
},
{
"epoch": 0.5819357445948676,
"grad_norm": 0.9352906942367554,
"learning_rate": 4.549289832443663e-06,
"logits/chosen": -0.39780086278915405,
"logits/rejected": -0.3602847754955292,
"logps/chosen": -0.9020577669143677,
"logps/rejected": -1.0630056858062744,
"loss": 0.9737,
"odds_ratio_loss": 0.7168340682983398,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.09020576626062393,
"rewards/margins": 0.01609480008482933,
"rewards/rejected": -0.10630057752132416,
"sft_loss": 0.9020577669143677,
"step": 360
},
{
"epoch": 0.5981006263891695,
"grad_norm": 0.3642963469028473,
"learning_rate": 4.524731551896978e-06,
"logits/chosen": -0.4040652811527252,
"logits/rejected": -0.39201897382736206,
"logps/chosen": -0.822562038898468,
"logps/rejected": -0.9484196901321411,
"loss": 0.8918,
"odds_ratio_loss": 0.6919523477554321,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": -0.08225620537996292,
"rewards/margins": 0.012585763819515705,
"rewards/rejected": -0.09484197199344635,
"sft_loss": 0.822562038898468,
"step": 370
},
{
"epoch": 0.6142655081834714,
"grad_norm": 0.9358541965484619,
"learning_rate": 4.4995918590781925e-06,
"logits/chosen": -0.41558751463890076,
"logits/rejected": -0.39345669746398926,
"logps/chosen": -0.9379288554191589,
"logps/rejected": -1.0011296272277832,
"loss": 1.0132,
"odds_ratio_loss": 0.7530064582824707,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.0937928855419159,
"rewards/margins": 0.006320066750049591,
"rewards/rejected": -0.10011295974254608,
"sft_loss": 0.9379288554191589,
"step": 380
},
{
"epoch": 0.6304303899777733,
"grad_norm": 0.42754364013671875,
"learning_rate": 4.473877972981797e-06,
"logits/chosen": -0.4294399321079254,
"logits/rejected": -0.48693591356277466,
"logps/chosen": -0.9050455093383789,
"logps/rejected": -1.0990797281265259,
"loss": 0.9681,
"odds_ratio_loss": 0.6305026412010193,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.09050454199314117,
"rewards/margins": 0.019403431564569473,
"rewards/rejected": -0.10990796983242035,
"sft_loss": 0.9050455093383789,
"step": 390
},
{
"epoch": 0.6465952717720752,
"grad_norm": 0.3870018422603607,
"learning_rate": 4.447597277484894e-06,
"logits/chosen": -0.41894254088401794,
"logits/rejected": -0.3863012492656708,
"logps/chosen": -0.9011236429214478,
"logps/rejected": -1.011643648147583,
"loss": 0.971,
"odds_ratio_loss": 0.6992276906967163,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.0901123657822609,
"rewards/margins": 0.011052015237510204,
"rewards/rejected": -0.10116437822580338,
"sft_loss": 0.9011236429214478,
"step": 400
},
{
"epoch": 0.6627601535663771,
"grad_norm": 0.6716357469558716,
"learning_rate": 4.42075731922687e-06,
"logits/chosen": -0.381665974855423,
"logits/rejected": -0.40627461671829224,
"logps/chosen": -0.9860145449638367,
"logps/rejected": -1.0734965801239014,
"loss": 1.0559,
"odds_ratio_loss": 0.6987608671188354,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.09860144555568695,
"rewards/margins": 0.008748206309974194,
"rewards/rejected": -0.10734964907169342,
"sft_loss": 0.9860145449638367,
"step": 410
},
{
"epoch": 0.6789250353606789,
"grad_norm": 0.4379284083843231,
"learning_rate": 4.3933658054423465e-06,
"logits/chosen": -0.42450767755508423,
"logits/rejected": -0.4302968978881836,
"logps/chosen": -0.8682054281234741,
"logps/rejected": -1.0158107280731201,
"loss": 0.9348,
"odds_ratio_loss": 0.6656124591827393,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.0868205577135086,
"rewards/margins": 0.014760518446564674,
"rewards/rejected": -0.10158105939626694,
"sft_loss": 0.8682054281234741,
"step": 420
},
{
"epoch": 0.6950899171549808,
"grad_norm": 0.4341568052768707,
"learning_rate": 4.365430601748003e-06,
"logits/chosen": -0.3941816985607147,
"logits/rejected": -0.349882036447525,
"logps/chosen": -0.9646803140640259,
"logps/rejected": -1.0113680362701416,
"loss": 1.0372,
"odds_ratio_loss": 0.7253597974777222,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.09646803140640259,
"rewards/margins": 0.004668788518756628,
"rewards/rejected": -0.10113681852817535,
"sft_loss": 0.9646803140640259,
"step": 430
},
{
"epoch": 0.7112547989492827,
"grad_norm": 1.7109006643295288,
"learning_rate": 4.336959729883925e-06,
"logits/chosen": -0.37049371004104614,
"logits/rejected": -0.3737342953681946,
"logps/chosen": -0.9116461873054504,
"logps/rejected": -0.9422439336776733,
"loss": 0.9849,
"odds_ratio_loss": 0.7329493165016174,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.09116461873054504,
"rewards/margins": 0.0030597783625125885,
"rewards/rejected": -0.09422439336776733,
"sft_loss": 0.9116461873054504,
"step": 440
},
{
"epoch": 0.7274196807435845,
"grad_norm": 0.4295767843723297,
"learning_rate": 4.307961365410118e-06,
"logits/chosen": -0.46054011583328247,
"logits/rejected": -0.4506424069404602,
"logps/chosen": -0.904135525226593,
"logps/rejected": -0.965890109539032,
"loss": 0.9756,
"odds_ratio_loss": 0.7150284051895142,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.09041355550289154,
"rewards/margins": 0.0061754509806632996,
"rewards/rejected": -0.09658900648355484,
"sft_loss": 0.904135525226593,
"step": 450
},
{
"epoch": 0.7435845625378864,
"grad_norm": 0.7776443958282471,
"learning_rate": 4.278443835358854e-06,
"logits/chosen": -0.3951818645000458,
"logits/rejected": -0.4040835499763489,
"logps/chosen": -0.8823555707931519,
"logps/rejected": -1.1062017679214478,
"loss": 0.9449,
"odds_ratio_loss": 0.6257806420326233,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.08823557198047638,
"rewards/margins": 0.022384602576494217,
"rewards/rejected": -0.1106201782822609,
"sft_loss": 0.8823555707931519,
"step": 460
},
{
"epoch": 0.7597494443321883,
"grad_norm": 0.37953025102615356,
"learning_rate": 4.248415615843523e-06,
"logits/chosen": -0.376980721950531,
"logits/rejected": -0.40178006887435913,
"logps/chosen": -0.9119707345962524,
"logps/rejected": -0.9874213933944702,
"loss": 0.9817,
"odds_ratio_loss": 0.6976627111434937,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.09119707345962524,
"rewards/margins": 0.007545073516666889,
"rewards/rejected": -0.09874214231967926,
"sft_loss": 0.9119707345962524,
"step": 470
},
{
"epoch": 0.7759143261264903,
"grad_norm": 0.5314805507659912,
"learning_rate": 4.217885329624666e-06,
"logits/chosen": -0.3499462604522705,
"logits/rejected": -0.33436357975006104,
"logps/chosen": -0.876055121421814,
"logps/rejected": -1.064893126487732,
"loss": 0.9413,
"odds_ratio_loss": 0.6526578068733215,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.08760551363229752,
"rewards/margins": 0.018883811309933662,
"rewards/rejected": -0.10648931562900543,
"sft_loss": 0.876055121421814,
"step": 480
},
{
"epoch": 0.7920792079207921,
"grad_norm": 0.40282461047172546,
"learning_rate": 4.186861743633911e-06,
"logits/chosen": -0.41591471433639526,
"logits/rejected": -0.4058813154697418,
"logps/chosen": -0.8972100019454956,
"logps/rejected": -1.093335509300232,
"loss": 0.9699,
"odds_ratio_loss": 0.7265552282333374,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.08972100913524628,
"rewards/margins": 0.01961255446076393,
"rewards/rejected": -0.10933355987071991,
"sft_loss": 0.8972100019454956,
"step": 490
},
{
"epoch": 0.808244089715094,
"grad_norm": 0.43431738018989563,
"learning_rate": 4.155353766456497e-06,
"logits/chosen": -0.30508697032928467,
"logits/rejected": -0.3136020302772522,
"logps/chosen": -0.9303945302963257,
"logps/rejected": -1.0141643285751343,
"loss": 1.0008,
"odds_ratio_loss": 0.7037394046783447,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.09303945302963257,
"rewards/margins": 0.00837697833776474,
"rewards/rejected": -0.1014164462685585,
"sft_loss": 0.9303945302963257,
"step": 500
},
{
"epoch": 0.808244089715094,
"eval_logits/chosen": -0.3878052830696106,
"eval_logits/rejected": -0.3689490258693695,
"eval_logps/chosen": -0.9066087007522583,
"eval_logps/rejected": -1.0192701816558838,
"eval_loss": 0.9776538014411926,
"eval_odds_ratio_loss": 0.710451602935791,
"eval_rewards/accuracies": 0.5054545402526855,
"eval_rewards/chosen": -0.09066087007522583,
"eval_rewards/margins": 0.011266152374446392,
"eval_rewards/rejected": -0.1019270196557045,
"eval_runtime": 192.2826,
"eval_samples_per_second": 5.721,
"eval_sft_loss": 0.9066087007522583,
"eval_steps_per_second": 2.86,
"step": 500
},
{
"epoch": 0.8244089715093958,
"grad_norm": 0.3983856737613678,
"learning_rate": 4.123370445773134e-06,
"logits/chosen": -0.344710111618042,
"logits/rejected": -0.3169902563095093,
"logps/chosen": -0.8998648524284363,
"logps/rejected": -0.9106130599975586,
"loss": 0.975,
"odds_ratio_loss": 0.7513402700424194,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.08998648822307587,
"rewards/margins": 0.0010748239001259208,
"rewards/rejected": -0.0910613164305687,
"sft_loss": 0.8998648524284363,
"step": 510
},
{
"epoch": 0.8405738533036977,
"grad_norm": 1.2135205268859863,
"learning_rate": 4.090920965761906e-06,
"logits/chosen": -0.3448580205440521,
"logits/rejected": -0.3548375964164734,
"logps/chosen": -0.9812738299369812,
"logps/rejected": -1.0694336891174316,
"loss": 1.0535,
"odds_ratio_loss": 0.7224698662757874,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.09812740236520767,
"rewards/margins": 0.00881598424166441,
"rewards/rejected": -0.10694338381290436,
"sft_loss": 0.9812738299369812,
"step": 520
},
{
"epoch": 0.8567387350979996,
"grad_norm": 0.9130859375,
"learning_rate": 4.058014644460991e-06,
"logits/chosen": -0.34060588479042053,
"logits/rejected": -0.3562433123588562,
"logps/chosen": -0.9648042917251587,
"logps/rejected": -1.0603010654449463,
"loss": 1.032,
"odds_ratio_loss": 0.6720489859580994,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.09648042917251587,
"rewards/margins": 0.009549676440656185,
"rewards/rejected": -0.10603010654449463,
"sft_loss": 0.9648042917251587,
"step": 530
},
{
"epoch": 0.8729036168923014,
"grad_norm": 0.6945879459381104,
"learning_rate": 4.024660931092939e-06,
"logits/chosen": -0.39998704195022583,
"logits/rejected": -0.39360350370407104,
"logps/chosen": -0.8902137875556946,
"logps/rejected": -1.0513432025909424,
"loss": 0.9562,
"odds_ratio_loss": 0.6595617532730103,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.08902137726545334,
"rewards/margins": 0.016112947836518288,
"rewards/rejected": -0.10513432323932648,
"sft_loss": 0.8902137875556946,
"step": 540
},
{
"epoch": 0.8890684986866033,
"grad_norm": 0.45378220081329346,
"learning_rate": 3.990869403351272e-06,
"logits/chosen": -0.3531869053840637,
"logits/rejected": -0.38131508231163025,
"logps/chosen": -0.9068384170532227,
"logps/rejected": -1.065394639968872,
"loss": 0.9704,
"odds_ratio_loss": 0.635545015335083,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.09068384021520615,
"rewards/margins": 0.01585562154650688,
"rewards/rejected": -0.10653946548700333,
"sft_loss": 0.9068384170532227,
"step": 550
},
{
"epoch": 0.9052333804809052,
"grad_norm": 0.5512678623199463,
"learning_rate": 3.956649764650206e-06,
"logits/chosen": -0.29515427350997925,
"logits/rejected": -0.31435275077819824,
"logps/chosen": -0.9203943014144897,
"logps/rejected": -1.0603986978530884,
"loss": 0.9918,
"odds_ratio_loss": 0.7142159938812256,
"rewards/accuracies": 0.46875,
"rewards/chosen": -0.09203943610191345,
"rewards/margins": 0.014000418595969677,
"rewards/rejected": -0.106039859354496,
"sft_loss": 0.9203943014144897,
"step": 560
},
{
"epoch": 0.9213982622752072,
"grad_norm": 0.5750080347061157,
"learning_rate": 3.92201184133826e-06,
"logits/chosen": -0.3182484209537506,
"logits/rejected": -0.3164721131324768,
"logps/chosen": -0.8570343255996704,
"logps/rejected": -1.0225125551223755,
"loss": 0.922,
"odds_ratio_loss": 0.6495530009269714,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.08570344746112823,
"rewards/margins": 0.01654782146215439,
"rewards/rejected": -0.10225125402212143,
"sft_loss": 0.8570343255996704,
"step": 570
},
{
"epoch": 0.937563144069509,
"grad_norm": 0.5823240876197815,
"learning_rate": 3.886965579876572e-06,
"logits/chosen": -0.307335764169693,
"logits/rejected": -0.331511914730072,
"logps/chosen": -0.8535898923873901,
"logps/rejected": -0.9173160791397095,
"loss": 0.9234,
"odds_ratio_loss": 0.6983198523521423,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.08535899966955185,
"rewards/margins": 0.006372606847435236,
"rewards/rejected": -0.09173160046339035,
"sft_loss": 0.8535898923873901,
"step": 580
},
{
"epoch": 0.9537280258638109,
"grad_norm": 0.3793308734893799,
"learning_rate": 3.851521043982716e-06,
"logits/chosen": -0.3546546399593353,
"logits/rejected": -0.3105318248271942,
"logps/chosen": -0.9257644414901733,
"logps/rejected": -0.994279682636261,
"loss": 0.9977,
"odds_ratio_loss": 0.7192004919052124,
"rewards/accuracies": 0.46875,
"rewards/chosen": -0.09257644414901733,
"rewards/margins": 0.006851526442915201,
"rewards/rejected": -0.0994279757142067,
"sft_loss": 0.9257644414901733,
"step": 590
},
{
"epoch": 0.9698929076581128,
"grad_norm": 0.5789406895637512,
"learning_rate": 3.81568841174086e-06,
"logits/chosen": -0.39430108666419983,
"logits/rejected": -0.38088011741638184,
"logps/chosen": -0.8874362111091614,
"logps/rejected": -1.0097267627716064,
"loss": 0.9592,
"odds_ratio_loss": 0.7179639935493469,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.08874362707138062,
"rewards/margins": 0.012229054234921932,
"rewards/rejected": -0.10097268968820572,
"sft_loss": 0.8874362111091614,
"step": 600
},
{
"epoch": 0.9860577894524146,
"grad_norm": 0.4505593478679657,
"learning_rate": 3.7794779726790664e-06,
"logits/chosen": -0.4197085499763489,
"logits/rejected": -0.3544057607650757,
"logps/chosen": -0.8556501269340515,
"logps/rejected": -0.9688836336135864,
"loss": 0.9233,
"odds_ratio_loss": 0.6760933995246887,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.0855650082230568,
"rewards/margins": 0.011323352344334126,
"rewards/rejected": -0.09688836336135864,
"sft_loss": 0.8556501269340515,
"step": 610
},
{
"epoch": 1.0022226712467166,
"grad_norm": 0.41950830817222595,
"learning_rate": 3.7429001248146096e-06,
"logits/chosen": -0.3560163080692291,
"logits/rejected": -0.32193905115127563,
"logps/chosen": -0.8660818934440613,
"logps/rejected": -1.0638062953948975,
"loss": 0.9302,
"odds_ratio_loss": 0.6412297487258911,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.08660819381475449,
"rewards/margins": 0.019772443920373917,
"rewards/rejected": -0.10638062655925751,
"sft_loss": 0.8660818934440613,
"step": 620
},
{
"epoch": 1.0183875530410185,
"grad_norm": 0.30259978771209717,
"learning_rate": 3.7059653716681227e-06,
"logits/chosen": -0.3218996524810791,
"logits/rejected": -0.3514016568660736,
"logps/chosen": -0.9751222729682922,
"logps/rejected": -1.1278547048568726,
"loss": 1.046,
"odds_ratio_loss": 0.7084661722183228,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.09751223772764206,
"rewards/margins": 0.015273240394890308,
"rewards/rejected": -0.11278548091650009,
"sft_loss": 0.9751222729682922,
"step": 630
},
{
"epoch": 1.0345524348353203,
"grad_norm": 1.449523687362671,
"learning_rate": 3.668684319247463e-06,
"logits/chosen": -0.3402321934700012,
"logits/rejected": -0.3320569396018982,
"logps/chosen": -0.8782706260681152,
"logps/rejected": -1.0504738092422485,
"loss": 0.9434,
"odds_ratio_loss": 0.651136040687561,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.08782706409692764,
"rewards/margins": 0.01722031459212303,
"rewards/rejected": -0.10504738241434097,
"sft_loss": 0.8782706260681152,
"step": 640
},
{
"epoch": 1.0507173166296222,
"grad_norm": 0.36652296781539917,
"learning_rate": 3.6310676730021373e-06,
"logits/chosen": -0.3392433524131775,
"logits/rejected": -0.3268556296825409,
"logps/chosen": -0.8789156079292297,
"logps/rejected": -0.9153023958206177,
"loss": 0.9515,
"odds_ratio_loss": 0.7262720465660095,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.08789155632257462,
"rewards/margins": 0.0036386798601597548,
"rewards/rejected": -0.09153024852275848,
"sft_loss": 0.8789156079292297,
"step": 650
},
{
"epoch": 1.066882198423924,
"grad_norm": 0.42644253373146057,
"learning_rate": 3.593126234749178e-06,
"logits/chosen": -0.35958123207092285,
"logits/rejected": -0.33439984917640686,
"logps/chosen": -0.9317266345024109,
"logps/rejected": -0.9812437891960144,
"loss": 1.004,
"odds_ratio_loss": 0.7226861119270325,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.09317266196012497,
"rewards/margins": 0.0049517154693603516,
"rewards/rejected": -0.09812436997890472,
"sft_loss": 0.9317266345024109,
"step": 660
},
{
"epoch": 1.083047080218226,
"grad_norm": 0.5300435423851013,
"learning_rate": 3.554870899571343e-06,
"logits/chosen": -0.4070967137813568,
"logits/rejected": -0.38338038325309753,
"logps/chosen": -0.9088705778121948,
"logps/rejected": -1.0065948963165283,
"loss": 0.9774,
"odds_ratio_loss": 0.6850352883338928,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.09088706225156784,
"rewards/margins": 0.009772435761988163,
"rewards/rejected": -0.10065948963165283,
"sft_loss": 0.9088705778121948,
"step": 670
},
{
"epoch": 1.0992119620125278,
"grad_norm": 1.5718979835510254,
"learning_rate": 3.5163126526885373e-06,
"logits/chosen": -0.3708317279815674,
"logits/rejected": -0.3510357737541199,
"logps/chosen": -0.8702448606491089,
"logps/rejected": -0.9972399473190308,
"loss": 0.9409,
"odds_ratio_loss": 0.7065256834030151,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.08702448755502701,
"rewards/margins": 0.012699509970843792,
"rewards/rejected": -0.09972399473190308,
"sft_loss": 0.8702448606491089,
"step": 680
},
{
"epoch": 1.1153768438068297,
"grad_norm": 0.31913694739341736,
"learning_rate": 3.4774625663033484e-06,
"logits/chosen": -0.39085036516189575,
"logits/rejected": -0.37611085176467896,
"logps/chosen": -0.8731836080551147,
"logps/rejected": -0.9660570025444031,
"loss": 0.9427,
"odds_ratio_loss": 0.6954530477523804,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.08731836825609207,
"rewards/margins": 0.009287341497838497,
"rewards/rejected": -0.09660570323467255,
"sft_loss": 0.8731836080551147,
"step": 690
},
{
"epoch": 1.1315417256011315,
"grad_norm": 0.5645192265510559,
"learning_rate": 3.4383317964216067e-06,
"logits/chosen": -0.3893832564353943,
"logits/rejected": -0.3442583680152893,
"logps/chosen": -0.870397686958313,
"logps/rejected": -0.9214354753494263,
"loss": 0.9448,
"odds_ratio_loss": 0.7436445355415344,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -0.0870397686958313,
"rewards/margins": 0.005103783216327429,
"rewards/rejected": -0.09214354306459427,
"sft_loss": 0.870397686958313,
"step": 700
},
{
"epoch": 1.1477066073954334,
"grad_norm": 0.7822654247283936,
"learning_rate": 3.398931579648877e-06,
"logits/chosen": -0.3577522039413452,
"logits/rejected": -0.2890363931655884,
"logps/chosen": -0.9082385301589966,
"logps/rejected": -1.1010273694992065,
"loss": 0.9792,
"odds_ratio_loss": 0.7092560529708862,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.09082385897636414,
"rewards/margins": 0.019278880208730698,
"rewards/rejected": -0.11010273545980453,
"sft_loss": 0.9082385301589966,
"step": 710
},
{
"epoch": 1.1638714891897353,
"grad_norm": 0.6916553974151611,
"learning_rate": 3.359273229963813e-06,
"logits/chosen": -0.33050891757011414,
"logits/rejected": -0.33249133825302124,
"logps/chosen": -0.8524163961410522,
"logps/rejected": -0.9603297114372253,
"loss": 0.9215,
"odds_ratio_loss": 0.6913267374038696,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.0852416455745697,
"rewards/margins": 0.010791336186230183,
"rewards/rejected": -0.09603297710418701,
"sft_loss": 0.8524163961410522,
"step": 720
},
{
"epoch": 1.1800363709840371,
"grad_norm": 0.36541640758514404,
"learning_rate": 3.319368135469285e-06,
"logits/chosen": -0.34484899044036865,
"logits/rejected": -0.3120992183685303,
"logps/chosen": -0.8964350819587708,
"logps/rejected": -1.0409529209136963,
"loss": 0.9665,
"odds_ratio_loss": 0.7009326219558716,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.08964350074529648,
"rewards/margins": 0.014451777562499046,
"rewards/rejected": -0.10409528017044067,
"sft_loss": 0.8964350819587708,
"step": 730
},
{
"epoch": 1.196201252778339,
"grad_norm": 0.5928468704223633,
"learning_rate": 3.279227755122228e-06,
"logits/chosen": -0.359285831451416,
"logits/rejected": -0.3708931505680084,
"logps/chosen": -0.817459225654602,
"logps/rejected": -1.1048064231872559,
"loss": 0.8791,
"odds_ratio_loss": 0.6168545484542847,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.0817459225654602,
"rewards/margins": 0.028734717518091202,
"rewards/rejected": -0.1104806438088417,
"sft_loss": 0.817459225654602,
"step": 740
},
{
"epoch": 1.2123661345726409,
"grad_norm": 0.8944354057312012,
"learning_rate": 3.2388636154431417e-06,
"logits/chosen": -0.32971471548080444,
"logits/rejected": -0.3240662217140198,
"logps/chosen": -0.9531005024909973,
"logps/rejected": -1.1055543422698975,
"loss": 1.0252,
"odds_ratio_loss": 0.7207925319671631,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.09531004726886749,
"rewards/margins": 0.015245395712554455,
"rewards/rejected": -0.11055544763803482,
"sft_loss": 0.9531005024909973,
"step": 750
},
{
"epoch": 1.2285310163669427,
"grad_norm": 0.5451232194900513,
"learning_rate": 3.198287307206192e-06,
"logits/chosen": -0.3906642198562622,
"logits/rejected": -0.36378178000450134,
"logps/chosen": -0.909538745880127,
"logps/rejected": -1.005489706993103,
"loss": 0.9791,
"odds_ratio_loss": 0.6954682469367981,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.09095387905836105,
"rewards/margins": 0.009595084004104137,
"rewards/rejected": -0.10054896771907806,
"sft_loss": 0.909538745880127,
"step": 760
},
{
"epoch": 1.2446958981612446,
"grad_norm": 0.3986392617225647,
"learning_rate": 3.157510482110856e-06,
"logits/chosen": -0.31712478399276733,
"logits/rejected": -0.3332034647464752,
"logps/chosen": -0.8950090408325195,
"logps/rejected": -0.9677726626396179,
"loss": 0.9687,
"odds_ratio_loss": 0.7365735173225403,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.08950088918209076,
"rewards/margins": 0.007276373915374279,
"rewards/rejected": -0.09677727520465851,
"sft_loss": 0.8950090408325195,
"step": 770
},
{
"epoch": 1.2608607799555465,
"grad_norm": 0.9783799648284912,
"learning_rate": 3.116544849436077e-06,
"logits/chosen": -0.3367740213871002,
"logits/rejected": -0.3552953600883484,
"logps/chosen": -0.9589813351631165,
"logps/rejected": -1.1763808727264404,
"loss": 1.0263,
"odds_ratio_loss": 0.6732120513916016,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.09589814394712448,
"rewards/margins": 0.02173994854092598,
"rewards/rejected": -0.11763808876276016,
"sft_loss": 0.9589813351631165,
"step": 780
},
{
"epoch": 1.2770256617498483,
"grad_norm": 0.3939819931983948,
"learning_rate": 3.0754021726778848e-06,
"logits/chosen": -0.3505743741989136,
"logits/rejected": -0.37322431802749634,
"logps/chosen": -0.83990079164505,
"logps/rejected": -1.0232980251312256,
"loss": 0.9049,
"odds_ratio_loss": 0.6501890420913696,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.08399007469415665,
"rewards/margins": 0.018339723348617554,
"rewards/rejected": -0.1023297905921936,
"sft_loss": 0.83990079164505,
"step": 790
},
{
"epoch": 1.2931905435441502,
"grad_norm": 0.35344642400741577,
"learning_rate": 3.0340942661714463e-06,
"logits/chosen": -0.3435738980770111,
"logits/rejected": -0.36761245131492615,
"logps/chosen": -0.9316965341567993,
"logps/rejected": -1.0095479488372803,
"loss": 1.003,
"odds_ratio_loss": 0.7125651836395264,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.09316965192556381,
"rewards/margins": 0.007785154972225428,
"rewards/rejected": -0.1009548082947731,
"sft_loss": 0.9316965341567993,
"step": 800
},
{
"epoch": 1.3093554253384523,
"grad_norm": 0.4086878001689911,
"learning_rate": 2.992632991698512e-06,
"logits/chosen": -0.39886465668678284,
"logits/rejected": -0.3849073350429535,
"logps/chosen": -0.9022181630134583,
"logps/rejected": -1.0039399862289429,
"loss": 0.9729,
"odds_ratio_loss": 0.7066690325737,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.09022180736064911,
"rewards/margins": 0.010172189213335514,
"rewards/rejected": -0.10039399564266205,
"sft_loss": 0.9022181630134583,
"step": 810
},
{
"epoch": 1.3255203071327541,
"grad_norm": 0.45464497804641724,
"learning_rate": 2.9510302550812537e-06,
"logits/chosen": -0.3623855710029602,
"logits/rejected": -0.31726986169815063,
"logps/chosen": -0.8218330144882202,
"logps/rejected": -1.0319081544876099,
"loss": 0.8851,
"odds_ratio_loss": 0.6329900026321411,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.08218331634998322,
"rewards/margins": 0.02100750431418419,
"rewards/rejected": -0.1031908169388771,
"sft_loss": 0.8218330144882202,
"step": 820
},
{
"epoch": 1.341685188927056,
"grad_norm": 1.1504096984863281,
"learning_rate": 2.9092980027634325e-06,
"logits/chosen": -0.38953226804733276,
"logits/rejected": -0.3612954914569855,
"logps/chosen": -0.8214972615242004,
"logps/rejected": -0.9684427976608276,
"loss": 0.8864,
"odds_ratio_loss": 0.6492589712142944,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.08214972913265228,
"rewards/margins": 0.014694547280669212,
"rewards/rejected": -0.09684427082538605,
"sft_loss": 0.8214972615242004,
"step": 830
},
{
"epoch": 1.3578500707213579,
"grad_norm": 0.33391210436820984,
"learning_rate": 2.867448218379927e-06,
"logits/chosen": -0.3767167627811432,
"logits/rejected": -0.3566213548183441,
"logps/chosen": -0.9622126817703247,
"logps/rejected": -1.030574083328247,
"loss": 1.0363,
"odds_ratio_loss": 0.7405400276184082,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.09622127562761307,
"rewards/margins": 0.006836143787950277,
"rewards/rejected": -0.10305740684270859,
"sft_loss": 0.9622126817703247,
"step": 840
},
{
"epoch": 1.3740149525156597,
"grad_norm": 1.2477465867996216,
"learning_rate": 2.825492919315559e-06,
"logits/chosen": -0.3341541886329651,
"logits/rejected": -0.28563547134399414,
"logps/chosen": -0.9898349046707153,
"logps/rejected": -0.9626699686050415,
"loss": 1.0687,
"odds_ratio_loss": 0.7890844345092773,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.09898348897695541,
"rewards/margins": -0.0027165021747350693,
"rewards/rejected": -0.0962669849395752,
"sft_loss": 0.9898349046707153,
"step": 850
},
{
"epoch": 1.3901798343099616,
"grad_norm": 0.37100037932395935,
"learning_rate": 2.7834441532542482e-06,
"logits/chosen": -0.3620319366455078,
"logits/rejected": -0.3429003357887268,
"logps/chosen": -0.8693292737007141,
"logps/rejected": -0.991874098777771,
"loss": 0.9379,
"odds_ratio_loss": 0.6856324076652527,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.08693292737007141,
"rewards/margins": 0.012254483997821808,
"rewards/rejected": -0.09918741136789322,
"sft_loss": 0.8693292737007141,
"step": 860
},
{
"epoch": 1.4063447161042635,
"grad_norm": 1.2096267938613892,
"learning_rate": 2.74131399471945e-06,
"logits/chosen": -0.3446846306324005,
"logits/rejected": -0.3061850666999817,
"logps/chosen": -0.9667361974716187,
"logps/rejected": -1.053593397140503,
"loss": 1.0382,
"odds_ratio_loss": 0.714438796043396,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.0966736227273941,
"rewards/margins": 0.008685723878443241,
"rewards/rejected": -0.10535935312509537,
"sft_loss": 0.9667361974716187,
"step": 870
},
{
"epoch": 1.4225095978985653,
"grad_norm": 0.47893857955932617,
"learning_rate": 2.6991145416068947e-06,
"logits/chosen": -0.3955840468406677,
"logits/rejected": -0.31594154238700867,
"logps/chosen": -0.9019123315811157,
"logps/rejected": -0.9488536715507507,
"loss": 0.9734,
"odds_ratio_loss": 0.7147491574287415,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.09019123762845993,
"rewards/margins": 0.0046941377222537994,
"rewards/rejected": -0.09488537907600403,
"sft_loss": 0.9019123315811157,
"step": 880
},
{
"epoch": 1.4386744796928672,
"grad_norm": 0.2868447005748749,
"learning_rate": 2.6568579117106143e-06,
"logits/chosen": -0.4024140238761902,
"logits/rejected": -0.4033503532409668,
"logps/chosen": -0.8388016819953918,
"logps/rejected": -0.9728044271469116,
"loss": 0.9081,
"odds_ratio_loss": 0.6926370859146118,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.08388017117977142,
"rewards/margins": 0.013400280848145485,
"rewards/rejected": -0.09728045761585236,
"sft_loss": 0.8388016819953918,
"step": 890
},
{
"epoch": 1.454839361487169,
"grad_norm": 0.24462518095970154,
"learning_rate": 2.6145562392432544e-06,
"logits/chosen": -0.3949779272079468,
"logits/rejected": -0.39668601751327515,
"logps/chosen": -0.8613153696060181,
"logps/rejected": -0.9795036315917969,
"loss": 0.9305,
"odds_ratio_loss": 0.6919496059417725,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": -0.08613153547048569,
"rewards/margins": 0.011818833649158478,
"rewards/rejected": -0.09795036166906357,
"sft_loss": 0.8613153696060181,
"step": 900
},
{
"epoch": 1.471004243281471,
"grad_norm": 0.5152093768119812,
"learning_rate": 2.5722216713516682e-06,
"logits/chosen": -0.42058199644088745,
"logits/rejected": -0.38909250497817993,
"logps/chosen": -0.8609904050827026,
"logps/rejected": -0.9690335988998413,
"loss": 0.9318,
"odds_ratio_loss": 0.7082633972167969,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.0860990509390831,
"rewards/margins": 0.010804320685565472,
"rewards/rejected": -0.09690337628126144,
"sft_loss": 0.8609904050827026,
"step": 910
},
{
"epoch": 1.4871691250757728,
"grad_norm": 0.5419692397117615,
"learning_rate": 2.5298663646288064e-06,
"logits/chosen": -0.35978519916534424,
"logits/rejected": -0.35384541749954224,
"logps/chosen": -0.8710163235664368,
"logps/rejected": -1.0426474809646606,
"loss": 0.9373,
"odds_ratio_loss": 0.6623716354370117,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.08710163086652756,
"rewards/margins": 0.017163105309009552,
"rewards/rejected": -0.10426473617553711,
"sft_loss": 0.8710163235664368,
"step": 920
},
{
"epoch": 1.503334006870075,
"grad_norm": 3.1488473415374756,
"learning_rate": 2.487502481622879e-06,
"logits/chosen": -0.4146711230278015,
"logits/rejected": -0.40715789794921875,
"logps/chosen": -0.9579635858535767,
"logps/rejected": -1.0180439949035645,
"loss": 1.0298,
"odds_ratio_loss": 0.718089759349823,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.0957963615655899,
"rewards/margins": 0.006008026655763388,
"rewards/rejected": -0.10180439800024033,
"sft_loss": 0.9579635858535767,
"step": 930
},
{
"epoch": 1.5194988886643768,
"grad_norm": 0.6520385146141052,
"learning_rate": 2.4451421873448253e-06,
"logits/chosen": -0.3981381952762604,
"logits/rejected": -0.33850008249282837,
"logps/chosen": -0.9044814109802246,
"logps/rejected": -0.9930024147033691,
"loss": 0.9767,
"odds_ratio_loss": 0.7225072979927063,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.09044814109802246,
"rewards/margins": 0.008852103725075722,
"rewards/rejected": -0.09930024296045303,
"sft_loss": 0.9044814109802246,
"step": 940
},
{
"epoch": 1.5356637704586786,
"grad_norm": 0.5775251984596252,
"learning_rate": 2.40279764577506e-06,
"logits/chosen": -0.36691075563430786,
"logits/rejected": -0.31715118885040283,
"logps/chosen": -0.9193195104598999,
"logps/rejected": -0.9655280113220215,
"loss": 0.9919,
"odds_ratio_loss": 0.7258428931236267,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.09193196147680283,
"rewards/margins": 0.004620848223567009,
"rewards/rejected": -0.09655280411243439,
"sft_loss": 0.9193195104598999,
"step": 950
},
{
"epoch": 1.5518286522529805,
"grad_norm": 0.4706912636756897,
"learning_rate": 2.3604810163705242e-06,
"logits/chosen": -0.3801175355911255,
"logits/rejected": -0.34497779607772827,
"logps/chosen": -0.8502659797668457,
"logps/rejected": -0.9808200597763062,
"loss": 0.9153,
"odds_ratio_loss": 0.6503497362136841,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.08502660691738129,
"rewards/margins": 0.013055416755378246,
"rewards/rejected": -0.09808202087879181,
"sft_loss": 0.8502659797668457,
"step": 960
},
{
"epoch": 1.5679935340472824,
"grad_norm": 0.8772755265235901,
"learning_rate": 2.3182044505730364e-06,
"logits/chosen": -0.3701505661010742,
"logits/rejected": -0.3588781952857971,
"logps/chosen": -0.8278260231018066,
"logps/rejected": -0.9880140423774719,
"loss": 0.8943,
"odds_ratio_loss": 0.6643026471138,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.08278260380029678,
"rewards/margins": 0.016018804162740707,
"rewards/rejected": -0.09880141168832779,
"sft_loss": 0.8278260231018066,
"step": 970
},
{
"epoch": 1.5841584158415842,
"grad_norm": 0.5644322633743286,
"learning_rate": 2.275980088319941e-06,
"logits/chosen": -0.37429267168045044,
"logits/rejected": -0.38965049386024475,
"logps/chosen": -0.830912709236145,
"logps/rejected": -0.931898295879364,
"loss": 0.901,
"odds_ratio_loss": 0.7011361122131348,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.08309127390384674,
"rewards/margins": 0.010098553262650967,
"rewards/rejected": -0.09318983554840088,
"sft_loss": 0.830912709236145,
"step": 980
},
{
"epoch": 1.600323297635886,
"grad_norm": 0.7188877463340759,
"learning_rate": 2.2338200545580577e-06,
"logits/chosen": -0.387838214635849,
"logits/rejected": -0.3446332514286041,
"logps/chosen": -0.8468879461288452,
"logps/rejected": -1.0357553958892822,
"loss": 0.9171,
"odds_ratio_loss": 0.7018327713012695,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.08468880504369736,
"rewards/margins": 0.018886741250753403,
"rewards/rejected": -0.10357554256916046,
"sft_loss": 0.8468879461288452,
"step": 990
},
{
"epoch": 1.616488179430188,
"grad_norm": 0.40455734729766846,
"learning_rate": 2.191736455761947e-06,
"logits/chosen": -0.32430940866470337,
"logits/rejected": -0.3191392719745636,
"logps/chosen": -0.7817317247390747,
"logps/rejected": -0.8739973306655884,
"loss": 0.8458,
"odds_ratio_loss": 0.6406995058059692,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.07817317545413971,
"rewards/margins": 0.009226562455296516,
"rewards/rejected": -0.08739973604679108,
"sft_loss": 0.7817317247390747,
"step": 1000
},
{
"epoch": 1.616488179430188,
"eval_logits/chosen": -0.3771926760673523,
"eval_logits/rejected": -0.3578239679336548,
"eval_logps/chosen": -0.8850269317626953,
"eval_logps/rejected": -0.9999891519546509,
"eval_loss": 0.9560017585754395,
"eval_odds_ratio_loss": 0.7097483277320862,
"eval_rewards/accuracies": 0.5190908908843994,
"eval_rewards/chosen": -0.08850269019603729,
"eval_rewards/margins": 0.0114962263032794,
"eval_rewards/rejected": -0.09999892115592957,
"eval_runtime": 192.1461,
"eval_samples_per_second": 5.725,
"eval_sft_loss": 0.8850269317626953,
"eval_steps_per_second": 2.862,
"step": 1000
},
{
"epoch": 1.6326530612244898,
"grad_norm": 0.3581576347351074,
"learning_rate": 2.1497413764574673e-06,
"logits/chosen": -0.31272074580192566,
"logits/rejected": -0.33244556188583374,
"logps/chosen": -0.9109123349189758,
"logps/rejected": -1.0614047050476074,
"loss": 0.9769,
"odds_ratio_loss": 0.6601108908653259,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.09109123051166534,
"rewards/margins": 0.01504923403263092,
"rewards/rejected": -0.10614047199487686,
"sft_loss": 0.9109123349189758,
"step": 1010
},
{
"epoch": 1.6488179430187917,
"grad_norm": 1.0781522989273071,
"learning_rate": 2.1078468757516395e-06,
"logits/chosen": -0.3577747941017151,
"logits/rejected": -0.372037798166275,
"logps/chosen": -0.8666743040084839,
"logps/rejected": -0.9286467432975769,
"loss": 0.943,
"odds_ratio_loss": 0.7631633877754211,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.08666743338108063,
"rewards/margins": 0.006197246722877026,
"rewards/rejected": -0.09286467730998993,
"sft_loss": 0.8666743040084839,
"step": 1020
},
{
"epoch": 1.6649828248130936,
"grad_norm": 0.4093440771102905,
"learning_rate": 2.0660649838698145e-06,
"logits/chosen": -0.24239635467529297,
"logits/rejected": -0.2550283670425415,
"logps/chosen": -0.8779211044311523,
"logps/rejected": -1.028240442276001,
"loss": 0.9471,
"odds_ratio_loss": 0.691811203956604,
"rewards/accuracies": 0.46875,
"rewards/chosen": -0.08779212832450867,
"rewards/margins": 0.015031938441097736,
"rewards/rejected": -0.10282406955957413,
"sft_loss": 0.8779211044311523,
"step": 1030
},
{
"epoch": 1.6811477066073954,
"grad_norm": 0.4143465459346771,
"learning_rate": 2.0244076987011284e-06,
"logits/chosen": -0.320882648229599,
"logits/rejected": -0.35348570346832275,
"logps/chosen": -0.9102975726127625,
"logps/rejected": -1.0311200618743896,
"loss": 0.9776,
"odds_ratio_loss": 0.6728986501693726,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.09102976322174072,
"rewards/margins": 0.012082245200872421,
"rewards/rejected": -0.10311201959848404,
"sft_loss": 0.9102975726127625,
"step": 1040
},
{
"epoch": 1.6973125884016973,
"grad_norm": 0.4322679340839386,
"learning_rate": 1.982886982353251e-06,
"logits/chosen": -0.33857375383377075,
"logits/rejected": -0.38647031784057617,
"logps/chosen": -0.8801182508468628,
"logps/rejected": -1.0462461709976196,
"loss": 0.9472,
"odds_ratio_loss": 0.6703814268112183,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.08801182359457016,
"rewards/margins": 0.016612788662314415,
"rewards/rejected": -0.10462461411952972,
"sft_loss": 0.8801182508468628,
"step": 1050
},
{
"epoch": 1.7134774701959992,
"grad_norm": 0.40310564637184143,
"learning_rate": 1.941514757717392e-06,
"logits/chosen": -0.3961712718009949,
"logits/rejected": -0.3599357604980469,
"logps/chosen": -0.857568621635437,
"logps/rejected": -1.0133601427078247,
"loss": 0.921,
"odds_ratio_loss": 0.6347678899765015,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.08575686812400818,
"rewards/margins": 0.015579144470393658,
"rewards/rejected": -0.10133601725101471,
"sft_loss": 0.857568621635437,
"step": 1060
},
{
"epoch": 1.729642351990301,
"grad_norm": 0.5565314888954163,
"learning_rate": 1.9003029050445953e-06,
"logits/chosen": -0.3478461802005768,
"logits/rejected": -0.3207647204399109,
"logps/chosen": -0.9041654467582703,
"logps/rejected": -0.99024897813797,
"loss": 0.9734,
"odds_ratio_loss": 0.6924456357955933,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.0904165506362915,
"rewards/margins": 0.008608358912169933,
"rewards/rejected": -0.09902490675449371,
"sft_loss": 0.9041654467582703,
"step": 1070
},
{
"epoch": 1.745807233784603,
"grad_norm": 0.4490904211997986,
"learning_rate": 1.8592632585342523e-06,
"logits/chosen": -0.36072981357574463,
"logits/rejected": -0.3492718040943146,
"logps/chosen": -0.8714792132377625,
"logps/rejected": -1.010517954826355,
"loss": 0.9396,
"odds_ratio_loss": 0.6810620427131653,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.08714792132377625,
"rewards/margins": 0.013903876766562462,
"rewards/rejected": -0.10105180740356445,
"sft_loss": 0.8714792132377625,
"step": 1080
},
{
"epoch": 1.7619721155789048,
"grad_norm": 0.71334308385849,
"learning_rate": 1.8184076029358527e-06,
"logits/chosen": -0.3724268078804016,
"logits/rejected": -0.40728870034217834,
"logps/chosen": -0.8329513669013977,
"logps/rejected": -0.8585556745529175,
"loss": 0.9053,
"odds_ratio_loss": 0.723603367805481,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.08329514414072037,
"rewards/margins": 0.002560428809374571,
"rewards/rejected": -0.08585558086633682,
"sft_loss": 0.8329513669013977,
"step": 1090
},
{
"epoch": 1.7781369973732066,
"grad_norm": 0.38024160265922546,
"learning_rate": 1.7777476701649318e-06,
"logits/chosen": -0.4104040563106537,
"logits/rejected": -0.40031394362449646,
"logps/chosen": -0.9076647758483887,
"logps/rejected": -1.019285798072815,
"loss": 0.9752,
"odds_ratio_loss": 0.6755737662315369,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.09076648205518723,
"rewards/margins": 0.011162097565829754,
"rewards/rejected": -0.10192857682704926,
"sft_loss": 0.9076647758483887,
"step": 1100
},
{
"epoch": 1.7943018791675085,
"grad_norm": 0.433108389377594,
"learning_rate": 1.7372951359341925e-06,
"logits/chosen": -0.35082167387008667,
"logits/rejected": -0.3622151017189026,
"logps/chosen": -0.8306609988212585,
"logps/rejected": -0.9346961975097656,
"loss": 0.8994,
"odds_ratio_loss": 0.6869168281555176,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.08306611329317093,
"rewards/margins": 0.010403511114418507,
"rewards/rejected": -0.09346961975097656,
"sft_loss": 0.8306609988212585,
"step": 1110
},
{
"epoch": 1.8104667609618104,
"grad_norm": 1.0182783603668213,
"learning_rate": 1.6970616164007547e-06,
"logits/chosen": -0.4078885614871979,
"logits/rejected": -0.43148526549339294,
"logps/chosen": -0.8258237838745117,
"logps/rejected": -0.9274940490722656,
"loss": 0.8967,
"odds_ratio_loss": 0.7091785073280334,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.08258237689733505,
"rewards/margins": 0.010167025960981846,
"rewards/rejected": -0.09274940937757492,
"sft_loss": 0.8258237838745117,
"step": 1120
},
{
"epoch": 1.8266316427561122,
"grad_norm": 1.0357805490493774,
"learning_rate": 1.6570586648305276e-06,
"logits/chosen": -0.4377085268497467,
"logits/rejected": -0.407601922750473,
"logps/chosen": -0.8756824731826782,
"logps/rejected": -1.0340659618377686,
"loss": 0.9437,
"odds_ratio_loss": 0.6799197793006897,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.0875682383775711,
"rewards/margins": 0.01583835855126381,
"rewards/rejected": -0.10340659320354462,
"sft_loss": 0.8756824731826782,
"step": 1130
},
{
"epoch": 1.842796524550414,
"grad_norm": 0.4784797430038452,
"learning_rate": 1.6172977682806151e-06,
"logits/chosen": -0.3374441862106323,
"logits/rejected": -0.2926723062992096,
"logps/chosen": -0.8671070337295532,
"logps/rejected": -1.0173355340957642,
"loss": 0.9326,
"odds_ratio_loss": 0.6546159982681274,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.08671069890260696,
"rewards/margins": 0.015022864565253258,
"rewards/rejected": -0.10173355042934418,
"sft_loss": 0.8671070337295532,
"step": 1140
},
{
"epoch": 1.858961406344716,
"grad_norm": 0.5492507219314575,
"learning_rate": 1.5777903443007586e-06,
"logits/chosen": -0.3145988881587982,
"logits/rejected": -0.42871540784835815,
"logps/chosen": -0.8989070057868958,
"logps/rejected": -1.0172455310821533,
"loss": 0.9689,
"odds_ratio_loss": 0.6998150944709778,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.08989070355892181,
"rewards/margins": 0.011833854019641876,
"rewards/rejected": -0.1017245501279831,
"sft_loss": 0.8989070057868958,
"step": 1150
},
{
"epoch": 1.8751262881390178,
"grad_norm": 0.4275898039340973,
"learning_rate": 1.5385477376547226e-06,
"logits/chosen": -0.3347630202770233,
"logits/rejected": -0.34142249822616577,
"logps/chosen": -0.9212555885314941,
"logps/rejected": -1.0021544694900513,
"loss": 0.9893,
"odds_ratio_loss": 0.679952085018158,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.0921255499124527,
"rewards/margins": 0.008089900948107243,
"rewards/rejected": -0.10021545737981796,
"sft_loss": 0.9212555885314941,
"step": 1160
},
{
"epoch": 1.89129116993332,
"grad_norm": 0.5769237875938416,
"learning_rate": 1.4995812170625845e-06,
"logits/chosen": -0.3509088456630707,
"logits/rejected": -0.35828500986099243,
"logps/chosen": -0.8898354768753052,
"logps/rejected": -1.1126220226287842,
"loss": 0.9543,
"odds_ratio_loss": 0.6445311307907104,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.08898355811834335,
"rewards/margins": 0.02227865532040596,
"rewards/rejected": -0.11126221716403961,
"sft_loss": 0.8898354768753052,
"step": 1170
},
{
"epoch": 1.9074560517276218,
"grad_norm": 0.9893414974212646,
"learning_rate": 1.4609019719648666e-06,
"logits/chosen": -0.34388267993927,
"logits/rejected": -0.34255415201187134,
"logps/chosen": -0.9129988551139832,
"logps/rejected": -1.0511752367019653,
"loss": 0.9778,
"odds_ratio_loss": 0.6484531760215759,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.0912998765707016,
"rewards/margins": 0.013817653059959412,
"rewards/rejected": -0.10511753708124161,
"sft_loss": 0.9129988551139832,
"step": 1180
},
{
"epoch": 1.9236209335219236,
"grad_norm": 0.8161694407463074,
"learning_rate": 1.42252110930943e-06,
"logits/chosen": -0.3889426589012146,
"logits/rejected": -0.37780189514160156,
"logps/chosen": -0.8312114477157593,
"logps/rejected": -0.9597098231315613,
"loss": 0.8972,
"odds_ratio_loss": 0.6594355702400208,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.08312113583087921,
"rewards/margins": 0.01284984964877367,
"rewards/rejected": -0.0959709957242012,
"sft_loss": 0.8312114477157593,
"step": 1190
},
{
"epoch": 1.9397858153162255,
"grad_norm": 0.6737188100814819,
"learning_rate": 1.3844496503620493e-06,
"logits/chosen": -0.34721988439559937,
"logits/rejected": -0.29065969586372375,
"logps/chosen": -0.8556321263313293,
"logps/rejected": -0.9435693621635437,
"loss": 0.9217,
"odds_ratio_loss": 0.6608615517616272,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.08556319773197174,
"rewards/margins": 0.008793738670647144,
"rewards/rejected": -0.09435693919658661,
"sft_loss": 0.8556321263313293,
"step": 1200
},
{
"epoch": 1.9559506971105274,
"grad_norm": 1.0895054340362549,
"learning_rate": 1.3466985275416081e-06,
"logits/chosen": -0.38311949372291565,
"logits/rejected": -0.440490186214447,
"logps/chosen": -0.9350228309631348,
"logps/rejected": -1.0175323486328125,
"loss": 1.0086,
"odds_ratio_loss": 0.7355881929397583,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.09350229054689407,
"rewards/margins": 0.008250946179032326,
"rewards/rejected": -0.10175323486328125,
"sft_loss": 0.9350228309631348,
"step": 1210
},
{
"epoch": 1.9721155789048292,
"grad_norm": 0.7546266913414001,
"learning_rate": 1.309278581280791e-06,
"logits/chosen": -0.32461339235305786,
"logits/rejected": -0.38296985626220703,
"logps/chosen": -0.825161337852478,
"logps/rejected": -1.007612943649292,
"loss": 0.8897,
"odds_ratio_loss": 0.6452582478523254,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.082516148686409,
"rewards/margins": 0.018245156854391098,
"rewards/rejected": -0.1007612943649292,
"sft_loss": 0.825161337852478,
"step": 1220
},
{
"epoch": 1.9882804606991311,
"grad_norm": 0.30651387572288513,
"learning_rate": 1.272200556913199e-06,
"logits/chosen": -0.34240493178367615,
"logits/rejected": -0.33365195989608765,
"logps/chosen": -0.9005836248397827,
"logps/rejected": -1.0132153034210205,
"loss": 0.9729,
"odds_ratio_loss": 0.722726583480835,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.09005837142467499,
"rewards/margins": 0.011263175867497921,
"rewards/rejected": -0.10132155567407608,
"sft_loss": 0.9005836248397827,
"step": 1230
},
{
"epoch": 2.004445342493433,
"grad_norm": 0.6191690564155579,
"learning_rate": 1.2354751015877698e-06,
"logits/chosen": -0.3653295636177063,
"logits/rejected": -0.3104439675807953,
"logps/chosen": -0.8316798210144043,
"logps/rejected": -1.0361697673797607,
"loss": 0.8947,
"odds_ratio_loss": 0.6298761963844299,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.08316798508167267,
"rewards/margins": 0.02044900692999363,
"rewards/rejected": -0.10361699759960175,
"sft_loss": 0.8316798210144043,
"step": 1240
},
{
"epoch": 2.020610224287735,
"grad_norm": 0.752289354801178,
"learning_rate": 1.1991127612113945e-06,
"logits/chosen": -0.3582732379436493,
"logits/rejected": -0.3034323751926422,
"logps/chosen": -0.8952615857124329,
"logps/rejected": -1.0291544198989868,
"loss": 0.9609,
"odds_ratio_loss": 0.6566318869590759,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.08952615410089493,
"rewards/margins": 0.013389283791184425,
"rewards/rejected": -0.1029154434800148,
"sft_loss": 0.8952615857124329,
"step": 1250
},
{
"epoch": 2.036775106082037,
"grad_norm": 0.5910158753395081,
"learning_rate": 1.1631239774206035e-06,
"logits/chosen": -0.36862578988075256,
"logits/rejected": -0.3653218150138855,
"logps/chosen": -0.8613477945327759,
"logps/rejected": -0.9755401611328125,
"loss": 0.9325,
"odds_ratio_loss": 0.7117538452148438,
"rewards/accuracies": 0.46875,
"rewards/chosen": -0.08613476902246475,
"rewards/margins": 0.011419234797358513,
"rewards/rejected": -0.09755401313304901,
"sft_loss": 0.8613477945327759,
"step": 1260
},
{
"epoch": 2.052939987876339,
"grad_norm": 0.5977714657783508,
"learning_rate": 1.1275190845831978e-06,
"logits/chosen": -0.35793787240982056,
"logits/rejected": -0.3579494059085846,
"logps/chosen": -0.8839446902275085,
"logps/rejected": -1.0555723905563354,
"loss": 0.9484,
"odds_ratio_loss": 0.6443823575973511,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.08839447796344757,
"rewards/margins": 0.01716277375817299,
"rewards/rejected": -0.10555724799633026,
"sft_loss": 0.8839446902275085,
"step": 1270
},
{
"epoch": 2.0691048696706407,
"grad_norm": 0.4356369078159332,
"learning_rate": 1.0923083068306778e-06,
"logits/chosen": -0.2889194190502167,
"logits/rejected": -0.39258915185928345,
"logps/chosen": -0.8745051622390747,
"logps/rejected": -1.061402678489685,
"loss": 0.94,
"odds_ratio_loss": 0.6551867723464966,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.08745051920413971,
"rewards/margins": 0.018689759075641632,
"rewards/rejected": -0.10614027827978134,
"sft_loss": 0.8745051622390747,
"step": 1280
},
{
"epoch": 2.0852697514649425,
"grad_norm": 0.2981340289115906,
"learning_rate": 1.0575017551223348e-06,
"logits/chosen": -0.39015138149261475,
"logits/rejected": -0.40903449058532715,
"logps/chosen": -0.7750725746154785,
"logps/rejected": -0.9115964770317078,
"loss": 0.8412,
"odds_ratio_loss": 0.6609454154968262,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.07750725001096725,
"rewards/margins": 0.013652404770255089,
"rewards/rejected": -0.09115965664386749,
"sft_loss": 0.7750725746154785,
"step": 1290
},
{
"epoch": 2.1014346332592444,
"grad_norm": 0.39186251163482666,
"learning_rate": 1.023109424341833e-06,
"logits/chosen": -0.3986419141292572,
"logits/rejected": -0.36254242062568665,
"logps/chosen": -0.8747810125350952,
"logps/rejected": -0.9971181750297546,
"loss": 0.9444,
"odds_ratio_loss": 0.6959220170974731,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.08747810870409012,
"rewards/margins": 0.012233709916472435,
"rewards/rejected": -0.0997118204832077,
"sft_loss": 0.8747810125350952,
"step": 1300
},
{
"epoch": 2.1175995150535463,
"grad_norm": 0.4826388359069824,
"learning_rate": 9.891411904271273e-07,
"logits/chosen": -0.3570977747440338,
"logits/rejected": -0.34066206216812134,
"logps/chosen": -0.8385666608810425,
"logps/rejected": -0.9865023493766785,
"loss": 0.9076,
"odds_ratio_loss": 0.6902373433113098,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.08385667204856873,
"rewards/margins": 0.014793576672673225,
"rewards/rejected": -0.0986502468585968,
"sft_loss": 0.8385666608810425,
"step": 1310
},
{
"epoch": 2.133764396847848,
"grad_norm": 0.3553561866283417,
"learning_rate": 9.556068075345363e-07,
"logits/chosen": -0.28917670249938965,
"logits/rejected": -0.3470838665962219,
"logps/chosen": -0.8463741540908813,
"logps/rejected": -0.9492172002792358,
"loss": 0.9162,
"odds_ratio_loss": 0.6985523104667664,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.08463741838932037,
"rewards/margins": 0.01028431672602892,
"rewards/rejected": -0.09492173045873642,
"sft_loss": 0.8463741540908813,
"step": 1320
},
{
"epoch": 2.14992927864215,
"grad_norm": 0.3664523959159851,
"learning_rate": 9.225159052377838e-07,
"logits/chosen": -0.3276691436767578,
"logits/rejected": -0.3102811872959137,
"logps/chosen": -0.9000975489616394,
"logps/rejected": -1.0900113582611084,
"loss": 0.9658,
"odds_ratio_loss": 0.6572277545928955,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.09000976383686066,
"rewards/margins": 0.018991392105817795,
"rewards/rejected": -0.10900114476680756,
"sft_loss": 0.9000975489616394,
"step": 1330
},
{
"epoch": 2.166094160436452,
"grad_norm": 0.5697169899940491,
"learning_rate": 8.898779857628184e-07,
"logits/chosen": -0.35697469115257263,
"logits/rejected": -0.29451218247413635,
"logps/chosen": -0.7642744779586792,
"logps/rejected": -0.8856114149093628,
"loss": 0.8306,
"odds_ratio_loss": 0.6628420948982239,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.07642744481563568,
"rewards/margins": 0.012133700773119926,
"rewards/rejected": -0.08856116235256195,
"sft_loss": 0.7642744779586792,
"step": 1340
},
{
"epoch": 2.1822590422307537,
"grad_norm": 1.7151192426681519,
"learning_rate": 8.577024212591975e-07,
"logits/chosen": -0.29253047704696655,
"logits/rejected": -0.3413800001144409,
"logps/chosen": -0.8930098414421082,
"logps/rejected": -0.9748668670654297,
"loss": 0.9639,
"odds_ratio_loss": 0.708949089050293,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": -0.0893009752035141,
"rewards/margins": 0.008185721933841705,
"rewards/rejected": -0.09748668968677521,
"sft_loss": 0.8930098414421082,
"step": 1350
},
{
"epoch": 2.1984239240250556,
"grad_norm": 0.49061620235443115,
"learning_rate": 8.259984511088276e-07,
"logits/chosen": -0.3223104476928711,
"logits/rejected": -0.29760584235191345,
"logps/chosen": -0.8736541867256165,
"logps/rejected": -0.9874069094657898,
"loss": 0.9451,
"odds_ratio_loss": 0.7148812413215637,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.08736542612314224,
"rewards/margins": 0.011375268921256065,
"rewards/rejected": -0.09874069690704346,
"sft_loss": 0.8736541867256165,
"step": 1360
},
{
"epoch": 2.2145888058193575,
"grad_norm": 0.33556151390075684,
"learning_rate": 7.947751792728237e-07,
"logits/chosen": -0.3239595890045166,
"logits/rejected": -0.34610220789909363,
"logps/chosen": -0.8864496946334839,
"logps/rejected": -1.0747450590133667,
"loss": 0.9559,
"odds_ratio_loss": 0.694658637046814,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.08864498138427734,
"rewards/margins": 0.018829550594091415,
"rewards/rejected": -0.10747452825307846,
"sft_loss": 0.8864496946334839,
"step": 1370
},
{
"epoch": 2.2307536876136593,
"grad_norm": 0.5993340611457825,
"learning_rate": 7.640415716772626e-07,
"logits/chosen": -0.3385930359363556,
"logits/rejected": -0.31589871644973755,
"logps/chosen": -0.8884540796279907,
"logps/rejected": -1.0432296991348267,
"loss": 0.9579,
"odds_ratio_loss": 0.6948095560073853,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.08884540945291519,
"rewards/margins": 0.015477565117180347,
"rewards/rejected": -0.10432296991348267,
"sft_loss": 0.8884540796279907,
"step": 1380
},
{
"epoch": 2.246918569407961,
"grad_norm": 0.4777003228664398,
"learning_rate": 7.338064536385722e-07,
"logits/chosen": -0.3243527412414551,
"logits/rejected": -0.3211807608604431,
"logps/chosen": -0.8481816053390503,
"logps/rejected": -1.0429704189300537,
"loss": 0.9136,
"odds_ratio_loss": 0.6539761424064636,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.08481816202402115,
"rewards/margins": 0.019478868693113327,
"rewards/rejected": -0.10429704189300537,
"sft_loss": 0.8481816053390503,
"step": 1390
},
{
"epoch": 2.263083451202263,
"grad_norm": 0.6625237464904785,
"learning_rate": 7.040785073292883e-07,
"logits/chosen": -0.39626187086105347,
"logits/rejected": -0.3658468425273895,
"logps/chosen": -0.9418588876724243,
"logps/rejected": -1.017301321029663,
"loss": 1.0184,
"odds_ratio_loss": 0.7650783658027649,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.09418588131666183,
"rewards/margins": 0.007544253021478653,
"rewards/rejected": -0.10173014551401138,
"sft_loss": 0.9418588876724243,
"step": 1400
},
{
"epoch": 2.279248332996565,
"grad_norm": 0.5683190226554871,
"learning_rate": 6.748662692849297e-07,
"logits/chosen": -0.2916708290576935,
"logits/rejected": -0.289817750453949,
"logps/chosen": -0.8634734153747559,
"logps/rejected": -1.1026208400726318,
"loss": 0.9275,
"odds_ratio_loss": 0.6400235295295715,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.08634734898805618,
"rewards/margins": 0.02391473576426506,
"rewards/rejected": -0.11026208102703094,
"sft_loss": 0.8634734153747559,
"step": 1410
},
{
"epoch": 2.295413214790867,
"grad_norm": 1.625442624092102,
"learning_rate": 6.46178127952686e-07,
"logits/chosen": -0.35586509108543396,
"logits/rejected": -0.35335296392440796,
"logps/chosen": -0.8400161862373352,
"logps/rejected": -0.9910812377929688,
"loss": 0.9025,
"odds_ratio_loss": 0.6247957348823547,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.08400160819292068,
"rewards/margins": 0.015106521546840668,
"rewards/rejected": -0.09910812973976135,
"sft_loss": 0.8400161862373352,
"step": 1420
},
{
"epoch": 2.3115780965851687,
"grad_norm": 0.46490368247032166,
"learning_rate": 6.180223212826289e-07,
"logits/chosen": -0.33770841360092163,
"logits/rejected": -0.37226027250289917,
"logps/chosen": -0.858726978302002,
"logps/rejected": -0.9763249158859253,
"loss": 0.9249,
"odds_ratio_loss": 0.6612924933433533,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.08587270230054855,
"rewards/margins": 0.011759791523218155,
"rewards/rejected": -0.097632497549057,
"sft_loss": 0.858726978302002,
"step": 1430
},
{
"epoch": 2.3277429783794705,
"grad_norm": 0.25405463576316833,
"learning_rate": 5.904069343621443e-07,
"logits/chosen": -0.3201651871204376,
"logits/rejected": -0.34286874532699585,
"logps/chosen": -0.9113739132881165,
"logps/rejected": -1.0487134456634521,
"loss": 0.9766,
"odds_ratio_loss": 0.652290403842926,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.09113740175962448,
"rewards/margins": 0.013733962550759315,
"rewards/rejected": -0.10487135499715805,
"sft_loss": 0.9113739132881165,
"step": 1440
},
{
"epoch": 2.3439078601737724,
"grad_norm": 0.5318045020103455,
"learning_rate": 5.633398970942544e-07,
"logits/chosen": -0.32512596249580383,
"logits/rejected": -0.2820747494697571,
"logps/chosen": -0.8218180537223816,
"logps/rejected": -0.9094691276550293,
"loss": 0.8927,
"odds_ratio_loss": 0.7083881497383118,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.08218181133270264,
"rewards/margins": 0.008765103295445442,
"rewards/rejected": -0.09094691276550293,
"sft_loss": 0.8218180537223816,
"step": 1450
},
{
"epoch": 2.3600727419680743,
"grad_norm": 0.6843146681785583,
"learning_rate": 5.368289819205069e-07,
"logits/chosen": -0.39002543687820435,
"logits/rejected": -0.376250684261322,
"logps/chosen": -0.7933530211448669,
"logps/rejected": -0.9611787796020508,
"loss": 0.8585,
"odds_ratio_loss": 0.6519256234169006,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.0793353021144867,
"rewards/margins": 0.016782574355602264,
"rewards/rejected": -0.09611787647008896,
"sft_loss": 0.7933530211448669,
"step": 1460
},
{
"epoch": 2.376237623762376,
"grad_norm": 0.3784586787223816,
"learning_rate": 5.108818015890785e-07,
"logits/chosen": -0.3249315917491913,
"logits/rejected": -0.30507951974868774,
"logps/chosen": -0.8853880167007446,
"logps/rejected": -1.0341455936431885,
"loss": 0.9531,
"odds_ratio_loss": 0.6767874956130981,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.08853879570960999,
"rewards/margins": 0.014875771477818489,
"rewards/rejected": -0.10341457277536392,
"sft_loss": 0.8853880167007446,
"step": 1470
},
{
"epoch": 2.392402505556678,
"grad_norm": 0.5850736498832703,
"learning_rate": 4.855058069687291e-07,
"logits/chosen": -0.4515988230705261,
"logits/rejected": -0.4501380920410156,
"logps/chosen": -0.8440315127372742,
"logps/rejected": -0.978651225566864,
"loss": 0.9111,
"odds_ratio_loss": 0.6708062887191772,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.0844031572341919,
"rewards/margins": 0.013461967930197716,
"rewards/rejected": -0.09786512702703476,
"sft_loss": 0.8440315127372742,
"step": 1480
},
{
"epoch": 2.40856738735098,
"grad_norm": 0.4087739884853363,
"learning_rate": 4.607082849092523e-07,
"logits/chosen": -0.3892877697944641,
"logits/rejected": -0.4075300097465515,
"logps/chosen": -0.9417757987976074,
"logps/rejected": -1.0281052589416504,
"loss": 1.0107,
"odds_ratio_loss": 0.6892626881599426,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.09417758882045746,
"rewards/margins": 0.00863293744623661,
"rewards/rejected": -0.10281052440404892,
"sft_loss": 0.9417757987976074,
"step": 1490
},
{
"epoch": 2.4247322691452817,
"grad_norm": 1.142304539680481,
"learning_rate": 4.3649635614901405e-07,
"logits/chosen": -0.39748096466064453,
"logits/rejected": -0.2932053208351135,
"logps/chosen": -0.8502078056335449,
"logps/rejected": -0.883902907371521,
"loss": 0.9219,
"odds_ratio_loss": 0.7170731425285339,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.08502078056335449,
"rewards/margins": 0.003369513200595975,
"rewards/rejected": -0.0883902907371521,
"sft_loss": 0.8502078056335449,
"step": 1500
},
{
"epoch": 2.4247322691452817,
"eval_logits/chosen": -0.3775150775909424,
"eval_logits/rejected": -0.3581116795539856,
"eval_logps/chosen": -0.8786855936050415,
"eval_logps/rejected": -0.9948004484176636,
"eval_loss": 0.9497246742248535,
"eval_odds_ratio_loss": 0.7103896737098694,
"eval_rewards/accuracies": 0.5163636207580566,
"eval_rewards/chosen": -0.0878685712814331,
"eval_rewards/margins": 0.011611479334533215,
"eval_rewards/rejected": -0.0994800478219986,
"eval_runtime": 192.2752,
"eval_samples_per_second": 5.721,
"eval_sft_loss": 0.8786855936050415,
"eval_steps_per_second": 2.86,
"step": 1500
},
{
"epoch": 2.4408971509395836,
"grad_norm": 0.3841034770011902,
"learning_rate": 4.128769732701973e-07,
"logits/chosen": -0.36835092306137085,
"logits/rejected": -0.4074084758758545,
"logps/chosen": -0.8371820449829102,
"logps/rejected": -0.9595246315002441,
"loss": 0.9062,
"odds_ratio_loss": 0.6903966665267944,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.0837181955575943,
"rewards/margins": 0.012234264984726906,
"rewards/rejected": -0.09595246613025665,
"sft_loss": 0.8371820449829102,
"step": 1510
},
{
"epoch": 2.4570620327338855,
"grad_norm": 0.6487218737602234,
"learning_rate": 3.8985691870233046e-07,
"logits/chosen": -0.36084288358688354,
"logits/rejected": -0.35909101366996765,
"logps/chosen": -0.8767590522766113,
"logps/rejected": -0.9904271364212036,
"loss": 0.9487,
"odds_ratio_loss": 0.7190364599227905,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.08767590671777725,
"rewards/margins": 0.011366801336407661,
"rewards/rejected": -0.09904270619153976,
"sft_loss": 0.8767590522766113,
"step": 1520
},
{
"epoch": 2.4732269145281873,
"grad_norm": 0.726983904838562,
"learning_rate": 3.6744280277467904e-07,
"logits/chosen": -0.3547779619693756,
"logits/rejected": -0.37871819734573364,
"logps/chosen": -0.8915858268737793,
"logps/rejected": -1.0086140632629395,
"loss": 0.9661,
"odds_ratio_loss": 0.7449706792831421,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.08915858715772629,
"rewards/margins": 0.01170281507074833,
"rewards/rejected": -0.10086140781641006,
"sft_loss": 0.8915858268737793,
"step": 1530
},
{
"epoch": 2.489391796322489,
"grad_norm": 0.6208191514015198,
"learning_rate": 3.456410618180503e-07,
"logits/chosen": -0.46183329820632935,
"logits/rejected": -0.3973988890647888,
"logps/chosen": -0.7950559258460999,
"logps/rejected": -1.0139881372451782,
"loss": 0.8596,
"odds_ratio_loss": 0.6458045244216919,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.07950559258460999,
"rewards/margins": 0.021893223747611046,
"rewards/rejected": -0.10139881074428558,
"sft_loss": 0.7950559258460999,
"step": 1540
},
{
"epoch": 2.5055566781167915,
"grad_norm": 0.40934354066848755,
"learning_rate": 3.244579563165753e-07,
"logits/chosen": -0.3711478114128113,
"logits/rejected": -0.3300473093986511,
"logps/chosen": -0.8490577936172485,
"logps/rejected": -1.0569615364074707,
"loss": 0.9137,
"odds_ratio_loss": 0.6463108062744141,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.08490578085184097,
"rewards/margins": 0.020790381357073784,
"rewards/rejected": -0.10569615662097931,
"sft_loss": 0.8490577936172485,
"step": 1550
},
{
"epoch": 2.521721559911093,
"grad_norm": 0.4264324903488159,
"learning_rate": 3.038995691099697e-07,
"logits/chosen": -0.35405951738357544,
"logits/rejected": -0.3723445534706116,
"logps/chosen": -0.8575676083564758,
"logps/rejected": -1.0358964204788208,
"loss": 0.9267,
"odds_ratio_loss": 0.6915205717086792,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.08575676381587982,
"rewards/margins": 0.017832884564995766,
"rewards/rejected": -0.10358965396881104,
"sft_loss": 0.8575676083564758,
"step": 1560
},
{
"epoch": 2.5378864417053952,
"grad_norm": 0.5124202370643616,
"learning_rate": 2.839718036468192e-07,
"logits/chosen": -0.39767321944236755,
"logits/rejected": -0.361719012260437,
"logps/chosen": -0.9866407513618469,
"logps/rejected": -1.0687347650527954,
"loss": 1.0574,
"odds_ratio_loss": 0.7079859972000122,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.09866407513618469,
"rewards/margins": 0.00820938404649496,
"rewards/rejected": -0.10687346756458282,
"sft_loss": 0.9866407513618469,
"step": 1570
},
{
"epoch": 2.5540513234996967,
"grad_norm": 0.6700158715248108,
"learning_rate": 2.646803822893723e-07,
"logits/chosen": -0.34473222494125366,
"logits/rejected": -0.339333713054657,
"logps/chosen": -0.9860366582870483,
"logps/rejected": -1.0728685855865479,
"loss": 1.0579,
"odds_ratio_loss": 0.7182521224021912,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.09860367327928543,
"rewards/margins": 0.008683168329298496,
"rewards/rejected": -0.10728684812784195,
"sft_loss": 0.9860366582870483,
"step": 1580
},
{
"epoch": 2.570216205293999,
"grad_norm": 0.4476275146007538,
"learning_rate": 2.460308446703341e-07,
"logits/chosen": -0.37150639295578003,
"logits/rejected": -0.3977029621601105,
"logps/chosen": -0.8994391560554504,
"logps/rejected": -0.9403126835823059,
"loss": 0.9704,
"odds_ratio_loss": 0.7100769877433777,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.08994391560554504,
"rewards/margins": 0.004087349865585566,
"rewards/rejected": -0.09403126686811447,
"sft_loss": 0.8994391560554504,
"step": 1590
},
{
"epoch": 2.5863810870883004,
"grad_norm": 0.8473093509674072,
"learning_rate": 2.2802854610213143e-07,
"logits/chosen": -0.38676199316978455,
"logits/rejected": -0.3973104655742645,
"logps/chosen": -0.8438700437545776,
"logps/rejected": -1.018701434135437,
"loss": 0.9107,
"odds_ratio_loss": 0.6678277850151062,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.08438700437545776,
"rewards/margins": 0.017483150586485863,
"rewards/rejected": -0.10187015682458878,
"sft_loss": 0.8438700437545776,
"step": 1600
},
{
"epoch": 2.6025459688826027,
"grad_norm": 1.2318559885025024,
"learning_rate": 2.106786560391072e-07,
"logits/chosen": -0.41062861680984497,
"logits/rejected": -0.3663537800312042,
"logps/chosen": -0.9180322885513306,
"logps/rejected": -0.9797943830490112,
"loss": 0.9881,
"odds_ratio_loss": 0.7011545300483704,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.09180323779582977,
"rewards/margins": 0.006176213268190622,
"rewards/rejected": -0.09797944128513336,
"sft_loss": 0.9180322885513306,
"step": 1610
},
{
"epoch": 2.6187108506769046,
"grad_norm": 1.8344284296035767,
"learning_rate": 1.9398615659308255e-07,
"logits/chosen": -0.3516565263271332,
"logits/rejected": -0.3090236485004425,
"logps/chosen": -0.8868433833122253,
"logps/rejected": -0.9610105752944946,
"loss": 0.9563,
"odds_ratio_loss": 0.6944981813430786,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.08868434280157089,
"rewards/margins": 0.007416720036417246,
"rewards/rejected": -0.0961010605096817,
"sft_loss": 0.8868433833122253,
"step": 1620
},
{
"epoch": 2.6348757324712064,
"grad_norm": 3.686185359954834,
"learning_rate": 1.7795584110272184e-07,
"logits/chosen": -0.33260416984558105,
"logits/rejected": -0.32040587067604065,
"logps/chosen": -0.9077906608581543,
"logps/rejected": -1.0257583856582642,
"loss": 0.9756,
"odds_ratio_loss": 0.6781536340713501,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.09077905863523483,
"rewards/margins": 0.01179676502943039,
"rewards/rejected": -0.10257583856582642,
"sft_loss": 0.9077906608581543,
"step": 1630
},
{
"epoch": 2.6510406142655083,
"grad_norm": 0.7552462220191956,
"learning_rate": 1.6259231275709636e-07,
"logits/chosen": -0.32405030727386475,
"logits/rejected": -0.3262009024620056,
"logps/chosen": -0.8568581342697144,
"logps/rejected": -0.9373190999031067,
"loss": 0.9294,
"odds_ratio_loss": 0.7254046201705933,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.08568581938743591,
"rewards/margins": 0.008046089671552181,
"rewards/rejected": -0.09373190253973007,
"sft_loss": 0.8568581342697144,
"step": 1640
},
{
"epoch": 2.66720549605981,
"grad_norm": 0.45023104548454285,
"learning_rate": 1.478999832738548e-07,
"logits/chosen": -0.34250158071517944,
"logits/rejected": -0.34709858894348145,
"logps/chosen": -0.8354190587997437,
"logps/rejected": -0.9979323148727417,
"loss": 0.9021,
"odds_ratio_loss": 0.6672018766403198,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.08354191482067108,
"rewards/margins": 0.01625131070613861,
"rewards/rejected": -0.09979323297739029,
"sft_loss": 0.8354190587997437,
"step": 1650
},
{
"epoch": 2.683370377854112,
"grad_norm": 0.6760185956954956,
"learning_rate": 1.338830716323769e-07,
"logits/chosen": -0.34901902079582214,
"logits/rejected": -0.352342426776886,
"logps/chosen": -0.8232784271240234,
"logps/rejected": -0.9058715105056763,
"loss": 0.8916,
"odds_ratio_loss": 0.6835728883743286,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.08232785016298294,
"rewards/margins": 0.008259310387074947,
"rewards/rejected": -0.09058715403079987,
"sft_loss": 0.8232784271240234,
"step": 1660
},
{
"epoch": 2.699535259648414,
"grad_norm": 0.9901576638221741,
"learning_rate": 1.205456028622723e-07,
"logits/chosen": -0.3495160639286041,
"logits/rejected": -0.35691842436790466,
"logps/chosen": -0.8500292897224426,
"logps/rejected": -1.0147500038146973,
"loss": 0.9171,
"odds_ratio_loss": 0.6710700988769531,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.08500292897224426,
"rewards/margins": 0.016472063958644867,
"rewards/rejected": -0.10147500038146973,
"sft_loss": 0.8500292897224426,
"step": 1670
},
{
"epoch": 2.7157001414427158,
"grad_norm": 0.29376673698425293,
"learning_rate": 1.0789140688756805e-07,
"logits/chosen": -0.2777409255504608,
"logits/rejected": -0.30515843629837036,
"logps/chosen": -0.8388081789016724,
"logps/rejected": -1.004902720451355,
"loss": 0.9016,
"odds_ratio_loss": 0.6277891397476196,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.08388081192970276,
"rewards/margins": 0.016609463840723038,
"rewards/rejected": -0.10049028694629669,
"sft_loss": 0.8388081789016724,
"step": 1680
},
{
"epoch": 2.7318650232370176,
"grad_norm": 1.1649651527404785,
"learning_rate": 9.592411742693098e-08,
"logits/chosen": -0.3376592993736267,
"logits/rejected": -0.33899828791618347,
"logps/chosen": -0.8941831588745117,
"logps/rejected": -0.9593558311462402,
"loss": 0.9688,
"odds_ratio_loss": 0.7464134693145752,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -0.08941832929849625,
"rewards/margins": 0.00651725847274065,
"rewards/rejected": -0.09593559056520462,
"sft_loss": 0.8941831588745117,
"step": 1690
},
{
"epoch": 2.7480299050313195,
"grad_norm": 0.365510493516922,
"learning_rate": 8.464717095022168e-08,
"logits/chosen": -0.26350411772727966,
"logits/rejected": -0.3258097767829895,
"logps/chosen": -0.8289276957511902,
"logps/rejected": -0.9933468103408813,
"loss": 0.894,
"odds_ratio_loss": 0.6506984829902649,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.0828927755355835,
"rewards/margins": 0.01644190214574337,
"rewards/rejected": -0.09933467954397202,
"sft_loss": 0.8289276957511902,
"step": 1700
},
{
"epoch": 2.7641947868256214,
"grad_norm": 0.860230565071106,
"learning_rate": 7.406380569169841e-08,
"logits/chosen": -0.35509008169174194,
"logits/rejected": -0.3218967318534851,
"logps/chosen": -0.9126371145248413,
"logps/rejected": -0.8999163508415222,
"loss": 0.9886,
"odds_ratio_loss": 0.759522020816803,
"rewards/accuracies": 0.4437499940395355,
"rewards/chosen": -0.09126370400190353,
"rewards/margins": -0.0012720691738650203,
"rewards/rejected": -0.08999162912368774,
"sft_loss": 0.9126371145248413,
"step": 1710
},
{
"epoch": 2.7803596686199232,
"grad_norm": 2.069009780883789,
"learning_rate": 6.417706072013808e-08,
"logits/chosen": -0.3513588011264801,
"logits/rejected": -0.31902140378952026,
"logps/chosen": -0.8999738693237305,
"logps/rejected": -0.9839135408401489,
"loss": 0.9715,
"odds_ratio_loss": 0.7152166366577148,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.08999738842248917,
"rewards/margins": 0.008393971249461174,
"rewards/rejected": -0.09839136153459549,
"sft_loss": 0.8999738693237305,
"step": 1720
},
{
"epoch": 2.796524550414225,
"grad_norm": 0.59537672996521,
"learning_rate": 5.498977506615294e-08,
"logits/chosen": -0.33539581298828125,
"logits/rejected": -0.36086633801460266,
"logps/chosen": -0.8895516395568848,
"logps/rejected": -0.9674522280693054,
"loss": 0.9602,
"odds_ratio_loss": 0.706065833568573,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.08895515650510788,
"rewards/margins": 0.007790066301822662,
"rewards/rejected": -0.09674523025751114,
"sft_loss": 0.8895516395568848,
"step": 1730
},
{
"epoch": 2.812689432208527,
"grad_norm": 0.4070757031440735,
"learning_rate": 4.6504586906947756e-08,
"logits/chosen": -0.3671857714653015,
"logits/rejected": -0.36166203022003174,
"logps/chosen": -0.9486915469169617,
"logps/rejected": -0.9999829530715942,
"loss": 1.0182,
"odds_ratio_loss": 0.6954110860824585,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.09486915171146393,
"rewards/margins": 0.005129144061356783,
"rewards/rejected": -0.09999830275774002,
"sft_loss": 0.9486915469169617,
"step": 1740
},
{
"epoch": 2.828854314002829,
"grad_norm": 1.650687336921692,
"learning_rate": 3.8723932808754914e-08,
"logits/chosen": -0.2851547300815582,
"logits/rejected": -0.2857135236263275,
"logps/chosen": -0.9708272814750671,
"logps/rejected": -0.9912136197090149,
"loss": 1.0459,
"odds_ratio_loss": 0.7506999969482422,
"rewards/accuracies": 0.46875,
"rewards/chosen": -0.09708271920681,
"rewards/margins": 0.0020386301912367344,
"rewards/rejected": -0.0991213470697403,
"sft_loss": 0.9708272814750671,
"step": 1750
},
{
"epoch": 2.8450191957971307,
"grad_norm": 0.9035086035728455,
"learning_rate": 3.1650047027158014e-08,
"logits/chosen": -0.3378879427909851,
"logits/rejected": -0.31768563389778137,
"logps/chosen": -0.863334059715271,
"logps/rejected": -0.9955730438232422,
"loss": 0.9285,
"odds_ratio_loss": 0.6513949632644653,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.08633340895175934,
"rewards/margins": 0.013223896734416485,
"rewards/rejected": -0.0995573028922081,
"sft_loss": 0.863334059715271,
"step": 1760
},
{
"epoch": 2.8611840775914326,
"grad_norm": 0.3864952623844147,
"learning_rate": 2.5284960865517848e-08,
"logits/chosen": -0.39154380559921265,
"logits/rejected": -0.34484562277793884,
"logps/chosen": -0.82793790102005,
"logps/rejected": -1.0070700645446777,
"loss": 0.8928,
"odds_ratio_loss": 0.6486603021621704,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.08279379457235336,
"rewards/margins": 0.0179132129997015,
"rewards/rejected": -0.10070700943470001,
"sft_loss": 0.82793790102005,
"step": 1770
},
{
"epoch": 2.8773489593857344,
"grad_norm": 0.4017253518104553,
"learning_rate": 1.9630502091670388e-08,
"logits/chosen": -0.3473368287086487,
"logits/rejected": -0.37853848934173584,
"logps/chosen": -0.8299247622489929,
"logps/rejected": -0.996843695640564,
"loss": 0.8926,
"odds_ratio_loss": 0.6264339685440063,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.08299248665571213,
"rewards/margins": 0.016691887751221657,
"rewards/rejected": -0.09968437254428864,
"sft_loss": 0.8299247622489929,
"step": 1780
},
{
"epoch": 2.8935138411800363,
"grad_norm": 0.7657872438430786,
"learning_rate": 1.4688294413074677e-08,
"logits/chosen": -0.3813559114933014,
"logits/rejected": -0.34783899784088135,
"logps/chosen": -0.802249550819397,
"logps/rejected": -0.9486366510391235,
"loss": 0.8723,
"odds_ratio_loss": 0.7008516788482666,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.08022496104240417,
"rewards/margins": 0.014638709835708141,
"rewards/rejected": -0.09486366808414459,
"sft_loss": 0.802249550819397,
"step": 1790
},
{
"epoch": 2.909678722974338,
"grad_norm": 0.2962876558303833,
"learning_rate": 1.0459757010556626e-08,
"logits/chosen": -0.4134625494480133,
"logits/rejected": -0.394450843334198,
"logps/chosen": -0.8447575569152832,
"logps/rejected": -0.919145405292511,
"loss": 0.9156,
"odds_ratio_loss": 0.7088185548782349,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.08447576314210892,
"rewards/margins": 0.007438770029693842,
"rewards/rejected": -0.09191453456878662,
"sft_loss": 0.8447575569152832,
"step": 1800
},
{
"epoch": 2.92584360476864,
"grad_norm": 0.4512230455875397,
"learning_rate": 6.94610413078306e-09,
"logits/chosen": -0.4446278512477875,
"logits/rejected": -0.37901362776756287,
"logps/chosen": -0.8928766250610352,
"logps/rejected": -1.078958511352539,
"loss": 0.9617,
"odds_ratio_loss": 0.6879509091377258,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": -0.0892876610159874,
"rewards/margins": 0.01860819011926651,
"rewards/rejected": -0.1078958511352539,
"sft_loss": 0.8928766250610352,
"step": 1810
},
{
"epoch": 2.942008486562942,
"grad_norm": 0.2540852427482605,
"learning_rate": 4.14834473758563e-09,
"logits/chosen": -0.4007115364074707,
"logits/rejected": -0.3911517858505249,
"logps/chosen": -0.8001864552497864,
"logps/rejected": -1.0187556743621826,
"loss": 0.8634,
"odds_ratio_loss": 0.6319615244865417,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.08001864701509476,
"rewards/margins": 0.021856937557458878,
"rewards/rejected": -0.10187558084726334,
"sft_loss": 0.8001864552497864,
"step": 1820
},
{
"epoch": 2.9581733683572438,
"grad_norm": 0.4121166467666626,
"learning_rate": 2.067282222230349e-09,
"logits/chosen": -0.3413907587528229,
"logits/rejected": -0.278145968914032,
"logps/chosen": -0.8189884424209595,
"logps/rejected": -1.0053989887237549,
"loss": 0.881,
"odds_ratio_loss": 0.620233416557312,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.08189885318279266,
"rewards/margins": 0.018641049042344093,
"rewards/rejected": -0.10053990036249161,
"sft_loss": 0.8189884424209595,
"step": 1830
},
{
"epoch": 2.9743382501515456,
"grad_norm": 3.4636123180389404,
"learning_rate": 7.035141727212979e-10,
"logits/chosen": -0.3847911059856415,
"logits/rejected": -0.34176406264305115,
"logps/chosen": -0.8342105746269226,
"logps/rejected": -0.9381749033927917,
"loss": 0.9018,
"odds_ratio_loss": 0.6754266023635864,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.08342105895280838,
"rewards/margins": 0.010396432131528854,
"rewards/rejected": -0.09381748735904694,
"sft_loss": 0.8342105746269226,
"step": 1840
},
{
"epoch": 2.9905031319458475,
"grad_norm": 1.2374101877212524,
"learning_rate": 5.743220219761592e-11,
"logits/chosen": -0.33420827984809875,
"logits/rejected": -0.3142699599266052,
"logps/chosen": -1.0187790393829346,
"logps/rejected": -1.029541015625,
"loss": 1.097,
"odds_ratio_loss": 0.782578706741333,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.10187790542840958,
"rewards/margins": 0.0010761909652501345,
"rewards/rejected": -0.10295410454273224,
"sft_loss": 1.0187790393829346,
"step": 1850
},
{
"epoch": 2.9969690846635686,
"step": 1854,
"total_flos": 1.9948570754930442e+18,
"train_loss": 0.9750770799807618,
"train_runtime": 17949.5667,
"train_samples_per_second": 1.654,
"train_steps_per_second": 0.103
}
],
"logging_steps": 10,
"max_steps": 1854,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 1.9948570754930442e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}