|
{ |
|
"best_metric": 0.9497246742248535, |
|
"best_model_checkpoint": "saves/Vicuna-7B-v1.5/lora/orpo-salt/checkpoint-1500", |
|
"epoch": 2.9969690846635686, |
|
"eval_steps": 500, |
|
"global_step": 1854, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01616488179430188, |
|
"grad_norm": 0.3899887204170227, |
|
"learning_rate": 4.999648198770648e-06, |
|
"logits/chosen": -0.8260404467582703, |
|
"logits/rejected": -0.779380202293396, |
|
"logps/chosen": -1.0734994411468506, |
|
"logps/rejected": -1.2254035472869873, |
|
"loss": 1.146, |
|
"odds_ratio_loss": 0.7249619364738464, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.1073499470949173, |
|
"rewards/margins": 0.01519041694700718, |
|
"rewards/rejected": -0.12254035472869873, |
|
"sft_loss": 1.0734994411468506, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03232976358860376, |
|
"grad_norm": 0.4923989176750183, |
|
"learning_rate": 4.998578646361359e-06, |
|
"logits/chosen": -0.7854002714157104, |
|
"logits/rejected": -0.781389594078064, |
|
"logps/chosen": -1.0866433382034302, |
|
"logps/rejected": -1.2551138401031494, |
|
"loss": 1.1535, |
|
"odds_ratio_loss": 0.668422520160675, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.10866433382034302, |
|
"rewards/margins": 0.016847047954797745, |
|
"rewards/rejected": -0.12551137804985046, |
|
"sft_loss": 1.0866433382034302, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04849464538290564, |
|
"grad_norm": 0.7084988951683044, |
|
"learning_rate": 4.996791614004449e-06, |
|
"logits/chosen": -0.7559419274330139, |
|
"logits/rejected": -0.7485054731369019, |
|
"logps/chosen": -1.0929394960403442, |
|
"logps/rejected": -1.1501963138580322, |
|
"loss": 1.1699, |
|
"odds_ratio_loss": 0.7694913148880005, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.10929396003484726, |
|
"rewards/margins": 0.00572569016367197, |
|
"rewards/rejected": -0.11501964181661606, |
|
"sft_loss": 1.0929394960403442, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06465952717720752, |
|
"grad_norm": 0.8286219239234924, |
|
"learning_rate": 4.994287614855618e-06, |
|
"logits/chosen": -0.8193706274032593, |
|
"logits/rejected": -0.7897969484329224, |
|
"logps/chosen": -1.1362740993499756, |
|
"logps/rejected": -1.1394835710525513, |
|
"loss": 1.2171, |
|
"odds_ratio_loss": 0.808376133441925, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.11362739652395248, |
|
"rewards/margins": 0.0003209514543414116, |
|
"rewards/rejected": -0.11394836008548737, |
|
"sft_loss": 1.1362740993499756, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0808244089715094, |
|
"grad_norm": 0.537628173828125, |
|
"learning_rate": 4.991067367951343e-06, |
|
"logits/chosen": -0.7530331015586853, |
|
"logits/rejected": -0.7703112363815308, |
|
"logps/chosen": -1.0968067646026611, |
|
"logps/rejected": -1.1828521490097046, |
|
"loss": 1.1729, |
|
"odds_ratio_loss": 0.7610759735107422, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.10968067497015, |
|
"rewards/margins": 0.00860452838242054, |
|
"rewards/rejected": -0.11828521639108658, |
|
"sft_loss": 1.0968067646026611, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.09698929076581128, |
|
"grad_norm": 0.2992643415927887, |
|
"learning_rate": 4.987131798002389e-06, |
|
"logits/chosen": -0.7554941773414612, |
|
"logits/rejected": -0.7805821299552917, |
|
"logps/chosen": -1.120224118232727, |
|
"logps/rejected": -1.1958564519882202, |
|
"loss": 1.2007, |
|
"odds_ratio_loss": 0.804762065410614, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.11202241480350494, |
|
"rewards/margins": 0.007563246879726648, |
|
"rewards/rejected": -0.11958565562963486, |
|
"sft_loss": 1.120224118232727, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.11315417256011315, |
|
"grad_norm": 0.5207487940788269, |
|
"learning_rate": 4.982482035128285e-06, |
|
"logits/chosen": -0.7931987643241882, |
|
"logits/rejected": -0.7725004553794861, |
|
"logps/chosen": -1.158760666847229, |
|
"logps/rejected": -1.3085857629776, |
|
"loss": 1.2342, |
|
"odds_ratio_loss": 0.7545939683914185, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.11587607860565186, |
|
"rewards/margins": 0.01498250663280487, |
|
"rewards/rejected": -0.13085858523845673, |
|
"sft_loss": 1.158760666847229, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.12931905435441504, |
|
"grad_norm": 0.8179022669792175, |
|
"learning_rate": 4.9771194145328e-06, |
|
"logits/chosen": -0.7553219199180603, |
|
"logits/rejected": -0.7355794906616211, |
|
"logps/chosen": -0.9810718297958374, |
|
"logps/rejected": -1.1142699718475342, |
|
"loss": 1.0496, |
|
"odds_ratio_loss": 0.6851751208305359, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.09810719639062881, |
|
"rewards/margins": 0.013319805264472961, |
|
"rewards/rejected": -0.11142698675394058, |
|
"sft_loss": 0.9810718297958374, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1454839361487169, |
|
"grad_norm": 0.5893221497535706, |
|
"learning_rate": 4.971045476120532e-06, |
|
"logits/chosen": -0.7767540216445923, |
|
"logits/rejected": -0.7691196203231812, |
|
"logps/chosen": -1.0343536138534546, |
|
"logps/rejected": -1.1126210689544678, |
|
"loss": 1.1086, |
|
"odds_ratio_loss": 0.7424803972244263, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.10343535989522934, |
|
"rewards/margins": 0.007826738059520721, |
|
"rewards/rejected": -0.11126209795475006, |
|
"sft_loss": 1.0343536138534546, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1616488179430188, |
|
"grad_norm": 0.3746645748615265, |
|
"learning_rate": 4.964261964054713e-06, |
|
"logits/chosen": -0.749561607837677, |
|
"logits/rejected": -0.7426966428756714, |
|
"logps/chosen": -1.0808948278427124, |
|
"logps/rejected": -1.1608020067214966, |
|
"loss": 1.1637, |
|
"odds_ratio_loss": 0.8280612826347351, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.10808948427438736, |
|
"rewards/margins": 0.007990716025233269, |
|
"rewards/rejected": -0.11608020961284637, |
|
"sft_loss": 1.0808948278427124, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.17781369973732067, |
|
"grad_norm": 0.5266828536987305, |
|
"learning_rate": 4.956770826256372e-06, |
|
"logits/chosen": -0.7276872396469116, |
|
"logits/rejected": -0.7239276766777039, |
|
"logps/chosen": -1.0891507863998413, |
|
"logps/rejected": -1.188951015472412, |
|
"loss": 1.1606, |
|
"odds_ratio_loss": 0.7148129940032959, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.10891509056091309, |
|
"rewards/margins": 0.009980013594031334, |
|
"rewards/rejected": -0.11889511346817017, |
|
"sft_loss": 1.0891507863998413, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.19397858153162256, |
|
"grad_norm": 0.5117731690406799, |
|
"learning_rate": 4.94857421384497e-06, |
|
"logits/chosen": -0.7153638601303101, |
|
"logits/rejected": -0.7017214894294739, |
|
"logps/chosen": -1.0659247636795044, |
|
"logps/rejected": -1.1995283365249634, |
|
"loss": 1.1411, |
|
"odds_ratio_loss": 0.7518999576568604, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.10659247636795044, |
|
"rewards/margins": 0.013360358774662018, |
|
"rewards/rejected": -0.11995282024145126, |
|
"sft_loss": 1.0659247636795044, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.21014346332592443, |
|
"grad_norm": 0.3964090049266815, |
|
"learning_rate": 4.939674480520701e-06, |
|
"logits/chosen": -0.7281032800674438, |
|
"logits/rejected": -0.6757130026817322, |
|
"logps/chosen": -0.9924377202987671, |
|
"logps/rejected": -1.0807675123214722, |
|
"loss": 1.0644, |
|
"odds_ratio_loss": 0.7199574708938599, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.09924378246068954, |
|
"rewards/margins": 0.008832980878651142, |
|
"rewards/rejected": -0.10807675123214722, |
|
"sft_loss": 0.9924377202987671, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2263083451202263, |
|
"grad_norm": 0.31593117117881775, |
|
"learning_rate": 4.930074181888613e-06, |
|
"logits/chosen": -0.6932573914527893, |
|
"logits/rejected": -0.6765223741531372, |
|
"logps/chosen": -1.011648416519165, |
|
"logps/rejected": -1.1101162433624268, |
|
"loss": 1.0811, |
|
"odds_ratio_loss": 0.6949580907821655, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.10116484016180038, |
|
"rewards/margins": 0.009846789762377739, |
|
"rewards/rejected": -0.11101162433624268, |
|
"sft_loss": 1.011648416519165, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2424732269145282, |
|
"grad_norm": 0.7396884560585022, |
|
"learning_rate": 4.91977607472475e-06, |
|
"logits/chosen": -0.6414996981620789, |
|
"logits/rejected": -0.6007689237594604, |
|
"logps/chosen": -1.0180175304412842, |
|
"logps/rejected": -1.0574676990509033, |
|
"loss": 1.0929, |
|
"odds_ratio_loss": 0.748645544052124, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.10180176794528961, |
|
"rewards/margins": 0.00394500233232975, |
|
"rewards/rejected": -0.10574676841497421, |
|
"sft_loss": 1.0180175304412842, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.2586381087088301, |
|
"grad_norm": 0.5049052834510803, |
|
"learning_rate": 4.908783116184534e-06, |
|
"logits/chosen": -0.6661972403526306, |
|
"logits/rejected": -0.626873791217804, |
|
"logps/chosen": -0.953465461730957, |
|
"logps/rejected": -1.0835082530975342, |
|
"loss": 1.02, |
|
"odds_ratio_loss": 0.6655644178390503, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.09534655511379242, |
|
"rewards/margins": 0.013004262931644917, |
|
"rewards/rejected": -0.10835081338882446, |
|
"sft_loss": 0.953465461730957, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.27480299050313195, |
|
"grad_norm": 0.4969651699066162, |
|
"learning_rate": 4.897098462953598e-06, |
|
"logits/chosen": -0.5929690599441528, |
|
"logits/rejected": -0.6147447824478149, |
|
"logps/chosen": -0.9747630953788757, |
|
"logps/rejected": -1.1718312501907349, |
|
"loss": 1.0464, |
|
"odds_ratio_loss": 0.7164822220802307, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.09747631102800369, |
|
"rewards/margins": 0.019706813618540764, |
|
"rewards/rejected": -0.11718311160802841, |
|
"sft_loss": 0.9747630953788757, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.2909678722974338, |
|
"grad_norm": 0.37429389357566833, |
|
"learning_rate": 4.884725470341331e-06, |
|
"logits/chosen": -0.5573834180831909, |
|
"logits/rejected": -0.544479250907898, |
|
"logps/chosen": -0.8867887258529663, |
|
"logps/rejected": -1.1076356172561646, |
|
"loss": 0.9499, |
|
"odds_ratio_loss": 0.6307954788208008, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.08867888152599335, |
|
"rewards/margins": 0.022084690630435944, |
|
"rewards/rejected": -0.1107635647058487, |
|
"sft_loss": 0.8867887258529663, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3071327540917357, |
|
"grad_norm": 1.2109434604644775, |
|
"learning_rate": 4.871667691317377e-06, |
|
"logits/chosen": -0.6222495436668396, |
|
"logits/rejected": -0.6174622774124146, |
|
"logps/chosen": -1.1702499389648438, |
|
"logps/rejected": -1.0528119802474976, |
|
"loss": 1.2649, |
|
"odds_ratio_loss": 0.9465614557266235, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.1170249953866005, |
|
"rewards/margins": -0.011743778362870216, |
|
"rewards/rejected": -0.10528121143579483, |
|
"sft_loss": 1.1702499389648438, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.3232976358860376, |
|
"grad_norm": 1.5371562242507935, |
|
"learning_rate": 4.857928875491392e-06, |
|
"logits/chosen": -0.5464112162590027, |
|
"logits/rejected": -0.5513696670532227, |
|
"logps/chosen": -0.8908155560493469, |
|
"logps/rejected": -1.0076180696487427, |
|
"loss": 0.9612, |
|
"odds_ratio_loss": 0.7040323615074158, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.08908155560493469, |
|
"rewards/margins": 0.011680259369313717, |
|
"rewards/rejected": -0.10076181590557098, |
|
"sft_loss": 0.8908155560493469, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.33946251768033947, |
|
"grad_norm": 0.6159927845001221, |
|
"learning_rate": 4.843512968036314e-06, |
|
"logits/chosen": -0.6329461932182312, |
|
"logits/rejected": -0.592659592628479, |
|
"logps/chosen": -0.975503146648407, |
|
"logps/rejected": -0.9970613718032837, |
|
"loss": 1.0514, |
|
"odds_ratio_loss": 0.7591590881347656, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.09755031019449234, |
|
"rewards/margins": 0.00215582805685699, |
|
"rewards/rejected": -0.09970613569021225, |
|
"sft_loss": 0.975503146648407, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.35562739947464134, |
|
"grad_norm": 0.3111410439014435, |
|
"learning_rate": 4.828424108555486e-06, |
|
"logits/chosen": -0.5221891403198242, |
|
"logits/rejected": -0.5304391980171204, |
|
"logps/chosen": -1.1862733364105225, |
|
"logps/rejected": -1.2753493785858154, |
|
"loss": 1.2641, |
|
"odds_ratio_loss": 0.7783994674682617, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.11862732470035553, |
|
"rewards/margins": 0.008907611481845379, |
|
"rewards/rejected": -0.12753494083881378, |
|
"sft_loss": 1.1862733364105225, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.3717922812689432, |
|
"grad_norm": 0.301698237657547, |
|
"learning_rate": 4.812666629893957e-06, |
|
"logits/chosen": -0.4992770254611969, |
|
"logits/rejected": -0.4967115521430969, |
|
"logps/chosen": -0.9971933364868164, |
|
"logps/rejected": -1.0213407278060913, |
|
"loss": 1.0744, |
|
"odds_ratio_loss": 0.7721298933029175, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.09971933811903, |
|
"rewards/margins": 0.0024147380609065294, |
|
"rewards/rejected": -0.1021340861916542, |
|
"sft_loss": 0.9971933364868164, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.3879571630632451, |
|
"grad_norm": 0.2967057526111603, |
|
"learning_rate": 4.796245056894273e-06, |
|
"logits/chosen": -0.5198571085929871, |
|
"logits/rejected": -0.4987764358520508, |
|
"logps/chosen": -0.9578666687011719, |
|
"logps/rejected": -1.0644018650054932, |
|
"loss": 1.0315, |
|
"odds_ratio_loss": 0.7367077469825745, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.0957866758108139, |
|
"rewards/margins": 0.01065351627767086, |
|
"rewards/rejected": -0.10644018650054932, |
|
"sft_loss": 0.9578666687011719, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.404122044857547, |
|
"grad_norm": 0.336041659116745, |
|
"learning_rate": 4.779164105097148e-06, |
|
"logits/chosen": -0.4748106002807617, |
|
"logits/rejected": -0.44636374711990356, |
|
"logps/chosen": -0.9247462153434753, |
|
"logps/rejected": -1.1018692255020142, |
|
"loss": 0.9923, |
|
"odds_ratio_loss": 0.6758453845977783, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.09247462451457977, |
|
"rewards/margins": 0.017712296918034554, |
|
"rewards/rejected": -0.11018691956996918, |
|
"sft_loss": 0.9247462153434753, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.42028692665184886, |
|
"grad_norm": 0.5222122669219971, |
|
"learning_rate": 4.761428679387373e-06, |
|
"logits/chosen": -0.46434497833251953, |
|
"logits/rejected": -0.4350043833255768, |
|
"logps/chosen": -0.8905488848686218, |
|
"logps/rejected": -1.0182609558105469, |
|
"loss": 0.9591, |
|
"odds_ratio_loss": 0.6853379011154175, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.0890548899769783, |
|
"rewards/margins": 0.01277120690792799, |
|
"rewards/rejected": -0.10182609409093857, |
|
"sft_loss": 0.8905488848686218, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.4364518084461507, |
|
"grad_norm": 0.5936411023139954, |
|
"learning_rate": 4.7430438725853515e-06, |
|
"logits/chosen": -0.48627519607543945, |
|
"logits/rejected": -0.4379982352256775, |
|
"logps/chosen": -0.9183929562568665, |
|
"logps/rejected": -1.1679961681365967, |
|
"loss": 0.984, |
|
"odds_ratio_loss": 0.6556900143623352, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.09183929860591888, |
|
"rewards/margins": 0.024960322305560112, |
|
"rewards/rejected": -0.11679961532354355, |
|
"sft_loss": 0.9183929562568665, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.4526166902404526, |
|
"grad_norm": 0.46239179372787476, |
|
"learning_rate": 4.724014963984669e-06, |
|
"logits/chosen": -0.4012899398803711, |
|
"logits/rejected": -0.411139577627182, |
|
"logps/chosen": -1.008721947669983, |
|
"logps/rejected": -1.2014849185943604, |
|
"loss": 1.0765, |
|
"odds_ratio_loss": 0.6780184507369995, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.10087219625711441, |
|
"rewards/margins": 0.01927630603313446, |
|
"rewards/rejected": -0.12014850229024887, |
|
"sft_loss": 1.008721947669983, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.4687815720347545, |
|
"grad_norm": 0.5760877132415771, |
|
"learning_rate": 4.704347417836116e-06, |
|
"logits/chosen": -0.4533885419368744, |
|
"logits/rejected": -0.46080097556114197, |
|
"logps/chosen": -0.9372620582580566, |
|
"logps/rejected": -1.1106752157211304, |
|
"loss": 1.0089, |
|
"odds_ratio_loss": 0.716440737247467, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.09372620284557343, |
|
"rewards/margins": 0.01734132692217827, |
|
"rewards/rejected": -0.1110675185918808, |
|
"sft_loss": 0.9372620582580566, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.4849464538290564, |
|
"grad_norm": 0.44260743260383606, |
|
"learning_rate": 4.684046881778603e-06, |
|
"logits/chosen": -0.5344091653823853, |
|
"logits/rejected": -0.49474531412124634, |
|
"logps/chosen": -0.9150590896606445, |
|
"logps/rejected": -1.0017120838165283, |
|
"loss": 0.9833, |
|
"odds_ratio_loss": 0.6827279329299927, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.09150592237710953, |
|
"rewards/margins": 0.008665294386446476, |
|
"rewards/rejected": -0.10017120838165283, |
|
"sft_loss": 0.9150590896606445, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5011113356233583, |
|
"grad_norm": 0.3225099742412567, |
|
"learning_rate": 4.663119185217409e-06, |
|
"logits/chosen": -0.43460625410079956, |
|
"logits/rejected": -0.4127863049507141, |
|
"logps/chosen": -0.8891846537590027, |
|
"logps/rejected": -1.0905497074127197, |
|
"loss": 0.954, |
|
"odds_ratio_loss": 0.6476849913597107, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.08891846239566803, |
|
"rewards/margins": 0.020136509090662003, |
|
"rewards/rejected": -0.10905496776103973, |
|
"sft_loss": 0.8891846537590027, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.5172762174176602, |
|
"grad_norm": 0.3512892723083496, |
|
"learning_rate": 4.641570337650232e-06, |
|
"logits/chosen": -0.43388432264328003, |
|
"logits/rejected": -0.39495667815208435, |
|
"logps/chosen": -0.8790934681892395, |
|
"logps/rejected": -0.9963566064834595, |
|
"loss": 0.9498, |
|
"odds_ratio_loss": 0.7069565057754517, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.08790934085845947, |
|
"rewards/margins": 0.011726310476660728, |
|
"rewards/rejected": -0.09963564574718475, |
|
"sft_loss": 0.8790934681892395, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.533441099211962, |
|
"grad_norm": 0.3520517349243164, |
|
"learning_rate": 4.61940652694154e-06, |
|
"logits/chosen": -0.45831650495529175, |
|
"logits/rejected": -0.4600452780723572, |
|
"logps/chosen": -0.9612126350402832, |
|
"logps/rejected": -1.0601940155029297, |
|
"loss": 1.0373, |
|
"odds_ratio_loss": 0.7606214880943298, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.09612125903367996, |
|
"rewards/margins": 0.00989813357591629, |
|
"rewards/rejected": -0.10601940006017685, |
|
"sft_loss": 0.9612126350402832, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.5496059810062639, |
|
"grad_norm": 0.42445889115333557, |
|
"learning_rate": 4.596634117545689e-06, |
|
"logits/chosen": -0.3920242190361023, |
|
"logits/rejected": -0.41387075185775757, |
|
"logps/chosen": -0.9238036274909973, |
|
"logps/rejected": -1.0761339664459229, |
|
"loss": 0.9917, |
|
"odds_ratio_loss": 0.6789978742599487, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.09238035976886749, |
|
"rewards/margins": 0.015233027748763561, |
|
"rewards/rejected": -0.10761336982250214, |
|
"sft_loss": 0.9238036274909973, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.5657708628005658, |
|
"grad_norm": 0.3377890884876251, |
|
"learning_rate": 4.573259648679335e-06, |
|
"logits/chosen": -0.39150765538215637, |
|
"logits/rejected": -0.4451742172241211, |
|
"logps/chosen": -0.9269700050354004, |
|
"logps/rejected": -1.077823281288147, |
|
"loss": 0.9987, |
|
"odds_ratio_loss": 0.7173791527748108, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.09269699454307556, |
|
"rewards/margins": 0.01508533675223589, |
|
"rewards/rejected": -0.10778234153985977, |
|
"sft_loss": 0.9269700050354004, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.5819357445948676, |
|
"grad_norm": 0.9352906942367554, |
|
"learning_rate": 4.549289832443663e-06, |
|
"logits/chosen": -0.39780086278915405, |
|
"logits/rejected": -0.3602847754955292, |
|
"logps/chosen": -0.9020577669143677, |
|
"logps/rejected": -1.0630056858062744, |
|
"loss": 0.9737, |
|
"odds_ratio_loss": 0.7168340682983398, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.09020576626062393, |
|
"rewards/margins": 0.01609480008482933, |
|
"rewards/rejected": -0.10630057752132416, |
|
"sft_loss": 0.9020577669143677, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.5981006263891695, |
|
"grad_norm": 0.3642963469028473, |
|
"learning_rate": 4.524731551896978e-06, |
|
"logits/chosen": -0.4040652811527252, |
|
"logits/rejected": -0.39201897382736206, |
|
"logps/chosen": -0.822562038898468, |
|
"logps/rejected": -0.9484196901321411, |
|
"loss": 0.8918, |
|
"odds_ratio_loss": 0.6919523477554321, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.08225620537996292, |
|
"rewards/margins": 0.012585763819515705, |
|
"rewards/rejected": -0.09484197199344635, |
|
"sft_loss": 0.822562038898468, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.6142655081834714, |
|
"grad_norm": 0.9358541965484619, |
|
"learning_rate": 4.4995918590781925e-06, |
|
"logits/chosen": -0.41558751463890076, |
|
"logits/rejected": -0.39345669746398926, |
|
"logps/chosen": -0.9379288554191589, |
|
"logps/rejected": -1.0011296272277832, |
|
"loss": 1.0132, |
|
"odds_ratio_loss": 0.7530064582824707, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.0937928855419159, |
|
"rewards/margins": 0.006320066750049591, |
|
"rewards/rejected": -0.10011295974254608, |
|
"sft_loss": 0.9379288554191589, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.6304303899777733, |
|
"grad_norm": 0.42754364013671875, |
|
"learning_rate": 4.473877972981797e-06, |
|
"logits/chosen": -0.4294399321079254, |
|
"logits/rejected": -0.48693591356277466, |
|
"logps/chosen": -0.9050455093383789, |
|
"logps/rejected": -1.0990797281265259, |
|
"loss": 0.9681, |
|
"odds_ratio_loss": 0.6305026412010193, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.09050454199314117, |
|
"rewards/margins": 0.019403431564569473, |
|
"rewards/rejected": -0.10990796983242035, |
|
"sft_loss": 0.9050455093383789, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.6465952717720752, |
|
"grad_norm": 0.3870018422603607, |
|
"learning_rate": 4.447597277484894e-06, |
|
"logits/chosen": -0.41894254088401794, |
|
"logits/rejected": -0.3863012492656708, |
|
"logps/chosen": -0.9011236429214478, |
|
"logps/rejected": -1.011643648147583, |
|
"loss": 0.971, |
|
"odds_ratio_loss": 0.6992276906967163, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0901123657822609, |
|
"rewards/margins": 0.011052015237510204, |
|
"rewards/rejected": -0.10116437822580338, |
|
"sft_loss": 0.9011236429214478, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.6627601535663771, |
|
"grad_norm": 0.6716357469558716, |
|
"learning_rate": 4.42075731922687e-06, |
|
"logits/chosen": -0.381665974855423, |
|
"logits/rejected": -0.40627461671829224, |
|
"logps/chosen": -0.9860145449638367, |
|
"logps/rejected": -1.0734965801239014, |
|
"loss": 1.0559, |
|
"odds_ratio_loss": 0.6987608671188354, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.09860144555568695, |
|
"rewards/margins": 0.008748206309974194, |
|
"rewards/rejected": -0.10734964907169342, |
|
"sft_loss": 0.9860145449638367, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.6789250353606789, |
|
"grad_norm": 0.4379284083843231, |
|
"learning_rate": 4.3933658054423465e-06, |
|
"logits/chosen": -0.42450767755508423, |
|
"logits/rejected": -0.4302968978881836, |
|
"logps/chosen": -0.8682054281234741, |
|
"logps/rejected": -1.0158107280731201, |
|
"loss": 0.9348, |
|
"odds_ratio_loss": 0.6656124591827393, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.0868205577135086, |
|
"rewards/margins": 0.014760518446564674, |
|
"rewards/rejected": -0.10158105939626694, |
|
"sft_loss": 0.8682054281234741, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.6950899171549808, |
|
"grad_norm": 0.4341568052768707, |
|
"learning_rate": 4.365430601748003e-06, |
|
"logits/chosen": -0.3941816985607147, |
|
"logits/rejected": -0.349882036447525, |
|
"logps/chosen": -0.9646803140640259, |
|
"logps/rejected": -1.0113680362701416, |
|
"loss": 1.0372, |
|
"odds_ratio_loss": 0.7253597974777222, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.09646803140640259, |
|
"rewards/margins": 0.004668788518756628, |
|
"rewards/rejected": -0.10113681852817535, |
|
"sft_loss": 0.9646803140640259, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.7112547989492827, |
|
"grad_norm": 1.7109006643295288, |
|
"learning_rate": 4.336959729883925e-06, |
|
"logits/chosen": -0.37049371004104614, |
|
"logits/rejected": -0.3737342953681946, |
|
"logps/chosen": -0.9116461873054504, |
|
"logps/rejected": -0.9422439336776733, |
|
"loss": 0.9849, |
|
"odds_ratio_loss": 0.7329493165016174, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.09116461873054504, |
|
"rewards/margins": 0.0030597783625125885, |
|
"rewards/rejected": -0.09422439336776733, |
|
"sft_loss": 0.9116461873054504, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.7274196807435845, |
|
"grad_norm": 0.4295767843723297, |
|
"learning_rate": 4.307961365410118e-06, |
|
"logits/chosen": -0.46054011583328247, |
|
"logits/rejected": -0.4506424069404602, |
|
"logps/chosen": -0.904135525226593, |
|
"logps/rejected": -0.965890109539032, |
|
"loss": 0.9756, |
|
"odds_ratio_loss": 0.7150284051895142, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.09041355550289154, |
|
"rewards/margins": 0.0061754509806632996, |
|
"rewards/rejected": -0.09658900648355484, |
|
"sft_loss": 0.904135525226593, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.7435845625378864, |
|
"grad_norm": 0.7776443958282471, |
|
"learning_rate": 4.278443835358854e-06, |
|
"logits/chosen": -0.3951818645000458, |
|
"logits/rejected": -0.4040835499763489, |
|
"logps/chosen": -0.8823555707931519, |
|
"logps/rejected": -1.1062017679214478, |
|
"loss": 0.9449, |
|
"odds_ratio_loss": 0.6257806420326233, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.08823557198047638, |
|
"rewards/margins": 0.022384602576494217, |
|
"rewards/rejected": -0.1106201782822609, |
|
"sft_loss": 0.8823555707931519, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.7597494443321883, |
|
"grad_norm": 0.37953025102615356, |
|
"learning_rate": 4.248415615843523e-06, |
|
"logits/chosen": -0.376980721950531, |
|
"logits/rejected": -0.40178006887435913, |
|
"logps/chosen": -0.9119707345962524, |
|
"logps/rejected": -0.9874213933944702, |
|
"loss": 0.9817, |
|
"odds_ratio_loss": 0.6976627111434937, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.09119707345962524, |
|
"rewards/margins": 0.007545073516666889, |
|
"rewards/rejected": -0.09874214231967926, |
|
"sft_loss": 0.9119707345962524, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.7759143261264903, |
|
"grad_norm": 0.5314805507659912, |
|
"learning_rate": 4.217885329624666e-06, |
|
"logits/chosen": -0.3499462604522705, |
|
"logits/rejected": -0.33436357975006104, |
|
"logps/chosen": -0.876055121421814, |
|
"logps/rejected": -1.064893126487732, |
|
"loss": 0.9413, |
|
"odds_ratio_loss": 0.6526578068733215, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.08760551363229752, |
|
"rewards/margins": 0.018883811309933662, |
|
"rewards/rejected": -0.10648931562900543, |
|
"sft_loss": 0.876055121421814, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.7920792079207921, |
|
"grad_norm": 0.40282461047172546, |
|
"learning_rate": 4.186861743633911e-06, |
|
"logits/chosen": -0.41591471433639526, |
|
"logits/rejected": -0.4058813154697418, |
|
"logps/chosen": -0.8972100019454956, |
|
"logps/rejected": -1.093335509300232, |
|
"loss": 0.9699, |
|
"odds_ratio_loss": 0.7265552282333374, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.08972100913524628, |
|
"rewards/margins": 0.01961255446076393, |
|
"rewards/rejected": -0.10933355987071991, |
|
"sft_loss": 0.8972100019454956, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.808244089715094, |
|
"grad_norm": 0.43431738018989563, |
|
"learning_rate": 4.155353766456497e-06, |
|
"logits/chosen": -0.30508697032928467, |
|
"logits/rejected": -0.3136020302772522, |
|
"logps/chosen": -0.9303945302963257, |
|
"logps/rejected": -1.0141643285751343, |
|
"loss": 1.0008, |
|
"odds_ratio_loss": 0.7037394046783447, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.09303945302963257, |
|
"rewards/margins": 0.00837697833776474, |
|
"rewards/rejected": -0.1014164462685585, |
|
"sft_loss": 0.9303945302963257, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.808244089715094, |
|
"eval_logits/chosen": -0.3878052830696106, |
|
"eval_logits/rejected": -0.3689490258693695, |
|
"eval_logps/chosen": -0.9066087007522583, |
|
"eval_logps/rejected": -1.0192701816558838, |
|
"eval_loss": 0.9776538014411926, |
|
"eval_odds_ratio_loss": 0.710451602935791, |
|
"eval_rewards/accuracies": 0.5054545402526855, |
|
"eval_rewards/chosen": -0.09066087007522583, |
|
"eval_rewards/margins": 0.011266152374446392, |
|
"eval_rewards/rejected": -0.1019270196557045, |
|
"eval_runtime": 192.2826, |
|
"eval_samples_per_second": 5.721, |
|
"eval_sft_loss": 0.9066087007522583, |
|
"eval_steps_per_second": 2.86, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8244089715093958, |
|
"grad_norm": 0.3983856737613678, |
|
"learning_rate": 4.123370445773134e-06, |
|
"logits/chosen": -0.344710111618042, |
|
"logits/rejected": -0.3169902563095093, |
|
"logps/chosen": -0.8998648524284363, |
|
"logps/rejected": -0.9106130599975586, |
|
"loss": 0.975, |
|
"odds_ratio_loss": 0.7513402700424194, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.08998648822307587, |
|
"rewards/margins": 0.0010748239001259208, |
|
"rewards/rejected": -0.0910613164305687, |
|
"sft_loss": 0.8998648524284363, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.8405738533036977, |
|
"grad_norm": 1.2135205268859863, |
|
"learning_rate": 4.090920965761906e-06, |
|
"logits/chosen": -0.3448580205440521, |
|
"logits/rejected": -0.3548375964164734, |
|
"logps/chosen": -0.9812738299369812, |
|
"logps/rejected": -1.0694336891174316, |
|
"loss": 1.0535, |
|
"odds_ratio_loss": 0.7224698662757874, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.09812740236520767, |
|
"rewards/margins": 0.00881598424166441, |
|
"rewards/rejected": -0.10694338381290436, |
|
"sft_loss": 0.9812738299369812, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.8567387350979996, |
|
"grad_norm": 0.9130859375, |
|
"learning_rate": 4.058014644460991e-06, |
|
"logits/chosen": -0.34060588479042053, |
|
"logits/rejected": -0.3562433123588562, |
|
"logps/chosen": -0.9648042917251587, |
|
"logps/rejected": -1.0603010654449463, |
|
"loss": 1.032, |
|
"odds_ratio_loss": 0.6720489859580994, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.09648042917251587, |
|
"rewards/margins": 0.009549676440656185, |
|
"rewards/rejected": -0.10603010654449463, |
|
"sft_loss": 0.9648042917251587, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.8729036168923014, |
|
"grad_norm": 0.6945879459381104, |
|
"learning_rate": 4.024660931092939e-06, |
|
"logits/chosen": -0.39998704195022583, |
|
"logits/rejected": -0.39360350370407104, |
|
"logps/chosen": -0.8902137875556946, |
|
"logps/rejected": -1.0513432025909424, |
|
"loss": 0.9562, |
|
"odds_ratio_loss": 0.6595617532730103, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.08902137726545334, |
|
"rewards/margins": 0.016112947836518288, |
|
"rewards/rejected": -0.10513432323932648, |
|
"sft_loss": 0.8902137875556946, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.8890684986866033, |
|
"grad_norm": 0.45378220081329346, |
|
"learning_rate": 3.990869403351272e-06, |
|
"logits/chosen": -0.3531869053840637, |
|
"logits/rejected": -0.38131508231163025, |
|
"logps/chosen": -0.9068384170532227, |
|
"logps/rejected": -1.065394639968872, |
|
"loss": 0.9704, |
|
"odds_ratio_loss": 0.635545015335083, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.09068384021520615, |
|
"rewards/margins": 0.01585562154650688, |
|
"rewards/rejected": -0.10653946548700333, |
|
"sft_loss": 0.9068384170532227, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.9052333804809052, |
|
"grad_norm": 0.5512678623199463, |
|
"learning_rate": 3.956649764650206e-06, |
|
"logits/chosen": -0.29515427350997925, |
|
"logits/rejected": -0.31435275077819824, |
|
"logps/chosen": -0.9203943014144897, |
|
"logps/rejected": -1.0603986978530884, |
|
"loss": 0.9918, |
|
"odds_ratio_loss": 0.7142159938812256, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.09203943610191345, |
|
"rewards/margins": 0.014000418595969677, |
|
"rewards/rejected": -0.106039859354496, |
|
"sft_loss": 0.9203943014144897, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.9213982622752072, |
|
"grad_norm": 0.5750080347061157, |
|
"learning_rate": 3.92201184133826e-06, |
|
"logits/chosen": -0.3182484209537506, |
|
"logits/rejected": -0.3164721131324768, |
|
"logps/chosen": -0.8570343255996704, |
|
"logps/rejected": -1.0225125551223755, |
|
"loss": 0.922, |
|
"odds_ratio_loss": 0.6495530009269714, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.08570344746112823, |
|
"rewards/margins": 0.01654782146215439, |
|
"rewards/rejected": -0.10225125402212143, |
|
"sft_loss": 0.8570343255996704, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.937563144069509, |
|
"grad_norm": 0.5823240876197815, |
|
"learning_rate": 3.886965579876572e-06, |
|
"logits/chosen": -0.307335764169693, |
|
"logits/rejected": -0.331511914730072, |
|
"logps/chosen": -0.8535898923873901, |
|
"logps/rejected": -0.9173160791397095, |
|
"loss": 0.9234, |
|
"odds_ratio_loss": 0.6983198523521423, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.08535899966955185, |
|
"rewards/margins": 0.006372606847435236, |
|
"rewards/rejected": -0.09173160046339035, |
|
"sft_loss": 0.8535898923873901, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.9537280258638109, |
|
"grad_norm": 0.3793308734893799, |
|
"learning_rate": 3.851521043982716e-06, |
|
"logits/chosen": -0.3546546399593353, |
|
"logits/rejected": -0.3105318248271942, |
|
"logps/chosen": -0.9257644414901733, |
|
"logps/rejected": -0.994279682636261, |
|
"loss": 0.9977, |
|
"odds_ratio_loss": 0.7192004919052124, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.09257644414901733, |
|
"rewards/margins": 0.006851526442915201, |
|
"rewards/rejected": -0.0994279757142067, |
|
"sft_loss": 0.9257644414901733, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.9698929076581128, |
|
"grad_norm": 0.5789406895637512, |
|
"learning_rate": 3.81568841174086e-06, |
|
"logits/chosen": -0.39430108666419983, |
|
"logits/rejected": -0.38088011741638184, |
|
"logps/chosen": -0.8874362111091614, |
|
"logps/rejected": -1.0097267627716064, |
|
"loss": 0.9592, |
|
"odds_ratio_loss": 0.7179639935493469, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.08874362707138062, |
|
"rewards/margins": 0.012229054234921932, |
|
"rewards/rejected": -0.10097268968820572, |
|
"sft_loss": 0.8874362111091614, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.9860577894524146, |
|
"grad_norm": 0.4505593478679657, |
|
"learning_rate": 3.7794779726790664e-06, |
|
"logits/chosen": -0.4197085499763489, |
|
"logits/rejected": -0.3544057607650757, |
|
"logps/chosen": -0.8556501269340515, |
|
"logps/rejected": -0.9688836336135864, |
|
"loss": 0.9233, |
|
"odds_ratio_loss": 0.6760933995246887, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.0855650082230568, |
|
"rewards/margins": 0.011323352344334126, |
|
"rewards/rejected": -0.09688836336135864, |
|
"sft_loss": 0.8556501269340515, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.0022226712467166, |
|
"grad_norm": 0.41950830817222595, |
|
"learning_rate": 3.7429001248146096e-06, |
|
"logits/chosen": -0.3560163080692291, |
|
"logits/rejected": -0.32193905115127563, |
|
"logps/chosen": -0.8660818934440613, |
|
"logps/rejected": -1.0638062953948975, |
|
"loss": 0.9302, |
|
"odds_ratio_loss": 0.6412297487258911, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.08660819381475449, |
|
"rewards/margins": 0.019772443920373917, |
|
"rewards/rejected": -0.10638062655925751, |
|
"sft_loss": 0.8660818934440613, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.0183875530410185, |
|
"grad_norm": 0.30259978771209717, |
|
"learning_rate": 3.7059653716681227e-06, |
|
"logits/chosen": -0.3218996524810791, |
|
"logits/rejected": -0.3514016568660736, |
|
"logps/chosen": -0.9751222729682922, |
|
"logps/rejected": -1.1278547048568726, |
|
"loss": 1.046, |
|
"odds_ratio_loss": 0.7084661722183228, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.09751223772764206, |
|
"rewards/margins": 0.015273240394890308, |
|
"rewards/rejected": -0.11278548091650009, |
|
"sft_loss": 0.9751222729682922, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.0345524348353203, |
|
"grad_norm": 1.449523687362671, |
|
"learning_rate": 3.668684319247463e-06, |
|
"logits/chosen": -0.3402321934700012, |
|
"logits/rejected": -0.3320569396018982, |
|
"logps/chosen": -0.8782706260681152, |
|
"logps/rejected": -1.0504738092422485, |
|
"loss": 0.9434, |
|
"odds_ratio_loss": 0.651136040687561, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.08782706409692764, |
|
"rewards/margins": 0.01722031459212303, |
|
"rewards/rejected": -0.10504738241434097, |
|
"sft_loss": 0.8782706260681152, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.0507173166296222, |
|
"grad_norm": 0.36652296781539917, |
|
"learning_rate": 3.6310676730021373e-06, |
|
"logits/chosen": -0.3392433524131775, |
|
"logits/rejected": -0.3268556296825409, |
|
"logps/chosen": -0.8789156079292297, |
|
"logps/rejected": -0.9153023958206177, |
|
"loss": 0.9515, |
|
"odds_ratio_loss": 0.7262720465660095, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.08789155632257462, |
|
"rewards/margins": 0.0036386798601597548, |
|
"rewards/rejected": -0.09153024852275848, |
|
"sft_loss": 0.8789156079292297, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.066882198423924, |
|
"grad_norm": 0.42644253373146057, |
|
"learning_rate": 3.593126234749178e-06, |
|
"logits/chosen": -0.35958123207092285, |
|
"logits/rejected": -0.33439984917640686, |
|
"logps/chosen": -0.9317266345024109, |
|
"logps/rejected": -0.9812437891960144, |
|
"loss": 1.004, |
|
"odds_ratio_loss": 0.7226861119270325, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.09317266196012497, |
|
"rewards/margins": 0.0049517154693603516, |
|
"rewards/rejected": -0.09812436997890472, |
|
"sft_loss": 0.9317266345024109, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.083047080218226, |
|
"grad_norm": 0.5300435423851013, |
|
"learning_rate": 3.554870899571343e-06, |
|
"logits/chosen": -0.4070967137813568, |
|
"logits/rejected": -0.38338038325309753, |
|
"logps/chosen": -0.9088705778121948, |
|
"logps/rejected": -1.0065948963165283, |
|
"loss": 0.9774, |
|
"odds_ratio_loss": 0.6850352883338928, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.09088706225156784, |
|
"rewards/margins": 0.009772435761988163, |
|
"rewards/rejected": -0.10065948963165283, |
|
"sft_loss": 0.9088705778121948, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.0992119620125278, |
|
"grad_norm": 1.5718979835510254, |
|
"learning_rate": 3.5163126526885373e-06, |
|
"logits/chosen": -0.3708317279815674, |
|
"logits/rejected": -0.3510357737541199, |
|
"logps/chosen": -0.8702448606491089, |
|
"logps/rejected": -0.9972399473190308, |
|
"loss": 0.9409, |
|
"odds_ratio_loss": 0.7065256834030151, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.08702448755502701, |
|
"rewards/margins": 0.012699509970843792, |
|
"rewards/rejected": -0.09972399473190308, |
|
"sft_loss": 0.8702448606491089, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.1153768438068297, |
|
"grad_norm": 0.31913694739341736, |
|
"learning_rate": 3.4774625663033484e-06, |
|
"logits/chosen": -0.39085036516189575, |
|
"logits/rejected": -0.37611085176467896, |
|
"logps/chosen": -0.8731836080551147, |
|
"logps/rejected": -0.9660570025444031, |
|
"loss": 0.9427, |
|
"odds_ratio_loss": 0.6954530477523804, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.08731836825609207, |
|
"rewards/margins": 0.009287341497838497, |
|
"rewards/rejected": -0.09660570323467255, |
|
"sft_loss": 0.8731836080551147, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.1315417256011315, |
|
"grad_norm": 0.5645192265510559, |
|
"learning_rate": 3.4383317964216067e-06, |
|
"logits/chosen": -0.3893832564353943, |
|
"logits/rejected": -0.3442583680152893, |
|
"logps/chosen": -0.870397686958313, |
|
"logps/rejected": -0.9214354753494263, |
|
"loss": 0.9448, |
|
"odds_ratio_loss": 0.7436445355415344, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.0870397686958313, |
|
"rewards/margins": 0.005103783216327429, |
|
"rewards/rejected": -0.09214354306459427, |
|
"sft_loss": 0.870397686958313, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.1477066073954334, |
|
"grad_norm": 0.7822654247283936, |
|
"learning_rate": 3.398931579648877e-06, |
|
"logits/chosen": -0.3577522039413452, |
|
"logits/rejected": -0.2890363931655884, |
|
"logps/chosen": -0.9082385301589966, |
|
"logps/rejected": -1.1010273694992065, |
|
"loss": 0.9792, |
|
"odds_ratio_loss": 0.7092560529708862, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.09082385897636414, |
|
"rewards/margins": 0.019278880208730698, |
|
"rewards/rejected": -0.11010273545980453, |
|
"sft_loss": 0.9082385301589966, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.1638714891897353, |
|
"grad_norm": 0.6916553974151611, |
|
"learning_rate": 3.359273229963813e-06, |
|
"logits/chosen": -0.33050891757011414, |
|
"logits/rejected": -0.33249133825302124, |
|
"logps/chosen": -0.8524163961410522, |
|
"logps/rejected": -0.9603297114372253, |
|
"loss": 0.9215, |
|
"odds_ratio_loss": 0.6913267374038696, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.0852416455745697, |
|
"rewards/margins": 0.010791336186230183, |
|
"rewards/rejected": -0.09603297710418701, |
|
"sft_loss": 0.8524163961410522, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.1800363709840371, |
|
"grad_norm": 0.36541640758514404, |
|
"learning_rate": 3.319368135469285e-06, |
|
"logits/chosen": -0.34484899044036865, |
|
"logits/rejected": -0.3120992183685303, |
|
"logps/chosen": -0.8964350819587708, |
|
"logps/rejected": -1.0409529209136963, |
|
"loss": 0.9665, |
|
"odds_ratio_loss": 0.7009326219558716, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.08964350074529648, |
|
"rewards/margins": 0.014451777562499046, |
|
"rewards/rejected": -0.10409528017044067, |
|
"sft_loss": 0.8964350819587708, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.196201252778339, |
|
"grad_norm": 0.5928468704223633, |
|
"learning_rate": 3.279227755122228e-06, |
|
"logits/chosen": -0.359285831451416, |
|
"logits/rejected": -0.3708931505680084, |
|
"logps/chosen": -0.817459225654602, |
|
"logps/rejected": -1.1048064231872559, |
|
"loss": 0.8791, |
|
"odds_ratio_loss": 0.6168545484542847, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.0817459225654602, |
|
"rewards/margins": 0.028734717518091202, |
|
"rewards/rejected": -0.1104806438088417, |
|
"sft_loss": 0.817459225654602, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.2123661345726409, |
|
"grad_norm": 0.8944354057312012, |
|
"learning_rate": 3.2388636154431417e-06, |
|
"logits/chosen": -0.32971471548080444, |
|
"logits/rejected": -0.3240662217140198, |
|
"logps/chosen": -0.9531005024909973, |
|
"logps/rejected": -1.1055543422698975, |
|
"loss": 1.0252, |
|
"odds_ratio_loss": 0.7207925319671631, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.09531004726886749, |
|
"rewards/margins": 0.015245395712554455, |
|
"rewards/rejected": -0.11055544763803482, |
|
"sft_loss": 0.9531005024909973, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.2285310163669427, |
|
"grad_norm": 0.5451232194900513, |
|
"learning_rate": 3.198287307206192e-06, |
|
"logits/chosen": -0.3906642198562622, |
|
"logits/rejected": -0.36378178000450134, |
|
"logps/chosen": -0.909538745880127, |
|
"logps/rejected": -1.005489706993103, |
|
"loss": 0.9791, |
|
"odds_ratio_loss": 0.6954682469367981, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.09095387905836105, |
|
"rewards/margins": 0.009595084004104137, |
|
"rewards/rejected": -0.10054896771907806, |
|
"sft_loss": 0.909538745880127, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.2446958981612446, |
|
"grad_norm": 0.3986392617225647, |
|
"learning_rate": 3.157510482110856e-06, |
|
"logits/chosen": -0.31712478399276733, |
|
"logits/rejected": -0.3332034647464752, |
|
"logps/chosen": -0.8950090408325195, |
|
"logps/rejected": -0.9677726626396179, |
|
"loss": 0.9687, |
|
"odds_ratio_loss": 0.7365735173225403, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.08950088918209076, |
|
"rewards/margins": 0.007276373915374279, |
|
"rewards/rejected": -0.09677727520465851, |
|
"sft_loss": 0.8950090408325195, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.2608607799555465, |
|
"grad_norm": 0.9783799648284912, |
|
"learning_rate": 3.116544849436077e-06, |
|
"logits/chosen": -0.3367740213871002, |
|
"logits/rejected": -0.3552953600883484, |
|
"logps/chosen": -0.9589813351631165, |
|
"logps/rejected": -1.1763808727264404, |
|
"loss": 1.0263, |
|
"odds_ratio_loss": 0.6732120513916016, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.09589814394712448, |
|
"rewards/margins": 0.02173994854092598, |
|
"rewards/rejected": -0.11763808876276016, |
|
"sft_loss": 0.9589813351631165, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.2770256617498483, |
|
"grad_norm": 0.3939819931983948, |
|
"learning_rate": 3.0754021726778848e-06, |
|
"logits/chosen": -0.3505743741989136, |
|
"logits/rejected": -0.37322431802749634, |
|
"logps/chosen": -0.83990079164505, |
|
"logps/rejected": -1.0232980251312256, |
|
"loss": 0.9049, |
|
"odds_ratio_loss": 0.6501890420913696, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.08399007469415665, |
|
"rewards/margins": 0.018339723348617554, |
|
"rewards/rejected": -0.1023297905921936, |
|
"sft_loss": 0.83990079164505, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.2931905435441502, |
|
"grad_norm": 0.35344642400741577, |
|
"learning_rate": 3.0340942661714463e-06, |
|
"logits/chosen": -0.3435738980770111, |
|
"logits/rejected": -0.36761245131492615, |
|
"logps/chosen": -0.9316965341567993, |
|
"logps/rejected": -1.0095479488372803, |
|
"loss": 1.003, |
|
"odds_ratio_loss": 0.7125651836395264, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.09316965192556381, |
|
"rewards/margins": 0.007785154972225428, |
|
"rewards/rejected": -0.1009548082947731, |
|
"sft_loss": 0.9316965341567993, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.3093554253384523, |
|
"grad_norm": 0.4086878001689911, |
|
"learning_rate": 2.992632991698512e-06, |
|
"logits/chosen": -0.39886465668678284, |
|
"logits/rejected": -0.3849073350429535, |
|
"logps/chosen": -0.9022181630134583, |
|
"logps/rejected": -1.0039399862289429, |
|
"loss": 0.9729, |
|
"odds_ratio_loss": 0.7066690325737, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.09022180736064911, |
|
"rewards/margins": 0.010172189213335514, |
|
"rewards/rejected": -0.10039399564266205, |
|
"sft_loss": 0.9022181630134583, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.3255203071327541, |
|
"grad_norm": 0.45464497804641724, |
|
"learning_rate": 2.9510302550812537e-06, |
|
"logits/chosen": -0.3623855710029602, |
|
"logits/rejected": -0.31726986169815063, |
|
"logps/chosen": -0.8218330144882202, |
|
"logps/rejected": -1.0319081544876099, |
|
"loss": 0.8851, |
|
"odds_ratio_loss": 0.6329900026321411, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.08218331634998322, |
|
"rewards/margins": 0.02100750431418419, |
|
"rewards/rejected": -0.1031908169388771, |
|
"sft_loss": 0.8218330144882202, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.341685188927056, |
|
"grad_norm": 1.1504096984863281, |
|
"learning_rate": 2.9092980027634325e-06, |
|
"logits/chosen": -0.38953226804733276, |
|
"logits/rejected": -0.3612954914569855, |
|
"logps/chosen": -0.8214972615242004, |
|
"logps/rejected": -0.9684427976608276, |
|
"loss": 0.8864, |
|
"odds_ratio_loss": 0.6492589712142944, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.08214972913265228, |
|
"rewards/margins": 0.014694547280669212, |
|
"rewards/rejected": -0.09684427082538605, |
|
"sft_loss": 0.8214972615242004, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.3578500707213579, |
|
"grad_norm": 0.33391210436820984, |
|
"learning_rate": 2.867448218379927e-06, |
|
"logits/chosen": -0.3767167627811432, |
|
"logits/rejected": -0.3566213548183441, |
|
"logps/chosen": -0.9622126817703247, |
|
"logps/rejected": -1.030574083328247, |
|
"loss": 1.0363, |
|
"odds_ratio_loss": 0.7405400276184082, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.09622127562761307, |
|
"rewards/margins": 0.006836143787950277, |
|
"rewards/rejected": -0.10305740684270859, |
|
"sft_loss": 0.9622126817703247, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.3740149525156597, |
|
"grad_norm": 1.2477465867996216, |
|
"learning_rate": 2.825492919315559e-06, |
|
"logits/chosen": -0.3341541886329651, |
|
"logits/rejected": -0.28563547134399414, |
|
"logps/chosen": -0.9898349046707153, |
|
"logps/rejected": -0.9626699686050415, |
|
"loss": 1.0687, |
|
"odds_ratio_loss": 0.7890844345092773, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.09898348897695541, |
|
"rewards/margins": -0.0027165021747350693, |
|
"rewards/rejected": -0.0962669849395752, |
|
"sft_loss": 0.9898349046707153, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.3901798343099616, |
|
"grad_norm": 0.37100037932395935, |
|
"learning_rate": 2.7834441532542482e-06, |
|
"logits/chosen": -0.3620319366455078, |
|
"logits/rejected": -0.3429003357887268, |
|
"logps/chosen": -0.8693292737007141, |
|
"logps/rejected": -0.991874098777771, |
|
"loss": 0.9379, |
|
"odds_ratio_loss": 0.6856324076652527, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.08693292737007141, |
|
"rewards/margins": 0.012254483997821808, |
|
"rewards/rejected": -0.09918741136789322, |
|
"sft_loss": 0.8693292737007141, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.4063447161042635, |
|
"grad_norm": 1.2096267938613892, |
|
"learning_rate": 2.74131399471945e-06, |
|
"logits/chosen": -0.3446846306324005, |
|
"logits/rejected": -0.3061850666999817, |
|
"logps/chosen": -0.9667361974716187, |
|
"logps/rejected": -1.053593397140503, |
|
"loss": 1.0382, |
|
"odds_ratio_loss": 0.714438796043396, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.0966736227273941, |
|
"rewards/margins": 0.008685723878443241, |
|
"rewards/rejected": -0.10535935312509537, |
|
"sft_loss": 0.9667361974716187, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.4225095978985653, |
|
"grad_norm": 0.47893857955932617, |
|
"learning_rate": 2.6991145416068947e-06, |
|
"logits/chosen": -0.3955840468406677, |
|
"logits/rejected": -0.31594154238700867, |
|
"logps/chosen": -0.9019123315811157, |
|
"logps/rejected": -0.9488536715507507, |
|
"loss": 0.9734, |
|
"odds_ratio_loss": 0.7147491574287415, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.09019123762845993, |
|
"rewards/margins": 0.0046941377222537994, |
|
"rewards/rejected": -0.09488537907600403, |
|
"sft_loss": 0.9019123315811157, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.4386744796928672, |
|
"grad_norm": 0.2868447005748749, |
|
"learning_rate": 2.6568579117106143e-06, |
|
"logits/chosen": -0.4024140238761902, |
|
"logits/rejected": -0.4033503532409668, |
|
"logps/chosen": -0.8388016819953918, |
|
"logps/rejected": -0.9728044271469116, |
|
"loss": 0.9081, |
|
"odds_ratio_loss": 0.6926370859146118, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.08388017117977142, |
|
"rewards/margins": 0.013400280848145485, |
|
"rewards/rejected": -0.09728045761585236, |
|
"sft_loss": 0.8388016819953918, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.454839361487169, |
|
"grad_norm": 0.24462518095970154, |
|
"learning_rate": 2.6145562392432544e-06, |
|
"logits/chosen": -0.3949779272079468, |
|
"logits/rejected": -0.39668601751327515, |
|
"logps/chosen": -0.8613153696060181, |
|
"logps/rejected": -0.9795036315917969, |
|
"loss": 0.9305, |
|
"odds_ratio_loss": 0.6919496059417725, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.08613153547048569, |
|
"rewards/margins": 0.011818833649158478, |
|
"rewards/rejected": -0.09795036166906357, |
|
"sft_loss": 0.8613153696060181, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.471004243281471, |
|
"grad_norm": 0.5152093768119812, |
|
"learning_rate": 2.5722216713516682e-06, |
|
"logits/chosen": -0.42058199644088745, |
|
"logits/rejected": -0.38909250497817993, |
|
"logps/chosen": -0.8609904050827026, |
|
"logps/rejected": -0.9690335988998413, |
|
"loss": 0.9318, |
|
"odds_ratio_loss": 0.7082633972167969, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.0860990509390831, |
|
"rewards/margins": 0.010804320685565472, |
|
"rewards/rejected": -0.09690337628126144, |
|
"sft_loss": 0.8609904050827026, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.4871691250757728, |
|
"grad_norm": 0.5419692397117615, |
|
"learning_rate": 2.5298663646288064e-06, |
|
"logits/chosen": -0.35978519916534424, |
|
"logits/rejected": -0.35384541749954224, |
|
"logps/chosen": -0.8710163235664368, |
|
"logps/rejected": -1.0426474809646606, |
|
"loss": 0.9373, |
|
"odds_ratio_loss": 0.6623716354370117, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.08710163086652756, |
|
"rewards/margins": 0.017163105309009552, |
|
"rewards/rejected": -0.10426473617553711, |
|
"sft_loss": 0.8710163235664368, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.503334006870075, |
|
"grad_norm": 3.1488473415374756, |
|
"learning_rate": 2.487502481622879e-06, |
|
"logits/chosen": -0.4146711230278015, |
|
"logits/rejected": -0.40715789794921875, |
|
"logps/chosen": -0.9579635858535767, |
|
"logps/rejected": -1.0180439949035645, |
|
"loss": 1.0298, |
|
"odds_ratio_loss": 0.718089759349823, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.0957963615655899, |
|
"rewards/margins": 0.006008026655763388, |
|
"rewards/rejected": -0.10180439800024033, |
|
"sft_loss": 0.9579635858535767, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.5194988886643768, |
|
"grad_norm": 0.6520385146141052, |
|
"learning_rate": 2.4451421873448253e-06, |
|
"logits/chosen": -0.3981381952762604, |
|
"logits/rejected": -0.33850008249282837, |
|
"logps/chosen": -0.9044814109802246, |
|
"logps/rejected": -0.9930024147033691, |
|
"loss": 0.9767, |
|
"odds_ratio_loss": 0.7225072979927063, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.09044814109802246, |
|
"rewards/margins": 0.008852103725075722, |
|
"rewards/rejected": -0.09930024296045303, |
|
"sft_loss": 0.9044814109802246, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.5356637704586786, |
|
"grad_norm": 0.5775251984596252, |
|
"learning_rate": 2.40279764577506e-06, |
|
"logits/chosen": -0.36691075563430786, |
|
"logits/rejected": -0.31715118885040283, |
|
"logps/chosen": -0.9193195104598999, |
|
"logps/rejected": -0.9655280113220215, |
|
"loss": 0.9919, |
|
"odds_ratio_loss": 0.7258428931236267, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.09193196147680283, |
|
"rewards/margins": 0.004620848223567009, |
|
"rewards/rejected": -0.09655280411243439, |
|
"sft_loss": 0.9193195104598999, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.5518286522529805, |
|
"grad_norm": 0.4706912636756897, |
|
"learning_rate": 2.3604810163705242e-06, |
|
"logits/chosen": -0.3801175355911255, |
|
"logits/rejected": -0.34497779607772827, |
|
"logps/chosen": -0.8502659797668457, |
|
"logps/rejected": -0.9808200597763062, |
|
"loss": 0.9153, |
|
"odds_ratio_loss": 0.6503497362136841, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.08502660691738129, |
|
"rewards/margins": 0.013055416755378246, |
|
"rewards/rejected": -0.09808202087879181, |
|
"sft_loss": 0.8502659797668457, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.5679935340472824, |
|
"grad_norm": 0.8772755265235901, |
|
"learning_rate": 2.3182044505730364e-06, |
|
"logits/chosen": -0.3701505661010742, |
|
"logits/rejected": -0.3588781952857971, |
|
"logps/chosen": -0.8278260231018066, |
|
"logps/rejected": -0.9880140423774719, |
|
"loss": 0.8943, |
|
"odds_ratio_loss": 0.6643026471138, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.08278260380029678, |
|
"rewards/margins": 0.016018804162740707, |
|
"rewards/rejected": -0.09880141168832779, |
|
"sft_loss": 0.8278260231018066, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.5841584158415842, |
|
"grad_norm": 0.5644322633743286, |
|
"learning_rate": 2.275980088319941e-06, |
|
"logits/chosen": -0.37429267168045044, |
|
"logits/rejected": -0.38965049386024475, |
|
"logps/chosen": -0.830912709236145, |
|
"logps/rejected": -0.931898295879364, |
|
"loss": 0.901, |
|
"odds_ratio_loss": 0.7011361122131348, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.08309127390384674, |
|
"rewards/margins": 0.010098553262650967, |
|
"rewards/rejected": -0.09318983554840088, |
|
"sft_loss": 0.830912709236145, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.600323297635886, |
|
"grad_norm": 0.7188877463340759, |
|
"learning_rate": 2.2338200545580577e-06, |
|
"logits/chosen": -0.387838214635849, |
|
"logits/rejected": -0.3446332514286041, |
|
"logps/chosen": -0.8468879461288452, |
|
"logps/rejected": -1.0357553958892822, |
|
"loss": 0.9171, |
|
"odds_ratio_loss": 0.7018327713012695, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.08468880504369736, |
|
"rewards/margins": 0.018886741250753403, |
|
"rewards/rejected": -0.10357554256916046, |
|
"sft_loss": 0.8468879461288452, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.616488179430188, |
|
"grad_norm": 0.40455734729766846, |
|
"learning_rate": 2.191736455761947e-06, |
|
"logits/chosen": -0.32430940866470337, |
|
"logits/rejected": -0.3191392719745636, |
|
"logps/chosen": -0.7817317247390747, |
|
"logps/rejected": -0.8739973306655884, |
|
"loss": 0.8458, |
|
"odds_ratio_loss": 0.6406995058059692, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.07817317545413971, |
|
"rewards/margins": 0.009226562455296516, |
|
"rewards/rejected": -0.08739973604679108, |
|
"sft_loss": 0.7817317247390747, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.616488179430188, |
|
"eval_logits/chosen": -0.3771926760673523, |
|
"eval_logits/rejected": -0.3578239679336548, |
|
"eval_logps/chosen": -0.8850269317626953, |
|
"eval_logps/rejected": -0.9999891519546509, |
|
"eval_loss": 0.9560017585754395, |
|
"eval_odds_ratio_loss": 0.7097483277320862, |
|
"eval_rewards/accuracies": 0.5190908908843994, |
|
"eval_rewards/chosen": -0.08850269019603729, |
|
"eval_rewards/margins": 0.0114962263032794, |
|
"eval_rewards/rejected": -0.09999892115592957, |
|
"eval_runtime": 192.1461, |
|
"eval_samples_per_second": 5.725, |
|
"eval_sft_loss": 0.8850269317626953, |
|
"eval_steps_per_second": 2.862, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.6326530612244898, |
|
"grad_norm": 0.3581576347351074, |
|
"learning_rate": 2.1497413764574673e-06, |
|
"logits/chosen": -0.31272074580192566, |
|
"logits/rejected": -0.33244556188583374, |
|
"logps/chosen": -0.9109123349189758, |
|
"logps/rejected": -1.0614047050476074, |
|
"loss": 0.9769, |
|
"odds_ratio_loss": 0.6601108908653259, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.09109123051166534, |
|
"rewards/margins": 0.01504923403263092, |
|
"rewards/rejected": -0.10614047199487686, |
|
"sft_loss": 0.9109123349189758, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.6488179430187917, |
|
"grad_norm": 1.0781522989273071, |
|
"learning_rate": 2.1078468757516395e-06, |
|
"logits/chosen": -0.3577747941017151, |
|
"logits/rejected": -0.372037798166275, |
|
"logps/chosen": -0.8666743040084839, |
|
"logps/rejected": -0.9286467432975769, |
|
"loss": 0.943, |
|
"odds_ratio_loss": 0.7631633877754211, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.08666743338108063, |
|
"rewards/margins": 0.006197246722877026, |
|
"rewards/rejected": -0.09286467730998993, |
|
"sft_loss": 0.8666743040084839, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.6649828248130936, |
|
"grad_norm": 0.4093440771102905, |
|
"learning_rate": 2.0660649838698145e-06, |
|
"logits/chosen": -0.24239635467529297, |
|
"logits/rejected": -0.2550283670425415, |
|
"logps/chosen": -0.8779211044311523, |
|
"logps/rejected": -1.028240442276001, |
|
"loss": 0.9471, |
|
"odds_ratio_loss": 0.691811203956604, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.08779212832450867, |
|
"rewards/margins": 0.015031938441097736, |
|
"rewards/rejected": -0.10282406955957413, |
|
"sft_loss": 0.8779211044311523, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.6811477066073954, |
|
"grad_norm": 0.4143465459346771, |
|
"learning_rate": 2.0244076987011284e-06, |
|
"logits/chosen": -0.320882648229599, |
|
"logits/rejected": -0.35348570346832275, |
|
"logps/chosen": -0.9102975726127625, |
|
"logps/rejected": -1.0311200618743896, |
|
"loss": 0.9776, |
|
"odds_ratio_loss": 0.6728986501693726, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.09102976322174072, |
|
"rewards/margins": 0.012082245200872421, |
|
"rewards/rejected": -0.10311201959848404, |
|
"sft_loss": 0.9102975726127625, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.6973125884016973, |
|
"grad_norm": 0.4322679340839386, |
|
"learning_rate": 1.982886982353251e-06, |
|
"logits/chosen": -0.33857375383377075, |
|
"logits/rejected": -0.38647031784057617, |
|
"logps/chosen": -0.8801182508468628, |
|
"logps/rejected": -1.0462461709976196, |
|
"loss": 0.9472, |
|
"odds_ratio_loss": 0.6703814268112183, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.08801182359457016, |
|
"rewards/margins": 0.016612788662314415, |
|
"rewards/rejected": -0.10462461411952972, |
|
"sft_loss": 0.8801182508468628, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.7134774701959992, |
|
"grad_norm": 0.40310564637184143, |
|
"learning_rate": 1.941514757717392e-06, |
|
"logits/chosen": -0.3961712718009949, |
|
"logits/rejected": -0.3599357604980469, |
|
"logps/chosen": -0.857568621635437, |
|
"logps/rejected": -1.0133601427078247, |
|
"loss": 0.921, |
|
"odds_ratio_loss": 0.6347678899765015, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.08575686812400818, |
|
"rewards/margins": 0.015579144470393658, |
|
"rewards/rejected": -0.10133601725101471, |
|
"sft_loss": 0.857568621635437, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.729642351990301, |
|
"grad_norm": 0.5565314888954163, |
|
"learning_rate": 1.9003029050445953e-06, |
|
"logits/chosen": -0.3478461802005768, |
|
"logits/rejected": -0.3207647204399109, |
|
"logps/chosen": -0.9041654467582703, |
|
"logps/rejected": -0.99024897813797, |
|
"loss": 0.9734, |
|
"odds_ratio_loss": 0.6924456357955933, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.0904165506362915, |
|
"rewards/margins": 0.008608358912169933, |
|
"rewards/rejected": -0.09902490675449371, |
|
"sft_loss": 0.9041654467582703, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.745807233784603, |
|
"grad_norm": 0.4490904211997986, |
|
"learning_rate": 1.8592632585342523e-06, |
|
"logits/chosen": -0.36072981357574463, |
|
"logits/rejected": -0.3492718040943146, |
|
"logps/chosen": -0.8714792132377625, |
|
"logps/rejected": -1.010517954826355, |
|
"loss": 0.9396, |
|
"odds_ratio_loss": 0.6810620427131653, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.08714792132377625, |
|
"rewards/margins": 0.013903876766562462, |
|
"rewards/rejected": -0.10105180740356445, |
|
"sft_loss": 0.8714792132377625, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.7619721155789048, |
|
"grad_norm": 0.71334308385849, |
|
"learning_rate": 1.8184076029358527e-06, |
|
"logits/chosen": -0.3724268078804016, |
|
"logits/rejected": -0.40728870034217834, |
|
"logps/chosen": -0.8329513669013977, |
|
"logps/rejected": -0.8585556745529175, |
|
"loss": 0.9053, |
|
"odds_ratio_loss": 0.723603367805481, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.08329514414072037, |
|
"rewards/margins": 0.002560428809374571, |
|
"rewards/rejected": -0.08585558086633682, |
|
"sft_loss": 0.8329513669013977, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.7781369973732066, |
|
"grad_norm": 0.38024160265922546, |
|
"learning_rate": 1.7777476701649318e-06, |
|
"logits/chosen": -0.4104040563106537, |
|
"logits/rejected": -0.40031394362449646, |
|
"logps/chosen": -0.9076647758483887, |
|
"logps/rejected": -1.019285798072815, |
|
"loss": 0.9752, |
|
"odds_ratio_loss": 0.6755737662315369, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.09076648205518723, |
|
"rewards/margins": 0.011162097565829754, |
|
"rewards/rejected": -0.10192857682704926, |
|
"sft_loss": 0.9076647758483887, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.7943018791675085, |
|
"grad_norm": 0.433108389377594, |
|
"learning_rate": 1.7372951359341925e-06, |
|
"logits/chosen": -0.35082167387008667, |
|
"logits/rejected": -0.3622151017189026, |
|
"logps/chosen": -0.8306609988212585, |
|
"logps/rejected": -0.9346961975097656, |
|
"loss": 0.8994, |
|
"odds_ratio_loss": 0.6869168281555176, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.08306611329317093, |
|
"rewards/margins": 0.010403511114418507, |
|
"rewards/rejected": -0.09346961975097656, |
|
"sft_loss": 0.8306609988212585, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.8104667609618104, |
|
"grad_norm": 1.0182783603668213, |
|
"learning_rate": 1.6970616164007547e-06, |
|
"logits/chosen": -0.4078885614871979, |
|
"logits/rejected": -0.43148526549339294, |
|
"logps/chosen": -0.8258237838745117, |
|
"logps/rejected": -0.9274940490722656, |
|
"loss": 0.8967, |
|
"odds_ratio_loss": 0.7091785073280334, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.08258237689733505, |
|
"rewards/margins": 0.010167025960981846, |
|
"rewards/rejected": -0.09274940937757492, |
|
"sft_loss": 0.8258237838745117, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.8266316427561122, |
|
"grad_norm": 1.0357805490493774, |
|
"learning_rate": 1.6570586648305276e-06, |
|
"logits/chosen": -0.4377085268497467, |
|
"logits/rejected": -0.407601922750473, |
|
"logps/chosen": -0.8756824731826782, |
|
"logps/rejected": -1.0340659618377686, |
|
"loss": 0.9437, |
|
"odds_ratio_loss": 0.6799197793006897, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0875682383775711, |
|
"rewards/margins": 0.01583835855126381, |
|
"rewards/rejected": -0.10340659320354462, |
|
"sft_loss": 0.8756824731826782, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.842796524550414, |
|
"grad_norm": 0.4784797430038452, |
|
"learning_rate": 1.6172977682806151e-06, |
|
"logits/chosen": -0.3374441862106323, |
|
"logits/rejected": -0.2926723062992096, |
|
"logps/chosen": -0.8671070337295532, |
|
"logps/rejected": -1.0173355340957642, |
|
"loss": 0.9326, |
|
"odds_ratio_loss": 0.6546159982681274, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.08671069890260696, |
|
"rewards/margins": 0.015022864565253258, |
|
"rewards/rejected": -0.10173355042934418, |
|
"sft_loss": 0.8671070337295532, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.858961406344716, |
|
"grad_norm": 0.5492507219314575, |
|
"learning_rate": 1.5777903443007586e-06, |
|
"logits/chosen": -0.3145988881587982, |
|
"logits/rejected": -0.42871540784835815, |
|
"logps/chosen": -0.8989070057868958, |
|
"logps/rejected": -1.0172455310821533, |
|
"loss": 0.9689, |
|
"odds_ratio_loss": 0.6998150944709778, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.08989070355892181, |
|
"rewards/margins": 0.011833854019641876, |
|
"rewards/rejected": -0.1017245501279831, |
|
"sft_loss": 0.8989070057868958, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.8751262881390178, |
|
"grad_norm": 0.4275898039340973, |
|
"learning_rate": 1.5385477376547226e-06, |
|
"logits/chosen": -0.3347630202770233, |
|
"logits/rejected": -0.34142249822616577, |
|
"logps/chosen": -0.9212555885314941, |
|
"logps/rejected": -1.0021544694900513, |
|
"loss": 0.9893, |
|
"odds_ratio_loss": 0.679952085018158, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.0921255499124527, |
|
"rewards/margins": 0.008089900948107243, |
|
"rewards/rejected": -0.10021545737981796, |
|
"sft_loss": 0.9212555885314941, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.89129116993332, |
|
"grad_norm": 0.5769237875938416, |
|
"learning_rate": 1.4995812170625845e-06, |
|
"logits/chosen": -0.3509088456630707, |
|
"logits/rejected": -0.35828500986099243, |
|
"logps/chosen": -0.8898354768753052, |
|
"logps/rejected": -1.1126220226287842, |
|
"loss": 0.9543, |
|
"odds_ratio_loss": 0.6445311307907104, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.08898355811834335, |
|
"rewards/margins": 0.02227865532040596, |
|
"rewards/rejected": -0.11126221716403961, |
|
"sft_loss": 0.8898354768753052, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.9074560517276218, |
|
"grad_norm": 0.9893414974212646, |
|
"learning_rate": 1.4609019719648666e-06, |
|
"logits/chosen": -0.34388267993927, |
|
"logits/rejected": -0.34255415201187134, |
|
"logps/chosen": -0.9129988551139832, |
|
"logps/rejected": -1.0511752367019653, |
|
"loss": 0.9778, |
|
"odds_ratio_loss": 0.6484531760215759, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.0912998765707016, |
|
"rewards/margins": 0.013817653059959412, |
|
"rewards/rejected": -0.10511753708124161, |
|
"sft_loss": 0.9129988551139832, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.9236209335219236, |
|
"grad_norm": 0.8161694407463074, |
|
"learning_rate": 1.42252110930943e-06, |
|
"logits/chosen": -0.3889426589012146, |
|
"logits/rejected": -0.37780189514160156, |
|
"logps/chosen": -0.8312114477157593, |
|
"logps/rejected": -0.9597098231315613, |
|
"loss": 0.8972, |
|
"odds_ratio_loss": 0.6594355702400208, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.08312113583087921, |
|
"rewards/margins": 0.01284984964877367, |
|
"rewards/rejected": -0.0959709957242012, |
|
"sft_loss": 0.8312114477157593, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.9397858153162255, |
|
"grad_norm": 0.6737188100814819, |
|
"learning_rate": 1.3844496503620493e-06, |
|
"logits/chosen": -0.34721988439559937, |
|
"logits/rejected": -0.29065969586372375, |
|
"logps/chosen": -0.8556321263313293, |
|
"logps/rejected": -0.9435693621635437, |
|
"loss": 0.9217, |
|
"odds_ratio_loss": 0.6608615517616272, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.08556319773197174, |
|
"rewards/margins": 0.008793738670647144, |
|
"rewards/rejected": -0.09435693919658661, |
|
"sft_loss": 0.8556321263313293, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.9559506971105274, |
|
"grad_norm": 1.0895054340362549, |
|
"learning_rate": 1.3466985275416081e-06, |
|
"logits/chosen": -0.38311949372291565, |
|
"logits/rejected": -0.440490186214447, |
|
"logps/chosen": -0.9350228309631348, |
|
"logps/rejected": -1.0175323486328125, |
|
"loss": 1.0086, |
|
"odds_ratio_loss": 0.7355881929397583, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.09350229054689407, |
|
"rewards/margins": 0.008250946179032326, |
|
"rewards/rejected": -0.10175323486328125, |
|
"sft_loss": 0.9350228309631348, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.9721155789048292, |
|
"grad_norm": 0.7546266913414001, |
|
"learning_rate": 1.309278581280791e-06, |
|
"logits/chosen": -0.32461339235305786, |
|
"logits/rejected": -0.38296985626220703, |
|
"logps/chosen": -0.825161337852478, |
|
"logps/rejected": -1.007612943649292, |
|
"loss": 0.8897, |
|
"odds_ratio_loss": 0.6452582478523254, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.082516148686409, |
|
"rewards/margins": 0.018245156854391098, |
|
"rewards/rejected": -0.1007612943649292, |
|
"sft_loss": 0.825161337852478, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.9882804606991311, |
|
"grad_norm": 0.30651387572288513, |
|
"learning_rate": 1.272200556913199e-06, |
|
"logits/chosen": -0.34240493178367615, |
|
"logits/rejected": -0.33365195989608765, |
|
"logps/chosen": -0.9005836248397827, |
|
"logps/rejected": -1.0132153034210205, |
|
"loss": 0.9729, |
|
"odds_ratio_loss": 0.722726583480835, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.09005837142467499, |
|
"rewards/margins": 0.011263175867497921, |
|
"rewards/rejected": -0.10132155567407608, |
|
"sft_loss": 0.9005836248397827, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.004445342493433, |
|
"grad_norm": 0.6191690564155579, |
|
"learning_rate": 1.2354751015877698e-06, |
|
"logits/chosen": -0.3653295636177063, |
|
"logits/rejected": -0.3104439675807953, |
|
"logps/chosen": -0.8316798210144043, |
|
"logps/rejected": -1.0361697673797607, |
|
"loss": 0.8947, |
|
"odds_ratio_loss": 0.6298761963844299, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.08316798508167267, |
|
"rewards/margins": 0.02044900692999363, |
|
"rewards/rejected": -0.10361699759960175, |
|
"sft_loss": 0.8316798210144043, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.020610224287735, |
|
"grad_norm": 0.752289354801178, |
|
"learning_rate": 1.1991127612113945e-06, |
|
"logits/chosen": -0.3582732379436493, |
|
"logits/rejected": -0.3034323751926422, |
|
"logps/chosen": -0.8952615857124329, |
|
"logps/rejected": -1.0291544198989868, |
|
"loss": 0.9609, |
|
"odds_ratio_loss": 0.6566318869590759, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.08952615410089493, |
|
"rewards/margins": 0.013389283791184425, |
|
"rewards/rejected": -0.1029154434800148, |
|
"sft_loss": 0.8952615857124329, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.036775106082037, |
|
"grad_norm": 0.5910158753395081, |
|
"learning_rate": 1.1631239774206035e-06, |
|
"logits/chosen": -0.36862578988075256, |
|
"logits/rejected": -0.3653218150138855, |
|
"logps/chosen": -0.8613477945327759, |
|
"logps/rejected": -0.9755401611328125, |
|
"loss": 0.9325, |
|
"odds_ratio_loss": 0.7117538452148438, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.08613476902246475, |
|
"rewards/margins": 0.011419234797358513, |
|
"rewards/rejected": -0.09755401313304901, |
|
"sft_loss": 0.8613477945327759, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.052939987876339, |
|
"grad_norm": 0.5977714657783508, |
|
"learning_rate": 1.1275190845831978e-06, |
|
"logits/chosen": -0.35793787240982056, |
|
"logits/rejected": -0.3579494059085846, |
|
"logps/chosen": -0.8839446902275085, |
|
"logps/rejected": -1.0555723905563354, |
|
"loss": 0.9484, |
|
"odds_ratio_loss": 0.6443823575973511, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.08839447796344757, |
|
"rewards/margins": 0.01716277375817299, |
|
"rewards/rejected": -0.10555724799633026, |
|
"sft_loss": 0.8839446902275085, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.0691048696706407, |
|
"grad_norm": 0.4356369078159332, |
|
"learning_rate": 1.0923083068306778e-06, |
|
"logits/chosen": -0.2889194190502167, |
|
"logits/rejected": -0.39258915185928345, |
|
"logps/chosen": -0.8745051622390747, |
|
"logps/rejected": -1.061402678489685, |
|
"loss": 0.94, |
|
"odds_ratio_loss": 0.6551867723464966, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.08745051920413971, |
|
"rewards/margins": 0.018689759075641632, |
|
"rewards/rejected": -0.10614027827978134, |
|
"sft_loss": 0.8745051622390747, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.0852697514649425, |
|
"grad_norm": 0.2981340289115906, |
|
"learning_rate": 1.0575017551223348e-06, |
|
"logits/chosen": -0.39015138149261475, |
|
"logits/rejected": -0.40903449058532715, |
|
"logps/chosen": -0.7750725746154785, |
|
"logps/rejected": -0.9115964770317078, |
|
"loss": 0.8412, |
|
"odds_ratio_loss": 0.6609454154968262, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.07750725001096725, |
|
"rewards/margins": 0.013652404770255089, |
|
"rewards/rejected": -0.09115965664386749, |
|
"sft_loss": 0.7750725746154785, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.1014346332592444, |
|
"grad_norm": 0.39186251163482666, |
|
"learning_rate": 1.023109424341833e-06, |
|
"logits/chosen": -0.3986419141292572, |
|
"logits/rejected": -0.36254242062568665, |
|
"logps/chosen": -0.8747810125350952, |
|
"logps/rejected": -0.9971181750297546, |
|
"loss": 0.9444, |
|
"odds_ratio_loss": 0.6959220170974731, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.08747810870409012, |
|
"rewards/margins": 0.012233709916472435, |
|
"rewards/rejected": -0.0997118204832077, |
|
"sft_loss": 0.8747810125350952, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.1175995150535463, |
|
"grad_norm": 0.4826388359069824, |
|
"learning_rate": 9.891411904271273e-07, |
|
"logits/chosen": -0.3570977747440338, |
|
"logits/rejected": -0.34066206216812134, |
|
"logps/chosen": -0.8385666608810425, |
|
"logps/rejected": -0.9865023493766785, |
|
"loss": 0.9076, |
|
"odds_ratio_loss": 0.6902373433113098, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.08385667204856873, |
|
"rewards/margins": 0.014793576672673225, |
|
"rewards/rejected": -0.0986502468585968, |
|
"sft_loss": 0.8385666608810425, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 2.133764396847848, |
|
"grad_norm": 0.3553561866283417, |
|
"learning_rate": 9.556068075345363e-07, |
|
"logits/chosen": -0.28917670249938965, |
|
"logits/rejected": -0.3470838665962219, |
|
"logps/chosen": -0.8463741540908813, |
|
"logps/rejected": -0.9492172002792358, |
|
"loss": 0.9162, |
|
"odds_ratio_loss": 0.6985523104667664, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.08463741838932037, |
|
"rewards/margins": 0.01028431672602892, |
|
"rewards/rejected": -0.09492173045873642, |
|
"sft_loss": 0.8463741540908813, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.14992927864215, |
|
"grad_norm": 0.3664523959159851, |
|
"learning_rate": 9.225159052377838e-07, |
|
"logits/chosen": -0.3276691436767578, |
|
"logits/rejected": -0.3102811872959137, |
|
"logps/chosen": -0.9000975489616394, |
|
"logps/rejected": -1.0900113582611084, |
|
"loss": 0.9658, |
|
"odds_ratio_loss": 0.6572277545928955, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.09000976383686066, |
|
"rewards/margins": 0.018991392105817795, |
|
"rewards/rejected": -0.10900114476680756, |
|
"sft_loss": 0.9000975489616394, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 2.166094160436452, |
|
"grad_norm": 0.5697169899940491, |
|
"learning_rate": 8.898779857628184e-07, |
|
"logits/chosen": -0.35697469115257263, |
|
"logits/rejected": -0.29451218247413635, |
|
"logps/chosen": -0.7642744779586792, |
|
"logps/rejected": -0.8856114149093628, |
|
"loss": 0.8306, |
|
"odds_ratio_loss": 0.6628420948982239, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.07642744481563568, |
|
"rewards/margins": 0.012133700773119926, |
|
"rewards/rejected": -0.08856116235256195, |
|
"sft_loss": 0.7642744779586792, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.1822590422307537, |
|
"grad_norm": 1.7151192426681519, |
|
"learning_rate": 8.577024212591975e-07, |
|
"logits/chosen": -0.29253047704696655, |
|
"logits/rejected": -0.3413800001144409, |
|
"logps/chosen": -0.8930098414421082, |
|
"logps/rejected": -0.9748668670654297, |
|
"loss": 0.9639, |
|
"odds_ratio_loss": 0.708949089050293, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.0893009752035141, |
|
"rewards/margins": 0.008185721933841705, |
|
"rewards/rejected": -0.09748668968677521, |
|
"sft_loss": 0.8930098414421082, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.1984239240250556, |
|
"grad_norm": 0.49061620235443115, |
|
"learning_rate": 8.259984511088276e-07, |
|
"logits/chosen": -0.3223104476928711, |
|
"logits/rejected": -0.29760584235191345, |
|
"logps/chosen": -0.8736541867256165, |
|
"logps/rejected": -0.9874069094657898, |
|
"loss": 0.9451, |
|
"odds_ratio_loss": 0.7148812413215637, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.08736542612314224, |
|
"rewards/margins": 0.011375268921256065, |
|
"rewards/rejected": -0.09874069690704346, |
|
"sft_loss": 0.8736541867256165, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 2.2145888058193575, |
|
"grad_norm": 0.33556151390075684, |
|
"learning_rate": 7.947751792728237e-07, |
|
"logits/chosen": -0.3239595890045166, |
|
"logits/rejected": -0.34610220789909363, |
|
"logps/chosen": -0.8864496946334839, |
|
"logps/rejected": -1.0747450590133667, |
|
"loss": 0.9559, |
|
"odds_ratio_loss": 0.694658637046814, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.08864498138427734, |
|
"rewards/margins": 0.018829550594091415, |
|
"rewards/rejected": -0.10747452825307846, |
|
"sft_loss": 0.8864496946334839, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 2.2307536876136593, |
|
"grad_norm": 0.5993340611457825, |
|
"learning_rate": 7.640415716772626e-07, |
|
"logits/chosen": -0.3385930359363556, |
|
"logits/rejected": -0.31589871644973755, |
|
"logps/chosen": -0.8884540796279907, |
|
"logps/rejected": -1.0432296991348267, |
|
"loss": 0.9579, |
|
"odds_ratio_loss": 0.6948095560073853, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.08884540945291519, |
|
"rewards/margins": 0.015477565117180347, |
|
"rewards/rejected": -0.10432296991348267, |
|
"sft_loss": 0.8884540796279907, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.246918569407961, |
|
"grad_norm": 0.4777003228664398, |
|
"learning_rate": 7.338064536385722e-07, |
|
"logits/chosen": -0.3243527412414551, |
|
"logits/rejected": -0.3211807608604431, |
|
"logps/chosen": -0.8481816053390503, |
|
"logps/rejected": -1.0429704189300537, |
|
"loss": 0.9136, |
|
"odds_ratio_loss": 0.6539761424064636, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.08481816202402115, |
|
"rewards/margins": 0.019478868693113327, |
|
"rewards/rejected": -0.10429704189300537, |
|
"sft_loss": 0.8481816053390503, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 2.263083451202263, |
|
"grad_norm": 0.6625237464904785, |
|
"learning_rate": 7.040785073292883e-07, |
|
"logits/chosen": -0.39626187086105347, |
|
"logits/rejected": -0.3658468425273895, |
|
"logps/chosen": -0.9418588876724243, |
|
"logps/rejected": -1.017301321029663, |
|
"loss": 1.0184, |
|
"odds_ratio_loss": 0.7650783658027649, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.09418588131666183, |
|
"rewards/margins": 0.007544253021478653, |
|
"rewards/rejected": -0.10173014551401138, |
|
"sft_loss": 0.9418588876724243, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.279248332996565, |
|
"grad_norm": 0.5683190226554871, |
|
"learning_rate": 6.748662692849297e-07, |
|
"logits/chosen": -0.2916708290576935, |
|
"logits/rejected": -0.289817750453949, |
|
"logps/chosen": -0.8634734153747559, |
|
"logps/rejected": -1.1026208400726318, |
|
"loss": 0.9275, |
|
"odds_ratio_loss": 0.6400235295295715, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.08634734898805618, |
|
"rewards/margins": 0.02391473576426506, |
|
"rewards/rejected": -0.11026208102703094, |
|
"sft_loss": 0.8634734153747559, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 2.295413214790867, |
|
"grad_norm": 1.625442624092102, |
|
"learning_rate": 6.46178127952686e-07, |
|
"logits/chosen": -0.35586509108543396, |
|
"logits/rejected": -0.35335296392440796, |
|
"logps/chosen": -0.8400161862373352, |
|
"logps/rejected": -0.9910812377929688, |
|
"loss": 0.9025, |
|
"odds_ratio_loss": 0.6247957348823547, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.08400160819292068, |
|
"rewards/margins": 0.015106521546840668, |
|
"rewards/rejected": -0.09910812973976135, |
|
"sft_loss": 0.8400161862373352, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 2.3115780965851687, |
|
"grad_norm": 0.46490368247032166, |
|
"learning_rate": 6.180223212826289e-07, |
|
"logits/chosen": -0.33770841360092163, |
|
"logits/rejected": -0.37226027250289917, |
|
"logps/chosen": -0.858726978302002, |
|
"logps/rejected": -0.9763249158859253, |
|
"loss": 0.9249, |
|
"odds_ratio_loss": 0.6612924933433533, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.08587270230054855, |
|
"rewards/margins": 0.011759791523218155, |
|
"rewards/rejected": -0.097632497549057, |
|
"sft_loss": 0.858726978302002, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 2.3277429783794705, |
|
"grad_norm": 0.25405463576316833, |
|
"learning_rate": 5.904069343621443e-07, |
|
"logits/chosen": -0.3201651871204376, |
|
"logits/rejected": -0.34286874532699585, |
|
"logps/chosen": -0.9113739132881165, |
|
"logps/rejected": -1.0487134456634521, |
|
"loss": 0.9766, |
|
"odds_ratio_loss": 0.652290403842926, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.09113740175962448, |
|
"rewards/margins": 0.013733962550759315, |
|
"rewards/rejected": -0.10487135499715805, |
|
"sft_loss": 0.9113739132881165, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 2.3439078601737724, |
|
"grad_norm": 0.5318045020103455, |
|
"learning_rate": 5.633398970942544e-07, |
|
"logits/chosen": -0.32512596249580383, |
|
"logits/rejected": -0.2820747494697571, |
|
"logps/chosen": -0.8218180537223816, |
|
"logps/rejected": -0.9094691276550293, |
|
"loss": 0.8927, |
|
"odds_ratio_loss": 0.7083881497383118, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.08218181133270264, |
|
"rewards/margins": 0.008765103295445442, |
|
"rewards/rejected": -0.09094691276550293, |
|
"sft_loss": 0.8218180537223816, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.3600727419680743, |
|
"grad_norm": 0.6843146681785583, |
|
"learning_rate": 5.368289819205069e-07, |
|
"logits/chosen": -0.39002543687820435, |
|
"logits/rejected": -0.376250684261322, |
|
"logps/chosen": -0.7933530211448669, |
|
"logps/rejected": -0.9611787796020508, |
|
"loss": 0.8585, |
|
"odds_ratio_loss": 0.6519256234169006, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.0793353021144867, |
|
"rewards/margins": 0.016782574355602264, |
|
"rewards/rejected": -0.09611787647008896, |
|
"sft_loss": 0.7933530211448669, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 2.376237623762376, |
|
"grad_norm": 0.3784586787223816, |
|
"learning_rate": 5.108818015890785e-07, |
|
"logits/chosen": -0.3249315917491913, |
|
"logits/rejected": -0.30507951974868774, |
|
"logps/chosen": -0.8853880167007446, |
|
"logps/rejected": -1.0341455936431885, |
|
"loss": 0.9531, |
|
"odds_ratio_loss": 0.6767874956130981, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.08853879570960999, |
|
"rewards/margins": 0.014875771477818489, |
|
"rewards/rejected": -0.10341457277536392, |
|
"sft_loss": 0.8853880167007446, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 2.392402505556678, |
|
"grad_norm": 0.5850736498832703, |
|
"learning_rate": 4.855058069687291e-07, |
|
"logits/chosen": -0.4515988230705261, |
|
"logits/rejected": -0.4501380920410156, |
|
"logps/chosen": -0.8440315127372742, |
|
"logps/rejected": -0.978651225566864, |
|
"loss": 0.9111, |
|
"odds_ratio_loss": 0.6708062887191772, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.0844031572341919, |
|
"rewards/margins": 0.013461967930197716, |
|
"rewards/rejected": -0.09786512702703476, |
|
"sft_loss": 0.8440315127372742, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 2.40856738735098, |
|
"grad_norm": 0.4087739884853363, |
|
"learning_rate": 4.607082849092523e-07, |
|
"logits/chosen": -0.3892877697944641, |
|
"logits/rejected": -0.4075300097465515, |
|
"logps/chosen": -0.9417757987976074, |
|
"logps/rejected": -1.0281052589416504, |
|
"loss": 1.0107, |
|
"odds_ratio_loss": 0.6892626881599426, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.09417758882045746, |
|
"rewards/margins": 0.00863293744623661, |
|
"rewards/rejected": -0.10281052440404892, |
|
"sft_loss": 0.9417757987976074, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 2.4247322691452817, |
|
"grad_norm": 1.142304539680481, |
|
"learning_rate": 4.3649635614901405e-07, |
|
"logits/chosen": -0.39748096466064453, |
|
"logits/rejected": -0.2932053208351135, |
|
"logps/chosen": -0.8502078056335449, |
|
"logps/rejected": -0.883902907371521, |
|
"loss": 0.9219, |
|
"odds_ratio_loss": 0.7170731425285339, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.08502078056335449, |
|
"rewards/margins": 0.003369513200595975, |
|
"rewards/rejected": -0.0883902907371521, |
|
"sft_loss": 0.8502078056335449, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.4247322691452817, |
|
"eval_logits/chosen": -0.3775150775909424, |
|
"eval_logits/rejected": -0.3581116795539856, |
|
"eval_logps/chosen": -0.8786855936050415, |
|
"eval_logps/rejected": -0.9948004484176636, |
|
"eval_loss": 0.9497246742248535, |
|
"eval_odds_ratio_loss": 0.7103896737098694, |
|
"eval_rewards/accuracies": 0.5163636207580566, |
|
"eval_rewards/chosen": -0.0878685712814331, |
|
"eval_rewards/margins": 0.011611479334533215, |
|
"eval_rewards/rejected": -0.0994800478219986, |
|
"eval_runtime": 192.2752, |
|
"eval_samples_per_second": 5.721, |
|
"eval_sft_loss": 0.8786855936050415, |
|
"eval_steps_per_second": 2.86, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.4408971509395836, |
|
"grad_norm": 0.3841034770011902, |
|
"learning_rate": 4.128769732701973e-07, |
|
"logits/chosen": -0.36835092306137085, |
|
"logits/rejected": -0.4074084758758545, |
|
"logps/chosen": -0.8371820449829102, |
|
"logps/rejected": -0.9595246315002441, |
|
"loss": 0.9062, |
|
"odds_ratio_loss": 0.6903966665267944, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.0837181955575943, |
|
"rewards/margins": 0.012234264984726906, |
|
"rewards/rejected": -0.09595246613025665, |
|
"sft_loss": 0.8371820449829102, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 2.4570620327338855, |
|
"grad_norm": 0.6487218737602234, |
|
"learning_rate": 3.8985691870233046e-07, |
|
"logits/chosen": -0.36084288358688354, |
|
"logits/rejected": -0.35909101366996765, |
|
"logps/chosen": -0.8767590522766113, |
|
"logps/rejected": -0.9904271364212036, |
|
"loss": 0.9487, |
|
"odds_ratio_loss": 0.7190364599227905, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.08767590671777725, |
|
"rewards/margins": 0.011366801336407661, |
|
"rewards/rejected": -0.09904270619153976, |
|
"sft_loss": 0.8767590522766113, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 2.4732269145281873, |
|
"grad_norm": 0.726983904838562, |
|
"learning_rate": 3.6744280277467904e-07, |
|
"logits/chosen": -0.3547779619693756, |
|
"logits/rejected": -0.37871819734573364, |
|
"logps/chosen": -0.8915858268737793, |
|
"logps/rejected": -1.0086140632629395, |
|
"loss": 0.9661, |
|
"odds_ratio_loss": 0.7449706792831421, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.08915858715772629, |
|
"rewards/margins": 0.01170281507074833, |
|
"rewards/rejected": -0.10086140781641006, |
|
"sft_loss": 0.8915858268737793, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 2.489391796322489, |
|
"grad_norm": 0.6208191514015198, |
|
"learning_rate": 3.456410618180503e-07, |
|
"logits/chosen": -0.46183329820632935, |
|
"logits/rejected": -0.3973988890647888, |
|
"logps/chosen": -0.7950559258460999, |
|
"logps/rejected": -1.0139881372451782, |
|
"loss": 0.8596, |
|
"odds_ratio_loss": 0.6458045244216919, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.07950559258460999, |
|
"rewards/margins": 0.021893223747611046, |
|
"rewards/rejected": -0.10139881074428558, |
|
"sft_loss": 0.7950559258460999, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 2.5055566781167915, |
|
"grad_norm": 0.40934354066848755, |
|
"learning_rate": 3.244579563165753e-07, |
|
"logits/chosen": -0.3711478114128113, |
|
"logits/rejected": -0.3300473093986511, |
|
"logps/chosen": -0.8490577936172485, |
|
"logps/rejected": -1.0569615364074707, |
|
"loss": 0.9137, |
|
"odds_ratio_loss": 0.6463108062744141, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.08490578085184097, |
|
"rewards/margins": 0.020790381357073784, |
|
"rewards/rejected": -0.10569615662097931, |
|
"sft_loss": 0.8490577936172485, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.521721559911093, |
|
"grad_norm": 0.4264324903488159, |
|
"learning_rate": 3.038995691099697e-07, |
|
"logits/chosen": -0.35405951738357544, |
|
"logits/rejected": -0.3723445534706116, |
|
"logps/chosen": -0.8575676083564758, |
|
"logps/rejected": -1.0358964204788208, |
|
"loss": 0.9267, |
|
"odds_ratio_loss": 0.6915205717086792, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.08575676381587982, |
|
"rewards/margins": 0.017832884564995766, |
|
"rewards/rejected": -0.10358965396881104, |
|
"sft_loss": 0.8575676083564758, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 2.5378864417053952, |
|
"grad_norm": 0.5124202370643616, |
|
"learning_rate": 2.839718036468192e-07, |
|
"logits/chosen": -0.39767321944236755, |
|
"logits/rejected": -0.361719012260437, |
|
"logps/chosen": -0.9866407513618469, |
|
"logps/rejected": -1.0687347650527954, |
|
"loss": 1.0574, |
|
"odds_ratio_loss": 0.7079859972000122, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.09866407513618469, |
|
"rewards/margins": 0.00820938404649496, |
|
"rewards/rejected": -0.10687346756458282, |
|
"sft_loss": 0.9866407513618469, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 2.5540513234996967, |
|
"grad_norm": 0.6700158715248108, |
|
"learning_rate": 2.646803822893723e-07, |
|
"logits/chosen": -0.34473222494125366, |
|
"logits/rejected": -0.339333713054657, |
|
"logps/chosen": -0.9860366582870483, |
|
"logps/rejected": -1.0728685855865479, |
|
"loss": 1.0579, |
|
"odds_ratio_loss": 0.7182521224021912, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.09860367327928543, |
|
"rewards/margins": 0.008683168329298496, |
|
"rewards/rejected": -0.10728684812784195, |
|
"sft_loss": 0.9860366582870483, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 2.570216205293999, |
|
"grad_norm": 0.4476275146007538, |
|
"learning_rate": 2.460308446703341e-07, |
|
"logits/chosen": -0.37150639295578003, |
|
"logits/rejected": -0.3977029621601105, |
|
"logps/chosen": -0.8994391560554504, |
|
"logps/rejected": -0.9403126835823059, |
|
"loss": 0.9704, |
|
"odds_ratio_loss": 0.7100769877433777, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.08994391560554504, |
|
"rewards/margins": 0.004087349865585566, |
|
"rewards/rejected": -0.09403126686811447, |
|
"sft_loss": 0.8994391560554504, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 2.5863810870883004, |
|
"grad_norm": 0.8473093509674072, |
|
"learning_rate": 2.2802854610213143e-07, |
|
"logits/chosen": -0.38676199316978455, |
|
"logits/rejected": -0.3973104655742645, |
|
"logps/chosen": -0.8438700437545776, |
|
"logps/rejected": -1.018701434135437, |
|
"loss": 0.9107, |
|
"odds_ratio_loss": 0.6678277850151062, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.08438700437545776, |
|
"rewards/margins": 0.017483150586485863, |
|
"rewards/rejected": -0.10187015682458878, |
|
"sft_loss": 0.8438700437545776, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.6025459688826027, |
|
"grad_norm": 1.2318559885025024, |
|
"learning_rate": 2.106786560391072e-07, |
|
"logits/chosen": -0.41062861680984497, |
|
"logits/rejected": -0.3663537800312042, |
|
"logps/chosen": -0.9180322885513306, |
|
"logps/rejected": -0.9797943830490112, |
|
"loss": 0.9881, |
|
"odds_ratio_loss": 0.7011545300483704, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.09180323779582977, |
|
"rewards/margins": 0.006176213268190622, |
|
"rewards/rejected": -0.09797944128513336, |
|
"sft_loss": 0.9180322885513306, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 2.6187108506769046, |
|
"grad_norm": 1.8344284296035767, |
|
"learning_rate": 1.9398615659308255e-07, |
|
"logits/chosen": -0.3516565263271332, |
|
"logits/rejected": -0.3090236485004425, |
|
"logps/chosen": -0.8868433833122253, |
|
"logps/rejected": -0.9610105752944946, |
|
"loss": 0.9563, |
|
"odds_ratio_loss": 0.6944981813430786, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.08868434280157089, |
|
"rewards/margins": 0.007416720036417246, |
|
"rewards/rejected": -0.0961010605096817, |
|
"sft_loss": 0.8868433833122253, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 2.6348757324712064, |
|
"grad_norm": 3.686185359954834, |
|
"learning_rate": 1.7795584110272184e-07, |
|
"logits/chosen": -0.33260416984558105, |
|
"logits/rejected": -0.32040587067604065, |
|
"logps/chosen": -0.9077906608581543, |
|
"logps/rejected": -1.0257583856582642, |
|
"loss": 0.9756, |
|
"odds_ratio_loss": 0.6781536340713501, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.09077905863523483, |
|
"rewards/margins": 0.01179676502943039, |
|
"rewards/rejected": -0.10257583856582642, |
|
"sft_loss": 0.9077906608581543, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 2.6510406142655083, |
|
"grad_norm": 0.7552462220191956, |
|
"learning_rate": 1.6259231275709636e-07, |
|
"logits/chosen": -0.32405030727386475, |
|
"logits/rejected": -0.3262009024620056, |
|
"logps/chosen": -0.8568581342697144, |
|
"logps/rejected": -0.9373190999031067, |
|
"loss": 0.9294, |
|
"odds_ratio_loss": 0.7254046201705933, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.08568581938743591, |
|
"rewards/margins": 0.008046089671552181, |
|
"rewards/rejected": -0.09373190253973007, |
|
"sft_loss": 0.8568581342697144, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 2.66720549605981, |
|
"grad_norm": 0.45023104548454285, |
|
"learning_rate": 1.478999832738548e-07, |
|
"logits/chosen": -0.34250158071517944, |
|
"logits/rejected": -0.34709858894348145, |
|
"logps/chosen": -0.8354190587997437, |
|
"logps/rejected": -0.9979323148727417, |
|
"loss": 0.9021, |
|
"odds_ratio_loss": 0.6672018766403198, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.08354191482067108, |
|
"rewards/margins": 0.01625131070613861, |
|
"rewards/rejected": -0.09979323297739029, |
|
"sft_loss": 0.8354190587997437, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.683370377854112, |
|
"grad_norm": 0.6760185956954956, |
|
"learning_rate": 1.338830716323769e-07, |
|
"logits/chosen": -0.34901902079582214, |
|
"logits/rejected": -0.352342426776886, |
|
"logps/chosen": -0.8232784271240234, |
|
"logps/rejected": -0.9058715105056763, |
|
"loss": 0.8916, |
|
"odds_ratio_loss": 0.6835728883743286, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.08232785016298294, |
|
"rewards/margins": 0.008259310387074947, |
|
"rewards/rejected": -0.09058715403079987, |
|
"sft_loss": 0.8232784271240234, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 2.699535259648414, |
|
"grad_norm": 0.9901576638221741, |
|
"learning_rate": 1.205456028622723e-07, |
|
"logits/chosen": -0.3495160639286041, |
|
"logits/rejected": -0.35691842436790466, |
|
"logps/chosen": -0.8500292897224426, |
|
"logps/rejected": -1.0147500038146973, |
|
"loss": 0.9171, |
|
"odds_ratio_loss": 0.6710700988769531, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.08500292897224426, |
|
"rewards/margins": 0.016472063958644867, |
|
"rewards/rejected": -0.10147500038146973, |
|
"sft_loss": 0.8500292897224426, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 2.7157001414427158, |
|
"grad_norm": 0.29376673698425293, |
|
"learning_rate": 1.0789140688756805e-07, |
|
"logits/chosen": -0.2777409255504608, |
|
"logits/rejected": -0.30515843629837036, |
|
"logps/chosen": -0.8388081789016724, |
|
"logps/rejected": -1.004902720451355, |
|
"loss": 0.9016, |
|
"odds_ratio_loss": 0.6277891397476196, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.08388081192970276, |
|
"rewards/margins": 0.016609463840723038, |
|
"rewards/rejected": -0.10049028694629669, |
|
"sft_loss": 0.8388081789016724, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 2.7318650232370176, |
|
"grad_norm": 1.1649651527404785, |
|
"learning_rate": 9.592411742693098e-08, |
|
"logits/chosen": -0.3376592993736267, |
|
"logits/rejected": -0.33899828791618347, |
|
"logps/chosen": -0.8941831588745117, |
|
"logps/rejected": -0.9593558311462402, |
|
"loss": 0.9688, |
|
"odds_ratio_loss": 0.7464134693145752, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.08941832929849625, |
|
"rewards/margins": 0.00651725847274065, |
|
"rewards/rejected": -0.09593559056520462, |
|
"sft_loss": 0.8941831588745117, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 2.7480299050313195, |
|
"grad_norm": 0.365510493516922, |
|
"learning_rate": 8.464717095022168e-08, |
|
"logits/chosen": -0.26350411772727966, |
|
"logits/rejected": -0.3258097767829895, |
|
"logps/chosen": -0.8289276957511902, |
|
"logps/rejected": -0.9933468103408813, |
|
"loss": 0.894, |
|
"odds_ratio_loss": 0.6506984829902649, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.0828927755355835, |
|
"rewards/margins": 0.01644190214574337, |
|
"rewards/rejected": -0.09933467954397202, |
|
"sft_loss": 0.8289276957511902, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.7641947868256214, |
|
"grad_norm": 0.860230565071106, |
|
"learning_rate": 7.406380569169841e-08, |
|
"logits/chosen": -0.35509008169174194, |
|
"logits/rejected": -0.3218967318534851, |
|
"logps/chosen": -0.9126371145248413, |
|
"logps/rejected": -0.8999163508415222, |
|
"loss": 0.9886, |
|
"odds_ratio_loss": 0.759522020816803, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.09126370400190353, |
|
"rewards/margins": -0.0012720691738650203, |
|
"rewards/rejected": -0.08999162912368774, |
|
"sft_loss": 0.9126371145248413, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 2.7803596686199232, |
|
"grad_norm": 2.069009780883789, |
|
"learning_rate": 6.417706072013808e-08, |
|
"logits/chosen": -0.3513588011264801, |
|
"logits/rejected": -0.31902140378952026, |
|
"logps/chosen": -0.8999738693237305, |
|
"logps/rejected": -0.9839135408401489, |
|
"loss": 0.9715, |
|
"odds_ratio_loss": 0.7152166366577148, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.08999738842248917, |
|
"rewards/margins": 0.008393971249461174, |
|
"rewards/rejected": -0.09839136153459549, |
|
"sft_loss": 0.8999738693237305, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 2.796524550414225, |
|
"grad_norm": 0.59537672996521, |
|
"learning_rate": 5.498977506615294e-08, |
|
"logits/chosen": -0.33539581298828125, |
|
"logits/rejected": -0.36086633801460266, |
|
"logps/chosen": -0.8895516395568848, |
|
"logps/rejected": -0.9674522280693054, |
|
"loss": 0.9602, |
|
"odds_ratio_loss": 0.706065833568573, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.08895515650510788, |
|
"rewards/margins": 0.007790066301822662, |
|
"rewards/rejected": -0.09674523025751114, |
|
"sft_loss": 0.8895516395568848, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 2.812689432208527, |
|
"grad_norm": 0.4070757031440735, |
|
"learning_rate": 4.6504586906947756e-08, |
|
"logits/chosen": -0.3671857714653015, |
|
"logits/rejected": -0.36166203022003174, |
|
"logps/chosen": -0.9486915469169617, |
|
"logps/rejected": -0.9999829530715942, |
|
"loss": 1.0182, |
|
"odds_ratio_loss": 0.6954110860824585, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.09486915171146393, |
|
"rewards/margins": 0.005129144061356783, |
|
"rewards/rejected": -0.09999830275774002, |
|
"sft_loss": 0.9486915469169617, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 2.828854314002829, |
|
"grad_norm": 1.650687336921692, |
|
"learning_rate": 3.8723932808754914e-08, |
|
"logits/chosen": -0.2851547300815582, |
|
"logits/rejected": -0.2857135236263275, |
|
"logps/chosen": -0.9708272814750671, |
|
"logps/rejected": -0.9912136197090149, |
|
"loss": 1.0459, |
|
"odds_ratio_loss": 0.7506999969482422, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.09708271920681, |
|
"rewards/margins": 0.0020386301912367344, |
|
"rewards/rejected": -0.0991213470697403, |
|
"sft_loss": 0.9708272814750671, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.8450191957971307, |
|
"grad_norm": 0.9035086035728455, |
|
"learning_rate": 3.1650047027158014e-08, |
|
"logits/chosen": -0.3378879427909851, |
|
"logits/rejected": -0.31768563389778137, |
|
"logps/chosen": -0.863334059715271, |
|
"logps/rejected": -0.9955730438232422, |
|
"loss": 0.9285, |
|
"odds_ratio_loss": 0.6513949632644653, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.08633340895175934, |
|
"rewards/margins": 0.013223896734416485, |
|
"rewards/rejected": -0.0995573028922081, |
|
"sft_loss": 0.863334059715271, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 2.8611840775914326, |
|
"grad_norm": 0.3864952623844147, |
|
"learning_rate": 2.5284960865517848e-08, |
|
"logits/chosen": -0.39154380559921265, |
|
"logits/rejected": -0.34484562277793884, |
|
"logps/chosen": -0.82793790102005, |
|
"logps/rejected": -1.0070700645446777, |
|
"loss": 0.8928, |
|
"odds_ratio_loss": 0.6486603021621704, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.08279379457235336, |
|
"rewards/margins": 0.0179132129997015, |
|
"rewards/rejected": -0.10070700943470001, |
|
"sft_loss": 0.82793790102005, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 2.8773489593857344, |
|
"grad_norm": 0.4017253518104553, |
|
"learning_rate": 1.9630502091670388e-08, |
|
"logits/chosen": -0.3473368287086487, |
|
"logits/rejected": -0.37853848934173584, |
|
"logps/chosen": -0.8299247622489929, |
|
"logps/rejected": -0.996843695640564, |
|
"loss": 0.8926, |
|
"odds_ratio_loss": 0.6264339685440063, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.08299248665571213, |
|
"rewards/margins": 0.016691887751221657, |
|
"rewards/rejected": -0.09968437254428864, |
|
"sft_loss": 0.8299247622489929, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 2.8935138411800363, |
|
"grad_norm": 0.7657872438430786, |
|
"learning_rate": 1.4688294413074677e-08, |
|
"logits/chosen": -0.3813559114933014, |
|
"logits/rejected": -0.34783899784088135, |
|
"logps/chosen": -0.802249550819397, |
|
"logps/rejected": -0.9486366510391235, |
|
"loss": 0.8723, |
|
"odds_ratio_loss": 0.7008516788482666, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.08022496104240417, |
|
"rewards/margins": 0.014638709835708141, |
|
"rewards/rejected": -0.09486366808414459, |
|
"sft_loss": 0.802249550819397, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 2.909678722974338, |
|
"grad_norm": 0.2962876558303833, |
|
"learning_rate": 1.0459757010556626e-08, |
|
"logits/chosen": -0.4134625494480133, |
|
"logits/rejected": -0.394450843334198, |
|
"logps/chosen": -0.8447575569152832, |
|
"logps/rejected": -0.919145405292511, |
|
"loss": 0.9156, |
|
"odds_ratio_loss": 0.7088185548782349, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.08447576314210892, |
|
"rewards/margins": 0.007438770029693842, |
|
"rewards/rejected": -0.09191453456878662, |
|
"sft_loss": 0.8447575569152832, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.92584360476864, |
|
"grad_norm": 0.4512230455875397, |
|
"learning_rate": 6.94610413078306e-09, |
|
"logits/chosen": -0.4446278512477875, |
|
"logits/rejected": -0.37901362776756287, |
|
"logps/chosen": -0.8928766250610352, |
|
"logps/rejected": -1.078958511352539, |
|
"loss": 0.9617, |
|
"odds_ratio_loss": 0.6879509091377258, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.0892876610159874, |
|
"rewards/margins": 0.01860819011926651, |
|
"rewards/rejected": -0.1078958511352539, |
|
"sft_loss": 0.8928766250610352, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 2.942008486562942, |
|
"grad_norm": 0.2540852427482605, |
|
"learning_rate": 4.14834473758563e-09, |
|
"logits/chosen": -0.4007115364074707, |
|
"logits/rejected": -0.3911517858505249, |
|
"logps/chosen": -0.8001864552497864, |
|
"logps/rejected": -1.0187556743621826, |
|
"loss": 0.8634, |
|
"odds_ratio_loss": 0.6319615244865417, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.08001864701509476, |
|
"rewards/margins": 0.021856937557458878, |
|
"rewards/rejected": -0.10187558084726334, |
|
"sft_loss": 0.8001864552497864, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 2.9581733683572438, |
|
"grad_norm": 0.4121166467666626, |
|
"learning_rate": 2.067282222230349e-09, |
|
"logits/chosen": -0.3413907587528229, |
|
"logits/rejected": -0.278145968914032, |
|
"logps/chosen": -0.8189884424209595, |
|
"logps/rejected": -1.0053989887237549, |
|
"loss": 0.881, |
|
"odds_ratio_loss": 0.620233416557312, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.08189885318279266, |
|
"rewards/margins": 0.018641049042344093, |
|
"rewards/rejected": -0.10053990036249161, |
|
"sft_loss": 0.8189884424209595, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 2.9743382501515456, |
|
"grad_norm": 3.4636123180389404, |
|
"learning_rate": 7.035141727212979e-10, |
|
"logits/chosen": -0.3847911059856415, |
|
"logits/rejected": -0.34176406264305115, |
|
"logps/chosen": -0.8342105746269226, |
|
"logps/rejected": -0.9381749033927917, |
|
"loss": 0.9018, |
|
"odds_ratio_loss": 0.6754266023635864, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.08342105895280838, |
|
"rewards/margins": 0.010396432131528854, |
|
"rewards/rejected": -0.09381748735904694, |
|
"sft_loss": 0.8342105746269226, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 2.9905031319458475, |
|
"grad_norm": 1.2374101877212524, |
|
"learning_rate": 5.743220219761592e-11, |
|
"logits/chosen": -0.33420827984809875, |
|
"logits/rejected": -0.3142699599266052, |
|
"logps/chosen": -1.0187790393829346, |
|
"logps/rejected": -1.029541015625, |
|
"loss": 1.097, |
|
"odds_ratio_loss": 0.782578706741333, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.10187790542840958, |
|
"rewards/margins": 0.0010761909652501345, |
|
"rewards/rejected": -0.10295410454273224, |
|
"sft_loss": 1.0187790393829346, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 2.9969690846635686, |
|
"step": 1854, |
|
"total_flos": 1.9948570754930442e+18, |
|
"train_loss": 0.9750770799807618, |
|
"train_runtime": 17949.5667, |
|
"train_samples_per_second": 1.654, |
|
"train_steps_per_second": 0.103 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1854, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 1.9948570754930442e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|