|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.9646365422396856,
  "eval_steps": 50,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.09823182711198428,
      "grad_norm": 4.67578125,
      "learning_rate": 5e-07,
      "logits/chosen": -1.882656216621399,
      "logits/rejected": -2.0222558975219727,
      "logps/chosen": -629.1900024414062,
      "logps/rejected": -886.4169311523438,
      "loss": 0.6184,
      "rewards/accuracies": 0.39500001072883606,
      "rewards/chosen": 1.2086933851242065,
      "rewards/margins": 0.8173081278800964,
      "rewards/rejected": 0.39138519763946533,
      "step": 50
    },
    {
      "epoch": 0.09823182711198428,
      "eval_logits/chosen": -1.8351056575775146,
      "eval_logits/rejected": -1.898075819015503,
      "eval_logps/chosen": -827.69677734375,
      "eval_logps/rejected": -769.7338256835938,
      "eval_loss": 0.21162064373493195,
      "eval_rewards/accuracies": 0.7060185074806213,
      "eval_rewards/chosen": 4.598723888397217,
      "eval_rewards/margins": 3.7964937686920166,
      "eval_rewards/rejected": 0.802230179309845,
      "eval_runtime": 376.4446,
      "eval_samples_per_second": 1.145,
      "eval_steps_per_second": 0.574,
      "step": 50
    },
    {
      "epoch": 0.19646365422396855,
      "grad_norm": 1.43359375,
      "learning_rate": 1e-06,
      "logits/chosen": -1.9130176305770874,
      "logits/rejected": -2.028027296066284,
      "logps/chosen": -584.5574951171875,
      "logps/rejected": -919.9600219726562,
      "loss": 0.3184,
      "rewards/accuracies": 0.5550000071525574,
      "rewards/chosen": 4.297489166259766,
      "rewards/margins": 5.647652626037598,
      "rewards/rejected": -1.350163459777832,
      "step": 100
    },
    {
      "epoch": 0.19646365422396855,
      "eval_logits/chosen": -1.830168604850769,
      "eval_logits/rejected": -1.8947844505310059,
      "eval_logps/chosen": -808.8148193359375,
      "eval_logps/rejected": -775.2783813476562,
      "eval_loss": 0.20374441146850586,
      "eval_rewards/accuracies": 0.7083333134651184,
      "eval_rewards/chosen": 6.487063884735107,
      "eval_rewards/margins": 6.239492416381836,
      "eval_rewards/rejected": 0.24757163226604462,
      "eval_runtime": 376.9374,
      "eval_samples_per_second": 1.143,
      "eval_steps_per_second": 0.573,
      "step": 100
    },
    {
      "epoch": 0.29469548133595286,
      "grad_norm": 0.00019991397857666016,
      "learning_rate": 9.92403876506104e-07,
      "logits/chosen": -1.8983594179153442,
      "logits/rejected": -1.995449185371399,
      "logps/chosen": -539.44873046875,
      "logps/rejected": -860.8162231445312,
      "loss": 0.3339,
      "rewards/accuracies": 0.5274999737739563,
      "rewards/chosen": 4.254204273223877,
      "rewards/margins": 8.191503524780273,
      "rewards/rejected": -3.937298536300659,
      "step": 150
    },
    {
      "epoch": 0.29469548133595286,
      "eval_logits/chosen": -1.8110939264297485,
      "eval_logits/rejected": -1.8730604648590088,
      "eval_logps/chosen": -810.138916015625,
      "eval_logps/rejected": -787.7071533203125,
      "eval_loss": 0.20321600139141083,
      "eval_rewards/accuracies": 0.7083333134651184,
      "eval_rewards/chosen": 6.352499961853027,
      "eval_rewards/margins": 7.34998083114624,
      "eval_rewards/rejected": -0.9974803924560547,
      "eval_runtime": 381.3297,
      "eval_samples_per_second": 1.13,
      "eval_steps_per_second": 0.566,
      "step": 150
    },
    {
      "epoch": 0.3929273084479371,
      "grad_norm": 3.673828125,
      "learning_rate": 9.698463103929541e-07,
      "logits/chosen": -1.8827344179153442,
      "logits/rejected": NaN,
      "logps/chosen": -537.2374877929688,
      "logps/rejected": -964.1453857421875,
      "loss": 0.2992,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": 4.266866683959961,
      "rewards/margins": 12.264365196228027,
      "rewards/rejected": -7.997498989105225,
      "step": 200
    },
    {
      "epoch": 0.3929273084479371,
      "eval_logits/chosen": -1.8244402408599854,
      "eval_logits/rejected": -1.88623046875,
      "eval_logps/chosen": -812.432861328125,
      "eval_logps/rejected": -809.0966186523438,
      "eval_loss": 0.20289792120456696,
      "eval_rewards/accuracies": 0.7083333134651184,
      "eval_rewards/chosen": 6.12615442276001,
      "eval_rewards/margins": 9.262307167053223,
      "eval_rewards/rejected": -3.1361522674560547,
      "eval_runtime": 375.9512,
      "eval_samples_per_second": 1.146,
      "eval_steps_per_second": 0.575,
      "step": 200
    },
    {
      "epoch": 0.4911591355599214,
      "grad_norm": 3.783203125,
      "learning_rate": 9.330127018922193e-07,
      "logits/chosen": -1.8917089700698853,
      "logits/rejected": -2.057480573654175,
      "logps/chosen": -548.9512329101562,
      "logps/rejected": -1068.0450439453125,
      "loss": 0.3062,
      "rewards/accuracies": 0.5600000023841858,
      "rewards/chosen": 4.461262226104736,
      "rewards/margins": 15.397392272949219,
      "rewards/rejected": -10.93613052368164,
      "step": 250
    },
    {
      "epoch": 0.4911591355599214,
      "eval_logits/chosen": -1.8248969316482544,
      "eval_logits/rejected": -1.8831199407577515,
      "eval_logps/chosen": -814.4699096679688,
      "eval_logps/rejected": -816.1018676757812,
      "eval_loss": 0.20285306870937347,
      "eval_rewards/accuracies": 0.7083333134651184,
      "eval_rewards/chosen": 5.92443323135376,
      "eval_rewards/margins": 9.760541915893555,
      "eval_rewards/rejected": -3.836108684539795,
      "eval_runtime": 382.044,
      "eval_samples_per_second": 1.128,
      "eval_steps_per_second": 0.565,
      "step": 250
    },
    {
      "epoch": 0.5893909626719057,
      "grad_norm": 0.0,
      "learning_rate": 8.83022221559489e-07,
      "logits/chosen": -1.8699413537979126,
      "logits/rejected": -2.034736394882202,
      "logps/chosen": -602.510009765625,
      "logps/rejected": -1128.844970703125,
      "loss": 0.2878,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": 4.663626670837402,
      "rewards/margins": 18.277070999145508,
      "rewards/rejected": -13.613446235656738,
      "step": 300
    },
    {
      "epoch": 0.5893909626719057,
      "eval_logits/chosen": -1.8203803300857544,
      "eval_logits/rejected": -1.8756872415542603,
      "eval_logps/chosen": -814.0150756835938,
      "eval_logps/rejected": -814.901611328125,
      "eval_loss": 0.20328794419765472,
      "eval_rewards/accuracies": 0.7083333134651184,
      "eval_rewards/chosen": 5.968328475952148,
      "eval_rewards/margins": 9.685802459716797,
      "eval_rewards/rejected": -3.7174744606018066,
      "eval_runtime": 377.271,
      "eval_samples_per_second": 1.142,
      "eval_steps_per_second": 0.573,
      "step": 300
    },
    {
      "epoch": 0.68762278978389,
      "grad_norm": 0.0,
      "learning_rate": 8.213938048432696e-07,
      "logits/chosen": -1.8613183498382568,
      "logits/rejected": -2.010195255279541,
      "logps/chosen": -560.01123046875,
      "logps/rejected": -1054.34130859375,
      "loss": 0.2965,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": 4.663458347320557,
      "rewards/margins": 17.340160369873047,
      "rewards/rejected": -12.676701545715332,
      "step": 350
    },
    {
      "epoch": 0.68762278978389,
      "eval_logits/chosen": -1.7953965663909912,
      "eval_logits/rejected": -1.8488408327102661,
      "eval_logps/chosen": -810.86572265625,
      "eval_logps/rejected": -815.11572265625,
      "eval_loss": 0.2027529776096344,
      "eval_rewards/accuracies": 0.7083333134651184,
      "eval_rewards/chosen": 6.281754016876221,
      "eval_rewards/margins": 10.01964282989502,
      "eval_rewards/rejected": -3.7378885746002197,
      "eval_runtime": 376.3789,
      "eval_samples_per_second": 1.145,
      "eval_steps_per_second": 0.574,
      "step": 350
    },
    {
      "epoch": 0.7858546168958742,
      "grad_norm": 0.0,
      "learning_rate": 7.5e-07,
      "logits/chosen": -1.8544628620147705,
      "logits/rejected": -1.9972070455551147,
      "logps/chosen": -550.8362426757812,
      "logps/rejected": -1053.2850341796875,
      "loss": 0.3138,
      "rewards/accuracies": 0.5475000143051147,
      "rewards/chosen": 4.41979455947876,
      "rewards/margins": 17.060997009277344,
      "rewards/rejected": -12.641203880310059,
      "step": 400
    },
    {
      "epoch": 0.7858546168958742,
      "eval_logits/chosen": -1.7978651523590088,
      "eval_logits/rejected": -1.85009765625,
      "eval_logps/chosen": -816.5775756835938,
      "eval_logps/rejected": -824.407958984375,
      "eval_loss": 0.20284250378608704,
      "eval_rewards/accuracies": 0.7083333134651184,
      "eval_rewards/chosen": 5.713149070739746,
      "eval_rewards/margins": 10.380072593688965,
      "eval_rewards/rejected": -4.666924953460693,
      "eval_runtime": 376.6783,
      "eval_samples_per_second": 1.144,
      "eval_steps_per_second": 0.573,
      "step": 400
    },
    {
      "epoch": 0.8840864440078585,
      "grad_norm": 0.0,
      "learning_rate": 6.710100716628344e-07,
      "logits/chosen": -1.8560644388198853,
      "logits/rejected": -1.9615429639816284,
      "logps/chosen": -552.2062377929688,
      "logps/rejected": -1005.7550048828125,
      "loss": 0.3279,
      "rewards/accuracies": 0.5350000262260437,
      "rewards/chosen": 4.51744270324707,
      "rewards/margins": 16.83458137512207,
      "rewards/rejected": -12.317138671875,
      "step": 450
    },
    {
      "epoch": 0.8840864440078585,
      "eval_logits/chosen": -1.7807526588439941,
      "eval_logits/rejected": -1.833708643913269,
      "eval_logps/chosen": -808.5011596679688,
      "eval_logps/rejected": -816.6145629882812,
      "eval_loss": 0.20271265506744385,
      "eval_rewards/accuracies": 0.7083333134651184,
      "eval_rewards/chosen": 6.52016544342041,
      "eval_rewards/margins": 10.406230926513672,
      "eval_rewards/rejected": -3.8860654830932617,
      "eval_runtime": 376.9509,
      "eval_samples_per_second": 1.143,
      "eval_steps_per_second": 0.573,
      "step": 450
    },
    {
      "epoch": 0.9823182711198428,
      "grad_norm": 0.2420654296875,
      "learning_rate": 5.868240888334652e-07,
      "logits/chosen": -1.8405078649520874,
      "logits/rejected": -1.9955663681030273,
      "logps/chosen": -563.3287353515625,
      "logps/rejected": -1027.811279296875,
      "loss": 0.293,
      "rewards/accuracies": 0.5774999856948853,
      "rewards/chosen": 4.939021587371826,
      "rewards/margins": 16.828310012817383,
      "rewards/rejected": -11.889289855957031,
      "step": 500
    },
    {
      "epoch": 0.9823182711198428,
      "eval_logits/chosen": -1.776493787765503,
      "eval_logits/rejected": -1.8292959928512573,
      "eval_logps/chosen": -803.6666870117188,
      "eval_logps/rejected": -814.2019653320312,
      "eval_loss": 0.20270462334156036,
      "eval_rewards/accuracies": 0.7083333134651184,
      "eval_rewards/chosen": 7.002051830291748,
      "eval_rewards/margins": 10.646075248718262,
      "eval_rewards/rejected": -3.6440227031707764,
      "eval_runtime": 375.946,
      "eval_samples_per_second": 1.146,
      "eval_steps_per_second": 0.575,
      "step": 500
    },
    {
      "epoch": 1.080550098231827,
      "grad_norm": 0.0,
      "learning_rate": 5e-07,
      "logits/chosen": -1.845117211341858,
      "logits/rejected": NaN,
      "logps/chosen": -590.0087280273438,
      "logps/rejected": -1017.6812744140625,
      "loss": 0.3064,
      "rewards/accuracies": 0.5583333373069763,
      "rewards/chosen": 5.3995184898376465,
      "rewards/margins": 17.224445343017578,
      "rewards/rejected": -11.82492446899414,
      "step": 550
    },
    {
      "epoch": 1.080550098231827,
      "eval_logits/chosen": -1.7842611074447632,
      "eval_logits/rejected": -1.837895154953003,
      "eval_logps/chosen": -803.8020629882812,
      "eval_logps/rejected": -816.7523193359375,
      "eval_loss": 0.20272213220596313,
      "eval_rewards/accuracies": 0.7083333134651184,
      "eval_rewards/chosen": 6.988828182220459,
      "eval_rewards/margins": 10.88884162902832,
      "eval_rewards/rejected": -3.900012254714966,
      "eval_runtime": 378.7944,
      "eval_samples_per_second": 1.138,
      "eval_steps_per_second": 0.57,
      "step": 550
    },
    {
      "epoch": 1.1787819253438114,
      "grad_norm": 0.0,
      "learning_rate": 4.131759111665348e-07,
      "logits/chosen": -1.8424999713897705,
      "logits/rejected": -1.9918944835662842,
      "logps/chosen": -574.5387573242188,
      "logps/rejected": -1103.74755859375,
      "loss": 0.2844,
      "rewards/accuracies": 0.5899999737739563,
      "rewards/chosen": 5.156400203704834,
      "rewards/margins": 17.812108993530273,
      "rewards/rejected": -12.655708312988281,
      "step": 600
    },
    {
      "epoch": 1.1787819253438114,
      "eval_logits/chosen": -1.7866662740707397,
      "eval_logits/rejected": -1.8396222591400146,
      "eval_logps/chosen": -805.9617919921875,
      "eval_logps/rejected": -820.7268676757812,
      "eval_loss": 0.202733114361763,
      "eval_rewards/accuracies": 0.7083333134651184,
      "eval_rewards/chosen": 6.773044586181641,
      "eval_rewards/margins": 11.070393562316895,
      "eval_rewards/rejected": -4.2973504066467285,
      "eval_runtime": 375.1497,
      "eval_samples_per_second": 1.149,
      "eval_steps_per_second": 0.576,
      "step": 600
    },
    {
      "epoch": 1.2770137524557956,
      "grad_norm": 0.0,
      "learning_rate": 3.2898992833716563e-07,
      "logits/chosen": -1.8421484231948853,
      "logits/rejected": -1.972021460533142,
      "logps/chosen": -555.9462280273438,
      "logps/rejected": -1028.8499755859375,
      "loss": 0.2972,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": 5.088903903961182,
      "rewards/margins": 17.384765625,
      "rewards/rejected": -12.295862197875977,
      "step": 650
    },
    {
      "epoch": 1.2770137524557956,
      "eval_logits/chosen": -1.7830674648284912,
      "eval_logits/rejected": -1.8358244895935059,
      "eval_logps/chosen": -804.9791870117188,
      "eval_logps/rejected": -819.3784790039062,
      "eval_loss": 0.20274707674980164,
      "eval_rewards/accuracies": 0.7083333134651184,
      "eval_rewards/chosen": 6.87298583984375,
      "eval_rewards/margins": 11.037016868591309,
      "eval_rewards/rejected": -4.164031505584717,
      "eval_runtime": 375.3471,
      "eval_samples_per_second": 1.148,
      "eval_steps_per_second": 0.575,
      "step": 650
    },
    {
      "epoch": 1.37524557956778,
      "grad_norm": 0.0,
      "learning_rate": 2.500000000000001e-07,
      "logits/chosen": -1.8376269340515137,
      "logits/rejected": -1.9944921731948853,
      "logps/chosen": -595.8699951171875,
      "logps/rejected": -1099.8599853515625,
      "loss": 0.293,
      "rewards/accuracies": 0.5774999856948853,
      "rewards/chosen": 5.341933727264404,
      "rewards/margins": 18.64950180053711,
      "rewards/rejected": -13.307567596435547,
      "step": 700
    },
    {
      "epoch": 1.37524557956778,
      "eval_logits/chosen": -1.7823712825775146,
      "eval_logits/rejected": -1.8346353769302368,
      "eval_logps/chosen": -804.67822265625,
      "eval_logps/rejected": -819.5619506835938,
      "eval_loss": 0.20272904634475708,
      "eval_rewards/accuracies": 0.7083333134651184,
      "eval_rewards/chosen": 6.901559829711914,
      "eval_rewards/margins": 11.084138870239258,
      "eval_rewards/rejected": -4.1825785636901855,
      "eval_runtime": 377.6355,
      "eval_samples_per_second": 1.141,
      "eval_steps_per_second": 0.572,
      "step": 700
    },
    {
      "epoch": 1.4734774066797642,
      "grad_norm": 0.0,
      "learning_rate": 1.7860619515673032e-07,
      "logits/chosen": -1.8395702838897705,
      "logits/rejected": -1.969667911529541,
      "logps/chosen": -551.7537231445312,
      "logps/rejected": -1085.00537109375,
      "loss": 0.3,
      "rewards/accuracies": 0.5674999952316284,
      "rewards/chosen": 5.192094326019287,
      "rewards/margins": 18.057546615600586,
      "rewards/rejected": -12.86545181274414,
      "step": 750
    },
    {
      "epoch": 1.4734774066797642,
      "eval_logits/chosen": -1.7821271419525146,
      "eval_logits/rejected": -1.8342194557189941,
      "eval_logps/chosen": -804.69677734375,
      "eval_logps/rejected": -819.6371459960938,
      "eval_loss": 0.20272937417030334,
      "eval_rewards/accuracies": 0.7083333134651184,
      "eval_rewards/chosen": 6.901213645935059,
      "eval_rewards/margins": 11.090865135192871,
      "eval_rewards/rejected": -4.1896538734436035,
      "eval_runtime": 376.5424,
      "eval_samples_per_second": 1.145,
      "eval_steps_per_second": 0.574,
      "step": 750
    },
    {
      "epoch": 1.5717092337917484,
      "grad_norm": 0.180908203125,
      "learning_rate": 1.1697777844051104e-07,
      "logits/chosen": -1.8340917825698853,
      "logits/rejected": -1.9704101085662842,
      "logps/chosen": -571.333740234375,
      "logps/rejected": -1077.0687255859375,
      "loss": 0.2912,
      "rewards/accuracies": 0.5799999833106995,
      "rewards/chosen": 5.163873195648193,
      "rewards/margins": 17.815139770507812,
      "rewards/rejected": -12.651267051696777,
      "step": 800
    },
    {
      "epoch": 1.5717092337917484,
      "eval_logits/chosen": -1.7813811302185059,
      "eval_logits/rejected": -1.8334101438522339,
      "eval_logps/chosen": -804.3715209960938,
      "eval_logps/rejected": -819.3778686523438,
      "eval_loss": 0.202724888920784,
      "eval_rewards/accuracies": 0.7083333134651184,
      "eval_rewards/chosen": 6.932766437530518,
      "eval_rewards/margins": 11.097650527954102,
      "eval_rewards/rejected": -4.1648850440979,
      "eval_runtime": 375.1719,
      "eval_samples_per_second": 1.149,
      "eval_steps_per_second": 0.576,
      "step": 800
    },
    {
      "epoch": 1.6699410609037328,
      "grad_norm": 0.05718994140625,
      "learning_rate": 6.698729810778064e-08,
      "logits/chosen": -1.8348926305770874,
      "logits/rejected": -1.9697363376617432,
      "logps/chosen": -583.3875122070312,
      "logps/rejected": -1078.706298828125,
      "loss": 0.2827,
      "rewards/accuracies": 0.5924999713897705,
      "rewards/chosen": 5.512423515319824,
      "rewards/margins": 18.3432674407959,
      "rewards/rejected": -12.83084487915039,
      "step": 850
    },
    {
      "epoch": 1.6699410609037328,
      "eval_logits/chosen": -1.7811685800552368,
      "eval_logits/rejected": -1.8332293033599854,
      "eval_logps/chosen": -804.3367919921875,
      "eval_logps/rejected": -819.3900756835938,
      "eval_loss": 0.20272420346736908,
      "eval_rewards/accuracies": 0.7083333134651184,
      "eval_rewards/chosen": 6.935370445251465,
      "eval_rewards/margins": 11.101933479309082,
      "eval_rewards/rejected": -4.166563034057617,
      "eval_runtime": 376.8901,
      "eval_samples_per_second": 1.144,
      "eval_steps_per_second": 0.573,
      "step": 850
    },
    {
      "epoch": 1.768172888015717,
      "grad_norm": 0.0,
      "learning_rate": 3.015368960704584e-08,
      "logits/chosen": -1.837314486503601,
      "logits/rejected": -1.9836230278015137,
      "logps/chosen": -580.686279296875,
      "logps/rejected": -1060.800048828125,
      "loss": 0.2912,
      "rewards/accuracies": 0.5799999833106995,
      "rewards/chosen": 5.313009738922119,
      "rewards/margins": 17.764554977416992,
      "rewards/rejected": -12.451545715332031,
      "step": 900
    },
    {
      "epoch": 1.768172888015717,
      "eval_logits/chosen": -1.781037449836731,
      "eval_logits/rejected": -1.8330711126327515,
      "eval_logps/chosen": -804.3738403320312,
      "eval_logps/rejected": -819.3929443359375,
      "eval_loss": 0.20272405445575714,
      "eval_rewards/accuracies": 0.7083333134651184,
      "eval_rewards/chosen": 6.9330267906188965,
      "eval_rewards/margins": 11.098607063293457,
      "eval_rewards/rejected": -4.165579795837402,
      "eval_runtime": 376.8443,
      "eval_samples_per_second": 1.144,
      "eval_steps_per_second": 0.573,
      "step": 900
    },
    {
      "epoch": 1.8664047151277012,
      "grad_norm": 0.33740234375,
      "learning_rate": 7.59612349389599e-09,
      "logits/chosen": -1.8312207460403442,
      "logits/rejected": -1.980654239654541,
      "logps/chosen": -517.9837646484375,
      "logps/rejected": -973.3825073242188,
      "loss": 0.3137,
      "rewards/accuracies": 0.5475000143051147,
      "rewards/chosen": 5.046032905578613,
      "rewards/margins": 17.248321533203125,
      "rewards/rejected": -12.202287673950195,
      "step": 950
    },
    {
      "epoch": 1.8664047151277012,
      "eval_logits/chosen": -1.7810285091400146,
      "eval_logits/rejected": -1.833039402961731,
      "eval_logps/chosen": -804.3425903320312,
      "eval_logps/rejected": -819.4027709960938,
      "eval_loss": 0.20272395014762878,
      "eval_rewards/accuracies": 0.7083333134651184,
      "eval_rewards/chosen": 6.937338829040527,
      "eval_rewards/margins": 11.10338020324707,
      "eval_rewards/rejected": -4.166042327880859,
      "eval_runtime": 376.6269,
      "eval_samples_per_second": 1.144,
      "eval_steps_per_second": 0.574,
      "step": 950
    },
    {
      "epoch": 1.9646365422396856,
      "grad_norm": 0.0003845691680908203,
      "learning_rate": 0.0,
      "logits/chosen": -1.833642601966858,
      "logits/rejected": -1.9818944931030273,
      "logps/chosen": -566.6400146484375,
      "logps/rejected": -1054.8111572265625,
      "loss": 0.2922,
      "rewards/accuracies": 0.5799999833106995,
      "rewards/chosen": 4.9063005447387695,
      "rewards/margins": 17.938020706176758,
      "rewards/rejected": -13.031720161437988,
      "step": 1000
    },
    {
      "epoch": 1.9646365422396856,
      "eval_logits/chosen": -1.7809877395629883,
      "eval_logits/rejected": -1.8330711126327515,
      "eval_logps/chosen": -804.3726806640625,
      "eval_logps/rejected": -819.4230346679688,
      "eval_loss": 0.20272374153137207,
      "eval_rewards/accuracies": 0.7083333134651184,
      "eval_rewards/chosen": 6.933157444000244,
      "eval_rewards/margins": 11.101253509521484,
      "eval_rewards/rejected": -4.168097019195557,
      "eval_runtime": 376.4188,
      "eval_samples_per_second": 1.145,
      "eval_steps_per_second": 0.574,
      "step": 1000
    },
    {
      "epoch": 1.9646365422396856,
      "step": 1000,
      "total_flos": 0.0,
      "train_loss": 0.31735729217529296,
      "train_runtime": 210247.589,
      "train_samples_per_second": 0.038,
      "train_steps_per_second": 0.005
    }
  ],
  "logging_steps": 50,
  "max_steps": 1000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}