tsavage68's picture
End of training
de077a9 verified
raw
history blame contribute delete
23.5 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9646365422396856,
"eval_steps": 50,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.09823182711198428,
"grad_norm": 4.67578125,
"learning_rate": 5e-07,
"logits/chosen": -1.882656216621399,
"logits/rejected": -2.0222558975219727,
"logps/chosen": -629.1900024414062,
"logps/rejected": -886.4169311523438,
"loss": 0.6184,
"rewards/accuracies": 0.39500001072883606,
"rewards/chosen": 1.2086933851242065,
"rewards/margins": 0.8173081278800964,
"rewards/rejected": 0.39138519763946533,
"step": 50
},
{
"epoch": 0.09823182711198428,
"eval_logits/chosen": -1.8351056575775146,
"eval_logits/rejected": -1.898075819015503,
"eval_logps/chosen": -827.69677734375,
"eval_logps/rejected": -769.7338256835938,
"eval_loss": 0.21162064373493195,
"eval_rewards/accuracies": 0.7060185074806213,
"eval_rewards/chosen": 4.598723888397217,
"eval_rewards/margins": 3.7964937686920166,
"eval_rewards/rejected": 0.802230179309845,
"eval_runtime": 376.4446,
"eval_samples_per_second": 1.145,
"eval_steps_per_second": 0.574,
"step": 50
},
{
"epoch": 0.19646365422396855,
"grad_norm": 1.43359375,
"learning_rate": 1e-06,
"logits/chosen": -1.9130176305770874,
"logits/rejected": -2.028027296066284,
"logps/chosen": -584.5574951171875,
"logps/rejected": -919.9600219726562,
"loss": 0.3184,
"rewards/accuracies": 0.5550000071525574,
"rewards/chosen": 4.297489166259766,
"rewards/margins": 5.647652626037598,
"rewards/rejected": -1.350163459777832,
"step": 100
},
{
"epoch": 0.19646365422396855,
"eval_logits/chosen": -1.830168604850769,
"eval_logits/rejected": -1.8947844505310059,
"eval_logps/chosen": -808.8148193359375,
"eval_logps/rejected": -775.2783813476562,
"eval_loss": 0.20374441146850586,
"eval_rewards/accuracies": 0.7083333134651184,
"eval_rewards/chosen": 6.487063884735107,
"eval_rewards/margins": 6.239492416381836,
"eval_rewards/rejected": 0.24757163226604462,
"eval_runtime": 376.9374,
"eval_samples_per_second": 1.143,
"eval_steps_per_second": 0.573,
"step": 100
},
{
"epoch": 0.29469548133595286,
"grad_norm": 0.00019991397857666016,
"learning_rate": 9.92403876506104e-07,
"logits/chosen": -1.8983594179153442,
"logits/rejected": -1.995449185371399,
"logps/chosen": -539.44873046875,
"logps/rejected": -860.8162231445312,
"loss": 0.3339,
"rewards/accuracies": 0.5274999737739563,
"rewards/chosen": 4.254204273223877,
"rewards/margins": 8.191503524780273,
"rewards/rejected": -3.937298536300659,
"step": 150
},
{
"epoch": 0.29469548133595286,
"eval_logits/chosen": -1.8110939264297485,
"eval_logits/rejected": -1.8730604648590088,
"eval_logps/chosen": -810.138916015625,
"eval_logps/rejected": -787.7071533203125,
"eval_loss": 0.20321600139141083,
"eval_rewards/accuracies": 0.7083333134651184,
"eval_rewards/chosen": 6.352499961853027,
"eval_rewards/margins": 7.34998083114624,
"eval_rewards/rejected": -0.9974803924560547,
"eval_runtime": 381.3297,
"eval_samples_per_second": 1.13,
"eval_steps_per_second": 0.566,
"step": 150
},
{
"epoch": 0.3929273084479371,
"grad_norm": 3.673828125,
"learning_rate": 9.698463103929541e-07,
"logits/chosen": -1.8827344179153442,
"logits/rejected": null,
"logps/chosen": -537.2374877929688,
"logps/rejected": -964.1453857421875,
"loss": 0.2992,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 4.266866683959961,
"rewards/margins": 12.264365196228027,
"rewards/rejected": -7.997498989105225,
"step": 200
},
{
"epoch": 0.3929273084479371,
"eval_logits/chosen": -1.8244402408599854,
"eval_logits/rejected": -1.88623046875,
"eval_logps/chosen": -812.432861328125,
"eval_logps/rejected": -809.0966186523438,
"eval_loss": 0.20289792120456696,
"eval_rewards/accuracies": 0.7083333134651184,
"eval_rewards/chosen": 6.12615442276001,
"eval_rewards/margins": 9.262307167053223,
"eval_rewards/rejected": -3.1361522674560547,
"eval_runtime": 375.9512,
"eval_samples_per_second": 1.146,
"eval_steps_per_second": 0.575,
"step": 200
},
{
"epoch": 0.4911591355599214,
"grad_norm": 3.783203125,
"learning_rate": 9.330127018922193e-07,
"logits/chosen": -1.8917089700698853,
"logits/rejected": -2.057480573654175,
"logps/chosen": -548.9512329101562,
"logps/rejected": -1068.0450439453125,
"loss": 0.3062,
"rewards/accuracies": 0.5600000023841858,
"rewards/chosen": 4.461262226104736,
"rewards/margins": 15.397392272949219,
"rewards/rejected": -10.93613052368164,
"step": 250
},
{
"epoch": 0.4911591355599214,
"eval_logits/chosen": -1.8248969316482544,
"eval_logits/rejected": -1.8831199407577515,
"eval_logps/chosen": -814.4699096679688,
"eval_logps/rejected": -816.1018676757812,
"eval_loss": 0.20285306870937347,
"eval_rewards/accuracies": 0.7083333134651184,
"eval_rewards/chosen": 5.92443323135376,
"eval_rewards/margins": 9.760541915893555,
"eval_rewards/rejected": -3.836108684539795,
"eval_runtime": 382.044,
"eval_samples_per_second": 1.128,
"eval_steps_per_second": 0.565,
"step": 250
},
{
"epoch": 0.5893909626719057,
"grad_norm": 0.0,
"learning_rate": 8.83022221559489e-07,
"logits/chosen": -1.8699413537979126,
"logits/rejected": -2.034736394882202,
"logps/chosen": -602.510009765625,
"logps/rejected": -1128.844970703125,
"loss": 0.2878,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": 4.663626670837402,
"rewards/margins": 18.277070999145508,
"rewards/rejected": -13.613446235656738,
"step": 300
},
{
"epoch": 0.5893909626719057,
"eval_logits/chosen": -1.8203803300857544,
"eval_logits/rejected": -1.8756872415542603,
"eval_logps/chosen": -814.0150756835938,
"eval_logps/rejected": -814.901611328125,
"eval_loss": 0.20328794419765472,
"eval_rewards/accuracies": 0.7083333134651184,
"eval_rewards/chosen": 5.968328475952148,
"eval_rewards/margins": 9.685802459716797,
"eval_rewards/rejected": -3.7174744606018066,
"eval_runtime": 377.271,
"eval_samples_per_second": 1.142,
"eval_steps_per_second": 0.573,
"step": 300
},
{
"epoch": 0.68762278978389,
"grad_norm": 0.0,
"learning_rate": 8.213938048432696e-07,
"logits/chosen": -1.8613183498382568,
"logits/rejected": -2.010195255279541,
"logps/chosen": -560.01123046875,
"logps/rejected": -1054.34130859375,
"loss": 0.2965,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 4.663458347320557,
"rewards/margins": 17.340160369873047,
"rewards/rejected": -12.676701545715332,
"step": 350
},
{
"epoch": 0.68762278978389,
"eval_logits/chosen": -1.7953965663909912,
"eval_logits/rejected": -1.8488408327102661,
"eval_logps/chosen": -810.86572265625,
"eval_logps/rejected": -815.11572265625,
"eval_loss": 0.2027529776096344,
"eval_rewards/accuracies": 0.7083333134651184,
"eval_rewards/chosen": 6.281754016876221,
"eval_rewards/margins": 10.01964282989502,
"eval_rewards/rejected": -3.7378885746002197,
"eval_runtime": 376.3789,
"eval_samples_per_second": 1.145,
"eval_steps_per_second": 0.574,
"step": 350
},
{
"epoch": 0.7858546168958742,
"grad_norm": 0.0,
"learning_rate": 7.5e-07,
"logits/chosen": -1.8544628620147705,
"logits/rejected": -1.9972070455551147,
"logps/chosen": -550.8362426757812,
"logps/rejected": -1053.2850341796875,
"loss": 0.3138,
"rewards/accuracies": 0.5475000143051147,
"rewards/chosen": 4.41979455947876,
"rewards/margins": 17.060997009277344,
"rewards/rejected": -12.641203880310059,
"step": 400
},
{
"epoch": 0.7858546168958742,
"eval_logits/chosen": -1.7978651523590088,
"eval_logits/rejected": -1.85009765625,
"eval_logps/chosen": -816.5775756835938,
"eval_logps/rejected": -824.407958984375,
"eval_loss": 0.20284250378608704,
"eval_rewards/accuracies": 0.7083333134651184,
"eval_rewards/chosen": 5.713149070739746,
"eval_rewards/margins": 10.380072593688965,
"eval_rewards/rejected": -4.666924953460693,
"eval_runtime": 376.6783,
"eval_samples_per_second": 1.144,
"eval_steps_per_second": 0.573,
"step": 400
},
{
"epoch": 0.8840864440078585,
"grad_norm": 0.0,
"learning_rate": 6.710100716628344e-07,
"logits/chosen": -1.8560644388198853,
"logits/rejected": -1.9615429639816284,
"logps/chosen": -552.2062377929688,
"logps/rejected": -1005.7550048828125,
"loss": 0.3279,
"rewards/accuracies": 0.5350000262260437,
"rewards/chosen": 4.51744270324707,
"rewards/margins": 16.83458137512207,
"rewards/rejected": -12.317138671875,
"step": 450
},
{
"epoch": 0.8840864440078585,
"eval_logits/chosen": -1.7807526588439941,
"eval_logits/rejected": -1.833708643913269,
"eval_logps/chosen": -808.5011596679688,
"eval_logps/rejected": -816.6145629882812,
"eval_loss": 0.20271265506744385,
"eval_rewards/accuracies": 0.7083333134651184,
"eval_rewards/chosen": 6.52016544342041,
"eval_rewards/margins": 10.406230926513672,
"eval_rewards/rejected": -3.8860654830932617,
"eval_runtime": 376.9509,
"eval_samples_per_second": 1.143,
"eval_steps_per_second": 0.573,
"step": 450
},
{
"epoch": 0.9823182711198428,
"grad_norm": 0.2420654296875,
"learning_rate": 5.868240888334652e-07,
"logits/chosen": -1.8405078649520874,
"logits/rejected": -1.9955663681030273,
"logps/chosen": -563.3287353515625,
"logps/rejected": -1027.811279296875,
"loss": 0.293,
"rewards/accuracies": 0.5774999856948853,
"rewards/chosen": 4.939021587371826,
"rewards/margins": 16.828310012817383,
"rewards/rejected": -11.889289855957031,
"step": 500
},
{
"epoch": 0.9823182711198428,
"eval_logits/chosen": -1.776493787765503,
"eval_logits/rejected": -1.8292959928512573,
"eval_logps/chosen": -803.6666870117188,
"eval_logps/rejected": -814.2019653320312,
"eval_loss": 0.20270462334156036,
"eval_rewards/accuracies": 0.7083333134651184,
"eval_rewards/chosen": 7.002051830291748,
"eval_rewards/margins": 10.646075248718262,
"eval_rewards/rejected": -3.6440227031707764,
"eval_runtime": 375.946,
"eval_samples_per_second": 1.146,
"eval_steps_per_second": 0.575,
"step": 500
},
{
"epoch": 1.080550098231827,
"grad_norm": 0.0,
"learning_rate": 5e-07,
"logits/chosen": -1.845117211341858,
"logits/rejected": null,
"logps/chosen": -590.0087280273438,
"logps/rejected": -1017.6812744140625,
"loss": 0.3064,
"rewards/accuracies": 0.5583333373069763,
"rewards/chosen": 5.3995184898376465,
"rewards/margins": 17.224445343017578,
"rewards/rejected": -11.82492446899414,
"step": 550
},
{
"epoch": 1.080550098231827,
"eval_logits/chosen": -1.7842611074447632,
"eval_logits/rejected": -1.837895154953003,
"eval_logps/chosen": -803.8020629882812,
"eval_logps/rejected": -816.7523193359375,
"eval_loss": 0.20272213220596313,
"eval_rewards/accuracies": 0.7083333134651184,
"eval_rewards/chosen": 6.988828182220459,
"eval_rewards/margins": 10.88884162902832,
"eval_rewards/rejected": -3.900012254714966,
"eval_runtime": 378.7944,
"eval_samples_per_second": 1.138,
"eval_steps_per_second": 0.57,
"step": 550
},
{
"epoch": 1.1787819253438114,
"grad_norm": 0.0,
"learning_rate": 4.131759111665348e-07,
"logits/chosen": -1.8424999713897705,
"logits/rejected": -1.9918944835662842,
"logps/chosen": -574.5387573242188,
"logps/rejected": -1103.74755859375,
"loss": 0.2844,
"rewards/accuracies": 0.5899999737739563,
"rewards/chosen": 5.156400203704834,
"rewards/margins": 17.812108993530273,
"rewards/rejected": -12.655708312988281,
"step": 600
},
{
"epoch": 1.1787819253438114,
"eval_logits/chosen": -1.7866662740707397,
"eval_logits/rejected": -1.8396222591400146,
"eval_logps/chosen": -805.9617919921875,
"eval_logps/rejected": -820.7268676757812,
"eval_loss": 0.202733114361763,
"eval_rewards/accuracies": 0.7083333134651184,
"eval_rewards/chosen": 6.773044586181641,
"eval_rewards/margins": 11.070393562316895,
"eval_rewards/rejected": -4.2973504066467285,
"eval_runtime": 375.1497,
"eval_samples_per_second": 1.149,
"eval_steps_per_second": 0.576,
"step": 600
},
{
"epoch": 1.2770137524557956,
"grad_norm": 0.0,
"learning_rate": 3.2898992833716563e-07,
"logits/chosen": -1.8421484231948853,
"logits/rejected": -1.972021460533142,
"logps/chosen": -555.9462280273438,
"logps/rejected": -1028.8499755859375,
"loss": 0.2972,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 5.088903903961182,
"rewards/margins": 17.384765625,
"rewards/rejected": -12.295862197875977,
"step": 650
},
{
"epoch": 1.2770137524557956,
"eval_logits/chosen": -1.7830674648284912,
"eval_logits/rejected": -1.8358244895935059,
"eval_logps/chosen": -804.9791870117188,
"eval_logps/rejected": -819.3784790039062,
"eval_loss": 0.20274707674980164,
"eval_rewards/accuracies": 0.7083333134651184,
"eval_rewards/chosen": 6.87298583984375,
"eval_rewards/margins": 11.037016868591309,
"eval_rewards/rejected": -4.164031505584717,
"eval_runtime": 375.3471,
"eval_samples_per_second": 1.148,
"eval_steps_per_second": 0.575,
"step": 650
},
{
"epoch": 1.37524557956778,
"grad_norm": 0.0,
"learning_rate": 2.500000000000001e-07,
"logits/chosen": -1.8376269340515137,
"logits/rejected": -1.9944921731948853,
"logps/chosen": -595.8699951171875,
"logps/rejected": -1099.8599853515625,
"loss": 0.293,
"rewards/accuracies": 0.5774999856948853,
"rewards/chosen": 5.341933727264404,
"rewards/margins": 18.64950180053711,
"rewards/rejected": -13.307567596435547,
"step": 700
},
{
"epoch": 1.37524557956778,
"eval_logits/chosen": -1.7823712825775146,
"eval_logits/rejected": -1.8346353769302368,
"eval_logps/chosen": -804.67822265625,
"eval_logps/rejected": -819.5619506835938,
"eval_loss": 0.20272904634475708,
"eval_rewards/accuracies": 0.7083333134651184,
"eval_rewards/chosen": 6.901559829711914,
"eval_rewards/margins": 11.084138870239258,
"eval_rewards/rejected": -4.1825785636901855,
"eval_runtime": 377.6355,
"eval_samples_per_second": 1.141,
"eval_steps_per_second": 0.572,
"step": 700
},
{
"epoch": 1.4734774066797642,
"grad_norm": 0.0,
"learning_rate": 1.7860619515673032e-07,
"logits/chosen": -1.8395702838897705,
"logits/rejected": -1.969667911529541,
"logps/chosen": -551.7537231445312,
"logps/rejected": -1085.00537109375,
"loss": 0.3,
"rewards/accuracies": 0.5674999952316284,
"rewards/chosen": 5.192094326019287,
"rewards/margins": 18.057546615600586,
"rewards/rejected": -12.86545181274414,
"step": 750
},
{
"epoch": 1.4734774066797642,
"eval_logits/chosen": -1.7821271419525146,
"eval_logits/rejected": -1.8342194557189941,
"eval_logps/chosen": -804.69677734375,
"eval_logps/rejected": -819.6371459960938,
"eval_loss": 0.20272937417030334,
"eval_rewards/accuracies": 0.7083333134651184,
"eval_rewards/chosen": 6.901213645935059,
"eval_rewards/margins": 11.090865135192871,
"eval_rewards/rejected": -4.1896538734436035,
"eval_runtime": 376.5424,
"eval_samples_per_second": 1.145,
"eval_steps_per_second": 0.574,
"step": 750
},
{
"epoch": 1.5717092337917484,
"grad_norm": 0.180908203125,
"learning_rate": 1.1697777844051104e-07,
"logits/chosen": -1.8340917825698853,
"logits/rejected": -1.9704101085662842,
"logps/chosen": -571.333740234375,
"logps/rejected": -1077.0687255859375,
"loss": 0.2912,
"rewards/accuracies": 0.5799999833106995,
"rewards/chosen": 5.163873195648193,
"rewards/margins": 17.815139770507812,
"rewards/rejected": -12.651267051696777,
"step": 800
},
{
"epoch": 1.5717092337917484,
"eval_logits/chosen": -1.7813811302185059,
"eval_logits/rejected": -1.8334101438522339,
"eval_logps/chosen": -804.3715209960938,
"eval_logps/rejected": -819.3778686523438,
"eval_loss": 0.202724888920784,
"eval_rewards/accuracies": 0.7083333134651184,
"eval_rewards/chosen": 6.932766437530518,
"eval_rewards/margins": 11.097650527954102,
"eval_rewards/rejected": -4.1648850440979,
"eval_runtime": 375.1719,
"eval_samples_per_second": 1.149,
"eval_steps_per_second": 0.576,
"step": 800
},
{
"epoch": 1.6699410609037328,
"grad_norm": 0.05718994140625,
"learning_rate": 6.698729810778064e-08,
"logits/chosen": -1.8348926305770874,
"logits/rejected": -1.9697363376617432,
"logps/chosen": -583.3875122070312,
"logps/rejected": -1078.706298828125,
"loss": 0.2827,
"rewards/accuracies": 0.5924999713897705,
"rewards/chosen": 5.512423515319824,
"rewards/margins": 18.3432674407959,
"rewards/rejected": -12.83084487915039,
"step": 850
},
{
"epoch": 1.6699410609037328,
"eval_logits/chosen": -1.7811685800552368,
"eval_logits/rejected": -1.8332293033599854,
"eval_logps/chosen": -804.3367919921875,
"eval_logps/rejected": -819.3900756835938,
"eval_loss": 0.20272420346736908,
"eval_rewards/accuracies": 0.7083333134651184,
"eval_rewards/chosen": 6.935370445251465,
"eval_rewards/margins": 11.101933479309082,
"eval_rewards/rejected": -4.166563034057617,
"eval_runtime": 376.8901,
"eval_samples_per_second": 1.144,
"eval_steps_per_second": 0.573,
"step": 850
},
{
"epoch": 1.768172888015717,
"grad_norm": 0.0,
"learning_rate": 3.015368960704584e-08,
"logits/chosen": -1.837314486503601,
"logits/rejected": -1.9836230278015137,
"logps/chosen": -580.686279296875,
"logps/rejected": -1060.800048828125,
"loss": 0.2912,
"rewards/accuracies": 0.5799999833106995,
"rewards/chosen": 5.313009738922119,
"rewards/margins": 17.764554977416992,
"rewards/rejected": -12.451545715332031,
"step": 900
},
{
"epoch": 1.768172888015717,
"eval_logits/chosen": -1.781037449836731,
"eval_logits/rejected": -1.8330711126327515,
"eval_logps/chosen": -804.3738403320312,
"eval_logps/rejected": -819.3929443359375,
"eval_loss": 0.20272405445575714,
"eval_rewards/accuracies": 0.7083333134651184,
"eval_rewards/chosen": 6.9330267906188965,
"eval_rewards/margins": 11.098607063293457,
"eval_rewards/rejected": -4.165579795837402,
"eval_runtime": 376.8443,
"eval_samples_per_second": 1.144,
"eval_steps_per_second": 0.573,
"step": 900
},
{
"epoch": 1.8664047151277012,
"grad_norm": 0.33740234375,
"learning_rate": 7.59612349389599e-09,
"logits/chosen": -1.8312207460403442,
"logits/rejected": -1.980654239654541,
"logps/chosen": -517.9837646484375,
"logps/rejected": -973.3825073242188,
"loss": 0.3137,
"rewards/accuracies": 0.5475000143051147,
"rewards/chosen": 5.046032905578613,
"rewards/margins": 17.248321533203125,
"rewards/rejected": -12.202287673950195,
"step": 950
},
{
"epoch": 1.8664047151277012,
"eval_logits/chosen": -1.7810285091400146,
"eval_logits/rejected": -1.833039402961731,
"eval_logps/chosen": -804.3425903320312,
"eval_logps/rejected": -819.4027709960938,
"eval_loss": 0.20272395014762878,
"eval_rewards/accuracies": 0.7083333134651184,
"eval_rewards/chosen": 6.937338829040527,
"eval_rewards/margins": 11.10338020324707,
"eval_rewards/rejected": -4.166042327880859,
"eval_runtime": 376.6269,
"eval_samples_per_second": 1.144,
"eval_steps_per_second": 0.574,
"step": 950
},
{
"epoch": 1.9646365422396856,
"grad_norm": 0.0003845691680908203,
"learning_rate": 0.0,
"logits/chosen": -1.833642601966858,
"logits/rejected": -1.9818944931030273,
"logps/chosen": -566.6400146484375,
"logps/rejected": -1054.8111572265625,
"loss": 0.2922,
"rewards/accuracies": 0.5799999833106995,
"rewards/chosen": 4.9063005447387695,
"rewards/margins": 17.938020706176758,
"rewards/rejected": -13.031720161437988,
"step": 1000
},
{
"epoch": 1.9646365422396856,
"eval_logits/chosen": -1.7809877395629883,
"eval_logits/rejected": -1.8330711126327515,
"eval_logps/chosen": -804.3726806640625,
"eval_logps/rejected": -819.4230346679688,
"eval_loss": 0.20272374153137207,
"eval_rewards/accuracies": 0.7083333134651184,
"eval_rewards/chosen": 6.933157444000244,
"eval_rewards/margins": 11.101253509521484,
"eval_rewards/rejected": -4.168097019195557,
"eval_runtime": 376.4188,
"eval_samples_per_second": 1.145,
"eval_steps_per_second": 0.574,
"step": 1000
},
{
"epoch": 1.9646365422396856,
"step": 1000,
"total_flos": 0.0,
"train_loss": 0.31735729217529296,
"train_runtime": 210247.589,
"train_samples_per_second": 0.038,
"train_steps_per_second": 0.005
}
],
"logging_steps": 50,
"max_steps": 1000,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}