|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 3125, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0032, |
|
"grad_norm": 72.06516054493869, |
|
"learning_rate": 1.99424e-05, |
|
"logits/chosen": 0.231048583984375, |
|
"logits/rejected": 0.40448302030563354, |
|
"logps/chosen": -263.57501220703125, |
|
"logps/rejected": -220.2624969482422, |
|
"loss": 0.8815, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.6032226085662842, |
|
"rewards/margins": 0.45408326387405396, |
|
"rewards/rejected": -2.056042432785034, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0064, |
|
"grad_norm": 67.31594654043894, |
|
"learning_rate": 1.9878400000000003e-05, |
|
"logits/chosen": 0.58917236328125, |
|
"logits/rejected": 0.6860595941543579, |
|
"logps/chosen": -325.4750061035156, |
|
"logps/rejected": -271.20001220703125, |
|
"loss": 0.8773, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -7.1884765625, |
|
"rewards/margins": 0.971118152141571, |
|
"rewards/rejected": -8.158594131469727, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0096, |
|
"grad_norm": 92.75698380810871, |
|
"learning_rate": 1.98144e-05, |
|
"logits/chosen": 0.5443969964981079, |
|
"logits/rejected": 0.6279541254043579, |
|
"logps/chosen": -360.07501220703125, |
|
"logps/rejected": -327.75, |
|
"loss": 1.0953, |
|
"rewards/accuracies": 0.6343749761581421, |
|
"rewards/chosen": -11.444531440734863, |
|
"rewards/margins": 0.8212890625, |
|
"rewards/rejected": -12.265625, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0128, |
|
"grad_norm": 62.32858351999384, |
|
"learning_rate": 1.97504e-05, |
|
"logits/chosen": 0.3295272886753082, |
|
"logits/rejected": 0.38934326171875, |
|
"logps/chosen": -312.5, |
|
"logps/rejected": -276.2749938964844, |
|
"loss": 1.4072, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -8.244531631469727, |
|
"rewards/margins": 0.801513671875, |
|
"rewards/rejected": -9.04296875, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.016, |
|
"grad_norm": 50.08386638109074, |
|
"learning_rate": 1.9686400000000002e-05, |
|
"logits/chosen": 0.71923828125, |
|
"logits/rejected": 0.766186535358429, |
|
"logps/chosen": -347.57501220703125, |
|
"logps/rejected": -320.4750061035156, |
|
"loss": 1.2033, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -11.116406440734863, |
|
"rewards/margins": 0.8971191644668579, |
|
"rewards/rejected": -12.012499809265137, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0192, |
|
"grad_norm": 94.5795753479825, |
|
"learning_rate": 1.96224e-05, |
|
"logits/chosen": 0.876757800579071, |
|
"logits/rejected": 0.9106689691543579, |
|
"logps/chosen": -431.54998779296875, |
|
"logps/rejected": -398.8999938964844, |
|
"loss": 1.2877, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -19.684375762939453, |
|
"rewards/margins": 1.2566406726837158, |
|
"rewards/rejected": -20.934375762939453, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0224, |
|
"grad_norm": 87.40075574771897, |
|
"learning_rate": 1.9558400000000002e-05, |
|
"logits/chosen": 0.45653611421585083, |
|
"logits/rejected": 0.5081542730331421, |
|
"logps/chosen": -383.7250061035156, |
|
"logps/rejected": -348.1000061035156, |
|
"loss": 1.7876, |
|
"rewards/accuracies": 0.5531250238418579, |
|
"rewards/chosen": -14.109375, |
|
"rewards/margins": 0.7588866949081421, |
|
"rewards/rejected": -14.8671875, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0256, |
|
"grad_norm": 63.266263022294886, |
|
"learning_rate": 1.94944e-05, |
|
"logits/chosen": 0.560455322265625, |
|
"logits/rejected": 0.558880627155304, |
|
"logps/chosen": -446.3500061035156, |
|
"logps/rejected": -406.8999938964844, |
|
"loss": 1.5243, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": -18.8125, |
|
"rewards/margins": 2.1751952171325684, |
|
"rewards/rejected": -20.993749618530273, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0288, |
|
"grad_norm": 68.41909447724173, |
|
"learning_rate": 1.9430400000000003e-05, |
|
"logits/chosen": 0.8628174066543579, |
|
"logits/rejected": 0.8074280023574829, |
|
"logps/chosen": -409.6000061035156, |
|
"logps/rejected": -388.79998779296875, |
|
"loss": 1.2459, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -15.942187309265137, |
|
"rewards/margins": 2.4151368141174316, |
|
"rewards/rejected": -18.362499237060547, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.032, |
|
"grad_norm": 54.65422389140606, |
|
"learning_rate": 1.93664e-05, |
|
"logits/chosen": NaN, |
|
"logits/rejected": 1.095117211341858, |
|
"logps/chosen": -339.8500061035156, |
|
"logps/rejected": -304.875, |
|
"loss": 1.3923, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -10.8828125, |
|
"rewards/margins": 1.1833007335662842, |
|
"rewards/rejected": -12.067187309265137, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0352, |
|
"grad_norm": 67.99626476930845, |
|
"learning_rate": 1.93024e-05, |
|
"logits/chosen": 0.854418933391571, |
|
"logits/rejected": 0.8412841558456421, |
|
"logps/chosen": -388.95001220703125, |
|
"logps/rejected": -357.0, |
|
"loss": 1.4211, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -16.135936737060547, |
|
"rewards/margins": 1.308984398841858, |
|
"rewards/rejected": -17.446874618530273, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.0384, |
|
"grad_norm": 71.03359350317054, |
|
"learning_rate": 1.9238400000000002e-05, |
|
"logits/chosen": 0.7735840082168579, |
|
"logits/rejected": 0.8270019292831421, |
|
"logps/chosen": -400.45001220703125, |
|
"logps/rejected": -384.3500061035156, |
|
"loss": 1.6411, |
|
"rewards/accuracies": 0.590624988079071, |
|
"rewards/chosen": -17.251562118530273, |
|
"rewards/margins": 1.4036133289337158, |
|
"rewards/rejected": -18.651561737060547, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0416, |
|
"grad_norm": 66.85635125220757, |
|
"learning_rate": 1.91744e-05, |
|
"logits/chosen": 1.1936523914337158, |
|
"logits/rejected": 1.174414038658142, |
|
"logps/chosen": -418.70001220703125, |
|
"logps/rejected": -375.42498779296875, |
|
"loss": 1.3968, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -16.337499618530273, |
|
"rewards/margins": 1.5578124523162842, |
|
"rewards/rejected": -17.892187118530273, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.0448, |
|
"grad_norm": 43.79339456634566, |
|
"learning_rate": 1.9110400000000003e-05, |
|
"logits/chosen": 1.4757812023162842, |
|
"logits/rejected": 1.500390648841858, |
|
"logps/chosen": -474.5, |
|
"logps/rejected": -437.6000061035156, |
|
"loss": 1.1547, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": -21.924999237060547, |
|
"rewards/margins": 2.652148485183716, |
|
"rewards/rejected": -24.584375381469727, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.048, |
|
"grad_norm": 39.19090939652315, |
|
"learning_rate": 1.90464e-05, |
|
"logits/chosen": 1.182226538658142, |
|
"logits/rejected": 1.2678711414337158, |
|
"logps/chosen": -504.54998779296875, |
|
"logps/rejected": -473.20001220703125, |
|
"loss": 1.0756, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -27.087499618530273, |
|
"rewards/margins": 2.50390625, |
|
"rewards/rejected": -29.584375381469727, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0512, |
|
"grad_norm": 81.33473530148507, |
|
"learning_rate": 1.89824e-05, |
|
"logits/chosen": NaN, |
|
"logits/rejected": 1.238867163658142, |
|
"logps/chosen": -455.0, |
|
"logps/rejected": -433.3500061035156, |
|
"loss": 1.2397, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -21.904687881469727, |
|
"rewards/margins": 1.8517577648162842, |
|
"rewards/rejected": -23.756250381469727, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.0544, |
|
"grad_norm": 34.92625419778924, |
|
"learning_rate": 1.89184e-05, |
|
"logits/chosen": 1.28662109375, |
|
"logits/rejected": 1.349023461341858, |
|
"logps/chosen": -479.45001220703125, |
|
"logps/rejected": -466.8500061035156, |
|
"loss": 1.2286, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -24.075000762939453, |
|
"rewards/margins": 2.2798829078674316, |
|
"rewards/rejected": -26.362499237060547, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.0576, |
|
"grad_norm": 65.20906568316, |
|
"learning_rate": 1.88544e-05, |
|
"logits/chosen": 1.4373047351837158, |
|
"logits/rejected": 1.4529297351837158, |
|
"logps/chosen": -493.0, |
|
"logps/rejected": -460.54998779296875, |
|
"loss": 1.4699, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -24.090625762939453, |
|
"rewards/margins": 2.7845702171325684, |
|
"rewards/rejected": -26.881250381469727, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.0608, |
|
"grad_norm": 68.96107516764462, |
|
"learning_rate": 1.8790400000000002e-05, |
|
"logits/chosen": 1.8361327648162842, |
|
"logits/rejected": 1.876562476158142, |
|
"logps/chosen": -427.25, |
|
"logps/rejected": -413.6499938964844, |
|
"loss": 1.5985, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": -17.9140625, |
|
"rewards/margins": 2.70458984375, |
|
"rewards/rejected": -20.620311737060547, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.064, |
|
"grad_norm": 24.34257611593426, |
|
"learning_rate": 1.87264e-05, |
|
"logits/chosen": 1.532812476158142, |
|
"logits/rejected": 1.641992211341858, |
|
"logps/chosen": -545.7000122070312, |
|
"logps/rejected": -510.20001220703125, |
|
"loss": 1.3766, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": -28.649999618530273, |
|
"rewards/margins": 2.236523389816284, |
|
"rewards/rejected": -30.881250381469727, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0672, |
|
"grad_norm": 56.274263875182086, |
|
"learning_rate": 1.86624e-05, |
|
"logits/chosen": 1.5291016101837158, |
|
"logits/rejected": 1.6222655773162842, |
|
"logps/chosen": -519.9000244140625, |
|
"logps/rejected": -483.04998779296875, |
|
"loss": 1.5423, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -27.978124618530273, |
|
"rewards/margins": 2.147265672683716, |
|
"rewards/rejected": -30.134374618530273, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.0704, |
|
"grad_norm": 43.12330507290986, |
|
"learning_rate": 1.85984e-05, |
|
"logits/chosen": 1.486914038658142, |
|
"logits/rejected": 1.56640625, |
|
"logps/chosen": -515.4000244140625, |
|
"logps/rejected": -502.6000061035156, |
|
"loss": 1.4434, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -26.762500762939453, |
|
"rewards/margins": 3.0335936546325684, |
|
"rewards/rejected": -29.787500381469727, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.0736, |
|
"grad_norm": 104.38928515088328, |
|
"learning_rate": 1.85344e-05, |
|
"logits/chosen": 1.5703125, |
|
"logits/rejected": 1.5720703601837158, |
|
"logps/chosen": -518.6500244140625, |
|
"logps/rejected": -507.8500061035156, |
|
"loss": 1.5691, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -28.631250381469727, |
|
"rewards/margins": 2.7933592796325684, |
|
"rewards/rejected": -31.403125762939453, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.0768, |
|
"grad_norm": 49.06004160121367, |
|
"learning_rate": 1.8470400000000002e-05, |
|
"logits/chosen": NaN, |
|
"logits/rejected": 1.42626953125, |
|
"logps/chosen": -547.9500122070312, |
|
"logps/rejected": -520.2999877929688, |
|
"loss": 1.6266, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -30.484375, |
|
"rewards/margins": 2.9068360328674316, |
|
"rewards/rejected": -33.375, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 50.08613409082756, |
|
"learning_rate": 1.84064e-05, |
|
"logits/chosen": 1.2625000476837158, |
|
"logits/rejected": 1.298242211341858, |
|
"logps/chosen": -526.0499877929688, |
|
"logps/rejected": -508.1499938964844, |
|
"loss": 1.7452, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -29.581249237060547, |
|
"rewards/margins": 1.581640601158142, |
|
"rewards/rejected": -31.178125381469727, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.0832, |
|
"grad_norm": 68.80666192170693, |
|
"learning_rate": 1.8342400000000002e-05, |
|
"logits/chosen": 1.3650391101837158, |
|
"logits/rejected": 1.4216797351837158, |
|
"logps/chosen": -537.9000244140625, |
|
"logps/rejected": -524.1500244140625, |
|
"loss": 1.9467, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": -28.493749618530273, |
|
"rewards/margins": 2.242480516433716, |
|
"rewards/rejected": -30.737499237060547, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.0864, |
|
"grad_norm": 48.44817365637928, |
|
"learning_rate": 1.82784e-05, |
|
"logits/chosen": 1.2820312976837158, |
|
"logits/rejected": 1.366601586341858, |
|
"logps/chosen": -518.2999877929688, |
|
"logps/rejected": -507.45001220703125, |
|
"loss": 1.5778, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -26.75, |
|
"rewards/margins": 3.1568360328674316, |
|
"rewards/rejected": -29.896875381469727, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.0896, |
|
"grad_norm": 55.2264342326786, |
|
"learning_rate": 1.82144e-05, |
|
"logits/chosen": 1.174218773841858, |
|
"logits/rejected": 1.2262694835662842, |
|
"logps/chosen": -531.7000122070312, |
|
"logps/rejected": -520.0999755859375, |
|
"loss": 2.2168, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -29.125, |
|
"rewards/margins": 3.4775390625, |
|
"rewards/rejected": -32.631248474121094, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.0928, |
|
"grad_norm": 17.509579683846354, |
|
"learning_rate": 1.81504e-05, |
|
"logits/chosen": 1.038964867591858, |
|
"logits/rejected": 1.112695336341858, |
|
"logps/chosen": -531.1500244140625, |
|
"logps/rejected": -501.45001220703125, |
|
"loss": 1.4645, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -28.693750381469727, |
|
"rewards/margins": 2.20703125, |
|
"rewards/rejected": -30.896875381469727, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.096, |
|
"grad_norm": 67.44310712108323, |
|
"learning_rate": 1.80864e-05, |
|
"logits/chosen": 1.015234351158142, |
|
"logits/rejected": 1.0890624523162842, |
|
"logps/chosen": -502.3500061035156, |
|
"logps/rejected": -494.20001220703125, |
|
"loss": 1.8911, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -27.140625, |
|
"rewards/margins": 2.791210889816284, |
|
"rewards/rejected": -29.921875, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.0992, |
|
"grad_norm": 66.76807934268572, |
|
"learning_rate": 1.8022400000000002e-05, |
|
"logits/chosen": 0.9276367425918579, |
|
"logits/rejected": 1.0182616710662842, |
|
"logps/chosen": -493.70001220703125, |
|
"logps/rejected": -474.70001220703125, |
|
"loss": 1.6675, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -24.915624618530273, |
|
"rewards/margins": 2.3501954078674316, |
|
"rewards/rejected": -27.265625, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.1024, |
|
"grad_norm": 38.01975820590516, |
|
"learning_rate": 1.7958400000000004e-05, |
|
"logits/chosen": 0.9652343988418579, |
|
"logits/rejected": 1.0524413585662842, |
|
"logps/chosen": -485.70001220703125, |
|
"logps/rejected": -477.1000061035156, |
|
"loss": 1.6211, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -26.571874618530273, |
|
"rewards/margins": 2.431933641433716, |
|
"rewards/rejected": -28.993749618530273, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.1056, |
|
"grad_norm": 47.41486546942003, |
|
"learning_rate": 1.78944e-05, |
|
"logits/chosen": 1.1033203601837158, |
|
"logits/rejected": 1.233300805091858, |
|
"logps/chosen": -560.7000122070312, |
|
"logps/rejected": -530.6500244140625, |
|
"loss": 1.3983, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -31.793750762939453, |
|
"rewards/margins": 3.658984422683716, |
|
"rewards/rejected": -35.459373474121094, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.1088, |
|
"grad_norm": 54.10847973092568, |
|
"learning_rate": 1.78304e-05, |
|
"logits/chosen": 1.1881835460662842, |
|
"logits/rejected": 1.345117211341858, |
|
"logps/chosen": -569.7999877929688, |
|
"logps/rejected": -572.7999877929688, |
|
"loss": 1.6905, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -32.734375, |
|
"rewards/margins": 4.145117282867432, |
|
"rewards/rejected": -36.890625, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.112, |
|
"grad_norm": 44.154579996950346, |
|
"learning_rate": 1.77664e-05, |
|
"logits/chosen": 1.252832055091858, |
|
"logits/rejected": 1.418554663658142, |
|
"logps/chosen": -633.3499755859375, |
|
"logps/rejected": -637.9500122070312, |
|
"loss": 1.984, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": -38.146873474121094, |
|
"rewards/margins": 3.931640625, |
|
"rewards/rejected": -42.08124923706055, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.1152, |
|
"grad_norm": 56.29869734936286, |
|
"learning_rate": 1.7702400000000002e-05, |
|
"logits/chosen": 0.9906250238418579, |
|
"logits/rejected": 1.1129882335662842, |
|
"logps/chosen": -557.0999755859375, |
|
"logps/rejected": -543.5999755859375, |
|
"loss": 1.0955, |
|
"rewards/accuracies": 0.6468750238418579, |
|
"rewards/chosen": -32.025001525878906, |
|
"rewards/margins": 3.8828125, |
|
"rewards/rejected": -35.90625, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1184, |
|
"grad_norm": 24.075696906183882, |
|
"learning_rate": 1.7638400000000004e-05, |
|
"logits/chosen": 0.8721679449081421, |
|
"logits/rejected": 0.9461914300918579, |
|
"logps/chosen": -580.7000122070312, |
|
"logps/rejected": -578.75, |
|
"loss": 1.3949, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -32.259376525878906, |
|
"rewards/margins": 4.385937690734863, |
|
"rewards/rejected": -36.64374923706055, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.1216, |
|
"grad_norm": 72.29804215542902, |
|
"learning_rate": 1.75744e-05, |
|
"logits/chosen": 0.9410156011581421, |
|
"logits/rejected": 1.0275390148162842, |
|
"logps/chosen": -564.0, |
|
"logps/rejected": -546.0999755859375, |
|
"loss": 1.4662, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -31.365625381469727, |
|
"rewards/margins": 3.5619139671325684, |
|
"rewards/rejected": -34.91875076293945, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1248, |
|
"grad_norm": 32.08984891925103, |
|
"learning_rate": 1.75104e-05, |
|
"logits/chosen": 0.79833984375, |
|
"logits/rejected": 1.019921898841858, |
|
"logps/chosen": -573.5, |
|
"logps/rejected": -545.5, |
|
"loss": 1.2478, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -34.00312423706055, |
|
"rewards/margins": 3.6591796875, |
|
"rewards/rejected": -37.662498474121094, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.128, |
|
"grad_norm": 53.03788678986745, |
|
"learning_rate": 1.74464e-05, |
|
"logits/chosen": 0.754638671875, |
|
"logits/rejected": 0.9137207269668579, |
|
"logps/chosen": -633.7000122070312, |
|
"logps/rejected": -602.1500244140625, |
|
"loss": 1.9865, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -37.20624923706055, |
|
"rewards/margins": 2.9712891578674316, |
|
"rewards/rejected": -40.19062423706055, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1312, |
|
"grad_norm": 28.774739819689344, |
|
"learning_rate": 1.73824e-05, |
|
"logits/chosen": 0.962207019329071, |
|
"logits/rejected": 1.1422851085662842, |
|
"logps/chosen": -637.7000122070312, |
|
"logps/rejected": -610.0999755859375, |
|
"loss": 1.374, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -38.53125, |
|
"rewards/margins": 3.3929686546325684, |
|
"rewards/rejected": -41.931251525878906, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.1344, |
|
"grad_norm": 52.18984822269678, |
|
"learning_rate": 1.7318400000000003e-05, |
|
"logits/chosen": 0.9715820550918579, |
|
"logits/rejected": 1.06298828125, |
|
"logps/chosen": -584.4500122070312, |
|
"logps/rejected": -564.0, |
|
"loss": 1.5886, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -33.646873474121094, |
|
"rewards/margins": 3.372851610183716, |
|
"rewards/rejected": -37.01874923706055, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.1376, |
|
"grad_norm": 50.15500041735182, |
|
"learning_rate": 1.7254400000000002e-05, |
|
"logits/chosen": 0.8271484375, |
|
"logits/rejected": 0.9488281011581421, |
|
"logps/chosen": -600.5499877929688, |
|
"logps/rejected": -582.4500122070312, |
|
"loss": 2.058, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -36.96875, |
|
"rewards/margins": 2.6484375, |
|
"rewards/rejected": -39.618751525878906, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.1408, |
|
"grad_norm": 63.480323123245526, |
|
"learning_rate": 1.71904e-05, |
|
"logits/chosen": 0.8534179925918579, |
|
"logits/rejected": 0.9185546636581421, |
|
"logps/chosen": -650.9000244140625, |
|
"logps/rejected": -622.9000244140625, |
|
"loss": 1.9219, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -40.29999923706055, |
|
"rewards/margins": 1.6640625, |
|
"rewards/rejected": -41.98749923706055, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.144, |
|
"grad_norm": 83.77496034505918, |
|
"learning_rate": 1.71264e-05, |
|
"logits/chosen": 0.799267590045929, |
|
"logits/rejected": 0.910839855670929, |
|
"logps/chosen": -668.5, |
|
"logps/rejected": -645.5999755859375, |
|
"loss": 1.8358, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -41.368751525878906, |
|
"rewards/margins": 2.6468749046325684, |
|
"rewards/rejected": -44.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.1472, |
|
"grad_norm": 29.813610383691678, |
|
"learning_rate": 1.70624e-05, |
|
"logits/chosen": 0.5895019769668579, |
|
"logits/rejected": 0.6923828125, |
|
"logps/chosen": -625.2999877929688, |
|
"logps/rejected": -621.0499877929688, |
|
"loss": 1.4774, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -39.506248474121094, |
|
"rewards/margins": 2.995312452316284, |
|
"rewards/rejected": -42.5, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.1504, |
|
"grad_norm": 180.7604639291859, |
|
"learning_rate": 1.6998400000000003e-05, |
|
"logits/chosen": 0.34160155057907104, |
|
"logits/rejected": 0.4342285096645355, |
|
"logps/chosen": -605.0499877929688, |
|
"logps/rejected": -602.5999755859375, |
|
"loss": 1.6735, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -36.631248474121094, |
|
"rewards/margins": 4.542578220367432, |
|
"rewards/rejected": -41.150001525878906, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.1536, |
|
"grad_norm": 48.63799038212587, |
|
"learning_rate": 1.6934400000000002e-05, |
|
"logits/chosen": 0.3518127501010895, |
|
"logits/rejected": 0.4427856504917145, |
|
"logps/chosen": -612.0999755859375, |
|
"logps/rejected": -590.0999755859375, |
|
"loss": 2.0439, |
|
"rewards/accuracies": 0.6343749761581421, |
|
"rewards/chosen": -37.775001525878906, |
|
"rewards/margins": 2.4281249046325684, |
|
"rewards/rejected": -40.20624923706055, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.1568, |
|
"grad_norm": 37.41673163568628, |
|
"learning_rate": 1.6870400000000004e-05, |
|
"logits/chosen": 0.335458368062973, |
|
"logits/rejected": 0.43060302734375, |
|
"logps/chosen": -610.2999877929688, |
|
"logps/rejected": -592.8499755859375, |
|
"loss": 1.6041, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -35.609375, |
|
"rewards/margins": 3.303906202316284, |
|
"rewards/rejected": -38.91875076293945, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 26.291102862006582, |
|
"learning_rate": 1.68064e-05, |
|
"logits/chosen": 0.36564940214157104, |
|
"logits/rejected": 0.4826454222202301, |
|
"logps/chosen": -674.7000122070312, |
|
"logps/rejected": -653.4000244140625, |
|
"loss": 1.5988, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -42.32500076293945, |
|
"rewards/margins": 3.46875, |
|
"rewards/rejected": -45.79999923706055, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.1632, |
|
"grad_norm": 39.95073291435577, |
|
"learning_rate": 1.67424e-05, |
|
"logits/chosen": 0.33665770292282104, |
|
"logits/rejected": 0.4792236387729645, |
|
"logps/chosen": -629.9000244140625, |
|
"logps/rejected": -621.3499755859375, |
|
"loss": 1.3085, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -39.09687423706055, |
|
"rewards/margins": 3.880078077316284, |
|
"rewards/rejected": -42.95000076293945, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.1664, |
|
"grad_norm": 42.0639206744406, |
|
"learning_rate": 1.6678400000000003e-05, |
|
"logits/chosen": 0.44868165254592896, |
|
"logits/rejected": 0.5293945074081421, |
|
"logps/chosen": -617.1500244140625, |
|
"logps/rejected": -606.7000122070312, |
|
"loss": 1.849, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -37.19062423706055, |
|
"rewards/margins": 3.7105469703674316, |
|
"rewards/rejected": -40.875, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.1696, |
|
"grad_norm": 46.5708491699604, |
|
"learning_rate": 1.66144e-05, |
|
"logits/chosen": 0.6026366949081421, |
|
"logits/rejected": 0.741015613079071, |
|
"logps/chosen": -633.7999877929688, |
|
"logps/rejected": -623.75, |
|
"loss": 1.8425, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -39.5625, |
|
"rewards/margins": 2.8218750953674316, |
|
"rewards/rejected": -42.35625076293945, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.1728, |
|
"grad_norm": 44.08411871984865, |
|
"learning_rate": 1.6550400000000003e-05, |
|
"logits/chosen": 0.532763659954071, |
|
"logits/rejected": 0.61279296875, |
|
"logps/chosen": -601.9500122070312, |
|
"logps/rejected": -597.0499877929688, |
|
"loss": 1.6733, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -34.59375, |
|
"rewards/margins": 3.856640577316284, |
|
"rewards/rejected": -38.44062423706055, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.176, |
|
"grad_norm": 51.086462511837894, |
|
"learning_rate": 1.6486400000000002e-05, |
|
"logits/chosen": 0.5535888671875, |
|
"logits/rejected": 0.720458984375, |
|
"logps/chosen": -616.75, |
|
"logps/rejected": -600.2999877929688, |
|
"loss": 1.8463, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -38.243751525878906, |
|
"rewards/margins": 3.561718702316284, |
|
"rewards/rejected": -41.79999923706055, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.1792, |
|
"grad_norm": 98.59034152091577, |
|
"learning_rate": 1.64224e-05, |
|
"logits/chosen": 0.24627113342285156, |
|
"logits/rejected": 0.3730407655239105, |
|
"logps/chosen": -631.0999755859375, |
|
"logps/rejected": -630.5, |
|
"loss": 1.427, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": -39.525001525878906, |
|
"rewards/margins": 4.168749809265137, |
|
"rewards/rejected": -43.70624923706055, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.1824, |
|
"grad_norm": 51.10415695723496, |
|
"learning_rate": 1.6358400000000002e-05, |
|
"logits/chosen": 0.026998138055205345, |
|
"logits/rejected": 0.14695052802562714, |
|
"logps/chosen": -660.7000122070312, |
|
"logps/rejected": -647.7999877929688, |
|
"loss": 1.8255, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -41.57500076293945, |
|
"rewards/margins": 3.4976563453674316, |
|
"rewards/rejected": -45.10625076293945, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.1856, |
|
"grad_norm": 47.93213643256416, |
|
"learning_rate": 1.62944e-05, |
|
"logits/chosen": 0.11405792087316513, |
|
"logits/rejected": 0.22451934218406677, |
|
"logps/chosen": -651.5999755859375, |
|
"logps/rejected": -628.0, |
|
"loss": 2.0114, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -39.881248474121094, |
|
"rewards/margins": 3.618359327316284, |
|
"rewards/rejected": -43.493751525878906, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.1888, |
|
"grad_norm": 83.41007510418964, |
|
"learning_rate": 1.6230400000000003e-05, |
|
"logits/chosen": 0.19649505615234375, |
|
"logits/rejected": 0.3397933840751648, |
|
"logps/chosen": -627.3499755859375, |
|
"logps/rejected": -606.0499877929688, |
|
"loss": 1.463, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -37.96875, |
|
"rewards/margins": 2.376953125, |
|
"rewards/rejected": -40.36249923706055, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.192, |
|
"grad_norm": 28.81403201309904, |
|
"learning_rate": 1.61664e-05, |
|
"logits/chosen": 0.1689552366733551, |
|
"logits/rejected": 0.2696182131767273, |
|
"logps/chosen": -658.0, |
|
"logps/rejected": -635.9500122070312, |
|
"loss": 1.6455, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -41.20624923706055, |
|
"rewards/margins": 3.1421875953674316, |
|
"rewards/rejected": -44.318748474121094, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1952, |
|
"grad_norm": 61.34725082837701, |
|
"learning_rate": 1.61024e-05, |
|
"logits/chosen": 0.09075927734375, |
|
"logits/rejected": 0.26406097412109375, |
|
"logps/chosen": -682.7000122070312, |
|
"logps/rejected": -639.0, |
|
"loss": 1.7396, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -41.756248474121094, |
|
"rewards/margins": 3.12890625, |
|
"rewards/rejected": -44.875, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.1984, |
|
"grad_norm": 42.24429252671063, |
|
"learning_rate": 1.6038400000000002e-05, |
|
"logits/chosen": 0.2577148377895355, |
|
"logits/rejected": 0.35584717988967896, |
|
"logps/chosen": -616.1500244140625, |
|
"logps/rejected": -575.7000122070312, |
|
"loss": 1.3043, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -36.234375, |
|
"rewards/margins": 3.4593749046325684, |
|
"rewards/rejected": -39.67499923706055, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.2016, |
|
"grad_norm": 32.5713838251784, |
|
"learning_rate": 1.59744e-05, |
|
"logits/chosen": 0.294912725687027, |
|
"logits/rejected": 0.4835205078125, |
|
"logps/chosen": -614.9000244140625, |
|
"logps/rejected": -621.5499877929688, |
|
"loss": 1.4359, |
|
"rewards/accuracies": 0.6781250238418579, |
|
"rewards/chosen": -39.1875, |
|
"rewards/margins": 4.471093654632568, |
|
"rewards/rejected": -43.63750076293945, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.2048, |
|
"grad_norm": 42.595929409719815, |
|
"learning_rate": 1.5910400000000003e-05, |
|
"logits/chosen": 0.18522796034812927, |
|
"logits/rejected": 0.33216553926467896, |
|
"logps/chosen": -659.5999755859375, |
|
"logps/rejected": -664.9000244140625, |
|
"loss": 1.4863, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -41.0, |
|
"rewards/margins": 4.653906345367432, |
|
"rewards/rejected": -45.650001525878906, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.208, |
|
"grad_norm": 31.20306145083759, |
|
"learning_rate": 1.58464e-05, |
|
"logits/chosen": 0.2649597227573395, |
|
"logits/rejected": 0.4009460508823395, |
|
"logps/chosen": -678.4000244140625, |
|
"logps/rejected": -680.5999755859375, |
|
"loss": 1.227, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -43.875, |
|
"rewards/margins": 5.211718559265137, |
|
"rewards/rejected": -49.10625076293945, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.2112, |
|
"grad_norm": 38.78603097715352, |
|
"learning_rate": 1.5782400000000003e-05, |
|
"logits/chosen": 0.11473388969898224, |
|
"logits/rejected": 0.24262085556983948, |
|
"logps/chosen": -662.9500122070312, |
|
"logps/rejected": -672.4000244140625, |
|
"loss": 1.5832, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -41.118751525878906, |
|
"rewards/margins": 4.8125, |
|
"rewards/rejected": -45.9375, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.2144, |
|
"grad_norm": 57.46675514687721, |
|
"learning_rate": 1.5718400000000002e-05, |
|
"logits/chosen": 0.120941162109375, |
|
"logits/rejected": 0.29290771484375, |
|
"logps/chosen": -641.0999755859375, |
|
"logps/rejected": -655.0999755859375, |
|
"loss": 1.9178, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -40.63750076293945, |
|
"rewards/margins": 3.264453172683716, |
|
"rewards/rejected": -43.92499923706055, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.2176, |
|
"grad_norm": 43.95416076450901, |
|
"learning_rate": 1.56544e-05, |
|
"logits/chosen": 0.10860595852136612, |
|
"logits/rejected": 0.26765745878219604, |
|
"logps/chosen": -647.0999755859375, |
|
"logps/rejected": -625.7000122070312, |
|
"loss": 1.9465, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -40.79999923706055, |
|
"rewards/margins": 3.3539061546325684, |
|
"rewards/rejected": -44.131248474121094, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.2208, |
|
"grad_norm": 40.455817117237345, |
|
"learning_rate": 1.5590400000000002e-05, |
|
"logits/chosen": 0.13580170273780823, |
|
"logits/rejected": 0.33785706758499146, |
|
"logps/chosen": -683.7999877929688, |
|
"logps/rejected": -664.2000122070312, |
|
"loss": 1.6045, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -42.881248474121094, |
|
"rewards/margins": 4.641797065734863, |
|
"rewards/rejected": -47.525001525878906, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.224, |
|
"grad_norm": 43.25947452615599, |
|
"learning_rate": 1.55264e-05, |
|
"logits/chosen": 0.06497345119714737, |
|
"logits/rejected": 0.23727111518383026, |
|
"logps/chosen": -646.2999877929688, |
|
"logps/rejected": -632.7999877929688, |
|
"loss": 1.8122, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -40.912498474121094, |
|
"rewards/margins": 3.6683592796325684, |
|
"rewards/rejected": -44.58124923706055, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.2272, |
|
"grad_norm": 37.454723687237205, |
|
"learning_rate": 1.5462400000000003e-05, |
|
"logits/chosen": 0.18839111924171448, |
|
"logits/rejected": 0.360678106546402, |
|
"logps/chosen": -704.4000244140625, |
|
"logps/rejected": -686.7999877929688, |
|
"loss": 1.7326, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -47.712501525878906, |
|
"rewards/margins": 2.428906202316284, |
|
"rewards/rejected": -50.14374923706055, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.2304, |
|
"grad_norm": 45.54945982579894, |
|
"learning_rate": 1.53984e-05, |
|
"logits/chosen": 0.1751358062028885, |
|
"logits/rejected": 0.37080687284469604, |
|
"logps/chosen": -681.7999877929688, |
|
"logps/rejected": -666.2999877929688, |
|
"loss": 1.4359, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -44.95624923706055, |
|
"rewards/margins": 3.3218750953674316, |
|
"rewards/rejected": -48.28125, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.2336, |
|
"grad_norm": 66.48742759140517, |
|
"learning_rate": 1.53344e-05, |
|
"logits/chosen": 0.18069687485694885, |
|
"logits/rejected": 0.3331451416015625, |
|
"logps/chosen": -677.0, |
|
"logps/rejected": -663.7000122070312, |
|
"loss": 2.234, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -42.4375, |
|
"rewards/margins": 3.743359327316284, |
|
"rewards/rejected": -46.17499923706055, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.2368, |
|
"grad_norm": 37.522327209701274, |
|
"learning_rate": 1.5270400000000002e-05, |
|
"logits/chosen": 0.10627365112304688, |
|
"logits/rejected": 0.275918573141098, |
|
"logps/chosen": -695.7999877929688, |
|
"logps/rejected": -710.0, |
|
"loss": 1.3593, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": -45.818748474121094, |
|
"rewards/margins": 4.501172065734863, |
|
"rewards/rejected": -50.32500076293945, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 38.214598626293, |
|
"learning_rate": 1.52064e-05, |
|
"logits/chosen": -0.07285461574792862, |
|
"logits/rejected": 0.07509155571460724, |
|
"logps/chosen": -668.2999877929688, |
|
"logps/rejected": -642.2999877929688, |
|
"loss": 1.9895, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -41.921875, |
|
"rewards/margins": 3.208203077316284, |
|
"rewards/rejected": -45.131248474121094, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.2432, |
|
"grad_norm": 18.45868948841473, |
|
"learning_rate": 1.5142400000000001e-05, |
|
"logits/chosen": -0.21304932236671448, |
|
"logits/rejected": -0.12935790419578552, |
|
"logps/chosen": -611.7999877929688, |
|
"logps/rejected": -620.1500244140625, |
|
"loss": 1.1468, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -37.89374923706055, |
|
"rewards/margins": 4.466406345367432, |
|
"rewards/rejected": -42.38750076293945, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.2464, |
|
"grad_norm": 50.50110374912833, |
|
"learning_rate": 1.5078400000000001e-05, |
|
"logits/chosen": NaN, |
|
"logits/rejected": -0.126597598195076, |
|
"logps/chosen": -665.5499877929688, |
|
"logps/rejected": -741.0999755859375, |
|
"loss": 1.6933, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -40.900001525878906, |
|
"rewards/margins": 4.021874904632568, |
|
"rewards/rejected": -44.92499923706055, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.2496, |
|
"grad_norm": 36.323914322167525, |
|
"learning_rate": 1.5014400000000001e-05, |
|
"logits/chosen": -0.2960983216762543, |
|
"logits/rejected": -0.10639800876379013, |
|
"logps/chosen": -677.0, |
|
"logps/rejected": -660.6500244140625, |
|
"loss": 1.3649, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -42.09375, |
|
"rewards/margins": 4.171093940734863, |
|
"rewards/rejected": -46.23749923706055, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.2528, |
|
"grad_norm": 40.98554240207498, |
|
"learning_rate": 1.49504e-05, |
|
"logits/chosen": -0.396575927734375, |
|
"logits/rejected": -0.2843994200229645, |
|
"logps/chosen": -634.3499755859375, |
|
"logps/rejected": -630.9000244140625, |
|
"loss": 1.2482, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -39.26874923706055, |
|
"rewards/margins": 4.376172065734863, |
|
"rewards/rejected": -43.662498474121094, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.256, |
|
"grad_norm": 68.44628531453232, |
|
"learning_rate": 1.48864e-05, |
|
"logits/chosen": -0.35917967557907104, |
|
"logits/rejected": -0.19907227158546448, |
|
"logps/chosen": -623.25, |
|
"logps/rejected": -606.4500122070312, |
|
"loss": 1.6582, |
|
"rewards/accuracies": 0.6781250238418579, |
|
"rewards/chosen": -38.868751525878906, |
|
"rewards/margins": 2.953125, |
|
"rewards/rejected": -41.837501525878906, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.2592, |
|
"grad_norm": 29.704417097607617, |
|
"learning_rate": 1.48224e-05, |
|
"logits/chosen": -0.3877929747104645, |
|
"logits/rejected": -0.211151123046875, |
|
"logps/chosen": -661.5999755859375, |
|
"logps/rejected": -656.0999755859375, |
|
"loss": 1.1269, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -42.39374923706055, |
|
"rewards/margins": 4.458984375, |
|
"rewards/rejected": -46.849998474121094, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.2624, |
|
"grad_norm": 48.15003119838898, |
|
"learning_rate": 1.4758400000000001e-05, |
|
"logits/chosen": -0.45916748046875, |
|
"logits/rejected": -0.3183044493198395, |
|
"logps/chosen": -699.2000122070312, |
|
"logps/rejected": -697.9000244140625, |
|
"loss": 1.8473, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -45.64374923706055, |
|
"rewards/margins": 3.991406202316284, |
|
"rewards/rejected": -49.63750076293945, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.2656, |
|
"grad_norm": 44.613017899690675, |
|
"learning_rate": 1.4694400000000003e-05, |
|
"logits/chosen": -0.3657287657260895, |
|
"logits/rejected": NaN, |
|
"logps/chosen": -659.8499755859375, |
|
"logps/rejected": -612.7000122070312, |
|
"loss": 5.7769, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -43.118751525878906, |
|
"rewards/margins": -1.235742211341858, |
|
"rewards/rejected": -41.900001525878906, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.2688, |
|
"grad_norm": 43.75173055134459, |
|
"learning_rate": 1.46304e-05, |
|
"logits/chosen": -0.40928345918655396, |
|
"logits/rejected": -0.3201583921909332, |
|
"logps/chosen": -616.7999877929688, |
|
"logps/rejected": -606.5499877929688, |
|
"loss": 1.6114, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -38.165626525878906, |
|
"rewards/margins": 3.885546922683716, |
|
"rewards/rejected": -42.056251525878906, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.272, |
|
"grad_norm": 18.530340431083207, |
|
"learning_rate": 1.45664e-05, |
|
"logits/chosen": -0.35566407442092896, |
|
"logits/rejected": -0.286752313375473, |
|
"logps/chosen": -672.0999755859375, |
|
"logps/rejected": -664.2999877929688, |
|
"loss": 1.4289, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": -44.837501525878906, |
|
"rewards/margins": 3.283203125, |
|
"rewards/rejected": -48.125, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.2752, |
|
"grad_norm": 40.76302621743691, |
|
"learning_rate": 1.45024e-05, |
|
"logits/chosen": -0.364227294921875, |
|
"logits/rejected": -0.25963133573532104, |
|
"logps/chosen": -687.2000122070312, |
|
"logps/rejected": -690.2000122070312, |
|
"loss": 1.6557, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -45.20624923706055, |
|
"rewards/margins": 4.821875095367432, |
|
"rewards/rejected": -50.025001525878906, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.2784, |
|
"grad_norm": 40.8932489788095, |
|
"learning_rate": 1.44384e-05, |
|
"logits/chosen": -0.4478515684604645, |
|
"logits/rejected": NaN, |
|
"logps/chosen": -651.0999755859375, |
|
"logps/rejected": -633.0499877929688, |
|
"loss": 1.5459, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": -40.78125, |
|
"rewards/margins": 4.033593654632568, |
|
"rewards/rejected": -44.82500076293945, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.2816, |
|
"grad_norm": 49.01984548176554, |
|
"learning_rate": 1.4374400000000003e-05, |
|
"logits/chosen": NaN, |
|
"logits/rejected": -0.3775878846645355, |
|
"logps/chosen": -659.9500122070312, |
|
"logps/rejected": -774.4000244140625, |
|
"loss": 1.5128, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -41.37812423706055, |
|
"rewards/margins": 6.240234375, |
|
"rewards/rejected": -47.618751525878906, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.2848, |
|
"grad_norm": 29.90375315285076, |
|
"learning_rate": 1.4310400000000003e-05, |
|
"logits/chosen": -0.4033203125, |
|
"logits/rejected": -0.25498658418655396, |
|
"logps/chosen": -656.0999755859375, |
|
"logps/rejected": -643.2000122070312, |
|
"loss": 1.2458, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -42.64374923706055, |
|
"rewards/margins": 3.559765577316284, |
|
"rewards/rejected": -46.181251525878906, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.288, |
|
"grad_norm": 123.75390388742322, |
|
"learning_rate": 1.42464e-05, |
|
"logits/chosen": -0.3350830078125, |
|
"logits/rejected": -0.18463440239429474, |
|
"logps/chosen": -632.7999877929688, |
|
"logps/rejected": -634.0999755859375, |
|
"loss": 1.4703, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -38.181251525878906, |
|
"rewards/margins": 6.01953125, |
|
"rewards/rejected": -44.20000076293945, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.2912, |
|
"grad_norm": 35.26785703173216, |
|
"learning_rate": 1.41824e-05, |
|
"logits/chosen": 0.01738281175494194, |
|
"logits/rejected": 0.12006988376379013, |
|
"logps/chosen": -690.0, |
|
"logps/rejected": -685.5999755859375, |
|
"loss": 1.1489, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -45.01874923706055, |
|
"rewards/margins": 5.295507907867432, |
|
"rewards/rejected": -50.32500076293945, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.2944, |
|
"grad_norm": 31.809750540628983, |
|
"learning_rate": 1.41184e-05, |
|
"logits/chosen": -0.13148804008960724, |
|
"logits/rejected": 0.0057846070267260075, |
|
"logps/chosen": -693.7999877929688, |
|
"logps/rejected": -683.7999877929688, |
|
"loss": 1.2265, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -45.66875076293945, |
|
"rewards/margins": 4.670312404632568, |
|
"rewards/rejected": -50.3125, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.2976, |
|
"grad_norm": 32.30726067405822, |
|
"learning_rate": 1.4054400000000002e-05, |
|
"logits/chosen": -0.40455323457717896, |
|
"logits/rejected": -0.16559448838233948, |
|
"logps/chosen": -719.4000244140625, |
|
"logps/rejected": -727.0, |
|
"loss": 1.1941, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -46.73749923706055, |
|
"rewards/margins": 6.235937595367432, |
|
"rewards/rejected": -52.95624923706055, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.3008, |
|
"grad_norm": 24.646705893567123, |
|
"learning_rate": 1.3990400000000002e-05, |
|
"logits/chosen": -0.57275390625, |
|
"logits/rejected": -0.449179083108902, |
|
"logps/chosen": -616.8499755859375, |
|
"logps/rejected": -632.7000122070312, |
|
"loss": 1.3855, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -37.131248474121094, |
|
"rewards/margins": 4.955859184265137, |
|
"rewards/rejected": -42.099998474121094, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.304, |
|
"grad_norm": 49.29450819010971, |
|
"learning_rate": 1.3926400000000003e-05, |
|
"logits/chosen": -0.4102416932582855, |
|
"logits/rejected": -0.3392578065395355, |
|
"logps/chosen": -576.9500122070312, |
|
"logps/rejected": -565.4500122070312, |
|
"loss": 1.6633, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -35.00312423706055, |
|
"rewards/margins": 3.369140625, |
|
"rewards/rejected": -38.39374923706055, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.3072, |
|
"grad_norm": 49.780482808788896, |
|
"learning_rate": 1.38624e-05, |
|
"logits/chosen": -0.36146241426467896, |
|
"logits/rejected": -0.23393554985523224, |
|
"logps/chosen": -617.0999755859375, |
|
"logps/rejected": -634.5, |
|
"loss": 1.4172, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -36.743751525878906, |
|
"rewards/margins": 7.173047065734863, |
|
"rewards/rejected": -43.931251525878906, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.3104, |
|
"grad_norm": 17.189839906819454, |
|
"learning_rate": 1.37984e-05, |
|
"logits/chosen": -0.32120054960250854, |
|
"logits/rejected": -0.22429199516773224, |
|
"logps/chosen": -681.4000244140625, |
|
"logps/rejected": -693.0, |
|
"loss": 1.3687, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -42.79999923706055, |
|
"rewards/margins": 5.156640529632568, |
|
"rewards/rejected": -47.96875, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.3136, |
|
"grad_norm": 12.304934608565206, |
|
"learning_rate": 1.3734400000000002e-05, |
|
"logits/chosen": -0.3793701231479645, |
|
"logits/rejected": -0.2492828369140625, |
|
"logps/chosen": -693.0, |
|
"logps/rejected": -685.7999877929688, |
|
"loss": 1.319, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -44.01874923706055, |
|
"rewards/margins": 5.4609375, |
|
"rewards/rejected": -49.48749923706055, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.3168, |
|
"grad_norm": 68.73462530803205, |
|
"learning_rate": 1.3670400000000002e-05, |
|
"logits/chosen": -0.39849853515625, |
|
"logits/rejected": -0.24716797471046448, |
|
"logps/chosen": -638.25, |
|
"logps/rejected": -639.8499755859375, |
|
"loss": 1.6008, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": -40.45624923706055, |
|
"rewards/margins": 3.61328125, |
|
"rewards/rejected": -44.04375076293945, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 38.49405717094058, |
|
"learning_rate": 1.3606400000000002e-05, |
|
"logits/chosen": -0.3262786865234375, |
|
"logits/rejected": -0.2204742431640625, |
|
"logps/chosen": -671.7999877929688, |
|
"logps/rejected": -656.5999755859375, |
|
"loss": 1.5795, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -41.775001525878906, |
|
"rewards/margins": 4.087109565734863, |
|
"rewards/rejected": -45.849998474121094, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.3232, |
|
"grad_norm": 31.47359965899451, |
|
"learning_rate": 1.3542400000000003e-05, |
|
"logits/chosen": -0.33366698026657104, |
|
"logits/rejected": -0.143086239695549, |
|
"logps/chosen": -687.0999755859375, |
|
"logps/rejected": -677.2999877929688, |
|
"loss": 1.5533, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -43.900001525878906, |
|
"rewards/margins": 4.942968845367432, |
|
"rewards/rejected": -48.849998474121094, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.3264, |
|
"grad_norm": 25.202888613549842, |
|
"learning_rate": 1.34784e-05, |
|
"logits/chosen": -0.1284584105014801, |
|
"logits/rejected": 0.03115081787109375, |
|
"logps/chosen": -716.2000122070312, |
|
"logps/rejected": -697.5999755859375, |
|
"loss": 1.4172, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -48.306251525878906, |
|
"rewards/margins": 3.723828077316284, |
|
"rewards/rejected": -52.025001525878906, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.3296, |
|
"grad_norm": 27.21888910223227, |
|
"learning_rate": 1.3414400000000002e-05, |
|
"logits/chosen": -0.410501092672348, |
|
"logits/rejected": -0.23717650771141052, |
|
"logps/chosen": -670.2000122070312, |
|
"logps/rejected": -680.7000122070312, |
|
"loss": 1.2433, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -43.03125, |
|
"rewards/margins": 5.464453220367432, |
|
"rewards/rejected": -48.474998474121094, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.3328, |
|
"grad_norm": 47.60066919480353, |
|
"learning_rate": 1.3350400000000002e-05, |
|
"logits/chosen": -0.43865966796875, |
|
"logits/rejected": -0.2837890684604645, |
|
"logps/chosen": -679.4000244140625, |
|
"logps/rejected": -667.0999755859375, |
|
"loss": 1.3288, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -43.03125, |
|
"rewards/margins": 4.525000095367432, |
|
"rewards/rejected": -47.568748474121094, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.336, |
|
"grad_norm": 30.202395773250057, |
|
"learning_rate": 1.3286400000000002e-05, |
|
"logits/chosen": -0.4092346131801605, |
|
"logits/rejected": NaN, |
|
"logps/chosen": -720.9000244140625, |
|
"logps/rejected": -697.2999877929688, |
|
"loss": 1.9026, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -49.01250076293945, |
|
"rewards/margins": 2.942578077316284, |
|
"rewards/rejected": -51.962501525878906, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.3392, |
|
"grad_norm": 42.034420148084266, |
|
"learning_rate": 1.3222400000000002e-05, |
|
"logits/chosen": -0.6834716796875, |
|
"logits/rejected": -0.5794311761856079, |
|
"logps/chosen": -678.4000244140625, |
|
"logps/rejected": -675.5999755859375, |
|
"loss": 1.43, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -42.556251525878906, |
|
"rewards/margins": 5.164453029632568, |
|
"rewards/rejected": -47.724998474121094, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.3424, |
|
"grad_norm": 49.03860430443391, |
|
"learning_rate": 1.31584e-05, |
|
"logits/chosen": -0.6819823980331421, |
|
"logits/rejected": -0.547760009765625, |
|
"logps/chosen": -664.2999877929688, |
|
"logps/rejected": -675.0999755859375, |
|
"loss": 1.364, |
|
"rewards/accuracies": 0.6781250238418579, |
|
"rewards/chosen": -43.59375, |
|
"rewards/margins": 4.734375, |
|
"rewards/rejected": -48.34375, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.3456, |
|
"grad_norm": 31.483761229112318, |
|
"learning_rate": 1.3094400000000001e-05, |
|
"logits/chosen": -0.5381530523300171, |
|
"logits/rejected": -0.42399901151657104, |
|
"logps/chosen": -707.7000122070312, |
|
"logps/rejected": -691.7999877929688, |
|
"loss": 1.6441, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -47.412498474121094, |
|
"rewards/margins": 2.9984374046325684, |
|
"rewards/rejected": -50.41875076293945, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.3488, |
|
"grad_norm": 34.48968739279645, |
|
"learning_rate": 1.3030400000000001e-05, |
|
"logits/chosen": -0.594866931438446, |
|
"logits/rejected": -0.39453125, |
|
"logps/chosen": -680.7000122070312, |
|
"logps/rejected": -663.2000122070312, |
|
"loss": 1.6324, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -42.625, |
|
"rewards/margins": 5.530468940734863, |
|
"rewards/rejected": -48.125, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.352, |
|
"grad_norm": 43.9423065836892, |
|
"learning_rate": 1.2966400000000002e-05, |
|
"logits/chosen": -0.68505859375, |
|
"logits/rejected": -0.531848132610321, |
|
"logps/chosen": -629.75, |
|
"logps/rejected": -628.9000244140625, |
|
"loss": 1.316, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -38.95000076293945, |
|
"rewards/margins": 4.518359184265137, |
|
"rewards/rejected": -43.46875, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.3552, |
|
"grad_norm": 37.14781795255988, |
|
"learning_rate": 1.2902400000000002e-05, |
|
"logits/chosen": -0.70550537109375, |
|
"logits/rejected": -0.4788818359375, |
|
"logps/chosen": -666.0, |
|
"logps/rejected": -652.2000122070312, |
|
"loss": 1.3027, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -40.54375076293945, |
|
"rewards/margins": 4.414453029632568, |
|
"rewards/rejected": -44.95624923706055, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.3584, |
|
"grad_norm": 36.64866248404492, |
|
"learning_rate": 1.2838400000000002e-05, |
|
"logits/chosen": -0.65966796875, |
|
"logits/rejected": -0.5438476800918579, |
|
"logps/chosen": -680.5999755859375, |
|
"logps/rejected": -677.5999755859375, |
|
"loss": 1.5804, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -45.4375, |
|
"rewards/margins": 4.028124809265137, |
|
"rewards/rejected": -49.45624923706055, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.3616, |
|
"grad_norm": 24.444790043115425, |
|
"learning_rate": 1.2774400000000001e-05, |
|
"logits/chosen": -0.607128918170929, |
|
"logits/rejected": -0.4249511659145355, |
|
"logps/chosen": -675.2999877929688, |
|
"logps/rejected": -684.2000122070312, |
|
"loss": 1.198, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -45.17499923706055, |
|
"rewards/margins": 4.401171684265137, |
|
"rewards/rejected": -49.57500076293945, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.3648, |
|
"grad_norm": 40.983608565411764, |
|
"learning_rate": 1.2710400000000001e-05, |
|
"logits/chosen": -0.59814453125, |
|
"logits/rejected": -0.4374023377895355, |
|
"logps/chosen": -652.0999755859375, |
|
"logps/rejected": -670.5, |
|
"loss": 2.0744, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -41.962501525878906, |
|
"rewards/margins": 6.257031440734863, |
|
"rewards/rejected": -48.243751525878906, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.368, |
|
"grad_norm": 39.26306466929758, |
|
"learning_rate": 1.2646400000000001e-05, |
|
"logits/chosen": -0.580737292766571, |
|
"logits/rejected": -0.4674316346645355, |
|
"logps/chosen": -674.4000244140625, |
|
"logps/rejected": -734.4000244140625, |
|
"loss": 1.113, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -42.375, |
|
"rewards/margins": 9.228124618530273, |
|
"rewards/rejected": -51.618751525878906, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.3712, |
|
"grad_norm": 31.72807758004206, |
|
"learning_rate": 1.2582400000000002e-05, |
|
"logits/chosen": -0.6524902582168579, |
|
"logits/rejected": -0.521557629108429, |
|
"logps/chosen": -709.2999877929688, |
|
"logps/rejected": -695.0, |
|
"loss": 2.8163, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -45.70000076293945, |
|
"rewards/margins": 3.572265625, |
|
"rewards/rejected": -49.243751525878906, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.3744, |
|
"grad_norm": 26.864773010834657, |
|
"learning_rate": 1.2518400000000002e-05, |
|
"logits/chosen": -0.5064331293106079, |
|
"logits/rejected": -0.31370848417282104, |
|
"logps/chosen": -675.7000122070312, |
|
"logps/rejected": -677.9000244140625, |
|
"loss": 1.3189, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -44.45000076293945, |
|
"rewards/margins": 3.821484327316284, |
|
"rewards/rejected": -48.26874923706055, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.3776, |
|
"grad_norm": 18.536367675480182, |
|
"learning_rate": 1.2454400000000002e-05, |
|
"logits/chosen": NaN, |
|
"logits/rejected": -0.3017578125, |
|
"logps/chosen": -693.0999755859375, |
|
"logps/rejected": -689.5, |
|
"loss": 1.1542, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -44.53125, |
|
"rewards/margins": 4.386328220367432, |
|
"rewards/rejected": -48.91875076293945, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.3808, |
|
"grad_norm": 83.85775887295496, |
|
"learning_rate": 1.23904e-05, |
|
"logits/chosen": -0.5306640863418579, |
|
"logits/rejected": -0.3149658143520355, |
|
"logps/chosen": -723.5999755859375, |
|
"logps/rejected": -728.9000244140625, |
|
"loss": 1.6014, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -47.837501525878906, |
|
"rewards/margins": 4.866015434265137, |
|
"rewards/rejected": -52.6875, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.384, |
|
"grad_norm": 48.56346315080496, |
|
"learning_rate": 1.2326400000000001e-05, |
|
"logits/chosen": -0.591595470905304, |
|
"logits/rejected": -0.4169677793979645, |
|
"logps/chosen": -722.0999755859375, |
|
"logps/rejected": -720.4000244140625, |
|
"loss": 1.6855, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -47.493751525878906, |
|
"rewards/margins": 4.980078220367432, |
|
"rewards/rejected": -52.474998474121094, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.3872, |
|
"grad_norm": 27.08253908214466, |
|
"learning_rate": 1.2262400000000001e-05, |
|
"logits/chosen": -0.5696045160293579, |
|
"logits/rejected": -0.35125428438186646, |
|
"logps/chosen": -696.5, |
|
"logps/rejected": -704.0, |
|
"loss": 1.266, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -46.98749923706055, |
|
"rewards/margins": 6.761328220367432, |
|
"rewards/rejected": -53.743751525878906, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.3904, |
|
"grad_norm": 45.58573367769971, |
|
"learning_rate": 1.2198400000000002e-05, |
|
"logits/chosen": -0.507214367389679, |
|
"logits/rejected": -0.370339959859848, |
|
"logps/chosen": -718.9000244140625, |
|
"logps/rejected": -711.5999755859375, |
|
"loss": 1.8102, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": -48.275001525878906, |
|
"rewards/margins": 3.2464842796325684, |
|
"rewards/rejected": -51.53125, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.3936, |
|
"grad_norm": 30.442508287653766, |
|
"learning_rate": 1.2134400000000002e-05, |
|
"logits/chosen": -0.6756652593612671, |
|
"logits/rejected": -0.4883056581020355, |
|
"logps/chosen": -722.7999877929688, |
|
"logps/rejected": -696.5, |
|
"loss": 1.3741, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -46.625, |
|
"rewards/margins": 4.519921779632568, |
|
"rewards/rejected": -51.14374923706055, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.3968, |
|
"grad_norm": 39.62281764482628, |
|
"learning_rate": 1.20704e-05, |
|
"logits/chosen": -0.647021472454071, |
|
"logits/rejected": -0.352630615234375, |
|
"logps/chosen": -695.2000122070312, |
|
"logps/rejected": -701.9000244140625, |
|
"loss": 1.3634, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -45.76250076293945, |
|
"rewards/margins": 5.391797065734863, |
|
"rewards/rejected": -51.1875, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 56.94249115539426, |
|
"learning_rate": 1.20064e-05, |
|
"logits/chosen": -0.626617431640625, |
|
"logits/rejected": -0.523999035358429, |
|
"logps/chosen": -726.2000122070312, |
|
"logps/rejected": -730.0999755859375, |
|
"loss": 0.9063, |
|
"rewards/accuracies": 0.7718750238418579, |
|
"rewards/chosen": -49.056251525878906, |
|
"rewards/margins": 6.05859375, |
|
"rewards/rejected": -55.13750076293945, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.4032, |
|
"grad_norm": 25.301760469749322, |
|
"learning_rate": 1.1942400000000001e-05, |
|
"logits/chosen": -0.647631824016571, |
|
"logits/rejected": -0.5055999755859375, |
|
"logps/chosen": -728.5999755859375, |
|
"logps/rejected": -726.0999755859375, |
|
"loss": 1.4879, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -47.849998474121094, |
|
"rewards/margins": 4.621874809265137, |
|
"rewards/rejected": -52.45000076293945, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.4064, |
|
"grad_norm": 45.78075012847253, |
|
"learning_rate": 1.1878400000000001e-05, |
|
"logits/chosen": -0.675854504108429, |
|
"logits/rejected": -0.602569580078125, |
|
"logps/chosen": -671.5, |
|
"logps/rejected": -683.2999877929688, |
|
"loss": 1.4389, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -42.131248474121094, |
|
"rewards/margins": 4.62890625, |
|
"rewards/rejected": -46.756248474121094, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.4096, |
|
"grad_norm": 49.430335961402825, |
|
"learning_rate": 1.1814400000000002e-05, |
|
"logits/chosen": -0.633227527141571, |
|
"logits/rejected": -0.503387451171875, |
|
"logps/chosen": -601.0499877929688, |
|
"logps/rejected": -609.9000244140625, |
|
"loss": 1.5427, |
|
"rewards/accuracies": 0.6781250238418579, |
|
"rewards/chosen": -37.38750076293945, |
|
"rewards/margins": 3.7523436546325684, |
|
"rewards/rejected": -41.131248474121094, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.4128, |
|
"grad_norm": 19.900739223194954, |
|
"learning_rate": 1.1750400000000002e-05, |
|
"logits/chosen": -0.8213866949081421, |
|
"logits/rejected": -0.7073730230331421, |
|
"logps/chosen": -611.2999877929688, |
|
"logps/rejected": -596.75, |
|
"loss": 1.0477, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -36.02812576293945, |
|
"rewards/margins": 4.68359375, |
|
"rewards/rejected": -40.70624923706055, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.416, |
|
"grad_norm": 38.51755593166137, |
|
"learning_rate": 1.16864e-05, |
|
"logits/chosen": -0.8940185308456421, |
|
"logits/rejected": -0.821337878704071, |
|
"logps/chosen": -654.5999755859375, |
|
"logps/rejected": -653.7999877929688, |
|
"loss": 1.4967, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -41.04999923706055, |
|
"rewards/margins": 4.975390434265137, |
|
"rewards/rejected": -46.01250076293945, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.4192, |
|
"grad_norm": 32.14775699366813, |
|
"learning_rate": 1.16224e-05, |
|
"logits/chosen": -1.0138671398162842, |
|
"logits/rejected": -0.8365234136581421, |
|
"logps/chosen": -665.7999877929688, |
|
"logps/rejected": -644.2000122070312, |
|
"loss": 1.2318, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -42.25, |
|
"rewards/margins": 4.791406154632568, |
|
"rewards/rejected": -47.04375076293945, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.4224, |
|
"grad_norm": 33.48748552900965, |
|
"learning_rate": 1.1558400000000001e-05, |
|
"logits/chosen": -0.98095703125, |
|
"logits/rejected": -0.8668457269668579, |
|
"logps/chosen": -671.0, |
|
"logps/rejected": -686.5999755859375, |
|
"loss": 2.4339, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -44.556251525878906, |
|
"rewards/margins": 4.135546684265137, |
|
"rewards/rejected": -48.6875, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.4256, |
|
"grad_norm": 35.72606394744342, |
|
"learning_rate": 1.1494400000000001e-05, |
|
"logits/chosen": -0.97509765625, |
|
"logits/rejected": -0.821728527545929, |
|
"logps/chosen": -744.7000122070312, |
|
"logps/rejected": -736.7999877929688, |
|
"loss": 1.3735, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -48.375, |
|
"rewards/margins": 4.604687690734863, |
|
"rewards/rejected": -52.96875, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.4288, |
|
"grad_norm": 22.738204624159227, |
|
"learning_rate": 1.1430400000000002e-05, |
|
"logits/chosen": -0.850903332233429, |
|
"logits/rejected": -0.683728039264679, |
|
"logps/chosen": -676.2999877929688, |
|
"logps/rejected": -696.7999877929688, |
|
"loss": 1.0104, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -45.70000076293945, |
|
"rewards/margins": 5.3828125, |
|
"rewards/rejected": -51.11249923706055, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.432, |
|
"grad_norm": 35.62747483551517, |
|
"learning_rate": 1.1366400000000002e-05, |
|
"logits/chosen": -0.902783215045929, |
|
"logits/rejected": -0.77490234375, |
|
"logps/chosen": -697.2000122070312, |
|
"logps/rejected": -686.5, |
|
"loss": 1.1435, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -45.962501525878906, |
|
"rewards/margins": 4.558203220367432, |
|
"rewards/rejected": -50.53125, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.4352, |
|
"grad_norm": 45.562269740805384, |
|
"learning_rate": 1.13024e-05, |
|
"logits/chosen": -0.945019543170929, |
|
"logits/rejected": -0.7890990972518921, |
|
"logps/chosen": -708.9000244140625, |
|
"logps/rejected": -706.2999877929688, |
|
"loss": 1.326, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -45.25, |
|
"rewards/margins": 4.703906059265137, |
|
"rewards/rejected": -49.9375, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.4384, |
|
"grad_norm": 19.992416969929188, |
|
"learning_rate": 1.12384e-05, |
|
"logits/chosen": -0.98681640625, |
|
"logits/rejected": -0.740966796875, |
|
"logps/chosen": -705.4000244140625, |
|
"logps/rejected": -697.0, |
|
"loss": 1.3905, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -46.256248474121094, |
|
"rewards/margins": 4.323828220367432, |
|
"rewards/rejected": -50.5625, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.4416, |
|
"grad_norm": 44.01446576997527, |
|
"learning_rate": 1.1174400000000001e-05, |
|
"logits/chosen": -0.940478503704071, |
|
"logits/rejected": -0.746734619140625, |
|
"logps/chosen": -696.0, |
|
"logps/rejected": -676.2999877929688, |
|
"loss": 1.1882, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -44.537498474121094, |
|
"rewards/margins": 4.753515720367432, |
|
"rewards/rejected": -49.26874923706055, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.4448, |
|
"grad_norm": 19.85792464714508, |
|
"learning_rate": 1.1110400000000001e-05, |
|
"logits/chosen": -0.865795910358429, |
|
"logits/rejected": -0.762744128704071, |
|
"logps/chosen": -704.2999877929688, |
|
"logps/rejected": -711.7000122070312, |
|
"loss": 1.3501, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -46.712501525878906, |
|
"rewards/margins": 4.548437595367432, |
|
"rewards/rejected": -51.23749923706055, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.448, |
|
"grad_norm": 32.97163771144295, |
|
"learning_rate": 1.1046400000000002e-05, |
|
"logits/chosen": NaN, |
|
"logits/rejected": -0.7448364496231079, |
|
"logps/chosen": -717.6500244140625, |
|
"logps/rejected": -691.2000122070312, |
|
"loss": 1.529, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -47.087501525878906, |
|
"rewards/margins": 3.608593702316284, |
|
"rewards/rejected": -50.693748474121094, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.4512, |
|
"grad_norm": 56.63714193074343, |
|
"learning_rate": 1.0982400000000002e-05, |
|
"logits/chosen": -0.8114258050918579, |
|
"logits/rejected": -0.6397033929824829, |
|
"logps/chosen": -708.4000244140625, |
|
"logps/rejected": -711.2000122070312, |
|
"loss": 1.6799, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -46.568748474121094, |
|
"rewards/margins": 3.831249952316284, |
|
"rewards/rejected": -50.39374923706055, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.4544, |
|
"grad_norm": 37.82419338196218, |
|
"learning_rate": 1.09184e-05, |
|
"logits/chosen": -0.803271472454071, |
|
"logits/rejected": -0.6448974609375, |
|
"logps/chosen": -694.0999755859375, |
|
"logps/rejected": -694.5, |
|
"loss": 1.6071, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -46.03125, |
|
"rewards/margins": 4.0, |
|
"rewards/rejected": -50.01874923706055, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.4576, |
|
"grad_norm": 35.2165866750218, |
|
"learning_rate": 1.08544e-05, |
|
"logits/chosen": NaN, |
|
"logits/rejected": -0.5844482183456421, |
|
"logps/chosen": -739.9000244140625, |
|
"logps/rejected": -716.2000122070312, |
|
"loss": 1.6826, |
|
"rewards/accuracies": 0.6781250238418579, |
|
"rewards/chosen": -48.91875076293945, |
|
"rewards/margins": 2.9632811546325684, |
|
"rewards/rejected": -51.881248474121094, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.4608, |
|
"grad_norm": 25.10545424336984, |
|
"learning_rate": 1.0790400000000001e-05, |
|
"logits/chosen": -0.7391601800918579, |
|
"logits/rejected": -0.6878417730331421, |
|
"logps/chosen": -710.5, |
|
"logps/rejected": -716.2999877929688, |
|
"loss": 1.1861, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -47.818748474121094, |
|
"rewards/margins": 4.393359184265137, |
|
"rewards/rejected": -52.224998474121094, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.464, |
|
"grad_norm": 37.403530945832465, |
|
"learning_rate": 1.0726400000000001e-05, |
|
"logits/chosen": -0.74072265625, |
|
"logits/rejected": -0.613391101360321, |
|
"logps/chosen": -725.9000244140625, |
|
"logps/rejected": -707.5999755859375, |
|
"loss": 2.6482, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -49.087501525878906, |
|
"rewards/margins": 2.563671827316284, |
|
"rewards/rejected": -51.650001525878906, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.4672, |
|
"grad_norm": 31.894584615960177, |
|
"learning_rate": 1.0662400000000001e-05, |
|
"logits/chosen": -0.694140613079071, |
|
"logits/rejected": -0.55181884765625, |
|
"logps/chosen": -699.5999755859375, |
|
"logps/rejected": -700.9000244140625, |
|
"loss": 1.2498, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -44.775001525878906, |
|
"rewards/margins": 4.860937595367432, |
|
"rewards/rejected": -49.650001525878906, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.4704, |
|
"grad_norm": 31.77468983079844, |
|
"learning_rate": 1.05984e-05, |
|
"logits/chosen": -0.676074206829071, |
|
"logits/rejected": -0.46323853731155396, |
|
"logps/chosen": -677.2999877929688, |
|
"logps/rejected": -690.7999877929688, |
|
"loss": 1.0611, |
|
"rewards/accuracies": 0.8031250238418579, |
|
"rewards/chosen": -43.16875076293945, |
|
"rewards/margins": 7.153906345367432, |
|
"rewards/rejected": -50.34375, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.4736, |
|
"grad_norm": 701.8720995315323, |
|
"learning_rate": 1.05344e-05, |
|
"logits/chosen": -0.679760754108429, |
|
"logits/rejected": -0.5099121332168579, |
|
"logps/chosen": -725.0, |
|
"logps/rejected": -687.5999755859375, |
|
"loss": 2.1299, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -46.631248474121094, |
|
"rewards/margins": 4.241796970367432, |
|
"rewards/rejected": -50.868751525878906, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.4768, |
|
"grad_norm": 21.110634577150943, |
|
"learning_rate": 1.04704e-05, |
|
"logits/chosen": -0.6755126714706421, |
|
"logits/rejected": -0.540844738483429, |
|
"logps/chosen": -699.5, |
|
"logps/rejected": -710.2000122070312, |
|
"loss": 1.2919, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -46.58124923706055, |
|
"rewards/margins": 4.320703029632568, |
|
"rewards/rejected": -50.90625, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 33.68928045290949, |
|
"learning_rate": 1.0406400000000001e-05, |
|
"logits/chosen": -0.641308605670929, |
|
"logits/rejected": -0.47297364473342896, |
|
"logps/chosen": -705.9000244140625, |
|
"logps/rejected": -689.5999755859375, |
|
"loss": 1.2651, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -46.63750076293945, |
|
"rewards/margins": 3.5082030296325684, |
|
"rewards/rejected": -50.125, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.4832, |
|
"grad_norm": 41.78929183594789, |
|
"learning_rate": 1.0342400000000001e-05, |
|
"logits/chosen": -0.649487316608429, |
|
"logits/rejected": -0.512768566608429, |
|
"logps/chosen": -712.2000122070312, |
|
"logps/rejected": -707.2000122070312, |
|
"loss": 1.605, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -47.443748474121094, |
|
"rewards/margins": 3.4710936546325684, |
|
"rewards/rejected": -50.91875076293945, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.4864, |
|
"grad_norm": 83.1103876604526, |
|
"learning_rate": 1.0278400000000001e-05, |
|
"logits/chosen": -0.7444823980331421, |
|
"logits/rejected": -0.6318725347518921, |
|
"logps/chosen": -759.7000122070312, |
|
"logps/rejected": -734.0999755859375, |
|
"loss": 1.469, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -48.53125, |
|
"rewards/margins": 3.9937500953674316, |
|
"rewards/rejected": -52.537498474121094, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.4896, |
|
"grad_norm": 31.31743279282106, |
|
"learning_rate": 1.02144e-05, |
|
"logits/chosen": -0.8201904296875, |
|
"logits/rejected": -0.7158203125, |
|
"logps/chosen": -739.0999755859375, |
|
"logps/rejected": -739.7000122070312, |
|
"loss": 1.1614, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -47.525001525878906, |
|
"rewards/margins": 6.057421684265137, |
|
"rewards/rejected": -53.5625, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.4928, |
|
"grad_norm": 30.082403707106717, |
|
"learning_rate": 1.01504e-05, |
|
"logits/chosen": -0.735424816608429, |
|
"logits/rejected": -0.5428711175918579, |
|
"logps/chosen": -743.2000122070312, |
|
"logps/rejected": -739.5, |
|
"loss": 1.2279, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -49.35625076293945, |
|
"rewards/margins": 4.89453125, |
|
"rewards/rejected": -54.275001525878906, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.496, |
|
"grad_norm": 28.892914763782777, |
|
"learning_rate": 1.00864e-05, |
|
"logits/chosen": -0.809374988079071, |
|
"logits/rejected": -0.6345459222793579, |
|
"logps/chosen": -699.2999877929688, |
|
"logps/rejected": -721.2000122070312, |
|
"loss": 1.195, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -46.89374923706055, |
|
"rewards/margins": 6.657422065734863, |
|
"rewards/rejected": -53.5625, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.4992, |
|
"grad_norm": 24.949136089979643, |
|
"learning_rate": 1.00224e-05, |
|
"logits/chosen": -0.7973388433456421, |
|
"logits/rejected": NaN, |
|
"logps/chosen": -712.2000122070312, |
|
"logps/rejected": -697.4500122070312, |
|
"loss": 1.6646, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -46.381248474121094, |
|
"rewards/margins": 4.708203315734863, |
|
"rewards/rejected": -51.09375, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.5024, |
|
"grad_norm": 17.07305244434439, |
|
"learning_rate": 9.958400000000001e-06, |
|
"logits/chosen": -0.791088879108429, |
|
"logits/rejected": -0.7275635004043579, |
|
"logps/chosen": -642.4500122070312, |
|
"logps/rejected": -676.0999755859375, |
|
"loss": 1.3848, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -41.243751525878906, |
|
"rewards/margins": 7.883984565734863, |
|
"rewards/rejected": -49.150001525878906, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.5056, |
|
"grad_norm": 27.387702681775544, |
|
"learning_rate": 9.894400000000001e-06, |
|
"logits/chosen": -0.751025378704071, |
|
"logits/rejected": -0.6616455316543579, |
|
"logps/chosen": -630.0, |
|
"logps/rejected": -632.3499755859375, |
|
"loss": 1.1648, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -40.01250076293945, |
|
"rewards/margins": 4.319140434265137, |
|
"rewards/rejected": -44.337501525878906, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.5088, |
|
"grad_norm": 58.06569005556697, |
|
"learning_rate": 9.830400000000002e-06, |
|
"logits/chosen": -0.677294909954071, |
|
"logits/rejected": -0.5551391839981079, |
|
"logps/chosen": -656.9000244140625, |
|
"logps/rejected": -646.9000244140625, |
|
"loss": 1.5886, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -42.8125, |
|
"rewards/margins": 3.055468797683716, |
|
"rewards/rejected": -45.881248474121094, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.512, |
|
"grad_norm": 32.15835121742251, |
|
"learning_rate": 9.7664e-06, |
|
"logits/chosen": -0.8250976800918579, |
|
"logits/rejected": -0.684741199016571, |
|
"logps/chosen": -662.2000122070312, |
|
"logps/rejected": -653.7000122070312, |
|
"loss": 1.3567, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -40.978126525878906, |
|
"rewards/margins": 5.03515625, |
|
"rewards/rejected": -46.037498474121094, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.5152, |
|
"grad_norm": 39.1902416854394, |
|
"learning_rate": 9.7024e-06, |
|
"logits/chosen": -0.7381652593612671, |
|
"logits/rejected": -0.53082275390625, |
|
"logps/chosen": -680.4000244140625, |
|
"logps/rejected": -673.2999877929688, |
|
"loss": 1.6258, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -44.45624923706055, |
|
"rewards/margins": 3.6875, |
|
"rewards/rejected": -48.13750076293945, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.5184, |
|
"grad_norm": 24.371787626734253, |
|
"learning_rate": 9.6384e-06, |
|
"logits/chosen": -0.79248046875, |
|
"logits/rejected": -0.678546130657196, |
|
"logps/chosen": -705.4000244140625, |
|
"logps/rejected": -715.5, |
|
"loss": 1.2993, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -46.82500076293945, |
|
"rewards/margins": 4.224609375, |
|
"rewards/rejected": -51.04999923706055, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.5216, |
|
"grad_norm": 31.2556593358844, |
|
"learning_rate": 9.574400000000001e-06, |
|
"logits/chosen": -0.7989257574081421, |
|
"logits/rejected": -0.6652587652206421, |
|
"logps/chosen": -679.9000244140625, |
|
"logps/rejected": -684.0999755859375, |
|
"loss": 1.1808, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -45.837501525878906, |
|
"rewards/margins": 4.585156440734863, |
|
"rewards/rejected": -50.4375, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.5248, |
|
"grad_norm": 27.55628635760918, |
|
"learning_rate": 9.510400000000001e-06, |
|
"logits/chosen": -0.862622082233429, |
|
"logits/rejected": -0.7339111566543579, |
|
"logps/chosen": -663.2999877929688, |
|
"logps/rejected": -659.7000122070312, |
|
"loss": 0.9807, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -43.256248474121094, |
|
"rewards/margins": 4.79296875, |
|
"rewards/rejected": -48.0625, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.528, |
|
"grad_norm": 34.30396390187216, |
|
"learning_rate": 9.446400000000002e-06, |
|
"logits/chosen": -0.8206787109375, |
|
"logits/rejected": -0.670666515827179, |
|
"logps/chosen": -694.2000122070312, |
|
"logps/rejected": -695.0999755859375, |
|
"loss": 1.0455, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -45.9375, |
|
"rewards/margins": 5.129687309265137, |
|
"rewards/rejected": -51.068748474121094, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.5312, |
|
"grad_norm": 34.776770970797735, |
|
"learning_rate": 9.3824e-06, |
|
"logits/chosen": -0.9286254644393921, |
|
"logits/rejected": -0.726513683795929, |
|
"logps/chosen": -685.0, |
|
"logps/rejected": -689.0, |
|
"loss": 1.0873, |
|
"rewards/accuracies": 0.7718750238418579, |
|
"rewards/chosen": -44.875, |
|
"rewards/margins": 5.595703125, |
|
"rewards/rejected": -50.493751525878906, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.5344, |
|
"grad_norm": 27.00821250166549, |
|
"learning_rate": 9.3184e-06, |
|
"logits/chosen": -1.0827147960662842, |
|
"logits/rejected": -0.8606933355331421, |
|
"logps/chosen": -696.9000244140625, |
|
"logps/rejected": -684.7999877929688, |
|
"loss": 1.7363, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -44.849998474121094, |
|
"rewards/margins": 4.677734375, |
|
"rewards/rejected": -49.537498474121094, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.5376, |
|
"grad_norm": 32.894164043095316, |
|
"learning_rate": 9.2544e-06, |
|
"logits/chosen": -0.972582995891571, |
|
"logits/rejected": -0.8651367425918579, |
|
"logps/chosen": -700.7999877929688, |
|
"logps/rejected": -748.0, |
|
"loss": 1.1076, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -47.79375076293945, |
|
"rewards/margins": 8.16015625, |
|
"rewards/rejected": -55.98125076293945, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.5408, |
|
"grad_norm": 53.448156583233676, |
|
"learning_rate": 9.190400000000001e-06, |
|
"logits/chosen": -0.986499011516571, |
|
"logits/rejected": -0.8663574457168579, |
|
"logps/chosen": -692.5999755859375, |
|
"logps/rejected": -689.7999877929688, |
|
"loss": 0.9655, |
|
"rewards/accuracies": 0.7718750238418579, |
|
"rewards/chosen": -45.275001525878906, |
|
"rewards/margins": 5.28125, |
|
"rewards/rejected": -50.5625, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.544, |
|
"grad_norm": 22.723538012527303, |
|
"learning_rate": 9.126400000000001e-06, |
|
"logits/chosen": -1.037988305091858, |
|
"logits/rejected": -0.8658202886581421, |
|
"logps/chosen": -707.5999755859375, |
|
"logps/rejected": -698.9000244140625, |
|
"loss": 0.9581, |
|
"rewards/accuracies": 0.784375011920929, |
|
"rewards/chosen": -45.962501525878906, |
|
"rewards/margins": 5.108202934265137, |
|
"rewards/rejected": -51.09375, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.5472, |
|
"grad_norm": 29.930466159752008, |
|
"learning_rate": 9.062400000000002e-06, |
|
"logits/chosen": -0.9510253667831421, |
|
"logits/rejected": -0.8387451171875, |
|
"logps/chosen": -689.0, |
|
"logps/rejected": -692.4000244140625, |
|
"loss": 1.5855, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -46.48749923706055, |
|
"rewards/margins": 4.531640529632568, |
|
"rewards/rejected": -51.01250076293945, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.5504, |
|
"grad_norm": 22.74526767141219, |
|
"learning_rate": 8.9984e-06, |
|
"logits/chosen": -0.881787121295929, |
|
"logits/rejected": -0.7762206792831421, |
|
"logps/chosen": -697.2999877929688, |
|
"logps/rejected": -691.0999755859375, |
|
"loss": 1.3577, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -46.42499923706055, |
|
"rewards/margins": 3.6976561546325684, |
|
"rewards/rejected": -50.118751525878906, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.5536, |
|
"grad_norm": 43.291646269995006, |
|
"learning_rate": 8.9344e-06, |
|
"logits/chosen": -0.972363293170929, |
|
"logits/rejected": -0.83868408203125, |
|
"logps/chosen": -713.0999755859375, |
|
"logps/rejected": -712.5999755859375, |
|
"loss": 1.2145, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -45.98749923706055, |
|
"rewards/margins": 4.237500190734863, |
|
"rewards/rejected": -50.224998474121094, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.5568, |
|
"grad_norm": 27.09778676867347, |
|
"learning_rate": 8.8704e-06, |
|
"logits/chosen": -1.0656249523162842, |
|
"logits/rejected": -0.7837280035018921, |
|
"logps/chosen": -697.2999877929688, |
|
"logps/rejected": -682.5999755859375, |
|
"loss": 1.2162, |
|
"rewards/accuracies": 0.778124988079071, |
|
"rewards/chosen": -45.0, |
|
"rewards/margins": 5.428906440734863, |
|
"rewards/rejected": -50.46875, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 18.82366502469674, |
|
"learning_rate": 8.806400000000001e-06, |
|
"logits/chosen": -0.972851574420929, |
|
"logits/rejected": -0.8165038824081421, |
|
"logps/chosen": -699.9000244140625, |
|
"logps/rejected": -718.5, |
|
"loss": 0.9451, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -46.38750076293945, |
|
"rewards/margins": 5.488671779632568, |
|
"rewards/rejected": -51.875, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.5632, |
|
"grad_norm": 37.03702244755633, |
|
"learning_rate": 8.742400000000001e-06, |
|
"logits/chosen": -0.986035168170929, |
|
"logits/rejected": -0.8427978754043579, |
|
"logps/chosen": -693.0, |
|
"logps/rejected": -680.9000244140625, |
|
"loss": 1.5865, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -44.91875076293945, |
|
"rewards/margins": 4.442187309265137, |
|
"rewards/rejected": -49.32500076293945, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.5664, |
|
"grad_norm": 15.669813086288999, |
|
"learning_rate": 8.6784e-06, |
|
"logits/chosen": -0.8355957269668579, |
|
"logits/rejected": -0.685546875, |
|
"logps/chosen": -672.5999755859375, |
|
"logps/rejected": -683.9000244140625, |
|
"loss": 0.9858, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -44.875, |
|
"rewards/margins": 5.412499904632568, |
|
"rewards/rejected": -50.3125, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.5696, |
|
"grad_norm": 18.185718699982754, |
|
"learning_rate": 8.6144e-06, |
|
"logits/chosen": -0.8070312738418579, |
|
"logits/rejected": -0.677416980266571, |
|
"logps/chosen": -729.2999877929688, |
|
"logps/rejected": -713.0, |
|
"loss": 1.114, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -47.04999923706055, |
|
"rewards/margins": 4.353906154632568, |
|
"rewards/rejected": -51.431251525878906, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.5728, |
|
"grad_norm": 18.43743777637667, |
|
"learning_rate": 8.5504e-06, |
|
"logits/chosen": -0.96875, |
|
"logits/rejected": -0.7574218511581421, |
|
"logps/chosen": -726.0, |
|
"logps/rejected": -709.7000122070312, |
|
"loss": 1.0618, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -47.07500076293945, |
|
"rewards/margins": 5.14453125, |
|
"rewards/rejected": -52.212501525878906, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.576, |
|
"grad_norm": 43.0770877472424, |
|
"learning_rate": 8.4864e-06, |
|
"logits/chosen": -0.9660278558731079, |
|
"logits/rejected": -0.8299804925918579, |
|
"logps/chosen": -660.5999755859375, |
|
"logps/rejected": -684.7999877929688, |
|
"loss": 1.3212, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -43.724998474121094, |
|
"rewards/margins": 5.642187595367432, |
|
"rewards/rejected": -49.349998474121094, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.5792, |
|
"grad_norm": 35.13641837689736, |
|
"learning_rate": 8.422400000000001e-06, |
|
"logits/chosen": -0.9552246332168579, |
|
"logits/rejected": -0.800488293170929, |
|
"logps/chosen": -691.4000244140625, |
|
"logps/rejected": -693.9000244140625, |
|
"loss": 1.2366, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -46.11249923706055, |
|
"rewards/margins": 4.806640625, |
|
"rewards/rejected": -50.931251525878906, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.5824, |
|
"grad_norm": 39.08294024192249, |
|
"learning_rate": 8.358400000000001e-06, |
|
"logits/chosen": -0.996899425983429, |
|
"logits/rejected": -0.862011730670929, |
|
"logps/chosen": -716.2000122070312, |
|
"logps/rejected": -729.2999877929688, |
|
"loss": 1.0015, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -48.400001525878906, |
|
"rewards/margins": 4.721093654632568, |
|
"rewards/rejected": -53.11249923706055, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.5856, |
|
"grad_norm": 40.43387329794278, |
|
"learning_rate": 8.2944e-06, |
|
"logits/chosen": -1.0739257335662842, |
|
"logits/rejected": -0.936962902545929, |
|
"logps/chosen": -703.5999755859375, |
|
"logps/rejected": -701.9000244140625, |
|
"loss": 1.2172, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -46.375, |
|
"rewards/margins": 4.6171875, |
|
"rewards/rejected": -50.98125076293945, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.5888, |
|
"grad_norm": 37.96585113209214, |
|
"learning_rate": 8.2304e-06, |
|
"logits/chosen": -1.178613305091858, |
|
"logits/rejected": -1.0226562023162842, |
|
"logps/chosen": -729.2999877929688, |
|
"logps/rejected": -732.0, |
|
"loss": 1.1452, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -46.431251525878906, |
|
"rewards/margins": 6.264452934265137, |
|
"rewards/rejected": -52.6875, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.592, |
|
"grad_norm": 22.023179991872624, |
|
"learning_rate": 8.1664e-06, |
|
"logits/chosen": -1.1320312023162842, |
|
"logits/rejected": -0.948486328125, |
|
"logps/chosen": -710.7999877929688, |
|
"logps/rejected": -693.9000244140625, |
|
"loss": 1.1163, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -47.04999923706055, |
|
"rewards/margins": 4.289453029632568, |
|
"rewards/rejected": -51.34375, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.5952, |
|
"grad_norm": 20.62670986549159, |
|
"learning_rate": 8.1024e-06, |
|
"logits/chosen": -1.138671875, |
|
"logits/rejected": -0.9735351800918579, |
|
"logps/chosen": -688.0, |
|
"logps/rejected": -692.2000122070312, |
|
"loss": 1.0507, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -44.5625, |
|
"rewards/margins": 5.944140434265137, |
|
"rewards/rejected": -50.493751525878906, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.5984, |
|
"grad_norm": 16.339457625847704, |
|
"learning_rate": 8.0384e-06, |
|
"logits/chosen": -1.102148413658142, |
|
"logits/rejected": -1.001074194908142, |
|
"logps/chosen": -708.2000122070312, |
|
"logps/rejected": -722.7999877929688, |
|
"loss": 0.7581, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -47.71875, |
|
"rewards/margins": 5.521874904632568, |
|
"rewards/rejected": -53.23125076293945, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.6016, |
|
"grad_norm": 35.66308810908752, |
|
"learning_rate": 7.974400000000001e-06, |
|
"logits/chosen": -1.0789062976837158, |
|
"logits/rejected": -0.9618285894393921, |
|
"logps/chosen": -738.0999755859375, |
|
"logps/rejected": -732.0999755859375, |
|
"loss": 0.9391, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -48.60625076293945, |
|
"rewards/margins": 4.964062690734863, |
|
"rewards/rejected": -53.587501525878906, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.6048, |
|
"grad_norm": 32.66045303471343, |
|
"learning_rate": 7.9104e-06, |
|
"logits/chosen": -1.063330054283142, |
|
"logits/rejected": NaN, |
|
"logps/chosen": -699.0999755859375, |
|
"logps/rejected": -702.0999755859375, |
|
"loss": 1.513, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -47.41875076293945, |
|
"rewards/margins": 3.7445311546325684, |
|
"rewards/rejected": -51.17499923706055, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.608, |
|
"grad_norm": 27.47143642743382, |
|
"learning_rate": 7.8464e-06, |
|
"logits/chosen": -1.07275390625, |
|
"logits/rejected": -0.8569580316543579, |
|
"logps/chosen": -699.0999755859375, |
|
"logps/rejected": -701.4000244140625, |
|
"loss": 0.9579, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -46.39374923706055, |
|
"rewards/margins": 5.1484375, |
|
"rewards/rejected": -51.556251525878906, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.6112, |
|
"grad_norm": 35.69727290418723, |
|
"learning_rate": 7.7824e-06, |
|
"logits/chosen": -1.019262671470642, |
|
"logits/rejected": -0.923876941204071, |
|
"logps/chosen": -720.0999755859375, |
|
"logps/rejected": -712.2000122070312, |
|
"loss": 1.8816, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -47.743751525878906, |
|
"rewards/margins": 4.006249904632568, |
|
"rewards/rejected": -51.743751525878906, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.6144, |
|
"grad_norm": 25.35554574306627, |
|
"learning_rate": 7.7184e-06, |
|
"logits/chosen": -1.036523461341858, |
|
"logits/rejected": -0.8519042730331421, |
|
"logps/chosen": -729.4000244140625, |
|
"logps/rejected": -720.2999877929688, |
|
"loss": 1.039, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -48.36249923706055, |
|
"rewards/margins": 4.984375, |
|
"rewards/rejected": -53.337501525878906, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.6176, |
|
"grad_norm": 33.902865352947586, |
|
"learning_rate": 7.6544e-06, |
|
"logits/chosen": -1.0681641101837158, |
|
"logits/rejected": -0.8866211175918579, |
|
"logps/chosen": -718.5999755859375, |
|
"logps/rejected": -700.2000122070312, |
|
"loss": 1.3977, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -47.41875076293945, |
|
"rewards/margins": 3.130078077316284, |
|
"rewards/rejected": -50.556251525878906, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.6208, |
|
"grad_norm": 31.096553058517653, |
|
"learning_rate": 7.590400000000001e-06, |
|
"logits/chosen": -1.114648461341858, |
|
"logits/rejected": -0.987255871295929, |
|
"logps/chosen": -719.2999877929688, |
|
"logps/rejected": -706.4000244140625, |
|
"loss": 1.2642, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -45.35625076293945, |
|
"rewards/margins": 5.084374904632568, |
|
"rewards/rejected": -50.45624923706055, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.624, |
|
"grad_norm": 54.87734980777987, |
|
"learning_rate": 7.5264000000000005e-06, |
|
"logits/chosen": -1.041601538658142, |
|
"logits/rejected": -0.8419189453125, |
|
"logps/chosen": -731.5, |
|
"logps/rejected": -740.2000122070312, |
|
"loss": 1.2067, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -48.287498474121094, |
|
"rewards/margins": 4.91015625, |
|
"rewards/rejected": -53.212501525878906, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.6272, |
|
"grad_norm": 34.47923251544078, |
|
"learning_rate": 7.462400000000001e-06, |
|
"logits/chosen": -0.9884277582168579, |
|
"logits/rejected": NaN, |
|
"logps/chosen": -733.7999877929688, |
|
"logps/rejected": -728.4000244140625, |
|
"loss": 1.4988, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -49.431251525878906, |
|
"rewards/margins": 4.154687404632568, |
|
"rewards/rejected": -53.587501525878906, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.6304, |
|
"grad_norm": 36.36671803353493, |
|
"learning_rate": 7.398400000000001e-06, |
|
"logits/chosen": NaN, |
|
"logits/rejected": -0.912402331829071, |
|
"logps/chosen": -702.5999755859375, |
|
"logps/rejected": -696.0, |
|
"loss": 1.406, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -46.45000076293945, |
|
"rewards/margins": 4.345703125, |
|
"rewards/rejected": -50.806251525878906, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.6336, |
|
"grad_norm": 29.91188477302202, |
|
"learning_rate": 7.3344000000000005e-06, |
|
"logits/chosen": -0.9787353277206421, |
|
"logits/rejected": -0.8780517578125, |
|
"logps/chosen": -713.5999755859375, |
|
"logps/rejected": -724.2999877929688, |
|
"loss": 1.0452, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -46.26874923706055, |
|
"rewards/margins": 5.87109375, |
|
"rewards/rejected": -52.150001525878906, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.6368, |
|
"grad_norm": 26.78263480473984, |
|
"learning_rate": 7.270400000000001e-06, |
|
"logits/chosen": -1.061132788658142, |
|
"logits/rejected": -0.928173840045929, |
|
"logps/chosen": -694.4000244140625, |
|
"logps/rejected": -688.5, |
|
"loss": 1.1949, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -44.849998474121094, |
|
"rewards/margins": 4.34375, |
|
"rewards/rejected": -49.193748474121094, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 30.836160521060737, |
|
"learning_rate": 7.2064e-06, |
|
"logits/chosen": -0.986987292766571, |
|
"logits/rejected": -0.860644519329071, |
|
"logps/chosen": -687.7999877929688, |
|
"logps/rejected": -673.7000122070312, |
|
"loss": 1.1639, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -45.775001525878906, |
|
"rewards/margins": 3.8265624046325684, |
|
"rewards/rejected": -49.618751525878906, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.6432, |
|
"grad_norm": 16.21314671791893, |
|
"learning_rate": 7.1424000000000004e-06, |
|
"logits/chosen": -0.953857421875, |
|
"logits/rejected": -0.8311767578125, |
|
"logps/chosen": -702.9000244140625, |
|
"logps/rejected": -704.5999755859375, |
|
"loss": 1.189, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -45.849998474121094, |
|
"rewards/margins": 4.592968940734863, |
|
"rewards/rejected": -50.443748474121094, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.6464, |
|
"grad_norm": 16.706238605453063, |
|
"learning_rate": 7.078400000000001e-06, |
|
"logits/chosen": -0.8988281488418579, |
|
"logits/rejected": -0.631359875202179, |
|
"logps/chosen": -717.5, |
|
"logps/rejected": -692.5, |
|
"loss": 0.8347, |
|
"rewards/accuracies": 0.7718750238418579, |
|
"rewards/chosen": -46.5625, |
|
"rewards/margins": 4.493750095367432, |
|
"rewards/rejected": -51.04999923706055, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.6496, |
|
"grad_norm": 23.11390264308454, |
|
"learning_rate": 7.0144e-06, |
|
"logits/chosen": -0.981738269329071, |
|
"logits/rejected": -0.803149402141571, |
|
"logps/chosen": -696.7000122070312, |
|
"logps/rejected": -699.0, |
|
"loss": 0.9888, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -45.974998474121094, |
|
"rewards/margins": 4.503125190734863, |
|
"rewards/rejected": -50.48125076293945, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.6528, |
|
"grad_norm": 25.551886307935956, |
|
"learning_rate": 6.9504e-06, |
|
"logits/chosen": -1.005957007408142, |
|
"logits/rejected": -0.791259765625, |
|
"logps/chosen": -718.7999877929688, |
|
"logps/rejected": -714.0, |
|
"loss": 1.0371, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -46.82500076293945, |
|
"rewards/margins": 5.044531345367432, |
|
"rewards/rejected": -51.86249923706055, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.656, |
|
"grad_norm": 38.10513975607243, |
|
"learning_rate": 6.886400000000001e-06, |
|
"logits/chosen": -1.0060546398162842, |
|
"logits/rejected": -0.7935760617256165, |
|
"logps/chosen": -695.7999877929688, |
|
"logps/rejected": -697.0, |
|
"loss": 0.8056, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -46.381248474121094, |
|
"rewards/margins": 5.079687595367432, |
|
"rewards/rejected": -51.45624923706055, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.6592, |
|
"grad_norm": 43.87278443969138, |
|
"learning_rate": 6.8224e-06, |
|
"logits/chosen": -1.1340820789337158, |
|
"logits/rejected": -1.0141112804412842, |
|
"logps/chosen": -705.5999755859375, |
|
"logps/rejected": -714.4000244140625, |
|
"loss": 1.3753, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -45.0625, |
|
"rewards/margins": 4.603906154632568, |
|
"rewards/rejected": -49.662498474121094, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.6624, |
|
"grad_norm": 30.116406487757125, |
|
"learning_rate": 6.7584e-06, |
|
"logits/chosen": -1.061132788658142, |
|
"logits/rejected": -0.921923816204071, |
|
"logps/chosen": -729.7000122070312, |
|
"logps/rejected": -746.0, |
|
"loss": 1.2115, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -49.45624923706055, |
|
"rewards/margins": 5.823046684265137, |
|
"rewards/rejected": -55.256248474121094, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.6656, |
|
"grad_norm": 30.073141884146498, |
|
"learning_rate": 6.694400000000001e-06, |
|
"logits/chosen": -1.0830078125, |
|
"logits/rejected": -0.8765624761581421, |
|
"logps/chosen": -715.7999877929688, |
|
"logps/rejected": -714.5999755859375, |
|
"loss": 0.8647, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -47.0, |
|
"rewards/margins": 4.775781154632568, |
|
"rewards/rejected": -51.76250076293945, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.6688, |
|
"grad_norm": 44.582761143663106, |
|
"learning_rate": 6.6304e-06, |
|
"logits/chosen": -1.01953125, |
|
"logits/rejected": -0.8620361089706421, |
|
"logps/chosen": -689.2999877929688, |
|
"logps/rejected": -690.4000244140625, |
|
"loss": 1.1372, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -46.38750076293945, |
|
"rewards/margins": 4.301953315734863, |
|
"rewards/rejected": -50.681251525878906, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.672, |
|
"grad_norm": 32.232233128260255, |
|
"learning_rate": 6.5664e-06, |
|
"logits/chosen": -1.0627930164337158, |
|
"logits/rejected": -0.897936999797821, |
|
"logps/chosen": -743.9000244140625, |
|
"logps/rejected": -759.7999877929688, |
|
"loss": 0.8998, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -47.96875, |
|
"rewards/margins": 5.380078315734863, |
|
"rewards/rejected": -53.33124923706055, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.6752, |
|
"grad_norm": 24.673000840435204, |
|
"learning_rate": 6.502400000000001e-06, |
|
"logits/chosen": -1.0478515625, |
|
"logits/rejected": -0.807861328125, |
|
"logps/chosen": -706.9000244140625, |
|
"logps/rejected": -694.9000244140625, |
|
"loss": 1.1519, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -47.756248474121094, |
|
"rewards/margins": 4.149218559265137, |
|
"rewards/rejected": -51.92499923706055, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.6784, |
|
"grad_norm": 14.646256448520822, |
|
"learning_rate": 6.4384e-06, |
|
"logits/chosen": -1.0353515148162842, |
|
"logits/rejected": -0.891357421875, |
|
"logps/chosen": -714.2000122070312, |
|
"logps/rejected": -718.0, |
|
"loss": 0.9878, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -46.67499923706055, |
|
"rewards/margins": 6.03125, |
|
"rewards/rejected": -52.724998474121094, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.6816, |
|
"grad_norm": 515.206138123639, |
|
"learning_rate": 6.3744e-06, |
|
"logits/chosen": -1.020361304283142, |
|
"logits/rejected": -0.8248656988143921, |
|
"logps/chosen": -723.2999877929688, |
|
"logps/rejected": -710.5999755859375, |
|
"loss": 1.5449, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -47.9375, |
|
"rewards/margins": 4.746874809265137, |
|
"rewards/rejected": -52.6875, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.6848, |
|
"grad_norm": 33.81383796288538, |
|
"learning_rate": 6.310400000000001e-06, |
|
"logits/chosen": -1.0212891101837158, |
|
"logits/rejected": -0.821093738079071, |
|
"logps/chosen": -693.5, |
|
"logps/rejected": -721.2999877929688, |
|
"loss": 1.3032, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -46.11249923706055, |
|
"rewards/margins": 6.295702934265137, |
|
"rewards/rejected": -52.431251525878906, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.688, |
|
"grad_norm": 32.355318848104815, |
|
"learning_rate": 6.2464e-06, |
|
"logits/chosen": -0.9873046875, |
|
"logits/rejected": -0.8365722894668579, |
|
"logps/chosen": -699.7999877929688, |
|
"logps/rejected": -695.5, |
|
"loss": 1.2379, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -46.037498474121094, |
|
"rewards/margins": 4.799609184265137, |
|
"rewards/rejected": -50.84375, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.6912, |
|
"grad_norm": 23.615506940371404, |
|
"learning_rate": 6.1824e-06, |
|
"logits/chosen": -1.00634765625, |
|
"logits/rejected": -0.9322265386581421, |
|
"logps/chosen": -697.0, |
|
"logps/rejected": -712.5, |
|
"loss": 1.227, |
|
"rewards/accuracies": 0.7718750238418579, |
|
"rewards/chosen": -47.875, |
|
"rewards/margins": 4.198046684265137, |
|
"rewards/rejected": -52.056251525878906, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.6944, |
|
"grad_norm": 63.3641597780795, |
|
"learning_rate": 6.1184000000000014e-06, |
|
"logits/chosen": -0.994403064250946, |
|
"logits/rejected": -0.8553466796875, |
|
"logps/chosen": -730.9000244140625, |
|
"logps/rejected": -732.0, |
|
"loss": 1.0171, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -49.07500076293945, |
|
"rewards/margins": 4.673828125, |
|
"rewards/rejected": -53.76250076293945, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.6976, |
|
"grad_norm": 20.809371501110515, |
|
"learning_rate": 6.0544e-06, |
|
"logits/chosen": -1.0204589366912842, |
|
"logits/rejected": -0.813793957233429, |
|
"logps/chosen": -712.5, |
|
"logps/rejected": -710.0, |
|
"loss": 0.7018, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -47.306251525878906, |
|
"rewards/margins": 5.598437309265137, |
|
"rewards/rejected": -52.88750076293945, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.7008, |
|
"grad_norm": 33.50747747591909, |
|
"learning_rate": 5.9904e-06, |
|
"logits/chosen": -1.0885741710662842, |
|
"logits/rejected": -0.971972644329071, |
|
"logps/chosen": -717.7999877929688, |
|
"logps/rejected": -721.7000122070312, |
|
"loss": 1.0924, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -48.025001525878906, |
|
"rewards/margins": 4.930468559265137, |
|
"rewards/rejected": -52.96875, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.704, |
|
"grad_norm": 32.846590306215425, |
|
"learning_rate": 5.9264e-06, |
|
"logits/chosen": -1.009033203125, |
|
"logits/rejected": -0.8384765386581421, |
|
"logps/chosen": -727.7999877929688, |
|
"logps/rejected": -722.4000244140625, |
|
"loss": 1.0518, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -47.943748474121094, |
|
"rewards/margins": 5.194140434265137, |
|
"rewards/rejected": -53.131248474121094, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.7072, |
|
"grad_norm": 18.12069084200929, |
|
"learning_rate": 5.8624e-06, |
|
"logits/chosen": -1.0234375, |
|
"logits/rejected": -0.83984375, |
|
"logps/chosen": -743.5999755859375, |
|
"logps/rejected": -744.9000244140625, |
|
"loss": 1.332, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -48.118751525878906, |
|
"rewards/margins": 5.614843845367432, |
|
"rewards/rejected": -53.73749923706055, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.7104, |
|
"grad_norm": 30.990298323590697, |
|
"learning_rate": 5.798400000000001e-06, |
|
"logits/chosen": -0.9886230230331421, |
|
"logits/rejected": -0.810791015625, |
|
"logps/chosen": -702.0999755859375, |
|
"logps/rejected": -717.4000244140625, |
|
"loss": 1.213, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -48.756248474121094, |
|
"rewards/margins": 4.651953220367432, |
|
"rewards/rejected": -53.4375, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.7136, |
|
"grad_norm": 25.950846199532993, |
|
"learning_rate": 5.7344e-06, |
|
"logits/chosen": -1.082421898841858, |
|
"logits/rejected": -0.895800769329071, |
|
"logps/chosen": -730.2999877929688, |
|
"logps/rejected": -726.7999877929688, |
|
"loss": 1.0928, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -49.03125, |
|
"rewards/margins": 5.237109184265137, |
|
"rewards/rejected": -54.26250076293945, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.7168, |
|
"grad_norm": 41.05623425015503, |
|
"learning_rate": 5.6704e-06, |
|
"logits/chosen": NaN, |
|
"logits/rejected": -0.9478393793106079, |
|
"logps/chosen": -697.0999755859375, |
|
"logps/rejected": -810.7000122070312, |
|
"loss": 1.1882, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -46.693748474121094, |
|
"rewards/margins": 4.758984565734863, |
|
"rewards/rejected": -51.42499923706055, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 26.691785630362762, |
|
"learning_rate": 5.606400000000001e-06, |
|
"logits/chosen": -1.12939453125, |
|
"logits/rejected": -0.979968249797821, |
|
"logps/chosen": -688.4000244140625, |
|
"logps/rejected": -691.2999877929688, |
|
"loss": 0.7555, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -45.45624923706055, |
|
"rewards/margins": 5.046093940734863, |
|
"rewards/rejected": -50.493751525878906, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.7232, |
|
"grad_norm": 36.09811273148159, |
|
"learning_rate": 5.5424e-06, |
|
"logits/chosen": NaN, |
|
"logits/rejected": -1.01104736328125, |
|
"logps/chosen": -717.2999877929688, |
|
"logps/rejected": -724.4000244140625, |
|
"loss": 1.2529, |
|
"rewards/accuracies": 0.7718750238418579, |
|
"rewards/chosen": -45.76874923706055, |
|
"rewards/margins": 5.684374809265137, |
|
"rewards/rejected": -51.443748474121094, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.7264, |
|
"grad_norm": 19.629173955929687, |
|
"learning_rate": 5.478400000000001e-06, |
|
"logits/chosen": -1.137792944908142, |
|
"logits/rejected": -1.005273461341858, |
|
"logps/chosen": -686.5, |
|
"logps/rejected": -691.0, |
|
"loss": 0.9069, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -45.26874923706055, |
|
"rewards/margins": 5.016406059265137, |
|
"rewards/rejected": -50.29999923706055, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.7296, |
|
"grad_norm": 15.321250419813746, |
|
"learning_rate": 5.414400000000001e-06, |
|
"logits/chosen": -1.083886742591858, |
|
"logits/rejected": -0.943359375, |
|
"logps/chosen": -716.4000244140625, |
|
"logps/rejected": -712.2000122070312, |
|
"loss": 0.9849, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -46.587501525878906, |
|
"rewards/margins": 5.075390815734863, |
|
"rewards/rejected": -51.65625, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.7328, |
|
"grad_norm": 25.626519146586663, |
|
"learning_rate": 5.3504e-06, |
|
"logits/chosen": -1.0432617664337158, |
|
"logits/rejected": -0.8513427972793579, |
|
"logps/chosen": -700.7999877929688, |
|
"logps/rejected": -706.0, |
|
"loss": 0.7705, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -46.962501525878906, |
|
"rewards/margins": 5.443749904632568, |
|
"rewards/rejected": -52.39374923706055, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.736, |
|
"grad_norm": 34.37616620792697, |
|
"learning_rate": 5.286400000000001e-06, |
|
"logits/chosen": -1.012231469154358, |
|
"logits/rejected": -0.8785156011581421, |
|
"logps/chosen": -704.5, |
|
"logps/rejected": -712.0, |
|
"loss": 0.991, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -48.131248474121094, |
|
"rewards/margins": 5.206640720367432, |
|
"rewards/rejected": -53.29999923706055, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.7392, |
|
"grad_norm": 18.513866720148574, |
|
"learning_rate": 5.222400000000001e-06, |
|
"logits/chosen": -1.0185058116912842, |
|
"logits/rejected": -0.868457019329071, |
|
"logps/chosen": -710.0999755859375, |
|
"logps/rejected": -707.7999877929688, |
|
"loss": 1.0177, |
|
"rewards/accuracies": 0.7906249761581421, |
|
"rewards/chosen": -46.162498474121094, |
|
"rewards/margins": 5.4765625, |
|
"rewards/rejected": -51.618751525878906, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.7424, |
|
"grad_norm": 19.111370557860546, |
|
"learning_rate": 5.1584000000000005e-06, |
|
"logits/chosen": NaN, |
|
"logits/rejected": -0.7626708745956421, |
|
"logps/chosen": -696.0, |
|
"logps/rejected": -699.9000244140625, |
|
"loss": 0.742, |
|
"rewards/accuracies": 0.7593749761581421, |
|
"rewards/chosen": -46.756248474121094, |
|
"rewards/margins": 5.501172065734863, |
|
"rewards/rejected": -52.23125076293945, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.7456, |
|
"grad_norm": 26.75112470648383, |
|
"learning_rate": 5.094400000000001e-06, |
|
"logits/chosen": -0.992968738079071, |
|
"logits/rejected": -0.7928711175918579, |
|
"logps/chosen": -699.2999877929688, |
|
"logps/rejected": -692.5, |
|
"loss": 0.7287, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -46.01874923706055, |
|
"rewards/margins": 5.475781440734863, |
|
"rewards/rejected": -51.48749923706055, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.7488, |
|
"grad_norm": 20.28190998304776, |
|
"learning_rate": 5.030400000000001e-06, |
|
"logits/chosen": -1.0549805164337158, |
|
"logits/rejected": -0.885986328125, |
|
"logps/chosen": -686.4000244140625, |
|
"logps/rejected": -705.7000122070312, |
|
"loss": 0.9325, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -44.33124923706055, |
|
"rewards/margins": 6.580078125, |
|
"rewards/rejected": -50.900001525878906, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.752, |
|
"grad_norm": 30.31617353068247, |
|
"learning_rate": 4.9664000000000004e-06, |
|
"logits/chosen": -0.877636730670929, |
|
"logits/rejected": -0.7491210699081421, |
|
"logps/chosen": -727.0, |
|
"logps/rejected": -742.2000122070312, |
|
"loss": 1.4356, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -49.42499923706055, |
|
"rewards/margins": 4.980859279632568, |
|
"rewards/rejected": -54.38750076293945, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.7552, |
|
"grad_norm": 40.91946462285244, |
|
"learning_rate": 4.902400000000001e-06, |
|
"logits/chosen": -0.966552734375, |
|
"logits/rejected": -0.845751941204071, |
|
"logps/chosen": -741.7000122070312, |
|
"logps/rejected": -738.4000244140625, |
|
"loss": 1.0233, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -48.15625, |
|
"rewards/margins": 5.648828029632568, |
|
"rewards/rejected": -53.8125, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.7584, |
|
"grad_norm": 34.039942825197706, |
|
"learning_rate": 4.8384e-06, |
|
"logits/chosen": -1.003027319908142, |
|
"logits/rejected": -0.858447253704071, |
|
"logps/chosen": -757.0, |
|
"logps/rejected": -728.7999877929688, |
|
"loss": 1.6555, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -48.25, |
|
"rewards/margins": 4.786718845367432, |
|
"rewards/rejected": -53.006248474121094, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.7616, |
|
"grad_norm": 37.671632804350644, |
|
"learning_rate": 4.7744e-06, |
|
"logits/chosen": -0.769360363483429, |
|
"logits/rejected": -0.765625, |
|
"logps/chosen": -677.7000122070312, |
|
"logps/rejected": -687.7999877929688, |
|
"loss": 0.8503, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -45.92499923706055, |
|
"rewards/margins": 5.232421875, |
|
"rewards/rejected": -51.150001525878906, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.7648, |
|
"grad_norm": 21.084041436826528, |
|
"learning_rate": 4.710400000000001e-06, |
|
"logits/chosen": -0.979687511920929, |
|
"logits/rejected": -0.841064453125, |
|
"logps/chosen": -699.0, |
|
"logps/rejected": -716.7999877929688, |
|
"loss": 0.8988, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -45.006248474121094, |
|
"rewards/margins": 5.942187309265137, |
|
"rewards/rejected": -50.95624923706055, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.768, |
|
"grad_norm": 30.650485611004637, |
|
"learning_rate": 4.6464e-06, |
|
"logits/chosen": -0.9476073980331421, |
|
"logits/rejected": -0.809130847454071, |
|
"logps/chosen": -693.2999877929688, |
|
"logps/rejected": -693.5, |
|
"loss": 1.0418, |
|
"rewards/accuracies": 0.778124988079071, |
|
"rewards/chosen": -44.506248474121094, |
|
"rewards/margins": 6.182031154632568, |
|
"rewards/rejected": -50.693748474121094, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.7712, |
|
"grad_norm": 26.04029078012027, |
|
"learning_rate": 4.5824e-06, |
|
"logits/chosen": -0.9512695074081421, |
|
"logits/rejected": -0.866650402545929, |
|
"logps/chosen": -699.0, |
|
"logps/rejected": -697.5999755859375, |
|
"loss": 1.1675, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -44.875, |
|
"rewards/margins": 4.698437690734863, |
|
"rewards/rejected": -49.556251525878906, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.7744, |
|
"grad_norm": 36.83215201031879, |
|
"learning_rate": 4.518400000000001e-06, |
|
"logits/chosen": -1.0114257335662842, |
|
"logits/rejected": -0.8132690191268921, |
|
"logps/chosen": -699.5999755859375, |
|
"logps/rejected": -692.5999755859375, |
|
"loss": 0.8036, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -45.6875, |
|
"rewards/margins": 4.723046779632568, |
|
"rewards/rejected": -50.40625, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.7776, |
|
"grad_norm": 22.070335988355417, |
|
"learning_rate": 4.4544e-06, |
|
"logits/chosen": -1.0234375, |
|
"logits/rejected": -0.854473888874054, |
|
"logps/chosen": -680.9000244140625, |
|
"logps/rejected": -704.5999755859375, |
|
"loss": 0.8279, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -44.287498474121094, |
|
"rewards/margins": 6.0546875, |
|
"rewards/rejected": -50.36249923706055, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.7808, |
|
"grad_norm": 25.46409714576939, |
|
"learning_rate": 4.3904e-06, |
|
"logits/chosen": -1.0222656726837158, |
|
"logits/rejected": -0.886279284954071, |
|
"logps/chosen": -687.4000244140625, |
|
"logps/rejected": -690.0999755859375, |
|
"loss": 0.8075, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -44.474998474121094, |
|
"rewards/margins": 5.271484375, |
|
"rewards/rejected": -49.73749923706055, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.784, |
|
"grad_norm": 27.18862097944765, |
|
"learning_rate": 4.326400000000001e-06, |
|
"logits/chosen": -0.98291015625, |
|
"logits/rejected": -0.8377441167831421, |
|
"logps/chosen": -707.9000244140625, |
|
"logps/rejected": -716.2999877929688, |
|
"loss": 0.9811, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -44.90625, |
|
"rewards/margins": 5.499218940734863, |
|
"rewards/rejected": -50.41875076293945, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.7872, |
|
"grad_norm": 19.320839238994825, |
|
"learning_rate": 4.2624e-06, |
|
"logits/chosen": NaN, |
|
"logits/rejected": -0.816760241985321, |
|
"logps/chosen": -684.5, |
|
"logps/rejected": -710.2999877929688, |
|
"loss": 1.0548, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -45.756248474121094, |
|
"rewards/margins": 5.346093654632568, |
|
"rewards/rejected": -51.099998474121094, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.7904, |
|
"grad_norm": 15.431831897667609, |
|
"learning_rate": 4.1984e-06, |
|
"logits/chosen": -0.8251098394393921, |
|
"logits/rejected": -0.7601318359375, |
|
"logps/chosen": -709.2999877929688, |
|
"logps/rejected": -720.7000122070312, |
|
"loss": 0.8206, |
|
"rewards/accuracies": 0.7593749761581421, |
|
"rewards/chosen": -46.13750076293945, |
|
"rewards/margins": 5.135937690734863, |
|
"rewards/rejected": -51.256248474121094, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.7936, |
|
"grad_norm": 31.682062882644072, |
|
"learning_rate": 4.1344e-06, |
|
"logits/chosen": -0.8877929449081421, |
|
"logits/rejected": -0.791735827922821, |
|
"logps/chosen": -730.4000244140625, |
|
"logps/rejected": -741.4000244140625, |
|
"loss": 1.0101, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -47.95624923706055, |
|
"rewards/margins": 4.861718654632568, |
|
"rewards/rejected": -52.806251525878906, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.7968, |
|
"grad_norm": 27.980848387195604, |
|
"learning_rate": 4.070400000000001e-06, |
|
"logits/chosen": -0.811279296875, |
|
"logits/rejected": -0.635449230670929, |
|
"logps/chosen": -719.5999755859375, |
|
"logps/rejected": -724.5, |
|
"loss": 0.9866, |
|
"rewards/accuracies": 0.7593749761581421, |
|
"rewards/chosen": -48.35625076293945, |
|
"rewards/margins": 4.940625190734863, |
|
"rewards/rejected": -53.28125, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 29.28289970079953, |
|
"learning_rate": 4.0064e-06, |
|
"logits/chosen": -0.7798095941543579, |
|
"logits/rejected": -0.6259216070175171, |
|
"logps/chosen": -700.2999877929688, |
|
"logps/rejected": -704.5, |
|
"loss": 0.9269, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -47.70000076293945, |
|
"rewards/margins": 4.230078220367432, |
|
"rewards/rejected": -51.91875076293945, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.8032, |
|
"grad_norm": 49.45455905668464, |
|
"learning_rate": 3.9424e-06, |
|
"logits/chosen": NaN, |
|
"logits/rejected": -0.6906982660293579, |
|
"logps/chosen": -700.9000244140625, |
|
"logps/rejected": -821.7000122070312, |
|
"loss": 1.232, |
|
"rewards/accuracies": 0.784375011920929, |
|
"rewards/chosen": -46.29999923706055, |
|
"rewards/margins": 4.734375, |
|
"rewards/rejected": -51.01874923706055, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.8064, |
|
"grad_norm": 31.980275647030638, |
|
"learning_rate": 3.878400000000001e-06, |
|
"logits/chosen": -0.9493163824081421, |
|
"logits/rejected": -0.7739502191543579, |
|
"logps/chosen": -702.5, |
|
"logps/rejected": -701.5999755859375, |
|
"loss": 1.0134, |
|
"rewards/accuracies": 0.7718750238418579, |
|
"rewards/chosen": -46.29999923706055, |
|
"rewards/margins": 5.244531154632568, |
|
"rewards/rejected": -51.537498474121094, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.8096, |
|
"grad_norm": 32.60979718815698, |
|
"learning_rate": 3.8144000000000003e-06, |
|
"logits/chosen": -0.919140636920929, |
|
"logits/rejected": -0.746960461139679, |
|
"logps/chosen": -720.9000244140625, |
|
"logps/rejected": -733.0999755859375, |
|
"loss": 1.066, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -46.025001525878906, |
|
"rewards/margins": 6.26171875, |
|
"rewards/rejected": -52.29375076293945, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.8128, |
|
"grad_norm": 18.078133548658265, |
|
"learning_rate": 3.7504e-06, |
|
"logits/chosen": -0.819042980670929, |
|
"logits/rejected": -0.6877075433731079, |
|
"logps/chosen": -717.5999755859375, |
|
"logps/rejected": -709.0999755859375, |
|
"loss": 0.7904, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -46.73749923706055, |
|
"rewards/margins": 5.543359279632568, |
|
"rewards/rejected": -52.29999923706055, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.816, |
|
"grad_norm": 17.463419825027277, |
|
"learning_rate": 3.6864000000000004e-06, |
|
"logits/chosen": -0.8096923828125, |
|
"logits/rejected": -0.663586437702179, |
|
"logps/chosen": -690.5999755859375, |
|
"logps/rejected": -689.7999877929688, |
|
"loss": 0.8754, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -46.20624923706055, |
|
"rewards/margins": 5.366796970367432, |
|
"rewards/rejected": -51.587501525878906, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.8192, |
|
"grad_norm": 34.45398458085358, |
|
"learning_rate": 3.6224000000000002e-06, |
|
"logits/chosen": NaN, |
|
"logits/rejected": -0.6997131109237671, |
|
"logps/chosen": -702.2999877929688, |
|
"logps/rejected": -694.0, |
|
"loss": 0.8496, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -46.29375076293945, |
|
"rewards/margins": 4.94140625, |
|
"rewards/rejected": -51.20624923706055, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.8224, |
|
"grad_norm": 50.25198996079993, |
|
"learning_rate": 3.5584e-06, |
|
"logits/chosen": -0.758056640625, |
|
"logits/rejected": -0.6135498285293579, |
|
"logps/chosen": -687.7999877929688, |
|
"logps/rejected": -691.5999755859375, |
|
"loss": 1.0614, |
|
"rewards/accuracies": 0.784375011920929, |
|
"rewards/chosen": -46.256248474121094, |
|
"rewards/margins": 4.607812404632568, |
|
"rewards/rejected": -50.86249923706055, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.8256, |
|
"grad_norm": 19.482286568161427, |
|
"learning_rate": 3.4944e-06, |
|
"logits/chosen": -0.765332043170929, |
|
"logits/rejected": -0.599072277545929, |
|
"logps/chosen": -715.4000244140625, |
|
"logps/rejected": -710.9000244140625, |
|
"loss": 0.9956, |
|
"rewards/accuracies": 0.7593749761581421, |
|
"rewards/chosen": -45.78125, |
|
"rewards/margins": 4.977734565734863, |
|
"rewards/rejected": -50.76250076293945, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.8288, |
|
"grad_norm": 27.18743711565877, |
|
"learning_rate": 3.4304000000000002e-06, |
|
"logits/chosen": -0.725689709186554, |
|
"logits/rejected": -0.6280151605606079, |
|
"logps/chosen": -705.2000122070312, |
|
"logps/rejected": -717.0, |
|
"loss": 0.8677, |
|
"rewards/accuracies": 0.7906249761581421, |
|
"rewards/chosen": -47.41875076293945, |
|
"rewards/margins": 4.928906440734863, |
|
"rewards/rejected": -52.36249923706055, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.832, |
|
"grad_norm": 28.274057380796958, |
|
"learning_rate": 3.3664e-06, |
|
"logits/chosen": -0.815625011920929, |
|
"logits/rejected": -0.579394519329071, |
|
"logps/chosen": -711.7000122070312, |
|
"logps/rejected": -700.2999877929688, |
|
"loss": 0.801, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -46.681251525878906, |
|
"rewards/margins": 5.397265434265137, |
|
"rewards/rejected": -52.0625, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.8352, |
|
"grad_norm": 27.10807058678835, |
|
"learning_rate": 3.3024e-06, |
|
"logits/chosen": -0.818896472454071, |
|
"logits/rejected": -0.65411376953125, |
|
"logps/chosen": -691.2000122070312, |
|
"logps/rejected": -701.9000244140625, |
|
"loss": 0.8487, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -45.91875076293945, |
|
"rewards/margins": 5.360547065734863, |
|
"rewards/rejected": -51.28125, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.8384, |
|
"grad_norm": 16.848696540134306, |
|
"learning_rate": 3.2384000000000006e-06, |
|
"logits/chosen": -0.879833996295929, |
|
"logits/rejected": -0.7293335199356079, |
|
"logps/chosen": -723.7000122070312, |
|
"logps/rejected": -734.4000244140625, |
|
"loss": 0.9332, |
|
"rewards/accuracies": 0.8031250238418579, |
|
"rewards/chosen": -47.57500076293945, |
|
"rewards/margins": 5.736718654632568, |
|
"rewards/rejected": -53.34375, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.8416, |
|
"grad_norm": 23.088137550157327, |
|
"learning_rate": 3.1744e-06, |
|
"logits/chosen": -0.7398926019668579, |
|
"logits/rejected": -0.58740234375, |
|
"logps/chosen": -700.2000122070312, |
|
"logps/rejected": -716.9000244140625, |
|
"loss": 0.8959, |
|
"rewards/accuracies": 0.7718750238418579, |
|
"rewards/chosen": -48.04375076293945, |
|
"rewards/margins": 5.1171875, |
|
"rewards/rejected": -53.16875076293945, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.8448, |
|
"grad_norm": 41.2060376447536, |
|
"learning_rate": 3.1104e-06, |
|
"logits/chosen": -0.8064941167831421, |
|
"logits/rejected": -0.6497802734375, |
|
"logps/chosen": -723.0999755859375, |
|
"logps/rejected": -730.7000122070312, |
|
"loss": 1.0018, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -47.89374923706055, |
|
"rewards/margins": 4.893750190734863, |
|
"rewards/rejected": -52.79375076293945, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.848, |
|
"grad_norm": 17.33185174612147, |
|
"learning_rate": 3.0464000000000006e-06, |
|
"logits/chosen": -0.8216308355331421, |
|
"logits/rejected": -0.7013915777206421, |
|
"logps/chosen": -732.7000122070312, |
|
"logps/rejected": -728.2000122070312, |
|
"loss": 0.9806, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -48.125, |
|
"rewards/margins": 4.714062690734863, |
|
"rewards/rejected": -52.84375, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.8512, |
|
"grad_norm": 24.95900546548201, |
|
"learning_rate": 2.9824000000000004e-06, |
|
"logits/chosen": NaN, |
|
"logits/rejected": -0.6495605707168579, |
|
"logps/chosen": -713.5, |
|
"logps/rejected": -722.9000244140625, |
|
"loss": 0.7509, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -46.640625, |
|
"rewards/margins": 5.935937404632568, |
|
"rewards/rejected": -52.54999923706055, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.8544, |
|
"grad_norm": 25.149175120143106, |
|
"learning_rate": 2.9184000000000003e-06, |
|
"logits/chosen": -0.812207043170929, |
|
"logits/rejected": -0.5872558355331421, |
|
"logps/chosen": -702.7000122070312, |
|
"logps/rejected": -694.0, |
|
"loss": 0.9383, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -47.900001525878906, |
|
"rewards/margins": 4.258593559265137, |
|
"rewards/rejected": -52.14374923706055, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.8576, |
|
"grad_norm": 12.827501902736019, |
|
"learning_rate": 2.8544000000000006e-06, |
|
"logits/chosen": -0.889843761920929, |
|
"logits/rejected": -0.7074950933456421, |
|
"logps/chosen": -700.7000122070312, |
|
"logps/rejected": -717.4000244140625, |
|
"loss": 0.9013, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -46.006248474121094, |
|
"rewards/margins": 5.360156059265137, |
|
"rewards/rejected": -51.36249923706055, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.8608, |
|
"grad_norm": 23.908624027933058, |
|
"learning_rate": 2.7904000000000004e-06, |
|
"logits/chosen": -0.8385254144668579, |
|
"logits/rejected": -0.65087890625, |
|
"logps/chosen": -684.5, |
|
"logps/rejected": -682.7000122070312, |
|
"loss": 0.999, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -46.29999923706055, |
|
"rewards/margins": 4.389062404632568, |
|
"rewards/rejected": -50.71875, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.864, |
|
"grad_norm": 22.34800607029224, |
|
"learning_rate": 2.7264000000000003e-06, |
|
"logits/chosen": -0.818115234375, |
|
"logits/rejected": -0.692089855670929, |
|
"logps/chosen": -693.0999755859375, |
|
"logps/rejected": -697.7000122070312, |
|
"loss": 0.8839, |
|
"rewards/accuracies": 0.7718750238418579, |
|
"rewards/chosen": -45.54375076293945, |
|
"rewards/margins": 5.27734375, |
|
"rewards/rejected": -50.78125, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.8672, |
|
"grad_norm": 25.496548305387073, |
|
"learning_rate": 2.6624e-06, |
|
"logits/chosen": -0.839428722858429, |
|
"logits/rejected": -0.694866955280304, |
|
"logps/chosen": -677.0, |
|
"logps/rejected": -676.2999877929688, |
|
"loss": 1.069, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -45.26250076293945, |
|
"rewards/margins": 5.021874904632568, |
|
"rewards/rejected": -50.306251525878906, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.8704, |
|
"grad_norm": 38.775614969317544, |
|
"learning_rate": 2.5984000000000004e-06, |
|
"logits/chosen": -0.8185790777206421, |
|
"logits/rejected": -0.6710449457168579, |
|
"logps/chosen": -680.2999877929688, |
|
"logps/rejected": -722.9000244140625, |
|
"loss": 0.8952, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -45.20624923706055, |
|
"rewards/margins": 7.703906059265137, |
|
"rewards/rejected": -52.90625, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.8736, |
|
"grad_norm": 27.731107053080645, |
|
"learning_rate": 2.5344000000000002e-06, |
|
"logits/chosen": -0.8282226324081421, |
|
"logits/rejected": -0.7029052972793579, |
|
"logps/chosen": -689.7000122070312, |
|
"logps/rejected": -701.9000244140625, |
|
"loss": 0.965, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -47.381248474121094, |
|
"rewards/margins": 4.256640434265137, |
|
"rewards/rejected": -51.65625, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.8768, |
|
"grad_norm": 25.15430651962972, |
|
"learning_rate": 2.4704e-06, |
|
"logits/chosen": -0.8565429449081421, |
|
"logits/rejected": -0.7360382080078125, |
|
"logps/chosen": -719.0, |
|
"logps/rejected": -725.5, |
|
"loss": 0.8099, |
|
"rewards/accuracies": 0.778124988079071, |
|
"rewards/chosen": -47.375, |
|
"rewards/margins": 5.586718559265137, |
|
"rewards/rejected": -52.95624923706055, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 16.604222380353896, |
|
"learning_rate": 2.4064e-06, |
|
"logits/chosen": -0.877636730670929, |
|
"logits/rejected": -0.7131592035293579, |
|
"logps/chosen": -724.5, |
|
"logps/rejected": -733.5999755859375, |
|
"loss": 0.7523, |
|
"rewards/accuracies": 0.7906249761581421, |
|
"rewards/chosen": -47.5625, |
|
"rewards/margins": 5.414843559265137, |
|
"rewards/rejected": -53.01874923706055, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.8832, |
|
"grad_norm": 28.08075144528864, |
|
"learning_rate": 2.3424000000000002e-06, |
|
"logits/chosen": -0.810229480266571, |
|
"logits/rejected": -0.734570324420929, |
|
"logps/chosen": -728.5, |
|
"logps/rejected": -744.2999877929688, |
|
"loss": 0.836, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -48.40625, |
|
"rewards/margins": 5.391406059265137, |
|
"rewards/rejected": -53.806251525878906, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.8864, |
|
"grad_norm": 28.94506411278981, |
|
"learning_rate": 2.2784e-06, |
|
"logits/chosen": -0.716931164264679, |
|
"logits/rejected": NaN, |
|
"logps/chosen": -733.5, |
|
"logps/rejected": -724.0999755859375, |
|
"loss": 1.5468, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -49.868751525878906, |
|
"rewards/margins": 3.221484422683716, |
|
"rewards/rejected": -53.087501525878906, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.8896, |
|
"grad_norm": 20.33472177289239, |
|
"learning_rate": 2.2144000000000003e-06, |
|
"logits/chosen": -0.8364013433456421, |
|
"logits/rejected": -0.6757232546806335, |
|
"logps/chosen": -726.0, |
|
"logps/rejected": -714.7999877929688, |
|
"loss": 0.5291, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -47.98125076293945, |
|
"rewards/margins": 5.720312595367432, |
|
"rewards/rejected": -53.70000076293945, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.8928, |
|
"grad_norm": 51.97128098464107, |
|
"learning_rate": 2.1504e-06, |
|
"logits/chosen": -0.863330066204071, |
|
"logits/rejected": -0.766650378704071, |
|
"logps/chosen": -735.7999877929688, |
|
"logps/rejected": -756.0999755859375, |
|
"loss": 0.9659, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -47.59375, |
|
"rewards/margins": 6.539843559265137, |
|
"rewards/rejected": -54.15625, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.896, |
|
"grad_norm": 34.837802174621544, |
|
"learning_rate": 2.0864e-06, |
|
"logits/chosen": -0.863842785358429, |
|
"logits/rejected": -0.722607433795929, |
|
"logps/chosen": -693.2999877929688, |
|
"logps/rejected": -702.0, |
|
"loss": 0.9458, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -45.92499923706055, |
|
"rewards/margins": 5.104296684265137, |
|
"rewards/rejected": -51.01874923706055, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.8992, |
|
"grad_norm": 28.70089724997986, |
|
"learning_rate": 2.0224000000000003e-06, |
|
"logits/chosen": -0.869335949420929, |
|
"logits/rejected": -0.8036133050918579, |
|
"logps/chosen": -710.0, |
|
"logps/rejected": -715.2999877929688, |
|
"loss": 1.1216, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -46.3125, |
|
"rewards/margins": 4.509375095367432, |
|
"rewards/rejected": -50.818748474121094, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.9024, |
|
"grad_norm": 30.63117289172316, |
|
"learning_rate": 1.9584e-06, |
|
"logits/chosen": -0.7902587652206421, |
|
"logits/rejected": -0.666430652141571, |
|
"logps/chosen": -669.2999877929688, |
|
"logps/rejected": -677.5999755859375, |
|
"loss": 0.8344, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -45.5, |
|
"rewards/margins": 4.933203220367432, |
|
"rewards/rejected": -50.431251525878906, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.9056, |
|
"grad_norm": 25.68361016400013, |
|
"learning_rate": 1.8944e-06, |
|
"logits/chosen": -0.8807617425918579, |
|
"logits/rejected": -0.740283191204071, |
|
"logps/chosen": -713.5, |
|
"logps/rejected": -711.4000244140625, |
|
"loss": 0.7472, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -45.912498474121094, |
|
"rewards/margins": 5.207812309265137, |
|
"rewards/rejected": -51.14374923706055, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.9088, |
|
"grad_norm": 20.342553234409863, |
|
"learning_rate": 1.8304000000000003e-06, |
|
"logits/chosen": -0.875, |
|
"logits/rejected": -0.7489013671875, |
|
"logps/chosen": -730.7999877929688, |
|
"logps/rejected": -714.2999877929688, |
|
"loss": 0.8745, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -48.1875, |
|
"rewards/margins": 4.098437309265137, |
|
"rewards/rejected": -52.287498474121094, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.912, |
|
"grad_norm": 87.2935450396475, |
|
"learning_rate": 1.7664000000000001e-06, |
|
"logits/chosen": -0.772265613079071, |
|
"logits/rejected": -0.7514404058456421, |
|
"logps/chosen": -712.5, |
|
"logps/rejected": -727.0999755859375, |
|
"loss": 0.9705, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -48.03125, |
|
"rewards/margins": 4.150000095367432, |
|
"rewards/rejected": -52.162498474121094, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.9152, |
|
"grad_norm": 32.63676345895593, |
|
"learning_rate": 1.7024000000000002e-06, |
|
"logits/chosen": -0.8949218988418579, |
|
"logits/rejected": -0.729443371295929, |
|
"logps/chosen": -708.0, |
|
"logps/rejected": -718.9000244140625, |
|
"loss": 0.9999, |
|
"rewards/accuracies": 0.7718750238418579, |
|
"rewards/chosen": -47.587501525878906, |
|
"rewards/margins": 4.604687690734863, |
|
"rewards/rejected": -52.212501525878906, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.9184, |
|
"grad_norm": 13.131566594805347, |
|
"learning_rate": 1.6384000000000003e-06, |
|
"logits/chosen": -0.839648425579071, |
|
"logits/rejected": -0.675048828125, |
|
"logps/chosen": -691.2000122070312, |
|
"logps/rejected": -698.5, |
|
"loss": 0.7785, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -46.23749923706055, |
|
"rewards/margins": 5.367968559265137, |
|
"rewards/rejected": -51.618751525878906, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.9216, |
|
"grad_norm": 20.210063813573157, |
|
"learning_rate": 1.5744000000000001e-06, |
|
"logits/chosen": NaN, |
|
"logits/rejected": -0.6630004644393921, |
|
"logps/chosen": -692.7999877929688, |
|
"logps/rejected": -704.0999755859375, |
|
"loss": 0.8411, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -46.131248474121094, |
|
"rewards/margins": 4.912109375, |
|
"rewards/rejected": -51.0625, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.9248, |
|
"grad_norm": 38.30773616311555, |
|
"learning_rate": 1.5104000000000002e-06, |
|
"logits/chosen": -0.826892077922821, |
|
"logits/rejected": -0.7580322027206421, |
|
"logps/chosen": -688.2000122070312, |
|
"logps/rejected": -697.5999755859375, |
|
"loss": 0.8446, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -46.21875, |
|
"rewards/margins": 4.414843559265137, |
|
"rewards/rejected": -50.60625076293945, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.928, |
|
"grad_norm": 40.22722342990997, |
|
"learning_rate": 1.4464e-06, |
|
"logits/chosen": -0.8775390386581421, |
|
"logits/rejected": -0.6669158935546875, |
|
"logps/chosen": -696.7999877929688, |
|
"logps/rejected": -687.7999877929688, |
|
"loss": 1.3252, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -47.087501525878906, |
|
"rewards/margins": 3.6832032203674316, |
|
"rewards/rejected": -50.756248474121094, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.9312, |
|
"grad_norm": 31.041235936499106, |
|
"learning_rate": 1.3824e-06, |
|
"logits/chosen": -0.8633056879043579, |
|
"logits/rejected": -0.733020007610321, |
|
"logps/chosen": -704.2000122070312, |
|
"logps/rejected": -712.7999877929688, |
|
"loss": 1.1374, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -46.881248474121094, |
|
"rewards/margins": 3.9964842796325684, |
|
"rewards/rejected": -50.875, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.9344, |
|
"grad_norm": 28.493763023980495, |
|
"learning_rate": 1.3184000000000002e-06, |
|
"logits/chosen": -0.837451159954071, |
|
"logits/rejected": -0.7790282964706421, |
|
"logps/chosen": -711.4000244140625, |
|
"logps/rejected": -705.2999877929688, |
|
"loss": 0.9248, |
|
"rewards/accuracies": 0.784375011920929, |
|
"rewards/chosen": -45.20624923706055, |
|
"rewards/margins": 5.015234470367432, |
|
"rewards/rejected": -50.224998474121094, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.9376, |
|
"grad_norm": 25.54508861275617, |
|
"learning_rate": 1.2544e-06, |
|
"logits/chosen": NaN, |
|
"logits/rejected": -0.6931396722793579, |
|
"logps/chosen": -704.9000244140625, |
|
"logps/rejected": -707.2999877929688, |
|
"loss": 1.0966, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -46.21875, |
|
"rewards/margins": 4.586328029632568, |
|
"rewards/rejected": -50.8125, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.9408, |
|
"grad_norm": 18.95103038138321, |
|
"learning_rate": 1.1904e-06, |
|
"logits/chosen": NaN, |
|
"logits/rejected": -0.6669677495956421, |
|
"logps/chosen": -701.0, |
|
"logps/rejected": -697.0, |
|
"loss": 0.9149, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -46.618751525878906, |
|
"rewards/margins": 4.646093845367432, |
|
"rewards/rejected": -51.23125076293945, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.944, |
|
"grad_norm": 24.581771907889188, |
|
"learning_rate": 1.1264000000000001e-06, |
|
"logits/chosen": -0.8424072265625, |
|
"logits/rejected": -0.653454601764679, |
|
"logps/chosen": -712.7999877929688, |
|
"logps/rejected": -708.9000244140625, |
|
"loss": 0.9627, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -47.006248474121094, |
|
"rewards/margins": 4.411718845367432, |
|
"rewards/rejected": -51.375, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.9472, |
|
"grad_norm": 36.59334250475912, |
|
"learning_rate": 1.0624000000000002e-06, |
|
"logits/chosen": -0.855224609375, |
|
"logits/rejected": -0.755810558795929, |
|
"logps/chosen": -726.0, |
|
"logps/rejected": -719.5999755859375, |
|
"loss": 1.1101, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -45.931251525878906, |
|
"rewards/margins": 4.267187595367432, |
|
"rewards/rejected": -50.193748474121094, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.9504, |
|
"grad_norm": 18.958201936576806, |
|
"learning_rate": 9.984e-07, |
|
"logits/chosen": -0.8471924066543579, |
|
"logits/rejected": -0.7294921875, |
|
"logps/chosen": -726.0, |
|
"logps/rejected": -720.2999877929688, |
|
"loss": 0.571, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -45.693748474121094, |
|
"rewards/margins": 5.928124904632568, |
|
"rewards/rejected": -51.618751525878906, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.9536, |
|
"grad_norm": 16.88941090161605, |
|
"learning_rate": 9.344e-07, |
|
"logits/chosen": -0.8091796636581421, |
|
"logits/rejected": -0.6782592535018921, |
|
"logps/chosen": -692.7999877929688, |
|
"logps/rejected": -703.2999877929688, |
|
"loss": 0.9194, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -46.443748474121094, |
|
"rewards/margins": 5.051562309265137, |
|
"rewards/rejected": -51.48749923706055, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.9568, |
|
"grad_norm": 25.846567770988575, |
|
"learning_rate": 8.704000000000002e-07, |
|
"logits/chosen": -0.852001965045929, |
|
"logits/rejected": -0.621874988079071, |
|
"logps/chosen": -706.7999877929688, |
|
"logps/rejected": -693.0999755859375, |
|
"loss": 0.716, |
|
"rewards/accuracies": 0.7906249761581421, |
|
"rewards/chosen": -46.243751525878906, |
|
"rewards/margins": 4.926953315734863, |
|
"rewards/rejected": -51.181251525878906, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 35.33734147087596, |
|
"learning_rate": 8.064000000000001e-07, |
|
"logits/chosen": -0.842089831829071, |
|
"logits/rejected": -0.7021239995956421, |
|
"logps/chosen": -698.2000122070312, |
|
"logps/rejected": -703.5, |
|
"loss": 0.7254, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -45.931251525878906, |
|
"rewards/margins": 5.244531154632568, |
|
"rewards/rejected": -51.181251525878906, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.9632, |
|
"grad_norm": 22.328108594927276, |
|
"learning_rate": 7.424000000000001e-07, |
|
"logits/chosen": -0.865039050579071, |
|
"logits/rejected": -0.7381988763809204, |
|
"logps/chosen": -714.9000244140625, |
|
"logps/rejected": -717.7000122070312, |
|
"loss": 0.8847, |
|
"rewards/accuracies": 0.7718750238418579, |
|
"rewards/chosen": -45.775001525878906, |
|
"rewards/margins": 5.114062309265137, |
|
"rewards/rejected": -50.88750076293945, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.9664, |
|
"grad_norm": 30.66790250732441, |
|
"learning_rate": 6.784e-07, |
|
"logits/chosen": -0.8562256097793579, |
|
"logits/rejected": NaN, |
|
"logps/chosen": -698.0, |
|
"logps/rejected": -707.4000244140625, |
|
"loss": 0.7132, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -44.63750076293945, |
|
"rewards/margins": 5.620312690734863, |
|
"rewards/rejected": -50.243751525878906, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.9696, |
|
"grad_norm": 32.290821615623756, |
|
"learning_rate": 6.144000000000001e-07, |
|
"logits/chosen": -0.859570324420929, |
|
"logits/rejected": -0.708325207233429, |
|
"logps/chosen": -718.4000244140625, |
|
"logps/rejected": -707.2000122070312, |
|
"loss": 0.76, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -45.787498474121094, |
|
"rewards/margins": 5.176953315734863, |
|
"rewards/rejected": -50.974998474121094, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.9728, |
|
"grad_norm": 36.98146302872118, |
|
"learning_rate": 5.504000000000001e-07, |
|
"logits/chosen": -0.817822277545929, |
|
"logits/rejected": -0.620288074016571, |
|
"logps/chosen": -697.5, |
|
"logps/rejected": -696.2000122070312, |
|
"loss": 0.7549, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -46.287498474121094, |
|
"rewards/margins": 5.23828125, |
|
"rewards/rejected": -51.53125, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.976, |
|
"grad_norm": 15.540019445487696, |
|
"learning_rate": 4.864e-07, |
|
"logits/chosen": -0.868945300579071, |
|
"logits/rejected": -0.7304443120956421, |
|
"logps/chosen": -696.0, |
|
"logps/rejected": -714.7000122070312, |
|
"loss": 0.9413, |
|
"rewards/accuracies": 0.7718750238418579, |
|
"rewards/chosen": -45.98749923706055, |
|
"rewards/margins": 4.785937309265137, |
|
"rewards/rejected": -50.76874923706055, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.9792, |
|
"grad_norm": 24.566892342952926, |
|
"learning_rate": 4.224e-07, |
|
"logits/chosen": -0.847485363483429, |
|
"logits/rejected": -0.6754394769668579, |
|
"logps/chosen": -712.2999877929688, |
|
"logps/rejected": -718.7999877929688, |
|
"loss": 0.8152, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -46.16875076293945, |
|
"rewards/margins": 5.44921875, |
|
"rewards/rejected": -51.599998474121094, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.9824, |
|
"grad_norm": 16.268903048624235, |
|
"learning_rate": 3.584e-07, |
|
"logits/chosen": -0.846875011920929, |
|
"logits/rejected": -0.7189697027206421, |
|
"logps/chosen": -696.4000244140625, |
|
"logps/rejected": -700.9000244140625, |
|
"loss": 0.7778, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -46.150001525878906, |
|
"rewards/margins": 4.963281154632568, |
|
"rewards/rejected": -51.125, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.9856, |
|
"grad_norm": 45.08062165104853, |
|
"learning_rate": 2.9440000000000004e-07, |
|
"logits/chosen": -0.8548828363418579, |
|
"logits/rejected": -0.72186279296875, |
|
"logps/chosen": -702.2999877929688, |
|
"logps/rejected": -699.0999755859375, |
|
"loss": 0.9371, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -46.368751525878906, |
|
"rewards/margins": 4.705078125, |
|
"rewards/rejected": -51.08124923706055, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.9888, |
|
"grad_norm": 23.350810243354506, |
|
"learning_rate": 2.3040000000000002e-07, |
|
"logits/chosen": -0.851611316204071, |
|
"logits/rejected": -0.7162841558456421, |
|
"logps/chosen": -696.2000122070312, |
|
"logps/rejected": -700.5, |
|
"loss": 0.8579, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -46.67499923706055, |
|
"rewards/margins": 4.52734375, |
|
"rewards/rejected": -51.224998474121094, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.992, |
|
"grad_norm": 25.578366828611138, |
|
"learning_rate": 1.664e-07, |
|
"logits/chosen": -0.8206542730331421, |
|
"logits/rejected": -0.73602294921875, |
|
"logps/chosen": -711.2000122070312, |
|
"logps/rejected": -714.0, |
|
"loss": 0.8973, |
|
"rewards/accuracies": 0.784375011920929, |
|
"rewards/chosen": -46.54375076293945, |
|
"rewards/margins": 4.841406345367432, |
|
"rewards/rejected": -51.381248474121094, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.9952, |
|
"grad_norm": 24.117757080722892, |
|
"learning_rate": 1.0240000000000002e-07, |
|
"logits/chosen": -0.8827148675918579, |
|
"logits/rejected": -0.6673828363418579, |
|
"logps/chosen": -702.7000122070312, |
|
"logps/rejected": -701.7999877929688, |
|
"loss": 0.7748, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -46.212501525878906, |
|
"rewards/margins": 4.952343940734863, |
|
"rewards/rejected": -51.14374923706055, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.9984, |
|
"grad_norm": 22.68071342499608, |
|
"learning_rate": 3.8400000000000006e-08, |
|
"logits/chosen": -0.818408191204071, |
|
"logits/rejected": -0.71337890625, |
|
"logps/chosen": -691.7000122070312, |
|
"logps/rejected": -703.0999755859375, |
|
"loss": 0.8272, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -46.16875076293945, |
|
"rewards/margins": 5.1015625, |
|
"rewards/rejected": -51.275001525878906, |
|
"step": 3120 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3125, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|