|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.999657651489216, |
|
"eval_steps": 100, |
|
"global_step": 1460, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.424657534246575e-09, |
|
"logits/chosen": -1.796067237854004, |
|
"logits/rejected": -1.6250377893447876, |
|
"logps/chosen": -84.08734130859375, |
|
"logps/rejected": -66.90229797363281, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.424657534246575e-08, |
|
"logits/chosen": -1.7872660160064697, |
|
"logits/rejected": -1.5217690467834473, |
|
"logps/chosen": -91.57577514648438, |
|
"logps/rejected": -78.510498046875, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.4166666567325592, |
|
"rewards/chosen": 0.00252585974521935, |
|
"rewards/margins": 0.003410403151065111, |
|
"rewards/rejected": -0.0008845434640534222, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.84931506849315e-08, |
|
"logits/chosen": -1.9197826385498047, |
|
"logits/rejected": -1.6265367269515991, |
|
"logps/chosen": -96.18563079833984, |
|
"logps/rejected": -73.0329818725586, |
|
"loss": 0.6941, |
|
"rewards/accuracies": 0.4333333373069763, |
|
"rewards/chosen": -0.0032726190984249115, |
|
"rewards/margins": -0.004598576575517654, |
|
"rewards/rejected": 0.0013259568950161338, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0273972602739725e-07, |
|
"logits/chosen": -1.8969062566757202, |
|
"logits/rejected": -1.5750768184661865, |
|
"logps/chosen": -96.1051254272461, |
|
"logps/rejected": -74.69762420654297, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5416666865348816, |
|
"rewards/chosen": -0.0011650085216388106, |
|
"rewards/margins": 0.001948213903233409, |
|
"rewards/rejected": -0.0031132223084568977, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.36986301369863e-07, |
|
"logits/chosen": -1.9271103143692017, |
|
"logits/rejected": -1.6277456283569336, |
|
"logps/chosen": -96.0181884765625, |
|
"logps/rejected": -79.47406005859375, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.5166666507720947, |
|
"rewards/chosen": -0.001698245876468718, |
|
"rewards/margins": -0.0010797118302434683, |
|
"rewards/rejected": -0.0006185341626405716, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.7123287671232875e-07, |
|
"logits/chosen": -1.9380648136138916, |
|
"logits/rejected": -1.6861069202423096, |
|
"logps/chosen": -93.2973861694336, |
|
"logps/rejected": -76.30517578125, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.34166663885116577, |
|
"rewards/chosen": -0.0030570379458367825, |
|
"rewards/margins": -0.005480821710079908, |
|
"rewards/rejected": 0.002423783764243126, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.054794520547945e-07, |
|
"logits/chosen": -1.832024335861206, |
|
"logits/rejected": -1.511380910873413, |
|
"logps/chosen": -98.84480285644531, |
|
"logps/rejected": -75.86248779296875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4500000476837158, |
|
"rewards/chosen": -0.003249814035370946, |
|
"rewards/margins": -0.00364103470928967, |
|
"rewards/rejected": 0.00039122122689150274, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.3972602739726023e-07, |
|
"logits/chosen": -1.9178974628448486, |
|
"logits/rejected": -1.6177875995635986, |
|
"logps/chosen": -92.13301086425781, |
|
"logps/rejected": -75.75408172607422, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.5333333611488342, |
|
"rewards/chosen": 0.0013952379813417792, |
|
"rewards/margins": 0.0021971219684928656, |
|
"rewards/rejected": -0.0008018844528123736, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.73972602739726e-07, |
|
"logits/chosen": -1.938677191734314, |
|
"logits/rejected": -1.6598854064941406, |
|
"logps/chosen": -88.68348693847656, |
|
"logps/rejected": -74.39127349853516, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5333333015441895, |
|
"rewards/chosen": 0.0035478367935866117, |
|
"rewards/margins": 0.003417719155550003, |
|
"rewards/rejected": 0.00013011766714043915, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.0821917808219176e-07, |
|
"logits/chosen": -1.832851767539978, |
|
"logits/rejected": -1.5398364067077637, |
|
"logps/chosen": -92.82958984375, |
|
"logps/rejected": -73.15336608886719, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.5833333730697632, |
|
"rewards/chosen": 0.00075482705142349, |
|
"rewards/margins": 0.0024536100681871176, |
|
"rewards/rejected": -0.001698783366009593, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.424657534246575e-07, |
|
"logits/chosen": -1.8959871530532837, |
|
"logits/rejected": -1.6301053762435913, |
|
"logps/chosen": -92.27674865722656, |
|
"logps/rejected": -74.60256958007812, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.0006887881900183856, |
|
"rewards/margins": 0.0005900462856516242, |
|
"rewards/rejected": 9.874170791590586e-05, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_logits/chosen": -2.0524938106536865, |
|
"eval_logits/rejected": -1.7860654592514038, |
|
"eval_logps/chosen": -91.99323272705078, |
|
"eval_logps/rejected": -72.10526275634766, |
|
"eval_loss": 0.693277895450592, |
|
"eval_rewards/accuracies": 0.4888888895511627, |
|
"eval_rewards/chosen": -0.0007517762714996934, |
|
"eval_rewards/margins": -0.00027055441751144826, |
|
"eval_rewards/rejected": -0.00048122191219590604, |
|
"eval_runtime": 117.2952, |
|
"eval_samples_per_second": 24.4, |
|
"eval_steps_per_second": 0.767, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.767123287671233e-07, |
|
"logits/chosen": -1.947257399559021, |
|
"logits/rejected": -1.6791489124298096, |
|
"logps/chosen": -93.37996673583984, |
|
"logps/rejected": -72.86904907226562, |
|
"loss": 0.6936, |
|
"rewards/accuracies": 0.5083333849906921, |
|
"rewards/chosen": -0.002063233172520995, |
|
"rewards/margins": 0.0007919662748463452, |
|
"rewards/rejected": -0.0028551991563290358, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.10958904109589e-07, |
|
"logits/chosen": -1.8895361423492432, |
|
"logits/rejected": -1.587282419204712, |
|
"logps/chosen": -93.67366027832031, |
|
"logps/rejected": -72.37762451171875, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.491666704416275, |
|
"rewards/chosen": 0.0028340499848127365, |
|
"rewards/margins": 0.0019404724007472396, |
|
"rewards/rejected": 0.000893576827365905, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.4520547945205477e-07, |
|
"logits/chosen": -1.9517349004745483, |
|
"logits/rejected": -1.663013219833374, |
|
"logps/chosen": -84.09037780761719, |
|
"logps/rejected": -72.67787170410156, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.46666663885116577, |
|
"rewards/chosen": 0.0004304441681597382, |
|
"rewards/margins": -0.0008509824983775616, |
|
"rewards/rejected": 0.001281426870264113, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.794520547945205e-07, |
|
"logits/chosen": -1.947576880455017, |
|
"logits/rejected": -1.722400426864624, |
|
"logps/chosen": -89.95055389404297, |
|
"logps/rejected": -76.27583312988281, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.004270606208592653, |
|
"rewards/margins": 0.009159665554761887, |
|
"rewards/rejected": -0.004889058880507946, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.984779299847793e-07, |
|
"logits/chosen": -1.9852988719940186, |
|
"logits/rejected": -1.6872488260269165, |
|
"logps/chosen": -90.7852783203125, |
|
"logps/rejected": -73.9708251953125, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.0020091754850000143, |
|
"rewards/margins": 0.0034632813185453415, |
|
"rewards/rejected": -0.0014541053678840399, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.946727549467275e-07, |
|
"logits/chosen": -1.9156850576400757, |
|
"logits/rejected": -1.5840113162994385, |
|
"logps/chosen": -97.8337173461914, |
|
"logps/rejected": -73.20053100585938, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.0025704619474709034, |
|
"rewards/margins": 0.002448607701808214, |
|
"rewards/rejected": 0.00012185415107524022, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.908675799086758e-07, |
|
"logits/chosen": -1.803034782409668, |
|
"logits/rejected": -1.4945720434188843, |
|
"logps/chosen": -96.1871337890625, |
|
"logps/rejected": -72.80181121826172, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.5333333611488342, |
|
"rewards/chosen": 0.003732017008587718, |
|
"rewards/margins": 0.003292496781796217, |
|
"rewards/rejected": 0.0004395198484417051, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.87062404870624e-07, |
|
"logits/chosen": -1.8866764307022095, |
|
"logits/rejected": -1.5327541828155518, |
|
"logps/chosen": -97.74296569824219, |
|
"logps/rejected": -76.4982681274414, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.5916666984558105, |
|
"rewards/chosen": 0.004220059607177973, |
|
"rewards/margins": 0.006730073597282171, |
|
"rewards/rejected": -0.002510013757273555, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.832572298325722e-07, |
|
"logits/chosen": -1.945041298866272, |
|
"logits/rejected": -1.6189870834350586, |
|
"logps/chosen": -94.37440490722656, |
|
"logps/rejected": -73.1822738647461, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.004208459984511137, |
|
"rewards/margins": 0.005117190536111593, |
|
"rewards/rejected": -0.0009087308426387608, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.794520547945205e-07, |
|
"logits/chosen": -1.9501270055770874, |
|
"logits/rejected": -1.6498991250991821, |
|
"logps/chosen": -93.06495666503906, |
|
"logps/rejected": -72.0920181274414, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0078277587890625, |
|
"rewards/margins": 0.010002164170145988, |
|
"rewards/rejected": -0.0021744065452367067, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_logits/chosen": -2.052419900894165, |
|
"eval_logits/rejected": -1.7859160900115967, |
|
"eval_logps/chosen": -91.95441436767578, |
|
"eval_logps/rejected": -72.11531066894531, |
|
"eval_loss": 0.6901015043258667, |
|
"eval_rewards/accuracies": 0.5611110925674438, |
|
"eval_rewards/chosen": 0.0031298992689698935, |
|
"eval_rewards/margins": 0.004615597892552614, |
|
"eval_rewards/rejected": -0.0014856986235827208, |
|
"eval_runtime": 117.9228, |
|
"eval_samples_per_second": 24.27, |
|
"eval_steps_per_second": 0.763, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.756468797564688e-07, |
|
"logits/chosen": -1.9345300197601318, |
|
"logits/rejected": -1.6839030981063843, |
|
"logps/chosen": -92.93646240234375, |
|
"logps/rejected": -75.16416931152344, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.6166666746139526, |
|
"rewards/chosen": 0.0068414295092225075, |
|
"rewards/margins": 0.005408720578998327, |
|
"rewards/rejected": 0.001432707766070962, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.71841704718417e-07, |
|
"logits/chosen": -1.9018728733062744, |
|
"logits/rejected": -1.599656343460083, |
|
"logps/chosen": -94.06185913085938, |
|
"logps/rejected": -72.92243957519531, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.00332554685883224, |
|
"rewards/margins": 0.007436770014464855, |
|
"rewards/rejected": -0.004111223388463259, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.680365296803653e-07, |
|
"logits/chosen": -1.879314661026001, |
|
"logits/rejected": -1.5864207744598389, |
|
"logps/chosen": -93.1510238647461, |
|
"logps/rejected": -74.74366760253906, |
|
"loss": 0.6898, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.007647272199392319, |
|
"rewards/margins": 0.00648108497262001, |
|
"rewards/rejected": 0.0011661878088489175, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.642313546423135e-07, |
|
"logits/chosen": -1.975441336631775, |
|
"logits/rejected": -1.7306878566741943, |
|
"logps/chosen": -86.38069915771484, |
|
"logps/rejected": -75.68878173828125, |
|
"loss": 0.6884, |
|
"rewards/accuracies": 0.5749999284744263, |
|
"rewards/chosen": 0.007093862630426884, |
|
"rewards/margins": 0.008020764216780663, |
|
"rewards/rejected": -0.0009269017027691007, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.604261796042618e-07, |
|
"logits/chosen": -1.843636155128479, |
|
"logits/rejected": -1.5651946067810059, |
|
"logps/chosen": -92.92024230957031, |
|
"logps/rejected": -75.78328704833984, |
|
"loss": 0.6864, |
|
"rewards/accuracies": 0.6333333849906921, |
|
"rewards/chosen": 0.007120449095964432, |
|
"rewards/margins": 0.011716886423528194, |
|
"rewards/rejected": -0.004596438258886337, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.5662100456621e-07, |
|
"logits/chosen": -1.8188960552215576, |
|
"logits/rejected": -1.5440260171890259, |
|
"logps/chosen": -91.01820373535156, |
|
"logps/rejected": -72.49656677246094, |
|
"loss": 0.687, |
|
"rewards/accuracies": 0.6166666150093079, |
|
"rewards/chosen": 0.007922597229480743, |
|
"rewards/margins": 0.010534586384892464, |
|
"rewards/rejected": -0.002611987991258502, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.528158295281583e-07, |
|
"logits/chosen": -1.931947946548462, |
|
"logits/rejected": -1.6909589767456055, |
|
"logps/chosen": -88.5571517944336, |
|
"logps/rejected": -72.00991821289062, |
|
"loss": 0.6866, |
|
"rewards/accuracies": 0.5999999642372131, |
|
"rewards/chosen": 0.005753463599830866, |
|
"rewards/margins": 0.010802066884934902, |
|
"rewards/rejected": -0.005048603750765324, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.490106544901065e-07, |
|
"logits/chosen": -1.9274402856826782, |
|
"logits/rejected": -1.6220242977142334, |
|
"logps/chosen": -91.9419174194336, |
|
"logps/rejected": -77.3694076538086, |
|
"loss": 0.6851, |
|
"rewards/accuracies": 0.7083333134651184, |
|
"rewards/chosen": 0.014447471126914024, |
|
"rewards/margins": 0.018269026651978493, |
|
"rewards/rejected": -0.0038215541280806065, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.4520547945205477e-07, |
|
"logits/chosen": -1.8854271173477173, |
|
"logits/rejected": -1.6241188049316406, |
|
"logps/chosen": -92.77326965332031, |
|
"logps/rejected": -77.81119537353516, |
|
"loss": 0.6838, |
|
"rewards/accuracies": 0.64166659116745, |
|
"rewards/chosen": 0.013706192374229431, |
|
"rewards/margins": 0.015467122197151184, |
|
"rewards/rejected": -0.0017609309870749712, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.41400304414003e-07, |
|
"logits/chosen": -1.8017442226409912, |
|
"logits/rejected": -1.5209267139434814, |
|
"logps/chosen": -90.86246490478516, |
|
"logps/rejected": -72.70795440673828, |
|
"loss": 0.6842, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.013066952116787434, |
|
"rewards/margins": 0.018115142360329628, |
|
"rewards/rejected": -0.00504818931221962, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -2.051283121109009, |
|
"eval_logits/rejected": -1.7846735715866089, |
|
"eval_logps/chosen": -91.84673309326172, |
|
"eval_logps/rejected": -72.15672302246094, |
|
"eval_loss": 0.6832027435302734, |
|
"eval_rewards/accuracies": 0.6916666626930237, |
|
"eval_rewards/chosen": 0.013898174278438091, |
|
"eval_rewards/margins": 0.01952529139816761, |
|
"eval_rewards/rejected": -0.005627114325761795, |
|
"eval_runtime": 116.9802, |
|
"eval_samples_per_second": 24.466, |
|
"eval_steps_per_second": 0.769, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.375951293759513e-07, |
|
"logits/chosen": -1.8816429376602173, |
|
"logits/rejected": -1.5877922773361206, |
|
"logps/chosen": -92.76510620117188, |
|
"logps/rejected": -77.3439712524414, |
|
"loss": 0.6827, |
|
"rewards/accuracies": 0.7166666388511658, |
|
"rewards/chosen": 0.016739103943109512, |
|
"rewards/margins": 0.020335419103503227, |
|
"rewards/rejected": -0.003596315626055002, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.337899543378995e-07, |
|
"logits/chosen": -1.8938591480255127, |
|
"logits/rejected": -1.6317039728164673, |
|
"logps/chosen": -93.17677307128906, |
|
"logps/rejected": -74.86593627929688, |
|
"loss": 0.6818, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.01637251302599907, |
|
"rewards/margins": 0.019245153293013573, |
|
"rewards/rejected": -0.0028726388700306416, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.2998477929984777e-07, |
|
"logits/chosen": -1.8730627298355103, |
|
"logits/rejected": -1.6184980869293213, |
|
"logps/chosen": -91.06995391845703, |
|
"logps/rejected": -72.5287857055664, |
|
"loss": 0.6808, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": 0.01837759278714657, |
|
"rewards/margins": 0.02310130000114441, |
|
"rewards/rejected": -0.004723704420030117, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.26179604261796e-07, |
|
"logits/chosen": -1.8828544616699219, |
|
"logits/rejected": -1.576556921005249, |
|
"logps/chosen": -92.8334732055664, |
|
"logps/rejected": -74.53410339355469, |
|
"loss": 0.6788, |
|
"rewards/accuracies": 0.8166666030883789, |
|
"rewards/chosen": 0.023710301145911217, |
|
"rewards/margins": 0.03184016793966293, |
|
"rewards/rejected": -0.008129866793751717, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.223744292237443e-07, |
|
"logits/chosen": -1.9196786880493164, |
|
"logits/rejected": -1.625704050064087, |
|
"logps/chosen": -90.74723815917969, |
|
"logps/rejected": -76.68948364257812, |
|
"loss": 0.6777, |
|
"rewards/accuracies": 0.8083333969116211, |
|
"rewards/chosen": 0.026256781071424484, |
|
"rewards/margins": 0.03269972652196884, |
|
"rewards/rejected": -0.00644295010715723, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.185692541856925e-07, |
|
"logits/chosen": -1.981180191040039, |
|
"logits/rejected": -1.6961545944213867, |
|
"logps/chosen": -93.02009582519531, |
|
"logps/rejected": -76.1031723022461, |
|
"loss": 0.6762, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.03220153599977493, |
|
"rewards/margins": 0.03597740828990936, |
|
"rewards/rejected": -0.0037758699618279934, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.1476407914764077e-07, |
|
"logits/chosen": -1.8603204488754272, |
|
"logits/rejected": -1.5979934930801392, |
|
"logps/chosen": -94.58716583251953, |
|
"logps/rejected": -75.44288635253906, |
|
"loss": 0.6759, |
|
"rewards/accuracies": 0.841666579246521, |
|
"rewards/chosen": 0.021503183990716934, |
|
"rewards/margins": 0.03375691920518875, |
|
"rewards/rejected": -0.012253734283149242, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.10958904109589e-07, |
|
"logits/chosen": -1.9121630191802979, |
|
"logits/rejected": -1.613445520401001, |
|
"logps/chosen": -93.34215545654297, |
|
"logps/rejected": -74.89527893066406, |
|
"loss": 0.6739, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.02675667405128479, |
|
"rewards/margins": 0.03688011318445206, |
|
"rewards/rejected": -0.010123440995812416, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.071537290715373e-07, |
|
"logits/chosen": -1.9191780090332031, |
|
"logits/rejected": -1.532434105873108, |
|
"logps/chosen": -94.9774169921875, |
|
"logps/rejected": -73.49574279785156, |
|
"loss": 0.6744, |
|
"rewards/accuracies": 0.8666666150093079, |
|
"rewards/chosen": 0.03616553544998169, |
|
"rewards/margins": 0.048189498484134674, |
|
"rewards/rejected": -0.012023964896798134, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.033485540334855e-07, |
|
"logits/chosen": -1.8837106227874756, |
|
"logits/rejected": -1.6179090738296509, |
|
"logps/chosen": -93.18563079833984, |
|
"logps/rejected": -75.48190307617188, |
|
"loss": 0.672, |
|
"rewards/accuracies": 0.8333333730697632, |
|
"rewards/chosen": 0.03295673802495003, |
|
"rewards/margins": 0.044657547026872635, |
|
"rewards/rejected": -0.011700802482664585, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_logits/chosen": -2.0504438877105713, |
|
"eval_logits/rejected": -1.78355073928833, |
|
"eval_logps/chosen": -91.70490264892578, |
|
"eval_logps/rejected": -72.23121643066406, |
|
"eval_loss": 0.671801745891571, |
|
"eval_rewards/accuracies": 0.824999988079071, |
|
"eval_rewards/chosen": 0.028079798445105553, |
|
"eval_rewards/margins": 0.041155941784381866, |
|
"eval_rewards/rejected": -0.013076143339276314, |
|
"eval_runtime": 122.6508, |
|
"eval_samples_per_second": 23.335, |
|
"eval_steps_per_second": 0.734, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.9954337899543377e-07, |
|
"logits/chosen": -1.9980239868164062, |
|
"logits/rejected": -1.710172414779663, |
|
"logps/chosen": -90.13986206054688, |
|
"logps/rejected": -74.13956451416016, |
|
"loss": 0.6726, |
|
"rewards/accuracies": 0.7833333015441895, |
|
"rewards/chosen": 0.026057172566652298, |
|
"rewards/margins": 0.03604119271039963, |
|
"rewards/rejected": -0.00998402014374733, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.95738203957382e-07, |
|
"logits/chosen": -1.9292805194854736, |
|
"logits/rejected": -1.6599111557006836, |
|
"logps/chosen": -89.1054458618164, |
|
"logps/rejected": -76.2321548461914, |
|
"loss": 0.67, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.03600526601076126, |
|
"rewards/margins": 0.04854360967874527, |
|
"rewards/rejected": -0.012538343667984009, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.919330289193303e-07, |
|
"logits/chosen": -1.8889102935791016, |
|
"logits/rejected": -1.5878620147705078, |
|
"logps/chosen": -94.20903015136719, |
|
"logps/rejected": -75.64437103271484, |
|
"loss": 0.6677, |
|
"rewards/accuracies": 0.8833333253860474, |
|
"rewards/chosen": 0.04072072356939316, |
|
"rewards/margins": 0.05353847146034241, |
|
"rewards/rejected": -0.012817745096981525, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.881278538812785e-07, |
|
"logits/chosen": -1.889439344406128, |
|
"logits/rejected": -1.5668418407440186, |
|
"logps/chosen": -92.68305206298828, |
|
"logps/rejected": -71.53545379638672, |
|
"loss": 0.6653, |
|
"rewards/accuracies": 0.9083333015441895, |
|
"rewards/chosen": 0.042834434658288956, |
|
"rewards/margins": 0.05957134813070297, |
|
"rewards/rejected": -0.016736917197704315, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.8432267884322677e-07, |
|
"logits/chosen": -1.8060448169708252, |
|
"logits/rejected": -1.5509716272354126, |
|
"logps/chosen": -88.83539581298828, |
|
"logps/rejected": -71.70087432861328, |
|
"loss": 0.6645, |
|
"rewards/accuracies": 0.8416666984558105, |
|
"rewards/chosen": 0.04349132627248764, |
|
"rewards/margins": 0.05466403439640999, |
|
"rewards/rejected": -0.011172705329954624, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.80517503805175e-07, |
|
"logits/chosen": -1.8319886922836304, |
|
"logits/rejected": -1.5673277378082275, |
|
"logps/chosen": -88.6297378540039, |
|
"logps/rejected": -74.78864288330078, |
|
"loss": 0.6631, |
|
"rewards/accuracies": 0.9083333015441895, |
|
"rewards/chosen": 0.044696420431137085, |
|
"rewards/margins": 0.0604400709271431, |
|
"rewards/rejected": -0.015743646770715714, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.767123287671233e-07, |
|
"logits/chosen": -1.9542433023452759, |
|
"logits/rejected": -1.641196608543396, |
|
"logps/chosen": -99.97395324707031, |
|
"logps/rejected": -78.014404296875, |
|
"loss": 0.6626, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": 0.047287195920944214, |
|
"rewards/margins": 0.06728993356227875, |
|
"rewards/rejected": -0.020002741366624832, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.729071537290715e-07, |
|
"logits/chosen": -1.916868805885315, |
|
"logits/rejected": -1.6233441829681396, |
|
"logps/chosen": -97.3001937866211, |
|
"logps/rejected": -76.58578491210938, |
|
"loss": 0.6619, |
|
"rewards/accuracies": 0.9083333015441895, |
|
"rewards/chosen": 0.05124374479055405, |
|
"rewards/margins": 0.07088983058929443, |
|
"rewards/rejected": -0.019646091386675835, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.6910197869101977e-07, |
|
"logits/chosen": -1.9672422409057617, |
|
"logits/rejected": -1.700531244277954, |
|
"logps/chosen": -89.29413604736328, |
|
"logps/rejected": -72.65511322021484, |
|
"loss": 0.66, |
|
"rewards/accuracies": 0.9166666269302368, |
|
"rewards/chosen": 0.050498634576797485, |
|
"rewards/margins": 0.06749961525201797, |
|
"rewards/rejected": -0.01700098067522049, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.65296803652968e-07, |
|
"logits/chosen": -1.833929419517517, |
|
"logits/rejected": -1.5389481782913208, |
|
"logps/chosen": -85.8098373413086, |
|
"logps/rejected": -72.5254135131836, |
|
"loss": 0.6563, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.05660303682088852, |
|
"rewards/margins": 0.0805855318903923, |
|
"rewards/rejected": -0.023982489481568336, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_logits/chosen": -2.0494003295898438, |
|
"eval_logits/rejected": -1.7820732593536377, |
|
"eval_logps/chosen": -91.48755645751953, |
|
"eval_logps/rejected": -72.31159973144531, |
|
"eval_loss": 0.6574758887290955, |
|
"eval_rewards/accuracies": 0.8861111402511597, |
|
"eval_rewards/chosen": 0.049815867096185684, |
|
"eval_rewards/margins": 0.0709303691983223, |
|
"eval_rewards/rejected": -0.021114489063620567, |
|
"eval_runtime": 126.0719, |
|
"eval_samples_per_second": 22.701, |
|
"eval_steps_per_second": 0.714, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.614916286149163e-07, |
|
"logits/chosen": -1.913190484046936, |
|
"logits/rejected": -1.6543632745742798, |
|
"logps/chosen": -89.34779357910156, |
|
"logps/rejected": -71.7219009399414, |
|
"loss": 0.6567, |
|
"rewards/accuracies": 0.8916667103767395, |
|
"rewards/chosen": 0.05136920139193535, |
|
"rewards/margins": 0.07187855988740921, |
|
"rewards/rejected": -0.02050935849547386, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.576864535768645e-07, |
|
"logits/chosen": -1.8918750286102295, |
|
"logits/rejected": -1.621498465538025, |
|
"logps/chosen": -92.99454498291016, |
|
"logps/rejected": -72.34910583496094, |
|
"loss": 0.6569, |
|
"rewards/accuracies": 0.841666579246521, |
|
"rewards/chosen": 0.05106909200549126, |
|
"rewards/margins": 0.0683208703994751, |
|
"rewards/rejected": -0.01725177839398384, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.5388127853881277e-07, |
|
"logits/chosen": -1.8910505771636963, |
|
"logits/rejected": -1.6094516515731812, |
|
"logps/chosen": -92.9715347290039, |
|
"logps/rejected": -75.84952545166016, |
|
"loss": 0.6559, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.05335830897092819, |
|
"rewards/margins": 0.07476408034563065, |
|
"rewards/rejected": -0.021405773237347603, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.50076103500761e-07, |
|
"logits/chosen": -1.8273894786834717, |
|
"logits/rejected": -1.5480557680130005, |
|
"logps/chosen": -93.09770202636719, |
|
"logps/rejected": -77.72835540771484, |
|
"loss": 0.6531, |
|
"rewards/accuracies": 0.8833333253860474, |
|
"rewards/chosen": 0.06002092361450195, |
|
"rewards/margins": 0.08498416841030121, |
|
"rewards/rejected": -0.02496323548257351, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.462709284627093e-07, |
|
"logits/chosen": -1.7952553033828735, |
|
"logits/rejected": -1.5454210042953491, |
|
"logps/chosen": -92.24395751953125, |
|
"logps/rejected": -74.6566390991211, |
|
"loss": 0.6514, |
|
"rewards/accuracies": 0.8666666746139526, |
|
"rewards/chosen": 0.06232692673802376, |
|
"rewards/margins": 0.08038316667079926, |
|
"rewards/rejected": -0.018056249246001244, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.424657534246575e-07, |
|
"logits/chosen": -1.8805965185165405, |
|
"logits/rejected": -1.586458444595337, |
|
"logps/chosen": -96.27812194824219, |
|
"logps/rejected": -75.61478424072266, |
|
"loss": 0.6487, |
|
"rewards/accuracies": 0.9083333015441895, |
|
"rewards/chosen": 0.06480798870325089, |
|
"rewards/margins": 0.09341150522232056, |
|
"rewards/rejected": -0.02860351838171482, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.3866057838660576e-07, |
|
"logits/chosen": -1.8029924631118774, |
|
"logits/rejected": -1.5792688131332397, |
|
"logps/chosen": -87.8210678100586, |
|
"logps/rejected": -74.4839096069336, |
|
"loss": 0.651, |
|
"rewards/accuracies": 0.8749998807907104, |
|
"rewards/chosen": 0.05512385442852974, |
|
"rewards/margins": 0.07892224937677383, |
|
"rewards/rejected": -0.023798387497663498, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.34855403348554e-07, |
|
"logits/chosen": -1.913487434387207, |
|
"logits/rejected": -1.612151861190796, |
|
"logps/chosen": -94.51315307617188, |
|
"logps/rejected": -74.9591293334961, |
|
"loss": 0.6465, |
|
"rewards/accuracies": 0.9583333134651184, |
|
"rewards/chosen": 0.0840396136045456, |
|
"rewards/margins": 0.107123002409935, |
|
"rewards/rejected": -0.023083383217453957, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.310502283105023e-07, |
|
"logits/chosen": -1.9018150568008423, |
|
"logits/rejected": -1.6232668161392212, |
|
"logps/chosen": -93.73323822021484, |
|
"logps/rejected": -75.38792419433594, |
|
"loss": 0.6435, |
|
"rewards/accuracies": 0.8666666150093079, |
|
"rewards/chosen": 0.07307516038417816, |
|
"rewards/margins": 0.10251389443874359, |
|
"rewards/rejected": -0.029438745230436325, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.272450532724505e-07, |
|
"logits/chosen": -1.8873398303985596, |
|
"logits/rejected": -1.6442911624908447, |
|
"logps/chosen": -90.5989761352539, |
|
"logps/rejected": -77.07279968261719, |
|
"loss": 0.6437, |
|
"rewards/accuracies": 0.966666579246521, |
|
"rewards/chosen": 0.08184785395860672, |
|
"rewards/margins": 0.11672016233205795, |
|
"rewards/rejected": -0.034872300922870636, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_logits/chosen": -2.048579692840576, |
|
"eval_logits/rejected": -1.780737280845642, |
|
"eval_logps/chosen": -91.28104400634766, |
|
"eval_logps/rejected": -72.44007110595703, |
|
"eval_loss": 0.6415905356407166, |
|
"eval_rewards/accuracies": 0.9111111164093018, |
|
"eval_rewards/chosen": 0.07046664506196976, |
|
"eval_rewards/margins": 0.10442798584699631, |
|
"eval_rewards/rejected": -0.03396133333444595, |
|
"eval_runtime": 117.9826, |
|
"eval_samples_per_second": 24.258, |
|
"eval_steps_per_second": 0.763, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.2343987823439876e-07, |
|
"logits/chosen": -1.9880199432373047, |
|
"logits/rejected": -1.676412582397461, |
|
"logps/chosen": -91.00142669677734, |
|
"logps/rejected": -73.52288818359375, |
|
"loss": 0.6401, |
|
"rewards/accuracies": 0.9333332777023315, |
|
"rewards/chosen": 0.07217723876237869, |
|
"rewards/margins": 0.1089334487915039, |
|
"rewards/rejected": -0.036756210029125214, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.19634703196347e-07, |
|
"logits/chosen": -1.8773486614227295, |
|
"logits/rejected": -1.573994755744934, |
|
"logps/chosen": -89.79063415527344, |
|
"logps/rejected": -73.29415130615234, |
|
"loss": 0.6391, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.06451607495546341, |
|
"rewards/margins": 0.09732060134410858, |
|
"rewards/rejected": -0.032804541289806366, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.158295281582953e-07, |
|
"logits/chosen": -1.8495439291000366, |
|
"logits/rejected": -1.5227216482162476, |
|
"logps/chosen": -93.12200164794922, |
|
"logps/rejected": -74.12937927246094, |
|
"loss": 0.6371, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 0.08449102938175201, |
|
"rewards/margins": 0.11922381818294525, |
|
"rewards/rejected": -0.034732796251773834, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.120243531202435e-07, |
|
"logits/chosen": -1.93155837059021, |
|
"logits/rejected": -1.6498839855194092, |
|
"logps/chosen": -94.86695861816406, |
|
"logps/rejected": -76.8067626953125, |
|
"loss": 0.6354, |
|
"rewards/accuracies": 0.8833333253860474, |
|
"rewards/chosen": 0.08870759606361389, |
|
"rewards/margins": 0.12238001823425293, |
|
"rewards/rejected": -0.03367242217063904, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.0821917808219176e-07, |
|
"logits/chosen": -1.8748143911361694, |
|
"logits/rejected": -1.6098239421844482, |
|
"logps/chosen": -91.36283874511719, |
|
"logps/rejected": -73.6336669921875, |
|
"loss": 0.6328, |
|
"rewards/accuracies": 0.9083333015441895, |
|
"rewards/chosen": 0.0779472142457962, |
|
"rewards/margins": 0.1164277195930481, |
|
"rewards/rejected": -0.038480497896671295, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.0441400304414e-07, |
|
"logits/chosen": -1.8293393850326538, |
|
"logits/rejected": -1.4808156490325928, |
|
"logps/chosen": -94.16930389404297, |
|
"logps/rejected": -72.66043853759766, |
|
"loss": 0.6341, |
|
"rewards/accuracies": 0.9166666269302368, |
|
"rewards/chosen": 0.09546177834272385, |
|
"rewards/margins": 0.1290528029203415, |
|
"rewards/rejected": -0.033591024577617645, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.006088280060883e-07, |
|
"logits/chosen": -1.8831846714019775, |
|
"logits/rejected": -1.577532172203064, |
|
"logps/chosen": -95.71855926513672, |
|
"logps/rejected": -74.64360046386719, |
|
"loss": 0.6305, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.09222020953893661, |
|
"rewards/margins": 0.1298314929008484, |
|
"rewards/rejected": -0.037611283361911774, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.968036529680365e-07, |
|
"logits/chosen": -1.884874701499939, |
|
"logits/rejected": -1.56434965133667, |
|
"logps/chosen": -91.71931457519531, |
|
"logps/rejected": -74.07929992675781, |
|
"loss": 0.6298, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.09030507504940033, |
|
"rewards/margins": 0.13949953019618988, |
|
"rewards/rejected": -0.04919447377324104, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.9299847792998476e-07, |
|
"logits/chosen": -1.939512848854065, |
|
"logits/rejected": -1.6418523788452148, |
|
"logps/chosen": -88.81291198730469, |
|
"logps/rejected": -71.72727966308594, |
|
"loss": 0.6255, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.1102442592382431, |
|
"rewards/margins": 0.14661459624767303, |
|
"rewards/rejected": -0.036370351910591125, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.89193302891933e-07, |
|
"logits/chosen": -1.8088592290878296, |
|
"logits/rejected": -1.554652452468872, |
|
"logps/chosen": -88.36043548583984, |
|
"logps/rejected": -74.16236877441406, |
|
"loss": 0.6261, |
|
"rewards/accuracies": 0.9416666030883789, |
|
"rewards/chosen": 0.09904900938272476, |
|
"rewards/margins": 0.13188037276268005, |
|
"rewards/rejected": -0.032831382006406784, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_logits/chosen": -2.0478439331054688, |
|
"eval_logits/rejected": -1.7795759439468384, |
|
"eval_logps/chosen": -91.1009750366211, |
|
"eval_logps/rejected": -72.53546905517578, |
|
"eval_loss": 0.6277271509170532, |
|
"eval_rewards/accuracies": 0.925000011920929, |
|
"eval_rewards/chosen": 0.08847405016422272, |
|
"eval_rewards/margins": 0.13197554647922516, |
|
"eval_rewards/rejected": -0.04350150376558304, |
|
"eval_runtime": 117.9463, |
|
"eval_samples_per_second": 24.265, |
|
"eval_steps_per_second": 0.763, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.853881278538813e-07, |
|
"logits/chosen": -1.8632869720458984, |
|
"logits/rejected": -1.598181962966919, |
|
"logps/chosen": -89.87626647949219, |
|
"logps/rejected": -74.79205322265625, |
|
"loss": 0.6285, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.09145402163267136, |
|
"rewards/margins": 0.13780589401721954, |
|
"rewards/rejected": -0.04635186120867729, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.815829528158295e-07, |
|
"logits/chosen": -1.8058583736419678, |
|
"logits/rejected": -1.5232179164886475, |
|
"logps/chosen": -87.31756591796875, |
|
"logps/rejected": -72.79600524902344, |
|
"loss": 0.6247, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.09649817645549774, |
|
"rewards/margins": 0.1376573145389557, |
|
"rewards/rejected": -0.041159145534038544, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.7777777777777776e-07, |
|
"logits/chosen": -1.8448421955108643, |
|
"logits/rejected": -1.6317142248153687, |
|
"logps/chosen": -87.55412292480469, |
|
"logps/rejected": -75.91432189941406, |
|
"loss": 0.6245, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.10163182020187378, |
|
"rewards/margins": 0.14317932724952698, |
|
"rewards/rejected": -0.0415474958717823, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.73972602739726e-07, |
|
"logits/chosen": -1.800663709640503, |
|
"logits/rejected": -1.5165865421295166, |
|
"logps/chosen": -92.58036041259766, |
|
"logps/rejected": -73.22102355957031, |
|
"loss": 0.6216, |
|
"rewards/accuracies": 0.908333420753479, |
|
"rewards/chosen": 0.09606580436229706, |
|
"rewards/margins": 0.14485225081443787, |
|
"rewards/rejected": -0.04878643900156021, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.701674277016743e-07, |
|
"logits/chosen": -1.884385347366333, |
|
"logits/rejected": -1.6005618572235107, |
|
"logps/chosen": -94.69025421142578, |
|
"logps/rejected": -74.93373107910156, |
|
"loss": 0.6225, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.11928985267877579, |
|
"rewards/margins": 0.16091035306453705, |
|
"rewards/rejected": -0.04162050783634186, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.663622526636225e-07, |
|
"logits/chosen": -1.8744373321533203, |
|
"logits/rejected": -1.5992999076843262, |
|
"logps/chosen": -93.0952377319336, |
|
"logps/rejected": -76.86521911621094, |
|
"loss": 0.6187, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.12268207967281342, |
|
"rewards/margins": 0.15957853198051453, |
|
"rewards/rejected": -0.036896444857120514, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6255707762557076e-07, |
|
"logits/chosen": -1.9210466146469116, |
|
"logits/rejected": -1.6140285730361938, |
|
"logps/chosen": -94.59263610839844, |
|
"logps/rejected": -75.74002838134766, |
|
"loss": 0.6174, |
|
"rewards/accuracies": 0.908333420753479, |
|
"rewards/chosen": 0.09473783522844315, |
|
"rewards/margins": 0.14541365206241608, |
|
"rewards/rejected": -0.050675809383392334, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.58751902587519e-07, |
|
"logits/chosen": -1.9041986465454102, |
|
"logits/rejected": -1.643701195716858, |
|
"logps/chosen": -92.38731384277344, |
|
"logps/rejected": -76.4394302368164, |
|
"loss": 0.6147, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.09374178946018219, |
|
"rewards/margins": 0.14521454274654388, |
|
"rewards/rejected": -0.05147276073694229, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.549467275494673e-07, |
|
"logits/chosen": -1.8866844177246094, |
|
"logits/rejected": -1.5806655883789062, |
|
"logps/chosen": -89.06953430175781, |
|
"logps/rejected": -75.25362396240234, |
|
"loss": 0.6127, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.1129077672958374, |
|
"rewards/margins": 0.1759023219347, |
|
"rewards/rejected": -0.06299454718828201, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.511415525114155e-07, |
|
"logits/chosen": -1.9603450298309326, |
|
"logits/rejected": -1.658216118812561, |
|
"logps/chosen": -91.58163452148438, |
|
"logps/rejected": -74.63312530517578, |
|
"loss": 0.6117, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.1321893036365509, |
|
"rewards/margins": 0.18407562375068665, |
|
"rewards/rejected": -0.051886312663555145, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_logits/chosen": -2.047370672225952, |
|
"eval_logits/rejected": -1.7785520553588867, |
|
"eval_logps/chosen": -90.88909149169922, |
|
"eval_logps/rejected": -72.66747283935547, |
|
"eval_loss": 0.6126503348350525, |
|
"eval_rewards/accuracies": 0.9222221970558167, |
|
"eval_rewards/chosen": 0.10966197401285172, |
|
"eval_rewards/margins": 0.16636402904987335, |
|
"eval_rewards/rejected": -0.05670207738876343, |
|
"eval_runtime": 117.9996, |
|
"eval_samples_per_second": 24.254, |
|
"eval_steps_per_second": 0.763, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4733637747336376e-07, |
|
"logits/chosen": -1.9528766870498657, |
|
"logits/rejected": -1.594366192817688, |
|
"logps/chosen": -97.9262466430664, |
|
"logps/rejected": -73.47969055175781, |
|
"loss": 0.6145, |
|
"rewards/accuracies": 0.9083333015441895, |
|
"rewards/chosen": 0.12691722810268402, |
|
"rewards/margins": 0.1831960827112198, |
|
"rewards/rejected": -0.056278862059116364, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.43531202435312e-07, |
|
"logits/chosen": -1.9507348537445068, |
|
"logits/rejected": -1.6255791187286377, |
|
"logps/chosen": -93.60541534423828, |
|
"logps/rejected": -74.82536315917969, |
|
"loss": 0.6094, |
|
"rewards/accuracies": 0.9166666269302368, |
|
"rewards/chosen": 0.11346453428268433, |
|
"rewards/margins": 0.17843127250671387, |
|
"rewards/rejected": -0.06496672332286835, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3972602739726023e-07, |
|
"logits/chosen": -1.7865006923675537, |
|
"logits/rejected": -1.5033773183822632, |
|
"logps/chosen": -89.35474395751953, |
|
"logps/rejected": -70.51929473876953, |
|
"loss": 0.6081, |
|
"rewards/accuracies": 0.9333332777023315, |
|
"rewards/chosen": 0.1159995049238205, |
|
"rewards/margins": 0.1914626657962799, |
|
"rewards/rejected": -0.07546313852071762, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.359208523592085e-07, |
|
"logits/chosen": -1.9572938680648804, |
|
"logits/rejected": -1.6931850910186768, |
|
"logps/chosen": -90.90480041503906, |
|
"logps/rejected": -77.85359191894531, |
|
"loss": 0.6088, |
|
"rewards/accuracies": 0.9083333015441895, |
|
"rewards/chosen": 0.10320776700973511, |
|
"rewards/margins": 0.16814057528972626, |
|
"rewards/rejected": -0.06493280827999115, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3211567732115676e-07, |
|
"logits/chosen": -2.0167603492736816, |
|
"logits/rejected": -1.715150237083435, |
|
"logps/chosen": -90.37574768066406, |
|
"logps/rejected": -77.0097427368164, |
|
"loss": 0.6039, |
|
"rewards/accuracies": 0.9333332777023315, |
|
"rewards/chosen": 0.1519448608160019, |
|
"rewards/margins": 0.21030330657958984, |
|
"rewards/rejected": -0.058358438313007355, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.28310502283105e-07, |
|
"logits/chosen": -1.8651390075683594, |
|
"logits/rejected": -1.5632587671279907, |
|
"logps/chosen": -94.64379119873047, |
|
"logps/rejected": -75.53591918945312, |
|
"loss": 0.6078, |
|
"rewards/accuracies": 0.9416667222976685, |
|
"rewards/chosen": 0.13561630249023438, |
|
"rewards/margins": 0.19979830086231232, |
|
"rewards/rejected": -0.06418199837207794, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.2450532724505325e-07, |
|
"logits/chosen": -1.9391330480575562, |
|
"logits/rejected": -1.663351058959961, |
|
"logps/chosen": -86.51952362060547, |
|
"logps/rejected": -73.44007873535156, |
|
"loss": 0.6057, |
|
"rewards/accuracies": 0.9333332777023315, |
|
"rewards/chosen": 0.1343904286623001, |
|
"rewards/margins": 0.1892845332622528, |
|
"rewards/rejected": -0.054894138127565384, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.207001522070015e-07, |
|
"logits/chosen": -1.842283010482788, |
|
"logits/rejected": -1.5807665586471558, |
|
"logps/chosen": -90.40550231933594, |
|
"logps/rejected": -77.0767593383789, |
|
"loss": 0.6045, |
|
"rewards/accuracies": 0.9249998927116394, |
|
"rewards/chosen": 0.13356170058250427, |
|
"rewards/margins": 0.1978417932987213, |
|
"rewards/rejected": -0.06428009271621704, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.1689497716894975e-07, |
|
"logits/chosen": -1.913578748703003, |
|
"logits/rejected": -1.648938536643982, |
|
"logps/chosen": -87.49850463867188, |
|
"logps/rejected": -74.8587646484375, |
|
"loss": 0.6026, |
|
"rewards/accuracies": 0.9333332777023315, |
|
"rewards/chosen": 0.13597458600997925, |
|
"rewards/margins": 0.20577768981456757, |
|
"rewards/rejected": -0.06980310380458832, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.13089802130898e-07, |
|
"logits/chosen": -1.8712265491485596, |
|
"logits/rejected": -1.5505828857421875, |
|
"logps/chosen": -92.89669036865234, |
|
"logps/rejected": -77.97479248046875, |
|
"loss": 0.6002, |
|
"rewards/accuracies": 0.9416667222976685, |
|
"rewards/chosen": 0.1514441967010498, |
|
"rewards/margins": 0.2083124816417694, |
|
"rewards/rejected": -0.056868284940719604, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_logits/chosen": -2.046788454055786, |
|
"eval_logits/rejected": -1.7776765823364258, |
|
"eval_logps/chosen": -90.75981140136719, |
|
"eval_logps/rejected": -72.78363037109375, |
|
"eval_loss": 0.6019285321235657, |
|
"eval_rewards/accuracies": 0.9277777671813965, |
|
"eval_rewards/chosen": 0.12258908152580261, |
|
"eval_rewards/margins": 0.1909066140651703, |
|
"eval_rewards/rejected": -0.06831753998994827, |
|
"eval_runtime": 117.8634, |
|
"eval_samples_per_second": 24.282, |
|
"eval_steps_per_second": 0.764, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.0928462709284625e-07, |
|
"logits/chosen": -1.7578074932098389, |
|
"logits/rejected": -1.445682406425476, |
|
"logps/chosen": -92.38932800292969, |
|
"logps/rejected": -74.73234558105469, |
|
"loss": 0.5977, |
|
"rewards/accuracies": 0.9583333134651184, |
|
"rewards/chosen": 0.14447703957557678, |
|
"rewards/margins": 0.2118067443370819, |
|
"rewards/rejected": -0.06732969731092453, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.054794520547945e-07, |
|
"logits/chosen": -1.9356153011322021, |
|
"logits/rejected": -1.6309950351715088, |
|
"logps/chosen": -90.90232849121094, |
|
"logps/rejected": -75.2989730834961, |
|
"loss": 0.5962, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.14640390872955322, |
|
"rewards/margins": 0.21756890416145325, |
|
"rewards/rejected": -0.07116499543190002, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.0167427701674275e-07, |
|
"logits/chosen": -1.903969407081604, |
|
"logits/rejected": -1.6307262182235718, |
|
"logps/chosen": -91.58943939208984, |
|
"logps/rejected": -75.18601989746094, |
|
"loss": 0.6003, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.13933506608009338, |
|
"rewards/margins": 0.20189881324768066, |
|
"rewards/rejected": -0.06256375461816788, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.97869101978691e-07, |
|
"logits/chosen": -1.866371512413025, |
|
"logits/rejected": -1.6297776699066162, |
|
"logps/chosen": -91.3348617553711, |
|
"logps/rejected": -75.30091857910156, |
|
"loss": 0.5968, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.16422715783119202, |
|
"rewards/margins": 0.22424063086509705, |
|
"rewards/rejected": -0.06001347303390503, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.9406392694063925e-07, |
|
"logits/chosen": -1.860414743423462, |
|
"logits/rejected": -1.5813719034194946, |
|
"logps/chosen": -94.010498046875, |
|
"logps/rejected": -73.02679443359375, |
|
"loss": 0.595, |
|
"rewards/accuracies": 0.9583333134651184, |
|
"rewards/chosen": 0.14462696015834808, |
|
"rewards/margins": 0.22170260548591614, |
|
"rewards/rejected": -0.07707564532756805, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.902587519025875e-07, |
|
"logits/chosen": -1.755253791809082, |
|
"logits/rejected": -1.5063341856002808, |
|
"logps/chosen": -89.60089111328125, |
|
"logps/rejected": -75.72676086425781, |
|
"loss": 0.5942, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.12570711970329285, |
|
"rewards/margins": 0.19667670130729675, |
|
"rewards/rejected": -0.0709695890545845, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8645357686453575e-07, |
|
"logits/chosen": -1.9189532995224, |
|
"logits/rejected": -1.60482919216156, |
|
"logps/chosen": -91.0718765258789, |
|
"logps/rejected": -76.86122131347656, |
|
"loss": 0.5919, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.14830803871154785, |
|
"rewards/margins": 0.22014153003692627, |
|
"rewards/rejected": -0.07183349877595901, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.82648401826484e-07, |
|
"logits/chosen": -1.9582935571670532, |
|
"logits/rejected": -1.6904990673065186, |
|
"logps/chosen": -92.6358871459961, |
|
"logps/rejected": -75.13966369628906, |
|
"loss": 0.593, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 0.12001453340053558, |
|
"rewards/margins": 0.20278160274028778, |
|
"rewards/rejected": -0.0827670693397522, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7884322678843225e-07, |
|
"logits/chosen": -1.7855488061904907, |
|
"logits/rejected": -1.498254418373108, |
|
"logps/chosen": -89.93516540527344, |
|
"logps/rejected": -71.06876373291016, |
|
"loss": 0.5923, |
|
"rewards/accuracies": 0.9666666984558105, |
|
"rewards/chosen": 0.13533183932304382, |
|
"rewards/margins": 0.2196374386548996, |
|
"rewards/rejected": -0.08430557698011398, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.750380517503805e-07, |
|
"logits/chosen": -1.9469058513641357, |
|
"logits/rejected": -1.655491828918457, |
|
"logps/chosen": -92.86201477050781, |
|
"logps/rejected": -77.54034423828125, |
|
"loss": 0.5912, |
|
"rewards/accuracies": 0.9499999284744263, |
|
"rewards/chosen": 0.14615695178508759, |
|
"rewards/margins": 0.22164049744606018, |
|
"rewards/rejected": -0.0754835233092308, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_logits/chosen": -2.046569585800171, |
|
"eval_logits/rejected": -1.7769988775253296, |
|
"eval_logps/chosen": -90.64215850830078, |
|
"eval_logps/rejected": -72.90531921386719, |
|
"eval_loss": 0.5911818742752075, |
|
"eval_rewards/accuracies": 0.9333333373069763, |
|
"eval_rewards/chosen": 0.13435469567775726, |
|
"eval_rewards/margins": 0.21484099328517914, |
|
"eval_rewards/rejected": -0.08048629015684128, |
|
"eval_runtime": 117.9339, |
|
"eval_samples_per_second": 24.268, |
|
"eval_steps_per_second": 0.763, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7123287671232875e-07, |
|
"logits/chosen": -1.940800666809082, |
|
"logits/rejected": -1.6664402484893799, |
|
"logps/chosen": -86.94990539550781, |
|
"logps/rejected": -73.69987487792969, |
|
"loss": 0.5931, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.09665495157241821, |
|
"rewards/margins": 0.18213674426078796, |
|
"rewards/rejected": -0.08548180013895035, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.67427701674277e-07, |
|
"logits/chosen": -1.9208215475082397, |
|
"logits/rejected": -1.6697795391082764, |
|
"logps/chosen": -90.12159729003906, |
|
"logps/rejected": -75.43165588378906, |
|
"loss": 0.5936, |
|
"rewards/accuracies": 0.9083331823348999, |
|
"rewards/chosen": 0.1299724578857422, |
|
"rewards/margins": 0.21237368881702423, |
|
"rewards/rejected": -0.08240120857954025, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.6362252663622525e-07, |
|
"logits/chosen": -1.8031762838363647, |
|
"logits/rejected": -1.552947759628296, |
|
"logps/chosen": -93.24359130859375, |
|
"logps/rejected": -73.16694641113281, |
|
"loss": 0.595, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 0.14134086668491364, |
|
"rewards/margins": 0.22129371762275696, |
|
"rewards/rejected": -0.07995286583900452, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.598173515981735e-07, |
|
"logits/chosen": -1.9094417095184326, |
|
"logits/rejected": -1.6032779216766357, |
|
"logps/chosen": -95.291015625, |
|
"logps/rejected": -76.30043029785156, |
|
"loss": 0.588, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.17578403651714325, |
|
"rewards/margins": 0.24385884404182434, |
|
"rewards/rejected": -0.0680748000741005, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5601217656012175e-07, |
|
"logits/chosen": -1.8715407848358154, |
|
"logits/rejected": -1.5754361152648926, |
|
"logps/chosen": -93.08331298828125, |
|
"logps/rejected": -71.55984497070312, |
|
"loss": 0.5885, |
|
"rewards/accuracies": 0.908333420753479, |
|
"rewards/chosen": 0.16208195686340332, |
|
"rewards/margins": 0.24299950897693634, |
|
"rewards/rejected": -0.08091756701469421, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.5220700152207e-07, |
|
"logits/chosen": -1.9729465246200562, |
|
"logits/rejected": -1.6472351551055908, |
|
"logps/chosen": -96.95377349853516, |
|
"logps/rejected": -74.73013305664062, |
|
"loss": 0.5829, |
|
"rewards/accuracies": 0.9583333134651184, |
|
"rewards/chosen": 0.17102012038230896, |
|
"rewards/margins": 0.26194968819618225, |
|
"rewards/rejected": -0.09092956781387329, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.4840182648401825e-07, |
|
"logits/chosen": -1.87423574924469, |
|
"logits/rejected": -1.5873512029647827, |
|
"logps/chosen": -92.32403564453125, |
|
"logps/rejected": -77.21125793457031, |
|
"loss": 0.5842, |
|
"rewards/accuracies": 0.966666579246521, |
|
"rewards/chosen": 0.1644188016653061, |
|
"rewards/margins": 0.27040743827819824, |
|
"rewards/rejected": -0.10598863661289215, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.445966514459665e-07, |
|
"logits/chosen": -1.8510887622833252, |
|
"logits/rejected": -1.5857640504837036, |
|
"logps/chosen": -92.05542755126953, |
|
"logps/rejected": -77.68433380126953, |
|
"loss": 0.5877, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 0.146043062210083, |
|
"rewards/margins": 0.22200524806976318, |
|
"rewards/rejected": -0.07596220076084137, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.4079147640791475e-07, |
|
"logits/chosen": -1.8213762044906616, |
|
"logits/rejected": -1.5474189519882202, |
|
"logps/chosen": -91.6738052368164, |
|
"logps/rejected": -74.36346435546875, |
|
"loss": 0.5796, |
|
"rewards/accuracies": 0.9833332896232605, |
|
"rewards/chosen": 0.1721878945827484, |
|
"rewards/margins": 0.26567989587783813, |
|
"rewards/rejected": -0.09349202364683151, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.36986301369863e-07, |
|
"logits/chosen": -1.8960554599761963, |
|
"logits/rejected": -1.5947376489639282, |
|
"logps/chosen": -95.58143615722656, |
|
"logps/rejected": -75.37010192871094, |
|
"loss": 0.5822, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": 0.18196699023246765, |
|
"rewards/margins": 0.25899866223335266, |
|
"rewards/rejected": -0.0770316869020462, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_logits/chosen": -2.0461771488189697, |
|
"eval_logits/rejected": -1.7763375043869019, |
|
"eval_logps/chosen": -90.54474639892578, |
|
"eval_logps/rejected": -73.00917053222656, |
|
"eval_loss": 0.5822051763534546, |
|
"eval_rewards/accuracies": 0.9472222328186035, |
|
"eval_rewards/chosen": 0.14409679174423218, |
|
"eval_rewards/margins": 0.23496907949447632, |
|
"eval_rewards/rejected": -0.09087225794792175, |
|
"eval_runtime": 117.8174, |
|
"eval_samples_per_second": 24.292, |
|
"eval_steps_per_second": 0.764, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3318112633181125e-07, |
|
"logits/chosen": -1.893864393234253, |
|
"logits/rejected": -1.5917404890060425, |
|
"logps/chosen": -90.5267105102539, |
|
"logps/rejected": -73.85464477539062, |
|
"loss": 0.5809, |
|
"rewards/accuracies": 0.9666666984558105, |
|
"rewards/chosen": 0.18791857361793518, |
|
"rewards/margins": 0.26672154664993286, |
|
"rewards/rejected": -0.07880295813083649, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.293759512937595e-07, |
|
"logits/chosen": -1.851485013961792, |
|
"logits/rejected": -1.5561844110488892, |
|
"logps/chosen": -91.46730041503906, |
|
"logps/rejected": -75.5124740600586, |
|
"loss": 0.5803, |
|
"rewards/accuracies": 0.9083333015441895, |
|
"rewards/chosen": 0.15885117650032043, |
|
"rewards/margins": 0.24810326099395752, |
|
"rewards/rejected": -0.08925210684537888, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2557077625570775e-07, |
|
"logits/chosen": -1.9113174676895142, |
|
"logits/rejected": -1.6106466054916382, |
|
"logps/chosen": -88.4524917602539, |
|
"logps/rejected": -71.1933364868164, |
|
"loss": 0.5805, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.1571049988269806, |
|
"rewards/margins": 0.23198041319847107, |
|
"rewards/rejected": -0.07487543672323227, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.21765601217656e-07, |
|
"logits/chosen": -1.9750347137451172, |
|
"logits/rejected": -1.6996724605560303, |
|
"logps/chosen": -91.44354248046875, |
|
"logps/rejected": -77.03483581542969, |
|
"loss": 0.5768, |
|
"rewards/accuracies": 0.9416666030883789, |
|
"rewards/chosen": 0.14498132467269897, |
|
"rewards/margins": 0.23422233760356903, |
|
"rewards/rejected": -0.08924100548028946, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1796042617960425e-07, |
|
"logits/chosen": -1.9331929683685303, |
|
"logits/rejected": -1.6460412740707397, |
|
"logps/chosen": -84.95231628417969, |
|
"logps/rejected": -73.00121307373047, |
|
"loss": 0.5759, |
|
"rewards/accuracies": 0.9416667222976685, |
|
"rewards/chosen": 0.17358574271202087, |
|
"rewards/margins": 0.2643177807331085, |
|
"rewards/rejected": -0.09073203802108765, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.141552511415525e-07, |
|
"logits/chosen": -1.9368797540664673, |
|
"logits/rejected": -1.6109968423843384, |
|
"logps/chosen": -95.30493927001953, |
|
"logps/rejected": -79.38502502441406, |
|
"loss": 0.5725, |
|
"rewards/accuracies": 0.9166666269302368, |
|
"rewards/chosen": 0.16715973615646362, |
|
"rewards/margins": 0.26029014587402344, |
|
"rewards/rejected": -0.09313040971755981, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1035007610350075e-07, |
|
"logits/chosen": -1.9934200048446655, |
|
"logits/rejected": -1.7067596912384033, |
|
"logps/chosen": -91.30809020996094, |
|
"logps/rejected": -75.1029052734375, |
|
"loss": 0.577, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.1644877940416336, |
|
"rewards/margins": 0.2513524889945984, |
|
"rewards/rejected": -0.08686470985412598, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.06544901065449e-07, |
|
"logits/chosen": -1.8585717678070068, |
|
"logits/rejected": -1.593147873878479, |
|
"logps/chosen": -94.06452941894531, |
|
"logps/rejected": -78.18238830566406, |
|
"loss": 0.5798, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": 0.1385236382484436, |
|
"rewards/margins": 0.2384444922208786, |
|
"rewards/rejected": -0.09992088377475739, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0273972602739725e-07, |
|
"logits/chosen": -1.921449065208435, |
|
"logits/rejected": -1.6535272598266602, |
|
"logps/chosen": -87.8677749633789, |
|
"logps/rejected": -78.9840087890625, |
|
"loss": 0.5767, |
|
"rewards/accuracies": 0.9666666984558105, |
|
"rewards/chosen": 0.14824248850345612, |
|
"rewards/margins": 0.25046244263648987, |
|
"rewards/rejected": -0.10221991688013077, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.89345509893455e-08, |
|
"logits/chosen": -1.8294260501861572, |
|
"logits/rejected": -1.5607296228408813, |
|
"logps/chosen": -94.44920349121094, |
|
"logps/rejected": -77.64144134521484, |
|
"loss": 0.5789, |
|
"rewards/accuracies": 0.9166666269302368, |
|
"rewards/chosen": 0.16983038187026978, |
|
"rewards/margins": 0.2730123996734619, |
|
"rewards/rejected": -0.10318204015493393, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_logits/chosen": -2.0464720726013184, |
|
"eval_logits/rejected": -1.7763383388519287, |
|
"eval_logps/chosen": -90.46904754638672, |
|
"eval_logps/rejected": -73.09234619140625, |
|
"eval_loss": 0.5758996605873108, |
|
"eval_rewards/accuracies": 0.9333333373069763, |
|
"eval_rewards/chosen": 0.1516665369272232, |
|
"eval_rewards/margins": 0.2508557140827179, |
|
"eval_rewards/rejected": -0.0991891473531723, |
|
"eval_runtime": 117.7184, |
|
"eval_samples_per_second": 24.312, |
|
"eval_steps_per_second": 0.765, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.512937595129374e-08, |
|
"logits/chosen": -1.8505995273590088, |
|
"logits/rejected": -1.570237398147583, |
|
"logps/chosen": -95.08009338378906, |
|
"logps/rejected": -75.39155578613281, |
|
"loss": 0.5739, |
|
"rewards/accuracies": 0.9583333134651184, |
|
"rewards/chosen": 0.14622533321380615, |
|
"rewards/margins": 0.23580579459667206, |
|
"rewards/rejected": -0.08958044648170471, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 9.1324200913242e-08, |
|
"logits/chosen": -1.8380448818206787, |
|
"logits/rejected": -1.5534820556640625, |
|
"logps/chosen": -89.47401428222656, |
|
"logps/rejected": -73.60565948486328, |
|
"loss": 0.5772, |
|
"rewards/accuracies": 0.9416667222976685, |
|
"rewards/chosen": 0.1780690848827362, |
|
"rewards/margins": 0.2615908980369568, |
|
"rewards/rejected": -0.08352181315422058, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.751902587519024e-08, |
|
"logits/chosen": -1.975015640258789, |
|
"logits/rejected": -1.6878995895385742, |
|
"logps/chosen": -93.83460998535156, |
|
"logps/rejected": -76.86270904541016, |
|
"loss": 0.576, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.17377497255802155, |
|
"rewards/margins": 0.25830045342445374, |
|
"rewards/rejected": -0.08452550321817398, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.37138508371385e-08, |
|
"logits/chosen": -1.8430579900741577, |
|
"logits/rejected": -1.5395987033843994, |
|
"logps/chosen": -92.52012634277344, |
|
"logps/rejected": -79.96121215820312, |
|
"loss": 0.5745, |
|
"rewards/accuracies": 0.9583333134651184, |
|
"rewards/chosen": 0.15905624628067017, |
|
"rewards/margins": 0.25326135754585266, |
|
"rewards/rejected": -0.0942051112651825, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.990867579908676e-08, |
|
"logits/chosen": -1.8872146606445312, |
|
"logits/rejected": -1.5909473896026611, |
|
"logps/chosen": -89.97090148925781, |
|
"logps/rejected": -77.28529357910156, |
|
"loss": 0.5727, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.16229455173015594, |
|
"rewards/margins": 0.2598329186439514, |
|
"rewards/rejected": -0.09753839671611786, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.6103500761035e-08, |
|
"logits/chosen": -1.8499984741210938, |
|
"logits/rejected": -1.5525275468826294, |
|
"logps/chosen": -93.28520202636719, |
|
"logps/rejected": -74.24462890625, |
|
"loss": 0.5731, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.14929968118667603, |
|
"rewards/margins": 0.2529754042625427, |
|
"rewards/rejected": -0.10367570072412491, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.229832572298326e-08, |
|
"logits/chosen": -2.009666681289673, |
|
"logits/rejected": -1.7177025079727173, |
|
"logps/chosen": -89.4050521850586, |
|
"logps/rejected": -79.06332397460938, |
|
"loss": 0.5712, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.1704416573047638, |
|
"rewards/margins": 0.27492305636405945, |
|
"rewards/rejected": -0.10448137670755386, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.84931506849315e-08, |
|
"logits/chosen": -1.81943678855896, |
|
"logits/rejected": -1.5563119649887085, |
|
"logps/chosen": -88.48675537109375, |
|
"logps/rejected": -72.3918685913086, |
|
"loss": 0.5729, |
|
"rewards/accuracies": 0.8916667103767395, |
|
"rewards/chosen": 0.13842932879924774, |
|
"rewards/margins": 0.24020667374134064, |
|
"rewards/rejected": -0.10177735984325409, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.468797564687976e-08, |
|
"logits/chosen": -1.8918097019195557, |
|
"logits/rejected": -1.6081184148788452, |
|
"logps/chosen": -87.69499969482422, |
|
"logps/rejected": -71.82918548583984, |
|
"loss": 0.5745, |
|
"rewards/accuracies": 0.9333332777023315, |
|
"rewards/chosen": 0.17439430952072144, |
|
"rewards/margins": 0.2683844268321991, |
|
"rewards/rejected": -0.09399012476205826, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.0882800608828e-08, |
|
"logits/chosen": -1.7783477306365967, |
|
"logits/rejected": -1.4948413372039795, |
|
"logps/chosen": -89.221435546875, |
|
"logps/rejected": -72.36725616455078, |
|
"loss": 0.5689, |
|
"rewards/accuracies": 0.9583333134651184, |
|
"rewards/chosen": 0.16254201531410217, |
|
"rewards/margins": 0.27369171380996704, |
|
"rewards/rejected": -0.11114968359470367, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_logits/chosen": -2.0464510917663574, |
|
"eval_logits/rejected": -1.7761657238006592, |
|
"eval_logps/chosen": -90.43045043945312, |
|
"eval_logps/rejected": -73.13316345214844, |
|
"eval_loss": 0.5722280740737915, |
|
"eval_rewards/accuracies": 0.949999988079071, |
|
"eval_rewards/chosen": 0.15552671253681183, |
|
"eval_rewards/margins": 0.2587975263595581, |
|
"eval_rewards/rejected": -0.10327085852622986, |
|
"eval_runtime": 118.3212, |
|
"eval_samples_per_second": 24.188, |
|
"eval_steps_per_second": 0.761, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.707762557077625e-08, |
|
"logits/chosen": -1.7794984579086304, |
|
"logits/rejected": -1.5128498077392578, |
|
"logps/chosen": -94.00843048095703, |
|
"logps/rejected": -75.231201171875, |
|
"loss": 0.5719, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.18278029561042786, |
|
"rewards/margins": 0.2786504328250885, |
|
"rewards/rejected": -0.09587012976408005, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.32724505327245e-08, |
|
"logits/chosen": -1.9731611013412476, |
|
"logits/rejected": -1.6866945028305054, |
|
"logps/chosen": -93.30955505371094, |
|
"logps/rejected": -78.57191467285156, |
|
"loss": 0.571, |
|
"rewards/accuracies": 0.9583333134651184, |
|
"rewards/chosen": 0.15462180972099304, |
|
"rewards/margins": 0.26251837611198425, |
|
"rewards/rejected": -0.10789655148983002, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.946727549467275e-08, |
|
"logits/chosen": -1.9133844375610352, |
|
"logits/rejected": -1.636694312095642, |
|
"logps/chosen": -88.56840515136719, |
|
"logps/rejected": -76.69215393066406, |
|
"loss": 0.5716, |
|
"rewards/accuracies": 0.966666579246521, |
|
"rewards/chosen": 0.15411558747291565, |
|
"rewards/margins": 0.26415151357650757, |
|
"rewards/rejected": -0.11003589630126953, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.5662100456621e-08, |
|
"logits/chosen": -1.8889293670654297, |
|
"logits/rejected": -1.5668977499008179, |
|
"logps/chosen": -93.04912567138672, |
|
"logps/rejected": -74.54277038574219, |
|
"loss": 0.5671, |
|
"rewards/accuracies": 0.9166666269302368, |
|
"rewards/chosen": 0.1801871955394745, |
|
"rewards/margins": 0.2698620557785034, |
|
"rewards/rejected": -0.08967487514019012, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.185692541856925e-08, |
|
"logits/chosen": -1.853014588356018, |
|
"logits/rejected": -1.6049798727035522, |
|
"logps/chosen": -92.84537506103516, |
|
"logps/rejected": -76.91346740722656, |
|
"loss": 0.5734, |
|
"rewards/accuracies": 0.9333332777023315, |
|
"rewards/chosen": 0.1830371469259262, |
|
"rewards/margins": 0.27097687125205994, |
|
"rewards/rejected": -0.08793972432613373, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.80517503805175e-08, |
|
"logits/chosen": -1.8321574926376343, |
|
"logits/rejected": -1.559768795967102, |
|
"logps/chosen": -90.3908462524414, |
|
"logps/rejected": -77.5791015625, |
|
"loss": 0.5728, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 0.15050294995307922, |
|
"rewards/margins": 0.2644408941268921, |
|
"rewards/rejected": -0.11393795162439346, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.424657534246575e-08, |
|
"logits/chosen": -1.8819458484649658, |
|
"logits/rejected": -1.5830302238464355, |
|
"logps/chosen": -96.20283508300781, |
|
"logps/rejected": -79.02750396728516, |
|
"loss": 0.57, |
|
"rewards/accuracies": 0.9166666269302368, |
|
"rewards/chosen": 0.1495673507452011, |
|
"rewards/margins": 0.26126623153686523, |
|
"rewards/rejected": -0.11169885098934174, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.0441400304414e-08, |
|
"logits/chosen": -1.8712704181671143, |
|
"logits/rejected": -1.5640711784362793, |
|
"logps/chosen": -87.56044006347656, |
|
"logps/rejected": -73.2048110961914, |
|
"loss": 0.5701, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.15671199560165405, |
|
"rewards/margins": 0.2683314383029938, |
|
"rewards/rejected": -0.11161943525075912, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.663622526636225e-08, |
|
"logits/chosen": -1.9705963134765625, |
|
"logits/rejected": -1.6459972858428955, |
|
"logps/chosen": -93.333251953125, |
|
"logps/rejected": -75.71430969238281, |
|
"loss": 0.5669, |
|
"rewards/accuracies": 0.9833332896232605, |
|
"rewards/chosen": 0.22007617354393005, |
|
"rewards/margins": 0.3152759373188019, |
|
"rewards/rejected": -0.09519973397254944, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.28310502283105e-08, |
|
"logits/chosen": -1.9400713443756104, |
|
"logits/rejected": -1.6670045852661133, |
|
"logps/chosen": -90.7359619140625, |
|
"logps/rejected": -72.49601745605469, |
|
"loss": 0.5694, |
|
"rewards/accuracies": 0.9416667222976685, |
|
"rewards/chosen": 0.18735943734645844, |
|
"rewards/margins": 0.2668268084526062, |
|
"rewards/rejected": -0.07946738600730896, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_logits/chosen": -2.0464956760406494, |
|
"eval_logits/rejected": -1.776126742362976, |
|
"eval_logps/chosen": -90.40695190429688, |
|
"eval_logps/rejected": -73.16618347167969, |
|
"eval_loss": 0.5701765418052673, |
|
"eval_rewards/accuracies": 0.9416666626930237, |
|
"eval_rewards/chosen": 0.15787601470947266, |
|
"eval_rewards/margins": 0.26444879174232483, |
|
"eval_rewards/rejected": -0.10657278448343277, |
|
"eval_runtime": 118.3051, |
|
"eval_samples_per_second": 24.192, |
|
"eval_steps_per_second": 0.761, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.902587519025875e-08, |
|
"logits/chosen": -1.9084300994873047, |
|
"logits/rejected": -1.5941137075424194, |
|
"logps/chosen": -91.3935546875, |
|
"logps/rejected": -76.21849060058594, |
|
"loss": 0.5703, |
|
"rewards/accuracies": 0.9416666030883789, |
|
"rewards/chosen": 0.18224991858005524, |
|
"rewards/margins": 0.2809165418148041, |
|
"rewards/rejected": -0.09866663068532944, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5220700152207e-08, |
|
"logits/chosen": -1.8584785461425781, |
|
"logits/rejected": -1.5252989530563354, |
|
"logps/chosen": -94.4938735961914, |
|
"logps/rejected": -76.2500228881836, |
|
"loss": 0.5705, |
|
"rewards/accuracies": 0.9166666269302368, |
|
"rewards/chosen": 0.17414768040180206, |
|
"rewards/margins": 0.2775163948535919, |
|
"rewards/rejected": -0.10336872190237045, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.141552511415525e-08, |
|
"logits/chosen": -1.8703104257583618, |
|
"logits/rejected": -1.5645638704299927, |
|
"logps/chosen": -92.91007232666016, |
|
"logps/rejected": -72.98766326904297, |
|
"loss": 0.5703, |
|
"rewards/accuracies": 0.9416666030883789, |
|
"rewards/chosen": 0.1589893251657486, |
|
"rewards/margins": 0.26451554894447327, |
|
"rewards/rejected": -0.10552623122930527, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 7.6103500761035e-09, |
|
"logits/chosen": -1.9101779460906982, |
|
"logits/rejected": -1.6234633922576904, |
|
"logps/chosen": -88.99760437011719, |
|
"logps/rejected": -73.46092224121094, |
|
"loss": 0.5694, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 0.17230169475078583, |
|
"rewards/margins": 0.25403517484664917, |
|
"rewards/rejected": -0.08173345029354095, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.80517503805175e-09, |
|
"logits/chosen": -1.837728500366211, |
|
"logits/rejected": -1.6049699783325195, |
|
"logps/chosen": -92.38600158691406, |
|
"logps/rejected": -78.56689453125, |
|
"loss": 0.571, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.18492794036865234, |
|
"rewards/margins": 0.28263044357299805, |
|
"rewards/rejected": -0.0977025032043457, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -1.8631808757781982, |
|
"logits/rejected": -1.5711183547973633, |
|
"logps/chosen": -90.29685974121094, |
|
"logps/rejected": -76.57972717285156, |
|
"loss": 0.5684, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.18871954083442688, |
|
"rewards/margins": 0.3021387457847595, |
|
"rewards/rejected": -0.11341919749975204, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1460, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6280729855576607, |
|
"train_runtime": 9689.6427, |
|
"train_samples_per_second": 14.469, |
|
"train_steps_per_second": 0.151 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1460, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|