{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 5139, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.005837711617046118, "grad_norm": 1.990154978242191, "learning_rate": 5.000000000000001e-07, "log_odds_chosen": 0.36676594614982605, "log_odds_ratio": -0.5584489107131958, "logits/chosen": -3.978353977203369, "logits/rejected": -3.8568809032440186, "logps/chosen": -1.546073317527771, "logps/rejected": -1.846043586730957, "loss": 2.0838, "nll_loss": 1.9911216497421265, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07730367034673691, "rewards/margins": 0.01499851606786251, "rewards/rejected": -0.09230218827724457, "step": 10 }, { "epoch": 0.011675423234092236, "grad_norm": 1.8071876168039105, "learning_rate": 1.0000000000000002e-06, "log_odds_chosen": 0.1855998933315277, "log_odds_ratio": -0.6476616859436035, "logits/chosen": -4.11168098449707, "logits/rejected": -4.021448612213135, "logps/chosen": -1.851523756980896, "logps/rejected": -2.0118327140808105, "loss": 2.0208, "nll_loss": 1.9619057178497314, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09257619082927704, "rewards/margins": 0.008015456609427929, "rewards/rejected": -0.10059164464473724, "step": 20 }, { "epoch": 0.017513134851138354, "grad_norm": 2.113566474505677, "learning_rate": 1.5e-06, "log_odds_chosen": -0.15984249114990234, "log_odds_ratio": -0.8128768801689148, "logits/chosen": -4.067653656005859, "logits/rejected": -4.041723728179932, "logps/chosen": -1.7212803363800049, "logps/rejected": -1.612095832824707, "loss": 1.9152, "nll_loss": 1.8544642925262451, "rewards/accuracies": 0.5, "rewards/chosen": -0.08606401085853577, "rewards/margins": -0.005459212698042393, "rewards/rejected": -0.08060479909181595, "step": 30 }, { "epoch": 0.023350846468184472, "grad_norm": 2.4243663524155195, "learning_rate": 2.0000000000000003e-06, "log_odds_chosen": 0.3139367699623108, "log_odds_ratio": -0.5713350772857666, "logits/chosen": -4.034919261932373, "logits/rejected": -4.019538879394531, "logps/chosen": -1.631823182106018, "logps/rejected": -1.8853063583374023, "loss": 1.9319, "nll_loss": 1.9523200988769531, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0815911665558815, "rewards/margins": 0.01267415564507246, "rewards/rejected": -0.09426531940698624, "step": 40 }, { "epoch": 0.02918855808523059, "grad_norm": 1.3247144354417215, "learning_rate": 2.5e-06, "log_odds_chosen": 0.30223074555397034, "log_odds_ratio": -0.6337481141090393, "logits/chosen": -3.9345099925994873, "logits/rejected": -3.804218292236328, "logps/chosen": -1.7550376653671265, "logps/rejected": -2.033318042755127, "loss": 1.8639, "nll_loss": 2.0374906063079834, "rewards/accuracies": 0.5, "rewards/chosen": -0.08775188028812408, "rewards/margins": 0.013914018869400024, "rewards/rejected": -0.10166589915752411, "step": 50 }, { "epoch": 0.03502626970227671, "grad_norm": 1.2572510609912855, "learning_rate": 3e-06, "log_odds_chosen": 0.290189653635025, "log_odds_ratio": -0.611628532409668, "logits/chosen": -3.898294448852539, "logits/rejected": -3.917323350906372, "logps/chosen": -1.7132856845855713, "logps/rejected": -1.9628570079803467, "loss": 1.9181, "nll_loss": 1.8665409088134766, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0856642872095108, "rewards/margins": 0.012478563003242016, "rewards/rejected": -0.0981428474187851, "step": 60 }, { "epoch": 0.040863981319322826, "grad_norm": 1.6568601948484962, "learning_rate": 3.5e-06, "log_odds_chosen": 0.24932345747947693, "log_odds_ratio": -0.6089839339256287, "logits/chosen": -3.794062852859497, "logits/rejected": -3.831033229827881, "logps/chosen": -1.6429595947265625, "logps/rejected": -1.8402353525161743, "loss": 1.8218, "nll_loss": 1.7739753723144531, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0821479856967926, "rewards/margins": 0.009863784536719322, "rewards/rejected": -0.09201176464557648, "step": 70 }, { "epoch": 0.046701692936368944, "grad_norm": 2.975037644440534, "learning_rate": 4.000000000000001e-06, "log_odds_chosen": 0.10633324086666107, "log_odds_ratio": -0.6581578850746155, "logits/chosen": -3.8149256706237793, "logits/rejected": -3.8018391132354736, "logps/chosen": -1.6215823888778687, "logps/rejected": -1.7106540203094482, "loss": 1.7732, "nll_loss": 1.697726845741272, "rewards/accuracies": 0.5, "rewards/chosen": -0.08107912540435791, "rewards/margins": 0.0044535803608596325, "rewards/rejected": -0.08553270995616913, "step": 80 }, { "epoch": 0.05253940455341506, "grad_norm": 2.160092742821769, "learning_rate": 4.5e-06, "log_odds_chosen": 0.2655065953731537, "log_odds_ratio": -0.5950418710708618, "logits/chosen": -3.7613322734832764, "logits/rejected": -3.6458842754364014, "logps/chosen": -1.5020530223846436, "logps/rejected": -1.71852707862854, "loss": 1.7175, "nll_loss": 1.5467979907989502, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07510264217853546, "rewards/margins": 0.01082369964569807, "rewards/rejected": -0.085926353931427, "step": 90 }, { "epoch": 0.05837711617046118, "grad_norm": 1.107962871697757, "learning_rate": 5e-06, "log_odds_chosen": -0.010628092102706432, "log_odds_ratio": -0.70680171251297, "logits/chosen": -3.7405014038085938, "logits/rejected": -3.6982369422912598, "logps/chosen": -1.6686241626739502, "logps/rejected": -1.6591441631317139, "loss": 1.6674, "nll_loss": 1.6390740871429443, "rewards/accuracies": 0.5, "rewards/chosen": -0.0834311991930008, "rewards/margins": -0.0004739895521197468, "rewards/rejected": -0.08295721560716629, "step": 100 }, { "epoch": 0.0642148277875073, "grad_norm": 1.967475878263266, "learning_rate": 4.767312946227961e-06, "log_odds_chosen": 0.5874025821685791, "log_odds_ratio": -0.46849799156188965, "logits/chosen": -3.393907070159912, "logits/rejected": -3.5530502796173096, "logps/chosen": -1.278022050857544, "logps/rejected": -1.734056830406189, "loss": 1.5179, "nll_loss": 1.4113084077835083, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06390109658241272, "rewards/margins": 0.02280174195766449, "rewards/rejected": -0.08670283854007721, "step": 110 }, { "epoch": 0.07005253940455342, "grad_norm": 1.2761368811249625, "learning_rate": 4.564354645876385e-06, "log_odds_chosen": 0.07370240986347198, "log_odds_ratio": -0.6864700317382812, "logits/chosen": -3.7193660736083984, "logits/rejected": -3.652819871902466, "logps/chosen": -1.6726070642471313, "logps/rejected": -1.737320899963379, "loss": 1.643, "nll_loss": 1.6497442722320557, "rewards/accuracies": 0.5, "rewards/chosen": -0.08363036066293716, "rewards/margins": 0.003235695417970419, "rewards/rejected": -0.08686605840921402, "step": 120 }, { "epoch": 0.07589025102159953, "grad_norm": 1.2462055815439053, "learning_rate": 4.385290096535147e-06, "log_odds_chosen": 0.1671605408191681, "log_odds_ratio": -0.6477088928222656, "logits/chosen": -3.585245132446289, "logits/rejected": -3.7995619773864746, "logps/chosen": -1.6958179473876953, "logps/rejected": -1.846692442893982, "loss": 1.7168, "nll_loss": 1.6261155605316162, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08479089289903641, "rewards/margins": 0.00754373986274004, "rewards/rejected": -0.09233463555574417, "step": 130 }, { "epoch": 0.08172796263864565, "grad_norm": 1.2879665108040765, "learning_rate": 4.2257712736425835e-06, "log_odds_chosen": 0.2554765045642853, "log_odds_ratio": -0.5911614298820496, "logits/chosen": -3.7079670429229736, "logits/rejected": -3.641380786895752, "logps/chosen": -1.4764906167984009, "logps/rejected": -1.6701595783233643, "loss": 1.5093, "nll_loss": 1.4807078838348389, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07382453978061676, "rewards/margins": 0.009683446027338505, "rewards/rejected": -0.0835079774260521, "step": 140 }, { "epoch": 0.08756567425569177, "grad_norm": 1.2800314170672162, "learning_rate": 4.082482904638631e-06, "log_odds_chosen": 0.2852652072906494, "log_odds_ratio": -0.6293755769729614, "logits/chosen": -3.5243332386016846, "logits/rejected": -3.4620766639709473, "logps/chosen": -1.3985514640808105, "logps/rejected": -1.676613450050354, "loss": 1.5261, "nll_loss": 1.5597621202468872, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06992756575345993, "rewards/margins": 0.013903103768825531, "rewards/rejected": -0.08383066952228546, "step": 150 }, { "epoch": 0.09340338587273789, "grad_norm": 2.321285820747544, "learning_rate": 3.952847075210474e-06, "log_odds_chosen": 0.2227706015110016, "log_odds_ratio": -0.6301699876785278, "logits/chosen": -3.5348258018493652, "logits/rejected": -3.7374844551086426, "logps/chosen": -1.5824413299560547, "logps/rejected": -1.7538429498672485, "loss": 1.5924, "nll_loss": 1.5320008993148804, "rewards/accuracies": 0.5, "rewards/chosen": -0.07912206649780273, "rewards/margins": 0.008570077829062939, "rewards/rejected": -0.08769214153289795, "step": 160 }, { "epoch": 0.099241097489784, "grad_norm": 1.303863822545706, "learning_rate": 3.834824944236852e-06, "log_odds_chosen": 0.2827293276786804, "log_odds_ratio": -0.592948317527771, "logits/chosen": -3.5447793006896973, "logits/rejected": -3.5976157188415527, "logps/chosen": -1.4674885272979736, "logps/rejected": -1.7190072536468506, "loss": 1.5642, "nll_loss": 1.433887243270874, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0733744278550148, "rewards/margins": 0.012575936503708363, "rewards/rejected": -0.08595035970211029, "step": 170 }, { "epoch": 0.10507880910683012, "grad_norm": 1.1768460814431243, "learning_rate": 3.72677996249965e-06, "log_odds_chosen": 0.3007754385471344, "log_odds_ratio": -0.5873775482177734, "logits/chosen": -3.2876391410827637, "logits/rejected": -3.6375489234924316, "logps/chosen": -1.4483187198638916, "logps/rejected": -1.6627426147460938, "loss": 1.5216, "nll_loss": 1.4991960525512695, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07241594046354294, "rewards/margins": 0.010721193626523018, "rewards/rejected": -0.0831371396780014, "step": 180 }, { "epoch": 0.11091652072387624, "grad_norm": 1.1032890582742096, "learning_rate": 3.6273812505500587e-06, "log_odds_chosen": -0.034546807408332825, "log_odds_ratio": -0.7423598766326904, "logits/chosen": -3.4250328540802, "logits/rejected": -3.5813660621643066, "logps/chosen": -1.314984917640686, "logps/rejected": -1.3011138439178467, "loss": 1.5004, "nll_loss": 1.3584058284759521, "rewards/accuracies": 0.5, "rewards/chosen": -0.06574924290180206, "rewards/margins": -0.0006935521960258484, "rewards/rejected": -0.06505569070577621, "step": 190 }, { "epoch": 0.11675423234092236, "grad_norm": 0.9317257015427647, "learning_rate": 3.5355339059327378e-06, "log_odds_chosen": -0.0031783967278897762, "log_odds_ratio": -0.7238091230392456, "logits/chosen": -3.836237668991089, "logits/rejected": -3.7646584510803223, "logps/chosen": -1.4376085996627808, "logps/rejected": -1.4141019582748413, "loss": 1.544, "nll_loss": 1.5575282573699951, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.07188042998313904, "rewards/margins": -0.001175328390672803, "rewards/rejected": -0.0707051008939743, "step": 200 }, { "epoch": 0.12259194395796848, "grad_norm": 1.0367599864652708, "learning_rate": 3.450327796711771e-06, "log_odds_chosen": 0.16346760094165802, "log_odds_ratio": -0.6442714333534241, "logits/chosen": -3.8178744316101074, "logits/rejected": -3.7085328102111816, "logps/chosen": -1.602351188659668, "logps/rejected": -1.7430833578109741, "loss": 1.5156, "nll_loss": 1.5790749788284302, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.08011756092309952, "rewards/margins": 0.0070366086438298225, "rewards/rejected": -0.08715417236089706, "step": 210 }, { "epoch": 0.1284296555750146, "grad_norm": 2.904067516376187, "learning_rate": 3.3709993123162106e-06, "log_odds_chosen": -0.34260430932044983, "log_odds_ratio": -0.9320541620254517, "logits/chosen": -3.253993511199951, "logits/rejected": -3.607278347015381, "logps/chosen": -1.4718291759490967, "logps/rejected": -1.2767351865768433, "loss": 1.5599, "nll_loss": 1.4918346405029297, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.0735914558172226, "rewards/margins": -0.009754697792232037, "rewards/rejected": -0.06383676081895828, "step": 220 }, { "epoch": 0.1342673671920607, "grad_norm": 1.9175869409236765, "learning_rate": 3.296902366978936e-06, "log_odds_chosen": -0.011457055807113647, "log_odds_ratio": -0.710320234298706, "logits/chosen": -3.591557025909424, "logits/rejected": -3.595198154449463, "logps/chosen": -1.551020860671997, "logps/rejected": -1.5484445095062256, "loss": 1.5366, "nll_loss": 1.4876940250396729, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07755104452371597, "rewards/margins": -0.0001288138300878927, "rewards/rejected": -0.07742222398519516, "step": 230 }, { "epoch": 0.14010507880910683, "grad_norm": 1.066312892566509, "learning_rate": 3.2274861218395142e-06, "log_odds_chosen": 0.2476961612701416, "log_odds_ratio": -0.6185265779495239, "logits/chosen": -3.7182857990264893, "logits/rejected": -3.870983839035034, "logps/chosen": -1.425626516342163, "logps/rejected": -1.631575345993042, "loss": 1.5213, "nll_loss": 1.507997989654541, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0712813213467598, "rewards/margins": 0.010297447443008423, "rewards/rejected": -0.08157877624034882, "step": 240 }, { "epoch": 0.14594279042615294, "grad_norm": 1.025705751534528, "learning_rate": 3.1622776601683796e-06, "log_odds_chosen": 0.4077344536781311, "log_odds_ratio": -0.5260993838310242, "logits/chosen": -3.6779837608337402, "logits/rejected": -3.8346943855285645, "logps/chosen": -1.4422261714935303, "logps/rejected": -1.7560911178588867, "loss": 1.5753, "nll_loss": 1.486325740814209, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.07211131602525711, "rewards/margins": 0.015693243592977524, "rewards/rejected": -0.08780454844236374, "step": 250 }, { "epoch": 0.15178050204319907, "grad_norm": 1.351208041239516, "learning_rate": 3.1008683647302113e-06, "log_odds_chosen": 0.3104807436466217, "log_odds_ratio": -0.5609003305435181, "logits/chosen": -3.635833263397217, "logits/rejected": -3.4734745025634766, "logps/chosen": -1.336422085762024, "logps/rejected": -1.5650293827056885, "loss": 1.5177, "nll_loss": 1.3071715831756592, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06682110577821732, "rewards/margins": 0.011430369690060616, "rewards/rejected": -0.07825146615505219, "step": 260 }, { "epoch": 0.15761821366024517, "grad_norm": 2.3163570615145193, "learning_rate": 3.0429030972509227e-06, "log_odds_chosen": 0.16966590285301208, "log_odds_ratio": -0.6342719793319702, "logits/chosen": -3.6301426887512207, "logits/rejected": -3.6858131885528564, "logps/chosen": -1.3310140371322632, "logps/rejected": -1.4671225547790527, "loss": 1.4825, "nll_loss": 1.365090250968933, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06655070930719376, "rewards/margins": 0.006805419921875, "rewards/rejected": -0.07335612922906876, "step": 270 }, { "epoch": 0.1634559252772913, "grad_norm": 1.3384107170589448, "learning_rate": 2.988071523335984e-06, "log_odds_chosen": 0.4238092303276062, "log_odds_ratio": -0.5109076499938965, "logits/chosen": -3.703996181488037, "logits/rejected": -3.861206531524658, "logps/chosen": -1.5469166040420532, "logps/rejected": -1.8883613348007202, "loss": 1.5805, "nll_loss": 1.4497464895248413, "rewards/accuracies": 1.0, "rewards/chosen": -0.0773458331823349, "rewards/margins": 0.01707223616540432, "rewards/rejected": -0.09441806375980377, "step": 280 }, { "epoch": 0.1692936368943374, "grad_norm": 1.6247056680910046, "learning_rate": 2.9361010975735177e-06, "log_odds_chosen": 0.3191284239292145, "log_odds_ratio": -0.5725136399269104, "logits/chosen": -3.7078170776367188, "logits/rejected": -3.8318309783935547, "logps/chosen": -1.5214741230010986, "logps/rejected": -1.75186288356781, "loss": 1.5288, "nll_loss": 1.5261731147766113, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07607370615005493, "rewards/margins": 0.011519446037709713, "rewards/rejected": -0.08759315311908722, "step": 290 }, { "epoch": 0.17513134851138354, "grad_norm": 1.6060165066541294, "learning_rate": 2.8867513459481293e-06, "log_odds_chosen": 0.30537939071655273, "log_odds_ratio": -0.5685498118400574, "logits/chosen": -3.6009533405303955, "logits/rejected": -3.89081072807312, "logps/chosen": -1.4867101907730103, "logps/rejected": -1.7454545497894287, "loss": 1.4955, "nll_loss": 1.4746586084365845, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07433551549911499, "rewards/margins": 0.01293722353875637, "rewards/rejected": -0.08727273344993591, "step": 300 }, { "epoch": 0.18096906012842964, "grad_norm": 1.596087499692572, "learning_rate": 2.839809171235324e-06, "log_odds_chosen": 0.13310456275939941, "log_odds_ratio": -0.6524591445922852, "logits/chosen": -3.553497791290283, "logits/rejected": -3.715376615524292, "logps/chosen": -1.3168188333511353, "logps/rejected": -1.394164800643921, "loss": 1.4339, "nll_loss": 1.2734878063201904, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0658409371972084, "rewards/margins": 0.0038672953378409147, "rewards/rejected": -0.06970824301242828, "step": 310 }, { "epoch": 0.18680677174547577, "grad_norm": 1.1595783575632723, "learning_rate": 2.7950849718747376e-06, "log_odds_chosen": 0.17128218710422516, "log_odds_ratio": -0.6544448137283325, "logits/chosen": -3.7404112815856934, "logits/rejected": -3.7400360107421875, "logps/chosen": -1.4285980463027954, "logps/rejected": -1.524806261062622, "loss": 1.5082, "nll_loss": 1.5090662240982056, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07142989337444305, "rewards/margins": 0.004810418002307415, "rewards/rejected": -0.07624031603336334, "step": 320 }, { "epoch": 0.19264448336252188, "grad_norm": 1.2367618610621955, "learning_rate": 2.752409412815902e-06, "log_odds_chosen": 0.32530689239501953, "log_odds_ratio": -0.5974474549293518, "logits/chosen": -3.105989933013916, "logits/rejected": -3.675053834915161, "logps/chosen": -1.2037900686264038, "logps/rejected": -1.3976547718048096, "loss": 1.5339, "nll_loss": 1.3588110208511353, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06018950790166855, "rewards/margins": 0.009693227708339691, "rewards/rejected": -0.06988272815942764, "step": 330 }, { "epoch": 0.198482194979568, "grad_norm": 0.9685921277136716, "learning_rate": 2.711630722733202e-06, "log_odds_chosen": 0.4203189015388489, "log_odds_ratio": -0.5492938756942749, "logits/chosen": -3.585575819015503, "logits/rejected": -3.737740993499756, "logps/chosen": -1.553531289100647, "logps/rejected": -1.8887583017349243, "loss": 1.5432, "nll_loss": 1.5973610877990723, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07767656445503235, "rewards/margins": 0.016761351376771927, "rewards/rejected": -0.09443791210651398, "step": 340 }, { "epoch": 0.20431990659661411, "grad_norm": 2.0155106463580097, "learning_rate": 2.6726124191242444e-06, "log_odds_chosen": 0.5516608357429504, "log_odds_ratio": -0.5166685581207275, "logits/chosen": -3.569571018218994, "logits/rejected": -3.698533296585083, "logps/chosen": -1.3214404582977295, "logps/rejected": -1.7221263647079468, "loss": 1.4819, "nll_loss": 1.332558512687683, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0660720244050026, "rewards/margins": 0.02003430388867855, "rewards/rejected": -0.0861063227057457, "step": 350 }, { "epoch": 0.21015761821366025, "grad_norm": 1.1273374089123385, "learning_rate": 2.6352313834736496e-06, "log_odds_chosen": -0.18085989356040955, "log_odds_ratio": -0.807360827922821, "logits/chosen": -3.8960342407226562, "logits/rejected": -3.70375394821167, "logps/chosen": -1.6963361501693726, "logps/rejected": -1.5660804510116577, "loss": 1.5909, "nll_loss": 1.692983627319336, "rewards/accuracies": 0.30000001192092896, "rewards/chosen": -0.0848168134689331, "rewards/margins": -0.006512784864753485, "rewards/rejected": -0.07830402255058289, "step": 360 }, { "epoch": 0.21599532983070635, "grad_norm": 1.652092130295582, "learning_rate": 2.599376224550182e-06, "log_odds_chosen": 0.05431831628084183, "log_odds_ratio": -0.6972266435623169, "logits/chosen": -3.7287070751190186, "logits/rejected": -3.807194471359253, "logps/chosen": -1.241661787033081, "logps/rejected": -1.2941557168960571, "loss": 1.4333, "nll_loss": 1.319840908050537, "rewards/accuracies": 0.5, "rewards/chosen": -0.06208309531211853, "rewards/margins": 0.0026246909983456135, "rewards/rejected": -0.06470777839422226, "step": 370 }, { "epoch": 0.22183304144775248, "grad_norm": 1.2716434109568895, "learning_rate": 2.564945880212886e-06, "log_odds_chosen": 0.2830047309398651, "log_odds_ratio": -0.632334291934967, "logits/chosen": -3.5780651569366455, "logits/rejected": -3.80888032913208, "logps/chosen": -1.2461971044540405, "logps/rejected": -1.4993908405303955, "loss": 1.5678, "nll_loss": 1.4222053289413452, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.062309861183166504, "rewards/margins": 0.01265968382358551, "rewards/rejected": -0.07496954500675201, "step": 380 }, { "epoch": 0.2276707530647986, "grad_norm": 0.9884503576517664, "learning_rate": 2.5318484177091667e-06, "log_odds_chosen": 0.4506290853023529, "log_odds_ratio": -0.5728394389152527, "logits/chosen": -3.535125732421875, "logits/rejected": -3.7862510681152344, "logps/chosen": -1.3783390522003174, "logps/rejected": -1.6720952987670898, "loss": 1.4952, "nll_loss": 1.3355036973953247, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06891695410013199, "rewards/margins": 0.014687815681099892, "rewards/rejected": -0.08360476791858673, "step": 390 }, { "epoch": 0.23350846468184472, "grad_norm": 3.0086703029642363, "learning_rate": 2.5e-06, "log_odds_chosen": 0.19105727970600128, "log_odds_ratio": -0.629357099533081, "logits/chosen": -3.4363574981689453, "logits/rejected": -3.851020336151123, "logps/chosen": -1.4543683528900146, "logps/rejected": -1.6147823333740234, "loss": 1.4232, "nll_loss": 1.3756486177444458, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07271841913461685, "rewards/margins": 0.008020700886845589, "rewards/rejected": -0.08073912560939789, "step": 400 }, { "epoch": 0.23934617629889082, "grad_norm": 1.2186245603874044, "learning_rate": 2.4693239916239746e-06, "log_odds_chosen": 0.31956928968429565, "log_odds_ratio": -0.5794490575790405, "logits/chosen": -3.6023993492126465, "logits/rejected": -3.793147563934326, "logps/chosen": -1.5385732650756836, "logps/rejected": -1.7986494302749634, "loss": 1.4372, "nll_loss": 1.5570087432861328, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07692866772413254, "rewards/margins": 0.013003801926970482, "rewards/rejected": -0.08993245661258698, "step": 410 }, { "epoch": 0.24518388791593695, "grad_norm": 1.3887739615498178, "learning_rate": 2.4397501823713327e-06, "log_odds_chosen": 0.17208419740200043, "log_odds_ratio": -0.6314740777015686, "logits/chosen": -3.800130844116211, "logits/rejected": -3.8383185863494873, "logps/chosen": -1.4939535856246948, "logps/rejected": -1.6580890417099, "loss": 1.4647, "nll_loss": 1.4735338687896729, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07469768822193146, "rewards/margins": 0.008206775411963463, "rewards/rejected": -0.08290445804595947, "step": 420 }, { "epoch": 0.2510215995329831, "grad_norm": 1.9085822503524716, "learning_rate": 2.411214110852061e-06, "log_odds_chosen": 0.4748357832431793, "log_odds_ratio": -0.5270480513572693, "logits/chosen": -3.297438144683838, "logits/rejected": -3.79032826423645, "logps/chosen": -1.2137784957885742, "logps/rejected": -1.53304123878479, "loss": 1.3529, "nll_loss": 1.2723811864852905, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06068892404437065, "rewards/margins": 0.01596314087510109, "rewards/rejected": -0.07665206491947174, "step": 430 }, { "epoch": 0.2568593111500292, "grad_norm": 1.1164680049474043, "learning_rate": 2.3836564731139807e-06, "log_odds_chosen": 0.026882369071245193, "log_odds_ratio": -0.6991128921508789, "logits/chosen": -3.522395610809326, "logits/rejected": -3.6246464252471924, "logps/chosen": -1.7075363397598267, "logps/rejected": -1.7223564386367798, "loss": 1.5076, "nll_loss": 1.5594959259033203, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.0853768140077591, "rewards/margins": 0.0007410004618577659, "rewards/rejected": -0.08611781895160675, "step": 440 }, { "epoch": 0.2626970227670753, "grad_norm": 1.1768913834622843, "learning_rate": 2.357022603955159e-06, "log_odds_chosen": 0.2687508165836334, "log_odds_ratio": -0.5950598120689392, "logits/chosen": -3.569084882736206, "logits/rejected": -3.891885280609131, "logps/chosen": -1.4692662954330444, "logps/rejected": -1.6739263534545898, "loss": 1.5102, "nll_loss": 1.5285979509353638, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0734633207321167, "rewards/margins": 0.010233004577457905, "rewards/rejected": -0.08369632065296173, "step": 450 }, { "epoch": 0.2685347343841214, "grad_norm": 1.4598326425455976, "learning_rate": 2.3312620206007847e-06, "log_odds_chosen": 0.17465341091156006, "log_odds_ratio": -0.621364176273346, "logits/chosen": -3.6908626556396484, "logits/rejected": -3.8004469871520996, "logps/chosen": -1.6082401275634766, "logps/rejected": -1.739894151687622, "loss": 1.4446, "nll_loss": 1.5170495510101318, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08041200041770935, "rewards/margins": 0.0065827080979943275, "rewards/rejected": -0.0869947224855423, "step": 460 }, { "epoch": 0.27437244600116756, "grad_norm": 1.0127484288456075, "learning_rate": 2.3063280200722128e-06, "log_odds_chosen": 0.24642065167427063, "log_odds_ratio": -0.6047152280807495, "logits/chosen": -3.4301185607910156, "logits/rejected": -3.560349702835083, "logps/chosen": -1.2619202136993408, "logps/rejected": -1.4264518022537231, "loss": 1.4545, "nll_loss": 1.3372890949249268, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06309600174427032, "rewards/margins": 0.00822658371180296, "rewards/rejected": -0.07132259011268616, "step": 470 }, { "epoch": 0.28021015761821366, "grad_norm": 1.1557256880661775, "learning_rate": 2.2821773229381924e-06, "log_odds_chosen": 0.2631526589393616, "log_odds_ratio": -0.6088293790817261, "logits/chosen": -3.741560697555542, "logits/rejected": -3.8665318489074707, "logps/chosen": -1.3548665046691895, "logps/rejected": -1.5597515106201172, "loss": 1.5199, "nll_loss": 1.3430006504058838, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06774333119392395, "rewards/margins": 0.010244252160191536, "rewards/rejected": -0.07798758149147034, "step": 480 }, { "epoch": 0.28604786923525977, "grad_norm": 1.1000135380563174, "learning_rate": 2.2587697572631284e-06, "log_odds_chosen": 0.5413572788238525, "log_odds_ratio": -0.49287375807762146, "logits/chosen": -3.5969624519348145, "logits/rejected": -3.648097276687622, "logps/chosen": -1.1404173374176025, "logps/rejected": -1.537798523902893, "loss": 1.3231, "nll_loss": 1.2701988220214844, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05702086165547371, "rewards/margins": 0.019869063049554825, "rewards/rejected": -0.07688992470502853, "step": 490 }, { "epoch": 0.29188558085230587, "grad_norm": 1.2671450713743655, "learning_rate": 2.23606797749979e-06, "log_odds_chosen": 0.4784451127052307, "log_odds_ratio": -0.503769040107727, "logits/chosen": -3.488185167312622, "logits/rejected": -3.5596814155578613, "logps/chosen": -1.4936765432357788, "logps/rejected": -1.8743066787719727, "loss": 1.5328, "nll_loss": 1.4856728315353394, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.07468383014202118, "rewards/margins": 0.019031506031751633, "rewards/rejected": -0.09371533989906311, "step": 500 }, { "epoch": 0.29772329246935203, "grad_norm": 1.111950393159219, "learning_rate": 2.2140372138502386e-06, "log_odds_chosen": 0.061009638011455536, "log_odds_ratio": -0.7041588425636292, "logits/chosen": -3.6847712993621826, "logits/rejected": -3.435586452484131, "logps/chosen": -1.6376501321792603, "logps/rejected": -1.668830156326294, "loss": 1.5733, "nll_loss": 1.5438292026519775, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08188250660896301, "rewards/margins": 0.0015589960385113955, "rewards/rejected": -0.08344151079654694, "step": 510 }, { "epoch": 0.30356100408639813, "grad_norm": 1.1580006195759747, "learning_rate": 2.1926450482675734e-06, "log_odds_chosen": 0.09754817187786102, "log_odds_ratio": -0.661777138710022, "logits/chosen": -3.8593666553497314, "logits/rejected": -3.901379108428955, "logps/chosen": -1.620665192604065, "logps/rejected": -1.6982667446136475, "loss": 1.583, "nll_loss": 1.504148006439209, "rewards/accuracies": 0.5, "rewards/chosen": -0.08103326708078384, "rewards/margins": 0.003880070988088846, "rewards/rejected": -0.08491333574056625, "step": 520 }, { "epoch": 0.30939871570344424, "grad_norm": 0.9592978602752241, "learning_rate": 2.1718612138153473e-06, "log_odds_chosen": 0.09928347915410995, "log_odds_ratio": -0.6850290894508362, "logits/chosen": -3.6773905754089355, "logits/rejected": -3.647392988204956, "logps/chosen": -1.5814847946166992, "logps/rejected": -1.6704862117767334, "loss": 1.5443, "nll_loss": 1.5058507919311523, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07907424867153168, "rewards/margins": 0.004450077190995216, "rewards/rejected": -0.08352432399988174, "step": 530 }, { "epoch": 0.31523642732049034, "grad_norm": 1.3309218203106197, "learning_rate": 2.151657414559676e-06, "log_odds_chosen": 0.24405305087566376, "log_odds_ratio": -0.6199005842208862, "logits/chosen": -3.5834121704101562, "logits/rejected": -3.603256940841675, "logps/chosen": -1.3313912153244019, "logps/rejected": -1.485610008239746, "loss": 1.4369, "nll_loss": 1.3496475219726562, "rewards/accuracies": 0.5, "rewards/chosen": -0.06656956672668457, "rewards/margins": 0.007710927166044712, "rewards/rejected": -0.07428049296140671, "step": 540 }, { "epoch": 0.3210741389375365, "grad_norm": 1.4181328752463644, "learning_rate": 2.132007163556104e-06, "log_odds_chosen": -0.13299673795700073, "log_odds_ratio": -0.8034003376960754, "logits/chosen": -3.6089813709259033, "logits/rejected": -3.799307346343994, "logps/chosen": -1.5664103031158447, "logps/rejected": -1.4846795797348022, "loss": 1.459, "nll_loss": 1.5209907293319702, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.07832051813602448, "rewards/margins": -0.0040865326300263405, "rewards/rejected": -0.07423397898674011, "step": 550 }, { "epoch": 0.3269118505545826, "grad_norm": 1.1918804765250088, "learning_rate": 2.1128856368212917e-06, "log_odds_chosen": 0.04277733713388443, "log_odds_ratio": -0.7270143628120422, "logits/chosen": -3.3145251274108887, "logits/rejected": -3.611396312713623, "logps/chosen": -1.4878848791122437, "logps/rejected": -1.5433499813079834, "loss": 1.4656, "nll_loss": 1.4447799921035767, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07439424842596054, "rewards/margins": 0.002773257438093424, "rewards/rejected": -0.07716749608516693, "step": 560 }, { "epoch": 0.3327495621716287, "grad_norm": 2.370378875691723, "learning_rate": 2.0942695414584777e-06, "log_odds_chosen": -0.06881371885538101, "log_odds_ratio": -0.7357025146484375, "logits/chosen": -3.727865219116211, "logits/rejected": -3.7729458808898926, "logps/chosen": -1.5684664249420166, "logps/rejected": -1.5277528762817383, "loss": 1.4842, "nll_loss": 1.4857263565063477, "rewards/accuracies": 0.5, "rewards/chosen": -0.07842332124710083, "rewards/margins": -0.002035671379417181, "rewards/rejected": -0.07638765126466751, "step": 570 }, { "epoch": 0.3385872737886748, "grad_norm": 1.5730013381292747, "learning_rate": 2.0761369963434992e-06, "log_odds_chosen": 0.4631689190864563, "log_odds_ratio": -0.5097706913948059, "logits/chosen": -3.2662086486816406, "logits/rejected": -3.3502445220947266, "logps/chosen": -1.2338672876358032, "logps/rejected": -1.544877052307129, "loss": 1.4214, "nll_loss": 1.3126472234725952, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.06169336289167404, "rewards/margins": 0.015550491400063038, "rewards/rejected": -0.0772438496351242, "step": 580 }, { "epoch": 0.344424985405721, "grad_norm": 1.160191469078602, "learning_rate": 2.058467423981546e-06, "log_odds_chosen": 0.5404361486434937, "log_odds_ratio": -0.48690181970596313, "logits/chosen": -3.489442825317383, "logits/rejected": -3.844979763031006, "logps/chosen": -1.4420831203460693, "logps/rejected": -1.846099853515625, "loss": 1.3261, "nll_loss": 1.2858939170837402, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07210414111614227, "rewards/margins": 0.020200839266180992, "rewards/rejected": -0.09230498224496841, "step": 590 }, { "epoch": 0.3502626970227671, "grad_norm": 1.0948470395646255, "learning_rate": 2.0412414523193154e-06, "log_odds_chosen": 0.32243841886520386, "log_odds_ratio": -0.5554407835006714, "logits/chosen": -3.4422454833984375, "logits/rejected": -3.6013565063476562, "logps/chosen": -1.4794950485229492, "logps/rejected": -1.7101198434829712, "loss": 1.4904, "nll_loss": 1.409510850906372, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07397475093603134, "rewards/margins": 0.011531239375472069, "rewards/rejected": -0.08550599217414856, "step": 600 }, { "epoch": 0.3561004086398132, "grad_norm": 1.1716860250478092, "learning_rate": 2.0244408254472904e-06, "log_odds_chosen": 0.18393674492835999, "log_odds_ratio": -0.6212956309318542, "logits/chosen": -3.7142586708068848, "logits/rejected": -3.6711719036102295, "logps/chosen": -1.41257643699646, "logps/rejected": -1.5701334476470947, "loss": 1.4557, "nll_loss": 1.2700188159942627, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.070628821849823, "rewards/margins": 0.00787784717977047, "rewards/rejected": -0.07850666344165802, "step": 610 }, { "epoch": 0.3619381202568593, "grad_norm": 0.9925987996095448, "learning_rate": 2.0080483222562476e-06, "log_odds_chosen": 0.23365382850170135, "log_odds_ratio": -0.6280688047409058, "logits/chosen": -3.5164883136749268, "logits/rejected": -3.8072803020477295, "logps/chosen": -1.378462553024292, "logps/rejected": -1.5296428203582764, "loss": 1.3532, "nll_loss": 1.3574854135513306, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06892313063144684, "rewards/margins": 0.0075590163469314575, "rewards/rejected": -0.0764821469783783, "step": 620 }, { "epoch": 0.36777583187390545, "grad_norm": 1.19783215567645, "learning_rate": 1.9920476822239895e-06, "log_odds_chosen": 0.20781341195106506, "log_odds_ratio": -0.6220026016235352, "logits/chosen": -3.6006805896759033, "logits/rejected": -3.7796969413757324, "logps/chosen": -1.3699333667755127, "logps/rejected": -1.5385853052139282, "loss": 1.4535, "nll_loss": 1.3024427890777588, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06849665939807892, "rewards/margins": 0.0084325997158885, "rewards/rejected": -0.0769292563199997, "step": 630 }, { "epoch": 0.37361354349095155, "grad_norm": 1.0757690277269971, "learning_rate": 1.976423537605237e-06, "log_odds_chosen": 0.15414497256278992, "log_odds_ratio": -0.6393491625785828, "logits/chosen": -3.554534912109375, "logits/rejected": -3.7354397773742676, "logps/chosen": -1.1933146715164185, "logps/rejected": -1.3100860118865967, "loss": 1.3988, "nll_loss": 1.2744767665863037, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0596657395362854, "rewards/margins": 0.0058385636657476425, "rewards/rejected": -0.0655042976140976, "step": 640 }, { "epoch": 0.37945125510799765, "grad_norm": 1.1247138043433653, "learning_rate": 1.961161351381841e-06, "log_odds_chosen": 0.07848731428384781, "log_odds_ratio": -0.6781501770019531, "logits/chosen": -3.6891980171203613, "logits/rejected": -3.6988136768341064, "logps/chosen": -1.5304944515228271, "logps/rejected": -1.5742305517196655, "loss": 1.4496, "nll_loss": 1.4792160987854004, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07652471214532852, "rewards/margins": 0.002186819911003113, "rewards/rejected": -0.07871153205633163, "step": 650 }, { "epoch": 0.38528896672504376, "grad_norm": 1.1187527653565255, "learning_rate": 1.9462473604038077e-06, "log_odds_chosen": 0.17809581756591797, "log_odds_ratio": -0.6617375016212463, "logits/chosen": -3.5801117420196533, "logits/rejected": -3.7248215675354004, "logps/chosen": -1.4722580909729004, "logps/rejected": -1.6060224771499634, "loss": 1.4221, "nll_loss": 1.4549881219863892, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07361290603876114, "rewards/margins": 0.006688222289085388, "rewards/rejected": -0.08030112832784653, "step": 660 }, { "epoch": 0.3911266783420899, "grad_norm": 2.806015916967675, "learning_rate": 1.9316685232156397e-06, "log_odds_chosen": 0.6871159076690674, "log_odds_ratio": -0.4531538486480713, "logits/chosen": -3.6006736755371094, "logits/rejected": -3.7145893573760986, "logps/chosen": -1.0015580654144287, "logps/rejected": -1.4777460098266602, "loss": 1.3284, "nll_loss": 1.2214610576629639, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.050077904015779495, "rewards/margins": 0.023809393867850304, "rewards/rejected": -0.07388729602098465, "step": 670 }, { "epoch": 0.396964389959136, "grad_norm": 1.1951846140992266, "learning_rate": 1.917412472118426e-06, "log_odds_chosen": 0.23088832199573517, "log_odds_ratio": -0.6203718185424805, "logits/chosen": -3.713235855102539, "logits/rejected": -3.865156650543213, "logps/chosen": -1.7107610702514648, "logps/rejected": -1.904004454612732, "loss": 1.512, "nll_loss": 1.680768370628357, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08553805947303772, "rewards/margins": 0.009662173688411713, "rewards/rejected": -0.09520022571086884, "step": 680 }, { "epoch": 0.4028021015761821, "grad_norm": 1.782409795783369, "learning_rate": 1.9034674690672024e-06, "log_odds_chosen": 0.2188705950975418, "log_odds_ratio": -0.5969911217689514, "logits/chosen": -3.7659125328063965, "logits/rejected": -3.8833823204040527, "logps/chosen": -1.372694730758667, "logps/rejected": -1.5469518899917603, "loss": 1.4703, "nll_loss": 1.500001072883606, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06863473355770111, "rewards/margins": 0.008712862618267536, "rewards/rejected": -0.07734759151935577, "step": 690 }, { "epoch": 0.40863981319322823, "grad_norm": 1.1360284435846917, "learning_rate": 1.8898223650461362e-06, "log_odds_chosen": 0.16071465611457825, "log_odds_ratio": -0.6528164148330688, "logits/chosen": -3.7116570472717285, "logits/rejected": -3.6581578254699707, "logps/chosen": -1.3063817024230957, "logps/rejected": -1.4016963243484497, "loss": 1.3916, "nll_loss": 1.390954613685608, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06531907618045807, "rewards/margins": 0.004765733610838652, "rewards/rejected": -0.07008481025695801, "step": 700 }, { "epoch": 0.4144775248102744, "grad_norm": 1.3521456190318717, "learning_rate": 1.876466562602004e-06, "log_odds_chosen": 0.3216366171836853, "log_odds_ratio": -0.5657163858413696, "logits/chosen": -3.792027711868286, "logits/rejected": -3.9422874450683594, "logps/chosen": -1.4161447286605835, "logps/rejected": -1.6670799255371094, "loss": 1.6302, "nll_loss": 1.5635159015655518, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07080724835395813, "rewards/margins": 0.012546752579510212, "rewards/rejected": -0.08335401117801666, "step": 710 }, { "epoch": 0.4203152364273205, "grad_norm": 1.8396778327175152, "learning_rate": 1.863389981249825e-06, "log_odds_chosen": -0.025000452995300293, "log_odds_ratio": -0.7603176236152649, "logits/chosen": -3.5535545349121094, "logits/rejected": -3.68872332572937, "logps/chosen": -1.734505295753479, "logps/rejected": -1.7148659229278564, "loss": 1.4624, "nll_loss": 1.5448418855667114, "rewards/accuracies": 0.30000001192092896, "rewards/chosen": -0.08672526478767395, "rewards/margins": -0.000981969409622252, "rewards/rejected": -0.08574329316616058, "step": 720 }, { "epoch": 0.4261529480443666, "grad_norm": 2.12348404540065, "learning_rate": 1.8505830254940132e-06, "log_odds_chosen": -0.08582016825675964, "log_odds_ratio": -0.8430864214897156, "logits/chosen": -3.3888626098632812, "logits/rejected": -3.566910982131958, "logps/chosen": -1.505122423171997, "logps/rejected": -1.520307183265686, "loss": 1.4946, "nll_loss": 1.4743322134017944, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07525612413883209, "rewards/margins": 0.0007592387264594436, "rewards/rejected": -0.07601536065340042, "step": 730 }, { "epoch": 0.4319906596614127, "grad_norm": 1.448693262465781, "learning_rate": 1.8380365552345197e-06, "log_odds_chosen": 0.5798968076705933, "log_odds_ratio": -0.5040274858474731, "logits/chosen": -3.4946117401123047, "logits/rejected": -3.660958766937256, "logps/chosen": -1.429527997970581, "logps/rejected": -1.879400610923767, "loss": 1.5419, "nll_loss": 1.492807388305664, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07147640734910965, "rewards/margins": 0.022493645548820496, "rewards/rejected": -0.09397004544734955, "step": 740 }, { "epoch": 0.43782837127845886, "grad_norm": 1.3100504278355487, "learning_rate": 1.8257418583505536e-06, "log_odds_chosen": 0.16511589288711548, "log_odds_ratio": -0.6417019367218018, "logits/chosen": -3.735492706298828, "logits/rejected": -3.757577896118164, "logps/chosen": -1.6607036590576172, "logps/rejected": -1.7982323169708252, "loss": 1.4581, "nll_loss": 1.5673224925994873, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0830351710319519, "rewards/margins": 0.006876434199512005, "rewards/rejected": -0.08991160988807678, "step": 750 }, { "epoch": 0.44366608289550497, "grad_norm": 0.9973646507019224, "learning_rate": 1.8136906252750293e-06, "log_odds_chosen": 0.396651029586792, "log_odds_ratio": -0.5669261813163757, "logits/chosen": -3.2925877571105957, "logits/rejected": -3.4050495624542236, "logps/chosen": -1.3152573108673096, "logps/rejected": -1.546952247619629, "loss": 1.4088, "nll_loss": 1.3588595390319824, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06576286256313324, "rewards/margins": 0.011584753170609474, "rewards/rejected": -0.07734762132167816, "step": 760 }, { "epoch": 0.44950379451255107, "grad_norm": 0.92777250292162, "learning_rate": 1.801874925391118e-06, "log_odds_chosen": -0.0032298951409757137, "log_odds_ratio": -0.706016480922699, "logits/chosen": -3.875239849090576, "logits/rejected": -3.860006809234619, "logps/chosen": -1.6944328546524048, "logps/rejected": -1.6913435459136963, "loss": 1.4529, "nll_loss": 1.421229600906372, "rewards/accuracies": 0.5, "rewards/chosen": -0.084721639752388, "rewards/margins": -0.00015446878387592733, "rewards/rejected": -0.08456717431545258, "step": 770 }, { "epoch": 0.4553415061295972, "grad_norm": 1.3027224512668856, "learning_rate": 1.7902871850985824e-06, "log_odds_chosen": 0.1817394196987152, "log_odds_ratio": -0.6250007748603821, "logits/chosen": -3.799592971801758, "logits/rejected": -3.8401808738708496, "logps/chosen": -1.3142929077148438, "logps/rejected": -1.4299211502075195, "loss": 1.3148, "nll_loss": 1.3367127180099487, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06571464240550995, "rewards/margins": 0.005781407002359629, "rewards/rejected": -0.07149605453014374, "step": 780 }, { "epoch": 0.46117921774664333, "grad_norm": 1.2673549353158404, "learning_rate": 1.7789201674120502e-06, "log_odds_chosen": 0.4255761504173279, "log_odds_ratio": -0.5791968107223511, "logits/chosen": -3.2339069843292236, "logits/rejected": -3.670290470123291, "logps/chosen": -1.4791820049285889, "logps/rejected": -1.752290964126587, "loss": 1.4315, "nll_loss": 1.4894287586212158, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0739590972661972, "rewards/margins": 0.013655463233590126, "rewards/rejected": -0.08761455863714218, "step": 790 }, { "epoch": 0.46701692936368944, "grad_norm": 1.1818096090662653, "learning_rate": 1.7677669529663689e-06, "log_odds_chosen": 0.42042917013168335, "log_odds_ratio": -0.5310789346694946, "logits/chosen": -3.797935962677002, "logits/rejected": -3.7546029090881348, "logps/chosen": -1.2825416326522827, "logps/rejected": -1.563473105430603, "loss": 1.4201, "nll_loss": 1.420452356338501, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06412708759307861, "rewards/margins": 0.014046574011445045, "rewards/rejected": -0.07817365974187851, "step": 800 }, { "epoch": 0.47285464098073554, "grad_norm": 1.071894527095795, "learning_rate": 1.7568209223157664e-06, "log_odds_chosen": 0.0939396470785141, "log_odds_ratio": -0.667390763759613, "logits/chosen": -3.6949074268341064, "logits/rejected": -3.8002333641052246, "logps/chosen": -1.5261578559875488, "logps/rejected": -1.5788092613220215, "loss": 1.5252, "nll_loss": 1.460928201675415, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07630790024995804, "rewards/margins": 0.0026325793005526066, "rewards/rejected": -0.07894046604633331, "step": 810 }, { "epoch": 0.47869235259778165, "grad_norm": 2.0476248797224605, "learning_rate": 1.7460757394239458e-06, "log_odds_chosen": 0.4034205973148346, "log_odds_ratio": -0.5519649386405945, "logits/chosen": -3.688931703567505, "logits/rejected": -3.455395221710205, "logps/chosen": -1.4605730772018433, "logps/rejected": -1.7438514232635498, "loss": 1.4836, "nll_loss": 1.5282323360443115, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07302865386009216, "rewards/margins": 0.01416392158716917, "rewards/rejected": -0.0871925801038742, "step": 820 }, { "epoch": 0.4845300642148278, "grad_norm": 0.8969258833652863, "learning_rate": 1.7355253362515584e-06, "log_odds_chosen": 0.3920760452747345, "log_odds_ratio": -0.5437257885932922, "logits/chosen": -3.7214913368225098, "logits/rejected": -3.8418056964874268, "logps/chosen": -1.516177773475647, "logps/rejected": -1.8304067850112915, "loss": 1.3885, "nll_loss": 1.5704530477523804, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.07580889016389847, "rewards/margins": 0.015711452811956406, "rewards/rejected": -0.09152033179998398, "step": 830 }, { "epoch": 0.4903677758318739, "grad_norm": 1.0144862801192593, "learning_rate": 1.7251638983558855e-06, "log_odds_chosen": 0.26952993869781494, "log_odds_ratio": -0.5876476168632507, "logits/chosen": -3.8201680183410645, "logits/rejected": -3.9873039722442627, "logps/chosen": -1.4011740684509277, "logps/rejected": -1.5994189977645874, "loss": 1.4473, "nll_loss": 1.3440608978271484, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.07005870342254639, "rewards/margins": 0.009912244975566864, "rewards/rejected": -0.07997094839811325, "step": 840 }, { "epoch": 0.49620548744892, "grad_norm": 2.0239140173690955, "learning_rate": 1.7149858514250883e-06, "log_odds_chosen": 0.128139466047287, "log_odds_ratio": -0.658035933971405, "logits/chosen": -3.558427333831787, "logits/rejected": -3.9459052085876465, "logps/chosen": -1.5626720190048218, "logps/rejected": -1.6465517282485962, "loss": 1.4737, "nll_loss": 1.4099605083465576, "rewards/accuracies": 0.5, "rewards/chosen": -0.07813360542058945, "rewards/margins": 0.004193988628685474, "rewards/rejected": -0.08232758194208145, "step": 850 }, { "epoch": 0.5020431990659662, "grad_norm": 1.235971556276964, "learning_rate": 1.704985848676184e-06, "log_odds_chosen": 0.48999252915382385, "log_odds_ratio": -0.5461174249649048, "logits/chosen": -3.4095280170440674, "logits/rejected": -3.667527675628662, "logps/chosen": -1.2479019165039062, "logps/rejected": -1.538692831993103, "loss": 1.4062, "nll_loss": 1.3561253547668457, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06239509582519531, "rewards/margins": 0.014539550058543682, "rewards/rejected": -0.07693465799093246, "step": 860 }, { "epoch": 0.5078809106830122, "grad_norm": 1.310076336506196, "learning_rate": 1.6951587590520263e-06, "log_odds_chosen": 0.5809324979782104, "log_odds_ratio": -0.4585697054862976, "logits/chosen": -3.175489902496338, "logits/rejected": -3.7632269859313965, "logps/chosen": -1.1838533878326416, "logps/rejected": -1.5934401750564575, "loss": 1.4493, "nll_loss": 1.1854149103164673, "rewards/accuracies": 1.0, "rewards/chosen": -0.05919266864657402, "rewards/margins": 0.02047933079302311, "rewards/rejected": -0.07967199385166168, "step": 870 }, { "epoch": 0.5137186223000584, "grad_norm": 1.6777815349891365, "learning_rate": 1.6854996561581053e-06, "log_odds_chosen": 0.0882478579878807, "log_odds_ratio": -0.6978998184204102, "logits/chosen": -3.7489681243896484, "logits/rejected": -3.4975247383117676, "logps/chosen": -1.4917502403259277, "logps/rejected": -1.5545430183410645, "loss": 1.499, "nll_loss": 1.5248205661773682, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07458750903606415, "rewards/margins": 0.0031396367121487856, "rewards/rejected": -0.07772714644670486, "step": 880 }, { "epoch": 0.5195563339171045, "grad_norm": 1.3102112377248563, "learning_rate": 1.6760038078849776e-06, "log_odds_chosen": 0.03328792750835419, "log_odds_ratio": -0.7438865900039673, "logits/chosen": -3.44010853767395, "logits/rejected": -3.3413186073303223, "logps/chosen": -1.3668386936187744, "logps/rejected": -1.38387131690979, "loss": 1.4673, "nll_loss": 1.4798543453216553, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0683419331908226, "rewards/margins": 0.0008516364032402635, "rewards/rejected": -0.06919357180595398, "step": 890 }, { "epoch": 0.5253940455341506, "grad_norm": 1.0102902151737552, "learning_rate": 1.6666666666666667e-06, "log_odds_chosen": 0.17614340782165527, "log_odds_ratio": -0.6526997089385986, "logits/chosen": -3.651087999343872, "logits/rejected": -3.8072915077209473, "logps/chosen": -1.3711117506027222, "logps/rejected": -1.4387164115905762, "loss": 1.3938, "nll_loss": 1.402628779411316, "rewards/accuracies": 0.5, "rewards/chosen": -0.06855558604001999, "rewards/margins": 0.0033802289981395006, "rewards/rejected": -0.07193581759929657, "step": 900 }, { "epoch": 0.5312317571511967, "grad_norm": 1.1132381144190895, "learning_rate": 1.6574838603294898e-06, "log_odds_chosen": -0.08135322481393814, "log_odds_ratio": -0.7575902342796326, "logits/chosen": -3.818908214569092, "logits/rejected": -3.8992302417755127, "logps/chosen": -1.5864216089248657, "logps/rejected": -1.5411884784698486, "loss": 1.5335, "nll_loss": 1.598780632019043, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07932107895612717, "rewards/margins": -0.0022616498172283173, "rewards/rejected": -0.07705943286418915, "step": 910 }, { "epoch": 0.5370694687682428, "grad_norm": 0.9759731147779264, "learning_rate": 1.648451183489468e-06, "log_odds_chosen": 0.1813182383775711, "log_odds_ratio": -0.6307699084281921, "logits/chosen": -3.7067809104919434, "logits/rejected": -3.849400758743286, "logps/chosen": -1.732542634010315, "logps/rejected": -1.8837732076644897, "loss": 1.4613, "nll_loss": 1.6715933084487915, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08662714064121246, "rewards/margins": 0.0075615281239151955, "rewards/rejected": -0.09418866038322449, "step": 920 }, { "epoch": 0.542907180385289, "grad_norm": 1.8752786576064029, "learning_rate": 1.6395645894598825e-06, "log_odds_chosen": 0.11342465877532959, "log_odds_ratio": -0.6957853436470032, "logits/chosen": -3.7673614025115967, "logits/rejected": -3.8911919593811035, "logps/chosen": -1.3965904712677002, "logps/rejected": -1.502516746520996, "loss": 1.4611, "nll_loss": 1.3216631412506104, "rewards/accuracies": 0.5, "rewards/chosen": -0.06982952356338501, "rewards/margins": 0.005296317394822836, "rewards/rejected": -0.07512584328651428, "step": 930 }, { "epoch": 0.5487448920023351, "grad_norm": 1.7521439703034665, "learning_rate": 1.6308201826336057e-06, "log_odds_chosen": 0.1376430094242096, "log_odds_ratio": -0.6768438816070557, "logits/chosen": -3.600715160369873, "logits/rejected": -3.736177444458008, "logps/chosen": -1.5456516742706299, "logps/rejected": -1.6318765878677368, "loss": 1.4765, "nll_loss": 1.539489507675171, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07728257030248642, "rewards/margins": 0.004311251454055309, "rewards/rejected": -0.0815938264131546, "step": 940 }, { "epoch": 0.5545826036193812, "grad_norm": 1.3348030735079526, "learning_rate": 1.6222142113076255e-06, "log_odds_chosen": 0.44451841711997986, "log_odds_ratio": -0.5802547931671143, "logits/chosen": -3.4927356243133545, "logits/rejected": -3.502634048461914, "logps/chosen": -1.2677581310272217, "logps/rejected": -1.58249032497406, "loss": 1.5094, "nll_loss": 1.2587336301803589, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.0633879080414772, "rewards/margins": 0.015736613422632217, "rewards/rejected": -0.07912451773881912, "step": 950 }, { "epoch": 0.5604203152364273, "grad_norm": 1.9725789614027767, "learning_rate": 1.6137430609197571e-06, "log_odds_chosen": 0.13596093654632568, "log_odds_ratio": -0.647422194480896, "logits/chosen": -3.9101052284240723, "logits/rejected": -3.75105357170105, "logps/chosen": -1.5221458673477173, "logps/rejected": -1.6299253702163696, "loss": 1.4692, "nll_loss": 1.508307695388794, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07610730081796646, "rewards/margins": 0.005388972349464893, "rewards/rejected": -0.08149627596139908, "step": 960 }, { "epoch": 0.5662580268534735, "grad_norm": 1.0971147960865537, "learning_rate": 1.605403247669839e-06, "log_odds_chosen": 0.16537995636463165, "log_odds_ratio": -0.6238513588905334, "logits/chosen": -3.760333299636841, "logits/rejected": -3.7641711235046387, "logps/chosen": -1.5338884592056274, "logps/rejected": -1.6573375463485718, "loss": 1.4566, "nll_loss": 1.4186909198760986, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07669441401958466, "rewards/margins": 0.006172459106892347, "rewards/rejected": -0.08286687731742859, "step": 970 }, { "epoch": 0.5720957384705195, "grad_norm": 1.4881276802972865, "learning_rate": 1.59719141249985e-06, "log_odds_chosen": 0.42865189909935, "log_odds_ratio": -0.5390044450759888, "logits/chosen": -3.581620693206787, "logits/rejected": -3.5899176597595215, "logps/chosen": -1.480674386024475, "logps/rejected": -1.7901808023452759, "loss": 1.3862, "nll_loss": 1.344512939453125, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.074033722281456, "rewards/margins": 0.0154753178358078, "rewards/rejected": -0.08950904756784439, "step": 980 }, { "epoch": 0.5779334500875657, "grad_norm": 1.197164641009502, "learning_rate": 1.5891043154093205e-06, "log_odds_chosen": 0.3543226718902588, "log_odds_ratio": -0.5649783611297607, "logits/chosen": -3.398437023162842, "logits/rejected": -3.7234065532684326, "logps/chosen": -1.4934816360473633, "logps/rejected": -1.762107491493225, "loss": 1.4459, "nll_loss": 1.5012811422348022, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.0746740847826004, "rewards/margins": 0.013431290164589882, "rewards/rejected": -0.08810536563396454, "step": 990 }, { "epoch": 0.5837711617046117, "grad_norm": 1.0053741352908385, "learning_rate": 1.5811388300841898e-06, "log_odds_chosen": 0.3622283339500427, "log_odds_ratio": -0.5657080411911011, "logits/chosen": -3.5687339305877686, "logits/rejected": -3.9233851432800293, "logps/chosen": -1.2179609537124634, "logps/rejected": -1.4407708644866943, "loss": 1.3914, "nll_loss": 1.2864320278167725, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06089804694056511, "rewards/margins": 0.011140493676066399, "rewards/rejected": -0.07203854620456696, "step": 1000 }, { "epoch": 0.5896088733216579, "grad_norm": 1.199053469932817, "learning_rate": 1.5732919388188816e-06, "log_odds_chosen": -0.24436108767986298, "log_odds_ratio": -0.8610903024673462, "logits/chosen": -3.7985236644744873, "logits/rejected": -3.809143543243408, "logps/chosen": -1.6935663223266602, "logps/rejected": -1.5062425136566162, "loss": 1.4588, "nll_loss": 1.553226113319397, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.08467832207679749, "rewards/margins": -0.009366193786263466, "rewards/rejected": -0.07531212270259857, "step": 1010 }, { "epoch": 0.5954465849387041, "grad_norm": 2.3204126674759102, "learning_rate": 1.565560727712874e-06, "log_odds_chosen": 0.37011271715164185, "log_odds_ratio": -0.5587447285652161, "logits/chosen": -3.6285548210144043, "logits/rejected": -3.4676883220672607, "logps/chosen": -1.4109609127044678, "logps/rejected": -1.6835588216781616, "loss": 1.4398, "nll_loss": 1.4699938297271729, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07054804265499115, "rewards/margins": 0.013629895634949207, "rewards/rejected": -0.08417794108390808, "step": 1020 }, { "epoch": 0.6012842965557501, "grad_norm": 1.167419444827397, "learning_rate": 1.5579423821243897e-06, "log_odds_chosen": 0.19378411769866943, "log_odds_ratio": -0.619697093963623, "logits/chosen": -3.5586349964141846, "logits/rejected": -3.6743836402893066, "logps/chosen": -1.2912766933441162, "logps/rejected": -1.4319881200790405, "loss": 1.4188, "nll_loss": 1.3081495761871338, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0645638257265091, "rewards/margins": 0.007035577204078436, "rewards/rejected": -0.07159940898418427, "step": 1030 }, { "epoch": 0.6071220081727963, "grad_norm": 1.7925468499019566, "learning_rate": 1.5504341823651056e-06, "log_odds_chosen": 0.7065520286560059, "log_odds_ratio": -0.48025959730148315, "logits/chosen": -3.4651741981506348, "logits/rejected": -3.430422306060791, "logps/chosen": -1.1487150192260742, "logps/rejected": -1.6025464534759521, "loss": 1.3968, "nll_loss": 1.2926580905914307, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.05743575096130371, "rewards/margins": 0.022691579535603523, "rewards/rejected": -0.08012732863426208, "step": 1040 }, { "epoch": 0.6129597197898424, "grad_norm": 1.3458466684980221, "learning_rate": 1.5430334996209192e-06, "log_odds_chosen": 0.46427708864212036, "log_odds_ratio": -0.5106879472732544, "logits/chosen": -3.531106948852539, "logits/rejected": -3.7014987468719482, "logps/chosen": -1.3616163730621338, "logps/rejected": -1.6869173049926758, "loss": 1.4697, "nll_loss": 1.305336356163025, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06808082014322281, "rewards/margins": 0.016265040263533592, "rewards/rejected": -0.08434586226940155, "step": 1050 }, { "epoch": 0.6187974314068885, "grad_norm": 1.0326963433011265, "learning_rate": 1.5357377920848783e-06, "log_odds_chosen": 0.21958990395069122, "log_odds_ratio": -0.6063078045845032, "logits/chosen": -3.4317641258239746, "logits/rejected": -3.7364749908447266, "logps/chosen": -1.4099458456039429, "logps/rejected": -1.5919688940048218, "loss": 1.4152, "nll_loss": 1.4237931966781616, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0704972967505455, "rewards/margins": 0.00910115335136652, "rewards/rejected": -0.07959844917058945, "step": 1060 }, { "epoch": 0.6246351430239346, "grad_norm": 0.9393977000204445, "learning_rate": 1.5285446012893579e-06, "log_odds_chosen": 0.35795915126800537, "log_odds_ratio": -0.6163761019706726, "logits/chosen": -3.7608115673065186, "logits/rejected": -3.706493377685547, "logps/chosen": -1.5492169857025146, "logps/rejected": -1.887384057044983, "loss": 1.5139, "nll_loss": 1.514452576637268, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07746085524559021, "rewards/margins": 0.01690833829343319, "rewards/rejected": -0.09436918795108795, "step": 1070 }, { "epoch": 0.6304728546409807, "grad_norm": 1.3342076165390777, "learning_rate": 1.5214515486254614e-06, "log_odds_chosen": 0.2614067494869232, "log_odds_ratio": -0.6338509321212769, "logits/chosen": -3.5621142387390137, "logits/rejected": -3.5427069664001465, "logps/chosen": -1.3340866565704346, "logps/rejected": -1.5029159784317017, "loss": 1.4588, "nll_loss": 1.2656961679458618, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06670433282852173, "rewards/margins": 0.008441467769443989, "rewards/rejected": -0.07514579594135284, "step": 1080 }, { "epoch": 0.6363105662580268, "grad_norm": 1.151425933203342, "learning_rate": 1.5144563320384566e-06, "log_odds_chosen": 0.36316362023353577, "log_odds_ratio": -0.560775876045227, "logits/chosen": -3.436584949493408, "logits/rejected": -3.6030731201171875, "logps/chosen": -1.4574145078659058, "logps/rejected": -1.741363525390625, "loss": 1.424, "nll_loss": 1.3730270862579346, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07287072390317917, "rewards/margins": 0.0141974538564682, "rewards/rejected": -0.08706818521022797, "step": 1090 }, { "epoch": 0.642148277875073, "grad_norm": 1.3571677473201658, "learning_rate": 1.5075567228888182e-06, "log_odds_chosen": 0.000986337661743164, "log_odds_ratio": -0.7365607619285583, "logits/chosen": -3.5484447479248047, "logits/rejected": -3.6163132190704346, "logps/chosen": -1.5053362846374512, "logps/rejected": -1.4924229383468628, "loss": 1.4337, "nll_loss": 1.484412431716919, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07526681572198868, "rewards/margins": -0.000645659863948822, "rewards/rejected": -0.07462115585803986, "step": 1100 }, { "epoch": 0.647985989492119, "grad_norm": 1.8877991378586052, "learning_rate": 1.5007505629691608e-06, "log_odds_chosen": 0.24704858660697937, "log_odds_ratio": -0.6051791906356812, "logits/chosen": -3.4683189392089844, "logits/rejected": -3.8879916667938232, "logps/chosen": -1.3718594312667847, "logps/rejected": -1.5224130153656006, "loss": 1.482, "nll_loss": 1.3225619792938232, "rewards/accuracies": 0.5, "rewards/chosen": -0.06859297305345535, "rewards/margins": 0.007527677807956934, "rewards/rejected": -0.07612065970897675, "step": 1110 }, { "epoch": 0.6538237011091652, "grad_norm": 1.0590023621989324, "learning_rate": 1.494035761667992e-06, "log_odds_chosen": -0.004286480136215687, "log_odds_ratio": -0.7683175206184387, "logits/chosen": -3.6352107524871826, "logits/rejected": -3.498098373413086, "logps/chosen": -1.4056607484817505, "logps/rejected": -1.4154250621795654, "loss": 1.4399, "nll_loss": 1.362975835800171, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07028303295373917, "rewards/margins": 0.00048822275130078197, "rewards/rejected": -0.07077126204967499, "step": 1120 }, { "epoch": 0.6596614127262114, "grad_norm": 1.887784424466616, "learning_rate": 1.487410293271824e-06, "log_odds_chosen": 0.3295649588108063, "log_odds_ratio": -0.5524402856826782, "logits/chosen": -3.6241462230682373, "logits/rejected": -3.687258243560791, "logps/chosen": -1.3957101106643677, "logps/rejected": -1.657015085220337, "loss": 1.3978, "nll_loss": 1.340308666229248, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.06978549808263779, "rewards/margins": 0.01306525431573391, "rewards/rejected": -0.08285075426101685, "step": 1130 }, { "epoch": 0.6654991243432574, "grad_norm": 1.501428352224704, "learning_rate": 1.480872194397731e-06, "log_odds_chosen": 0.2706455588340759, "log_odds_ratio": -0.5788730382919312, "logits/chosen": -3.7660269737243652, "logits/rejected": -4.002518653869629, "logps/chosen": -1.2408720254898071, "logps/rejected": -1.4399653673171997, "loss": 1.4437, "nll_loss": 1.2975295782089233, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06204359978437424, "rewards/margins": 0.0099546629935503, "rewards/rejected": -0.07199826091527939, "step": 1140 }, { "epoch": 0.6713368359603036, "grad_norm": 2.339133256366747, "learning_rate": 1.4744195615489715e-06, "log_odds_chosen": 0.0999092310667038, "log_odds_ratio": -0.6702858209609985, "logits/chosen": -3.542649030685425, "logits/rejected": -3.787248134613037, "logps/chosen": -1.589597225189209, "logps/rejected": -1.6912921667099, "loss": 1.3758, "nll_loss": 1.4418556690216064, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07947986572980881, "rewards/margins": 0.0050847469829022884, "rewards/rejected": -0.08456461131572723, "step": 1150 }, { "epoch": 0.6771745475773496, "grad_norm": 1.8198663050097392, "learning_rate": 1.4680505487867589e-06, "log_odds_chosen": 0.12901464104652405, "log_odds_ratio": -0.6508499383926392, "logits/chosen": -3.3829429149627686, "logits/rejected": -3.5581657886505127, "logps/chosen": -1.2563859224319458, "logps/rejected": -1.341601848602295, "loss": 1.4356, "nll_loss": 1.274111032485962, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06281929463148117, "rewards/margins": 0.0042607965879142284, "rewards/rejected": -0.06708009541034698, "step": 1160 }, { "epoch": 0.6830122591943958, "grad_norm": 1.1572231607977412, "learning_rate": 1.4617633655117156e-06, "log_odds_chosen": -0.008226936683058739, "log_odds_ratio": -0.7522357702255249, "logits/chosen": -3.4687016010284424, "logits/rejected": -3.646564483642578, "logps/chosen": -1.48030686378479, "logps/rejected": -1.5129547119140625, "loss": 1.4903, "nll_loss": 1.4443728923797607, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07401534169912338, "rewards/margins": 0.0016323986928910017, "rewards/rejected": -0.0756477415561676, "step": 1170 }, { "epoch": 0.688849970811442, "grad_norm": 0.9871518228930504, "learning_rate": 1.4555562743489552e-06, "log_odds_chosen": 0.41209906339645386, "log_odds_ratio": -0.5445806980133057, "logits/chosen": -3.602752208709717, "logits/rejected": -3.7116405963897705, "logps/chosen": -1.3610628843307495, "logps/rejected": -1.6747217178344727, "loss": 1.4266, "nll_loss": 1.4239057302474976, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06805314868688583, "rewards/margins": 0.01568293757736683, "rewards/rejected": -0.08373607695102692, "step": 1180 }, { "epoch": 0.694687682428488, "grad_norm": 0.8343307527199337, "learning_rate": 1.4494275891311214e-06, "log_odds_chosen": 0.11507141590118408, "log_odds_ratio": -0.6965879201889038, "logits/chosen": -3.685096263885498, "logits/rejected": -3.8423964977264404, "logps/chosen": -1.387915849685669, "logps/rejected": -1.4235694408416748, "loss": 1.4337, "nll_loss": 1.3562302589416504, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06939579546451569, "rewards/margins": 0.001782689942047, "rewards/rejected": -0.07117848098278046, "step": 1190 }, { "epoch": 0.7005253940455342, "grad_norm": 1.196373420872049, "learning_rate": 1.4433756729740647e-06, "log_odds_chosen": 0.5720034837722778, "log_odds_ratio": -0.5054569244384766, "logits/chosen": -3.5103511810302734, "logits/rejected": -3.7682583332061768, "logps/chosen": -1.2312414646148682, "logps/rejected": -1.5801923274993896, "loss": 1.4076, "nll_loss": 1.2738173007965088, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06156207248568535, "rewards/margins": 0.017447542399168015, "rewards/rejected": -0.07900962233543396, "step": 1200 }, { "epoch": 0.7063631056625803, "grad_norm": 0.9363353698948111, "learning_rate": 1.4373989364401727e-06, "log_odds_chosen": 0.1434541791677475, "log_odds_ratio": -0.6399761438369751, "logits/chosen": -3.3865814208984375, "logits/rejected": -3.7607407569885254, "logps/chosen": -1.4287686347961426, "logps/rejected": -1.572047233581543, "loss": 1.3986, "nll_loss": 1.4588409662246704, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07143843919038773, "rewards/margins": 0.007163929287344217, "rewards/rejected": -0.0786023661494255, "step": 1210 }, { "epoch": 0.7122008172796264, "grad_norm": 1.2713268529197488, "learning_rate": 1.4314958357846706e-06, "log_odds_chosen": 0.17422883212566376, "log_odds_ratio": -0.6414699554443359, "logits/chosen": -3.698843479156494, "logits/rejected": -3.903012752532959, "logps/chosen": -1.5843664407730103, "logps/rejected": -1.7367069721221924, "loss": 1.4903, "nll_loss": 1.4968700408935547, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0792183130979538, "rewards/margins": 0.007617020048201084, "rewards/rejected": -0.0868353396654129, "step": 1220 }, { "epoch": 0.7180385288966725, "grad_norm": 1.0309037122143416, "learning_rate": 1.4256648712805027e-06, "log_odds_chosen": 0.11659079790115356, "log_odds_ratio": -0.6565717458724976, "logits/chosen": -3.6903634071350098, "logits/rejected": -3.790111541748047, "logps/chosen": -1.2752035856246948, "logps/rejected": -1.3497754335403442, "loss": 1.4562, "nll_loss": 1.276907205581665, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0637601837515831, "rewards/margins": 0.0037285895086824894, "rewards/rejected": -0.06748877465724945, "step": 1230 }, { "epoch": 0.7238762405137186, "grad_norm": 1.4753907098440404, "learning_rate": 1.419904585617662e-06, "log_odds_chosen": 0.07249963283538818, "log_odds_ratio": -0.6769141554832458, "logits/chosen": -3.87017560005188, "logits/rejected": -3.793915271759033, "logps/chosen": -1.4161723852157593, "logps/rejected": -1.4887874126434326, "loss": 1.4381, "nll_loss": 1.3784804344177246, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07080862671136856, "rewards/margins": 0.0036307442933321, "rewards/rejected": -0.07443936914205551, "step": 1240 }, { "epoch": 0.7297139521307647, "grad_norm": 1.626272923387585, "learning_rate": 1.4142135623730952e-06, "log_odds_chosen": 0.13311097025871277, "log_odds_ratio": -0.6663863062858582, "logits/chosen": -3.4408035278320312, "logits/rejected": -3.8387997150421143, "logps/chosen": -1.2269275188446045, "logps/rejected": -1.3334132432937622, "loss": 1.4258, "nll_loss": 1.261070966720581, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0613463819026947, "rewards/margins": 0.005324280355125666, "rewards/rejected": -0.06667067110538483, "step": 1250 }, { "epoch": 0.7355516637478109, "grad_norm": 1.5049933521332548, "learning_rate": 1.4085904245475275e-06, "log_odds_chosen": 0.4357670843601227, "log_odds_ratio": -0.537013053894043, "logits/chosen": -3.484666347503662, "logits/rejected": -3.6944968700408936, "logps/chosen": -1.3730899095535278, "logps/rejected": -1.6813600063323975, "loss": 1.4763, "nll_loss": 1.5478441715240479, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06865449249744415, "rewards/margins": 0.015413510613143444, "rewards/rejected": -0.08406800776720047, "step": 1260 }, { "epoch": 0.7413893753648569, "grad_norm": 0.9547508342400997, "learning_rate": 1.4030338331657844e-06, "log_odds_chosen": 0.10284624993801117, "log_odds_ratio": -0.6621818542480469, "logits/chosen": -3.776501178741455, "logits/rejected": -3.8835887908935547, "logps/chosen": -1.6118195056915283, "logps/rejected": -1.699235200881958, "loss": 1.5443, "nll_loss": 1.5244765281677246, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08059097826480865, "rewards/margins": 0.004370786249637604, "rewards/rejected": -0.08496175706386566, "step": 1270 }, { "epoch": 0.7472270869819031, "grad_norm": 1.185226987943093, "learning_rate": 1.3975424859373688e-06, "log_odds_chosen": 0.3971897065639496, "log_odds_ratio": -0.545083224773407, "logits/chosen": -3.6657626628875732, "logits/rejected": -3.752920627593994, "logps/chosen": -1.447163462638855, "logps/rejected": -1.7555530071258545, "loss": 1.524, "nll_loss": 1.4501063823699951, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07235816866159439, "rewards/margins": 0.01541948039084673, "rewards/rejected": -0.08777765184640884, "step": 1280 }, { "epoch": 0.7530647985989493, "grad_norm": 1.007153216836115, "learning_rate": 1.3921151159742616e-06, "log_odds_chosen": 0.13516126573085785, "log_odds_ratio": -0.6667526960372925, "logits/chosen": -3.8328967094421387, "logits/rejected": -3.7815799713134766, "logps/chosen": -1.4513742923736572, "logps/rejected": -1.5538709163665771, "loss": 1.4389, "nll_loss": 1.4346994161605835, "rewards/accuracies": 0.5, "rewards/chosen": -0.07256871461868286, "rewards/margins": 0.005124828778207302, "rewards/rejected": -0.07769354432821274, "step": 1290 }, { "epoch": 0.7589025102159953, "grad_norm": 1.2681735806253638, "learning_rate": 1.386750490563073e-06, "log_odds_chosen": 0.45636850595474243, "log_odds_ratio": -0.5258599519729614, "logits/chosen": -3.563424587249756, "logits/rejected": -3.5741677284240723, "logps/chosen": -1.2283709049224854, "logps/rejected": -1.5156924724578857, "loss": 1.4197, "nll_loss": 1.298481822013855, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06141854450106621, "rewards/margins": 0.014366090297698975, "rewards/rejected": -0.07578462362289429, "step": 1300 }, { "epoch": 0.7647402218330415, "grad_norm": 1.008263095251999, "learning_rate": 1.3814474099888442e-06, "log_odds_chosen": 0.06689711660146713, "log_odds_ratio": -0.697658896446228, "logits/chosen": -3.5159404277801514, "logits/rejected": -3.7585225105285645, "logps/chosen": -1.3132297992706299, "logps/rejected": -1.3855555057525635, "loss": 1.4485, "nll_loss": 1.2646446228027344, "rewards/accuracies": 0.5, "rewards/chosen": -0.0656614899635315, "rewards/margins": 0.003616283880546689, "rewards/rejected": -0.06927777826786041, "step": 1310 }, { "epoch": 0.7705779334500875, "grad_norm": 0.9673707983788562, "learning_rate": 1.376204706407951e-06, "log_odds_chosen": 0.5269815325737, "log_odds_ratio": -0.5130890607833862, "logits/chosen": -3.3393330574035645, "logits/rejected": -3.763470411300659, "logps/chosen": -1.3772388696670532, "logps/rejected": -1.779054880142212, "loss": 1.3786, "nll_loss": 1.3951183557510376, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.0688619390130043, "rewards/margins": 0.020090807229280472, "rewards/rejected": -0.08895275741815567, "step": 1320 }, { "epoch": 0.7764156450671337, "grad_norm": 1.0868217477068451, "learning_rate": 1.3710212427677044e-06, "log_odds_chosen": 0.3513430058956146, "log_odds_ratio": -0.5549355745315552, "logits/chosen": -3.646064281463623, "logits/rejected": -3.8568196296691895, "logps/chosen": -1.3955647945404053, "logps/rejected": -1.6696979999542236, "loss": 1.3452, "nll_loss": 1.3085718154907227, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06977823376655579, "rewards/margins": 0.013706663623452187, "rewards/rejected": -0.08348490297794342, "step": 1330 }, { "epoch": 0.7822533566841798, "grad_norm": 1.1912420033862787, "learning_rate": 1.3658959117703826e-06, "log_odds_chosen": -0.26394256949424744, "log_odds_ratio": -0.8653214573860168, "logits/chosen": -3.8672187328338623, "logits/rejected": -3.8057098388671875, "logps/chosen": -1.8170604705810547, "logps/rejected": -1.5991381406784058, "loss": 1.449, "nll_loss": 1.543738603591919, "rewards/accuracies": 0.5, "rewards/chosen": -0.0908530205488205, "rewards/margins": -0.010896108113229275, "rewards/rejected": -0.07995691150426865, "step": 1340 }, { "epoch": 0.7880910683012259, "grad_norm": 1.4691629737866725, "learning_rate": 1.3608276348795436e-06, "log_odds_chosen": 0.4397081732749939, "log_odds_ratio": -0.5330623984336853, "logits/chosen": -3.638364315032959, "logits/rejected": -3.746438503265381, "logps/chosen": -1.309575080871582, "logps/rejected": -1.623590111732483, "loss": 1.4047, "nll_loss": 1.3440721035003662, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06547875702381134, "rewards/margins": 0.015700750052928925, "rewards/rejected": -0.08117950707674026, "step": 1350 }, { "epoch": 0.793928779918272, "grad_norm": 1.3319824789159278, "learning_rate": 1.355815361366601e-06, "log_odds_chosen": 0.10500156879425049, "log_odds_ratio": -0.6677210330963135, "logits/chosen": -3.6988863945007324, "logits/rejected": -3.6854004859924316, "logps/chosen": -1.3555651903152466, "logps/rejected": -1.4301506280899048, "loss": 1.4865, "nll_loss": 1.291123628616333, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06777825951576233, "rewards/margins": 0.00372927519492805, "rewards/rejected": -0.071507528424263, "step": 1360 }, { "epoch": 0.7997664915353182, "grad_norm": 1.0600244899762146, "learning_rate": 1.350858067395748e-06, "log_odds_chosen": 0.20431193709373474, "log_odds_ratio": -0.6117268800735474, "logits/chosen": -3.716031312942505, "logits/rejected": -3.7913882732391357, "logps/chosen": -1.3822975158691406, "logps/rejected": -1.522287130355835, "loss": 1.4993, "nll_loss": 1.3604979515075684, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06911487877368927, "rewards/margins": 0.0069994754157960415, "rewards/rejected": -0.07611435651779175, "step": 1370 }, { "epoch": 0.8056042031523643, "grad_norm": 1.4629128631889197, "learning_rate": 1.345954755145414e-06, "log_odds_chosen": 0.25956761837005615, "log_odds_ratio": -0.6013301610946655, "logits/chosen": -3.790165662765503, "logits/rejected": -4.032552242279053, "logps/chosen": -1.3825290203094482, "logps/rejected": -1.6337703466415405, "loss": 1.4203, "nll_loss": 1.3979805707931519, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0691264420747757, "rewards/margins": 0.012562069110572338, "rewards/rejected": -0.08168850839138031, "step": 1380 }, { "epoch": 0.8114419147694104, "grad_norm": 1.0870331746135864, "learning_rate": 1.3411044519645502e-06, "log_odds_chosen": 0.11137370765209198, "log_odds_ratio": -0.6658010482788086, "logits/chosen": -3.4504141807556152, "logits/rejected": -3.9440269470214844, "logps/chosen": -1.2997303009033203, "logps/rejected": -1.3576933145523071, "loss": 1.4138, "nll_loss": 1.3739395141601562, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06498651951551437, "rewards/margins": 0.002898142207413912, "rewards/rejected": -0.0678846687078476, "step": 1390 }, { "epoch": 0.8172796263864565, "grad_norm": 2.5678010106321105, "learning_rate": 1.3363062095621222e-06, "log_odds_chosen": 0.02349839173257351, "log_odds_ratio": -0.7016226649284363, "logits/chosen": -3.3738696575164795, "logits/rejected": -3.682204008102417, "logps/chosen": -1.6592521667480469, "logps/rejected": -1.7012161016464233, "loss": 1.4763, "nll_loss": 1.542872667312622, "rewards/accuracies": 0.5, "rewards/chosen": -0.08296261727809906, "rewards/margins": 0.0020981989800930023, "rewards/rejected": -0.08506080508232117, "step": 1400 }, { "epoch": 0.8231173380035026, "grad_norm": 1.0401130436958035, "learning_rate": 1.3315591032282687e-06, "log_odds_chosen": 0.2714232802391052, "log_odds_ratio": -0.6120047569274902, "logits/chosen": -3.6823527812957764, "logits/rejected": -3.841249942779541, "logps/chosen": -1.4491932392120361, "logps/rejected": -1.6654586791992188, "loss": 1.3923, "nll_loss": 1.4678351879119873, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07245966047048569, "rewards/margins": 0.010813265107572079, "rewards/rejected": -0.08327292650938034, "step": 1410 }, { "epoch": 0.8289550496205488, "grad_norm": 1.1344114850216485, "learning_rate": 1.3268622310856882e-06, "log_odds_chosen": 0.1071448102593422, "log_odds_ratio": -0.654478907585144, "logits/chosen": -3.7761149406433105, "logits/rejected": -3.795307159423828, "logps/chosen": -1.5627695322036743, "logps/rejected": -1.6524778604507446, "loss": 1.4794, "nll_loss": 1.3751448392868042, "rewards/accuracies": 0.5, "rewards/chosen": -0.07813847064971924, "rewards/margins": 0.004485412035137415, "rewards/rejected": -0.08262388408184052, "step": 1420 }, { "epoch": 0.8347927612375948, "grad_norm": 1.3096979281810506, "learning_rate": 1.3222147133698626e-06, "log_odds_chosen": 0.005504929926246405, "log_odds_ratio": -0.7207657098770142, "logits/chosen": -3.9281463623046875, "logits/rejected": -3.990266799926758, "logps/chosen": -1.5015218257904053, "logps/rejected": -1.510406255722046, "loss": 1.482, "nll_loss": 1.3646979331970215, "rewards/accuracies": 0.5, "rewards/chosen": -0.07507608830928802, "rewards/margins": 0.0004442177596502006, "rewards/rejected": -0.07552031427621841, "step": 1430 }, { "epoch": 0.840630472854641, "grad_norm": 1.400320185921348, "learning_rate": 1.3176156917368248e-06, "log_odds_chosen": 0.14395956695079803, "log_odds_ratio": -0.6354511976242065, "logits/chosen": -3.8084969520568848, "logits/rejected": -3.8405890464782715, "logps/chosen": -1.511125087738037, "logps/rejected": -1.628541350364685, "loss": 1.4514, "nll_loss": 1.5444328784942627, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07555624842643738, "rewards/margins": 0.005870812572538853, "rewards/rejected": -0.08142706751823425, "step": 1440 }, { "epoch": 0.8464681844716871, "grad_norm": 2.028511535948128, "learning_rate": 1.3130643285972255e-06, "log_odds_chosen": 0.010021040216088295, "log_odds_ratio": -0.7225396037101746, "logits/chosen": -3.5494637489318848, "logits/rejected": -3.67463755607605, "logps/chosen": -1.5362083911895752, "logps/rejected": -1.5575059652328491, "loss": 1.4311, "nll_loss": 1.4254138469696045, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07681041210889816, "rewards/margins": 0.001064880983904004, "rewards/rejected": -0.0778753012418747, "step": 1450 }, { "epoch": 0.8523058960887332, "grad_norm": 1.079906637007347, "learning_rate": 1.3085598064755342e-06, "log_odds_chosen": 0.20770971477031708, "log_odds_ratio": -0.6136054396629333, "logits/chosen": -3.56421160697937, "logits/rejected": -3.7692012786865234, "logps/chosen": -1.540736198425293, "logps/rejected": -1.7136892080307007, "loss": 1.5077, "nll_loss": 1.4636768102645874, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07703680545091629, "rewards/margins": 0.00864765327423811, "rewards/rejected": -0.08568446338176727, "step": 1460 }, { "epoch": 0.8581436077057794, "grad_norm": 0.9942795939914961, "learning_rate": 1.3041013273932528e-06, "log_odds_chosen": 0.026080254465341568, "log_odds_ratio": -0.7074596285820007, "logits/chosen": -3.9056296348571777, "logits/rejected": -3.869337558746338, "logps/chosen": -1.334733247756958, "logps/rejected": -1.3556592464447021, "loss": 1.4023, "nll_loss": 1.3186346292495728, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06673665344715118, "rewards/margins": 0.0010463027283549309, "rewards/rejected": -0.06778296083211899, "step": 1470 }, { "epoch": 0.8639813193228254, "grad_norm": 1.3576356455754677, "learning_rate": 1.299688112275091e-06, "log_odds_chosen": 0.24515239894390106, "log_odds_ratio": -0.6101824045181274, "logits/chosen": -3.5225143432617188, "logits/rejected": -3.7441012859344482, "logps/chosen": -1.4464024305343628, "logps/rejected": -1.6418733596801758, "loss": 1.4157, "nll_loss": 1.4153852462768555, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0723201185464859, "rewards/margins": 0.009773560799658298, "rewards/rejected": -0.08209367841482162, "step": 1480 }, { "epoch": 0.8698190309398716, "grad_norm": 1.0175822011963382, "learning_rate": 1.2953194003770995e-06, "log_odds_chosen": 0.30448833107948303, "log_odds_ratio": -0.5871037244796753, "logits/chosen": -3.6719565391540527, "logits/rejected": -3.862250804901123, "logps/chosen": -1.4672845602035522, "logps/rejected": -1.666925072669983, "loss": 1.4555, "nll_loss": 1.4628077745437622, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07336422801017761, "rewards/margins": 0.009982016868889332, "rewards/rejected": -0.08334624022245407, "step": 1490 }, { "epoch": 0.8756567425569177, "grad_norm": 1.3251104220723104, "learning_rate": 1.2909944487358056e-06, "log_odds_chosen": -0.05063999444246292, "log_odds_ratio": -0.7470928430557251, "logits/chosen": -3.835287094116211, "logits/rejected": -3.7582383155822754, "logps/chosen": -1.598112940788269, "logps/rejected": -1.562853455543518, "loss": 1.4938, "nll_loss": 1.4100538492202759, "rewards/accuracies": 0.30000001192092896, "rewards/chosen": -0.07990565150976181, "rewards/margins": -0.0017629750072956085, "rewards/rejected": -0.0781426802277565, "step": 1500 }, { "epoch": 0.8814944541739638, "grad_norm": 1.324471693654573, "learning_rate": 1.286712531637447e-06, "log_odds_chosen": 0.486520916223526, "log_odds_ratio": -0.5204166769981384, "logits/chosen": -3.572990894317627, "logits/rejected": -3.656099319458008, "logps/chosen": -1.2840187549591064, "logps/rejected": -1.588592767715454, "loss": 1.4242, "nll_loss": 1.3867017030715942, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06420093774795532, "rewards/margins": 0.01522870920598507, "rewards/rejected": -0.07942964136600494, "step": 1510 }, { "epoch": 0.8873321657910099, "grad_norm": 1.3167279932704552, "learning_rate": 1.282472940106443e-06, "log_odds_chosen": 0.29961758852005005, "log_odds_ratio": -0.5960550904273987, "logits/chosen": -3.5881309509277344, "logits/rejected": -3.652498722076416, "logps/chosen": -1.5546677112579346, "logps/rejected": -1.795606255531311, "loss": 1.5261, "nll_loss": 1.504473328590393, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07773338258266449, "rewards/margins": 0.012046916410326958, "rewards/rejected": -0.0897803083062172, "step": 1520 }, { "epoch": 0.8931698774080561, "grad_norm": 1.0198484657005646, "learning_rate": 1.278274981412284e-06, "log_odds_chosen": 0.21018366515636444, "log_odds_ratio": -0.6356251239776611, "logits/chosen": -3.596066951751709, "logits/rejected": -3.7855236530303955, "logps/chosen": -1.5646319389343262, "logps/rejected": -1.73688542842865, "loss": 1.3812, "nll_loss": 1.5385024547576904, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07823159545660019, "rewards/margins": 0.008612675592303276, "rewards/rejected": -0.08684426546096802, "step": 1530 }, { "epoch": 0.8990075890251021, "grad_norm": 1.3356870822842828, "learning_rate": 1.2741179785940638e-06, "log_odds_chosen": 0.28678208589553833, "log_odds_ratio": -0.672522246837616, "logits/chosen": -3.448822498321533, "logits/rejected": -3.650935649871826, "logps/chosen": -1.3155088424682617, "logps/rejected": -1.498695731163025, "loss": 1.4789, "nll_loss": 1.4417707920074463, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06577544659376144, "rewards/margins": 0.009159346111118793, "rewards/rejected": -0.07493479549884796, "step": 1540 }, { "epoch": 0.9048453006421483, "grad_norm": 1.830343770088867, "learning_rate": 1.270001270001905e-06, "log_odds_chosen": 0.440469354391098, "log_odds_ratio": -0.512822151184082, "logits/chosen": -3.5399582386016846, "logits/rejected": -3.559086322784424, "logps/chosen": -1.3331736326217651, "logps/rejected": -1.6561905145645142, "loss": 1.4146, "nll_loss": 1.3835328817367554, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06665869057178497, "rewards/margins": 0.016150841489434242, "rewards/rejected": -0.08280953019857407, "step": 1550 }, { "epoch": 0.9106830122591943, "grad_norm": 1.2659051935803385, "learning_rate": 1.2659242088545834e-06, "log_odds_chosen": 0.39413341879844666, "log_odds_ratio": -0.5558941960334778, "logits/chosen": -3.486229658126831, "logits/rejected": -3.5422847270965576, "logps/chosen": -1.295609712600708, "logps/rejected": -1.5923892259597778, "loss": 1.3868, "nll_loss": 1.2337114810943604, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06478048861026764, "rewards/margins": 0.01483896654099226, "rewards/rejected": -0.07961945235729218, "step": 1560 }, { "epoch": 0.9165207238762405, "grad_norm": 1.155244901580651, "learning_rate": 1.261886162812672e-06, "log_odds_chosen": 0.32028770446777344, "log_odds_ratio": -0.5795842409133911, "logits/chosen": -3.661837100982666, "logits/rejected": -3.850341320037842, "logps/chosen": -1.4294582605361938, "logps/rejected": -1.6780650615692139, "loss": 1.4705, "nll_loss": 1.4566688537597656, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07147292047739029, "rewards/margins": 0.012430345639586449, "rewards/rejected": -0.08390326052904129, "step": 1570 }, { "epoch": 0.9223584354932867, "grad_norm": 1.3048347058304226, "learning_rate": 1.257886513566569e-06, "log_odds_chosen": 0.3404911160469055, "log_odds_ratio": -0.5978623032569885, "logits/chosen": -3.453155994415283, "logits/rejected": -3.780075788497925, "logps/chosen": -1.2879739999771118, "logps/rejected": -1.549617886543274, "loss": 1.3687, "nll_loss": 1.338066816329956, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06439869850873947, "rewards/margins": 0.01308219600468874, "rewards/rejected": -0.07748089730739594, "step": 1580 }, { "epoch": 0.9281961471103327, "grad_norm": 1.75010339140936, "learning_rate": 1.253924656438798e-06, "log_odds_chosen": 0.19407020509243011, "log_odds_ratio": -0.6416453719139099, "logits/chosen": -3.7901241779327393, "logits/rejected": -3.757331371307373, "logps/chosen": -1.4554176330566406, "logps/rejected": -1.6118720769882202, "loss": 1.5231, "nll_loss": 1.3595085144042969, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07277088612318039, "rewards/margins": 0.007822719402611256, "rewards/rejected": -0.08059360086917877, "step": 1590 }, { "epoch": 0.9340338587273789, "grad_norm": 1.131749034903126, "learning_rate": 1.25e-06, "log_odds_chosen": 0.46642714738845825, "log_odds_ratio": -0.5118834376335144, "logits/chosen": -3.4185433387756348, "logits/rejected": -3.7265236377716064, "logps/chosen": -1.4641472101211548, "logps/rejected": -1.826372742652893, "loss": 1.4462, "nll_loss": 1.3941690921783447, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07320736348628998, "rewards/margins": 0.018111277371644974, "rewards/rejected": -0.09131863713264465, "step": 1600 }, { "epoch": 0.939871570344425, "grad_norm": 0.971045741182657, "learning_rate": 1.246111965698067e-06, "log_odds_chosen": 0.33774715662002563, "log_odds_ratio": -0.5784534215927124, "logits/chosen": -3.356546401977539, "logits/rejected": -3.8522427082061768, "logps/chosen": -1.3093016147613525, "logps/rejected": -1.547528624534607, "loss": 1.4392, "nll_loss": 1.3139359951019287, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06546507775783539, "rewards/margins": 0.011911359615623951, "rewards/rejected": -0.07737644016742706, "step": 1610 }, { "epoch": 0.9457092819614711, "grad_norm": 1.163707950875779, "learning_rate": 1.2422599874998834e-06, "log_odds_chosen": 0.5173758268356323, "log_odds_ratio": -0.5307906866073608, "logits/chosen": -3.4634594917297363, "logits/rejected": -3.4533133506774902, "logps/chosen": -1.2362850904464722, "logps/rejected": -1.6176259517669678, "loss": 1.2931, "nll_loss": 1.2277257442474365, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06181425601243973, "rewards/margins": 0.01906704343855381, "rewards/rejected": -0.08088129758834839, "step": 1620 }, { "epoch": 0.9515469935785172, "grad_norm": 1.072523018820316, "learning_rate": 1.238443511545175e-06, "log_odds_chosen": 0.2493160516023636, "log_odds_ratio": -0.6065091490745544, "logits/chosen": -3.8419482707977295, "logits/rejected": -3.647533893585205, "logps/chosen": -1.4229000806808472, "logps/rejected": -1.6018896102905273, "loss": 1.431, "nll_loss": 1.358720302581787, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07114501297473907, "rewards/margins": 0.008949479088187218, "rewards/rejected": -0.08009448647499084, "step": 1630 }, { "epoch": 0.9573847051955633, "grad_norm": 1.7126589341510343, "learning_rate": 1.2346619958119873e-06, "log_odds_chosen": 0.16228720545768738, "log_odds_ratio": -0.6848636865615845, "logits/chosen": -3.592834949493408, "logits/rejected": -3.4870400428771973, "logps/chosen": -1.6205450296401978, "logps/rejected": -1.829359769821167, "loss": 1.4987, "nll_loss": 1.6184488534927368, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.08102725446224213, "rewards/margins": 0.010440734215080738, "rewards/rejected": -0.09146799147129059, "step": 1640 }, { "epoch": 0.9632224168126094, "grad_norm": 1.048812518815968, "learning_rate": 1.2309149097933274e-06, "log_odds_chosen": 0.006183737423270941, "log_odds_ratio": -0.7255327105522156, "logits/chosen": -3.684055805206299, "logits/rejected": -3.676953077316284, "logps/chosen": -1.7031185626983643, "logps/rejected": -1.7141425609588623, "loss": 1.5214, "nll_loss": 1.6148998737335205, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0851559266448021, "rewards/margins": 0.0005512058851309121, "rewards/rejected": -0.08570712804794312, "step": 1650 }, { "epoch": 0.9690601284296556, "grad_norm": 1.0040178010751106, "learning_rate": 1.2272017341845401e-06, "log_odds_chosen": 0.06804993003606796, "log_odds_ratio": -0.7061912417411804, "logits/chosen": -3.7481677532196045, "logits/rejected": -3.8301796913146973, "logps/chosen": -1.3068664073944092, "logps/rejected": -1.384875774383545, "loss": 1.3835, "nll_loss": 1.4141874313354492, "rewards/accuracies": 0.5, "rewards/chosen": -0.06534332036972046, "rewards/margins": 0.0039004634600132704, "rewards/rejected": -0.06924378871917725, "step": 1660 }, { "epoch": 0.9748978400467017, "grad_norm": 0.9940300208090319, "learning_rate": 1.223521960580991e-06, "log_odds_chosen": 0.21645712852478027, "log_odds_ratio": -0.6181725263595581, "logits/chosen": -3.65360689163208, "logits/rejected": -3.870680332183838, "logps/chosen": -1.4860312938690186, "logps/rejected": -1.644234299659729, "loss": 1.3867, "nll_loss": 1.4015616178512573, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07430155575275421, "rewards/margins": 0.007910149171948433, "rewards/rejected": -0.08221171051263809, "step": 1670 }, { "epoch": 0.9807355516637478, "grad_norm": 1.0774361313792074, "learning_rate": 1.2198750911856664e-06, "log_odds_chosen": 0.00025553704472258687, "log_odds_ratio": -0.7162112593650818, "logits/chosen": -3.8886780738830566, "logits/rejected": -3.891143321990967, "logps/chosen": -1.4709336757659912, "logps/rejected": -1.4573787450790405, "loss": 1.3989, "nll_loss": 1.3733394145965576, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.07354667037725449, "rewards/margins": -0.0006777312373742461, "rewards/rejected": -0.0728689506649971, "step": 1680 }, { "epoch": 0.986573263280794, "grad_norm": 1.0449476716071509, "learning_rate": 1.2162606385262997e-06, "log_odds_chosen": 0.444558322429657, "log_odds_ratio": -0.5077232718467712, "logits/chosen": -3.82499623298645, "logits/rejected": -3.639371395111084, "logps/chosen": -1.311323642730713, "logps/rejected": -1.6218187808990479, "loss": 1.4368, "nll_loss": 1.3700106143951416, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.06556618213653564, "rewards/margins": 0.015524746850132942, "rewards/rejected": -0.08109092712402344, "step": 1690 }, { "epoch": 0.99241097489784, "grad_norm": 2.5360036367188354, "learning_rate": 1.2126781251816649e-06, "log_odds_chosen": 0.40302953124046326, "log_odds_ratio": -0.5470925569534302, "logits/chosen": -3.628610610961914, "logits/rejected": -3.7638862133026123, "logps/chosen": -1.4025089740753174, "logps/rejected": -1.7209184169769287, "loss": 1.382, "nll_loss": 1.3465843200683594, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07012544572353363, "rewards/margins": 0.015920482575893402, "rewards/rejected": -0.08604592829942703, "step": 1700 }, { "epoch": 0.9982486865148862, "grad_norm": 1.4347465218381061, "learning_rate": 1.2091270835166862e-06, "log_odds_chosen": 0.2150537520647049, "log_odds_ratio": -0.6200212836265564, "logits/chosen": -3.6568500995635986, "logits/rejected": -3.889483690261841, "logps/chosen": -1.5149071216583252, "logps/rejected": -1.6775153875350952, "loss": 1.4764, "nll_loss": 1.5310183763504028, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0757453590631485, "rewards/margins": 0.00813041627407074, "rewards/rejected": -0.08387576043605804, "step": 1710 }, { "epoch": 1.0040863981319323, "grad_norm": 1.6248680841890921, "learning_rate": 1.2056070554260305e-06, "log_odds_chosen": 0.5309208631515503, "log_odds_ratio": -0.4814608097076416, "logits/chosen": -3.6240715980529785, "logits/rejected": -3.826350450515747, "logps/chosen": -1.1380491256713867, "logps/rejected": -1.4970637559890747, "loss": 1.3297, "nll_loss": 1.2176345586776733, "rewards/accuracies": 1.0, "rewards/chosen": -0.056902457028627396, "rewards/margins": 0.01795072853565216, "rewards/rejected": -0.07485318928956985, "step": 1720 }, { "epoch": 1.0099241097489784, "grad_norm": 1.0445964507746845, "learning_rate": 1.2021175920858626e-06, "log_odds_chosen": 0.3140803575515747, "log_odds_ratio": -0.5826042294502258, "logits/chosen": -3.50083589553833, "logits/rejected": -3.638762950897217, "logps/chosen": -1.4003469944000244, "logps/rejected": -1.6271326541900635, "loss": 1.3554, "nll_loss": 1.3409336805343628, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07001735270023346, "rewards/margins": 0.011339290998876095, "rewards/rejected": -0.08135663717985153, "step": 1730 }, { "epoch": 1.0157618213660244, "grad_norm": 1.7303746807609806, "learning_rate": 1.1986582537134606e-06, "log_odds_chosen": 0.43830710649490356, "log_odds_ratio": -0.5441099405288696, "logits/chosen": -3.1950912475585938, "logits/rejected": -3.6111419200897217, "logps/chosen": -1.0953491926193237, "logps/rejected": -1.3901907205581665, "loss": 1.3274, "nll_loss": 1.1333324909210205, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05476746708154678, "rewards/margins": 0.014742070809006691, "rewards/rejected": -0.06950953602790833, "step": 1740 }, { "epoch": 1.0215995329830707, "grad_norm": 0.9201154464520067, "learning_rate": 1.1952286093343937e-06, "log_odds_chosen": 0.20604786276817322, "log_odds_ratio": -0.6448016166687012, "logits/chosen": -3.2193851470947266, "logits/rejected": -3.7446913719177246, "logps/chosen": -1.3417751789093018, "logps/rejected": -1.514827013015747, "loss": 1.374, "nll_loss": 1.3666739463806152, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06708876043558121, "rewards/margins": 0.00865259487181902, "rewards/rejected": -0.07574135065078735, "step": 1750 }, { "epoch": 1.0274372446001168, "grad_norm": 1.3261398929219284, "learning_rate": 1.1918282365569903e-06, "log_odds_chosen": 0.4730736315250397, "log_odds_ratio": -0.5134940147399902, "logits/chosen": -3.4047799110412598, "logits/rejected": -3.5098586082458496, "logps/chosen": -1.1641530990600586, "logps/rejected": -1.4648202657699585, "loss": 1.5331, "nll_loss": 1.3051875829696655, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05820764973759651, "rewards/margins": 0.015033364295959473, "rewards/rejected": -0.07324101030826569, "step": 1760 }, { "epoch": 1.0332749562171628, "grad_norm": 0.9444411433249622, "learning_rate": 1.1884567213538209e-06, "log_odds_chosen": 0.042138539254665375, "log_odds_ratio": -0.778218686580658, "logits/chosen": -3.1641697883605957, "logits/rejected": -3.4052295684814453, "logps/chosen": -1.2676657438278198, "logps/rejected": -1.371290683746338, "loss": 1.542, "nll_loss": 1.4486913681030273, "rewards/accuracies": 0.5, "rewards/chosen": -0.06338329613208771, "rewards/margins": 0.005181250628083944, "rewards/rejected": -0.06856454163789749, "step": 1770 }, { "epoch": 1.039112667834209, "grad_norm": 1.081524194222709, "learning_rate": 1.1851136578499433e-06, "log_odds_chosen": -0.11115250736474991, "log_odds_ratio": -0.818200409412384, "logits/chosen": -3.349710464477539, "logits/rejected": -3.352593183517456, "logps/chosen": -1.4667171239852905, "logps/rejected": -1.3993372917175293, "loss": 1.3875, "nll_loss": 1.3991974592208862, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07333585619926453, "rewards/margins": -0.0033689953852444887, "rewards/rejected": -0.0699668675661087, "step": 1780 }, { "epoch": 1.0449503794512551, "grad_norm": 0.9069610287078473, "learning_rate": 1.181798648117664e-06, "log_odds_chosen": 0.3634900450706482, "log_odds_ratio": -0.5425634384155273, "logits/chosen": -3.7737395763397217, "logits/rejected": -3.8264834880828857, "logps/chosen": -1.2526437044143677, "logps/rejected": -1.5034019947052002, "loss": 1.3556, "nll_loss": 1.2584360837936401, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06263218075037003, "rewards/margins": 0.012537918984889984, "rewards/rejected": -0.07517009973526001, "step": 1790 }, { "epoch": 1.0507880910683012, "grad_norm": 1.1489579679170379, "learning_rate": 1.1785113019775794e-06, "log_odds_chosen": 0.3152009844779968, "log_odds_ratio": -0.562280535697937, "logits/chosen": -3.6211676597595215, "logits/rejected": -3.466742992401123, "logps/chosen": -1.2548134326934814, "logps/rejected": -1.481236219406128, "loss": 1.3239, "nll_loss": 1.2177187204360962, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06274067610502243, "rewards/margins": 0.011321141384541988, "rewards/rejected": -0.074061818420887, "step": 1800 }, { "epoch": 1.0566258026853474, "grad_norm": 1.4046098082044551, "learning_rate": 1.1752512368056712e-06, "log_odds_chosen": 0.6752073764801025, "log_odds_ratio": -0.44467872381210327, "logits/chosen": -3.695281982421875, "logits/rejected": -3.5766899585723877, "logps/chosen": -1.0987207889556885, "logps/rejected": -1.5829815864562988, "loss": 1.3163, "nll_loss": 1.1287763118743896, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.05493603274226189, "rewards/margins": 0.024213045835494995, "rewards/rejected": -0.07914907485246658, "step": 1810 }, { "epoch": 1.0624635143023935, "grad_norm": 1.0730442707838688, "learning_rate": 1.1720180773462387e-06, "log_odds_chosen": 0.39927372336387634, "log_odds_ratio": -0.530045747756958, "logits/chosen": -3.44526743888855, "logits/rejected": -3.5254242420196533, "logps/chosen": -1.3991167545318604, "logps/rejected": -1.6946735382080078, "loss": 1.3621, "nll_loss": 1.3175911903381348, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06995584815740585, "rewards/margins": 0.014777831733226776, "rewards/rejected": -0.08473367989063263, "step": 1820 }, { "epoch": 1.0683012259194395, "grad_norm": 1.746526987775029, "learning_rate": 1.168811455530461e-06, "log_odds_chosen": 0.2808654308319092, "log_odds_ratio": -0.5972429513931274, "logits/chosen": -3.762648344039917, "logits/rejected": -3.7869269847869873, "logps/chosen": -1.3907674551010132, "logps/rejected": -1.5986067056655884, "loss": 1.4001, "nll_loss": 1.3689020872116089, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06953837722539902, "rewards/margins": 0.010391953401267529, "rewards/rejected": -0.07993032783269882, "step": 1830 }, { "epoch": 1.0741389375364856, "grad_norm": 1.2055895171620474, "learning_rate": 1.1656310103003923e-06, "log_odds_chosen": 0.6251667737960815, "log_odds_ratio": -0.475567489862442, "logits/chosen": -3.5847339630126953, "logits/rejected": -3.4862473011016846, "logps/chosen": -1.1945834159851074, "logps/rejected": -1.6218640804290771, "loss": 1.3367, "nll_loss": 1.2132400274276733, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05972917005419731, "rewards/margins": 0.021364033222198486, "rewards/rejected": -0.0810932070016861, "step": 1840 }, { "epoch": 1.0799766491535319, "grad_norm": 2.5706468807564233, "learning_rate": 1.162476387438193e-06, "log_odds_chosen": 0.5422267317771912, "log_odds_ratio": -0.5372719168663025, "logits/chosen": -3.7324204444885254, "logits/rejected": -3.797912120819092, "logps/chosen": -1.250009298324585, "logps/rejected": -1.6755027770996094, "loss": 1.3415, "nll_loss": 1.3178011178970337, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06250046193599701, "rewards/margins": 0.02127467654645443, "rewards/rejected": -0.08377514779567719, "step": 1850 }, { "epoch": 1.085814360770578, "grad_norm": 1.147961303477568, "learning_rate": 1.1593472394004206e-06, "log_odds_chosen": 0.052546728402376175, "log_odds_ratio": -0.6978060007095337, "logits/chosen": -3.518106460571289, "logits/rejected": -3.6956710815429688, "logps/chosen": -1.4166748523712158, "logps/rejected": -1.474102258682251, "loss": 1.4338, "nll_loss": 1.4295157194137573, "rewards/accuracies": 0.5, "rewards/chosen": -0.0708337351679802, "rewards/margins": 0.002871375996619463, "rewards/rejected": -0.07370512187480927, "step": 1860 }, { "epoch": 1.091652072387624, "grad_norm": 1.1407106803368436, "learning_rate": 1.1562432251572007e-06, "log_odds_chosen": 0.302070677280426, "log_odds_ratio": -0.5877067446708679, "logits/chosen": -3.6910743713378906, "logits/rejected": -3.7169437408447266, "logps/chosen": -1.4157214164733887, "logps/rejected": -1.6487401723861694, "loss": 1.3918, "nll_loss": 1.3657575845718384, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07078607380390167, "rewards/margins": 0.011650935746729374, "rewards/rejected": -0.08243700861930847, "step": 1870 }, { "epoch": 1.0974897840046702, "grad_norm": 1.1544854822414994, "learning_rate": 1.1531640100361064e-06, "log_odds_chosen": 0.4914014935493469, "log_odds_ratio": -0.4995287358760834, "logits/chosen": -3.6153035163879395, "logits/rejected": -3.6367878913879395, "logps/chosen": -1.284345269203186, "logps/rejected": -1.6485198736190796, "loss": 1.3993, "nll_loss": 1.3417773246765137, "rewards/accuracies": 1.0, "rewards/chosen": -0.06421726197004318, "rewards/margins": 0.01820872724056244, "rewards/rejected": -0.08242599666118622, "step": 1880 }, { "epoch": 1.1033274956217163, "grad_norm": 1.366436843128841, "learning_rate": 1.1501092655705905e-06, "log_odds_chosen": 0.4811020791530609, "log_odds_ratio": -0.5066652297973633, "logits/chosen": -3.52642560005188, "logits/rejected": -3.843777894973755, "logps/chosen": -1.4696800708770752, "logps/rejected": -1.8598215579986572, "loss": 1.4744, "nll_loss": 1.5168644189834595, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.07348400354385376, "rewards/margins": 0.019507071003317833, "rewards/rejected": -0.09299106895923615, "step": 1890 }, { "epoch": 1.1091652072387623, "grad_norm": 0.9444354726574743, "learning_rate": 1.1470786693528087e-06, "log_odds_chosen": 0.25837087631225586, "log_odds_ratio": -0.5855075120925903, "logits/chosen": -3.7054011821746826, "logits/rejected": -3.785703182220459, "logps/chosen": -1.5098440647125244, "logps/rejected": -1.7152448892593384, "loss": 1.4406, "nll_loss": 1.3809047937393188, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07549221068620682, "rewards/margins": 0.010270049795508385, "rewards/rejected": -0.08576224744319916, "step": 1900 }, { "epoch": 1.1150029188558086, "grad_norm": 0.9893642389971953, "learning_rate": 1.144071904890689e-06, "log_odds_chosen": 0.24827320873737335, "log_odds_ratio": -0.6384488344192505, "logits/chosen": -3.63114595413208, "logits/rejected": -3.846846103668213, "logps/chosen": -1.4477227926254272, "logps/rejected": -1.6093952655792236, "loss": 1.3768, "nll_loss": 1.3859052658081055, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07238615304231644, "rewards/margins": 0.008083615452051163, "rewards/rejected": -0.0804697647690773, "step": 1910 }, { "epoch": 1.1208406304728546, "grad_norm": 1.5279230725797253, "learning_rate": 1.1410886614690962e-06, "log_odds_chosen": 0.412159264087677, "log_odds_ratio": -0.5544516444206238, "logits/chosen": -3.66267728805542, "logits/rejected": -3.6792259216308594, "logps/chosen": -1.3643767833709717, "logps/rejected": -1.5881307125091553, "loss": 1.4036, "nll_loss": 1.4112358093261719, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06821884214878082, "rewards/margins": 0.01118768472224474, "rewards/rejected": -0.07940652966499329, "step": 1920 }, { "epoch": 1.1266783420899007, "grad_norm": 1.1332414402449762, "learning_rate": 1.1381286340149635e-06, "log_odds_chosen": 0.007138377521187067, "log_odds_ratio": -0.7076228857040405, "logits/chosen": -3.724733352661133, "logits/rejected": -3.7842040061950684, "logps/chosen": -1.4928958415985107, "logps/rejected": -1.4994539022445679, "loss": 1.3143, "nll_loss": 1.388541579246521, "rewards/accuracies": 0.5, "rewards/chosen": -0.0746447965502739, "rewards/margins": 0.00032790115801617503, "rewards/rejected": -0.07497268915176392, "step": 1930 }, { "epoch": 1.132516053706947, "grad_norm": 1.5777550831407274, "learning_rate": 1.1351915229662496e-06, "log_odds_chosen": 0.0955348014831543, "log_odds_ratio": -0.684337317943573, "logits/chosen": -3.6207194328308105, "logits/rejected": -3.8182060718536377, "logps/chosen": -1.3694504499435425, "logps/rejected": -1.472367286682129, "loss": 1.3918, "nll_loss": 1.4131734371185303, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.06847251951694489, "rewards/margins": 0.005145835690200329, "rewards/rejected": -0.07361836731433868, "step": 1940 }, { "epoch": 1.138353765323993, "grad_norm": 1.0335874830138492, "learning_rate": 1.1322770341445958e-06, "log_odds_chosen": 0.2806586027145386, "log_odds_ratio": -0.582626461982727, "logits/chosen": -3.747776508331299, "logits/rejected": -3.6831672191619873, "logps/chosen": -1.632075548171997, "logps/rejected": -1.8743104934692383, "loss": 1.4847, "nll_loss": 1.586735486984253, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08160378038883209, "rewards/margins": 0.01211173739284277, "rewards/rejected": -0.09371552616357803, "step": 1950 }, { "epoch": 1.144191476941039, "grad_norm": 1.449852748968863, "learning_rate": 1.1293848786315642e-06, "log_odds_chosen": 0.441505491733551, "log_odds_ratio": -0.5378905534744263, "logits/chosen": -3.5396900177001953, "logits/rejected": -3.770970106124878, "logps/chosen": -1.2857306003570557, "logps/rejected": -1.5663769245147705, "loss": 1.3764, "nll_loss": 1.2446125745773315, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06428653001785278, "rewards/margins": 0.014032321982085705, "rewards/rejected": -0.07831884920597076, "step": 1960 }, { "epoch": 1.1500291885580851, "grad_norm": 2.113748386822823, "learning_rate": 1.1265147726483323e-06, "log_odds_chosen": 0.39094239473342896, "log_odds_ratio": -0.5713012218475342, "logits/chosen": -3.7838997840881348, "logits/rejected": -3.761324405670166, "logps/chosen": -1.378896713256836, "logps/rejected": -1.6968309879302979, "loss": 1.391, "nll_loss": 1.4079535007476807, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06894484162330627, "rewards/margins": 0.01589670404791832, "rewards/rejected": -0.08484154939651489, "step": 1970 }, { "epoch": 1.1558669001751314, "grad_norm": 1.2181440894390343, "learning_rate": 1.1236664374387369e-06, "log_odds_chosen": 0.27114516496658325, "log_odds_ratio": -0.6053155660629272, "logits/chosen": -3.8012962341308594, "logits/rejected": -3.6446566581726074, "logps/chosen": -1.3199713230133057, "logps/rejected": -1.5094892978668213, "loss": 1.4118, "nll_loss": 1.2973320484161377, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06599856913089752, "rewards/margins": 0.009475892409682274, "rewards/rejected": -0.07547445595264435, "step": 1980 }, { "epoch": 1.1617046117921774, "grad_norm": 1.2631941895673238, "learning_rate": 1.120839599155551e-06, "log_odds_chosen": 0.09791580587625504, "log_odds_ratio": -0.6844789385795593, "logits/chosen": -3.8895516395568848, "logits/rejected": -3.938814163208008, "logps/chosen": -1.4073612689971924, "logps/rejected": -1.4873847961425781, "loss": 1.3768, "nll_loss": 1.426729440689087, "rewards/accuracies": 0.5, "rewards/chosen": -0.07036806643009186, "rewards/margins": 0.004001176450401545, "rewards/rejected": -0.07436924427747726, "step": 1990 }, { "epoch": 1.1675423234092235, "grad_norm": 1.1238437712663387, "learning_rate": 1.118033988749895e-06, "log_odds_chosen": 0.20849351584911346, "log_odds_ratio": -0.6392322182655334, "logits/chosen": -3.546721935272217, "logits/rejected": -3.7286746501922607, "logps/chosen": -1.422321081161499, "logps/rejected": -1.6041209697723389, "loss": 1.335, "nll_loss": 1.4312111139297485, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07111605256795883, "rewards/margins": 0.009089997038245201, "rewards/rejected": -0.08020605146884918, "step": 2000 }, { "epoch": 1.1733800350262698, "grad_norm": 1.5267170757481834, "learning_rate": 1.1152493418636764e-06, "log_odds_chosen": 0.21198411285877228, "log_odds_ratio": -0.616431713104248, "logits/chosen": -3.764366626739502, "logits/rejected": -3.8637936115264893, "logps/chosen": -1.596558690071106, "logps/rejected": -1.7571001052856445, "loss": 1.411, "nll_loss": 1.5959482192993164, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0798279270529747, "rewards/margins": 0.008027072064578533, "rewards/rejected": -0.0878550112247467, "step": 2010 }, { "epoch": 1.1792177466433158, "grad_norm": 1.4689171500352454, "learning_rate": 1.112485398724962e-06, "log_odds_chosen": 0.30152544379234314, "log_odds_ratio": -0.5816253423690796, "logits/chosen": -3.6848628520965576, "logits/rejected": -3.7476208209991455, "logps/chosen": -1.5022661685943604, "logps/rejected": -1.7426671981811523, "loss": 1.3889, "nll_loss": 1.4230130910873413, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.07511331140995026, "rewards/margins": 0.012020042166113853, "rewards/rejected": -0.08713336288928986, "step": 2020 }, { "epoch": 1.1850554582603618, "grad_norm": 1.4351555176921986, "learning_rate": 1.1097419040461884e-06, "log_odds_chosen": 0.25362125039100647, "log_odds_ratio": -0.5913580656051636, "logits/chosen": -3.6000304222106934, "logits/rejected": -3.688863754272461, "logps/chosen": -1.3785302639007568, "logps/rejected": -1.5681920051574707, "loss": 1.4762, "nll_loss": 1.321753740310669, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06892652064561844, "rewards/margins": 0.009483075700700283, "rewards/rejected": -0.0784095972776413, "step": 2030 }, { "epoch": 1.1908931698774081, "grad_norm": 1.5801503164635833, "learning_rate": 1.1070186069251193e-06, "log_odds_chosen": 0.30363231897354126, "log_odds_ratio": -0.5844923257827759, "logits/chosen": -3.7602131366729736, "logits/rejected": -3.3354382514953613, "logps/chosen": -1.2401950359344482, "logps/rejected": -1.4571495056152344, "loss": 1.2767, "nll_loss": 1.3009653091430664, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06200975924730301, "rewards/margins": 0.010847719386219978, "rewards/rejected": -0.07285747677087784, "step": 2040 }, { "epoch": 1.1967308814944542, "grad_norm": 0.9710679111495527, "learning_rate": 1.1043152607484655e-06, "log_odds_chosen": 0.6063815355300903, "log_odds_ratio": -0.49208706617355347, "logits/chosen": -3.6062705516815186, "logits/rejected": -3.72717547416687, "logps/chosen": -1.244929552078247, "logps/rejected": -1.6472994089126587, "loss": 1.3408, "nll_loss": 1.1530210971832275, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06224647909402847, "rewards/margins": 0.02011849358677864, "rewards/rejected": -0.08236496895551682, "step": 2050 }, { "epoch": 1.2025685931115002, "grad_norm": 1.6893383278697465, "learning_rate": 1.1016316230980794e-06, "log_odds_chosen": 0.37986645102500916, "log_odds_ratio": -0.5545374155044556, "logits/chosen": -3.738562822341919, "logits/rejected": -3.8489089012145996, "logps/chosen": -1.4413849115371704, "logps/rejected": -1.7359333038330078, "loss": 1.4062, "nll_loss": 1.3474347591400146, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07206924259662628, "rewards/margins": 0.014727416448295116, "rewards/rejected": -0.08679667115211487, "step": 2060 }, { "epoch": 1.2084063047285465, "grad_norm": 1.7433724298876911, "learning_rate": 1.098967455659645e-06, "log_odds_chosen": 0.13850228488445282, "log_odds_ratio": -0.6520179510116577, "logits/chosen": -3.657125473022461, "logits/rejected": -3.8597030639648438, "logps/chosen": -1.4504598379135132, "logps/rejected": -1.5793650150299072, "loss": 1.3572, "nll_loss": 1.446405053138733, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07252300530672073, "rewards/margins": 0.00644524535164237, "rewards/rejected": -0.07896825671195984, "step": 2070 }, { "epoch": 1.2142440163455925, "grad_norm": 1.5119035774021106, "learning_rate": 1.0963225241337867e-06, "log_odds_chosen": 0.3655147850513458, "log_odds_ratio": -0.5385193824768066, "logits/chosen": -3.796602249145508, "logits/rejected": -3.685631513595581, "logps/chosen": -1.34024977684021, "logps/rejected": -1.6207714080810547, "loss": 1.3178, "nll_loss": 1.330686330795288, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.0670124739408493, "rewards/margins": 0.014026084914803505, "rewards/rejected": -0.08103857189416885, "step": 2080 }, { "epoch": 1.2200817279626386, "grad_norm": 1.6477702477437925, "learning_rate": 1.093696598149518e-06, "log_odds_chosen": 0.2183896005153656, "log_odds_ratio": -0.6073380708694458, "logits/chosen": -3.6333587169647217, "logits/rejected": -3.9071974754333496, "logps/chosen": -1.276052713394165, "logps/rejected": -1.4297508001327515, "loss": 1.4138, "nll_loss": 1.3180588483810425, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06380262970924377, "rewards/margins": 0.007684916257858276, "rewards/rejected": -0.07148755341768265, "step": 2090 }, { "epoch": 1.2259194395796849, "grad_norm": 1.1177242509648109, "learning_rate": 1.091089451179962e-06, "log_odds_chosen": -0.10948194563388824, "log_odds_ratio": -0.769691526889801, "logits/chosen": -3.6322569847106934, "logits/rejected": -3.6831047534942627, "logps/chosen": -1.393059492111206, "logps/rejected": -1.326404094696045, "loss": 1.4752, "nll_loss": 1.3147571086883545, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.0696529746055603, "rewards/margins": -0.0033327643759548664, "rewards/rejected": -0.06632020324468613, "step": 2100 }, { "epoch": 1.231757151196731, "grad_norm": 1.2337795987803604, "learning_rate": 1.0885008604602703e-06, "log_odds_chosen": 0.1599619835615158, "log_odds_ratio": -0.6566121578216553, "logits/chosen": -3.734853744506836, "logits/rejected": -3.7056527137756348, "logps/chosen": -1.5930907726287842, "logps/rejected": -1.7116711139678955, "loss": 1.4637, "nll_loss": 1.502603530883789, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07965454459190369, "rewards/margins": 0.0059290071949362755, "rewards/rejected": -0.08558355271816254, "step": 2110 }, { "epoch": 1.237594862813777, "grad_norm": 1.227354840457199, "learning_rate": 1.0859306069076736e-06, "log_odds_chosen": -0.04096251726150513, "log_odds_ratio": -0.7658440470695496, "logits/chosen": -3.717850923538208, "logits/rejected": -3.656808853149414, "logps/chosen": -1.460533618927002, "logps/rejected": -1.4643481969833374, "loss": 1.3879, "nll_loss": 1.473681926727295, "rewards/accuracies": 0.5, "rewards/chosen": -0.07302668690681458, "rewards/margins": 0.00019072965369559824, "rewards/rejected": -0.07321741431951523, "step": 2120 }, { "epoch": 1.2434325744308232, "grad_norm": 2.6043786619594202, "learning_rate": 1.083378475043599e-06, "log_odds_chosen": 0.3063589930534363, "log_odds_ratio": -0.6144129037857056, "logits/chosen": -3.514833927154541, "logits/rejected": -3.676304340362549, "logps/chosen": -1.4063876867294312, "logps/rejected": -1.551344633102417, "loss": 1.3486, "nll_loss": 1.3733800649642944, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07031938433647156, "rewards/margins": 0.007247844245284796, "rewards/rejected": -0.07756723463535309, "step": 2130 }, { "epoch": 1.2492702860478693, "grad_norm": 1.585999968698447, "learning_rate": 1.0808442529177925e-06, "log_odds_chosen": 0.39330384135246277, "log_odds_ratio": -0.5571161508560181, "logits/chosen": -3.5188727378845215, "logits/rejected": -3.8803906440734863, "logps/chosen": -1.5003950595855713, "logps/rejected": -1.774963140487671, "loss": 1.3335, "nll_loss": 1.555761456489563, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07501974701881409, "rewards/margins": 0.013728415593504906, "rewards/rejected": -0.08874816447496414, "step": 2140 }, { "epoch": 1.2551079976649153, "grad_norm": 1.170237706753259, "learning_rate": 1.0783277320343842e-06, "log_odds_chosen": 0.3741016089916229, "log_odds_ratio": -0.5769850611686707, "logits/chosen": -3.3308498859405518, "logits/rejected": -3.5217204093933105, "logps/chosen": -1.2770168781280518, "logps/rejected": -1.5669430494308472, "loss": 1.3812, "nll_loss": 1.3277027606964111, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.06385083496570587, "rewards/margins": 0.014496306888759136, "rewards/rejected": -0.07834716141223907, "step": 2150 }, { "epoch": 1.2609457092819616, "grad_norm": 1.0756633009876142, "learning_rate": 1.075828707279838e-06, "log_odds_chosen": 0.8311444520950317, "log_odds_ratio": -0.4042474627494812, "logits/chosen": -3.311124801635742, "logits/rejected": -3.3961071968078613, "logps/chosen": -1.2835935354232788, "logps/rejected": -1.824609398841858, "loss": 1.4168, "nll_loss": 1.3423393964767456, "rewards/accuracies": 1.0, "rewards/chosen": -0.0641796737909317, "rewards/margins": 0.02705078385770321, "rewards/rejected": -0.09123046696186066, "step": 2160 }, { "epoch": 1.2667834208990076, "grad_norm": 1.7000848926014147, "learning_rate": 1.0733469768527298e-06, "log_odds_chosen": 0.3261513113975525, "log_odds_ratio": -0.5506059527397156, "logits/chosen": -3.7200980186462402, "logits/rejected": -3.9084160327911377, "logps/chosen": -1.4224252700805664, "logps/rejected": -1.6704928874969482, "loss": 1.3858, "nll_loss": 1.4113878011703491, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07112126052379608, "rewards/margins": 0.012403379194438457, "rewards/rejected": -0.08352464437484741, "step": 2170 }, { "epoch": 1.2726211325160537, "grad_norm": 1.1876816723853048, "learning_rate": 1.0708823421952984e-06, "log_odds_chosen": 0.3324907720088959, "log_odds_ratio": -0.5491698384284973, "logits/chosen": -3.5350430011749268, "logits/rejected": -3.7935256958007812, "logps/chosen": -1.3882853984832764, "logps/rejected": -1.625105857849121, "loss": 1.3484, "nll_loss": 1.3892942667007446, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.06941427290439606, "rewards/margins": 0.01184102427214384, "rewards/rejected": -0.08125529438257217, "step": 2180 }, { "epoch": 1.2784588441331, "grad_norm": 1.154527709692401, "learning_rate": 1.0684346079267208e-06, "log_odds_chosen": 0.8990064859390259, "log_odds_ratio": -0.3948432207107544, "logits/chosen": -3.871859312057495, "logits/rejected": -3.9572091102600098, "logps/chosen": -1.1656372547149658, "logps/rejected": -1.7607539892196655, "loss": 1.3092, "nll_loss": 1.2404518127441406, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.05828186124563217, "rewards/margins": 0.029755841940641403, "rewards/rejected": -0.08803769201040268, "step": 2190 }, { "epoch": 1.284296555750146, "grad_norm": 1.1262454253549867, "learning_rate": 1.066003581778052e-06, "log_odds_chosen": 0.05806674435734749, "log_odds_ratio": -0.7136454582214355, "logits/chosen": -3.4614956378936768, "logits/rejected": -3.605149507522583, "logps/chosen": -1.3655524253845215, "logps/rejected": -1.3924560546875, "loss": 1.4086, "nll_loss": 1.274407148361206, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06827761232852936, "rewards/margins": 0.0013451860286295414, "rewards/rejected": -0.06962279975414276, "step": 2200 }, { "epoch": 1.290134267367192, "grad_norm": 1.1800728999548882, "learning_rate": 1.0635890745287928e-06, "log_odds_chosen": 0.2673703730106354, "log_odds_ratio": -0.5777344703674316, "logits/chosen": -3.513185977935791, "logits/rejected": -3.6656200885772705, "logps/chosen": -1.3568477630615234, "logps/rejected": -1.5554181337356567, "loss": 1.3306, "nll_loss": 1.3892247676849365, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.06784238666296005, "rewards/margins": 0.009928522631525993, "rewards/rejected": -0.0777709111571312, "step": 2210 }, { "epoch": 1.295971978984238, "grad_norm": 1.0239703915583547, "learning_rate": 1.0611908999450224e-06, "log_odds_chosen": 0.17874717712402344, "log_odds_ratio": -0.6415315270423889, "logits/chosen": -3.5245883464813232, "logits/rejected": -3.780057907104492, "logps/chosen": -1.3992094993591309, "logps/rejected": -1.5496389865875244, "loss": 1.4044, "nll_loss": 1.3946822881698608, "rewards/accuracies": 0.5, "rewards/chosen": -0.06996047496795654, "rewards/margins": 0.007521461695432663, "rewards/rejected": -0.0774819403886795, "step": 2220 }, { "epoch": 1.3018096906012844, "grad_norm": 1.1909109110539167, "learning_rate": 1.058808874719067e-06, "log_odds_chosen": 0.14923596382141113, "log_odds_ratio": -0.6346112489700317, "logits/chosen": -3.855391025543213, "logits/rejected": -3.797222852706909, "logps/chosen": -1.5758622884750366, "logps/rejected": -1.6800258159637451, "loss": 1.4933, "nll_loss": 1.5041024684906006, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07879312336444855, "rewards/margins": 0.00520816957578063, "rewards/rejected": -0.08400129526853561, "step": 2230 }, { "epoch": 1.3076474022183304, "grad_norm": 1.1498306888862888, "learning_rate": 1.0564428184106459e-06, "log_odds_chosen": 0.15304169058799744, "log_odds_ratio": -0.6415072679519653, "logits/chosen": -3.5860087871551514, "logits/rejected": -3.683032989501953, "logps/chosen": -1.5465309619903564, "logps/rejected": -1.6959865093231201, "loss": 1.4556, "nll_loss": 1.5521214008331299, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07732655107975006, "rewards/margins": 0.007472778670489788, "rewards/rejected": -0.08479932695627213, "step": 2240 }, { "epoch": 1.3134851138353765, "grad_norm": 0.9609600396223025, "learning_rate": 1.0540925533894598e-06, "log_odds_chosen": -0.05459030717611313, "log_odds_ratio": -0.7655020952224731, "logits/chosen": -3.690744400024414, "logits/rejected": -3.6361236572265625, "logps/chosen": -1.640801191329956, "logps/rejected": -1.6325104236602783, "loss": 1.3784, "nll_loss": 1.470417857170105, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.08204005658626556, "rewards/margins": -0.0004145331622567028, "rewards/rejected": -0.08162552118301392, "step": 2250 }, { "epoch": 1.3193228254524225, "grad_norm": 1.048299743935012, "learning_rate": 1.0517579047791782e-06, "log_odds_chosen": 0.033336538821458817, "log_odds_ratio": -0.6899900436401367, "logits/chosen": -3.8801064491271973, "logits/rejected": -3.7832789421081543, "logps/chosen": -1.4965770244598389, "logps/rejected": -1.5293926000595093, "loss": 1.433, "nll_loss": 1.352863073348999, "rewards/accuracies": 0.5, "rewards/chosen": -0.0748288556933403, "rewards/margins": 0.0016407858347520232, "rewards/rejected": -0.07646963745355606, "step": 2260 }, { "epoch": 1.3251605370694688, "grad_norm": 2.032788705430529, "learning_rate": 1.049438700402784e-06, "log_odds_chosen": 0.42755231261253357, "log_odds_ratio": -0.5227924585342407, "logits/chosen": -3.6507701873779297, "logits/rejected": -3.817701816558838, "logps/chosen": -1.1761362552642822, "logps/rejected": -1.4319543838500977, "loss": 1.3841, "nll_loss": 1.3190925121307373, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.058806806802749634, "rewards/margins": 0.012790912762284279, "rewards/rejected": -0.07159771770238876, "step": 2270 }, { "epoch": 1.3309982486865148, "grad_norm": 1.5826080032193457, "learning_rate": 1.0471347707292389e-06, "log_odds_chosen": 0.2578853666782379, "log_odds_ratio": -0.5916306376457214, "logits/chosen": -3.755441665649414, "logits/rejected": -3.804105758666992, "logps/chosen": -1.4698597192764282, "logps/rejected": -1.6769144535064697, "loss": 1.3661, "nll_loss": 1.431014895439148, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07349297404289246, "rewards/margins": 0.01035275124013424, "rewards/rejected": -0.08384573459625244, "step": 2280 }, { "epoch": 1.3368359603035609, "grad_norm": 1.0281136216460522, "learning_rate": 1.0448459488214322e-06, "log_odds_chosen": 0.23049958050251007, "log_odds_ratio": -0.6142726540565491, "logits/chosen": -3.7891128063201904, "logits/rejected": -3.8232421875, "logps/chosen": -1.425795078277588, "logps/rejected": -1.5712870359420776, "loss": 1.4021, "nll_loss": 1.4236818552017212, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07128976285457611, "rewards/margins": 0.007274591363966465, "rewards/rejected": -0.07856435328722, "step": 2290 }, { "epoch": 1.3426736719206072, "grad_norm": 1.4819335314732858, "learning_rate": 1.042572070285374e-06, "log_odds_chosen": -0.06682056188583374, "log_odds_ratio": -0.7597867846488953, "logits/chosen": -3.63494610786438, "logits/rejected": -3.9175307750701904, "logps/chosen": -1.3917056322097778, "logps/rejected": -1.3679358959197998, "loss": 1.4061, "nll_loss": 1.2918730974197388, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06958527863025665, "rewards/margins": -0.001188483089208603, "rewards/rejected": -0.06839679181575775, "step": 2300 }, { "epoch": 1.3485113835376532, "grad_norm": 1.4734066905090044, "learning_rate": 1.0403129732205989e-06, "log_odds_chosen": 0.39234742522239685, "log_odds_ratio": -0.5433268547058105, "logits/chosen": -3.747788667678833, "logits/rejected": -3.831103563308716, "logps/chosen": -1.3418188095092773, "logps/rejected": -1.6337487697601318, "loss": 1.3589, "nll_loss": 1.2873177528381348, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0670909434556961, "rewards/margins": 0.014596494846045971, "rewards/rejected": -0.08168744295835495, "step": 2310 }, { "epoch": 1.3543490951546993, "grad_norm": 1.3623432072154782, "learning_rate": 1.0380684981717496e-06, "log_odds_chosen": 0.19461138546466827, "log_odds_ratio": -0.6248118281364441, "logits/chosen": -3.711345672607422, "logits/rejected": -3.8566811084747314, "logps/chosen": -1.4569222927093506, "logps/rejected": -1.6182334423065186, "loss": 1.3623, "nll_loss": 1.5083211660385132, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07284610718488693, "rewards/margins": 0.008065571077167988, "rewards/rejected": -0.08091168105602264, "step": 2320 }, { "epoch": 1.3601868067717455, "grad_norm": 1.547354227245893, "learning_rate": 1.0358384880813022e-06, "log_odds_chosen": 0.09989718347787857, "log_odds_ratio": -0.6698582768440247, "logits/chosen": -3.9186275005340576, "logits/rejected": -3.9040687084198, "logps/chosen": -1.364043951034546, "logps/rejected": -1.4672632217407227, "loss": 1.4017, "nll_loss": 1.2522133588790894, "rewards/accuracies": 0.5, "rewards/chosen": -0.0682021975517273, "rewards/margins": 0.00516096968203783, "rewards/rejected": -0.07336317002773285, "step": 2330 }, { "epoch": 1.3660245183887916, "grad_norm": 1.3917310722386715, "learning_rate": 1.033622788243404e-06, "log_odds_chosen": 0.334563285112381, "log_odds_ratio": -0.5592058897018433, "logits/chosen": -3.665653944015503, "logits/rejected": -3.791041851043701, "logps/chosen": -1.3204638957977295, "logps/rejected": -1.5704151391983032, "loss": 1.4476, "nll_loss": 1.4369854927062988, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06602319329977036, "rewards/margins": 0.012497561052441597, "rewards/rejected": -0.0785207524895668, "step": 2340 }, { "epoch": 1.3718622300058376, "grad_norm": 0.9509492994092668, "learning_rate": 1.0314212462587935e-06, "log_odds_chosen": 0.049987345933914185, "log_odds_ratio": -0.6942520141601562, "logits/chosen": -3.8731677532196045, "logits/rejected": -3.851729154586792, "logps/chosen": -1.6609303951263428, "logps/rejected": -1.7073476314544678, "loss": 1.4536, "nll_loss": 1.4803273677825928, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.08304651826620102, "rewards/margins": 0.002320867031812668, "rewards/rejected": -0.08536738902330399, "step": 2350 }, { "epoch": 1.377699941622884, "grad_norm": 1.411894901052856, "learning_rate": 1.029233711990773e-06, "log_odds_chosen": 0.2724502682685852, "log_odds_ratio": -0.5821642875671387, "logits/chosen": -3.6930480003356934, "logits/rejected": -3.773982286453247, "logps/chosen": -1.3126004934310913, "logps/rejected": -1.5106394290924072, "loss": 1.2968, "nll_loss": 1.4116160869598389, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06563003361225128, "rewards/margins": 0.009901942685246468, "rewards/rejected": -0.0755319744348526, "step": 2360 }, { "epoch": 1.38353765323993, "grad_norm": 1.5879187679170519, "learning_rate": 1.0270600375222014e-06, "log_odds_chosen": -0.09194701910018921, "log_odds_ratio": -0.7851167321205139, "logits/chosen": -3.4835381507873535, "logits/rejected": -3.483248472213745, "logps/chosen": -1.4609125852584839, "logps/rejected": -1.406585931777954, "loss": 1.3809, "nll_loss": 1.3406339883804321, "rewards/accuracies": 0.30000001192092896, "rewards/chosen": -0.07304563373327255, "rewards/margins": -0.002716336864978075, "rewards/rejected": -0.07032930105924606, "step": 2370 }, { "epoch": 1.389375364856976, "grad_norm": 1.129559065311055, "learning_rate": 1.0249000771134847e-06, "log_odds_chosen": -0.12659236788749695, "log_odds_ratio": -0.7947815656661987, "logits/chosen": -3.7242774963378906, "logits/rejected": -3.8003833293914795, "logps/chosen": -1.3866753578186035, "logps/rejected": -1.3418368101119995, "loss": 1.4244, "nll_loss": 1.4498170614242554, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0693337619304657, "rewards/margins": -0.0022419318556785583, "rewards/rejected": -0.06709183752536774, "step": 2380 }, { "epoch": 1.3952130764740223, "grad_norm": 1.0014032962256134, "learning_rate": 1.022753687161533e-06, "log_odds_chosen": 0.2771717309951782, "log_odds_ratio": -0.5859783887863159, "logits/chosen": -3.7923240661621094, "logits/rejected": -3.7076022624969482, "logps/chosen": -1.6886268854141235, "logps/rejected": -1.9372059106826782, "loss": 1.4837, "nll_loss": 1.6699087619781494, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08443133533000946, "rewards/margins": 0.012428943067789078, "rewards/rejected": -0.09686028212308884, "step": 2390 }, { "epoch": 1.4010507880910683, "grad_norm": 1.7198594564001188, "learning_rate": 1.0206207261596577e-06, "log_odds_chosen": 0.24260301887989044, "log_odds_ratio": -0.6053664088249207, "logits/chosen": -3.7028098106384277, "logits/rejected": -3.7297215461730957, "logps/chosen": -1.4635226726531982, "logps/rejected": -1.6551761627197266, "loss": 1.4637, "nll_loss": 1.4993679523468018, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07317614555358887, "rewards/margins": 0.009582673199474812, "rewards/rejected": -0.0827588140964508, "step": 2400 }, { "epoch": 1.4068884997081144, "grad_norm": 1.529507510318243, "learning_rate": 1.0185010546583882e-06, "log_odds_chosen": 0.5569783449172974, "log_odds_ratio": -0.4847480356693268, "logits/chosen": -3.755013942718506, "logits/rejected": -3.923409938812256, "logps/chosen": -1.285280466079712, "logps/rejected": -1.6695480346679688, "loss": 1.3883, "nll_loss": 1.2036062479019165, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06426401436328888, "rewards/margins": 0.01921338215470314, "rewards/rejected": -0.08347740024328232, "step": 2410 }, { "epoch": 1.4127262113251606, "grad_norm": 1.2148896925148607, "learning_rate": 1.0163945352271773e-06, "log_odds_chosen": 0.4224270284175873, "log_odds_ratio": -0.5318037867546082, "logits/chosen": -3.466093063354492, "logits/rejected": -3.9781346321105957, "logps/chosen": -1.294390320777893, "logps/rejected": -1.5730783939361572, "loss": 1.4477, "nll_loss": 1.3584330081939697, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06471951305866241, "rewards/margins": 0.013934403657913208, "rewards/rejected": -0.07865391671657562, "step": 2420 }, { "epoch": 1.4185639229422067, "grad_norm": 1.7316109504945465, "learning_rate": 1.0143010324169743e-06, "log_odds_chosen": 0.6294891238212585, "log_odds_ratio": -0.4988594055175781, "logits/chosen": -3.4846107959747314, "logits/rejected": -3.8661117553710938, "logps/chosen": -1.2527315616607666, "logps/rejected": -1.6962683200836182, "loss": 1.332, "nll_loss": 1.2120393514633179, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06263657659292221, "rewards/margins": 0.0221768319606781, "rewards/rejected": -0.08481340855360031, "step": 2430 }, { "epoch": 1.4244016345592527, "grad_norm": 1.2902652519897755, "learning_rate": 1.0122204127236452e-06, "log_odds_chosen": 0.4317292273044586, "log_odds_ratio": -0.518875002861023, "logits/chosen": -3.427460193634033, "logits/rejected": -3.605724334716797, "logps/chosen": -1.2946254014968872, "logps/rejected": -1.632815957069397, "loss": 1.3172, "nll_loss": 1.2532665729522705, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.06473126262426376, "rewards/margins": 0.016909528523683548, "rewards/rejected": -0.08164079487323761, "step": 2440 }, { "epoch": 1.430239346176299, "grad_norm": 1.0194414295785068, "learning_rate": 1.0101525445522107e-06, "log_odds_chosen": 0.2644456624984741, "log_odds_ratio": -0.6275379061698914, "logits/chosen": -3.696159839630127, "logits/rejected": -3.848839282989502, "logps/chosen": -1.1677498817443848, "logps/rejected": -1.3160344362258911, "loss": 1.3385, "nll_loss": 1.2175195217132568, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05838749557733536, "rewards/margins": 0.007414227817207575, "rewards/rejected": -0.0658017247915268, "step": 2450 }, { "epoch": 1.436077057793345, "grad_norm": 1.2552352550781734, "learning_rate": 1.0080972981818898e-06, "log_odds_chosen": 0.29210177063941956, "log_odds_ratio": -0.5951721668243408, "logits/chosen": -3.683353900909424, "logits/rejected": -3.862502336502075, "logps/chosen": -1.2800837755203247, "logps/rejected": -1.5328537225723267, "loss": 1.4604, "nll_loss": 1.3073418140411377, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06400418281555176, "rewards/margins": 0.012638501822948456, "rewards/rejected": -0.07664267718791962, "step": 2460 }, { "epoch": 1.441914769410391, "grad_norm": 1.012005669018087, "learning_rate": 1.0060545457319173e-06, "log_odds_chosen": 0.3788088262081146, "log_odds_ratio": -0.5632410645484924, "logits/chosen": -3.5881595611572266, "logits/rejected": -3.715555191040039, "logps/chosen": -1.305596113204956, "logps/rejected": -1.606146216392517, "loss": 1.3098, "nll_loss": 1.3053147792816162, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06527981162071228, "rewards/margins": 0.015027513727545738, "rewards/rejected": -0.08030731976032257, "step": 2470 }, { "epoch": 1.4477524810274374, "grad_norm": 1.862297240030745, "learning_rate": 1.0040241611281238e-06, "log_odds_chosen": 0.6236267685890198, "log_odds_ratio": -0.4579935073852539, "logits/chosen": -3.6291000843048096, "logits/rejected": -3.684324264526367, "logps/chosen": -1.2971131801605225, "logps/rejected": -1.7476117610931396, "loss": 1.3914, "nll_loss": 1.2509955167770386, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.06485565751791, "rewards/margins": 0.022524921223521233, "rewards/rejected": -0.08738058060407639, "step": 2480 }, { "epoch": 1.4535901926444834, "grad_norm": 1.6796170413958773, "learning_rate": 1.002006020070253e-06, "log_odds_chosen": 0.4925965368747711, "log_odds_ratio": -0.5481725931167603, "logits/chosen": -3.4488468170166016, "logits/rejected": -3.6261839866638184, "logps/chosen": -1.1502301692962646, "logps/rejected": -1.5017732381820679, "loss": 1.404, "nll_loss": 1.207037329673767, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.057511501014232635, "rewards/margins": 0.017577150836586952, "rewards/rejected": -0.07508866488933563, "step": 2490 }, { "epoch": 1.4594279042615295, "grad_norm": 0.906520248108172, "learning_rate": 1.0000000000000002e-06, "log_odds_chosen": 0.024085920304059982, "log_odds_ratio": -0.7688292860984802, "logits/chosen": -3.7098541259765625, "logits/rejected": -3.714428424835205, "logps/chosen": -1.5487407445907593, "logps/rejected": -1.630674123764038, "loss": 1.4198, "nll_loss": 1.4594240188598633, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.07743703573942184, "rewards/margins": 0.004096672870218754, "rewards/rejected": -0.08153371512889862, "step": 2500 }, { "epoch": 1.4652656158785757, "grad_norm": 1.4726466965962564, "learning_rate": 9.98005980069749e-07, "log_odds_chosen": 0.4154183864593506, "log_odds_ratio": -0.5525069236755371, "logits/chosen": -3.680224657058716, "logits/rejected": -3.866144895553589, "logps/chosen": -1.2275493144989014, "logps/rejected": -1.5176894664764404, "loss": 1.3037, "nll_loss": 1.1887648105621338, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.061377473175525665, "rewards/margins": 0.014507013373076916, "rewards/rejected": -0.07588447630405426, "step": 2510 }, { "epoch": 1.4711033274956218, "grad_norm": 1.2487253413108321, "learning_rate": 9.960238411119948e-07, "log_odds_chosen": 0.3063370883464813, "log_odds_ratio": -0.5804927945137024, "logits/chosen": -3.780780076980591, "logits/rejected": -3.7850489616394043, "logps/chosen": -1.220092535018921, "logps/rejected": -1.4065858125686646, "loss": 1.416, "nll_loss": 1.2188657522201538, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.061004627496004105, "rewards/margins": 0.009324668906629086, "rewards/rejected": -0.07032929360866547, "step": 2520 }, { "epoch": 1.4769410391126678, "grad_norm": 1.1550183209671123, "learning_rate": 9.9405346560943e-07, "log_odds_chosen": 0.11556539684534073, "log_odds_ratio": -0.6511185169219971, "logits/chosen": -3.811279773712158, "logits/rejected": -3.8323287963867188, "logps/chosen": -1.631089448928833, "logps/rejected": -1.7259918451309204, "loss": 1.4474, "nll_loss": 1.497432827949524, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08155447989702225, "rewards/margins": 0.004745109472423792, "rewards/rejected": -0.0862995833158493, "step": 2530 }, { "epoch": 1.4827787507297139, "grad_norm": 0.9931307830498537, "learning_rate": 9.920947376656814e-07, "log_odds_chosen": -0.08809924125671387, "log_odds_ratio": -0.7671037912368774, "logits/chosen": -3.731898784637451, "logits/rejected": -3.910121440887451, "logps/chosen": -1.5516074895858765, "logps/rejected": -1.4805986881256104, "loss": 1.3996, "nll_loss": 1.5128743648529053, "rewards/accuracies": 0.5, "rewards/chosen": -0.07758037745952606, "rewards/margins": -0.003550440538674593, "rewards/rejected": -0.07402992993593216, "step": 2540 }, { "epoch": 1.4886164623467601, "grad_norm": 1.4674011001963065, "learning_rate": 9.901475429766744e-07, "log_odds_chosen": 0.46370792388916016, "log_odds_ratio": -0.49628329277038574, "logits/chosen": -3.5763003826141357, "logits/rejected": -3.6213817596435547, "logps/chosen": -1.3962414264678955, "logps/rejected": -1.7590868473052979, "loss": 1.4104, "nll_loss": 1.39280104637146, "rewards/accuracies": 1.0, "rewards/chosen": -0.06981206685304642, "rewards/margins": 0.018142273649573326, "rewards/rejected": -0.08795434236526489, "step": 2550 }, { "epoch": 1.4944541739638062, "grad_norm": 1.2915061979993259, "learning_rate": 9.882117688026186e-07, "log_odds_chosen": 0.17654255032539368, "log_odds_ratio": -0.6233395934104919, "logits/chosen": -3.6934502124786377, "logits/rejected": -3.8409876823425293, "logps/chosen": -1.534360408782959, "logps/rejected": -1.6756503582000732, "loss": 1.3526, "nll_loss": 1.465691328048706, "rewards/accuracies": 0.5, "rewards/chosen": -0.07671802490949631, "rewards/margins": 0.007064486853778362, "rewards/rejected": -0.08378250896930695, "step": 2560 }, { "epoch": 1.5002918855808525, "grad_norm": 1.1149911441038023, "learning_rate": 9.862873039405896e-07, "log_odds_chosen": 0.26600131392478943, "log_odds_ratio": -0.6096302270889282, "logits/chosen": -3.570150852203369, "logits/rejected": -3.6883111000061035, "logps/chosen": -1.290222406387329, "logps/rejected": -1.4855566024780273, "loss": 1.3054, "nll_loss": 1.310992956161499, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06451112031936646, "rewards/margins": 0.009766707196831703, "rewards/rejected": -0.0742778331041336, "step": 2570 }, { "epoch": 1.5061295971978983, "grad_norm": 1.8915238119214892, "learning_rate": 9.843740386976973e-07, "log_odds_chosen": 0.09631429612636566, "log_odds_ratio": -0.6705812215805054, "logits/chosen": -3.588689088821411, "logits/rejected": -3.575705051422119, "logps/chosen": -1.2965900897979736, "logps/rejected": -1.3685389757156372, "loss": 1.3345, "nll_loss": 1.2925102710723877, "rewards/accuracies": 0.30000001192092896, "rewards/chosen": -0.0648295134305954, "rewards/margins": 0.0035974415950477123, "rewards/rejected": -0.0684269517660141, "step": 2580 }, { "epoch": 1.5119673088149446, "grad_norm": 0.8571476315793689, "learning_rate": 9.824718648648244e-07, "log_odds_chosen": 0.18465152382850647, "log_odds_ratio": -0.6445422172546387, "logits/chosen": -3.4747211933135986, "logits/rejected": -3.8081536293029785, "logps/chosen": -1.3001760244369507, "logps/rejected": -1.389573097229004, "loss": 1.333, "nll_loss": 1.1663968563079834, "rewards/accuracies": 0.5, "rewards/chosen": -0.06500881165266037, "rewards/margins": 0.004469842649996281, "rewards/rejected": -0.06947865337133408, "step": 2590 }, { "epoch": 1.5178050204319906, "grad_norm": 1.4592605273303891, "learning_rate": 9.805806756909204e-07, "log_odds_chosen": 0.10690464824438095, "log_odds_ratio": -0.6752808094024658, "logits/chosen": -3.504013776779175, "logits/rejected": -3.6739940643310547, "logps/chosen": -1.3634065389633179, "logps/rejected": -1.444547414779663, "loss": 1.4557, "nll_loss": 1.3778032064437866, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06817033141851425, "rewards/margins": 0.004057037178426981, "rewards/rejected": -0.0722273588180542, "step": 2600 }, { "epoch": 1.5236427320490367, "grad_norm": 1.0456361875228848, "learning_rate": 9.787003658578392e-07, "log_odds_chosen": 0.040711838752031326, "log_odds_ratio": -0.7095690369606018, "logits/chosen": -3.327239513397217, "logits/rejected": -3.5622897148132324, "logps/chosen": -1.6451056003570557, "logps/rejected": -1.7012720108032227, "loss": 1.4275, "nll_loss": 1.5808091163635254, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0822552889585495, "rewards/margins": 0.002808320103213191, "rewards/rejected": -0.08506359905004501, "step": 2610 }, { "epoch": 1.529480443666083, "grad_norm": 1.224630142449674, "learning_rate": 9.768308314557044e-07, "log_odds_chosen": 0.4854598045349121, "log_odds_ratio": -0.5168899893760681, "logits/chosen": -3.6914756298065186, "logits/rejected": -3.759185791015625, "logps/chosen": -1.3598202466964722, "logps/rejected": -1.7397029399871826, "loss": 1.4432, "nll_loss": 1.3511615991592407, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06799101829528809, "rewards/margins": 0.018994133919477463, "rewards/rejected": -0.08698514848947525, "step": 2620 }, { "epoch": 1.535318155283129, "grad_norm": 1.524945034705638, "learning_rate": 9.749719699587899e-07, "log_odds_chosen": 0.3984687626361847, "log_odds_ratio": -0.533967137336731, "logits/chosen": -3.624009609222412, "logits/rejected": -3.8586039543151855, "logps/chosen": -1.2626796960830688, "logps/rejected": -1.5416799783706665, "loss": 1.3556, "nll_loss": 1.3171542882919312, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.06313398480415344, "rewards/margins": 0.013950005173683167, "rewards/rejected": -0.07708398997783661, "step": 2630 }, { "epoch": 1.541155866900175, "grad_norm": 1.218712724641104, "learning_rate": 9.731236802019038e-07, "log_odds_chosen": 0.43366748094558716, "log_odds_ratio": -0.5384989976882935, "logits/chosen": -3.5738883018493652, "logits/rejected": -3.876579761505127, "logps/chosen": -1.2914745807647705, "logps/rejected": -1.6316293478012085, "loss": 1.3445, "nll_loss": 1.298351526260376, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0645737275481224, "rewards/margins": 0.017007743939757347, "rewards/rejected": -0.0815814733505249, "step": 2640 }, { "epoch": 1.5469935785172213, "grad_norm": 1.2406652877312498, "learning_rate": 9.712858623572642e-07, "log_odds_chosen": 0.568976640701294, "log_odds_ratio": -0.4777490198612213, "logits/chosen": -3.6247143745422363, "logits/rejected": -3.8142571449279785, "logps/chosen": -1.2485606670379639, "logps/rejected": -1.6792104244232178, "loss": 1.3443, "nll_loss": 1.3708950281143188, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06242803484201431, "rewards/margins": 0.021532487124204636, "rewards/rejected": -0.08396051824092865, "step": 2650 }, { "epoch": 1.5528312901342674, "grad_norm": 1.1650223783815625, "learning_rate": 9.694584179118515e-07, "log_odds_chosen": 0.36115869879722595, "log_odds_ratio": -0.5437900424003601, "logits/chosen": -3.2873942852020264, "logits/rejected": -3.724879503250122, "logps/chosen": -1.425110101699829, "logps/rejected": -1.6902830600738525, "loss": 1.4008, "nll_loss": 1.2924379110336304, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.07125551253557205, "rewards/margins": 0.013258648104965687, "rewards/rejected": -0.08451415598392487, "step": 2660 }, { "epoch": 1.5586690017513134, "grad_norm": 1.4680530050030718, "learning_rate": 9.676412496452296e-07, "log_odds_chosen": 0.3083101212978363, "log_odds_ratio": -0.6038891673088074, "logits/chosen": -3.4537301063537598, "logits/rejected": -3.634667158126831, "logps/chosen": -1.2902677059173584, "logps/rejected": -1.5339442491531372, "loss": 1.3967, "nll_loss": 1.3893866539001465, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06451338529586792, "rewards/margins": 0.012183822691440582, "rewards/rejected": -0.0766972154378891, "step": 2670 }, { "epoch": 1.5645067133683597, "grad_norm": 1.5712025294655396, "learning_rate": 9.658342616078198e-07, "log_odds_chosen": 0.19180722534656525, "log_odds_ratio": -0.6609214544296265, "logits/chosen": -3.4631190299987793, "logits/rejected": -3.6281306743621826, "logps/chosen": -1.3951839208602905, "logps/rejected": -1.5413448810577393, "loss": 1.4094, "nll_loss": 1.3890641927719116, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06975920498371124, "rewards/margins": 0.007308043539524078, "rewards/rejected": -0.07706724852323532, "step": 2680 }, { "epoch": 1.5703444249854057, "grad_norm": 2.3272637121482744, "learning_rate": 9.640373590996239e-07, "log_odds_chosen": 0.23623482882976532, "log_odds_ratio": -0.6141432523727417, "logits/chosen": -3.425262451171875, "logits/rejected": -3.7565066814422607, "logps/chosen": -1.3650022745132446, "logps/rejected": -1.5399665832519531, "loss": 1.3666, "nll_loss": 1.412691593170166, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06825011968612671, "rewards/margins": 0.008748212829232216, "rewards/rejected": -0.07699833065271378, "step": 2690 }, { "epoch": 1.5761821366024518, "grad_norm": 1.5976304811291777, "learning_rate": 9.622504486493764e-07, "log_odds_chosen": 0.30844926834106445, "log_odds_ratio": -0.5763429403305054, "logits/chosen": -3.7817561626434326, "logits/rejected": -3.8746910095214844, "logps/chosen": -1.238752007484436, "logps/rejected": -1.445845365524292, "loss": 1.34, "nll_loss": 1.2724225521087646, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0619376003742218, "rewards/margins": 0.010354666039347649, "rewards/rejected": -0.072292260825634, "step": 2700 }, { "epoch": 1.582019848219498, "grad_norm": 1.2055801489192284, "learning_rate": 9.604734379941232e-07, "log_odds_chosen": 0.6291456818580627, "log_odds_ratio": -0.4710697531700134, "logits/chosen": -3.0343692302703857, "logits/rejected": -3.557002544403076, "logps/chosen": -0.8583572506904602, "logps/rejected": -1.2207945585250854, "loss": 1.3317, "nll_loss": 1.1168544292449951, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.04291786625981331, "rewards/margins": 0.0181218683719635, "rewards/rejected": -0.06103972718119621, "step": 2710 }, { "epoch": 1.587857559836544, "grad_norm": 0.9696751980223488, "learning_rate": 9.58706236059213e-07, "log_odds_chosen": 0.6333138346672058, "log_odds_ratio": -0.5033305287361145, "logits/chosen": -3.58817720413208, "logits/rejected": -3.5761561393737793, "logps/chosen": -1.3022533655166626, "logps/rejected": -1.700783133506775, "loss": 1.4125, "nll_loss": 1.498599648475647, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06511266529560089, "rewards/margins": 0.01992649771273136, "rewards/rejected": -0.0850391685962677, "step": 2720 }, { "epoch": 1.5936952714535901, "grad_norm": 1.3599988747526675, "learning_rate": 9.56948752938691e-07, "log_odds_chosen": -0.009177133440971375, "log_odds_ratio": -0.706242561340332, "logits/chosen": -3.8921592235565186, "logits/rejected": -3.7516307830810547, "logps/chosen": -1.4468448162078857, "logps/rejected": -1.4342509508132935, "loss": 1.4383, "nll_loss": 1.4193360805511475, "rewards/accuracies": 0.5, "rewards/chosen": -0.07234225422143936, "rewards/margins": -0.0006297050276771188, "rewards/rejected": -0.07171254605054855, "step": 2730 }, { "epoch": 1.5995329830706364, "grad_norm": 1.3892577915709765, "learning_rate": 9.552008998760876e-07, "log_odds_chosen": 0.19850873947143555, "log_odds_ratio": -0.6156947016716003, "logits/chosen": -3.6557726860046387, "logits/rejected": -3.6417064666748047, "logps/chosen": -1.370289921760559, "logps/rejected": -1.507373571395874, "loss": 1.4091, "nll_loss": 1.3163819313049316, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06851449608802795, "rewards/margins": 0.006854187697172165, "rewards/rejected": -0.07536868751049042, "step": 2740 }, { "epoch": 1.6053706946876825, "grad_norm": 1.0350798195093214, "learning_rate": 9.534625892455924e-07, "log_odds_chosen": 0.2591972351074219, "log_odds_ratio": -0.5966399908065796, "logits/chosen": -3.425907611846924, "logits/rejected": -3.7787795066833496, "logps/chosen": -1.1488932371139526, "logps/rejected": -1.2841131687164307, "loss": 1.3357, "nll_loss": 1.1115930080413818, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05744466930627823, "rewards/margins": 0.006760991178452969, "rewards/rejected": -0.06420565396547318, "step": 2750 }, { "epoch": 1.6112084063047285, "grad_norm": 1.1192257562555097, "learning_rate": 9.517337345336012e-07, "log_odds_chosen": 0.597819447517395, "log_odds_ratio": -0.47445765137672424, "logits/chosen": -3.301176071166992, "logits/rejected": -3.739659070968628, "logps/chosen": -1.2674038410186768, "logps/rejected": -1.6690107583999634, "loss": 1.3819, "nll_loss": 1.2866109609603882, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.06337019056081772, "rewards/margins": 0.020080335438251495, "rewards/rejected": -0.08345052599906921, "step": 2760 }, { "epoch": 1.6170461179217748, "grad_norm": 1.217262872917631, "learning_rate": 9.50014250320633e-07, "log_odds_chosen": 0.2518064081668854, "log_odds_ratio": -0.66081702709198, "logits/chosen": -3.400434970855713, "logits/rejected": -3.666029691696167, "logps/chosen": -1.279827356338501, "logps/rejected": -1.4387468099594116, "loss": 1.3547, "nll_loss": 1.324276328086853, "rewards/accuracies": 0.5, "rewards/chosen": -0.06399136781692505, "rewards/margins": 0.00794597715139389, "rewards/rejected": -0.07193733751773834, "step": 2770 }, { "epoch": 1.6228838295388208, "grad_norm": 0.9427217846336285, "learning_rate": 9.483040522636021e-07, "log_odds_chosen": 0.5197606086730957, "log_odds_ratio": -0.48741045594215393, "logits/chosen": -3.634812116622925, "logits/rejected": -3.8917477130889893, "logps/chosen": -1.2917941808700562, "logps/rejected": -1.6765391826629639, "loss": 1.3286, "nll_loss": 1.317427635192871, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.06458970159292221, "rewards/margins": 0.019237253814935684, "rewards/rejected": -0.0838269591331482, "step": 2780 }, { "epoch": 1.6287215411558669, "grad_norm": 2.5099370326623336, "learning_rate": 9.466030570784414e-07, "log_odds_chosen": 0.7833268642425537, "log_odds_ratio": -0.46350663900375366, "logits/chosen": -3.4418234825134277, "logits/rejected": -3.8244667053222656, "logps/chosen": -1.1056439876556396, "logps/rejected": -1.5612152814865112, "loss": 1.3179, "nll_loss": 1.303929090499878, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.05528220534324646, "rewards/margins": 0.02277856506407261, "rewards/rejected": -0.07806076854467392, "step": 2790 }, { "epoch": 1.6345592527729131, "grad_norm": 1.0383207430808565, "learning_rate": 9.449111825230681e-07, "log_odds_chosen": 0.46022334694862366, "log_odds_ratio": -0.549196720123291, "logits/chosen": -3.58575701713562, "logits/rejected": -3.384629487991333, "logps/chosen": -1.372586727142334, "logps/rejected": -1.7203376293182373, "loss": 1.381, "nll_loss": 1.34969961643219, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06862933933734894, "rewards/margins": 0.017387548461556435, "rewards/rejected": -0.08601687848567963, "step": 2800 }, { "epoch": 1.6403969643899592, "grad_norm": 1.7161221865944924, "learning_rate": 9.432283473806812e-07, "log_odds_chosen": 0.324603796005249, "log_odds_ratio": -0.5718523263931274, "logits/chosen": -3.4963722229003906, "logits/rejected": -3.4521167278289795, "logps/chosen": -1.2941627502441406, "logps/rejected": -1.5069636106491089, "loss": 1.3736, "nll_loss": 1.1936661005020142, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06470812857151031, "rewards/margins": 0.01064004935324192, "rewards/rejected": -0.07534817606210709, "step": 2810 }, { "epoch": 1.6462346760070052, "grad_norm": 1.0811289754317013, "learning_rate": 9.415544714433869e-07, "log_odds_chosen": 0.21617329120635986, "log_odds_ratio": -0.667104184627533, "logits/chosen": -3.6514415740966797, "logits/rejected": -3.7704226970672607, "logps/chosen": -1.3015691041946411, "logps/rejected": -1.417405366897583, "loss": 1.3652, "nll_loss": 1.2332332134246826, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06507845968008041, "rewards/margins": 0.005791815463453531, "rewards/rejected": -0.07087027281522751, "step": 2820 }, { "epoch": 1.6520723876240515, "grad_norm": 1.0318029846154457, "learning_rate": 9.398894754961406e-07, "log_odds_chosen": 0.08640475571155548, "log_odds_ratio": -0.6734355092048645, "logits/chosen": -3.7713780403137207, "logits/rejected": -3.539207935333252, "logps/chosen": -1.340451717376709, "logps/rejected": -1.4227691888809204, "loss": 1.3934, "nll_loss": 1.3438020944595337, "rewards/accuracies": 0.5, "rewards/chosen": -0.06702258437871933, "rewards/margins": 0.004115872085094452, "rewards/rejected": -0.07113845646381378, "step": 2830 }, { "epoch": 1.6579100992410973, "grad_norm": 1.5147558869363986, "learning_rate": 9.38233281301002e-07, "log_odds_chosen": 0.5373395681381226, "log_odds_ratio": -0.5344149470329285, "logits/chosen": -3.4235713481903076, "logits/rejected": -3.87060284614563, "logps/chosen": -1.2229745388031006, "logps/rejected": -1.50307035446167, "loss": 1.3287, "nll_loss": 1.2456796169281006, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06114872545003891, "rewards/margins": 0.014004791155457497, "rewards/rejected": -0.07515352219343185, "step": 2840 }, { "epoch": 1.6637478108581436, "grad_norm": 0.9218850147435372, "learning_rate": 9.365858115816941e-07, "log_odds_chosen": 0.5991418957710266, "log_odds_ratio": -0.49222683906555176, "logits/chosen": -3.531224489212036, "logits/rejected": -3.7082862854003906, "logps/chosen": -1.1877561807632446, "logps/rejected": -1.5936410427093506, "loss": 1.3219, "nll_loss": 1.2025446891784668, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.05938781052827835, "rewards/margins": 0.020294252783060074, "rewards/rejected": -0.07968205213546753, "step": 2850 }, { "epoch": 1.6695855224751899, "grad_norm": 1.150265272398417, "learning_rate": 9.349469900084572e-07, "log_odds_chosen": -0.05002538114786148, "log_odds_ratio": -0.7290476560592651, "logits/chosen": -3.742460250854492, "logits/rejected": -3.7234299182891846, "logps/chosen": -1.6801397800445557, "logps/rejected": -1.643926978111267, "loss": 1.4029, "nll_loss": 1.5547322034835815, "rewards/accuracies": 0.30000001192092896, "rewards/chosen": -0.08400698751211166, "rewards/margins": -0.001810651971027255, "rewards/rejected": -0.08219633996486664, "step": 2860 }, { "epoch": 1.6754232340922357, "grad_norm": 1.056004761874569, "learning_rate": 9.333167411831968e-07, "log_odds_chosen": 0.38071125745773315, "log_odds_ratio": -0.5544384717941284, "logits/chosen": -3.694936752319336, "logits/rejected": -3.73411226272583, "logps/chosen": -1.441366195678711, "logps/rejected": -1.7356693744659424, "loss": 1.3286, "nll_loss": 1.376721978187561, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07206831872463226, "rewards/margins": 0.014715144410729408, "rewards/rejected": -0.08678346872329712, "step": 2870 }, { "epoch": 1.681260945709282, "grad_norm": 1.1373946333682214, "learning_rate": 9.316949906249125e-07, "log_odds_chosen": 0.5552781820297241, "log_odds_ratio": -0.4919613301753998, "logits/chosen": -3.5887980461120605, "logits/rejected": -3.8827691078186035, "logps/chosen": -1.189857006072998, "logps/rejected": -1.55269455909729, "loss": 1.3428, "nll_loss": 1.1777489185333252, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05949285626411438, "rewards/margins": 0.01814188063144684, "rewards/rejected": -0.07763473689556122, "step": 2880 }, { "epoch": 1.6870986573263282, "grad_norm": 1.525961129286351, "learning_rate": 9.300816647554058e-07, "log_odds_chosen": 0.21300478279590607, "log_odds_ratio": -0.6173720955848694, "logits/chosen": -3.6265594959259033, "logits/rejected": -3.6181869506835938, "logps/chosen": -1.4269194602966309, "logps/rejected": -1.5666520595550537, "loss": 1.3869, "nll_loss": 1.4192897081375122, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0713459700345993, "rewards/margins": 0.0069866240955889225, "rewards/rejected": -0.07833259552717209, "step": 2890 }, { "epoch": 1.692936368943374, "grad_norm": 1.8643735064271865, "learning_rate": 9.284766908852594e-07, "log_odds_chosen": 0.2483743131160736, "log_odds_ratio": -0.6263710260391235, "logits/chosen": -3.576674222946167, "logits/rejected": -3.653986692428589, "logps/chosen": -1.611710786819458, "logps/rejected": -1.8701308965682983, "loss": 1.357, "nll_loss": 1.4698790311813354, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0805855318903923, "rewards/margins": 0.012921018525958061, "rewards/rejected": -0.09350655972957611, "step": 2900 }, { "epoch": 1.6987740805604203, "grad_norm": 1.2052923815802097, "learning_rate": 9.26879997200081e-07, "log_odds_chosen": 0.46697139739990234, "log_odds_ratio": -0.5409226417541504, "logits/chosen": -3.7470946311950684, "logits/rejected": -3.8049206733703613, "logps/chosen": -1.5110222101211548, "logps/rejected": -1.876797080039978, "loss": 1.4234, "nll_loss": 1.479819893836975, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0755511075258255, "rewards/margins": 0.018288742750883102, "rewards/rejected": -0.0938398465514183, "step": 2910 }, { "epoch": 1.7046117921774664, "grad_norm": 1.146849284744441, "learning_rate": 9.252915127470066e-07, "log_odds_chosen": 0.3437288701534271, "log_odds_ratio": -0.561978280544281, "logits/chosen": -3.523042678833008, "logits/rejected": -3.7783915996551514, "logps/chosen": -1.403624415397644, "logps/rejected": -1.6148353815078735, "loss": 1.4191, "nll_loss": 1.3047053813934326, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0701812207698822, "rewards/margins": 0.010560544207692146, "rewards/rejected": -0.0807417705655098, "step": 2920 }, { "epoch": 1.7104495037945124, "grad_norm": 1.1826696859736647, "learning_rate": 9.23711167421458e-07, "log_odds_chosen": -0.09541534632444382, "log_odds_ratio": -0.7740492224693298, "logits/chosen": -3.558241367340088, "logits/rejected": -3.6637909412384033, "logps/chosen": -1.3991336822509766, "logps/rejected": -1.3571261167526245, "loss": 1.3369, "nll_loss": 1.3164807558059692, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.06995667517185211, "rewards/margins": -0.0021003778092563152, "rewards/rejected": -0.0678563043475151, "step": 2930 }, { "epoch": 1.7162872154115587, "grad_norm": 1.1584587211466635, "learning_rate": 9.221388919541469e-07, "log_odds_chosen": 0.21223704516887665, "log_odds_ratio": -0.615585446357727, "logits/chosen": -3.6045544147491455, "logits/rejected": -3.621849775314331, "logps/chosen": -1.3131473064422607, "logps/rejected": -1.4441912174224854, "loss": 1.3566, "nll_loss": 1.217763066291809, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0656573697924614, "rewards/margins": 0.006552199833095074, "rewards/rejected": -0.07220957428216934, "step": 2940 }, { "epoch": 1.7221249270286048, "grad_norm": 1.3538984022277414, "learning_rate": 9.205746178983235e-07, "log_odds_chosen": 0.2933710515499115, "log_odds_ratio": -0.5626608729362488, "logits/chosen": -3.709604263305664, "logits/rejected": -3.853233814239502, "logps/chosen": -1.4404195547103882, "logps/rejected": -1.6616073846817017, "loss": 1.463, "nll_loss": 1.4306918382644653, "rewards/accuracies": 1.0, "rewards/chosen": -0.0720209851861, "rewards/margins": 0.011059392243623734, "rewards/rejected": -0.08308036625385284, "step": 2950 }, { "epoch": 1.7279626386456508, "grad_norm": 0.9076397296326972, "learning_rate": 9.190182776172598e-07, "log_odds_chosen": 0.05100778862833977, "log_odds_ratio": -0.7056868672370911, "logits/chosen": -3.4250330924987793, "logits/rejected": -3.541905641555786, "logps/chosen": -1.4093817472457886, "logps/rejected": -1.4687174558639526, "loss": 1.3672, "nll_loss": 1.2998182773590088, "rewards/accuracies": 0.5, "rewards/chosen": -0.07046908885240555, "rewards/margins": 0.002966786501929164, "rewards/rejected": -0.07343586534261703, "step": 2960 }, { "epoch": 1.733800350262697, "grad_norm": 1.2053200957078902, "learning_rate": 9.174698042719672e-07, "log_odds_chosen": 0.13008847832679749, "log_odds_ratio": -0.6532391905784607, "logits/chosen": -3.580319881439209, "logits/rejected": -3.3984291553497314, "logps/chosen": -1.3030531406402588, "logps/rejected": -1.4022136926651, "loss": 1.3089, "nll_loss": 1.2311606407165527, "rewards/accuracies": 0.5, "rewards/chosen": -0.06515266001224518, "rewards/margins": 0.004958023317158222, "rewards/rejected": -0.07011068612337112, "step": 2970 }, { "epoch": 1.7396380618797431, "grad_norm": 1.426541775153853, "learning_rate": 9.159291318091397e-07, "log_odds_chosen": 0.2559516727924347, "log_odds_ratio": -0.5983628630638123, "logits/chosen": -3.3384807109832764, "logits/rejected": -3.678255081176758, "logps/chosen": -1.1938564777374268, "logps/rejected": -1.3554447889328003, "loss": 1.4697, "nll_loss": 1.2234647274017334, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05969282239675522, "rewards/margins": 0.00807941798120737, "rewards/rejected": -0.06777223199605942, "step": 2980 }, { "epoch": 1.7454757734967892, "grad_norm": 1.260040899090403, "learning_rate": 9.143961949493189e-07, "log_odds_chosen": 0.6454986333847046, "log_odds_ratio": -0.4486139714717865, "logits/chosen": -3.6495602130889893, "logits/rejected": -3.780928373336792, "logps/chosen": -1.2057209014892578, "logps/rejected": -1.678066611289978, "loss": 1.3047, "nll_loss": 1.2185628414154053, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.06028605252504349, "rewards/margins": 0.023617291823029518, "rewards/rejected": -0.08390334248542786, "step": 2990 }, { "epoch": 1.7513134851138354, "grad_norm": 1.9879176353390626, "learning_rate": 9.128709291752768e-07, "log_odds_chosen": 0.18669570982456207, "log_odds_ratio": -0.624148964881897, "logits/chosen": -3.855358600616455, "logits/rejected": -3.8156840801239014, "logps/chosen": -1.5285725593566895, "logps/rejected": -1.6572033166885376, "loss": 1.4656, "nll_loss": 1.4553486108779907, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07642862945795059, "rewards/margins": 0.006431535817682743, "rewards/rejected": -0.08286015689373016, "step": 3000 }, { "epoch": 1.7571511967308815, "grad_norm": 1.190153870867777, "learning_rate": 9.113532707206116e-07, "log_odds_chosen": 0.35250359773635864, "log_odds_ratio": -0.5502651929855347, "logits/chosen": -3.5639655590057373, "logits/rejected": -3.5360419750213623, "logps/chosen": -1.3999148607254028, "logps/rejected": -1.6880311965942383, "loss": 1.4466, "nll_loss": 1.3038928508758545, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.06999573856592178, "rewards/margins": 0.014405807480216026, "rewards/rejected": -0.08440155535936356, "step": 3010 }, { "epoch": 1.7629889083479275, "grad_norm": 1.1269241077379475, "learning_rate": 9.098431565585488e-07, "log_odds_chosen": 0.6042129397392273, "log_odds_ratio": -0.5253341794013977, "logits/chosen": -3.612759828567505, "logits/rejected": -3.7220993041992188, "logps/chosen": -1.2392932176589966, "logps/rejected": -1.6779123544692993, "loss": 1.3819, "nll_loss": 1.3426153659820557, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06196466088294983, "rewards/margins": 0.02193094789981842, "rewards/rejected": -0.08389560133218765, "step": 3020 }, { "epoch": 1.7688266199649738, "grad_norm": 1.655305977529902, "learning_rate": 9.083405243909494e-07, "log_odds_chosen": 0.2669103443622589, "log_odds_ratio": -0.5838645696640015, "logits/chosen": -3.812957286834717, "logits/rejected": -3.9678330421447754, "logps/chosen": -1.4790438413619995, "logps/rejected": -1.6906646490097046, "loss": 1.3404, "nll_loss": 1.422533631324768, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07395218312740326, "rewards/margins": 0.010581040754914284, "rewards/rejected": -0.08453322947025299, "step": 3030 }, { "epoch": 1.7746643315820199, "grad_norm": 1.0294666000357822, "learning_rate": 9.068453126375147e-07, "log_odds_chosen": 0.2700028121471405, "log_odds_ratio": -0.5724495649337769, "logits/chosen": -3.708496570587158, "logits/rejected": -3.8041844367980957, "logps/chosen": -1.4400017261505127, "logps/rejected": -1.6519596576690674, "loss": 1.4033, "nll_loss": 1.4034411907196045, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.07200008630752563, "rewards/margins": 0.010597886517643929, "rewards/rejected": -0.08259797096252441, "step": 3040 }, { "epoch": 1.780502043199066, "grad_norm": 1.3125076715024615, "learning_rate": 9.053574604251853e-07, "log_odds_chosen": 0.21038727462291718, "log_odds_ratio": -0.6546778082847595, "logits/chosen": -3.6434452533721924, "logits/rejected": -3.795980930328369, "logps/chosen": -1.4494483470916748, "logps/rejected": -1.5928313732147217, "loss": 1.3622, "nll_loss": 1.3719871044158936, "rewards/accuracies": 0.5, "rewards/chosen": -0.07247242331504822, "rewards/margins": 0.007169149816036224, "rewards/rejected": -0.07964157313108444, "step": 3050 }, { "epoch": 1.7863397548161122, "grad_norm": 0.9263470005652826, "learning_rate": 9.03876907577734e-07, "log_odds_chosen": 0.29402467608451843, "log_odds_ratio": -0.5687133073806763, "logits/chosen": -3.621634006500244, "logits/rejected": -3.878628969192505, "logps/chosen": -1.4716471433639526, "logps/rejected": -1.6982942819595337, "loss": 1.3941, "nll_loss": 1.4607350826263428, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07358235120773315, "rewards/margins": 0.011332353577017784, "rewards/rejected": -0.08491470664739609, "step": 3060 }, { "epoch": 1.7921774664331582, "grad_norm": 1.362515095148631, "learning_rate": 9.024035946055421e-07, "log_odds_chosen": 0.40758076310157776, "log_odds_ratio": -0.537082314491272, "logits/chosen": -3.637566328048706, "logits/rejected": -3.798590898513794, "logps/chosen": -1.451775312423706, "logps/rejected": -1.7521677017211914, "loss": 1.3857, "nll_loss": 1.3430957794189453, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07258877903223038, "rewards/margins": 0.015019604936242104, "rewards/rejected": -0.08760838210582733, "step": 3070 }, { "epoch": 1.7980151780502043, "grad_norm": 1.0617600399073077, "learning_rate": 9.00937462695559e-07, "log_odds_chosen": 0.22692064940929413, "log_odds_ratio": -0.6011098623275757, "logits/chosen": -3.752908706665039, "logits/rejected": -3.865767002105713, "logps/chosen": -1.2341846227645874, "logps/rejected": -1.387607216835022, "loss": 1.454, "nll_loss": 1.355435848236084, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06170923635363579, "rewards/margins": 0.0076711238361895084, "rewards/rejected": -0.06938035786151886, "step": 3080 }, { "epoch": 1.8038528896672505, "grad_norm": 1.184728612825373, "learning_rate": 8.994784537014432e-07, "log_odds_chosen": 0.29649534821510315, "log_odds_ratio": -0.5693570375442505, "logits/chosen": -3.3512566089630127, "logits/rejected": -3.616710662841797, "logps/chosen": -1.2734484672546387, "logps/rejected": -1.4913791418075562, "loss": 1.4055, "nll_loss": 1.3224066495895386, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06367242336273193, "rewards/margins": 0.010896533727645874, "rewards/rejected": -0.07456895709037781, "step": 3090 }, { "epoch": 1.8096906012842966, "grad_norm": 1.41473475301836, "learning_rate": 8.980265101338747e-07, "log_odds_chosen": 0.40479230880737305, "log_odds_ratio": -0.5578173398971558, "logits/chosen": -3.3927807807922363, "logits/rejected": -3.7002391815185547, "logps/chosen": -1.3085676431655884, "logps/rejected": -1.6016905307769775, "loss": 1.3095, "nll_loss": 1.2916768789291382, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06542838364839554, "rewards/margins": 0.014656138606369495, "rewards/rejected": -0.08008452504873276, "step": 3100 }, { "epoch": 1.8155283129013426, "grad_norm": 1.3025265701059272, "learning_rate": 8.965815751510408e-07, "log_odds_chosen": 0.1812523901462555, "log_odds_ratio": -0.6351394653320312, "logits/chosen": -3.5449557304382324, "logits/rejected": -3.72521710395813, "logps/chosen": -1.5089774131774902, "logps/rejected": -1.660701036453247, "loss": 1.2655, "nll_loss": 1.3918120861053467, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07544886320829391, "rewards/margins": 0.007586190011352301, "rewards/rejected": -0.08303505927324295, "step": 3110 }, { "epoch": 1.821366024518389, "grad_norm": 1.6426649692480892, "learning_rate": 8.951435925492912e-07, "log_odds_chosen": -0.20959293842315674, "log_odds_ratio": -0.8476014137268066, "logits/chosen": -3.755178928375244, "logits/rejected": -3.508408308029175, "logps/chosen": -1.591137170791626, "logps/rejected": -1.4532674551010132, "loss": 1.4733, "nll_loss": 1.5222704410552979, "rewards/accuracies": 0.5, "rewards/chosen": -0.07955686748027802, "rewards/margins": -0.006893487181514502, "rewards/rejected": -0.07266338169574738, "step": 3120 }, { "epoch": 1.827203736135435, "grad_norm": 1.1827736495127905, "learning_rate": 8.93712506753953e-07, "log_odds_chosen": 0.23824343085289001, "log_odds_ratio": -0.6028715372085571, "logits/chosen": -3.689800977706909, "logits/rejected": -3.7561428546905518, "logps/chosen": -1.417304277420044, "logps/rejected": -1.583245873451233, "loss": 1.4418, "nll_loss": 1.3516265153884888, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0708652138710022, "rewards/margins": 0.008297083899378777, "rewards/rejected": -0.07916229218244553, "step": 3130 }, { "epoch": 1.833041447752481, "grad_norm": 1.0684371511072701, "learning_rate": 8.922882628103122e-07, "log_odds_chosen": 0.02320503629744053, "log_odds_ratio": -0.6896098852157593, "logits/chosen": -3.7532989978790283, "logits/rejected": -3.826082706451416, "logps/chosen": -1.7270689010620117, "logps/rejected": -1.744449257850647, "loss": 1.4571, "nll_loss": 1.4911777973175049, "rewards/accuracies": 0.5, "rewards/chosen": -0.08635345101356506, "rewards/margins": 0.0008690126123838127, "rewards/rejected": -0.08722247183322906, "step": 3140 }, { "epoch": 1.8388791593695273, "grad_norm": 2.0366290099287205, "learning_rate": 8.90870806374748e-07, "log_odds_chosen": 0.2956680655479431, "log_odds_ratio": -0.5728952288627625, "logits/chosen": -3.6527018547058105, "logits/rejected": -3.5178897380828857, "logps/chosen": -1.1791071891784668, "logps/rejected": -1.3476858139038086, "loss": 1.3008, "nll_loss": 1.2199859619140625, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0589553602039814, "rewards/margins": 0.008428930304944515, "rewards/rejected": -0.06738429516553879, "step": 3150 }, { "epoch": 1.844716870986573, "grad_norm": 1.3140810376193668, "learning_rate": 8.894600837060251e-07, "log_odds_chosen": 0.6228810548782349, "log_odds_ratio": -0.46820539236068726, "logits/chosen": -3.4118778705596924, "logits/rejected": -3.7473087310791016, "logps/chosen": -0.9608672857284546, "logps/rejected": -1.386189341545105, "loss": 1.338, "nll_loss": 1.080672025680542, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.04804336652159691, "rewards/margins": 0.021266106516122818, "rewards/rejected": -0.06930947303771973, "step": 3160 }, { "epoch": 1.8505545826036194, "grad_norm": 1.2472703646314196, "learning_rate": 8.880560416567349e-07, "log_odds_chosen": 0.4251355528831482, "log_odds_ratio": -0.5288821458816528, "logits/chosen": -3.4653258323669434, "logits/rejected": -3.7232589721679688, "logps/chosen": -1.3875494003295898, "logps/rejected": -1.720334768295288, "loss": 1.357, "nll_loss": 1.4218946695327759, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06937747448682785, "rewards/margins": 0.016639268025755882, "rewards/rejected": -0.08601673692464828, "step": 3170 }, { "epoch": 1.8563922942206657, "grad_norm": 1.688610483304522, "learning_rate": 8.866586276648859e-07, "log_odds_chosen": 0.2520288825035095, "log_odds_ratio": -0.7016493678092957, "logits/chosen": -3.5259406566619873, "logits/rejected": -3.5955650806427, "logps/chosen": -1.324069857597351, "logps/rejected": -1.4332044124603271, "loss": 1.3253, "nll_loss": 1.2337920665740967, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.06620349735021591, "rewards/margins": 0.005456716753542423, "rewards/rejected": -0.07166021317243576, "step": 3180 }, { "epoch": 1.8622300058377115, "grad_norm": 1.0730671324315013, "learning_rate": 8.852677897456389e-07, "log_odds_chosen": -0.17328771948814392, "log_odds_ratio": -0.8033192753791809, "logits/chosen": -3.4900710582733154, "logits/rejected": -3.6884219646453857, "logps/chosen": -1.3256826400756836, "logps/rejected": -1.2287745475769043, "loss": 1.3858, "nll_loss": 1.327152967453003, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.06628413498401642, "rewards/margins": -0.00484540406614542, "rewards/rejected": -0.061438728123903275, "step": 3190 }, { "epoch": 1.8680677174547577, "grad_norm": 1.1282789155787094, "learning_rate": 8.838834764831844e-07, "log_odds_chosen": 0.5034184455871582, "log_odds_ratio": -0.5096439123153687, "logits/chosen": -3.641875743865967, "logits/rejected": -3.6342689990997314, "logps/chosen": -1.3231292963027954, "logps/rejected": -1.6577365398406982, "loss": 1.4151, "nll_loss": 1.3721673488616943, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06615646183490753, "rewards/margins": 0.01673036254942417, "rewards/rejected": -0.08288682997226715, "step": 3200 }, { "epoch": 1.873905429071804, "grad_norm": 1.8122830180926783, "learning_rate": 8.825056370227597e-07, "log_odds_chosen": -0.0635746642947197, "log_odds_ratio": -0.735160231590271, "logits/chosen": -3.942869186401367, "logits/rejected": -3.8062450885772705, "logps/chosen": -1.6786683797836304, "logps/rejected": -1.6278127431869507, "loss": 1.3833, "nll_loss": 1.537667989730835, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.08393342792987823, "rewards/margins": -0.002542783273383975, "rewards/rejected": -0.08139064162969589, "step": 3210 }, { "epoch": 1.8797431406888498, "grad_norm": 1.1231242652947193, "learning_rate": 8.811342210628018e-07, "log_odds_chosen": 0.551103949546814, "log_odds_ratio": -0.5444887280464172, "logits/chosen": -3.318305253982544, "logits/rejected": -3.6903743743896484, "logps/chosen": -1.3123352527618408, "logps/rejected": -1.6270854473114014, "loss": 1.3407, "nll_loss": 1.1797670125961304, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06561677157878876, "rewards/margins": 0.01573750749230385, "rewards/rejected": -0.08135427534580231, "step": 3220 }, { "epoch": 1.8855808523058961, "grad_norm": 1.2759617792201994, "learning_rate": 8.797691788472336e-07, "log_odds_chosen": 0.5755179524421692, "log_odds_ratio": -0.516478419303894, "logits/chosen": -3.6755073070526123, "logits/rejected": -3.7259812355041504, "logps/chosen": -1.491429328918457, "logps/rejected": -1.8879388570785522, "loss": 1.4886, "nll_loss": 1.4534341096878052, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07457147538661957, "rewards/margins": 0.01982547529041767, "rewards/rejected": -0.09439694136381149, "step": 3230 }, { "epoch": 1.8914185639229422, "grad_norm": 1.2163203695744826, "learning_rate": 8.784104611578832e-07, "log_odds_chosen": 0.03782624751329422, "log_odds_ratio": -0.6955235004425049, "logits/chosen": -3.3403923511505127, "logits/rejected": -3.5161705017089844, "logps/chosen": -1.5381957292556763, "logps/rejected": -1.5801900625228882, "loss": 1.316, "nll_loss": 1.4614856243133545, "rewards/accuracies": 0.5, "rewards/chosen": -0.07690979540348053, "rewards/margins": 0.002099717501550913, "rewards/rejected": -0.07900950312614441, "step": 3240 }, { "epoch": 1.8972562755399882, "grad_norm": 1.6653661625341387, "learning_rate": 8.770580193070293e-07, "log_odds_chosen": 0.3862825334072113, "log_odds_ratio": -0.5477957129478455, "logits/chosen": -3.4675850868225098, "logits/rejected": -3.5225131511688232, "logps/chosen": -1.3336288928985596, "logps/rejected": -1.5926841497421265, "loss": 1.3781, "nll_loss": 1.379119873046875, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.06668144464492798, "rewards/margins": 0.012952776625752449, "rewards/rejected": -0.07963421195745468, "step": 3250 }, { "epoch": 1.9030939871570345, "grad_norm": 1.602426701079066, "learning_rate": 8.757118051300735e-07, "log_odds_chosen": 0.3159894347190857, "log_odds_ratio": -0.5781133770942688, "logits/chosen": -3.893644332885742, "logits/rejected": -3.7758166790008545, "logps/chosen": -1.3617045879364014, "logps/rejected": -1.5929458141326904, "loss": 1.3905, "nll_loss": 1.3738027811050415, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06808523088693619, "rewards/margins": 0.01156205777078867, "rewards/rejected": -0.07964728772640228, "step": 3260 }, { "epoch": 1.9089316987740805, "grad_norm": 1.4715891185672871, "learning_rate": 8.743717709783363e-07, "log_odds_chosen": 0.5264045000076294, "log_odds_ratio": -0.5166357755661011, "logits/chosen": -3.4007325172424316, "logits/rejected": -3.6659111976623535, "logps/chosen": -1.1596753597259521, "logps/rejected": -1.4814012050628662, "loss": 1.3909, "nll_loss": 1.2409056425094604, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.057983774691820145, "rewards/margins": 0.016086284071207047, "rewards/rejected": -0.07407005876302719, "step": 3270 }, { "epoch": 1.9147694103911266, "grad_norm": 1.8330343580841697, "learning_rate": 8.730378697119729e-07, "log_odds_chosen": 0.2302229404449463, "log_odds_ratio": -0.605810284614563, "logits/chosen": -3.701902389526367, "logits/rejected": -3.7556540966033936, "logps/chosen": -1.628993272781372, "logps/rejected": -1.8200536966323853, "loss": 1.4475, "nll_loss": 1.490070104598999, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08144966512918472, "rewards/margins": 0.00955302082002163, "rewards/rejected": -0.0910026878118515, "step": 3280 }, { "epoch": 1.9206071220081729, "grad_norm": 1.8260104157961528, "learning_rate": 8.717100546930084e-07, "log_odds_chosen": 0.21948346495628357, "log_odds_ratio": -0.6082640886306763, "logits/chosen": -3.679772138595581, "logits/rejected": -3.468172550201416, "logps/chosen": -1.4074385166168213, "logps/rejected": -1.5865525007247925, "loss": 1.466, "nll_loss": 1.5339876413345337, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07037192583084106, "rewards/margins": 0.00895569659769535, "rewards/rejected": -0.07932762801647186, "step": 3290 }, { "epoch": 1.926444833625219, "grad_norm": 1.5635417652261625, "learning_rate": 8.703882797784894e-07, "log_odds_chosen": 0.11329708993434906, "log_odds_ratio": -0.6721440553665161, "logits/chosen": -3.535184860229492, "logits/rejected": -3.7379443645477295, "logps/chosen": -1.40912926197052, "logps/rejected": -1.5139873027801514, "loss": 1.3541, "nll_loss": 1.445618748664856, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07045646011829376, "rewards/margins": 0.0052429125644266605, "rewards/rejected": -0.07569937407970428, "step": 3300 }, { "epoch": 1.932282545242265, "grad_norm": 2.176334538725283, "learning_rate": 8.690724993137478e-07, "log_odds_chosen": -0.014183697290718555, "log_odds_ratio": -0.7154437899589539, "logits/chosen": -3.6487114429473877, "logits/rejected": -3.6364002227783203, "logps/chosen": -1.4908628463745117, "logps/rejected": -1.4863680601119995, "loss": 1.3542, "nll_loss": 1.3927175998687744, "rewards/accuracies": 0.30000001192092896, "rewards/chosen": -0.07454313337802887, "rewards/margins": -0.00022473632998298854, "rewards/rejected": -0.07431840896606445, "step": 3310 }, { "epoch": 1.9381202568593112, "grad_norm": 1.7674536555401543, "learning_rate": 8.677626681257792e-07, "log_odds_chosen": 0.5421845316886902, "log_odds_ratio": -0.4726699888706207, "logits/chosen": -3.610222578048706, "logits/rejected": -3.832785129547119, "logps/chosen": -1.1191767454147339, "logps/rejected": -1.5056393146514893, "loss": 1.4032, "nll_loss": 1.1883442401885986, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.055958837270736694, "rewards/margins": 0.01932312361896038, "rewards/rejected": -0.07528196275234222, "step": 3320 }, { "epoch": 1.9439579684763573, "grad_norm": 2.1655293211827713, "learning_rate": 8.664587415167274e-07, "log_odds_chosen": 0.11959455907344818, "log_odds_ratio": -0.6523088216781616, "logits/chosen": -3.595776319503784, "logits/rejected": -3.8391716480255127, "logps/chosen": -1.315999150276184, "logps/rejected": -1.3993362188339233, "loss": 1.2676, "nll_loss": 1.3520652055740356, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06579995900392532, "rewards/margins": 0.004166848957538605, "rewards/rejected": -0.06996681541204453, "step": 3330 }, { "epoch": 1.9497956800934033, "grad_norm": 0.9353521616215179, "learning_rate": 8.651606752574786e-07, "log_odds_chosen": 0.4048880636692047, "log_odds_ratio": -0.5330785512924194, "logits/chosen": -3.36864972114563, "logits/rejected": -3.772218704223633, "logps/chosen": -1.2041218280792236, "logps/rejected": -1.4776265621185303, "loss": 1.2835, "nll_loss": 1.1992894411087036, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0602060966193676, "rewards/margins": 0.013675238005816936, "rewards/rejected": -0.07388134300708771, "step": 3340 }, { "epoch": 1.9556333917104496, "grad_norm": 1.4007984036279175, "learning_rate": 8.638684255813602e-07, "log_odds_chosen": 0.5986078977584839, "log_odds_ratio": -0.483790785074234, "logits/chosen": -3.732468843460083, "logits/rejected": -3.9322261810302734, "logps/chosen": -1.332970142364502, "logps/rejected": -1.7515175342559814, "loss": 1.3591, "nll_loss": 1.4074921607971191, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.06664850562810898, "rewards/margins": 0.02092736028134823, "rewards/rejected": -0.08757587522268295, "step": 3350 }, { "epoch": 1.9614711033274956, "grad_norm": 1.0604515579982727, "learning_rate": 8.625819491779427e-07, "log_odds_chosen": 0.44171327352523804, "log_odds_ratio": -0.5260747075080872, "logits/chosen": -3.6038849353790283, "logits/rejected": -3.7031402587890625, "logps/chosen": -1.3288795948028564, "logps/rejected": -1.6737924814224243, "loss": 1.5199, "nll_loss": 1.2797038555145264, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06644398719072342, "rewards/margins": 0.017245642840862274, "rewards/rejected": -0.0836896151304245, "step": 3360 }, { "epoch": 1.9673088149445417, "grad_norm": 1.5913010927817774, "learning_rate": 8.613012031869432e-07, "log_odds_chosen": 0.29263177514076233, "log_odds_ratio": -0.6443699598312378, "logits/chosen": -3.1635916233062744, "logits/rejected": -3.5083088874816895, "logps/chosen": -1.431062936782837, "logps/rejected": -1.6213613748550415, "loss": 1.3798, "nll_loss": 1.3398135900497437, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07155314832925797, "rewards/margins": 0.009514921344816685, "rewards/rejected": -0.08106806129217148, "step": 3370 }, { "epoch": 1.973146526561588, "grad_norm": 1.115217664534256, "learning_rate": 8.600261451922269e-07, "log_odds_chosen": 0.36544376611709595, "log_odds_ratio": -0.5356424450874329, "logits/chosen": -3.7846920490264893, "logits/rejected": -3.8889851570129395, "logps/chosen": -1.4162582159042358, "logps/rejected": -1.7099082469940186, "loss": 1.3659, "nll_loss": 1.336963415145874, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.07081291824579239, "rewards/margins": 0.014682503417134285, "rewards/rejected": -0.08549542725086212, "step": 3380 }, { "epoch": 1.978984238178634, "grad_norm": 0.9937820670127353, "learning_rate": 8.587567332159079e-07, "log_odds_chosen": 0.5218592882156372, "log_odds_ratio": -0.5526521801948547, "logits/chosen": -3.472196578979492, "logits/rejected": -3.7405314445495605, "logps/chosen": -1.2001979351043701, "logps/rejected": -1.5130959749221802, "loss": 1.33, "nll_loss": 1.2340190410614014, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06000988930463791, "rewards/margins": 0.015644896775484085, "rewards/rejected": -0.07565478980541229, "step": 3390 }, { "epoch": 1.98482194979568, "grad_norm": 1.3632456807546827, "learning_rate": 8.574929257125441e-07, "log_odds_chosen": 0.4123097360134125, "log_odds_ratio": -0.5478167533874512, "logits/chosen": -3.5182907581329346, "logits/rejected": -3.792470932006836, "logps/chosen": -1.4127540588378906, "logps/rejected": -1.7218072414398193, "loss": 1.3698, "nll_loss": 1.3835546970367432, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07063769549131393, "rewards/margins": 0.015452663414180279, "rewards/rejected": -0.08609036356210709, "step": 3400 }, { "epoch": 1.9906596614127263, "grad_norm": 1.3593150652820793, "learning_rate": 8.562346815634272e-07, "log_odds_chosen": 0.38279253244400024, "log_odds_ratio": -0.556079089641571, "logits/chosen": -3.8272647857666016, "logits/rejected": -3.737560987472534, "logps/chosen": -1.2767606973648071, "logps/rejected": -1.587856411933899, "loss": 1.3693, "nll_loss": 1.331160068511963, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06383801996707916, "rewards/margins": 0.015554791316390038, "rewards/rejected": -0.07939281314611435, "step": 3410 }, { "epoch": 1.9964973730297724, "grad_norm": 1.4167651802415482, "learning_rate": 8.549819600709619e-07, "log_odds_chosen": 0.15318048000335693, "log_odds_ratio": -0.6462687253952026, "logits/chosen": -3.374661922454834, "logits/rejected": -3.565213680267334, "logps/chosen": -1.268017053604126, "logps/rejected": -1.3932969570159912, "loss": 1.3612, "nll_loss": 1.231384038925171, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06340084969997406, "rewards/margins": 0.006263992749154568, "rewards/rejected": -0.0696648433804512, "step": 3420 }, { "epoch": 2.0023350846468184, "grad_norm": 1.8630237169079376, "learning_rate": 8.537347209531384e-07, "log_odds_chosen": 0.048872023820877075, "log_odds_ratio": -0.7121687531471252, "logits/chosen": -3.4395339488983154, "logits/rejected": -3.700653553009033, "logps/chosen": -1.4821237325668335, "logps/rejected": -1.5633782148361206, "loss": 1.3649, "nll_loss": 1.4192819595336914, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07410618662834167, "rewards/margins": 0.004062723368406296, "rewards/rejected": -0.07816891372203827, "step": 3430 }, { "epoch": 2.0081727962638647, "grad_norm": 1.814127284804431, "learning_rate": 8.52492924338092e-07, "log_odds_chosen": 0.5351802706718445, "log_odds_ratio": -0.4980153441429138, "logits/chosen": -3.577688217163086, "logits/rejected": -3.8123645782470703, "logps/chosen": -1.3229641914367676, "logps/rejected": -1.7156559228897095, "loss": 1.3083, "nll_loss": 1.4449093341827393, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.06614820659160614, "rewards/margins": 0.019634578377008438, "rewards/rejected": -0.08578278124332428, "step": 3440 }, { "epoch": 2.0140105078809105, "grad_norm": 1.0431057845184406, "learning_rate": 8.512565307587487e-07, "log_odds_chosen": 0.3946095108985901, "log_odds_ratio": -0.5324279069900513, "logits/chosen": -3.680811643600464, "logits/rejected": -3.727930784225464, "logps/chosen": -1.3217189311981201, "logps/rejected": -1.6213276386260986, "loss": 1.4617, "nll_loss": 1.3540632724761963, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.06608594954013824, "rewards/margins": 0.014980427920818329, "rewards/rejected": -0.08106637746095657, "step": 3450 }, { "epoch": 2.019848219497957, "grad_norm": 0.9736731892357005, "learning_rate": 8.500255011475575e-07, "log_odds_chosen": 0.38907569646835327, "log_odds_ratio": -0.5724372267723083, "logits/chosen": -3.6448512077331543, "logits/rejected": -3.682485580444336, "logps/chosen": -1.1754862070083618, "logps/rejected": -1.4479039907455444, "loss": 1.3711, "nll_loss": 1.278760313987732, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.058774303644895554, "rewards/margins": 0.013620896264910698, "rewards/rejected": -0.0723952054977417, "step": 3460 }, { "epoch": 2.025685931115003, "grad_norm": 0.9471509453579007, "learning_rate": 8.48799796831305e-07, "log_odds_chosen": 0.557635486125946, "log_odds_ratio": -0.5058034062385559, "logits/chosen": -3.614464521408081, "logits/rejected": -3.61189603805542, "logps/chosen": -1.241998553276062, "logps/rejected": -1.6612411737442017, "loss": 1.2919, "nll_loss": 1.2096889019012451, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06209992617368698, "rewards/margins": 0.020962130278348923, "rewards/rejected": -0.0830620676279068, "step": 3470 }, { "epoch": 2.031523642732049, "grad_norm": 1.6609802104580036, "learning_rate": 8.475793795260132e-07, "log_odds_chosen": 0.505478024482727, "log_odds_ratio": -0.5027508735656738, "logits/chosen": -3.3969924449920654, "logits/rejected": -3.8347816467285156, "logps/chosen": -1.2063688039779663, "logps/rejected": -1.5644428730010986, "loss": 1.3118, "nll_loss": 1.2540374994277954, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.060318440198898315, "rewards/margins": 0.017903707921504974, "rewards/rejected": -0.07822214066982269, "step": 3480 }, { "epoch": 2.037361354349095, "grad_norm": 1.397625018671261, "learning_rate": 8.463642113319158e-07, "log_odds_chosen": 0.3708030581474304, "log_odds_ratio": -0.5468959212303162, "logits/chosen": -3.5648531913757324, "logits/rejected": -3.544677257537842, "logps/chosen": -1.2715681791305542, "logps/rejected": -1.516818642616272, "loss": 1.2701, "nll_loss": 1.2389720678329468, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06357841193675995, "rewards/margins": 0.01226252131164074, "rewards/rejected": -0.07584092766046524, "step": 3490 }, { "epoch": 2.0431990659661414, "grad_norm": 1.2298702794956484, "learning_rate": 8.451542547285166e-07, "log_odds_chosen": 0.4399194121360779, "log_odds_ratio": -0.5096272230148315, "logits/chosen": -3.6836838722229004, "logits/rejected": -3.8577017784118652, "logps/chosen": -1.4172312021255493, "logps/rejected": -1.7793182134628296, "loss": 1.399, "nll_loss": 1.4519798755645752, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0708615630865097, "rewards/margins": 0.018104348331689835, "rewards/rejected": -0.08896590769290924, "step": 3500 }, { "epoch": 2.0490367775831873, "grad_norm": 1.4615905052582112, "learning_rate": 8.439494725697223e-07, "log_odds_chosen": 0.08987633138895035, "log_odds_ratio": -0.6635407209396362, "logits/chosen": -3.8230044841766357, "logits/rejected": -3.7317824363708496, "logps/chosen": -1.4143449068069458, "logps/rejected": -1.472801923751831, "loss": 1.3043, "nll_loss": 1.3469631671905518, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07071724534034729, "rewards/margins": 0.0029228515923023224, "rewards/rejected": -0.07364009320735931, "step": 3510 }, { "epoch": 2.0548744892002335, "grad_norm": 1.5174145728836195, "learning_rate": 8.427498280790526e-07, "log_odds_chosen": 0.8295912742614746, "log_odds_ratio": -0.39577746391296387, "logits/chosen": -2.98356294631958, "logits/rejected": -3.632753849029541, "logps/chosen": -1.140950083732605, "logps/rejected": -1.6769087314605713, "loss": 1.2779, "nll_loss": 1.2862101793289185, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.05704749748110771, "rewards/margins": 0.026797935366630554, "rewards/rejected": -0.08384543657302856, "step": 3520 }, { "epoch": 2.06071220081728, "grad_norm": 1.2646471480744161, "learning_rate": 8.415552848449264e-07, "log_odds_chosen": 0.5569995641708374, "log_odds_ratio": -0.4858066439628601, "logits/chosen": -3.731384754180908, "logits/rejected": -3.6838059425354004, "logps/chosen": -1.525583267211914, "logps/rejected": -1.9724029302597046, "loss": 1.4299, "nll_loss": 1.6033687591552734, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.0762791559100151, "rewards/margins": 0.022340983152389526, "rewards/rejected": -0.09862013161182404, "step": 3530 }, { "epoch": 2.0665499124343256, "grad_norm": 1.220153864225786, "learning_rate": 8.40365806816018e-07, "log_odds_chosen": 0.09821932017803192, "log_odds_ratio": -0.6560810208320618, "logits/chosen": -3.7993617057800293, "logits/rejected": -3.68473482131958, "logps/chosen": -1.6741355657577515, "logps/rejected": -1.7484209537506104, "loss": 1.3815, "nll_loss": 1.4993650913238525, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08370678126811981, "rewards/margins": 0.003714256687089801, "rewards/rejected": -0.08742103725671768, "step": 3540 }, { "epoch": 2.072387624051372, "grad_norm": 1.260142425970996, "learning_rate": 8.391813582966891e-07, "log_odds_chosen": 0.23039701581001282, "log_odds_ratio": -0.5917035341262817, "logits/chosen": -3.8085720539093018, "logits/rejected": -3.8372726440429688, "logps/chosen": -1.6289440393447876, "logps/rejected": -1.818232536315918, "loss": 1.3947, "nll_loss": 1.4954445362091064, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.08144720643758774, "rewards/margins": 0.00946442224085331, "rewards/rejected": -0.0909116268157959, "step": 3550 }, { "epoch": 2.078225335668418, "grad_norm": 1.2635671291534198, "learning_rate": 8.380019039424888e-07, "log_odds_chosen": 0.531287670135498, "log_odds_ratio": -0.49267420172691345, "logits/chosen": -3.429736614227295, "logits/rejected": -3.560584306716919, "logps/chosen": -1.275709867477417, "logps/rejected": -1.6342052221298218, "loss": 1.3612, "nll_loss": 1.2413498163223267, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.06378548592329025, "rewards/margins": 0.017924780026078224, "rewards/rejected": -0.08171026408672333, "step": 3560 }, { "epoch": 2.084063047285464, "grad_norm": 1.2827125286608283, "learning_rate": 8.368274087557231e-07, "log_odds_chosen": 0.25495582818984985, "log_odds_ratio": -0.657101035118103, "logits/chosen": -3.5291683673858643, "logits/rejected": -3.709247589111328, "logps/chosen": -1.2155969142913818, "logps/rejected": -1.3832242488861084, "loss": 1.4259, "nll_loss": 1.4657541513442993, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06077985092997551, "rewards/margins": 0.008381364867091179, "rewards/rejected": -0.06916122138500214, "step": 3570 }, { "epoch": 2.0899007589025103, "grad_norm": 1.3649811346429455, "learning_rate": 8.356578380810946e-07, "log_odds_chosen": 0.40865784883499146, "log_odds_ratio": -0.5826531052589417, "logits/chosen": -3.395352602005005, "logits/rejected": -3.588895082473755, "logps/chosen": -0.9783341288566589, "logps/rejected": -1.2938218116760254, "loss": 1.3236, "nll_loss": 1.1811199188232422, "rewards/accuracies": 0.5, "rewards/chosen": -0.04891670495271683, "rewards/margins": 0.015774380415678024, "rewards/rejected": -0.06469108909368515, "step": 3580 }, { "epoch": 2.0957384705195565, "grad_norm": 1.3096201372751122, "learning_rate": 8.344931576014064e-07, "log_odds_chosen": 0.6784507632255554, "log_odds_ratio": -0.4960360527038574, "logits/chosen": -3.46869158744812, "logits/rejected": -3.3875679969787598, "logps/chosen": -1.2796432971954346, "logps/rejected": -1.7600829601287842, "loss": 1.4511, "nll_loss": 1.318325400352478, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06398217380046844, "rewards/margins": 0.024021979421377182, "rewards/rejected": -0.08800414949655533, "step": 3590 }, { "epoch": 2.1015761821366024, "grad_norm": 1.0083421077349053, "learning_rate": 8.333333333333333e-07, "log_odds_chosen": 0.36845219135284424, "log_odds_ratio": -0.5422554016113281, "logits/chosen": -3.664034366607666, "logits/rejected": -3.8817315101623535, "logps/chosen": -1.3968976736068726, "logps/rejected": -1.687673807144165, "loss": 1.3262, "nll_loss": 1.3390676975250244, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.06984489411115646, "rewards/margins": 0.014538794755935669, "rewards/rejected": -0.08438368886709213, "step": 3600 }, { "epoch": 2.1074138937536486, "grad_norm": 1.0392966268407253, "learning_rate": 8.321783316232578e-07, "log_odds_chosen": 0.03545196354389191, "log_odds_ratio": -0.6979047060012817, "logits/chosen": -3.727281093597412, "logits/rejected": -3.650019884109497, "logps/chosen": -1.320505142211914, "logps/rejected": -1.375327229499817, "loss": 1.3291, "nll_loss": 1.2454499006271362, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0660252645611763, "rewards/margins": 0.0027411053888499737, "rewards/rejected": -0.06876637041568756, "step": 3610 }, { "epoch": 2.113251605370695, "grad_norm": 1.1200865595168674, "learning_rate": 8.310281191431671e-07, "log_odds_chosen": 0.31994399428367615, "log_odds_ratio": -0.5863881707191467, "logits/chosen": -3.6258456707000732, "logits/rejected": -3.8620238304138184, "logps/chosen": -1.3940349817276, "logps/rejected": -1.6477559804916382, "loss": 1.4136, "nll_loss": 1.4304087162017822, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06970174610614777, "rewards/margins": 0.012686056084930897, "rewards/rejected": -0.08238780498504639, "step": 3620 }, { "epoch": 2.1190893169877407, "grad_norm": 0.8863096074794962, "learning_rate": 8.298826628866154e-07, "log_odds_chosen": 0.15754401683807373, "log_odds_ratio": -0.643571674823761, "logits/chosen": -3.506723403930664, "logits/rejected": -3.6435649394989014, "logps/chosen": -1.577675223350525, "logps/rejected": -1.7038419246673584, "loss": 1.381, "nll_loss": 1.5068562030792236, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07888376712799072, "rewards/margins": 0.006308333482593298, "rewards/rejected": -0.08519209921360016, "step": 3630 }, { "epoch": 2.124927028604787, "grad_norm": 1.0734058093831005, "learning_rate": 8.287419301647449e-07, "log_odds_chosen": 0.36750227212905884, "log_odds_ratio": -0.5673590302467346, "logits/chosen": -3.6536495685577393, "logits/rejected": -3.7383663654327393, "logps/chosen": -1.2911591529846191, "logps/rejected": -1.584813117980957, "loss": 1.3598, "nll_loss": 1.3511945009231567, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06455795466899872, "rewards/margins": 0.014682704582810402, "rewards/rejected": -0.07924065738916397, "step": 3640 }, { "epoch": 2.1307647402218333, "grad_norm": 1.4116710271436417, "learning_rate": 8.27605888602368e-07, "log_odds_chosen": 0.8098222017288208, "log_odds_ratio": -0.40129566192626953, "logits/chosen": -3.3003921508789062, "logits/rejected": -3.6341748237609863, "logps/chosen": -1.2020775079727173, "logps/rejected": -1.7571001052856445, "loss": 1.3144, "nll_loss": 1.220395803451538, "rewards/accuracies": 1.0, "rewards/chosen": -0.060103874653577805, "rewards/margins": 0.027751142159104347, "rewards/rejected": -0.08785500377416611, "step": 3650 }, { "epoch": 2.136602451838879, "grad_norm": 0.9708296241702945, "learning_rate": 8.264745061341079e-07, "log_odds_chosen": -0.08058247715234756, "log_odds_ratio": -0.8060529828071594, "logits/chosen": -3.497795820236206, "logits/rejected": -3.513354539871216, "logps/chosen": -1.541185975074768, "logps/rejected": -1.5483167171478271, "loss": 1.3504, "nll_loss": 1.4931577444076538, "rewards/accuracies": 0.30000001192092896, "rewards/chosen": -0.077059306204319, "rewards/margins": 0.0003565371152944863, "rewards/rejected": -0.0774158388376236, "step": 3660 }, { "epoch": 2.1424401634559254, "grad_norm": 1.6104528203194608, "learning_rate": 8.253477510005973e-07, "log_odds_chosen": 0.44794636964797974, "log_odds_ratio": -0.5383803248405457, "logits/chosen": -3.600461959838867, "logits/rejected": -3.5781989097595215, "logps/chosen": -1.441270112991333, "logps/rejected": -1.7673368453979492, "loss": 1.3452, "nll_loss": 1.35562264919281, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.07206349074840546, "rewards/margins": 0.01630334183573723, "rewards/rejected": -0.08836683630943298, "step": 3670 }, { "epoch": 2.148277875072971, "grad_norm": 1.9789275651202989, "learning_rate": 8.24225591744734e-07, "log_odds_chosen": 0.13841824233531952, "log_odds_ratio": -0.6488375663757324, "logits/chosen": -3.4049975872039795, "logits/rejected": -3.6192803382873535, "logps/chosen": -1.2600632905960083, "logps/rejected": -1.3558993339538574, "loss": 1.2734, "nll_loss": 1.2531174421310425, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06300316005945206, "rewards/margins": 0.004791805054992437, "rewards/rejected": -0.06779496371746063, "step": 3680 }, { "epoch": 2.1541155866900175, "grad_norm": 1.0347147616006613, "learning_rate": 8.231079972079914e-07, "log_odds_chosen": 0.11772646009922028, "log_odds_ratio": -0.6443933248519897, "logits/chosen": -3.8295674324035645, "logits/rejected": -3.813927173614502, "logps/chosen": -1.4735829830169678, "logps/rejected": -1.563428282737732, "loss": 1.4288, "nll_loss": 1.454297661781311, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07367914915084839, "rewards/margins": 0.004492264706641436, "rewards/rejected": -0.07817141711711884, "step": 3690 }, { "epoch": 2.1599532983070637, "grad_norm": 1.3400460153702618, "learning_rate": 8.219949365267865e-07, "log_odds_chosen": 0.3447696268558502, "log_odds_ratio": -0.5511735677719116, "logits/chosen": -3.661748170852661, "logits/rejected": -3.7449212074279785, "logps/chosen": -1.229616403579712, "logps/rejected": -1.4711929559707642, "loss": 1.3472, "nll_loss": 1.290352463722229, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.06148082762956619, "rewards/margins": 0.012078830040991306, "rewards/rejected": -0.07355964928865433, "step": 3700 }, { "epoch": 2.1657910099241096, "grad_norm": 2.337708353377245, "learning_rate": 8.208863791288982e-07, "log_odds_chosen": 0.3928822875022888, "log_odds_ratio": -0.5413030385971069, "logits/chosen": -3.316314220428467, "logits/rejected": -3.689746379852295, "logps/chosen": -1.2039897441864014, "logps/rejected": -1.4859472513198853, "loss": 1.2742, "nll_loss": 1.2176729440689087, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06019948795437813, "rewards/margins": 0.014097881503403187, "rewards/rejected": -0.07429736107587814, "step": 3710 }, { "epoch": 2.171628721541156, "grad_norm": 1.5900098777684584, "learning_rate": 8.197822947299412e-07, "log_odds_chosen": 0.28527921438217163, "log_odds_ratio": -0.5816302299499512, "logits/chosen": -3.7909722328186035, "logits/rejected": -3.9739508628845215, "logps/chosen": -1.3698878288269043, "logps/rejected": -1.564864158630371, "loss": 1.3243, "nll_loss": 1.3350427150726318, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06849439442157745, "rewards/margins": 0.009748813696205616, "rewards/rejected": -0.0782432109117508, "step": 3720 }, { "epoch": 2.177466433158202, "grad_norm": 1.8533371772008824, "learning_rate": 8.186826533298912e-07, "log_odds_chosen": 0.277137815952301, "log_odds_ratio": -0.5706073045730591, "logits/chosen": -3.858438014984131, "logits/rejected": -3.7105515003204346, "logps/chosen": -1.3900673389434814, "logps/rejected": -1.599936604499817, "loss": 1.4236, "nll_loss": 1.4558546543121338, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06950335949659348, "rewards/margins": 0.010493464767932892, "rewards/rejected": -0.07999683171510696, "step": 3730 }, { "epoch": 2.183304144775248, "grad_norm": 1.5094609723067725, "learning_rate": 8.175874252096609e-07, "log_odds_chosen": 0.5799206495285034, "log_odds_ratio": -0.4648856520652771, "logits/chosen": -3.410557270050049, "logits/rejected": -3.8509762287139893, "logps/chosen": -1.114579439163208, "logps/rejected": -1.5132566690444946, "loss": 1.3136, "nll_loss": 1.2295008897781372, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.055728979408741, "rewards/margins": 0.019933853298425674, "rewards/rejected": -0.07566282898187637, "step": 3740 }, { "epoch": 2.189141856392294, "grad_norm": 1.062813122886526, "learning_rate": 8.164965809277262e-07, "log_odds_chosen": 0.18967720866203308, "log_odds_ratio": -0.643785834312439, "logits/chosen": -3.6518776416778564, "logits/rejected": -3.6462180614471436, "logps/chosen": -1.2574691772460938, "logps/rejected": -1.3544132709503174, "loss": 1.3675, "nll_loss": 1.2729402780532837, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0628734678030014, "rewards/margins": 0.004847207106649876, "rewards/rejected": -0.06772066652774811, "step": 3750 }, { "epoch": 2.1949795680093405, "grad_norm": 1.1627828812681185, "learning_rate": 8.154100913168028e-07, "log_odds_chosen": 0.11554882675409317, "log_odds_ratio": -0.6644772291183472, "logits/chosen": -3.5889573097229004, "logits/rejected": -3.9045186042785645, "logps/chosen": -1.291551947593689, "logps/rejected": -1.3694874048233032, "loss": 1.3243, "nll_loss": 1.2600188255310059, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06457759439945221, "rewards/margins": 0.003896772861480713, "rewards/rejected": -0.06847437471151352, "step": 3760 }, { "epoch": 2.2008172796263863, "grad_norm": 2.0267633367162716, "learning_rate": 8.143279274805705e-07, "log_odds_chosen": 0.32095831632614136, "log_odds_ratio": -0.6150846481323242, "logits/chosen": -3.2831318378448486, "logits/rejected": -3.7310280799865723, "logps/chosen": -1.2436717748641968, "logps/rejected": -1.4600882530212402, "loss": 1.3891, "nll_loss": 1.374240517616272, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06218358874320984, "rewards/margins": 0.010820824652910233, "rewards/rejected": -0.07300440967082977, "step": 3770 }, { "epoch": 2.2066549912434326, "grad_norm": 1.3008779789373062, "learning_rate": 8.132500607904444e-07, "log_odds_chosen": 0.23689822852611542, "log_odds_ratio": -0.6119979023933411, "logits/chosen": -3.6875782012939453, "logits/rejected": -3.81459379196167, "logps/chosen": -1.3772327899932861, "logps/rejected": -1.5513728857040405, "loss": 1.3448, "nll_loss": 1.3502830266952515, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06886164098978043, "rewards/margins": 0.008707010187208652, "rewards/rejected": -0.0775686651468277, "step": 3780 }, { "epoch": 2.212492702860479, "grad_norm": 1.553355385271789, "learning_rate": 8.12176462882395e-07, "log_odds_chosen": 0.3688274025917053, "log_odds_ratio": -0.5420976877212524, "logits/chosen": -3.516634464263916, "logits/rejected": -3.5515289306640625, "logps/chosen": -1.2487331628799438, "logps/rejected": -1.5325748920440674, "loss": 1.3761, "nll_loss": 1.243917465209961, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06243665888905525, "rewards/margins": 0.014192087575793266, "rewards/rejected": -0.07662875205278397, "step": 3790 }, { "epoch": 2.2183304144775247, "grad_norm": 2.0440589606575874, "learning_rate": 8.111071056538128e-07, "log_odds_chosen": 0.23704901337623596, "log_odds_ratio": -0.6295875310897827, "logits/chosen": -3.365180253982544, "logits/rejected": -3.826443910598755, "logps/chosen": -1.2456001043319702, "logps/rejected": -1.381035327911377, "loss": 1.323, "nll_loss": 1.2859290838241577, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06228000670671463, "rewards/margins": 0.0067717647179961205, "rewards/rejected": -0.06905176490545273, "step": 3800 }, { "epoch": 2.224168126094571, "grad_norm": 1.4348831941841715, "learning_rate": 8.100419612604182e-07, "log_odds_chosen": 0.2958415150642395, "log_odds_ratio": -0.5811907052993774, "logits/chosen": -3.848050594329834, "logits/rejected": -3.8835113048553467, "logps/chosen": -1.4454946517944336, "logps/rejected": -1.6646686792373657, "loss": 1.4546, "nll_loss": 1.3531386852264404, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07227472960948944, "rewards/margins": 0.010958692990243435, "rewards/rejected": -0.08323343098163605, "step": 3810 }, { "epoch": 2.230005837711617, "grad_norm": 1.4308669326175958, "learning_rate": 8.08981002113217e-07, "log_odds_chosen": 0.4358106553554535, "log_odds_ratio": -0.5410605072975159, "logits/chosen": -3.6193413734436035, "logits/rejected": -3.9143505096435547, "logps/chosen": -1.316807508468628, "logps/rejected": -1.6073909997940063, "loss": 1.411, "nll_loss": 1.329372763633728, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06584037840366364, "rewards/margins": 0.014529171399772167, "rewards/rejected": -0.08036954700946808, "step": 3820 }, { "epoch": 2.235843549328663, "grad_norm": 1.250854338202623, "learning_rate": 8.079242008754989e-07, "log_odds_chosen": 0.6285132169723511, "log_odds_ratio": -0.47512131929397583, "logits/chosen": -3.5449302196502686, "logits/rejected": -3.8883426189422607, "logps/chosen": -1.1251137256622314, "logps/rejected": -1.5246002674102783, "loss": 1.3671, "nll_loss": 1.2232335805892944, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05625568702816963, "rewards/margins": 0.019974328577518463, "rewards/rejected": -0.0762300118803978, "step": 3830 }, { "epoch": 2.2416812609457093, "grad_norm": 1.1345967122678697, "learning_rate": 8.068715304598786e-07, "log_odds_chosen": 0.46162766218185425, "log_odds_ratio": -0.5108269453048706, "logits/chosen": -3.692758560180664, "logits/rejected": -3.7583394050598145, "logps/chosen": -1.2011289596557617, "logps/rejected": -1.5554178953170776, "loss": 1.3413, "nll_loss": 1.2346752882003784, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06005645543336868, "rewards/margins": 0.017714442685246468, "rewards/rejected": -0.0777709037065506, "step": 3840 }, { "epoch": 2.2475189725627556, "grad_norm": 1.3147728162978147, "learning_rate": 8.058229640253803e-07, "log_odds_chosen": 0.1319154053926468, "log_odds_ratio": -0.6848911046981812, "logits/chosen": -3.3080577850341797, "logits/rejected": -3.372044324874878, "logps/chosen": -1.3269727230072021, "logps/rejected": -1.440026044845581, "loss": 1.309, "nll_loss": 1.2361667156219482, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06634863466024399, "rewards/margins": 0.005652659572660923, "rewards/rejected": -0.07200130075216293, "step": 3850 }, { "epoch": 2.2533566841798014, "grad_norm": 0.9886874880170179, "learning_rate": 8.047784749745631e-07, "log_odds_chosen": 0.6582465171813965, "log_odds_ratio": -0.4920591413974762, "logits/chosen": -3.477421522140503, "logits/rejected": -3.5662131309509277, "logps/chosen": -1.105654239654541, "logps/rejected": -1.5293926000595093, "loss": 1.3973, "nll_loss": 1.1843942403793335, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05528271198272705, "rewards/margins": 0.021186914294958115, "rewards/rejected": -0.07646962255239487, "step": 3860 }, { "epoch": 2.2591943957968477, "grad_norm": 1.5962371841343461, "learning_rate": 8.03738036950687e-07, "log_odds_chosen": 0.0485956184566021, "log_odds_ratio": -0.6962941884994507, "logits/chosen": -3.722119092941284, "logits/rejected": -3.806636095046997, "logps/chosen": -1.3683503866195679, "logps/rejected": -1.4116251468658447, "loss": 1.3077, "nll_loss": 1.3671882152557373, "rewards/accuracies": 0.5, "rewards/chosen": -0.0684175193309784, "rewards/margins": 0.0021637328900396824, "rewards/rejected": -0.07058124244213104, "step": 3870 }, { "epoch": 2.265032107413894, "grad_norm": 1.9579908431770794, "learning_rate": 8.027016238349195e-07, "log_odds_chosen": 0.5312049388885498, "log_odds_ratio": -0.4794917702674866, "logits/chosen": -3.4747490882873535, "logits/rejected": -3.849306106567383, "logps/chosen": -1.2152155637741089, "logps/rejected": -1.5942614078521729, "loss": 1.2994, "nll_loss": 1.230672001838684, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.060760773718357086, "rewards/margins": 0.018952293321490288, "rewards/rejected": -0.07971307635307312, "step": 3880 }, { "epoch": 2.2708698190309398, "grad_norm": 1.0270874112911725, "learning_rate": 8.016692097435824e-07, "log_odds_chosen": 0.1215522438287735, "log_odds_ratio": -0.6791900992393494, "logits/chosen": -3.445002317428589, "logits/rejected": -3.4945099353790283, "logps/chosen": -1.3262484073638916, "logps/rejected": -1.4114497900009155, "loss": 1.3564, "nll_loss": 1.2872954607009888, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06631241738796234, "rewards/margins": 0.004260072950273752, "rewards/rejected": -0.07057248055934906, "step": 3890 }, { "epoch": 2.276707530647986, "grad_norm": 0.997891091092135, "learning_rate": 8.006407690254357e-07, "log_odds_chosen": 0.23851275444030762, "log_odds_ratio": -0.6183353662490845, "logits/chosen": -3.451474666595459, "logits/rejected": -3.5941073894500732, "logps/chosen": -1.398550271987915, "logps/rejected": -1.555262804031372, "loss": 1.3782, "nll_loss": 1.3479764461517334, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06992751359939575, "rewards/margins": 0.007835631258785725, "rewards/rejected": -0.0777631476521492, "step": 3900 }, { "epoch": 2.282545242265032, "grad_norm": 1.042774476976744, "learning_rate": 7.996162762590016e-07, "log_odds_chosen": 0.3154487609863281, "log_odds_ratio": -0.5689778327941895, "logits/chosen": -3.7728991508483887, "logits/rejected": -3.715397357940674, "logps/chosen": -1.4037415981292725, "logps/rejected": -1.6214182376861572, "loss": 1.3674, "nll_loss": 1.3330938816070557, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07018707692623138, "rewards/margins": 0.01088382862508297, "rewards/rejected": -0.0810708999633789, "step": 3910 }, { "epoch": 2.288382953882078, "grad_norm": 2.0098221371609597, "learning_rate": 7.98595706249925e-07, "log_odds_chosen": 0.04333147034049034, "log_odds_ratio": -0.7511554956436157, "logits/chosen": -3.5058670043945312, "logits/rejected": -3.568639039993286, "logps/chosen": -1.252348780632019, "logps/rejected": -1.3051707744598389, "loss": 1.3343, "nll_loss": 1.2446309328079224, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06261744350194931, "rewards/margins": 0.0026410971768200397, "rewards/rejected": -0.06525853276252747, "step": 3920 }, { "epoch": 2.2942206654991244, "grad_norm": 1.7268687499079376, "learning_rate": 7.975790340283705e-07, "log_odds_chosen": 0.06851113587617874, "log_odds_ratio": -0.711614727973938, "logits/chosen": -3.8367366790771484, "logits/rejected": -3.844210147857666, "logps/chosen": -1.4972248077392578, "logps/rejected": -1.5852679014205933, "loss": 1.3468, "nll_loss": 1.4060014486312866, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07486124336719513, "rewards/margins": 0.00440216064453125, "rewards/rejected": -0.07926340401172638, "step": 3930 }, { "epoch": 2.3000583771161702, "grad_norm": 0.9997590733749804, "learning_rate": 7.965662348464579e-07, "log_odds_chosen": 0.5321112871170044, "log_odds_ratio": -0.49938860535621643, "logits/chosen": -3.6841254234313965, "logits/rejected": -3.742030620574951, "logps/chosen": -1.174142599105835, "logps/rejected": -1.572761058807373, "loss": 1.2713, "nll_loss": 1.126697301864624, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.058707136660814285, "rewards/margins": 0.019930921494960785, "rewards/rejected": -0.07863805443048477, "step": 3940 }, { "epoch": 2.3058960887332165, "grad_norm": 1.1264821073883426, "learning_rate": 7.9555728417573e-07, "log_odds_chosen": 0.21614162623882294, "log_odds_ratio": -0.6329764127731323, "logits/chosen": -3.5509209632873535, "logits/rejected": -3.7913641929626465, "logps/chosen": -1.2998569011688232, "logps/rejected": -1.4491976499557495, "loss": 1.2933, "nll_loss": 1.3493707180023193, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06499285250902176, "rewards/margins": 0.007467030547559261, "rewards/rejected": -0.072459876537323, "step": 3950 }, { "epoch": 2.3117338003502628, "grad_norm": 1.473849800250576, "learning_rate": 7.945521577046602e-07, "log_odds_chosen": 0.5916136503219604, "log_odds_ratio": -0.5277214646339417, "logits/chosen": -3.5552101135253906, "logits/rejected": -3.4843227863311768, "logps/chosen": -1.488039255142212, "logps/rejected": -1.947206735610962, "loss": 1.4383, "nll_loss": 1.4417433738708496, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07440195977687836, "rewards/margins": 0.02295837178826332, "rewards/rejected": -0.09736034274101257, "step": 3960 }, { "epoch": 2.3175715119673086, "grad_norm": 1.144774089149527, "learning_rate": 7.935508313361897e-07, "log_odds_chosen": 0.33232083916664124, "log_odds_ratio": -0.5494436025619507, "logits/chosen": -3.5180907249450684, "logits/rejected": -3.927680253982544, "logps/chosen": -1.24396550655365, "logps/rejected": -1.4733179807662964, "loss": 1.3529, "nll_loss": 1.2594363689422607, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.06219828873872757, "rewards/margins": 0.01146761979907751, "rewards/rejected": -0.07366590946912766, "step": 3970 }, { "epoch": 2.323409223584355, "grad_norm": 1.031839491071443, "learning_rate": 7.925532811853019e-07, "log_odds_chosen": 0.3270833492279053, "log_odds_ratio": -0.5639451742172241, "logits/chosen": -3.7911548614501953, "logits/rejected": -3.813647747039795, "logps/chosen": -1.4673714637756348, "logps/rejected": -1.7170436382293701, "loss": 1.358, "nll_loss": 1.3955252170562744, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07336857914924622, "rewards/margins": 0.012483599595725536, "rewards/rejected": -0.08585218340158463, "step": 3980 }, { "epoch": 2.329246935201401, "grad_norm": 1.4908853195946064, "learning_rate": 7.915594835766295e-07, "log_odds_chosen": 0.353826105594635, "log_odds_ratio": -0.5449894666671753, "logits/chosen": -3.506638765335083, "logits/rejected": -3.6564040184020996, "logps/chosen": -1.2429616451263428, "logps/rejected": -1.4934203624725342, "loss": 1.2968, "nll_loss": 1.2382066249847412, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0621480830013752, "rewards/margins": 0.012522941455245018, "rewards/rejected": -0.07467101514339447, "step": 3990 }, { "epoch": 2.335084646818447, "grad_norm": 1.8486880788628435, "learning_rate": 7.905694150420949e-07, "log_odds_chosen": 0.24476461112499237, "log_odds_ratio": -0.5875775814056396, "logits/chosen": -3.694812297821045, "logits/rejected": -3.810882091522217, "logps/chosen": -1.537548303604126, "logps/rejected": -1.7405685186386108, "loss": 1.2915, "nll_loss": 1.4232301712036133, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0768774151802063, "rewards/margins": 0.010151011869311333, "rewards/rejected": -0.08702842891216278, "step": 4000 }, { "epoch": 2.3409223584354932, "grad_norm": 1.0358889697983784, "learning_rate": 7.895830523185819e-07, "log_odds_chosen": 0.20839925110340118, "log_odds_ratio": -0.6485505104064941, "logits/chosen": -3.4973220825195312, "logits/rejected": -3.683885097503662, "logps/chosen": -1.333552360534668, "logps/rejected": -1.4812623262405396, "loss": 1.3406, "nll_loss": 1.2696678638458252, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06667762249708176, "rewards/margins": 0.007385495118796825, "rewards/rejected": -0.07406311482191086, "step": 4010 }, { "epoch": 2.3467600700525395, "grad_norm": 0.9815454680848172, "learning_rate": 7.886003723456397e-07, "log_odds_chosen": 0.6239428520202637, "log_odds_ratio": -0.501423716545105, "logits/chosen": -3.5910511016845703, "logits/rejected": -3.6289966106414795, "logps/chosen": -1.3345519304275513, "logps/rejected": -1.7665554285049438, "loss": 1.408, "nll_loss": 1.3168866634368896, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.06672760099172592, "rewards/margins": 0.021600166335701942, "rewards/rejected": -0.08832775801420212, "step": 4020 }, { "epoch": 2.3525977816695853, "grad_norm": 1.115885459782697, "learning_rate": 7.876213522632199e-07, "log_odds_chosen": 0.5560516119003296, "log_odds_ratio": -0.4884907603263855, "logits/chosen": -3.458580493927002, "logits/rejected": -3.8223519325256348, "logps/chosen": -1.2626593112945557, "logps/rejected": -1.662915587425232, "loss": 1.2586, "nll_loss": 1.2418534755706787, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.06313297152519226, "rewards/margins": 0.020012816414237022, "rewards/rejected": -0.08314578980207443, "step": 4030 }, { "epoch": 2.3584354932866316, "grad_norm": 1.0289028984775583, "learning_rate": 7.866459694094408e-07, "log_odds_chosen": 0.4061863422393799, "log_odds_ratio": -0.530768096446991, "logits/chosen": -3.5232510566711426, "logits/rejected": -3.612356662750244, "logps/chosen": -1.141537070274353, "logps/rejected": -1.4411685466766357, "loss": 1.3527, "nll_loss": 1.2519302368164062, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05707686021924019, "rewards/margins": 0.014981567859649658, "rewards/rejected": -0.07205842435359955, "step": 4040 }, { "epoch": 2.364273204903678, "grad_norm": 1.3000936100139653, "learning_rate": 7.856742013183862e-07, "log_odds_chosen": 0.6619856953620911, "log_odds_ratio": -0.5126733183860779, "logits/chosen": -3.524796724319458, "logits/rejected": -3.730581283569336, "logps/chosen": -1.2440799474716187, "logps/rejected": -1.7706530094146729, "loss": 1.3467, "nll_loss": 1.221119999885559, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06220399588346481, "rewards/margins": 0.02632865309715271, "rewards/rejected": -0.08853264153003693, "step": 4050 }, { "epoch": 2.3701109165207237, "grad_norm": 1.1377917404106181, "learning_rate": 7.847060257179306e-07, "log_odds_chosen": 0.2562550902366638, "log_odds_ratio": -0.601247251033783, "logits/chosen": -3.8245558738708496, "logits/rejected": -3.7503209114074707, "logps/chosen": -1.420677900314331, "logps/rejected": -1.610695481300354, "loss": 1.3589, "nll_loss": 1.4336875677108765, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07103389501571655, "rewards/margins": 0.009500885382294655, "rewards/rejected": -0.08053477108478546, "step": 4060 }, { "epoch": 2.37594862813777, "grad_norm": 1.152573573439539, "learning_rate": 7.837414205275962e-07, "log_odds_chosen": 0.1534736007452011, "log_odds_ratio": -0.6776357293128967, "logits/chosen": -3.899549961090088, "logits/rejected": -3.7218635082244873, "logps/chosen": -1.3788657188415527, "logps/rejected": -1.4635006189346313, "loss": 1.4173, "nll_loss": 1.3646899461746216, "rewards/accuracies": 0.5, "rewards/chosen": -0.06894328445196152, "rewards/margins": 0.004231743980199099, "rewards/rejected": -0.07317502796649933, "step": 4070 }, { "epoch": 2.3817863397548162, "grad_norm": 1.1049404817441175, "learning_rate": 7.82780363856437e-07, "log_odds_chosen": 0.4294154644012451, "log_odds_ratio": -0.5460227727890015, "logits/chosen": -3.634387254714966, "logits/rejected": -3.855067014694214, "logps/chosen": -1.3257923126220703, "logps/rejected": -1.6508433818817139, "loss": 1.3976, "nll_loss": 1.2967407703399658, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06628961861133575, "rewards/margins": 0.016252553090453148, "rewards/rejected": -0.08254216611385345, "step": 4080 }, { "epoch": 2.387624051371862, "grad_norm": 1.003296150245038, "learning_rate": 7.818228340009527e-07, "log_odds_chosen": 0.03641325980424881, "log_odds_ratio": -0.6900805234909058, "logits/chosen": -3.7990384101867676, "logits/rejected": -3.5646986961364746, "logps/chosen": -1.3639791011810303, "logps/rejected": -1.3803296089172363, "loss": 1.3758, "nll_loss": 1.3108468055725098, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06819895654916763, "rewards/margins": 0.0008175313705578446, "rewards/rejected": -0.0690164789557457, "step": 4090 }, { "epoch": 2.3934617629889083, "grad_norm": 1.0351243073420535, "learning_rate": 7.808688094430305e-07, "log_odds_chosen": 0.7399144172668457, "log_odds_ratio": -0.4940679967403412, "logits/chosen": -3.4940993785858154, "logits/rejected": -3.7086615562438965, "logps/chosen": -1.296553373336792, "logps/rejected": -1.8544323444366455, "loss": 1.3463, "nll_loss": 1.2989463806152344, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06482766568660736, "rewards/margins": 0.027893954887986183, "rewards/rejected": -0.0927216112613678, "step": 4100 }, { "epoch": 2.3992994746059546, "grad_norm": 1.078465570826477, "learning_rate": 7.799182688479127e-07, "log_odds_chosen": 0.4186936020851135, "log_odds_ratio": -0.5396771430969238, "logits/chosen": -3.37813138961792, "logits/rejected": -3.5883612632751465, "logps/chosen": -1.306175947189331, "logps/rejected": -1.6126264333724976, "loss": 1.2575, "nll_loss": 1.2336757183074951, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06530880182981491, "rewards/margins": 0.015322531573474407, "rewards/rejected": -0.0806313306093216, "step": 4110 }, { "epoch": 2.4051371862230004, "grad_norm": 1.3876293871369034, "learning_rate": 7.789711910621948e-07, "log_odds_chosen": 0.314229279756546, "log_odds_ratio": -0.5887566804885864, "logits/chosen": -3.524479389190674, "logits/rejected": -3.6442131996154785, "logps/chosen": -1.3849337100982666, "logps/rejected": -1.6004960536956787, "loss": 1.4316, "nll_loss": 1.3382606506347656, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06924669444561005, "rewards/margins": 0.010778116062283516, "rewards/rejected": -0.08002480119466782, "step": 4120 }, { "epoch": 2.4109748978400467, "grad_norm": 1.442333056115696, "learning_rate": 7.780275551118465e-07, "log_odds_chosen": 0.265718013048172, "log_odds_ratio": -0.6283880472183228, "logits/chosen": -3.685558319091797, "logits/rejected": -3.8577492237091064, "logps/chosen": -1.3644132614135742, "logps/rejected": -1.5676016807556152, "loss": 1.3475, "nll_loss": 1.3359495401382446, "rewards/accuracies": 0.5, "rewards/chosen": -0.06822066009044647, "rewards/margins": 0.01015942357480526, "rewards/rejected": -0.07838009297847748, "step": 4130 }, { "epoch": 2.416812609457093, "grad_norm": 1.328777046909203, "learning_rate": 7.770873402002615e-07, "log_odds_chosen": 0.4617186188697815, "log_odds_ratio": -0.5568820238113403, "logits/chosen": -3.2734534740448, "logits/rejected": -3.6214890480041504, "logps/chosen": -1.0846898555755615, "logps/rejected": -1.3882132768630981, "loss": 1.3367, "nll_loss": 1.1255323886871338, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.054234493523836136, "rewards/margins": 0.015176167711615562, "rewards/rejected": -0.06941066682338715, "step": 4140 }, { "epoch": 2.422650321074139, "grad_norm": 1.0635604549052877, "learning_rate": 7.761505257063329e-07, "log_odds_chosen": 0.7371212244033813, "log_odds_ratio": -0.43210601806640625, "logits/chosen": -3.6558661460876465, "logits/rejected": -3.7648606300354004, "logps/chosen": -1.1430461406707764, "logps/rejected": -1.6016225814819336, "loss": 1.3117, "nll_loss": 1.1580312252044678, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05715230852365494, "rewards/margins": 0.02292882278561592, "rewards/rejected": -0.08008112758398056, "step": 4150 }, { "epoch": 2.428488032691185, "grad_norm": 1.2791715773365582, "learning_rate": 7.752170911825528e-07, "log_odds_chosen": 0.22968646883964539, "log_odds_ratio": -0.6175975799560547, "logits/chosen": -3.7523446083068848, "logits/rejected": -3.632612705230713, "logps/chosen": -1.5471794605255127, "logps/rejected": -1.6875340938568115, "loss": 1.373, "nll_loss": 1.3835314512252808, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07735896855592728, "rewards/margins": 0.007017730735242367, "rewards/rejected": -0.08437670767307281, "step": 4160 }, { "epoch": 2.4343257443082313, "grad_norm": 1.0106380745058086, "learning_rate": 7.742870163531387e-07, "log_odds_chosen": 0.21971186995506287, "log_odds_ratio": -0.6078029870986938, "logits/chosen": -3.534611940383911, "logits/rejected": -3.756073474884033, "logps/chosen": -1.394864797592163, "logps/rejected": -1.5461068153381348, "loss": 1.4959, "nll_loss": 1.4088984727859497, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06974324584007263, "rewards/margins": 0.007562094368040562, "rewards/rejected": -0.07730533182621002, "step": 4170 }, { "epoch": 2.440163455925277, "grad_norm": 1.6407602222648379, "learning_rate": 7.733602811121825e-07, "log_odds_chosen": 0.4349486231803894, "log_odds_ratio": -0.5080839991569519, "logits/chosen": -3.6220545768737793, "logits/rejected": -3.941779613494873, "logps/chosen": -1.3381706476211548, "logps/rejected": -1.6605552434921265, "loss": 1.3641, "nll_loss": 1.3328360319137573, "rewards/accuracies": 1.0, "rewards/chosen": -0.06690853089094162, "rewards/margins": 0.01611923798918724, "rewards/rejected": -0.08302777260541916, "step": 4180 }, { "epoch": 2.4460011675423234, "grad_norm": 1.1839844365187027, "learning_rate": 7.724368655218262e-07, "log_odds_chosen": 0.2524792551994324, "log_odds_ratio": -0.6244614720344543, "logits/chosen": -3.5445327758789062, "logits/rejected": -3.5797276496887207, "logps/chosen": -1.178684949874878, "logps/rejected": -1.3419727087020874, "loss": 1.4079, "nll_loss": 1.2507035732269287, "rewards/accuracies": 0.5, "rewards/chosen": -0.058934248983860016, "rewards/margins": 0.008164389058947563, "rewards/rejected": -0.06709863245487213, "step": 4190 }, { "epoch": 2.4518388791593697, "grad_norm": 1.2974827209059727, "learning_rate": 7.715167498104596e-07, "log_odds_chosen": 0.20861110091209412, "log_odds_ratio": -0.6097008585929871, "logits/chosen": -3.641836166381836, "logits/rejected": -3.740596055984497, "logps/chosen": -1.4391405582427979, "logps/rejected": -1.6004644632339478, "loss": 1.3351, "nll_loss": 1.4502677917480469, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07195703685283661, "rewards/margins": 0.008066195994615555, "rewards/rejected": -0.08002322167158127, "step": 4200 }, { "epoch": 2.4576765907764155, "grad_norm": 1.7822509119642815, "learning_rate": 7.705999143709424e-07, "log_odds_chosen": 0.3374689519405365, "log_odds_ratio": -0.5523630380630493, "logits/chosen": -3.3949484825134277, "logits/rejected": -3.790358304977417, "logps/chosen": -1.2528809309005737, "logps/rejected": -1.4887603521347046, "loss": 1.2859, "nll_loss": 1.2491124868392944, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.06264404952526093, "rewards/margins": 0.011793969199061394, "rewards/rejected": -0.07443802058696747, "step": 4210 }, { "epoch": 2.463514302393462, "grad_norm": 0.9896533424582, "learning_rate": 7.69686339758849e-07, "log_odds_chosen": 0.6020015478134155, "log_odds_ratio": -0.521888792514801, "logits/chosen": -3.3203253746032715, "logits/rejected": -3.596281051635742, "logps/chosen": -1.232358694076538, "logps/rejected": -1.629847764968872, "loss": 1.3374, "nll_loss": 1.199825644493103, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.061617933213710785, "rewards/margins": 0.01987444795668125, "rewards/rejected": -0.08149238675832748, "step": 4220 }, { "epoch": 2.469352014010508, "grad_norm": 1.1900754541567393, "learning_rate": 7.687760066907376e-07, "log_odds_chosen": 0.34801608324050903, "log_odds_ratio": -0.5787747502326965, "logits/chosen": -3.764467716217041, "logits/rejected": -3.887603282928467, "logps/chosen": -1.3519443273544312, "logps/rejected": -1.5474247932434082, "loss": 1.3909, "nll_loss": 1.3073453903198242, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06759722530841827, "rewards/margins": 0.009774019941687584, "rewards/rejected": -0.07737123221158981, "step": 4230 }, { "epoch": 2.475189725627554, "grad_norm": 0.9991234916343392, "learning_rate": 7.678688960424391e-07, "log_odds_chosen": 0.3661023676395416, "log_odds_ratio": -0.5927722454071045, "logits/chosen": -3.4252243041992188, "logits/rejected": -3.5149357318878174, "logps/chosen": -1.2786228656768799, "logps/rejected": -1.503296136856079, "loss": 1.3846, "nll_loss": 1.1856348514556885, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06393114477396011, "rewards/margins": 0.011233674362301826, "rewards/rejected": -0.0751648098230362, "step": 4240 }, { "epoch": 2.4810274372446, "grad_norm": 1.4422130035860623, "learning_rate": 7.669649888473705e-07, "log_odds_chosen": 0.6897584795951843, "log_odds_ratio": -0.5115848183631897, "logits/chosen": -3.308636426925659, "logits/rejected": -3.741466999053955, "logps/chosen": -1.2085676193237305, "logps/rejected": -1.653320550918579, "loss": 1.3931, "nll_loss": 1.2729769945144653, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06042838841676712, "rewards/margins": 0.022237643599510193, "rewards/rejected": -0.08266602456569672, "step": 4250 }, { "epoch": 2.4868651488616464, "grad_norm": 2.498176524114719, "learning_rate": 7.660642662948695e-07, "log_odds_chosen": 0.4389571249485016, "log_odds_ratio": -0.5380290746688843, "logits/chosen": -3.612717390060425, "logits/rejected": -3.8661227226257324, "logps/chosen": -1.4715601205825806, "logps/rejected": -1.826385259628296, "loss": 1.4388, "nll_loss": 1.50331449508667, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.07357800006866455, "rewards/margins": 0.017741259187459946, "rewards/rejected": -0.09131927043199539, "step": 4260 }, { "epoch": 2.4927028604786923, "grad_norm": 1.1313544676463074, "learning_rate": 7.651667097285499e-07, "log_odds_chosen": 0.4711624085903168, "log_odds_ratio": -0.5114272832870483, "logits/chosen": -3.404935359954834, "logits/rejected": -3.469174861907959, "logps/chosen": -0.9997550249099731, "logps/rejected": -1.2770328521728516, "loss": 1.3435, "nll_loss": 1.0220756530761719, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.049987755715847015, "rewards/margins": 0.013863894157111645, "rewards/rejected": -0.06385163962841034, "step": 4270 }, { "epoch": 2.4985405720957385, "grad_norm": 1.592266135967239, "learning_rate": 7.642723006446789e-07, "log_odds_chosen": 0.005055745132267475, "log_odds_ratio": -0.713089644908905, "logits/chosen": -3.7157089710235596, "logits/rejected": -3.581876039505005, "logps/chosen": -1.6132758855819702, "logps/rejected": -1.5907477140426636, "loss": 1.4127, "nll_loss": 1.4817121028900146, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.08066380769014359, "rewards/margins": -0.001126415329053998, "rewards/rejected": -0.07953738421201706, "step": 4280 }, { "epoch": 2.504378283712785, "grad_norm": 1.3210564103570037, "learning_rate": 7.633810206905743e-07, "log_odds_chosen": 0.17834734916687012, "log_odds_ratio": -0.6479122042655945, "logits/chosen": -3.4008548259735107, "logits/rejected": -3.577030897140503, "logps/chosen": -1.2764064073562622, "logps/rejected": -1.3747373819351196, "loss": 1.4788, "nll_loss": 1.3198808431625366, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06382031738758087, "rewards/margins": 0.004916543606668711, "rewards/rejected": -0.06873686611652374, "step": 4290 }, { "epoch": 2.5102159953298306, "grad_norm": 1.1043028292163053, "learning_rate": 7.624928516630234e-07, "log_odds_chosen": 0.40462154150009155, "log_odds_ratio": -0.5624701380729675, "logits/chosen": -3.720623016357422, "logits/rejected": -3.728416919708252, "logps/chosen": -1.5325725078582764, "logps/rejected": -1.854771614074707, "loss": 1.3868, "nll_loss": 1.482374668121338, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07662862539291382, "rewards/margins": 0.016109948977828026, "rewards/rejected": -0.092738576233387, "step": 4300 }, { "epoch": 2.516053706946877, "grad_norm": 1.0650604789758802, "learning_rate": 7.616077755067217e-07, "log_odds_chosen": 0.720946192741394, "log_odds_ratio": -0.4566906988620758, "logits/chosen": -3.3578929901123047, "logits/rejected": -3.622135639190674, "logps/chosen": -1.028786540031433, "logps/rejected": -1.515596628189087, "loss": 1.2959, "nll_loss": 1.1043310165405273, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.051439326256513596, "rewards/margins": 0.024340510368347168, "rewards/rejected": -0.07577983289957047, "step": 4310 }, { "epoch": 2.521891418563923, "grad_norm": 1.5427765741796777, "learning_rate": 7.607257743127307e-07, "log_odds_chosen": 0.5943984389305115, "log_odds_ratio": -0.4952498972415924, "logits/chosen": -3.487100124359131, "logits/rejected": -3.618534803390503, "logps/chosen": -1.2128658294677734, "logps/rejected": -1.6285221576690674, "loss": 1.2284, "nll_loss": 1.186262845993042, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06064329296350479, "rewards/margins": 0.020782817155122757, "rewards/rejected": -0.08142612129449844, "step": 4320 }, { "epoch": 2.527729130180969, "grad_norm": 0.9289480938679701, "learning_rate": 7.598468303169562e-07, "log_odds_chosen": 0.379428505897522, "log_odds_ratio": -0.5330532193183899, "logits/chosen": -3.663656234741211, "logits/rejected": -3.6978607177734375, "logps/chosen": -1.3319313526153564, "logps/rejected": -1.6093353033065796, "loss": 1.2941, "nll_loss": 1.2959095239639282, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06659656763076782, "rewards/margins": 0.013870203867554665, "rewards/rejected": -0.08046676963567734, "step": 4330 }, { "epoch": 2.5335668417980153, "grad_norm": 1.9889991137481098, "learning_rate": 7.589709258986455e-07, "log_odds_chosen": 0.15393072366714478, "log_odds_ratio": -0.6892382502555847, "logits/chosen": -3.6660244464874268, "logits/rejected": -3.5470008850097656, "logps/chosen": -1.4339954853057861, "logps/rejected": -1.4756710529327393, "loss": 1.3152, "nll_loss": 1.3396399021148682, "rewards/accuracies": 0.5, "rewards/chosen": -0.07169977575540543, "rewards/margins": 0.002083770465105772, "rewards/rejected": -0.07378354668617249, "step": 4340 }, { "epoch": 2.5394045534150615, "grad_norm": 1.522907763716382, "learning_rate": 7.580980435789034e-07, "log_odds_chosen": 0.174564391374588, "log_odds_ratio": -0.6290434002876282, "logits/chosen": -3.7889208793640137, "logits/rejected": -3.783806324005127, "logps/chosen": -1.4695689678192139, "logps/rejected": -1.600912094116211, "loss": 1.4268, "nll_loss": 1.3781836032867432, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07347844541072845, "rewards/margins": 0.006567155010998249, "rewards/rejected": -0.08004561066627502, "step": 4350 }, { "epoch": 2.5452422650321074, "grad_norm": 1.0470700974234017, "learning_rate": 7.572281660192283e-07, "log_odds_chosen": 0.7275976538658142, "log_odds_ratio": -0.460817813873291, "logits/chosen": -3.5390162467956543, "logits/rejected": -3.6564154624938965, "logps/chosen": -1.1346652507781982, "logps/rejected": -1.5899598598480225, "loss": 1.3716, "nll_loss": 1.330480933189392, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.05673326179385185, "rewards/margins": 0.02276473306119442, "rewards/rejected": -0.07949799299240112, "step": 4360 }, { "epoch": 2.5510799766491536, "grad_norm": 1.0478733444841315, "learning_rate": 7.563612760200645e-07, "log_odds_chosen": 0.6985198259353638, "log_odds_ratio": -0.42733412981033325, "logits/chosen": -3.6684749126434326, "logits/rejected": -3.870738983154297, "logps/chosen": -1.140910029411316, "logps/rejected": -1.6043373346328735, "loss": 1.3021, "nll_loss": 1.2304344177246094, "rewards/accuracies": 1.0, "rewards/chosen": -0.05704550817608833, "rewards/margins": 0.02317136339843273, "rewards/rejected": -0.08021686971187592, "step": 4370 }, { "epoch": 2.5569176882662, "grad_norm": 1.3738265642024323, "learning_rate": 7.554973565193743e-07, "log_odds_chosen": 0.31344205141067505, "log_odds_ratio": -0.6276916861534119, "logits/chosen": -3.6905016899108887, "logits/rejected": -3.7362430095672607, "logps/chosen": -1.310115098953247, "logps/rejected": -1.5161656141281128, "loss": 1.3605, "nll_loss": 1.1931631565093994, "rewards/accuracies": 0.5, "rewards/chosen": -0.06550575792789459, "rewards/margins": 0.010302532464265823, "rewards/rejected": -0.07580828666687012, "step": 4380 }, { "epoch": 2.5627553998832457, "grad_norm": 1.0542220078618536, "learning_rate": 7.546363905912276e-07, "log_odds_chosen": 0.1787935197353363, "log_odds_ratio": -0.6425135731697083, "logits/chosen": -3.7779953479766846, "logits/rejected": -3.657644271850586, "logps/chosen": -1.2439489364624023, "logps/rejected": -1.3830105066299438, "loss": 1.2747, "nll_loss": 1.2336000204086304, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06219743564724922, "rewards/margins": 0.006953080650418997, "rewards/rejected": -0.06915052235126495, "step": 4390 }, { "epoch": 2.568593111500292, "grad_norm": 1.2584003034708513, "learning_rate": 7.537783614444091e-07, "log_odds_chosen": 0.8865945935249329, "log_odds_ratio": -0.4891134798526764, "logits/chosen": -3.368286609649658, "logits/rejected": -3.6218605041503906, "logps/chosen": -1.3417646884918213, "logps/rejected": -2.1152710914611816, "loss": 1.3898, "nll_loss": 1.3685410022735596, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06708823889493942, "rewards/margins": 0.03867531940340996, "rewards/rejected": -0.10576355457305908, "step": 4400 }, { "epoch": 2.574430823117338, "grad_norm": 1.3288767409499218, "learning_rate": 7.529232524210427e-07, "log_odds_chosen": 0.31753212213516235, "log_odds_ratio": -0.5870558023452759, "logits/chosen": -3.563490390777588, "logits/rejected": -3.7228920459747314, "logps/chosen": -1.2357207536697388, "logps/rejected": -1.437811255455017, "loss": 1.3588, "nll_loss": 1.2055788040161133, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06178603321313858, "rewards/margins": 0.010104525834321976, "rewards/rejected": -0.07189056277275085, "step": 4410 }, { "epoch": 2.580268534734384, "grad_norm": 1.469127859920187, "learning_rate": 7.520710469952336e-07, "log_odds_chosen": 0.1949494630098343, "log_odds_ratio": -0.6259863972663879, "logits/chosen": -3.5851874351501465, "logits/rejected": -3.715384006500244, "logps/chosen": -1.3980363607406616, "logps/rejected": -1.5770702362060547, "loss": 1.319, "nll_loss": 1.4016520977020264, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06990182399749756, "rewards/margins": 0.008951690047979355, "rewards/rejected": -0.07885350286960602, "step": 4420 }, { "epoch": 2.5861062463514304, "grad_norm": 1.8579553851863546, "learning_rate": 7.512217287717264e-07, "log_odds_chosen": 0.3588981628417969, "log_odds_ratio": -0.5657521486282349, "logits/chosen": -3.7323498725891113, "logits/rejected": -3.718714952468872, "logps/chosen": -1.4262539148330688, "logps/rejected": -1.6831376552581787, "loss": 1.2702, "nll_loss": 1.3483325242996216, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07131270319223404, "rewards/margins": 0.012844192795455456, "rewards/rejected": -0.08415688574314117, "step": 4430 }, { "epoch": 2.591943957968476, "grad_norm": 1.8654532587611166, "learning_rate": 7.503752814845804e-07, "log_odds_chosen": 0.1963305026292801, "log_odds_ratio": -0.625575065612793, "logits/chosen": -3.7674851417541504, "logits/rejected": -3.7626547813415527, "logps/chosen": -1.5582386255264282, "logps/rejected": -1.7145220041275024, "loss": 1.3375, "nll_loss": 1.4647914171218872, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07791192829608917, "rewards/margins": 0.007814162410795689, "rewards/rejected": -0.08572609722614288, "step": 4440 }, { "epoch": 2.5977816695855225, "grad_norm": 1.0059314595040818, "learning_rate": 7.495316889958615e-07, "log_odds_chosen": 0.38506633043289185, "log_odds_ratio": -0.5428270101547241, "logits/chosen": -3.234342098236084, "logits/rejected": -3.7300140857696533, "logps/chosen": -1.273542046546936, "logps/rejected": -1.52472722530365, "loss": 1.3546, "nll_loss": 1.2962620258331299, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0636771023273468, "rewards/margins": 0.012559259310364723, "rewards/rejected": -0.07623635977506638, "step": 4450 }, { "epoch": 2.6036193812025687, "grad_norm": 1.7661601859783393, "learning_rate": 7.486909352943498e-07, "log_odds_chosen": 0.467414528131485, "log_odds_ratio": -0.536087155342102, "logits/chosen": -3.268976926803589, "logits/rejected": -3.6289799213409424, "logps/chosen": -1.4447723627090454, "logps/rejected": -1.7910102605819702, "loss": 1.3843, "nll_loss": 1.3908330202102661, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07223862409591675, "rewards/margins": 0.01731189526617527, "rewards/rejected": -0.08955051749944687, "step": 4460 }, { "epoch": 2.6094570928196146, "grad_norm": 1.0697894350234018, "learning_rate": 7.478530044942631e-07, "log_odds_chosen": 0.23718483746051788, "log_odds_ratio": -0.6226148009300232, "logits/chosen": -3.7635903358459473, "logits/rejected": -3.8748250007629395, "logps/chosen": -1.3248541355133057, "logps/rejected": -1.5595533847808838, "loss": 1.377, "nll_loss": 1.4344900846481323, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06624270975589752, "rewards/margins": 0.011734960600733757, "rewards/rejected": -0.07797767221927643, "step": 4470 }, { "epoch": 2.615294804436661, "grad_norm": 1.5274875888258503, "learning_rate": 7.47017880833996e-07, "log_odds_chosen": 0.42450985312461853, "log_odds_ratio": -0.5065356492996216, "logits/chosen": -3.2656192779541016, "logits/rejected": -3.837689161300659, "logps/chosen": -1.1769591569900513, "logps/rejected": -1.4772686958312988, "loss": 1.3503, "nll_loss": 1.2731959819793701, "rewards/accuracies": 1.0, "rewards/chosen": -0.058847952634096146, "rewards/margins": 0.01501548569649458, "rewards/rejected": -0.0738634392619133, "step": 4480 }, { "epoch": 2.6211325160537067, "grad_norm": 1.399974883291381, "learning_rate": 7.461855486748755e-07, "log_odds_chosen": 0.38827696442604065, "log_odds_ratio": -0.5257056355476379, "logits/chosen": -3.650022029876709, "logits/rejected": -3.749069929122925, "logps/chosen": -1.5243818759918213, "logps/rejected": -1.8436895608901978, "loss": 1.4042, "nll_loss": 1.3956434726715088, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.0762190967798233, "rewards/margins": 0.015965379774570465, "rewards/rejected": -0.09218447655439377, "step": 4490 }, { "epoch": 2.626970227670753, "grad_norm": 1.3357560129386916, "learning_rate": 7.4535599249993e-07, "log_odds_chosen": 0.54172682762146, "log_odds_ratio": -0.5275653004646301, "logits/chosen": -3.428942918777466, "logits/rejected": -3.558269500732422, "logps/chosen": -1.1056445837020874, "logps/rejected": -1.4993008375167847, "loss": 1.3146, "nll_loss": 1.1597810983657837, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05528222396969795, "rewards/margins": 0.019682809710502625, "rewards/rejected": -0.07496503740549088, "step": 4500 }, { "epoch": 2.632807939287799, "grad_norm": 1.6592574407261202, "learning_rate": 7.445291969126747e-07, "log_odds_chosen": 0.2968374490737915, "log_odds_ratio": -0.5706366300582886, "logits/chosen": -3.6357085704803467, "logits/rejected": -3.666624069213867, "logps/chosen": -1.3977539539337158, "logps/rejected": -1.6077009439468384, "loss": 1.4084, "nll_loss": 1.3373079299926758, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06988769769668579, "rewards/margins": 0.010497348383069038, "rewards/rejected": -0.08038504421710968, "step": 4510 }, { "epoch": 2.638645650904845, "grad_norm": 1.6829668797976276, "learning_rate": 7.43705146635912e-07, "log_odds_chosen": 0.30358314514160156, "log_odds_ratio": -0.6149829626083374, "logits/chosen": -3.579745054244995, "logits/rejected": -3.7004809379577637, "logps/chosen": -1.0458054542541504, "logps/rejected": -1.2441623210906982, "loss": 1.243, "nll_loss": 1.1620001792907715, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05229027196764946, "rewards/margins": 0.009917848743498325, "rewards/rejected": -0.06220811605453491, "step": 4520 }, { "epoch": 2.6444833625218913, "grad_norm": 1.2655022310841226, "learning_rate": 7.428838265105448e-07, "log_odds_chosen": 0.001612398074939847, "log_odds_ratio": -0.7568306922912598, "logits/chosen": -3.6289291381835938, "logits/rejected": -3.876047134399414, "logps/chosen": -1.4386510848999023, "logps/rejected": -1.4497865438461304, "loss": 1.4195, "nll_loss": 1.5255053043365479, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07193255424499512, "rewards/margins": 0.0005567710613831878, "rewards/rejected": -0.07248932868242264, "step": 4530 }, { "epoch": 2.6503210741389376, "grad_norm": 2.1379382760704435, "learning_rate": 7.42065221494406e-07, "log_odds_chosen": 0.13790249824523926, "log_odds_ratio": -0.6497594118118286, "logits/chosen": -3.699751377105713, "logits/rejected": -3.781980514526367, "logps/chosen": -1.4123404026031494, "logps/rejected": -1.5064523220062256, "loss": 1.3606, "nll_loss": 1.3067268133163452, "rewards/accuracies": 0.5, "rewards/chosen": -0.07061702013015747, "rewards/margins": 0.004705597646534443, "rewards/rejected": -0.07532262057065964, "step": 4540 }, { "epoch": 2.6561587857559834, "grad_norm": 1.2599546035023763, "learning_rate": 7.412493166611012e-07, "log_odds_chosen": 0.03558870777487755, "log_odds_ratio": -0.6921964883804321, "logits/chosen": -3.7373828887939453, "logits/rejected": -3.731539487838745, "logps/chosen": -1.6277077198028564, "logps/rejected": -1.6514556407928467, "loss": 1.4102, "nll_loss": 1.5806028842926025, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08138538897037506, "rewards/margins": 0.0011874024057760835, "rewards/rejected": -0.08257278800010681, "step": 4550 }, { "epoch": 2.6619964973730297, "grad_norm": 1.5792534728390164, "learning_rate": 7.404360971988655e-07, "log_odds_chosen": 0.11527673900127411, "log_odds_ratio": -0.6657832860946655, "logits/chosen": -3.7286124229431152, "logits/rejected": -3.83191180229187, "logps/chosen": -1.3979746103286743, "logps/rejected": -1.46506667137146, "loss": 1.3387, "nll_loss": 1.329487681388855, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06989873200654984, "rewards/margins": 0.00335461157374084, "rewards/rejected": -0.0732533410191536, "step": 4560 }, { "epoch": 2.667834208990076, "grad_norm": 1.2315977159004747, "learning_rate": 7.396255484094341e-07, "log_odds_chosen": 0.34932225942611694, "log_odds_ratio": -0.5743382573127747, "logits/chosen": -3.6735141277313232, "logits/rejected": -3.610457181930542, "logps/chosen": -1.3995628356933594, "logps/rejected": -1.6261110305786133, "loss": 1.4211, "nll_loss": 1.4400970935821533, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.06997814774513245, "rewards/margins": 0.011327404528856277, "rewards/rejected": -0.08130554854869843, "step": 4570 }, { "epoch": 2.6736719206071218, "grad_norm": 1.1398100878598973, "learning_rate": 7.388176557069273e-07, "log_odds_chosen": 0.17825008928775787, "log_odds_ratio": -0.6582268476486206, "logits/chosen": -3.4534358978271484, "logits/rejected": -3.3922317028045654, "logps/chosen": -1.4523494243621826, "logps/rejected": -1.570507526397705, "loss": 1.4008, "nll_loss": 1.3531328439712524, "rewards/accuracies": 0.5, "rewards/chosen": -0.07261747866868973, "rewards/margins": 0.005907907150685787, "rewards/rejected": -0.07852537930011749, "step": 4580 }, { "epoch": 2.679509632224168, "grad_norm": 1.4835761544438852, "learning_rate": 7.380124046167461e-07, "log_odds_chosen": 0.19851037859916687, "log_odds_ratio": -0.6320118308067322, "logits/chosen": -3.5147175788879395, "logits/rejected": -3.775547742843628, "logps/chosen": -1.429049015045166, "logps/rejected": -1.573608636856079, "loss": 1.3709, "nll_loss": 1.4195955991744995, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07145245373249054, "rewards/margins": 0.007227980997413397, "rewards/rejected": -0.07868043333292007, "step": 4590 }, { "epoch": 2.6853473438412143, "grad_norm": 1.1036050520012173, "learning_rate": 7.372097807744858e-07, "log_odds_chosen": 0.9084596633911133, "log_odds_ratio": -0.3986671566963196, "logits/chosen": -3.551112413406372, "logits/rejected": -3.7952332496643066, "logps/chosen": -0.9510765075683594, "logps/rejected": -1.4036376476287842, "loss": 1.2207, "nll_loss": 1.120384693145752, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.04755382984876633, "rewards/margins": 0.022628046572208405, "rewards/rejected": -0.07018187642097473, "step": 4600 }, { "epoch": 2.69118505545826, "grad_norm": 1.1444809479868305, "learning_rate": 7.364097699248571e-07, "log_odds_chosen": 0.30873948335647583, "log_odds_ratio": -0.5825672149658203, "logits/chosen": -3.819852352142334, "logits/rejected": -3.9398422241210938, "logps/chosen": -1.4784214496612549, "logps/rejected": -1.7308578491210938, "loss": 1.4003, "nll_loss": 1.3597805500030518, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0739210769534111, "rewards/margins": 0.012621818110346794, "rewards/rejected": -0.08654288947582245, "step": 4610 }, { "epoch": 2.6970227670753064, "grad_norm": 1.5271842672456675, "learning_rate": 7.356123579206247e-07, "log_odds_chosen": 0.3870165944099426, "log_odds_ratio": -0.6038880944252014, "logits/chosen": -3.659924030303955, "logits/rejected": -3.746086835861206, "logps/chosen": -1.3438364267349243, "logps/rejected": -1.5358312129974365, "loss": 1.4135, "nll_loss": 1.2890079021453857, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06719182431697845, "rewards/margins": 0.009599743410944939, "rewards/rejected": -0.07679156959056854, "step": 4620 }, { "epoch": 2.7028604786923527, "grad_norm": 1.3409345961134806, "learning_rate": 7.348175307215552e-07, "log_odds_chosen": 0.5349884629249573, "log_odds_ratio": -0.5463269352912903, "logits/chosen": -3.620553493499756, "logits/rejected": -3.683053493499756, "logps/chosen": -1.240143060684204, "logps/rejected": -1.6026208400726318, "loss": 1.2933, "nll_loss": 1.2080662250518799, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.062007151544094086, "rewards/margins": 0.018123891204595566, "rewards/rejected": -0.08013103902339935, "step": 4630 }, { "epoch": 2.7086981903093985, "grad_norm": 1.5772290237564457, "learning_rate": 7.340252743933794e-07, "log_odds_chosen": 0.5818043947219849, "log_odds_ratio": -0.4634174406528473, "logits/chosen": -3.739973783493042, "logits/rejected": -3.7838172912597656, "logps/chosen": -1.3381457328796387, "logps/rejected": -1.7901296615600586, "loss": 1.425, "nll_loss": 1.4181610345840454, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.06690728664398193, "rewards/margins": 0.022599194198846817, "rewards/rejected": -0.08950648456811905, "step": 4640 }, { "epoch": 2.714535901926445, "grad_norm": 1.5460582167399013, "learning_rate": 7.332355751067666e-07, "log_odds_chosen": 0.3826814591884613, "log_odds_ratio": -0.5847423076629639, "logits/chosen": -3.4893290996551514, "logits/rejected": -3.7183406352996826, "logps/chosen": -1.0917346477508545, "logps/rejected": -1.3597638607025146, "loss": 1.2606, "nll_loss": 1.1721118688583374, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.054586730897426605, "rewards/margins": 0.013401458039879799, "rewards/rejected": -0.06798818707466125, "step": 4650 }, { "epoch": 2.720373613543491, "grad_norm": 3.11037596257588, "learning_rate": 7.324484191363096e-07, "log_odds_chosen": 0.23665058612823486, "log_odds_ratio": -0.6448532342910767, "logits/chosen": -3.6837520599365234, "logits/rejected": -3.529111385345459, "logps/chosen": -1.2345397472381592, "logps/rejected": -1.44516921043396, "loss": 1.2773, "nll_loss": 1.2073009014129639, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06172698736190796, "rewards/margins": 0.010531477630138397, "rewards/rejected": -0.07225845754146576, "step": 4660 }, { "epoch": 2.726211325160537, "grad_norm": 1.4160298299030125, "learning_rate": 7.316637928595242e-07, "log_odds_chosen": 0.4975837767124176, "log_odds_ratio": -0.5381174087524414, "logits/chosen": -3.599916458129883, "logits/rejected": -3.6145262718200684, "logps/chosen": -1.3319010734558105, "logps/rejected": -1.6957378387451172, "loss": 1.2896, "nll_loss": 1.1994726657867432, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06659505516290665, "rewards/margins": 0.018191838636994362, "rewards/rejected": -0.08478689938783646, "step": 4670 }, { "epoch": 2.732049036777583, "grad_norm": 1.1210584210218417, "learning_rate": 7.308816827558578e-07, "log_odds_chosen": 0.47735175490379333, "log_odds_ratio": -0.6244334578514099, "logits/chosen": -3.492513656616211, "logits/rejected": -3.6763274669647217, "logps/chosen": -1.3440539836883545, "logps/rejected": -1.5393940210342407, "loss": 1.3986, "nll_loss": 1.3922169208526611, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.06720270216464996, "rewards/margins": 0.009766995906829834, "rewards/rejected": -0.0769696980714798, "step": 4680 }, { "epoch": 2.7378867483946294, "grad_norm": 1.1372939771260182, "learning_rate": 7.301020754057114e-07, "log_odds_chosen": 0.39555472135543823, "log_odds_ratio": -0.5354821681976318, "logits/chosen": -3.728724718093872, "logits/rejected": -3.696002244949341, "logps/chosen": -1.0873397588729858, "logps/rejected": -1.3485620021820068, "loss": 1.4393, "nll_loss": 1.238240361213684, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05436699464917183, "rewards/margins": 0.013061116449534893, "rewards/rejected": -0.0674281045794487, "step": 4690 }, { "epoch": 2.7437244600116752, "grad_norm": 1.0743333857857595, "learning_rate": 7.293249574894729e-07, "log_odds_chosen": 0.08952115476131439, "log_odds_ratio": -0.6859616041183472, "logits/chosen": -3.8508217334747314, "logits/rejected": -3.922930955886841, "logps/chosen": -1.3601635694503784, "logps/rejected": -1.3958338499069214, "loss": 1.2787, "nll_loss": 1.1677021980285645, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0680081844329834, "rewards/margins": 0.0017835169564932585, "rewards/rejected": -0.06979169696569443, "step": 4700 }, { "epoch": 2.7495621716287215, "grad_norm": 1.1819060280028157, "learning_rate": 7.285503157865601e-07, "log_odds_chosen": 0.21099953353405, "log_odds_ratio": -0.6245384812355042, "logits/chosen": -3.652514696121216, "logits/rejected": -3.955211639404297, "logps/chosen": -1.458566665649414, "logps/rejected": -1.593935489654541, "loss": 1.3566, "nll_loss": 1.4501060247421265, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07292833179235458, "rewards/margins": 0.006768439896404743, "rewards/rejected": -0.07969676703214645, "step": 4710 }, { "epoch": 2.755399883245768, "grad_norm": 1.7870245595207639, "learning_rate": 7.277781371744776e-07, "log_odds_chosen": 0.2661178708076477, "log_odds_ratio": -0.6165498495101929, "logits/chosen": -3.595871686935425, "logits/rejected": -3.699608325958252, "logps/chosen": -1.2495249509811401, "logps/rejected": -1.4875980615615845, "loss": 1.3471, "nll_loss": 1.274573564529419, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.062476254999637604, "rewards/margins": 0.011903658509254456, "rewards/rejected": -0.07437990605831146, "step": 4720 }, { "epoch": 2.7612375948628136, "grad_norm": 1.1693623096786268, "learning_rate": 7.270084086278817e-07, "log_odds_chosen": 0.35938888788223267, "log_odds_ratio": -0.5561161637306213, "logits/chosen": -3.6481881141662598, "logits/rejected": -3.783564329147339, "logps/chosen": -1.4305013418197632, "logps/rejected": -1.721234917640686, "loss": 1.3587, "nll_loss": 1.3532081842422485, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07152506709098816, "rewards/margins": 0.014536681585013866, "rewards/rejected": -0.0860617533326149, "step": 4730 }, { "epoch": 2.76707530647986, "grad_norm": 0.9757779044221262, "learning_rate": 7.262411172176586e-07, "log_odds_chosen": 0.31762686371803284, "log_odds_ratio": -0.5824174880981445, "logits/chosen": -3.8106226921081543, "logits/rejected": -3.8083999156951904, "logps/chosen": -1.2675894498825073, "logps/rejected": -1.489734172821045, "loss": 1.3359, "nll_loss": 1.2560367584228516, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06337947398424149, "rewards/margins": 0.011107238009572029, "rewards/rejected": -0.07448671758174896, "step": 4740 }, { "epoch": 2.772913018096906, "grad_norm": 1.0668284685578073, "learning_rate": 7.254762501100117e-07, "log_odds_chosen": 0.2241729199886322, "log_odds_ratio": -0.6123526096343994, "logits/chosen": -3.3693454265594482, "logits/rejected": -3.6432247161865234, "logps/chosen": -1.2049182653427124, "logps/rejected": -1.3958112001419067, "loss": 1.2554, "nll_loss": 1.1303247213363647, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06024591997265816, "rewards/margins": 0.009544647298753262, "rewards/rejected": -0.0697905644774437, "step": 4750 }, { "epoch": 2.778750729713952, "grad_norm": 1.0533303054818648, "learning_rate": 7.247137945655607e-07, "log_odds_chosen": 0.4626019597053528, "log_odds_ratio": -0.5420461893081665, "logits/chosen": -3.2544944286346436, "logits/rejected": -3.6274573802948, "logps/chosen": -1.2565922737121582, "logps/rejected": -1.655053734779358, "loss": 1.3023, "nll_loss": 1.3465907573699951, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06282962113618851, "rewards/margins": 0.019923070445656776, "rewards/rejected": -0.08275268971920013, "step": 4760 }, { "epoch": 2.7845884413309983, "grad_norm": 0.8910060894352477, "learning_rate": 7.23953737938449e-07, "log_odds_chosen": 0.30200833082199097, "log_odds_ratio": -0.5846055150032043, "logits/chosen": -3.5353026390075684, "logits/rejected": -3.525745391845703, "logps/chosen": -1.534912347793579, "logps/rejected": -1.7891271114349365, "loss": 1.3219, "nll_loss": 1.3902541399002075, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07674561440944672, "rewards/margins": 0.01271074078977108, "rewards/rejected": -0.08945635706186295, "step": 4770 }, { "epoch": 2.7904261529480445, "grad_norm": 1.3468577137268831, "learning_rate": 7.231960676754647e-07, "log_odds_chosen": 0.20221289992332458, "log_odds_ratio": -0.6124210357666016, "logits/chosen": -3.546177387237549, "logits/rejected": -3.6907095909118652, "logps/chosen": -1.2519643306732178, "logps/rejected": -1.394335150718689, "loss": 1.3717, "nll_loss": 1.2550040483474731, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06259821355342865, "rewards/margins": 0.00711854686960578, "rewards/rejected": -0.06971676647663116, "step": 4780 }, { "epoch": 2.7962638645650904, "grad_norm": 1.1238574851105516, "learning_rate": 7.224407713151682e-07, "log_odds_chosen": -0.08280225098133087, "log_odds_ratio": -0.7606834173202515, "logits/chosen": -3.6576504707336426, "logits/rejected": -3.717287540435791, "logps/chosen": -1.3169838190078735, "logps/rejected": -1.2781918048858643, "loss": 1.3783, "nll_loss": 1.2804020643234253, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.0658491998910904, "rewards/margins": -0.0019395959097892046, "rewards/rejected": -0.06390959769487381, "step": 4790 }, { "epoch": 2.8021015761821366, "grad_norm": 0.976281466747957, "learning_rate": 7.216878364870323e-07, "log_odds_chosen": 0.49105507135391235, "log_odds_ratio": -0.4918888211250305, "logits/chosen": -3.6056270599365234, "logits/rejected": -3.9965407848358154, "logps/chosen": -1.2902162075042725, "logps/rejected": -1.6546659469604492, "loss": 1.3694, "nll_loss": 1.3958704471588135, "rewards/accuracies": 1.0, "rewards/chosen": -0.06451081484556198, "rewards/margins": 0.01822248101234436, "rewards/rejected": -0.08273328840732574, "step": 4800 }, { "epoch": 2.807939287799183, "grad_norm": 1.9745506493228975, "learning_rate": 7.209372509105906e-07, "log_odds_chosen": 0.43622279167175293, "log_odds_ratio": -0.5490923523902893, "logits/chosen": -3.1636528968811035, "logits/rejected": -3.7730298042297363, "logps/chosen": -1.162138819694519, "logps/rejected": -1.377042531967163, "loss": 1.3471, "nll_loss": 1.2243326902389526, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05810694023966789, "rewards/margins": 0.010745184496045113, "rewards/rejected": -0.06885213404893875, "step": 4810 }, { "epoch": 2.8137769994162287, "grad_norm": 2.057040477255737, "learning_rate": 7.201890023945968e-07, "log_odds_chosen": -0.021474841982126236, "log_odds_ratio": -0.7320640087127686, "logits/chosen": -3.7691283226013184, "logits/rejected": -3.582946300506592, "logps/chosen": -1.5911940336227417, "logps/rejected": -1.5787818431854248, "loss": 1.4302, "nll_loss": 1.5725514888763428, "rewards/accuracies": 0.5, "rewards/chosen": -0.07955970615148544, "rewards/margins": -0.0006206061807461083, "rewards/rejected": -0.07893909513950348, "step": 4820 }, { "epoch": 2.819614711033275, "grad_norm": 2.2897822454318715, "learning_rate": 7.194430788361928e-07, "log_odds_chosen": 0.18522337079048157, "log_odds_ratio": -0.6272876858711243, "logits/chosen": -3.5267395973205566, "logits/rejected": -3.5713164806365967, "logps/chosen": -1.2218061685562134, "logps/rejected": -1.3421391248703003, "loss": 1.4248, "nll_loss": 1.3614808320999146, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06109030917286873, "rewards/margins": 0.006016649305820465, "rewards/rejected": -0.0671069547533989, "step": 4830 }, { "epoch": 2.8254524226503213, "grad_norm": 1.4747230940626501, "learning_rate": 7.186994682200863e-07, "log_odds_chosen": 0.25256818532943726, "log_odds_ratio": -0.6601889729499817, "logits/chosen": -3.507951021194458, "logits/rejected": -3.5265510082244873, "logps/chosen": -1.1795693635940552, "logps/rejected": -1.3606133460998535, "loss": 1.3226, "nll_loss": 1.2867867946624756, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05897846817970276, "rewards/margins": 0.009052194654941559, "rewards/rejected": -0.06803066283464432, "step": 4840 }, { "epoch": 2.831290134267367, "grad_norm": 1.0057097488841282, "learning_rate": 7.179581586177383e-07, "log_odds_chosen": 0.5760866403579712, "log_odds_ratio": -0.4957245886325836, "logits/chosen": -3.736769437789917, "logits/rejected": -3.627021312713623, "logps/chosen": -1.0285656452178955, "logps/rejected": -1.4217157363891602, "loss": 1.2334, "nll_loss": 1.1759603023529053, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.051428280770778656, "rewards/margins": 0.019657505676150322, "rewards/rejected": -0.07108578085899353, "step": 4850 }, { "epoch": 2.8371278458844134, "grad_norm": 1.2977413397818283, "learning_rate": 7.172191381865586e-07, "log_odds_chosen": 0.1523984968662262, "log_odds_ratio": -0.6450749635696411, "logits/chosen": -3.745401382446289, "logits/rejected": -3.75288462638855, "logps/chosen": -1.4588468074798584, "logps/rejected": -1.5747840404510498, "loss": 1.3202, "nll_loss": 1.3577721118927002, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0729423314332962, "rewards/margins": 0.005796866957098246, "rewards/rejected": -0.0787392109632492, "step": 4860 }, { "epoch": 2.8429655575014596, "grad_norm": 1.0542455631628278, "learning_rate": 7.16482395169113e-07, "log_odds_chosen": 0.052707720547914505, "log_odds_ratio": -0.6822970509529114, "logits/chosen": -3.776388645172119, "logits/rejected": -3.7857918739318848, "logps/chosen": -1.526362657546997, "logps/rejected": -1.5634758472442627, "loss": 1.3865, "nll_loss": 1.437971591949463, "rewards/accuracies": 0.5, "rewards/chosen": -0.07631812989711761, "rewards/margins": 0.0018556617433205247, "rewards/rejected": -0.07817380130290985, "step": 4870 }, { "epoch": 2.8488032691185055, "grad_norm": 2.0880258187757423, "learning_rate": 7.157479178923353e-07, "log_odds_chosen": 0.4818245768547058, "log_odds_ratio": -0.5438110828399658, "logits/chosen": -3.613074779510498, "logits/rejected": -3.8193893432617188, "logps/chosen": -1.3227379322052002, "logps/rejected": -1.6431710720062256, "loss": 1.3631, "nll_loss": 1.2465620040893555, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06613688915967941, "rewards/margins": 0.01602165959775448, "rewards/rejected": -0.08215855062007904, "step": 4880 }, { "epoch": 2.8546409807355517, "grad_norm": 1.4064662056579467, "learning_rate": 7.150156947667522e-07, "log_odds_chosen": 0.073939248919487, "log_odds_ratio": -0.7200990915298462, "logits/chosen": -3.582414150238037, "logits/rejected": -3.726696014404297, "logps/chosen": -1.4725496768951416, "logps/rejected": -1.5422803163528442, "loss": 1.4937, "nll_loss": 1.3883978128433228, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.07362748682498932, "rewards/margins": 0.0034865315537899733, "rewards/rejected": -0.07711401581764221, "step": 4890 }, { "epoch": 2.860478692352598, "grad_norm": 1.0136407871497484, "learning_rate": 7.142857142857143e-07, "log_odds_chosen": 0.3341895043849945, "log_odds_ratio": -0.584162712097168, "logits/chosen": -3.4705593585968018, "logits/rejected": -3.6005496978759766, "logps/chosen": -1.2120481729507446, "logps/rejected": -1.470122218132019, "loss": 1.4021, "nll_loss": 1.2653777599334717, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06060240790247917, "rewards/margins": 0.012903702445328236, "rewards/rejected": -0.07350611686706543, "step": 4900 }, { "epoch": 2.866316403969644, "grad_norm": 1.255427693160042, "learning_rate": 7.135579650246376e-07, "log_odds_chosen": 0.19888867437839508, "log_odds_ratio": -0.6345914006233215, "logits/chosen": -3.8538613319396973, "logits/rejected": -3.525426149368286, "logps/chosen": -1.5102226734161377, "logps/rejected": -1.6777499914169312, "loss": 1.4139, "nll_loss": 1.3985984325408936, "rewards/accuracies": 0.5, "rewards/chosen": -0.07551112771034241, "rewards/margins": 0.008376377634704113, "rewards/rejected": -0.0838875025510788, "step": 4910 }, { "epoch": 2.87215411558669, "grad_norm": 1.100829159529874, "learning_rate": 7.128324356402513e-07, "log_odds_chosen": 0.35180580615997314, "log_odds_ratio": -0.5786446332931519, "logits/chosen": -3.4198174476623535, "logits/rejected": -3.7499771118164062, "logps/chosen": -1.3485723733901978, "logps/rejected": -1.5934226512908936, "loss": 1.3104, "nll_loss": 1.2728549242019653, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06742862612009048, "rewards/margins": 0.012242515571415424, "rewards/rejected": -0.07967112958431244, "step": 4920 }, { "epoch": 2.8779918272037364, "grad_norm": 1.633492177714293, "learning_rate": 7.121091148698564e-07, "log_odds_chosen": 0.02440325915813446, "log_odds_ratio": -0.7204264402389526, "logits/chosen": -3.669640064239502, "logits/rejected": -3.8634121417999268, "logps/chosen": -1.455682396888733, "logps/rejected": -1.5026096105575562, "loss": 1.4778, "nll_loss": 1.3796799182891846, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.07278411090373993, "rewards/margins": 0.0023463729303330183, "rewards/rejected": -0.07513047754764557, "step": 4930 }, { "epoch": 2.883829538820782, "grad_norm": 1.620973414097792, "learning_rate": 7.113879915305904e-07, "log_odds_chosen": 0.42252975702285767, "log_odds_ratio": -0.5425761938095093, "logits/chosen": -3.5415871143341064, "logits/rejected": -3.7708239555358887, "logps/chosen": -1.3101261854171753, "logps/rejected": -1.5984896421432495, "loss": 1.3061, "nll_loss": 1.2917754650115967, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06550632417201996, "rewards/margins": 0.014418167062103748, "rewards/rejected": -0.07992447912693024, "step": 4940 }, { "epoch": 2.8896672504378285, "grad_norm": 1.8588906581467872, "learning_rate": 7.106690545187016e-07, "log_odds_chosen": 0.6594442129135132, "log_odds_ratio": -0.48768702149391174, "logits/chosen": -3.683623790740967, "logits/rejected": -3.56306529045105, "logps/chosen": -1.2954213619232178, "logps/rejected": -1.794274091720581, "loss": 1.3028, "nll_loss": 1.3033201694488525, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06477106362581253, "rewards/margins": 0.02494264766573906, "rewards/rejected": -0.08971370756626129, "step": 4950 }, { "epoch": 2.8955049620548747, "grad_norm": 0.9720147297125841, "learning_rate": 7.09952292808831e-07, "log_odds_chosen": 0.303494930267334, "log_odds_ratio": -0.5692735910415649, "logits/chosen": -3.68292236328125, "logits/rejected": -3.819624423980713, "logps/chosen": -1.4326907396316528, "logps/rejected": -1.6629794836044312, "loss": 1.3045, "nll_loss": 1.2400435209274292, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07163453847169876, "rewards/margins": 0.011514445766806602, "rewards/rejected": -0.08314897119998932, "step": 4960 }, { "epoch": 2.9013426736719206, "grad_norm": 1.308230239393398, "learning_rate": 7.092376954533026e-07, "log_odds_chosen": 0.20888809859752655, "log_odds_ratio": -0.6367964744567871, "logits/chosen": -3.79010272026062, "logits/rejected": -3.9114480018615723, "logps/chosen": -1.5158377885818481, "logps/rejected": -1.6530811786651611, "loss": 1.4244, "nll_loss": 1.3737024068832397, "rewards/accuracies": 0.5, "rewards/chosen": -0.07579188048839569, "rewards/margins": 0.006862170994281769, "rewards/rejected": -0.08265405893325806, "step": 4970 }, { "epoch": 2.907180385288967, "grad_norm": 1.1340782396280673, "learning_rate": 7.085252515814198e-07, "log_odds_chosen": 0.2109622210264206, "log_odds_ratio": -0.6666428446769714, "logits/chosen": -3.4367527961730957, "logits/rejected": -3.469069719314575, "logps/chosen": -1.3122317790985107, "logps/rejected": -1.470611572265625, "loss": 1.4021, "nll_loss": 1.356463074684143, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0656115934252739, "rewards/margins": 0.007918991148471832, "rewards/rejected": -0.07353059202432632, "step": 4980 }, { "epoch": 2.913018096906013, "grad_norm": 1.4619587171356005, "learning_rate": 7.07814950398772e-07, "log_odds_chosen": 0.16597406566143036, "log_odds_ratio": -0.6854727268218994, "logits/chosen": -3.7159018516540527, "logits/rejected": -3.8270435333251953, "logps/chosen": -1.388608694076538, "logps/rejected": -1.5893007516860962, "loss": 1.3513, "nll_loss": 1.4567725658416748, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06943044066429138, "rewards/margins": 0.010034603998064995, "rewards/rejected": -0.07946503162384033, "step": 4990 }, { "epoch": 2.918855808523059, "grad_norm": 1.115142711334603, "learning_rate": 7.071067811865476e-07, "log_odds_chosen": 0.3503459692001343, "log_odds_ratio": -0.5732976198196411, "logits/chosen": -3.2091064453125, "logits/rejected": -3.623927354812622, "logps/chosen": -1.1526353359222412, "logps/rejected": -1.4068657159805298, "loss": 1.197, "nll_loss": 1.2021119594573975, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.057631779462099075, "rewards/margins": 0.012711510062217712, "rewards/rejected": -0.07034327834844589, "step": 5000 }, { "epoch": 2.924693520140105, "grad_norm": 0.9769127943872739, "learning_rate": 7.06400733300854e-07, "log_odds_chosen": 0.3897576630115509, "log_odds_ratio": -0.588569700717926, "logits/chosen": -3.4627292156219482, "logits/rejected": -3.6575775146484375, "logps/chosen": -1.3429551124572754, "logps/rejected": -1.6507015228271484, "loss": 1.3005, "nll_loss": 1.2481815814971924, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06714775413274765, "rewards/margins": 0.01538732461631298, "rewards/rejected": -0.08253507316112518, "step": 5010 }, { "epoch": 2.9305312317571515, "grad_norm": 1.276177185168963, "learning_rate": 7.056967961720458e-07, "log_odds_chosen": 0.4142417907714844, "log_odds_ratio": -0.5516740083694458, "logits/chosen": -3.456455707550049, "logits/rejected": -3.739861011505127, "logps/chosen": -1.378509283065796, "logps/rejected": -1.703028917312622, "loss": 1.3673, "nll_loss": 1.4015251398086548, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06892546266317368, "rewards/margins": 0.01622597500681877, "rewards/rejected": -0.08515144139528275, "step": 5020 }, { "epoch": 2.9363689433741973, "grad_norm": 1.1134202083090716, "learning_rate": 7.049949593040614e-07, "log_odds_chosen": 0.4454672336578369, "log_odds_ratio": -0.5244887471199036, "logits/chosen": -3.704958438873291, "logits/rejected": -3.726362705230713, "logps/chosen": -1.4579215049743652, "logps/rejected": -1.8256046772003174, "loss": 1.2943, "nll_loss": 1.3998974561691284, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0728960782289505, "rewards/margins": 0.018384158611297607, "rewards/rejected": -0.09128023684024811, "step": 5030 }, { "epoch": 2.9422066549912436, "grad_norm": 1.1191918160675025, "learning_rate": 7.042952122737638e-07, "log_odds_chosen": 0.28385812044143677, "log_odds_ratio": -0.5798782110214233, "logits/chosen": -3.6429266929626465, "logits/rejected": -3.55791974067688, "logps/chosen": -1.234251856803894, "logps/rejected": -1.444502830505371, "loss": 1.3643, "nll_loss": 1.3295830488204956, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0617126002907753, "rewards/margins": 0.010512548498809338, "rewards/rejected": -0.07222513854503632, "step": 5040 }, { "epoch": 2.9480443666082894, "grad_norm": 1.1124218464027986, "learning_rate": 7.035975447302919e-07, "log_odds_chosen": 0.2606307864189148, "log_odds_ratio": -0.5891803503036499, "logits/chosen": -3.7022576332092285, "logits/rejected": -3.7266430854797363, "logps/chosen": -1.2496540546417236, "logps/rejected": -1.4185154438018799, "loss": 1.3576, "nll_loss": 1.3381998538970947, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06248270720243454, "rewards/margins": 0.008443063125014305, "rewards/rejected": -0.070925772190094, "step": 5050 }, { "epoch": 2.9538820782253357, "grad_norm": 1.2452134241108683, "learning_rate": 7.029019463944166e-07, "log_odds_chosen": 0.3581957519054413, "log_odds_ratio": -0.5552545785903931, "logits/chosen": -3.835836887359619, "logits/rejected": -3.8038527965545654, "logps/chosen": -1.4564703702926636, "logps/rejected": -1.751294732093811, "loss": 1.369, "nll_loss": 1.4934278726577759, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07282352447509766, "rewards/margins": 0.014741206541657448, "rewards/rejected": -0.08756472915410995, "step": 5060 }, { "epoch": 2.959719789842382, "grad_norm": 1.0997207455704607, "learning_rate": 7.022084070579053e-07, "log_odds_chosen": 0.377839058637619, "log_odds_ratio": -0.5936296582221985, "logits/chosen": -3.542678117752075, "logits/rejected": -3.2746245861053467, "logps/chosen": -1.4691638946533203, "logps/rejected": -1.7321739196777344, "loss": 1.3359, "nll_loss": 1.3686152696609497, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07345819473266602, "rewards/margins": 0.013150503858923912, "rewards/rejected": -0.08660870045423508, "step": 5070 }, { "epoch": 2.9655575014594278, "grad_norm": 2.3793392805750875, "learning_rate": 7.015169165828922e-07, "log_odds_chosen": 0.2036566287279129, "log_odds_ratio": -0.6230908036231995, "logits/chosen": -3.4344677925109863, "logits/rejected": -3.7930426597595215, "logps/chosen": -1.3506494760513306, "logps/rejected": -1.5300681591033936, "loss": 1.3118, "nll_loss": 1.2536314725875854, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06753246486186981, "rewards/margins": 0.008970935828983784, "rewards/rejected": -0.07650341093540192, "step": 5080 }, { "epoch": 2.971395213076474, "grad_norm": 1.3560086878760775, "learning_rate": 7.008274649012563e-07, "log_odds_chosen": 0.1994781196117401, "log_odds_ratio": -0.6360189318656921, "logits/chosen": -3.449765682220459, "logits/rejected": -3.635385036468506, "logps/chosen": -1.2243576049804688, "logps/rejected": -1.4039443731307983, "loss": 1.2752, "nll_loss": 1.2880127429962158, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.061217885464429855, "rewards/margins": 0.008979340083897114, "rewards/rejected": -0.0701972246170044, "step": 5090 }, { "epoch": 2.9772329246935203, "grad_norm": 2.0925819205415315, "learning_rate": 7.001400420140049e-07, "log_odds_chosen": 0.09331758320331573, "log_odds_ratio": -0.6731566190719604, "logits/chosen": -3.617941379547119, "logits/rejected": -3.8885273933410645, "logps/chosen": -1.3228400945663452, "logps/rejected": -1.3833613395690918, "loss": 1.4071, "nll_loss": 1.2938026189804077, "rewards/accuracies": 0.5, "rewards/chosen": -0.0661420077085495, "rewards/margins": 0.0030260600615292788, "rewards/rejected": -0.06916806846857071, "step": 5100 }, { "epoch": 2.983070636310566, "grad_norm": 1.610863605129849, "learning_rate": 6.994546379906659e-07, "log_odds_chosen": 0.3954852521419525, "log_odds_ratio": -0.5645431280136108, "logits/chosen": -3.4934353828430176, "logits/rejected": -3.8555285930633545, "logps/chosen": -1.211638331413269, "logps/rejected": -1.5286996364593506, "loss": 1.4347, "nll_loss": 1.2039861679077148, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06058192253112793, "rewards/margins": 0.01585306040942669, "rewards/rejected": -0.07643498480319977, "step": 5110 }, { "epoch": 2.9889083479276124, "grad_norm": 1.3652092712307846, "learning_rate": 6.987712429686844e-07, "log_odds_chosen": 0.38353392481803894, "log_odds_ratio": -0.5664414167404175, "logits/chosen": -3.764897108078003, "logits/rejected": -3.4524929523468018, "logps/chosen": -1.3075298070907593, "logps/rejected": -1.578154444694519, "loss": 1.3465, "nll_loss": 1.384374737739563, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06537649035453796, "rewards/margins": 0.013531235046684742, "rewards/rejected": -0.07890772819519043, "step": 5120 }, { "epoch": 2.9947460595446582, "grad_norm": 1.121048327771027, "learning_rate": 6.98089847152826e-07, "log_odds_chosen": 0.49211424589157104, "log_odds_ratio": -0.5218831896781921, "logits/chosen": -3.618412494659424, "logits/rejected": -3.530489683151245, "logps/chosen": -1.2987886667251587, "logps/rejected": -1.6374279260635376, "loss": 1.3337, "nll_loss": 1.2134933471679688, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06493943184614182, "rewards/margins": 0.016931965947151184, "rewards/rejected": -0.0818714052438736, "step": 5130 }, { "epoch": 3.0, "step": 5139, "total_flos": 0.0, "train_loss": 1.4077877288524407, "train_runtime": 2781.2999, "train_samples_per_second": 7.388, "train_steps_per_second": 1.848 } ], "logging_steps": 10, "max_steps": 5139, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }