{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.9456264775413712,
  "eval_steps": 500,
  "global_step": 52,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.037825059101654845,
      "grad_norm": 6.102660179138184,
      "kl": 0.0,
      "learning_rate": 0.0,
      "logits/chosen": 196634302.17142856,
      "logits/rejected": 99832523.03448276,
      "logps/chosen": -983.6032366071429,
      "logps/rejected": -592.0450565732758,
      "loss": 0.5,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.07565011820330969,
      "grad_norm": 5.946473598480225,
      "kl": 0.0,
      "learning_rate": 6.666666666666666e-07,
      "logits/chosen": 174062182.4,
      "logits/rejected": 42135149.176470585,
      "logps/chosen": -1048.4553385416666,
      "logps/rejected": -543.3271484375,
      "loss": 0.5,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 2
    },
    {
      "epoch": 0.11347517730496454,
      "grad_norm": 6.028696537017822,
      "kl": 1.4145872592926025,
      "learning_rate": 1.3333333333333332e-06,
      "logits/chosen": 149491836.12121212,
      "logits/rejected": 16149931.35483871,
      "logps/chosen": -1068.7826704545455,
      "logps/rejected": -462.9348538306452,
      "loss": 0.5007,
      "rewards/chosen": 0.07321595423149341,
      "rewards/margins": 0.1519100286161212,
      "rewards/rejected": -0.0786940743846278,
      "step": 3
    },
    {
      "epoch": 0.15130023640661938,
      "grad_norm": 6.901468276977539,
      "kl": 1.32453191280365,
      "learning_rate": 2e-06,
      "logits/chosen": 169462896.0,
      "logits/rejected": 43728952.0,
      "logps/chosen": -1022.6226196289062,
      "logps/rejected": -445.5266418457031,
      "loss": 0.5168,
      "rewards/chosen": -0.036316584795713425,
      "rewards/margins": -0.012025408446788788,
      "rewards/rejected": -0.024291176348924637,
      "step": 4
    },
    {
      "epoch": 0.18912529550827423,
      "grad_norm": 4.817409515380859,
      "kl": 0.5722990036010742,
      "learning_rate": 1.9979453927503364e-06,
      "logits/chosen": 149387924.6451613,
      "logits/rejected": 72442670.54545455,
      "logps/chosen": -950.062247983871,
      "logps/rejected": -557.7066169507576,
      "loss": 0.4998,
      "rewards/chosen": 0.02832491167130009,
      "rewards/margins": 0.0007265486908331897,
      "rewards/rejected": 0.0275983629804669,
      "step": 5
    },
    {
      "epoch": 0.22695035460992907,
      "grad_norm": 3.4966697692871094,
      "kl": 0.5014443397521973,
      "learning_rate": 1.991790013823246e-06,
      "logits/chosen": 90473276.23529412,
      "logits/rejected": 14355141.333333334,
      "logps/chosen": -773.8732766544117,
      "logps/rejected": -412.08531901041664,
      "loss": 0.5008,
      "rewards/chosen": 0.015493747066049016,
      "rewards/margins": 0.07545245932597741,
      "rewards/rejected": -0.05995871225992839,
      "step": 6
    },
    {
      "epoch": 0.2647754137115839,
      "grad_norm": 4.85699462890625,
      "kl": 0.9150105714797974,
      "learning_rate": 1.9815591569910653e-06,
      "logits/chosen": 177541861.5172414,
      "logits/rejected": 74630538.97142857,
      "logps/chosen": -1035.0591998922414,
      "logps/rejected": -533.7266741071429,
      "loss": 0.4996,
      "rewards/chosen": 0.05604078440830625,
      "rewards/margins": -0.02882181564575346,
      "rewards/rejected": 0.08486260005405971,
      "step": 7
    },
    {
      "epoch": 0.30260047281323876,
      "grad_norm": 5.519815444946289,
      "kl": 1.0732237100601196,
      "learning_rate": 1.9672948630390295e-06,
      "logits/chosen": 143991003.42857143,
      "logits/rejected": 37585928.827586204,
      "logps/chosen": -981.2824776785715,
      "logps/rejected": -516.1851427801724,
      "loss": 0.4675,
      "rewards/chosen": 0.1541661262512207,
      "rewards/margins": 0.42498263326184504,
      "rewards/rejected": -0.2708165070106243,
      "step": 8
    },
    {
      "epoch": 0.3404255319148936,
      "grad_norm": 4.185125350952148,
      "kl": 1.2695928812026978,
      "learning_rate": 1.9490557470106686e-06,
      "logits/chosen": 144893661.86666667,
      "logits/rejected": 41582317.176470585,
      "logps/chosen": -883.7143880208333,
      "logps/rejected": -439.1708984375,
      "loss": 0.4954,
      "rewards/chosen": -0.04857488075892131,
      "rewards/margins": -0.037722761257022036,
      "rewards/rejected": -0.01085211950189927,
      "step": 9
    },
    {
      "epoch": 0.37825059101654845,
      "grad_norm": 4.946338176727295,
      "kl": 2.036167860031128,
      "learning_rate": 1.9269167573460217e-06,
      "logits/chosen": 119253806.54545455,
      "logits/rejected": 4415281.548387097,
      "logps/chosen": -917.6138139204545,
      "logps/rejected": -416.56313004032256,
      "loss": 0.4539,
      "rewards/chosen": 0.2660622163252397,
      "rewards/margins": 0.25140741458736204,
      "rewards/rejected": 0.01465480173787763,
      "step": 10
    },
    {
      "epoch": 0.4160756501182033,
      "grad_norm": 4.3735785484313965,
      "kl": 2.626142978668213,
      "learning_rate": 1.9009688679024189e-06,
      "logits/chosen": 51137304.0,
      "logits/rejected": 9401010.0,
      "logps/chosen": -785.0097045898438,
      "logps/rejected": -515.2562866210938,
      "loss": 0.4788,
      "rewards/chosen": 0.15124297142028809,
      "rewards/margins": 0.3036640137434006,
      "rewards/rejected": -0.1524210423231125,
      "step": 11
    },
    {
      "epoch": 0.45390070921985815,
      "grad_norm": 4.936854839324951,
      "kl": 2.3246853351593018,
      "learning_rate": 1.8713187041233893e-06,
      "logits/chosen": 181763699.61290324,
      "logits/rejected": 38560000.0,
      "logps/chosen": -970.0010080645161,
      "logps/rejected": -475.65030184659093,
      "loss": 0.4744,
      "rewards/chosen": 0.06626310656147619,
      "rewards/margins": 0.23985308752498086,
      "rewards/rejected": -0.17358998096350467,
      "step": 12
    },
    {
      "epoch": 0.491725768321513,
      "grad_norm": 4.4254326820373535,
      "kl": 1.088865876197815,
      "learning_rate": 1.8380881048918404e-06,
      "logits/chosen": 136485074.82352942,
      "logits/rejected": 83436902.4,
      "logps/chosen": -846.1370634191177,
      "logps/rejected": -561.441796875,
      "loss": 0.4861,
      "rewards/chosen": 0.18190832699046416,
      "rewards/margins": 0.11596365535960479,
      "rewards/rejected": 0.06594467163085938,
      "step": 13
    },
    {
      "epoch": 0.5295508274231678,
      "grad_norm": 3.7348413467407227,
      "kl": 2.592059850692749,
      "learning_rate": 1.8014136218679566e-06,
      "logits/chosen": 81516349.79310344,
      "logits/rejected": 33273589.028571427,
      "logps/chosen": -772.0715921336207,
      "logps/rejected": -523.1571428571428,
      "loss": 0.4858,
      "rewards/chosen": 0.25091521493319807,
      "rewards/margins": 0.031018516935151202,
      "rewards/rejected": 0.21989669799804687,
      "step": 14
    },
    {
      "epoch": 0.5673758865248227,
      "grad_norm": 5.629943370819092,
      "kl": 2.344330072402954,
      "learning_rate": 1.7614459583691342e-06,
      "logits/chosen": 134785755.42857143,
      "logits/rejected": 69182587.5862069,
      "logps/chosen": -908.4704241071429,
      "logps/rejected": -505.3213900862069,
      "loss": 0.4707,
      "rewards/chosen": 0.317371450151716,
      "rewards/margins": 0.3183928394552522,
      "rewards/rejected": -0.0010213893035362508,
      "step": 15
    },
    {
      "epoch": 0.6052009456264775,
      "grad_norm": 4.68190860748291,
      "kl": 1.7908133268356323,
      "learning_rate": 1.7183493500977275e-06,
      "logits/chosen": 170032571.73333332,
      "logits/rejected": 67021959.52941176,
      "logps/chosen": -948.2464192708334,
      "logps/rejected": -565.9269301470588,
      "loss": 0.4587,
      "rewards/chosen": 0.22927993138631184,
      "rewards/margins": 0.2928940473818311,
      "rewards/rejected": -0.0636141159955193,
      "step": 16
    },
    {
      "epoch": 0.6430260047281324,
      "grad_norm": 5.001438617706299,
      "kl": 1.466516375541687,
      "learning_rate": 1.6723008902613168e-06,
      "logits/chosen": 149596454.78787878,
      "logits/rejected": 51657533.93548387,
      "logps/chosen": -934.278290719697,
      "logps/rejected": -586.933845766129,
      "loss": 0.4811,
      "rewards/chosen": 0.2862388148452296,
      "rewards/margins": 0.24233368356673013,
      "rewards/rejected": 0.04390513127849948,
      "step": 17
    },
    {
      "epoch": 0.6808510638297872,
      "grad_norm": 4.730621337890625,
      "kl": 2.506091594696045,
      "learning_rate": 1.6234898018587336e-06,
      "logits/chosen": 222986325.33333334,
      "logits/rejected": 85738066.8235294,
      "logps/chosen": -1094.235546875,
      "logps/rejected": -609.6760110294117,
      "loss": 0.5094,
      "rewards/chosen": 0.3892539342244466,
      "rewards/margins": 0.1954242855894799,
      "rewards/rejected": 0.19382964863496668,
      "step": 18
    },
    {
      "epoch": 0.7186761229314421,
      "grad_norm": 4.429194450378418,
      "kl": 3.0494582653045654,
      "learning_rate": 1.5721166601221695e-06,
      "logits/chosen": 53398761.4117647,
      "logits/rejected": -3114230.933333333,
      "logps/chosen": -779.4967830882352,
      "logps/rejected": -421.95270182291665,
      "loss": 0.4481,
      "rewards/chosen": 0.40259605295517864,
      "rewards/margins": 0.38857168204644144,
      "rewards/rejected": 0.014024370908737182,
      "step": 19
    },
    {
      "epoch": 0.7565011820330969,
      "grad_norm": 4.854764938354492,
      "kl": 3.306687593460083,
      "learning_rate": 1.5183925683105251e-06,
      "logits/chosen": 176390912.0,
      "logits/rejected": 69896576.0,
      "logps/chosen": -942.5886840820312,
      "logps/rejected": -547.885986328125,
      "loss": 0.4655,
      "rewards/chosen": 0.44881612062454224,
      "rewards/margins": 0.4531639628112316,
      "rewards/rejected": -0.004347842186689377,
      "step": 20
    },
    {
      "epoch": 0.7943262411347518,
      "grad_norm": 4.21329927444458,
      "kl": 2.6854774951934814,
      "learning_rate": 1.4625382902408354e-06,
      "logits/chosen": 135675238.4,
      "logits/rejected": 1414048.4705882352,
      "logps/chosen": -931.5176432291667,
      "logps/rejected": -442.47047334558823,
      "loss": 0.471,
      "rewards/chosen": 0.4190946896870931,
      "rewards/margins": 0.4137749505978005,
      "rewards/rejected": 0.005319739089292639,
      "step": 21
    },
    {
      "epoch": 0.8321513002364066,
      "grad_norm": 3.600161552429199,
      "kl": 2.8906941413879395,
      "learning_rate": 1.4047833431223936e-06,
      "logits/chosen": 70284709.64705883,
      "logits/rejected": 20018178.133333333,
      "logps/chosen": -715.8906824448529,
      "logps/rejected": -460.94720052083335,
      "loss": 0.4582,
      "rewards/chosen": 0.3729521527009852,
      "rewards/margins": 0.4021145516750859,
      "rewards/rejected": -0.029162398974100747,
      "step": 22
    },
    {
      "epoch": 0.8699763593380615,
      "grad_norm": 3.66640043258667,
      "kl": 2.6149392127990723,
      "learning_rate": 1.3453650544213076e-06,
      "logits/chosen": 140292640.0,
      "logits/rejected": 47265416.0,
      "logps/chosen": -893.296142578125,
      "logps/rejected": -523.0548095703125,
      "loss": 0.4231,
      "rewards/chosen": 0.6247925162315369,
      "rewards/margins": 0.7861275523900986,
      "rewards/rejected": -0.1613350361585617,
      "step": 23
    },
    {
      "epoch": 0.9078014184397163,
      "grad_norm": 3.6788175106048584,
      "kl": 2.519148588180542,
      "learning_rate": 1.2845275866310324e-06,
      "logits/chosen": 145627733.33333334,
      "logits/rejected": 55117643.294117644,
      "logps/chosen": -871.0560546875,
      "logps/rejected": -437.0091911764706,
      "loss": 0.4735,
      "rewards/chosen": 0.5424024581909179,
      "rewards/margins": 0.49889986795537605,
      "rewards/rejected": 0.04350259023554185,
      "step": 24
    },
    {
      "epoch": 0.9456264775413712,
      "grad_norm": 4.228794574737549,
      "kl": 3.256070137023926,
      "learning_rate": 1.2225209339563143e-06,
      "logits/chosen": 153259218.82352942,
      "logits/rejected": 17615726.933333334,
      "logps/chosen": -892.5439453125,
      "logps/rejected": -430.61676432291665,
      "loss": 0.4525,
      "rewards/chosen": 0.7282369838041418,
      "rewards/margins": 0.6316887285195145,
      "rewards/rejected": 0.09654825528462728,
      "step": 25
    },
    {
      "epoch": 0.983451536643026,
      "grad_norm": 4.561258792877197,
      "kl": 3.3823060989379883,
      "learning_rate": 1.1595998950333793e-06,
      "logits/chosen": 148130304.0,
      "logits/rejected": 17554260.0,
      "logps/chosen": -1006.059814453125,
      "logps/rejected": -537.9644165039062,
      "loss": 0.4519,
      "rewards/chosen": 0.49162358045578003,
      "rewards/margins": 0.43815357238054276,
      "rewards/rejected": 0.053470008075237274,
      "step": 26
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6852926015853882,
      "kl": 2.1286067962646484,
      "learning_rate": 1.0960230259076817e-06,
      "logits/chosen": 53759712.0,
      "logits/rejected": 33787948.0,
      "logps/chosen": -643.0557454427084,
      "logps/rejected": -457.4282531738281,
      "loss": 0.214,
      "rewards/chosen": 0.28392742077509564,
      "rewards/margins": 0.21173327664534253,
      "rewards/rejected": 0.07219414412975311,
      "step": 27
    },
    {
      "epoch": 1.037825059101655,
      "grad_norm": 5.137193202972412,
      "kl": 4.823151588439941,
      "learning_rate": 1.0320515775716554e-06,
      "logits/chosen": 200775387.42857143,
      "logits/rejected": 100983984.55172414,
      "logps/chosen": -977.4030133928571,
      "logps/rejected": -592.4806707974138,
      "loss": 0.4626,
      "rewards/chosen": 0.6200266156877791,
      "rewards/margins": 0.6635866314319555,
      "rewards/rejected": -0.0435600157441764,
      "step": 28
    },
    {
      "epoch": 1.0756501182033098,
      "grad_norm": 4.594226837158203,
      "kl": 3.3556861877441406,
      "learning_rate": 9.679484224283447e-07,
      "logits/chosen": 179323545.6,
      "logits/rejected": 45320342.5882353,
      "logps/chosen": -1038.7833333333333,
      "logps/rejected": -542.6672794117648,
      "loss": 0.4375,
      "rewards/chosen": 0.9672001520792644,
      "rewards/margins": 0.901208511988322,
      "rewards/rejected": 0.06599164009094238,
      "step": 29
    },
    {
      "epoch": 1.1134751773049645,
      "grad_norm": 4.856236457824707,
      "kl": 4.625980377197266,
      "learning_rate": 9.039769740923182e-07,
      "logits/chosen": 153493752.24242425,
      "logits/rejected": 17529911.741935484,
      "logps/chosen": -1062.4770359848485,
      "logps/rejected": -463.51165574596774,
      "loss": 0.4398,
      "rewards/chosen": 0.7037940169825698,
      "rewards/margins": 0.8401704748121991,
      "rewards/rejected": -0.13637645782962923,
      "step": 30
    },
    {
      "epoch": 1.1513002364066194,
      "grad_norm": 4.414266586303711,
      "kl": 4.7278618812561035,
      "learning_rate": 8.40400104966621e-07,
      "logits/chosen": 175260000.0,
      "logits/rejected": 45709312.0,
      "logps/chosen": -1013.2972412109375,
      "logps/rejected": -444.91375732421875,
      "loss": 0.4285,
      "rewards/chosen": 0.8962305188179016,
      "rewards/margins": 0.859231524169445,
      "rewards/rejected": 0.036998994648456573,
      "step": 31
    },
    {
      "epoch": 1.1891252955082743,
      "grad_norm": 3.963383913040161,
      "kl": 4.444534778594971,
      "learning_rate": 7.774790660436857e-07,
      "logits/chosen": 153567876.12903225,
      "logits/rejected": 75968279.27272727,
      "logps/chosen": -942.1448462701613,
      "logps/rejected": -556.363340435606,
      "loss": 0.449,
      "rewards/chosen": 0.8200658367526147,
      "rewards/margins": 0.6581434252674628,
      "rewards/rejected": 0.16192241148515182,
      "step": 32
    },
    {
      "epoch": 1.226950354609929,
      "grad_norm": 2.951836585998535,
      "kl": 3.6152563095092773,
      "learning_rate": 7.154724133689676e-07,
      "logits/chosen": 93397052.23529412,
      "logits/rejected": 14595782.4,
      "logps/chosen": -768.8926355698529,
      "logps/rejected": -413.5569661458333,
      "loss": 0.4498,
      "rewards/chosen": 0.5135440826416016,
      "rewards/margins": 0.7206665833791097,
      "rewards/rejected": -0.20712250073750813,
      "step": 33
    },
    {
      "epoch": 1.2647754137115839,
      "grad_norm": 4.550557613372803,
      "kl": 3.669976234436035,
      "learning_rate": 6.546349455786925e-07,
      "logits/chosen": 183599315.86206895,
      "logits/rejected": 75403607.77142857,
      "logps/chosen": -1025.5631061422414,
      "logps/rejected": -534.7680803571428,
      "loss": 0.4358,
      "rewards/chosen": 1.0056335843842605,
      "rewards/margins": 1.024911846726986,
      "rewards/rejected": -0.01927826234272548,
      "step": 34
    },
    {
      "epoch": 1.3026004728132388,
      "grad_norm": 4.653792381286621,
      "kl": 3.6588058471679688,
      "learning_rate": 5.952166568776062e-07,
      "logits/chosen": 148335016.22857141,
      "logits/rejected": 40633794.20689655,
      "logps/chosen": -974.4972098214286,
      "logps/rejected": -514.6373922413793,
      "loss": 0.4266,
      "rewards/chosen": 0.8326939174107143,
      "rewards/margins": 0.9487363669672624,
      "rewards/rejected": -0.11604244955654802,
      "step": 35
    },
    {
      "epoch": 1.3404255319148937,
      "grad_norm": 3.622363328933716,
      "kl": 3.4761648178100586,
      "learning_rate": 5.37461709759165e-07,
      "logits/chosen": 148679406.93333334,
      "logits/rejected": 42793573.64705882,
      "logps/chosen": -877.1561197916667,
      "logps/rejected": -439.0417911305147,
      "loss": 0.4514,
      "rewards/chosen": 0.6072582880655925,
      "rewards/margins": 0.6052035388993282,
      "rewards/rejected": 0.0020547491662642535,
      "step": 36
    },
    {
      "epoch": 1.3782505910165486,
      "grad_norm": 3.795612335205078,
      "kl": 4.738083362579346,
      "learning_rate": 4.816074316894749e-07,
      "logits/chosen": 122439517.0909091,
      "logits/rejected": 6666082.064516129,
      "logps/chosen": -911.3649976325758,
      "logps/rejected": -416.80128528225805,
      "loss": 0.3949,
      "rewards/chosen": 0.8909483244924834,
      "rewards/margins": 0.9001048640480023,
      "rewards/rejected": -0.009156539555518858,
      "step": 37
    },
    {
      "epoch": 1.4160756501182032,
      "grad_norm": 3.8737242221832275,
      "kl": 4.693930625915527,
      "learning_rate": 4.278833398778305e-07,
      "logits/chosen": 53371148.0,
      "logits/rejected": 11690896.0,
      "logps/chosen": -780.3959350585938,
      "logps/rejected": -514.53857421875,
      "loss": 0.4622,
      "rewards/chosen": 0.6126159429550171,
      "rewards/margins": 0.6932726949453354,
      "rewards/rejected": -0.0806567519903183,
      "step": 38
    },
    {
      "epoch": 1.4539007092198581,
      "grad_norm": 4.137379169464111,
      "kl": 3.699460983276367,
      "learning_rate": 3.765101981412665e-07,
      "logits/chosen": 185525049.80645162,
      "logits/rejected": 40507283.39393939,
      "logps/chosen": -962.7782888104839,
      "logps/rejected": -474.48996803977275,
      "loss": 0.4349,
      "rewards/chosen": 0.788528811547064,
      "rewards/margins": 0.8460882728516997,
      "rewards/rejected": -0.05755946130463571,
      "step": 39
    },
    {
      "epoch": 1.491725768321513,
      "grad_norm": 3.6001534461975098,
      "kl": 3.141786575317383,
      "learning_rate": 3.276991097386831e-07,
      "logits/chosen": 139919826.82352942,
      "logits/rejected": 84876014.93333334,
      "logps/chosen": -839.6150045955883,
      "logps/rejected": -562.3081380208333,
      "loss": 0.4271,
      "rewards/chosen": 0.8341112697825712,
      "rewards/margins": 0.8548066302841785,
      "rewards/rejected": -0.02069536050160726,
      "step": 40
    },
    {
      "epoch": 1.5295508274231677,
      "grad_norm": 3.451026439666748,
      "kl": 3.2268035411834717,
      "learning_rate": 2.816506499022725e-07,
      "logits/chosen": 84237550.34482759,
      "logits/rejected": 33436748.8,
      "logps/chosen": -769.4187769396551,
      "logps/rejected": -524.4465401785715,
      "loss": 0.4379,
      "rewards/chosen": 0.5161945079935009,
      "rewards/margins": 0.42524682547658543,
      "rewards/rejected": 0.09094768251691546,
      "step": 41
    },
    {
      "epoch": 1.5673758865248226,
      "grad_norm": 4.36342191696167,
      "kl": 3.4994237422943115,
      "learning_rate": 2.3855404163086556e-07,
      "logits/chosen": 137653745.37142858,
      "logits/rejected": 69890074.48275863,
      "logps/chosen": -904.1540178571429,
      "logps/rejected": -506.37173356681035,
      "loss": 0.429,
      "rewards/chosen": 0.7490062168666295,
      "rewards/margins": 0.8550563135757823,
      "rewards/rejected": -0.10605009670915275,
      "step": 42
    },
    {
      "epoch": 1.6052009456264775,
      "grad_norm": 4.330196380615234,
      "kl": 4.095585823059082,
      "learning_rate": 1.9858637813204349e-07,
      "logits/chosen": 171540189.86666667,
      "logits/rejected": 67830979.76470588,
      "logps/chosen": -944.7064453125,
      "logps/rejected": -567.6101792279412,
      "loss": 0.4334,
      "rewards/chosen": 0.5832799911499024,
      "rewards/margins": 0.815221789303948,
      "rewards/rejected": -0.23194179815404556,
      "step": 43
    },
    {
      "epoch": 1.6430260047281324,
      "grad_norm": 4.233847141265869,
      "kl": 4.319591999053955,
      "learning_rate": 1.619118951081594e-07,
      "logits/chosen": 152153925.8181818,
      "logits/rejected": 52702876.90322581,
      "logps/chosen": -930.294862689394,
      "logps/rejected": -587.3856476814516,
      "loss": 0.4389,
      "rewards/chosen": 0.6845672491824988,
      "rewards/margins": 0.6858430385356541,
      "rewards/rejected": -0.0012757893531553208,
      "step": 44
    },
    {
      "epoch": 1.6808510638297873,
      "grad_norm": 4.332212924957275,
      "kl": 4.509950637817383,
      "learning_rate": 1.286812958766106e-07,
      "logits/chosen": 224899908.26666668,
      "logits/rejected": 84017543.52941176,
      "logps/chosen": -1090.4815104166667,
      "logps/rejected": -611.0609489889706,
      "loss": 0.4422,
      "rewards/chosen": 0.7646568298339844,
      "rewards/margins": 0.7093157158178443,
      "rewards/rejected": 0.05534111401614021,
      "step": 45
    },
    {
      "epoch": 1.7186761229314422,
      "grad_norm": 3.7254793643951416,
      "kl": 4.550510406494141,
      "learning_rate": 9.903113209758096e-08,
      "logits/chosen": 55315689.4117647,
      "logits/rejected": -2547915.7333333334,
      "logps/chosen": -776.220703125,
      "logps/rejected": -422.91708984375,
      "loss": 0.4112,
      "rewards/chosen": 0.7302110896391028,
      "rewards/margins": 0.8126279419543697,
      "rewards/rejected": -0.08241685231526692,
      "step": 46
    },
    {
      "epoch": 1.756501182033097,
      "grad_norm": 4.257685661315918,
      "kl": 5.242433547973633,
      "learning_rate": 7.308324265397836e-08,
      "logits/chosen": 177834400.0,
      "logits/rejected": 68700808.0,
      "logps/chosen": -938.9208984375,
      "logps/rejected": -549.43896484375,
      "loss": 0.4154,
      "rewards/chosen": 0.8155966401100159,
      "rewards/margins": 0.9752314537763596,
      "rewards/rejected": -0.1596348136663437,
      "step": 47
    },
    {
      "epoch": 1.7943262411347518,
      "grad_norm": 3.6647257804870605,
      "kl": 3.49682354927063,
      "learning_rate": 5.094425298933136e-08,
      "logits/chosen": 137424546.13333333,
      "logits/rejected": 2488748.2352941176,
      "logps/chosen": -927.8557291666667,
      "logps/rejected": -442.87818818933823,
      "loss": 0.4424,
      "rewards/chosen": 0.7852853775024414,
      "rewards/margins": 0.820735776424408,
      "rewards/rejected": -0.03545039892196655,
      "step": 48
    },
    {
      "epoch": 1.8321513002364065,
      "grad_norm": 3.1744253635406494,
      "kl": 3.5714423656463623,
      "learning_rate": 3.270513696097055e-08,
      "logits/chosen": 72532555.29411764,
      "logits/rejected": 21655136.0,
      "logps/chosen": -712.5492302389706,
      "logps/rejected": -460.46861979166664,
      "loss": 0.4459,
      "rewards/chosen": 0.7071007560281193,
      "rewards/margins": 0.688404831348681,
      "rewards/rejected": 0.018695924679438272,
      "step": 49
    },
    {
      "epoch": 1.8699763593380614,
      "grad_norm": 3.220852851867676,
      "kl": 3.0724895000457764,
      "learning_rate": 1.844084300893456e-08,
      "logits/chosen": 142174912.0,
      "logits/rejected": 48436404.0,
      "logps/chosen": -890.3675537109375,
      "logps/rejected": -522.8453369140625,
      "loss": 0.3967,
      "rewards/chosen": 0.9176401495933533,
      "rewards/margins": 1.058032900094986,
      "rewards/rejected": -0.1403927505016327,
      "step": 50
    },
    {
      "epoch": 1.9078014184397163,
      "grad_norm": 3.456094980239868,
      "kl": 3.655463933944702,
      "learning_rate": 8.209986176753948e-09,
      "logits/chosen": 148127197.86666667,
      "logits/rejected": 54668976.941176474,
      "logps/chosen": -868.8671223958333,
      "logps/rejected": -438.52404067095586,
      "loss": 0.4497,
      "rewards/chosen": 0.7612937927246094,
      "rewards/margins": 0.8692790157654706,
      "rewards/rejected": -0.10798522304086124,
      "step": 51
    },
    {
      "epoch": 1.9456264775413712,
      "grad_norm": 4.0304388999938965,
      "kl": 4.205752849578857,
      "learning_rate": 2.054607249663665e-09,
      "logits/chosen": 153849931.29411766,
      "logits/rejected": 19226353.066666666,
      "logps/chosen": -891.9115349264706,
      "logps/rejected": -430.0306640625,
      "loss": 0.4745,
      "rewards/chosen": 0.7914738374597886,
      "rewards/margins": 0.6363162396000881,
      "rewards/rejected": 0.1551575978597005,
      "step": 52
    }
  ],
  "logging_steps": 1,
  "max_steps": 52,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}