|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.06764374295377677, |
|
"eval_steps": 300, |
|
"global_step": 150, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"completion_length": 129.59375, |
|
"epoch": 0.00045095828635851183, |
|
"grad_norm": 8.434212673465728, |
|
"kl": 0.0, |
|
"learning_rate": 9.997744700045105e-07, |
|
"log_metrics/accuracy": 0.007260729558765888, |
|
"log_metrics/iou_log": 0.0078125, |
|
"loss": 0.0, |
|
"max_completion_length": 444.5, |
|
"min_completion_length": 45.0, |
|
"reward": 0.29296875, |
|
"reward_std": 0.42108407616615295, |
|
"rewards/format_reward": 0.28515625, |
|
"rewards/iou_reward": 0.0078125, |
|
"rewards/log_reward": 0.0, |
|
"step": 1, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 142.15234375, |
|
"epoch": 0.0009019165727170237, |
|
"grad_norm": 4.356998223352768, |
|
"kl": 0.0017852783203125, |
|
"learning_rate": 9.995489400090211e-07, |
|
"log_metrics/accuracy": 0.02364518865942955, |
|
"log_metrics/iou_log": 0.0234375, |
|
"loss": 0.0001, |
|
"max_completion_length": 512.0, |
|
"min_completion_length": 38.5, |
|
"reward": 0.39453125, |
|
"reward_std": 0.4661460518836975, |
|
"rewards/format_reward": 0.37109375, |
|
"rewards/iou_reward": 0.0234375, |
|
"rewards/log_reward": 0.0, |
|
"step": 2, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 140.8671875, |
|
"epoch": 0.0013528748590755355, |
|
"grad_norm": 2.5874222658859782, |
|
"kl": 0.00408172607421875, |
|
"learning_rate": 9.993234100135317e-07, |
|
"log_metrics/accuracy": 0.04108293540775776, |
|
"log_metrics/iou_log": 0.046875, |
|
"loss": 0.0002, |
|
"max_completion_length": 512.0, |
|
"min_completion_length": 46.5, |
|
"reward": 0.7578125, |
|
"reward_std": 0.38541457056999207, |
|
"rewards/format_reward": 0.7109375, |
|
"rewards/iou_reward": 0.046875, |
|
"rewards/log_reward": 0.0, |
|
"step": 3, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 138.29296875, |
|
"epoch": 0.0018038331454340473, |
|
"grad_norm": 2.444955671283294, |
|
"kl": 0.006561279296875, |
|
"learning_rate": 9.990978800180425e-07, |
|
"log_metrics/accuracy": 0.019979181233793497, |
|
"log_metrics/iou_log": 0.0234375, |
|
"loss": 0.0003, |
|
"max_completion_length": 373.0, |
|
"min_completion_length": 53.0, |
|
"reward": 0.7578125, |
|
"reward_std": 0.3646235316991806, |
|
"rewards/format_reward": 0.734375, |
|
"rewards/iou_reward": 0.0234375, |
|
"rewards/log_reward": 0.0, |
|
"step": 4, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 139.26953125, |
|
"epoch": 0.002254791431792559, |
|
"grad_norm": 2.4127784594267783, |
|
"kl": 0.009033203125, |
|
"learning_rate": 9.98872350022553e-07, |
|
"log_metrics/accuracy": 0.019243303686380386, |
|
"log_metrics/iou_log": 0.0234375, |
|
"loss": 0.0004, |
|
"max_completion_length": 376.0, |
|
"min_completion_length": 50.5, |
|
"reward": 0.921875, |
|
"reward_std": 0.20379295945167542, |
|
"rewards/format_reward": 0.8984375, |
|
"rewards/iou_reward": 0.0234375, |
|
"rewards/log_reward": 0.0, |
|
"step": 5, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 141.515625, |
|
"epoch": 0.002705749718151071, |
|
"grad_norm": 3.4995669528512625, |
|
"kl": 0.0137939453125, |
|
"learning_rate": 9.986468200270636e-07, |
|
"log_metrics/accuracy": 0.011071678251028061, |
|
"log_metrics/iou_log": 0.01171875, |
|
"loss": 0.0006, |
|
"max_completion_length": 278.5, |
|
"min_completion_length": 76.5, |
|
"reward": 0.92578125, |
|
"reward_std": 0.18992366641759872, |
|
"rewards/format_reward": 0.9140625, |
|
"rewards/iou_reward": 0.01171875, |
|
"rewards/log_reward": 0.0, |
|
"step": 6, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 143.9609375, |
|
"epoch": 0.003156708004509583, |
|
"grad_norm": 1.3423944744269296, |
|
"kl": 0.015625, |
|
"learning_rate": 9.984212900315742e-07, |
|
"log_metrics/accuracy": 0.00020430245785973966, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0006, |
|
"max_completion_length": 296.0, |
|
"min_completion_length": 84.5, |
|
"reward": 0.9296875, |
|
"reward_std": 0.1642879694700241, |
|
"rewards/format_reward": 0.9296875, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 7, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 136.3125, |
|
"epoch": 0.0036076662908680946, |
|
"grad_norm": 1.0911367434212607, |
|
"kl": 0.011993408203125, |
|
"learning_rate": 9.981957600360848e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0005, |
|
"max_completion_length": 279.0, |
|
"min_completion_length": 84.5, |
|
"reward": 0.96484375, |
|
"reward_std": 0.12082062661647797, |
|
"rewards/format_reward": 0.96484375, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 8, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 137.1953125, |
|
"epoch": 0.004058624577226606, |
|
"grad_norm": 1.0840026316503646, |
|
"kl": 0.014739990234375, |
|
"learning_rate": 9.979702300405953e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0006, |
|
"max_completion_length": 262.0, |
|
"min_completion_length": 88.5, |
|
"reward": 0.93359375, |
|
"reward_std": 0.13721734285354614, |
|
"rewards/format_reward": 0.93359375, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 9, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 141.21484375, |
|
"epoch": 0.004509582863585118, |
|
"grad_norm": 0.5693248065798014, |
|
"kl": 0.016387939453125, |
|
"learning_rate": 9.97744700045106e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0007, |
|
"max_completion_length": 290.5, |
|
"min_completion_length": 92.5, |
|
"reward": 0.98828125, |
|
"reward_std": 0.03697281330823898, |
|
"rewards/format_reward": 0.98828125, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 10, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 139.66015625, |
|
"epoch": 0.00496054114994363, |
|
"grad_norm": 0.7385235986125652, |
|
"kl": 0.0325927734375, |
|
"learning_rate": 9.975191700496165e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0013, |
|
"max_completion_length": 236.0, |
|
"min_completion_length": 94.5, |
|
"reward": 0.98828125, |
|
"reward_std": 0.03697281330823898, |
|
"rewards/format_reward": 0.98828125, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 11, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 142.58203125, |
|
"epoch": 0.005411499436302142, |
|
"grad_norm": 0.2699152956849861, |
|
"kl": 0.0230712890625, |
|
"learning_rate": 9.972936400541273e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0009, |
|
"max_completion_length": 243.5, |
|
"min_completion_length": 95.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 12, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 137.46484375, |
|
"epoch": 0.005862457722660654, |
|
"grad_norm": 4.498549971158498, |
|
"kl": 0.0223388671875, |
|
"learning_rate": 9.970681100586379e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0009, |
|
"max_completion_length": 224.5, |
|
"min_completion_length": 90.0, |
|
"reward": 0.98828125, |
|
"reward_std": 0.03697281330823898, |
|
"rewards/format_reward": 0.98828125, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 13, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 137.58203125, |
|
"epoch": 0.006313416009019166, |
|
"grad_norm": 1.316359142547647, |
|
"kl": 0.020263671875, |
|
"learning_rate": 9.968425800631484e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0008, |
|
"max_completion_length": 203.5, |
|
"min_completion_length": 100.5, |
|
"reward": 0.99609375, |
|
"reward_std": 0.015625, |
|
"rewards/format_reward": 0.99609375, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 14, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 146.5625, |
|
"epoch": 0.006764374295377677, |
|
"grad_norm": 0.5411629015808064, |
|
"kl": 0.01934814453125, |
|
"learning_rate": 9.96617050067659e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0008, |
|
"max_completion_length": 244.0, |
|
"min_completion_length": 99.0, |
|
"reward": 0.9921875, |
|
"reward_std": 0.03125, |
|
"rewards/format_reward": 0.9921875, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 15, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 145.5625, |
|
"epoch": 0.007215332581736189, |
|
"grad_norm": 0.5837134769756639, |
|
"kl": 0.01678466796875, |
|
"learning_rate": 9.963915200721696e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0007, |
|
"max_completion_length": 265.0, |
|
"min_completion_length": 89.0, |
|
"reward": 0.99609375, |
|
"reward_std": 0.015625, |
|
"rewards/format_reward": 0.99609375, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 16, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 135.44921875, |
|
"epoch": 0.007666290868094701, |
|
"grad_norm": 0.5126143243632768, |
|
"kl": 0.02001953125, |
|
"learning_rate": 9.961659900766802e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0008, |
|
"max_completion_length": 247.5, |
|
"min_completion_length": 99.0, |
|
"reward": 0.9921875, |
|
"reward_std": 0.03125, |
|
"rewards/format_reward": 0.9921875, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 17, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 139.48046875, |
|
"epoch": 0.008117249154453212, |
|
"grad_norm": 0.8984743564170407, |
|
"kl": 0.015838623046875, |
|
"learning_rate": 9.959404600811907e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0006, |
|
"max_completion_length": 233.0, |
|
"min_completion_length": 84.0, |
|
"reward": 0.98828125, |
|
"reward_std": 0.046875, |
|
"rewards/format_reward": 0.98828125, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 18, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 138.43359375, |
|
"epoch": 0.008568207440811725, |
|
"grad_norm": 0.42188832602144527, |
|
"kl": 0.01397705078125, |
|
"learning_rate": 9.957149300857013e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0006, |
|
"max_completion_length": 231.5, |
|
"min_completion_length": 95.0, |
|
"reward": 0.99609375, |
|
"reward_std": 0.015625, |
|
"rewards/format_reward": 0.99609375, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 19, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 131.62890625, |
|
"epoch": 0.009019165727170236, |
|
"grad_norm": 0.32128454476676716, |
|
"kl": 0.01458740234375, |
|
"learning_rate": 9.954894000902119e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0006, |
|
"max_completion_length": 216.5, |
|
"min_completion_length": 88.0, |
|
"reward": 0.99609375, |
|
"reward_std": 0.015625, |
|
"rewards/format_reward": 0.99609375, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 20, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 126.828125, |
|
"epoch": 0.00947012401352875, |
|
"grad_norm": 0.48546223545241646, |
|
"kl": 0.0172119140625, |
|
"learning_rate": 9.952638700947225e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0007, |
|
"max_completion_length": 194.5, |
|
"min_completion_length": 87.5, |
|
"reward": 0.99609375, |
|
"reward_std": 0.015625, |
|
"rewards/format_reward": 0.99609375, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 21, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 129.39453125, |
|
"epoch": 0.00992108229988726, |
|
"grad_norm": 0.4258953137179797, |
|
"kl": 0.015869140625, |
|
"learning_rate": 9.950383400992333e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0006, |
|
"max_completion_length": 247.5, |
|
"min_completion_length": 85.5, |
|
"reward": 0.99609375, |
|
"reward_std": 0.015625, |
|
"rewards/format_reward": 0.99609375, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 22, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 129.10546875, |
|
"epoch": 0.010372040586245771, |
|
"grad_norm": 0.2833217695971168, |
|
"kl": 0.013458251953125, |
|
"learning_rate": 9.948128101037438e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0005, |
|
"max_completion_length": 192.5, |
|
"min_completion_length": 86.0, |
|
"reward": 0.99609375, |
|
"reward_std": 0.015625, |
|
"rewards/format_reward": 0.99609375, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 23, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 135.796875, |
|
"epoch": 0.010822998872604284, |
|
"grad_norm": 0.48148300062802873, |
|
"kl": 0.0255126953125, |
|
"learning_rate": 9.945872801082544e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.001, |
|
"max_completion_length": 259.5, |
|
"min_completion_length": 89.0, |
|
"reward": 0.99609375, |
|
"reward_std": 0.015625, |
|
"rewards/format_reward": 0.99609375, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 24, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 123.671875, |
|
"epoch": 0.011273957158962795, |
|
"grad_norm": 0.15786664076306023, |
|
"kl": 0.015167236328125, |
|
"learning_rate": 9.94361750112765e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0006, |
|
"max_completion_length": 207.5, |
|
"min_completion_length": 83.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 25, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 122.7109375, |
|
"epoch": 0.011724915445321308, |
|
"grad_norm": 0.13036537143577448, |
|
"kl": 0.0208740234375, |
|
"learning_rate": 9.941362201172756e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0008, |
|
"max_completion_length": 187.5, |
|
"min_completion_length": 83.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 26, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 126.6171875, |
|
"epoch": 0.01217587373167982, |
|
"grad_norm": 1.025009042819294, |
|
"kl": 0.01922607421875, |
|
"learning_rate": 9.939106901217861e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0008, |
|
"max_completion_length": 222.5, |
|
"min_completion_length": 87.5, |
|
"reward": 0.99609375, |
|
"reward_std": 0.015625, |
|
"rewards/format_reward": 0.99609375, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 27, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 126.890625, |
|
"epoch": 0.012626832018038332, |
|
"grad_norm": 0.16079440644093526, |
|
"kl": 0.0172119140625, |
|
"learning_rate": 9.936851601262967e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0007, |
|
"max_completion_length": 204.0, |
|
"min_completion_length": 84.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 28, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 121.3671875, |
|
"epoch": 0.013077790304396843, |
|
"grad_norm": 0.19002133711259844, |
|
"kl": 0.02008056640625, |
|
"learning_rate": 9.934596301308073e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0008, |
|
"max_completion_length": 192.5, |
|
"min_completion_length": 85.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 29, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 120.9296875, |
|
"epoch": 0.013528748590755355, |
|
"grad_norm": 0.3356237867953282, |
|
"kl": 0.02044677734375, |
|
"learning_rate": 9.932341001353179e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0008, |
|
"max_completion_length": 201.0, |
|
"min_completion_length": 80.0, |
|
"reward": 0.99609375, |
|
"reward_std": 0.015625, |
|
"rewards/format_reward": 0.99609375, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 30, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 121.890625, |
|
"epoch": 0.013979706877113867, |
|
"grad_norm": 0.0972578202221844, |
|
"kl": 0.0167236328125, |
|
"learning_rate": 9.930085701398284e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0007, |
|
"max_completion_length": 225.0, |
|
"min_completion_length": 88.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 31, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 125.453125, |
|
"epoch": 0.014430665163472379, |
|
"grad_norm": 0.16574400218238333, |
|
"kl": 0.01788330078125, |
|
"learning_rate": 9.92783040144339e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0007, |
|
"max_completion_length": 237.5, |
|
"min_completion_length": 84.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 32, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 121.51953125, |
|
"epoch": 0.014881623449830891, |
|
"grad_norm": 0.11795457231634025, |
|
"kl": 0.02032470703125, |
|
"learning_rate": 9.925575101488498e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0008, |
|
"max_completion_length": 235.0, |
|
"min_completion_length": 86.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 33, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 124.75390625, |
|
"epoch": 0.015332581736189402, |
|
"grad_norm": 0.126113812729237, |
|
"kl": 0.01666259765625, |
|
"learning_rate": 9.923319801533604e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0007, |
|
"max_completion_length": 183.0, |
|
"min_completion_length": 87.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 34, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 124.16015625, |
|
"epoch": 0.015783540022547914, |
|
"grad_norm": 0.07662853380658595, |
|
"kl": 0.01458740234375, |
|
"learning_rate": 9.92106450157871e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0006, |
|
"max_completion_length": 180.0, |
|
"min_completion_length": 87.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 35, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 128.6484375, |
|
"epoch": 0.016234498308906425, |
|
"grad_norm": 0.4478082644592694, |
|
"kl": 0.014801025390625, |
|
"learning_rate": 9.918809201623815e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0006, |
|
"max_completion_length": 208.0, |
|
"min_completion_length": 84.5, |
|
"reward": 0.9921875, |
|
"reward_std": 0.03125, |
|
"rewards/format_reward": 0.9921875, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 36, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 130.171875, |
|
"epoch": 0.01668545659526494, |
|
"grad_norm": 0.5289725828976242, |
|
"kl": 0.01373291015625, |
|
"learning_rate": 9.91655390166892e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0006, |
|
"max_completion_length": 223.0, |
|
"min_completion_length": 85.5, |
|
"reward": 0.9921875, |
|
"reward_std": 0.03125, |
|
"rewards/format_reward": 0.9921875, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 37, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 132.4609375, |
|
"epoch": 0.01713641488162345, |
|
"grad_norm": 0.05806697595641871, |
|
"kl": 0.013671875, |
|
"learning_rate": 9.914298601714027e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0005, |
|
"max_completion_length": 257.0, |
|
"min_completion_length": 89.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 38, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 131.6796875, |
|
"epoch": 0.01758737316798196, |
|
"grad_norm": 0.07390387951612225, |
|
"kl": 0.015869140625, |
|
"learning_rate": 9.912043301759133e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0006, |
|
"max_completion_length": 241.0, |
|
"min_completion_length": 94.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 39, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 133.99609375, |
|
"epoch": 0.018038331454340473, |
|
"grad_norm": 0.5552696074810277, |
|
"kl": 0.01605224609375, |
|
"learning_rate": 9.909788001804238e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0006, |
|
"max_completion_length": 380.0, |
|
"min_completion_length": 91.5, |
|
"reward": 0.98828125, |
|
"reward_std": 0.03697281330823898, |
|
"rewards/format_reward": 0.98828125, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 40, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 134.671875, |
|
"epoch": 0.018489289740698984, |
|
"grad_norm": 0.09608326888981661, |
|
"kl": 0.0177001953125, |
|
"learning_rate": 9.907532701849346e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0007, |
|
"max_completion_length": 262.0, |
|
"min_completion_length": 90.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 41, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 128.27734375, |
|
"epoch": 0.0189402480270575, |
|
"grad_norm": 0.22125892499138428, |
|
"kl": 0.013702392578125, |
|
"learning_rate": 9.905277401894452e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0005, |
|
"max_completion_length": 251.0, |
|
"min_completion_length": 88.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 42, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 135.359375, |
|
"epoch": 0.01939120631341601, |
|
"grad_norm": 0.08775045275198128, |
|
"kl": 0.015533447265625, |
|
"learning_rate": 9.903022101939558e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0006, |
|
"max_completion_length": 291.5, |
|
"min_completion_length": 83.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 43, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 135.53125, |
|
"epoch": 0.01984216459977452, |
|
"grad_norm": 0.12136918886632062, |
|
"kl": 0.015167236328125, |
|
"learning_rate": 9.900766801984663e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0006, |
|
"max_completion_length": 288.5, |
|
"min_completion_length": 88.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 44, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 128.52734375, |
|
"epoch": 0.020293122886133032, |
|
"grad_norm": 0.09774392937230035, |
|
"kl": 0.010894775390625, |
|
"learning_rate": 9.89851150202977e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 222.0, |
|
"min_completion_length": 86.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 45, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 131.52734375, |
|
"epoch": 0.020744081172491543, |
|
"grad_norm": 0.07216886654321743, |
|
"kl": 0.012908935546875, |
|
"learning_rate": 9.896256202074875e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0005, |
|
"max_completion_length": 246.0, |
|
"min_completion_length": 89.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 46, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 134.6796875, |
|
"epoch": 0.021195039458850057, |
|
"grad_norm": 0.3652700320877076, |
|
"kl": 0.011932373046875, |
|
"learning_rate": 9.89400090211998e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0005, |
|
"max_completion_length": 250.0, |
|
"min_completion_length": 91.0, |
|
"reward": 0.99609375, |
|
"reward_std": 0.015625, |
|
"rewards/format_reward": 0.99609375, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 47, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 134.94140625, |
|
"epoch": 0.02164599774520857, |
|
"grad_norm": 0.0712237478501456, |
|
"kl": 0.0118408203125, |
|
"learning_rate": 9.891745602165089e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0005, |
|
"max_completion_length": 262.0, |
|
"min_completion_length": 89.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 48, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 131.27734375, |
|
"epoch": 0.02209695603156708, |
|
"grad_norm": 0.07461422124475418, |
|
"kl": 0.010101318359375, |
|
"learning_rate": 9.889490302210194e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 216.0, |
|
"min_completion_length": 89.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 49, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 132.33984375, |
|
"epoch": 0.02254791431792559, |
|
"grad_norm": 2.3229008992621183, |
|
"kl": 0.011199951171875, |
|
"learning_rate": 9.8872350022553e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 225.0, |
|
"min_completion_length": 89.5, |
|
"reward": 0.99609375, |
|
"reward_std": 0.015625, |
|
"rewards/format_reward": 0.99609375, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 50, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 129.7734375, |
|
"epoch": 0.022998872604284102, |
|
"grad_norm": 0.0875125604597023, |
|
"kl": 0.015960693359375, |
|
"learning_rate": 9.884979702300406e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0006, |
|
"max_completion_length": 210.5, |
|
"min_completion_length": 90.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 51, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 129.12890625, |
|
"epoch": 0.023449830890642617, |
|
"grad_norm": 0.16094378550497987, |
|
"kl": 0.01641845703125, |
|
"learning_rate": 9.882724402345512e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0007, |
|
"max_completion_length": 203.5, |
|
"min_completion_length": 86.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 52, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 130.06640625, |
|
"epoch": 0.023900789177001128, |
|
"grad_norm": 0.07783652453734344, |
|
"kl": 0.013031005859375, |
|
"learning_rate": 9.880469102390617e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0005, |
|
"max_completion_length": 195.5, |
|
"min_completion_length": 80.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 53, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 129.9453125, |
|
"epoch": 0.02435174746335964, |
|
"grad_norm": 0.6621735516896475, |
|
"kl": 0.013824462890625, |
|
"learning_rate": 9.878213802435723e-07, |
|
"log_metrics/accuracy": 0.0036423311103135347, |
|
"log_metrics/iou_log": 0.00390625, |
|
"loss": 0.0006, |
|
"max_completion_length": 208.0, |
|
"min_completion_length": 92.5, |
|
"reward": 0.99609375, |
|
"reward_std": 0.015625, |
|
"rewards/format_reward": 0.9921875, |
|
"rewards/iou_reward": 0.00390625, |
|
"rewards/log_reward": 0.0, |
|
"step": 54, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 135.72265625, |
|
"epoch": 0.02480270574971815, |
|
"grad_norm": 0.056449991094393824, |
|
"kl": 0.00994873046875, |
|
"learning_rate": 9.875958502480829e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 328.0, |
|
"min_completion_length": 89.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 55, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 134.05078125, |
|
"epoch": 0.025253664036076665, |
|
"grad_norm": 0.6059033859841335, |
|
"kl": 0.00775146484375, |
|
"learning_rate": 9.873703202525937e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0003, |
|
"max_completion_length": 228.5, |
|
"min_completion_length": 89.5, |
|
"reward": 0.99609375, |
|
"reward_std": 0.015625, |
|
"rewards/format_reward": 0.99609375, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 56, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 132.85546875, |
|
"epoch": 0.025704622322435176, |
|
"grad_norm": 0.037734113426563874, |
|
"kl": 0.009033203125, |
|
"learning_rate": 9.871447902571042e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 240.5, |
|
"min_completion_length": 92.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 57, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 132.78125, |
|
"epoch": 0.026155580608793687, |
|
"grad_norm": 0.09813082705622107, |
|
"kl": 0.010498046875, |
|
"learning_rate": 9.869192602616148e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 225.0, |
|
"min_completion_length": 92.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 58, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 129.6796875, |
|
"epoch": 0.026606538895152198, |
|
"grad_norm": 0.272077654342032, |
|
"kl": 0.011505126953125, |
|
"learning_rate": 9.866937302661254e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0005, |
|
"max_completion_length": 218.5, |
|
"min_completion_length": 84.0, |
|
"reward": 0.99609375, |
|
"reward_std": 0.015625, |
|
"rewards/format_reward": 0.99609375, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 59, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 131.65234375, |
|
"epoch": 0.02705749718151071, |
|
"grad_norm": 0.0822946603987711, |
|
"kl": 0.01092529296875, |
|
"learning_rate": 9.86468200270636e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 209.0, |
|
"min_completion_length": 84.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 60, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 130.19140625, |
|
"epoch": 0.027508455467869224, |
|
"grad_norm": 0.055871650503174664, |
|
"kl": 0.01324462890625, |
|
"learning_rate": 9.862426702751465e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0005, |
|
"max_completion_length": 206.5, |
|
"min_completion_length": 86.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 61, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 136.4375, |
|
"epoch": 0.027959413754227735, |
|
"grad_norm": 0.3732397114410677, |
|
"kl": 0.010528564453125, |
|
"learning_rate": 9.860171402796571e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 250.5, |
|
"min_completion_length": 86.0, |
|
"reward": 0.99609375, |
|
"reward_std": 0.015625, |
|
"rewards/format_reward": 0.99609375, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 62, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 132.015625, |
|
"epoch": 0.028410372040586246, |
|
"grad_norm": 1.215038113924891, |
|
"kl": 0.0142822265625, |
|
"learning_rate": 9.857916102841677e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0006, |
|
"max_completion_length": 285.5, |
|
"min_completion_length": 82.0, |
|
"reward": 0.9921875, |
|
"reward_std": 0.03125, |
|
"rewards/format_reward": 0.9921875, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 63, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 132.875, |
|
"epoch": 0.028861330326944757, |
|
"grad_norm": 0.3792773946237974, |
|
"kl": 0.011932373046875, |
|
"learning_rate": 9.855660802886783e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0005, |
|
"max_completion_length": 221.5, |
|
"min_completion_length": 88.5, |
|
"reward": 0.99609375, |
|
"reward_std": 0.015625, |
|
"rewards/format_reward": 0.99609375, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 64, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 129.75390625, |
|
"epoch": 0.029312288613303268, |
|
"grad_norm": 0.06882533947287736, |
|
"kl": 0.010345458984375, |
|
"learning_rate": 9.85340550293189e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 197.0, |
|
"min_completion_length": 90.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 65, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 132.9609375, |
|
"epoch": 0.029763246899661783, |
|
"grad_norm": 0.7469897504399653, |
|
"kl": 0.0126953125, |
|
"learning_rate": 9.851150202976996e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0005, |
|
"max_completion_length": 243.0, |
|
"min_completion_length": 87.5, |
|
"reward": 0.98828125, |
|
"reward_std": 0.046875, |
|
"rewards/format_reward": 0.98828125, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 66, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 130.66015625, |
|
"epoch": 0.030214205186020294, |
|
"grad_norm": 0.47470539847306675, |
|
"kl": 0.01165771484375, |
|
"learning_rate": 9.848894903022102e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0005, |
|
"max_completion_length": 269.0, |
|
"min_completion_length": 91.0, |
|
"reward": 0.99609375, |
|
"reward_std": 0.015625, |
|
"rewards/format_reward": 0.99609375, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 67, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 131.70703125, |
|
"epoch": 0.030665163472378805, |
|
"grad_norm": 0.38448758695832475, |
|
"kl": 0.016815185546875, |
|
"learning_rate": 9.846639603067208e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0007, |
|
"max_completion_length": 259.5, |
|
"min_completion_length": 91.0, |
|
"reward": 0.99609375, |
|
"reward_std": 0.015625, |
|
"rewards/format_reward": 0.99609375, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 68, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 138.6484375, |
|
"epoch": 0.031116121758737316, |
|
"grad_norm": 0.15390242756935207, |
|
"kl": 0.01177978515625, |
|
"learning_rate": 9.844384303112314e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0005, |
|
"max_completion_length": 258.0, |
|
"min_completion_length": 88.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 69, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 130.78515625, |
|
"epoch": 0.03156708004509583, |
|
"grad_norm": 0.6291008512876635, |
|
"kl": 0.014617919921875, |
|
"learning_rate": 9.84212900315742e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0006, |
|
"max_completion_length": 232.5, |
|
"min_completion_length": 91.5, |
|
"reward": 0.9921875, |
|
"reward_std": 0.03125, |
|
"rewards/format_reward": 0.9921875, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 70, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 132.33984375, |
|
"epoch": 0.03201803833145434, |
|
"grad_norm": 0.05684071522205286, |
|
"kl": 0.010711669921875, |
|
"learning_rate": 9.839873703202525e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 269.0, |
|
"min_completion_length": 90.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 71, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 127.94140625, |
|
"epoch": 0.03246899661781285, |
|
"grad_norm": 0.05157290002711523, |
|
"kl": 0.009552001953125, |
|
"learning_rate": 9.83761840324763e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 234.0, |
|
"min_completion_length": 88.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 72, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 130.69140625, |
|
"epoch": 0.032919954904171364, |
|
"grad_norm": 0.36195683796616246, |
|
"kl": 0.0101318359375, |
|
"learning_rate": 9.835363103292737e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 274.5, |
|
"min_completion_length": 90.5, |
|
"reward": 0.99609375, |
|
"reward_std": 0.015625, |
|
"rewards/format_reward": 0.99609375, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 73, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 133.31640625, |
|
"epoch": 0.03337091319052988, |
|
"grad_norm": 0.05629834594638958, |
|
"kl": 0.014434814453125, |
|
"learning_rate": 9.833107803337842e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0006, |
|
"max_completion_length": 249.5, |
|
"min_completion_length": 88.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 74, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 131.1171875, |
|
"epoch": 0.033821871476888386, |
|
"grad_norm": 0.06013498747067093, |
|
"kl": 0.0140380859375, |
|
"learning_rate": 9.830852503382948e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0006, |
|
"max_completion_length": 264.0, |
|
"min_completion_length": 87.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 75, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 129.15234375, |
|
"epoch": 0.0342728297632469, |
|
"grad_norm": 0.06570934118415175, |
|
"kl": 0.009246826171875, |
|
"learning_rate": 9.828597203428056e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 223.0, |
|
"min_completion_length": 86.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 76, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 133.3125, |
|
"epoch": 0.03472378804960541, |
|
"grad_norm": 0.05957393968454419, |
|
"kl": 0.0108642578125, |
|
"learning_rate": 9.826341903473162e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 287.5, |
|
"min_completion_length": 96.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 77, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 134.0, |
|
"epoch": 0.03517474633596392, |
|
"grad_norm": 0.05504430029751346, |
|
"kl": 0.008758544921875, |
|
"learning_rate": 9.824086603518268e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 221.0, |
|
"min_completion_length": 92.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 78, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 130.1875, |
|
"epoch": 0.03562570462232244, |
|
"grad_norm": 0.06155937354569755, |
|
"kl": 0.011016845703125, |
|
"learning_rate": 9.821831303563373e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 217.0, |
|
"min_completion_length": 88.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 79, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 130.0703125, |
|
"epoch": 0.036076662908680945, |
|
"grad_norm": 0.09185105404429818, |
|
"kl": 0.00909423828125, |
|
"learning_rate": 9.81957600360848e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 218.0, |
|
"min_completion_length": 87.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 80, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 132.06640625, |
|
"epoch": 0.03652762119503946, |
|
"grad_norm": 0.04961982123085122, |
|
"kl": 0.011138916015625, |
|
"learning_rate": 9.817320703653585e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 231.0, |
|
"min_completion_length": 84.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 81, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 131.515625, |
|
"epoch": 0.03697857948139797, |
|
"grad_norm": 0.0410521754154648, |
|
"kl": 0.01123046875, |
|
"learning_rate": 9.81506540369869e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 252.5, |
|
"min_completion_length": 90.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 82, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 131.09765625, |
|
"epoch": 0.03742953776775648, |
|
"grad_norm": 0.029161244357827847, |
|
"kl": 0.0074005126953125, |
|
"learning_rate": 9.812810103743796e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0003, |
|
"max_completion_length": 219.5, |
|
"min_completion_length": 86.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 83, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 133.95703125, |
|
"epoch": 0.037880496054115, |
|
"grad_norm": 0.0550794318873668, |
|
"kl": 0.011444091796875, |
|
"learning_rate": 9.810554803788902e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0005, |
|
"max_completion_length": 253.0, |
|
"min_completion_length": 93.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 84, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 133.12890625, |
|
"epoch": 0.038331454340473504, |
|
"grad_norm": 0.3794360227093856, |
|
"kl": 0.0089111328125, |
|
"learning_rate": 9.80829950383401e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 235.0, |
|
"min_completion_length": 85.5, |
|
"reward": 0.99609375, |
|
"reward_std": 0.015625, |
|
"rewards/format_reward": 0.99609375, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 85, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 130.875, |
|
"epoch": 0.03878241262683202, |
|
"grad_norm": 0.05260711761694229, |
|
"kl": 0.008270263671875, |
|
"learning_rate": 9.806044203879116e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0003, |
|
"max_completion_length": 236.5, |
|
"min_completion_length": 85.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 86, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 136.50390625, |
|
"epoch": 0.03923337091319053, |
|
"grad_norm": 0.03637909476903296, |
|
"kl": 0.0069580078125, |
|
"learning_rate": 9.803788903924222e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0003, |
|
"max_completion_length": 215.0, |
|
"min_completion_length": 91.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 87, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 131.37109375, |
|
"epoch": 0.03968432919954904, |
|
"grad_norm": 0.031058014010275966, |
|
"kl": 0.013153076171875, |
|
"learning_rate": 9.801533603969327e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0005, |
|
"max_completion_length": 226.5, |
|
"min_completion_length": 90.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 88, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 141.54296875, |
|
"epoch": 0.040135287485907556, |
|
"grad_norm": 0.04156967471425809, |
|
"kl": 0.01055908203125, |
|
"learning_rate": 9.799278304014433e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 287.5, |
|
"min_completion_length": 92.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 89, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 133.45703125, |
|
"epoch": 0.040586245772266064, |
|
"grad_norm": 0.04432572627688822, |
|
"kl": 0.0111083984375, |
|
"learning_rate": 9.797023004059539e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 210.0, |
|
"min_completion_length": 95.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 90, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 134.28125, |
|
"epoch": 0.04103720405862458, |
|
"grad_norm": 0.7407667374975673, |
|
"kl": 0.022186279296875, |
|
"learning_rate": 9.794767704104645e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0009, |
|
"max_completion_length": 218.0, |
|
"min_completion_length": 93.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 91, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 137.15625, |
|
"epoch": 0.041488162344983086, |
|
"grad_norm": 0.41097995708484697, |
|
"kl": 0.011474609375, |
|
"learning_rate": 9.792512404149752e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0005, |
|
"max_completion_length": 248.0, |
|
"min_completion_length": 85.0, |
|
"reward": 0.99609375, |
|
"reward_std": 0.015625, |
|
"rewards/format_reward": 0.99609375, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 92, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 137.140625, |
|
"epoch": 0.0419391206313416, |
|
"grad_norm": 0.04758776396448601, |
|
"kl": 0.009674072265625, |
|
"learning_rate": 9.790257104194858e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 262.0, |
|
"min_completion_length": 85.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 93, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 141.03125, |
|
"epoch": 0.042390078917700115, |
|
"grad_norm": 0.2485793639149779, |
|
"kl": 0.0203857421875, |
|
"learning_rate": 9.788001804239964e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0008, |
|
"max_completion_length": 269.5, |
|
"min_completion_length": 98.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 94, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 134.24609375, |
|
"epoch": 0.04284103720405862, |
|
"grad_norm": 0.2540785535950062, |
|
"kl": 0.0247802734375, |
|
"learning_rate": 9.78574650428507e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.001, |
|
"max_completion_length": 230.0, |
|
"min_completion_length": 87.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 95, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 134.51953125, |
|
"epoch": 0.04329199549041714, |
|
"grad_norm": 0.16330960689627924, |
|
"kl": 0.0205078125, |
|
"learning_rate": 9.783491204330175e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0008, |
|
"max_completion_length": 217.5, |
|
"min_completion_length": 91.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 96, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 136.6953125, |
|
"epoch": 0.043742953776775645, |
|
"grad_norm": 0.14972836319565624, |
|
"kl": 0.015106201171875, |
|
"learning_rate": 9.781235904375281e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0006, |
|
"max_completion_length": 232.0, |
|
"min_completion_length": 88.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 97, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 135.2109375, |
|
"epoch": 0.04419391206313416, |
|
"grad_norm": 0.06666610741611737, |
|
"kl": 0.010040283203125, |
|
"learning_rate": 9.778980604420387e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 230.5, |
|
"min_completion_length": 95.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 98, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 136.07421875, |
|
"epoch": 0.044644870349492674, |
|
"grad_norm": 0.03692527598375854, |
|
"kl": 0.01007080078125, |
|
"learning_rate": 9.776725304465493e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 216.0, |
|
"min_completion_length": 87.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 99, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 138.29296875, |
|
"epoch": 0.04509582863585118, |
|
"grad_norm": 0.0697985947271456, |
|
"kl": 0.01080322265625, |
|
"learning_rate": 9.7744700045106e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 308.0, |
|
"min_completion_length": 91.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 100, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 133.515625, |
|
"epoch": 0.045546786922209696, |
|
"grad_norm": 0.07309140477924224, |
|
"kl": 0.010772705078125, |
|
"learning_rate": 9.772214704555706e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 224.0, |
|
"min_completion_length": 88.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 101, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 130.36328125, |
|
"epoch": 0.045997745208568204, |
|
"grad_norm": 0.05574240124141491, |
|
"kl": 0.01068115234375, |
|
"learning_rate": 9.769959404600812e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 208.0, |
|
"min_completion_length": 89.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 102, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 139.234375, |
|
"epoch": 0.04644870349492672, |
|
"grad_norm": 0.03519854121109168, |
|
"kl": 0.013427734375, |
|
"learning_rate": 9.767704104645918e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0005, |
|
"max_completion_length": 327.0, |
|
"min_completion_length": 91.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 103, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 131.1015625, |
|
"epoch": 0.04689966178128523, |
|
"grad_norm": 0.03554996767560797, |
|
"kl": 0.010955810546875, |
|
"learning_rate": 9.765448804691024e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 238.5, |
|
"min_completion_length": 73.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 104, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 136.60546875, |
|
"epoch": 0.04735062006764374, |
|
"grad_norm": 0.042584011523593555, |
|
"kl": 0.01165771484375, |
|
"learning_rate": 9.76319350473613e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0005, |
|
"max_completion_length": 257.5, |
|
"min_completion_length": 89.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 105, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 135.66796875, |
|
"epoch": 0.047801578354002255, |
|
"grad_norm": 0.04615459145131747, |
|
"kl": 0.010589599609375, |
|
"learning_rate": 9.760938204781235e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 310.5, |
|
"min_completion_length": 87.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 106, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 135.6328125, |
|
"epoch": 0.04825253664036077, |
|
"grad_norm": 0.045923587392494976, |
|
"kl": 0.010223388671875, |
|
"learning_rate": 9.758682904826343e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 225.0, |
|
"min_completion_length": 79.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 107, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 133.73046875, |
|
"epoch": 0.04870349492671928, |
|
"grad_norm": 0.044568286212471234, |
|
"kl": 0.0089111328125, |
|
"learning_rate": 9.756427604871449e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 217.5, |
|
"min_completion_length": 92.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 108, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 131.06640625, |
|
"epoch": 0.04915445321307779, |
|
"grad_norm": 0.2549839899096623, |
|
"kl": 0.0092620849609375, |
|
"learning_rate": 9.754172304916554e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 229.0, |
|
"min_completion_length": 83.5, |
|
"reward": 0.99609375, |
|
"reward_std": 0.015625, |
|
"rewards/format_reward": 0.99609375, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 109, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 133.9375, |
|
"epoch": 0.0496054114994363, |
|
"grad_norm": 0.28421440889201194, |
|
"kl": 0.01019287109375, |
|
"learning_rate": 9.75191700496166e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 246.0, |
|
"min_completion_length": 93.0, |
|
"reward": 0.99609375, |
|
"reward_std": 0.015625, |
|
"rewards/format_reward": 0.99609375, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 110, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 135.9296875, |
|
"epoch": 0.050056369785794814, |
|
"grad_norm": 0.7286024428170503, |
|
"kl": 0.01153564453125, |
|
"learning_rate": 9.749661705006766e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0005, |
|
"max_completion_length": 240.0, |
|
"min_completion_length": 90.5, |
|
"reward": 0.99609375, |
|
"reward_std": 0.015625, |
|
"rewards/format_reward": 0.99609375, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 111, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 135.11328125, |
|
"epoch": 0.05050732807215333, |
|
"grad_norm": 0.03670784769416828, |
|
"kl": 0.010528564453125, |
|
"learning_rate": 9.747406405051872e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 231.5, |
|
"min_completion_length": 92.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 112, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 127.98828125, |
|
"epoch": 0.05095828635851184, |
|
"grad_norm": 0.042164123293671474, |
|
"kl": 0.0087890625, |
|
"learning_rate": 9.745151105096978e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 239.0, |
|
"min_completion_length": 81.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 113, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 130.52734375, |
|
"epoch": 0.05140924464487035, |
|
"grad_norm": 0.04359303746661243, |
|
"kl": 0.01031494140625, |
|
"learning_rate": 9.742895805142083e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 265.5, |
|
"min_completion_length": 82.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 114, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 133.34765625, |
|
"epoch": 0.05186020293122886, |
|
"grad_norm": 0.05312958242052849, |
|
"kl": 0.009002685546875, |
|
"learning_rate": 9.74064050518719e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 217.5, |
|
"min_completion_length": 89.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 115, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 135.421875, |
|
"epoch": 0.052311161217587374, |
|
"grad_norm": 0.04383689920001928, |
|
"kl": 0.0076446533203125, |
|
"learning_rate": 9.738385205232295e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0003, |
|
"max_completion_length": 257.5, |
|
"min_completion_length": 89.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 116, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 129.4375, |
|
"epoch": 0.05276211950394589, |
|
"grad_norm": 0.046719015497805653, |
|
"kl": 0.008270263671875, |
|
"learning_rate": 9.7361299052774e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0003, |
|
"max_completion_length": 225.0, |
|
"min_completion_length": 87.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 117, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 134.2734375, |
|
"epoch": 0.053213077790304396, |
|
"grad_norm": 0.04862725334362555, |
|
"kl": 0.00933837890625, |
|
"learning_rate": 9.733874605322508e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 237.5, |
|
"min_completion_length": 82.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 118, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 133.13671875, |
|
"epoch": 0.05366403607666291, |
|
"grad_norm": 0.10031986851741669, |
|
"kl": 0.0108642578125, |
|
"learning_rate": 9.731619305367614e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 243.5, |
|
"min_completion_length": 89.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 119, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 132.49609375, |
|
"epoch": 0.05411499436302142, |
|
"grad_norm": 0.031112150572192358, |
|
"kl": 0.0073394775390625, |
|
"learning_rate": 9.72936400541272e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0003, |
|
"max_completion_length": 288.0, |
|
"min_completion_length": 86.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 120, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 134.01953125, |
|
"epoch": 0.05456595264937993, |
|
"grad_norm": 0.5940559108446518, |
|
"kl": 0.013671875, |
|
"learning_rate": 9.727108705457826e-07, |
|
"log_metrics/accuracy": 0.003257421776652336, |
|
"log_metrics/iou_log": 0.00390625, |
|
"loss": 0.0005, |
|
"max_completion_length": 258.0, |
|
"min_completion_length": 85.5, |
|
"reward": 1.0, |
|
"reward_std": 0.03125, |
|
"rewards/format_reward": 0.99609375, |
|
"rewards/iou_reward": 0.00390625, |
|
"rewards/log_reward": 0.0, |
|
"step": 121, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 139.04296875, |
|
"epoch": 0.05501691093573845, |
|
"grad_norm": 0.03639097352803687, |
|
"kl": 0.007720947265625, |
|
"learning_rate": 9.724853405502931e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0003, |
|
"max_completion_length": 262.0, |
|
"min_completion_length": 94.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 122, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 131.33984375, |
|
"epoch": 0.055467869222096955, |
|
"grad_norm": 0.029926586040708514, |
|
"kl": 0.008544921875, |
|
"learning_rate": 9.722598105548037e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0003, |
|
"max_completion_length": 185.5, |
|
"min_completion_length": 92.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 123, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 135.640625, |
|
"epoch": 0.05591882750845547, |
|
"grad_norm": 0.07007980580824669, |
|
"kl": 0.008758544921875, |
|
"learning_rate": 9.720342805593143e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 263.5, |
|
"min_completion_length": 88.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 124, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 134.91015625, |
|
"epoch": 0.05636978579481398, |
|
"grad_norm": 0.038029665047696073, |
|
"kl": 0.010833740234375, |
|
"learning_rate": 9.718087505638249e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 301.5, |
|
"min_completion_length": 93.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 125, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 136.80078125, |
|
"epoch": 0.05682074408117249, |
|
"grad_norm": 0.03399658389217789, |
|
"kl": 0.010498046875, |
|
"learning_rate": 9.715832205683354e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 253.5, |
|
"min_completion_length": 90.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 126, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 132.7421875, |
|
"epoch": 0.057271702367531006, |
|
"grad_norm": 0.0288651577135693, |
|
"kl": 0.0081787109375, |
|
"learning_rate": 9.71357690572846e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0003, |
|
"max_completion_length": 233.5, |
|
"min_completion_length": 87.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 127, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 138.23828125, |
|
"epoch": 0.057722660653889514, |
|
"grad_norm": 0.029894092306988484, |
|
"kl": 0.00921630859375, |
|
"learning_rate": 9.711321605773566e-07, |
|
"log_metrics/accuracy": 0.0, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 239.0, |
|
"min_completion_length": 91.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 128, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 136.296875, |
|
"epoch": 0.05817361894024803, |
|
"grad_norm": 0.02817290776298427, |
|
"kl": 0.009246826171875, |
|
"learning_rate": 9.709066305818674e-07, |
|
"log_metrics/accuracy": 0.001520317979156971, |
|
"log_metrics/iou_log": 0.0, |
|
"loss": 0.0004, |
|
"max_completion_length": 316.5, |
|
"min_completion_length": 83.5, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.0, |
|
"rewards/log_reward": 0.0, |
|
"step": 129, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 130.125, |
|
"epoch": 0.058624577226606536, |
|
"grad_norm": 0.6165285520370335, |
|
"kl": 0.01171875, |
|
"learning_rate": 9.70681100586378e-07, |
|
"log_metrics/accuracy": 0.0029867857228964567, |
|
"log_metrics/iou_log": 0.00390625, |
|
"loss": 0.0005, |
|
"max_completion_length": 216.0, |
|
"min_completion_length": 82.0, |
|
"reward": 1.00390625, |
|
"reward_std": 0.015625, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.00390625, |
|
"rewards/log_reward": 0.0, |
|
"step": 130, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 133.953125, |
|
"epoch": 0.05907553551296505, |
|
"grad_norm": 0.4803658881359604, |
|
"kl": 0.00921630859375, |
|
"learning_rate": 9.704555705908885e-07, |
|
"log_metrics/accuracy": 0.0028719999827444553, |
|
"log_metrics/iou_log": 0.00390625, |
|
"loss": 0.0004, |
|
"max_completion_length": 246.0, |
|
"min_completion_length": 79.0, |
|
"reward": 1.00390625, |
|
"reward_std": 0.015625, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.00390625, |
|
"rewards/log_reward": 0.0, |
|
"step": 131, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 129.59765625, |
|
"epoch": 0.059526493799323565, |
|
"grad_norm": 1.7435921168622184, |
|
"kl": 0.01397705078125, |
|
"learning_rate": 9.702300405953991e-07, |
|
"log_metrics/accuracy": 0.03585699386894703, |
|
"log_metrics/iou_log": 0.03515625, |
|
"loss": 0.0006, |
|
"max_completion_length": 238.0, |
|
"min_completion_length": 69.5, |
|
"reward": 1.0234375, |
|
"reward_std": 0.125, |
|
"rewards/format_reward": 0.98828125, |
|
"rewards/iou_reward": 0.03515625, |
|
"rewards/log_reward": 0.0, |
|
"step": 132, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 117.4921875, |
|
"epoch": 0.05997745208568207, |
|
"grad_norm": 6.152409630252781, |
|
"kl": 0.05615234375, |
|
"learning_rate": 9.700045105999097e-07, |
|
"log_metrics/accuracy": 0.3532668203115463, |
|
"log_metrics/iou_log": 0.3984375, |
|
"loss": 0.0022, |
|
"max_completion_length": 229.0, |
|
"min_completion_length": 62.0, |
|
"reward": 1.3828125, |
|
"reward_std": 0.4916256368160248, |
|
"rewards/format_reward": 0.984375, |
|
"rewards/iou_reward": 0.3984375, |
|
"rewards/log_reward": 0.0, |
|
"step": 133, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 99.53125, |
|
"epoch": 0.06042841037204059, |
|
"grad_norm": 24.520553591942157, |
|
"kl": 0.1240234375, |
|
"learning_rate": 9.697789806044203e-07, |
|
"log_metrics/accuracy": 0.6385847628116608, |
|
"log_metrics/iou_log": 0.6953125, |
|
"loss": 0.0049, |
|
"max_completion_length": 230.0, |
|
"min_completion_length": 55.5, |
|
"reward": 1.67578125, |
|
"reward_std": 0.31701020896434784, |
|
"rewards/format_reward": 0.98046875, |
|
"rewards/iou_reward": 0.6953125, |
|
"rewards/log_reward": 0.0, |
|
"step": 134, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 102.87890625, |
|
"epoch": 0.060879368658399095, |
|
"grad_norm": 2.4974832118570727, |
|
"kl": 0.113525390625, |
|
"learning_rate": 9.695534506089308e-07, |
|
"log_metrics/accuracy": 0.6731529831886292, |
|
"log_metrics/iou_log": 0.76171875, |
|
"loss": 0.0045, |
|
"max_completion_length": 186.0, |
|
"min_completion_length": 58.5, |
|
"reward": 1.7578125, |
|
"reward_std": 0.3095604404807091, |
|
"rewards/format_reward": 0.99609375, |
|
"rewards/iou_reward": 0.76171875, |
|
"rewards/log_reward": 0.0, |
|
"step": 135, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 103.109375, |
|
"epoch": 0.06133032694475761, |
|
"grad_norm": 3.516339202499408, |
|
"kl": 0.118408203125, |
|
"learning_rate": 9.693279206134416e-07, |
|
"log_metrics/accuracy": 0.6554303467273712, |
|
"log_metrics/iou_log": 0.6796875, |
|
"loss": 0.0047, |
|
"max_completion_length": 226.5, |
|
"min_completion_length": 54.0, |
|
"reward": 1.6640625, |
|
"reward_std": 0.28480498492717743, |
|
"rewards/format_reward": 0.984375, |
|
"rewards/iou_reward": 0.6796875, |
|
"rewards/log_reward": 0.0, |
|
"step": 136, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 104.65625, |
|
"epoch": 0.061781285231116125, |
|
"grad_norm": 4.120853025373372, |
|
"kl": 0.180908203125, |
|
"learning_rate": 9.691023906179522e-07, |
|
"log_metrics/accuracy": 0.7559227645397186, |
|
"log_metrics/iou_log": 0.8984375, |
|
"loss": 0.0072, |
|
"max_completion_length": 204.0, |
|
"min_completion_length": 60.5, |
|
"reward": 1.8828125, |
|
"reward_std": 0.12466736882925034, |
|
"rewards/format_reward": 0.984375, |
|
"rewards/iou_reward": 0.8984375, |
|
"rewards/log_reward": 0.0, |
|
"step": 137, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 104.96484375, |
|
"epoch": 0.06223224351747463, |
|
"grad_norm": 1.9791163456647438, |
|
"kl": 0.11181640625, |
|
"learning_rate": 9.688768606224628e-07, |
|
"log_metrics/accuracy": 0.6887724995613098, |
|
"log_metrics/iou_log": 0.7109375, |
|
"loss": 0.0045, |
|
"max_completion_length": 246.0, |
|
"min_completion_length": 60.0, |
|
"reward": 1.70703125, |
|
"reward_std": 0.21135114878416061, |
|
"rewards/format_reward": 0.99609375, |
|
"rewards/iou_reward": 0.7109375, |
|
"rewards/log_reward": 0.0, |
|
"step": 138, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 99.19140625, |
|
"epoch": 0.06268320180383315, |
|
"grad_norm": 5.5467544556496335, |
|
"kl": 0.114990234375, |
|
"learning_rate": 9.686513306269734e-07, |
|
"log_metrics/accuracy": 0.7241671979427338, |
|
"log_metrics/iou_log": 0.82421875, |
|
"loss": 0.0046, |
|
"max_completion_length": 194.0, |
|
"min_completion_length": 60.0, |
|
"reward": 1.82421875, |
|
"reward_std": 0.15309549123048782, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.82421875, |
|
"rewards/log_reward": 0.0, |
|
"step": 139, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 101.36328125, |
|
"epoch": 0.06313416009019165, |
|
"grad_norm": 12.142762617479395, |
|
"kl": 0.11865234375, |
|
"learning_rate": 9.68425800631484e-07, |
|
"log_metrics/accuracy": 0.712556004524231, |
|
"log_metrics/iou_log": 0.79296875, |
|
"loss": 0.0047, |
|
"max_completion_length": 190.0, |
|
"min_completion_length": 55.5, |
|
"reward": 1.78515625, |
|
"reward_std": 0.2706931382417679, |
|
"rewards/format_reward": 0.9921875, |
|
"rewards/iou_reward": 0.79296875, |
|
"rewards/log_reward": 0.0, |
|
"step": 140, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 101.61328125, |
|
"epoch": 0.06358511837655018, |
|
"grad_norm": 3.640232102574435, |
|
"kl": 0.1201171875, |
|
"learning_rate": 9.682002706359945e-07, |
|
"log_metrics/accuracy": 0.7055022418498993, |
|
"log_metrics/iou_log": 0.765625, |
|
"loss": 0.0048, |
|
"max_completion_length": 211.5, |
|
"min_completion_length": 55.5, |
|
"reward": 1.765625, |
|
"reward_std": 0.2804790586233139, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.765625, |
|
"rewards/log_reward": 0.0, |
|
"step": 141, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 97.63671875, |
|
"epoch": 0.06403607666290868, |
|
"grad_norm": 2.667593861521047, |
|
"kl": 0.122314453125, |
|
"learning_rate": 9.67974740640505e-07, |
|
"log_metrics/accuracy": 0.7026576399803162, |
|
"log_metrics/iou_log": 0.7890625, |
|
"loss": 0.0049, |
|
"max_completion_length": 207.0, |
|
"min_completion_length": 58.5, |
|
"reward": 1.78515625, |
|
"reward_std": 0.24758073687553406, |
|
"rewards/format_reward": 0.99609375, |
|
"rewards/iou_reward": 0.7890625, |
|
"rewards/log_reward": 0.0, |
|
"step": 142, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 100.74609375, |
|
"epoch": 0.06448703494926719, |
|
"grad_norm": 3.1074124076238916, |
|
"kl": 0.123779296875, |
|
"learning_rate": 9.677492106450157e-07, |
|
"log_metrics/accuracy": 0.6754811108112335, |
|
"log_metrics/iou_log": 0.75, |
|
"loss": 0.005, |
|
"max_completion_length": 209.5, |
|
"min_completion_length": 55.5, |
|
"reward": 1.74609375, |
|
"reward_std": 0.268774151802063, |
|
"rewards/format_reward": 0.99609375, |
|
"rewards/iou_reward": 0.75, |
|
"rewards/log_reward": 0.0, |
|
"step": 143, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 101.8125, |
|
"epoch": 0.0649379932356257, |
|
"grad_norm": 1.7944760480191515, |
|
"kl": 0.1220703125, |
|
"learning_rate": 9.675236806495264e-07, |
|
"log_metrics/accuracy": 0.71114382147789, |
|
"log_metrics/iou_log": 0.7890625, |
|
"loss": 0.0049, |
|
"max_completion_length": 217.0, |
|
"min_completion_length": 56.5, |
|
"reward": 1.78515625, |
|
"reward_std": 0.27572914958000183, |
|
"rewards/format_reward": 0.99609375, |
|
"rewards/iou_reward": 0.7890625, |
|
"rewards/log_reward": 0.0, |
|
"step": 144, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 99.64453125, |
|
"epoch": 0.06538895152198422, |
|
"grad_norm": 2.086624174741133, |
|
"kl": 0.124267578125, |
|
"learning_rate": 9.67298150654037e-07, |
|
"log_metrics/accuracy": 0.748405933380127, |
|
"log_metrics/iou_log": 0.83984375, |
|
"loss": 0.005, |
|
"max_completion_length": 213.0, |
|
"min_completion_length": 63.5, |
|
"reward": 1.8359375, |
|
"reward_std": 0.23154567182064056, |
|
"rewards/format_reward": 0.99609375, |
|
"rewards/iou_reward": 0.83984375, |
|
"rewards/log_reward": 0.0, |
|
"step": 145, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 96.7890625, |
|
"epoch": 0.06583990980834273, |
|
"grad_norm": 3.3643857707503435, |
|
"kl": 0.12841796875, |
|
"learning_rate": 9.670726206585476e-07, |
|
"log_metrics/accuracy": 0.7452348172664642, |
|
"log_metrics/iou_log": 0.8515625, |
|
"loss": 0.0052, |
|
"max_completion_length": 176.0, |
|
"min_completion_length": 59.0, |
|
"reward": 1.8515625, |
|
"reward_std": 0.23864974081516266, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.8515625, |
|
"rewards/log_reward": 0.0, |
|
"step": 146, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 97.89453125, |
|
"epoch": 0.06629086809470124, |
|
"grad_norm": 2.0566073759257706, |
|
"kl": 0.1337890625, |
|
"learning_rate": 9.668470906630582e-07, |
|
"log_metrics/accuracy": 0.7216435968875885, |
|
"log_metrics/iou_log": 0.79296875, |
|
"loss": 0.0054, |
|
"max_completion_length": 206.0, |
|
"min_completion_length": 58.5, |
|
"reward": 1.79296875, |
|
"reward_std": 0.13226625323295593, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.79296875, |
|
"rewards/log_reward": 0.0, |
|
"step": 147, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 97.98828125, |
|
"epoch": 0.06674182638105976, |
|
"grad_norm": 1.379192128046406, |
|
"kl": 0.1318359375, |
|
"learning_rate": 9.666215606675687e-07, |
|
"log_metrics/accuracy": 0.622128963470459, |
|
"log_metrics/iou_log": 0.65625, |
|
"loss": 0.0053, |
|
"max_completion_length": 198.0, |
|
"min_completion_length": 56.5, |
|
"reward": 1.65625, |
|
"reward_std": 0.21148452162742615, |
|
"rewards/format_reward": 1.0, |
|
"rewards/iou_reward": 0.65625, |
|
"rewards/log_reward": 0.0, |
|
"step": 148, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 97.74609375, |
|
"epoch": 0.06719278466741826, |
|
"grad_norm": 1.740571009912497, |
|
"kl": 0.118896484375, |
|
"learning_rate": 9.663960306720793e-07, |
|
"log_metrics/accuracy": 0.7791113555431366, |
|
"log_metrics/iou_log": 0.8515625, |
|
"loss": 0.0048, |
|
"max_completion_length": 203.0, |
|
"min_completion_length": 54.0, |
|
"reward": 1.84765625, |
|
"reward_std": 0.15834103524684906, |
|
"rewards/format_reward": 0.99609375, |
|
"rewards/iou_reward": 0.8515625, |
|
"rewards/log_reward": 0.0, |
|
"step": 149, |
|
"temperature": 1.0 |
|
}, |
|
{ |
|
"completion_length": 97.7734375, |
|
"epoch": 0.06764374295377677, |
|
"grad_norm": 1.832460748446639, |
|
"kl": 0.1318359375, |
|
"learning_rate": 9.6617050067659e-07, |
|
"log_metrics/accuracy": 0.7626213431358337, |
|
"log_metrics/iou_log": 0.83203125, |
|
"loss": 0.0053, |
|
"max_completion_length": 151.5, |
|
"min_completion_length": 63.0, |
|
"reward": 1.828125, |
|
"reward_std": 0.22953035682439804, |
|
"rewards/format_reward": 0.99609375, |
|
"rewards/iou_reward": 0.83203125, |
|
"rewards/log_reward": 0.0, |
|
"step": 150, |
|
"temperature": 1.0 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 4434, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|