[ { "loss": 2.0908, "grad_norm": 0.5866689682006836, "learning_rate": 8.923190911336132e-05, "epoch": 0.22189421894218941, "step": 451 }, { "eval_loss": 1.9633848667144775, "eval_runtime": 1103.6079, "eval_samples_per_second": 26.193, "eval_steps_per_second": 1.637, "epoch": 0.22189421894218941, "step": 451 }, { "loss": 1.9265, "grad_norm": 0.5757979154586792, "learning_rate": 7.809335638429242e-05, "epoch": 0.44378843788437883, "step": 902 }, { "eval_loss": 1.9037492275238037, "eval_runtime": 1102.9066, "eval_samples_per_second": 26.21, "eval_steps_per_second": 1.638, "epoch": 0.44378843788437883, "step": 902 }, { "loss": 1.8852, "grad_norm": 0.5880784392356873, "learning_rate": 6.695480365522352e-05, "epoch": 0.6656826568265682, "step": 1353 }, { "eval_loss": 1.8703107833862305, "eval_runtime": 1103.8364, "eval_samples_per_second": 26.188, "eval_steps_per_second": 1.637, "epoch": 0.6656826568265682, "step": 1353 }, { "loss": 1.8585, "grad_norm": 0.6274667978286743, "learning_rate": 5.581625092615461e-05, "epoch": 0.8875768757687577, "step": 1804 }, { "eval_loss": 1.8478941917419434, "eval_runtime": 1103.7709, "eval_samples_per_second": 26.189, "eval_steps_per_second": 1.637, "epoch": 0.8875768757687577, "step": 1804 }, { "loss": 1.8051, "grad_norm": 0.6508978009223938, "learning_rate": 4.4677698197085704e-05, "epoch": 1.1094710947109472, "step": 2255 }, { "eval_loss": 1.835593819618225, "eval_runtime": 1103.8475, "eval_samples_per_second": 26.187, "eval_steps_per_second": 1.637, "epoch": 1.1094710947109472, "step": 2255 }, { "loss": 1.7622, "grad_norm": 0.6831102967262268, "learning_rate": 3.3539145468016795e-05, "epoch": 1.3313653136531365, "step": 2706 }, { "eval_loss": 1.8246678113937378, "eval_runtime": 1103.4364, "eval_samples_per_second": 26.197, "eval_steps_per_second": 1.638, "epoch": 1.3313653136531365, "step": 2706 }, { "loss": 1.7536, "grad_norm": 0.6920585036277771, "learning_rate": 2.240059273894789e-05, "epoch": 1.5532595325953258, "step": 3157 }, { "eval_loss": 1.8157387971878052, "eval_runtime": 1103.7228, "eval_samples_per_second": 26.19, "eval_steps_per_second": 1.637, "epoch": 1.5532595325953258, "step": 3157 }, { "loss": 1.7467, "grad_norm": 0.6837635040283203, "learning_rate": 1.1262040009878982e-05, "epoch": 1.7751537515375153, "step": 3608 }, { "eval_loss": 1.808944821357727, "eval_runtime": 1103.6481, "eval_samples_per_second": 26.192, "eval_steps_per_second": 1.637, "epoch": 1.7751537515375153, "step": 3608 }, { "loss": 1.7457, "grad_norm": 0.6841686367988586, "learning_rate": 1.2348728081007656e-07, "epoch": 1.9970479704797048, "step": 4059 }, { "eval_loss": 1.805881142616272, "eval_runtime": 1103.2943, "eval_samples_per_second": 26.201, "eval_steps_per_second": 1.638, "epoch": 1.9970479704797048, "step": 4059 }, { "train_runtime": 69487.1891, "train_samples_per_second": 7.488, "train_steps_per_second": 0.058, "total_flos": 1.0874326325169095e+19, "train_loss": 1.841461892437747, "epoch": 1.9995079950799508, "step": 4064 } ]