{ "best_metric": 2.190300464630127, "best_model_checkpoint": "./output_c/checkpoint-615", "epoch": 59.0, "global_step": 885, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_loss": 3.077622652053833, "eval_runtime": 0.2951, "eval_samples_per_second": 494.804, "eval_steps_per_second": 16.945, "step": 15 }, { "epoch": 2.0, "eval_loss": 2.9188127517700195, "eval_runtime": 0.3714, "eval_samples_per_second": 393.09, "eval_steps_per_second": 13.462, "step": 30 }, { "epoch": 3.0, "eval_loss": 2.8426482677459717, "eval_runtime": 0.2609, "eval_samples_per_second": 559.557, "eval_steps_per_second": 19.163, "step": 45 }, { "epoch": 4.0, "eval_loss": 2.6221985816955566, "eval_runtime": 0.2726, "eval_samples_per_second": 535.638, "eval_steps_per_second": 18.344, "step": 60 }, { "epoch": 5.0, "eval_loss": 2.6243655681610107, "eval_runtime": 0.2669, "eval_samples_per_second": 546.955, "eval_steps_per_second": 18.731, "step": 75 }, { "epoch": 6.0, "eval_loss": 2.6885011196136475, "eval_runtime": 0.2608, "eval_samples_per_second": 559.912, "eval_steps_per_second": 19.175, "step": 90 }, { "epoch": 7.0, "eval_loss": 2.4477977752685547, "eval_runtime": 0.266, "eval_samples_per_second": 548.901, "eval_steps_per_second": 18.798, "step": 105 }, { "epoch": 8.0, "eval_loss": 2.588456153869629, "eval_runtime": 0.3103, "eval_samples_per_second": 470.587, "eval_steps_per_second": 16.116, "step": 120 }, { "epoch": 9.0, "eval_loss": 2.4026825428009033, "eval_runtime": 0.2649, "eval_samples_per_second": 551.053, "eval_steps_per_second": 18.872, "step": 135 }, { "epoch": 10.0, "eval_loss": 2.3897533416748047, "eval_runtime": 0.3189, "eval_samples_per_second": 457.83, "eval_steps_per_second": 15.679, "step": 150 }, { "epoch": 11.0, "eval_loss": 2.473085641860962, "eval_runtime": 0.3415, "eval_samples_per_second": 427.488, "eval_steps_per_second": 14.64, "step": 165 }, { "epoch": 12.0, "eval_loss": 2.448983907699585, "eval_runtime": 0.2684, "eval_samples_per_second": 543.977, "eval_steps_per_second": 18.629, "step": 180 }, { "epoch": 13.0, "eval_loss": 2.3829126358032227, "eval_runtime": 0.2785, "eval_samples_per_second": 524.252, "eval_steps_per_second": 17.954, "step": 195 }, { "epoch": 14.0, "eval_loss": 2.501011610031128, "eval_runtime": 0.2671, "eval_samples_per_second": 546.582, "eval_steps_per_second": 18.719, "step": 210 }, { "epoch": 15.0, "eval_loss": 2.427177906036377, "eval_runtime": 0.2718, "eval_samples_per_second": 537.134, "eval_steps_per_second": 18.395, "step": 225 }, { "epoch": 16.0, "eval_loss": 2.421048879623413, "eval_runtime": 0.2742, "eval_samples_per_second": 532.533, "eval_steps_per_second": 18.237, "step": 240 }, { "epoch": 17.0, "eval_loss": 2.342533826828003, "eval_runtime": 0.2891, "eval_samples_per_second": 505.042, "eval_steps_per_second": 17.296, "step": 255 }, { "epoch": 18.0, "eval_loss": 2.398080348968506, "eval_runtime": 0.2794, "eval_samples_per_second": 522.517, "eval_steps_per_second": 17.894, "step": 270 }, { "epoch": 19.0, "eval_loss": 2.3011465072631836, "eval_runtime": 0.2774, "eval_samples_per_second": 526.407, "eval_steps_per_second": 18.028, "step": 285 }, { "epoch": 20.0, "eval_loss": 2.5109691619873047, "eval_runtime": 0.2617, "eval_samples_per_second": 557.964, "eval_steps_per_second": 19.108, "step": 300 }, { "epoch": 21.0, "eval_loss": 2.381415843963623, "eval_runtime": 0.2801, "eval_samples_per_second": 521.25, "eval_steps_per_second": 17.851, "step": 315 }, { "epoch": 22.0, "eval_loss": 2.3828046321868896, "eval_runtime": 0.2743, "eval_samples_per_second": 532.299, "eval_steps_per_second": 18.229, "step": 330 }, { "epoch": 23.0, "eval_loss": 2.353680372238159, "eval_runtime": 0.2779, "eval_samples_per_second": 525.432, "eval_steps_per_second": 17.994, "step": 345 }, { "epoch": 24.0, "eval_loss": 2.29482364654541, "eval_runtime": 0.2832, "eval_samples_per_second": 515.574, "eval_steps_per_second": 17.657, "step": 360 }, { "epoch": 25.0, "eval_loss": 2.3079590797424316, "eval_runtime": 0.3243, "eval_samples_per_second": 450.187, "eval_steps_per_second": 15.417, "step": 375 }, { "epoch": 26.0, "eval_loss": 2.379464864730835, "eval_runtime": 0.2934, "eval_samples_per_second": 497.692, "eval_steps_per_second": 17.044, "step": 390 }, { "epoch": 27.0, "eval_loss": 2.37467885017395, "eval_runtime": 0.2869, "eval_samples_per_second": 508.855, "eval_steps_per_second": 17.427, "step": 405 }, { "epoch": 28.0, "eval_loss": 2.3926637172698975, "eval_runtime": 0.2792, "eval_samples_per_second": 523.011, "eval_steps_per_second": 17.911, "step": 420 }, { "epoch": 29.0, "eval_loss": 2.2542331218719482, "eval_runtime": 0.2705, "eval_samples_per_second": 539.84, "eval_steps_per_second": 18.488, "step": 435 }, { "epoch": 30.0, "eval_loss": 2.312037944793701, "eval_runtime": 0.2823, "eval_samples_per_second": 517.14, "eval_steps_per_second": 17.71, "step": 450 }, { "epoch": 31.0, "eval_loss": 2.2595930099487305, "eval_runtime": 0.2709, "eval_samples_per_second": 538.869, "eval_steps_per_second": 18.454, "step": 465 }, { "epoch": 32.0, "eval_loss": 2.3319013118743896, "eval_runtime": 0.2718, "eval_samples_per_second": 537.135, "eval_steps_per_second": 18.395, "step": 480 }, { "epoch": 33.0, "eval_loss": 2.321133852005005, "eval_runtime": 0.3056, "eval_samples_per_second": 477.789, "eval_steps_per_second": 16.363, "step": 495 }, { "epoch": 34.0, "eval_loss": 2.3662209510803223, "eval_runtime": 0.2727, "eval_samples_per_second": 535.325, "eval_steps_per_second": 18.333, "step": 510 }, { "epoch": 35.0, "eval_loss": 2.3607561588287354, "eval_runtime": 0.2769, "eval_samples_per_second": 527.269, "eval_steps_per_second": 18.057, "step": 525 }, { "epoch": 36.0, "eval_loss": 2.2733652591705322, "eval_runtime": 0.3144, "eval_samples_per_second": 464.449, "eval_steps_per_second": 15.906, "step": 540 }, { "epoch": 37.0, "eval_loss": 2.332275390625, "eval_runtime": 0.2823, "eval_samples_per_second": 517.096, "eval_steps_per_second": 17.709, "step": 555 }, { "epoch": 38.0, "eval_loss": 2.3226001262664795, "eval_runtime": 0.2722, "eval_samples_per_second": 536.297, "eval_steps_per_second": 18.366, "step": 570 }, { "epoch": 39.0, "eval_loss": 2.2499899864196777, "eval_runtime": 0.2823, "eval_samples_per_second": 517.269, "eval_steps_per_second": 17.715, "step": 585 }, { "epoch": 40.0, "eval_loss": 2.3148353099823, "eval_runtime": 0.2735, "eval_samples_per_second": 533.755, "eval_steps_per_second": 18.279, "step": 600 }, { "epoch": 40.73, "learning_rate": 6.444444444444445e-06, "loss": 2.4323, "step": 611 }, { "epoch": 41.0, "eval_loss": 2.190300464630127, "eval_runtime": 0.279, "eval_samples_per_second": 523.306, "eval_steps_per_second": 17.921, "step": 615 }, { "epoch": 42.0, "eval_loss": 2.2688183784484863, "eval_runtime": 0.2786, "eval_samples_per_second": 524.054, "eval_steps_per_second": 17.947, "step": 630 }, { "epoch": 43.0, "eval_loss": 2.3206570148468018, "eval_runtime": 0.3166, "eval_samples_per_second": 461.08, "eval_steps_per_second": 15.79, "step": 645 }, { "epoch": 44.0, "eval_loss": 2.398860454559326, "eval_runtime": 0.2802, "eval_samples_per_second": 521.028, "eval_steps_per_second": 17.843, "step": 660 }, { "epoch": 45.0, "eval_loss": 2.329181432723999, "eval_runtime": 0.2845, "eval_samples_per_second": 513.222, "eval_steps_per_second": 17.576, "step": 675 }, { "epoch": 46.0, "eval_loss": 2.301910877227783, "eval_runtime": 0.2825, "eval_samples_per_second": 516.788, "eval_steps_per_second": 17.698, "step": 690 }, { "epoch": 47.0, "eval_loss": 2.286062002182007, "eval_runtime": 0.2774, "eval_samples_per_second": 526.332, "eval_steps_per_second": 18.025, "step": 705 }, { "epoch": 48.0, "eval_loss": 2.2627930641174316, "eval_runtime": 0.3491, "eval_samples_per_second": 418.174, "eval_steps_per_second": 14.321, "step": 720 }, { "epoch": 49.0, "eval_loss": 2.3683576583862305, "eval_runtime": 0.2828, "eval_samples_per_second": 516.226, "eval_steps_per_second": 17.679, "step": 735 }, { "epoch": 50.0, "eval_loss": 2.3841121196746826, "eval_runtime": 0.2738, "eval_samples_per_second": 533.204, "eval_steps_per_second": 18.26, "step": 750 }, { "epoch": 51.0, "eval_loss": 2.3427212238311768, "eval_runtime": 0.2841, "eval_samples_per_second": 513.874, "eval_steps_per_second": 17.598, "step": 765 }, { "epoch": 52.0, "eval_loss": 2.3786392211914062, "eval_runtime": 0.2882, "eval_samples_per_second": 506.555, "eval_steps_per_second": 17.348, "step": 780 }, { "epoch": 53.0, "eval_loss": 2.3314857482910156, "eval_runtime": 0.3054, "eval_samples_per_second": 478.014, "eval_steps_per_second": 16.37, "step": 795 }, { "epoch": 54.0, "eval_loss": 2.4228127002716064, "eval_runtime": 0.2752, "eval_samples_per_second": 530.586, "eval_steps_per_second": 18.171, "step": 810 }, { "epoch": 55.0, "eval_loss": 2.2979846000671387, "eval_runtime": 0.2804, "eval_samples_per_second": 520.739, "eval_steps_per_second": 17.834, "step": 825 }, { "epoch": 56.0, "eval_loss": 2.288037061691284, "eval_runtime": 0.2706, "eval_samples_per_second": 539.536, "eval_steps_per_second": 18.477, "step": 840 }, { "epoch": 57.0, "eval_loss": 2.375304698944092, "eval_runtime": 0.2754, "eval_samples_per_second": 530.218, "eval_steps_per_second": 18.158, "step": 855 }, { "epoch": 58.0, "eval_loss": 2.302351474761963, "eval_runtime": 0.2976, "eval_samples_per_second": 490.644, "eval_steps_per_second": 16.803, "step": 870 }, { "epoch": 59.0, "eval_loss": 2.2706165313720703, "eval_runtime": 0.3674, "eval_samples_per_second": 397.413, "eval_steps_per_second": 13.61, "step": 885 } ], "max_steps": 900, "num_train_epochs": 60, "total_flos": 196520659812864.0, "trial_name": null, "trial_params": null }