{ "best_metric": 0.8438514244500541, "best_model_checkpoint": "retrieval_model/run-3/checkpoint-17806", "epoch": 2.0, "eval_steps": 500, "global_step": 17806, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "learning_rate": 3.4704519236458057e-06, "loss": 0.4686, "step": 500 }, { "epoch": 0.11, "learning_rate": 3.4210321821949366e-06, "loss": 0.3929, "step": 1000 }, { "epoch": 0.17, "learning_rate": 3.3716124407440683e-06, "loss": 0.3526, "step": 1500 }, { "epoch": 0.22, "learning_rate": 3.3221926992931996e-06, "loss": 0.3448, "step": 2000 }, { "epoch": 0.28, "learning_rate": 3.272772957842331e-06, "loss": 0.3648, "step": 2500 }, { "epoch": 0.34, "learning_rate": 3.223353216391462e-06, "loss": 0.3417, "step": 3000 }, { "epoch": 0.39, "learning_rate": 3.173933474940593e-06, "loss": 0.3259, "step": 3500 }, { "epoch": 0.45, "learning_rate": 3.124513733489725e-06, "loss": 0.3513, "step": 4000 }, { "epoch": 0.51, "learning_rate": 3.0750939920388557e-06, "loss": 0.3343, "step": 4500 }, { "epoch": 0.56, "learning_rate": 3.025674250587987e-06, "loss": 0.3465, "step": 5000 }, { "epoch": 0.62, "learning_rate": 2.9762545091371184e-06, "loss": 0.3218, "step": 5500 }, { "epoch": 0.67, "learning_rate": 2.9268347676862497e-06, "loss": 0.3134, "step": 6000 }, { "epoch": 0.73, "learning_rate": 2.877415026235381e-06, "loss": 0.3115, "step": 6500 }, { "epoch": 0.79, "learning_rate": 2.8279952847845123e-06, "loss": 0.3143, "step": 7000 }, { "epoch": 0.84, "learning_rate": 2.7785755433336436e-06, "loss": 0.3086, "step": 7500 }, { "epoch": 0.9, "learning_rate": 2.7291558018827745e-06, "loss": 0.3295, "step": 8000 }, { "epoch": 0.95, "learning_rate": 2.679736060431906e-06, "loss": 0.3056, "step": 8500 }, { "epoch": 1.0, "eval_f1": 0.8321005917159763, "eval_loss": 0.32298120856285095, "eval_runtime": 8.1227, "eval_samples_per_second": 487.029, "eval_steps_per_second": 7.633, "step": 8903 }, { "epoch": 1.01, "learning_rate": 2.630316318981037e-06, "loss": 0.318, "step": 9000 }, { "epoch": 1.07, "learning_rate": 2.580896577530169e-06, "loss": 0.2982, "step": 9500 }, { "epoch": 1.12, "learning_rate": 2.5314768360792997e-06, "loss": 0.2874, "step": 10000 }, { "epoch": 1.18, "learning_rate": 2.482057094628431e-06, "loss": 0.2856, "step": 10500 }, { "epoch": 1.24, "learning_rate": 2.4326373531775623e-06, "loss": 0.3088, "step": 11000 }, { "epoch": 1.29, "learning_rate": 2.3832176117266936e-06, "loss": 0.2887, "step": 11500 }, { "epoch": 1.35, "learning_rate": 2.333797870275825e-06, "loss": 0.2844, "step": 12000 }, { "epoch": 1.4, "learning_rate": 2.2843781288249563e-06, "loss": 0.2884, "step": 12500 }, { "epoch": 1.46, "learning_rate": 2.2349583873740876e-06, "loss": 0.2838, "step": 13000 }, { "epoch": 1.52, "learning_rate": 2.1855386459232185e-06, "loss": 0.2988, "step": 13500 }, { "epoch": 1.57, "learning_rate": 2.1361189044723498e-06, "loss": 0.2881, "step": 14000 }, { "epoch": 1.63, "learning_rate": 2.0866991630214815e-06, "loss": 0.2822, "step": 14500 }, { "epoch": 1.68, "learning_rate": 2.0372794215706124e-06, "loss": 0.2904, "step": 15000 }, { "epoch": 1.74, "learning_rate": 1.9878596801197437e-06, "loss": 0.289, "step": 15500 }, { "epoch": 1.8, "learning_rate": 1.938439938668875e-06, "loss": 0.3042, "step": 16000 }, { "epoch": 1.85, "learning_rate": 1.889020197218006e-06, "loss": 0.286, "step": 16500 }, { "epoch": 1.91, "learning_rate": 1.8396004557671378e-06, "loss": 0.2667, "step": 17000 }, { "epoch": 1.97, "learning_rate": 1.790180714316269e-06, "loss": 0.3005, "step": 17500 }, { "epoch": 2.0, "eval_f1": 0.8438514244500541, "eval_loss": 0.33582502603530884, "eval_runtime": 8.1185, "eval_samples_per_second": 487.283, "eval_steps_per_second": 7.637, "step": 17806 } ], "logging_steps": 500, "max_steps": 35612, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "total_flos": 7767932240503800.0, "train_batch_size": 8, "trial_name": null, "trial_params": { "learning_rate": 3.5198716650966744e-06, "num_train_epochs": 4, "per_device_train_batch_size": 8, "seed": 35 } }