{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.17101325352714836, "eval_steps": 10, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0017101325352714834, "eval_loss": 3.379563808441162, "eval_runtime": 4.6539, "eval_samples_per_second": 105.934, "eval_steps_per_second": 26.645, "step": 1 }, { "epoch": 0.008550662676357419, "grad_norm": 16.06174659729004, "learning_rate": 0.0001, "loss": 3.091, "step": 5 }, { "epoch": 0.017101325352714837, "grad_norm": 30.746688842773438, "learning_rate": 0.0002, "loss": 3.2285, "step": 10 }, { "epoch": 0.017101325352714837, "eval_loss": 3.323244094848633, "eval_runtime": 4.6253, "eval_samples_per_second": 106.589, "eval_steps_per_second": 26.809, "step": 10 }, { "epoch": 0.025651988029072252, "grad_norm": 54.02665710449219, "learning_rate": 0.00019848077530122083, "loss": 3.2701, "step": 15 }, { "epoch": 0.034202650705429674, "grad_norm": 34.668582916259766, "learning_rate": 0.00019396926207859084, "loss": 3.2074, "step": 20 }, { "epoch": 0.034202650705429674, "eval_loss": 3.245879888534546, "eval_runtime": 4.8152, "eval_samples_per_second": 102.383, "eval_steps_per_second": 25.752, "step": 20 }, { "epoch": 0.04275331338178709, "grad_norm": 50.34535598754883, "learning_rate": 0.00018660254037844388, "loss": 3.5141, "step": 25 }, { "epoch": 0.051303976058144504, "grad_norm": 22.065715789794922, "learning_rate": 0.0001766044443118978, "loss": 3.1801, "step": 30 }, { "epoch": 0.051303976058144504, "eval_loss": 3.2354209423065186, "eval_runtime": 4.7495, "eval_samples_per_second": 103.8, "eval_steps_per_second": 26.108, "step": 30 }, { "epoch": 0.059854638734501926, "grad_norm": 16.38636016845703, "learning_rate": 0.00016427876096865394, "loss": 3.193, "step": 35 }, { "epoch": 0.06840530141085935, "grad_norm": 31.518808364868164, "learning_rate": 0.00015000000000000001, "loss": 3.3996, "step": 40 }, { "epoch": 0.06840530141085935, "eval_loss": 3.22153902053833, "eval_runtime": 4.3761, "eval_samples_per_second": 112.657, "eval_steps_per_second": 28.336, "step": 40 }, { "epoch": 0.07695596408721676, "grad_norm": 27.541791915893555, "learning_rate": 0.00013420201433256689, "loss": 3.2613, "step": 45 }, { "epoch": 0.08550662676357418, "grad_norm": 43.12064743041992, "learning_rate": 0.00011736481776669306, "loss": 3.5703, "step": 50 }, { "epoch": 0.08550662676357418, "eval_loss": 3.2112386226654053, "eval_runtime": 4.6985, "eval_samples_per_second": 104.927, "eval_steps_per_second": 26.391, "step": 50 }, { "epoch": 0.0940572894399316, "grad_norm": 24.017574310302734, "learning_rate": 0.0001, "loss": 3.0906, "step": 55 }, { "epoch": 0.10260795211628901, "grad_norm": 72.96064758300781, "learning_rate": 8.263518223330697e-05, "loss": 3.1746, "step": 60 }, { "epoch": 0.10260795211628901, "eval_loss": 3.197736978530884, "eval_runtime": 4.499, "eval_samples_per_second": 109.58, "eval_steps_per_second": 27.562, "step": 60 }, { "epoch": 0.11115861479264642, "grad_norm": 37.74872970581055, "learning_rate": 6.579798566743314e-05, "loss": 3.1812, "step": 65 }, { "epoch": 0.11970927746900385, "grad_norm": 22.35642433166504, "learning_rate": 5.000000000000002e-05, "loss": 3.1637, "step": 70 }, { "epoch": 0.11970927746900385, "eval_loss": 3.1739985942840576, "eval_runtime": 4.7892, "eval_samples_per_second": 102.941, "eval_steps_per_second": 25.892, "step": 70 }, { "epoch": 0.12825994014536127, "grad_norm": 37.37099075317383, "learning_rate": 3.5721239031346066e-05, "loss": 3.4387, "step": 75 }, { "epoch": 0.1368106028217187, "grad_norm": 14.69621467590332, "learning_rate": 2.339555568810221e-05, "loss": 3.0516, "step": 80 }, { "epoch": 0.1368106028217187, "eval_loss": 3.148390054702759, "eval_runtime": 4.685, "eval_samples_per_second": 105.229, "eval_steps_per_second": 26.467, "step": 80 }, { "epoch": 0.1453612654980761, "grad_norm": 32.74094009399414, "learning_rate": 1.339745962155613e-05, "loss": 3.1578, "step": 85 }, { "epoch": 0.15391192817443353, "grad_norm": 22.851362228393555, "learning_rate": 6.030737921409169e-06, "loss": 3.1172, "step": 90 }, { "epoch": 0.15391192817443353, "eval_loss": 3.1573593616485596, "eval_runtime": 4.7749, "eval_samples_per_second": 103.249, "eval_steps_per_second": 25.969, "step": 90 }, { "epoch": 0.16246259085079093, "grad_norm": 15.831418991088867, "learning_rate": 1.5192246987791981e-06, "loss": 3.3129, "step": 95 }, { "epoch": 0.17101325352714836, "grad_norm": 58.72257614135742, "learning_rate": 0.0, "loss": 3.4377, "step": 100 }, { "epoch": 0.17101325352714836, "eval_loss": 3.1490554809570312, "eval_runtime": 4.6256, "eval_samples_per_second": 106.582, "eval_steps_per_second": 26.808, "step": 100 } ], "logging_steps": 5, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2549851317338112.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }