|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.17101325352714836, |
|
"eval_steps": 10, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0017101325352714834, |
|
"eval_loss": 3.379563808441162, |
|
"eval_runtime": 4.6539, |
|
"eval_samples_per_second": 105.934, |
|
"eval_steps_per_second": 26.645, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.008550662676357419, |
|
"grad_norm": 16.06174659729004, |
|
"learning_rate": 0.0001, |
|
"loss": 3.091, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.017101325352714837, |
|
"grad_norm": 30.746688842773438, |
|
"learning_rate": 0.0002, |
|
"loss": 3.2285, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.017101325352714837, |
|
"eval_loss": 3.323244094848633, |
|
"eval_runtime": 4.6253, |
|
"eval_samples_per_second": 106.589, |
|
"eval_steps_per_second": 26.809, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.025651988029072252, |
|
"grad_norm": 54.02665710449219, |
|
"learning_rate": 0.00019848077530122083, |
|
"loss": 3.2701, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.034202650705429674, |
|
"grad_norm": 34.668582916259766, |
|
"learning_rate": 0.00019396926207859084, |
|
"loss": 3.2074, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.034202650705429674, |
|
"eval_loss": 3.245879888534546, |
|
"eval_runtime": 4.8152, |
|
"eval_samples_per_second": 102.383, |
|
"eval_steps_per_second": 25.752, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04275331338178709, |
|
"grad_norm": 50.34535598754883, |
|
"learning_rate": 0.00018660254037844388, |
|
"loss": 3.5141, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.051303976058144504, |
|
"grad_norm": 22.065715789794922, |
|
"learning_rate": 0.0001766044443118978, |
|
"loss": 3.1801, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.051303976058144504, |
|
"eval_loss": 3.2354209423065186, |
|
"eval_runtime": 4.7495, |
|
"eval_samples_per_second": 103.8, |
|
"eval_steps_per_second": 26.108, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.059854638734501926, |
|
"grad_norm": 16.38636016845703, |
|
"learning_rate": 0.00016427876096865394, |
|
"loss": 3.193, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.06840530141085935, |
|
"grad_norm": 31.518808364868164, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 3.3996, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.06840530141085935, |
|
"eval_loss": 3.22153902053833, |
|
"eval_runtime": 4.3761, |
|
"eval_samples_per_second": 112.657, |
|
"eval_steps_per_second": 28.336, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.07695596408721676, |
|
"grad_norm": 27.541791915893555, |
|
"learning_rate": 0.00013420201433256689, |
|
"loss": 3.2613, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.08550662676357418, |
|
"grad_norm": 43.12064743041992, |
|
"learning_rate": 0.00011736481776669306, |
|
"loss": 3.5703, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.08550662676357418, |
|
"eval_loss": 3.2112386226654053, |
|
"eval_runtime": 4.6985, |
|
"eval_samples_per_second": 104.927, |
|
"eval_steps_per_second": 26.391, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0940572894399316, |
|
"grad_norm": 24.017574310302734, |
|
"learning_rate": 0.0001, |
|
"loss": 3.0906, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.10260795211628901, |
|
"grad_norm": 72.96064758300781, |
|
"learning_rate": 8.263518223330697e-05, |
|
"loss": 3.1746, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.10260795211628901, |
|
"eval_loss": 3.197736978530884, |
|
"eval_runtime": 4.499, |
|
"eval_samples_per_second": 109.58, |
|
"eval_steps_per_second": 27.562, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.11115861479264642, |
|
"grad_norm": 37.74872970581055, |
|
"learning_rate": 6.579798566743314e-05, |
|
"loss": 3.1812, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.11970927746900385, |
|
"grad_norm": 22.35642433166504, |
|
"learning_rate": 5.000000000000002e-05, |
|
"loss": 3.1637, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.11970927746900385, |
|
"eval_loss": 3.1739985942840576, |
|
"eval_runtime": 4.7892, |
|
"eval_samples_per_second": 102.941, |
|
"eval_steps_per_second": 25.892, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.12825994014536127, |
|
"grad_norm": 37.37099075317383, |
|
"learning_rate": 3.5721239031346066e-05, |
|
"loss": 3.4387, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.1368106028217187, |
|
"grad_norm": 14.69621467590332, |
|
"learning_rate": 2.339555568810221e-05, |
|
"loss": 3.0516, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1368106028217187, |
|
"eval_loss": 3.148390054702759, |
|
"eval_runtime": 4.685, |
|
"eval_samples_per_second": 105.229, |
|
"eval_steps_per_second": 26.467, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1453612654980761, |
|
"grad_norm": 32.74094009399414, |
|
"learning_rate": 1.339745962155613e-05, |
|
"loss": 3.1578, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.15391192817443353, |
|
"grad_norm": 22.851362228393555, |
|
"learning_rate": 6.030737921409169e-06, |
|
"loss": 3.1172, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.15391192817443353, |
|
"eval_loss": 3.1573593616485596, |
|
"eval_runtime": 4.7749, |
|
"eval_samples_per_second": 103.249, |
|
"eval_steps_per_second": 25.969, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.16246259085079093, |
|
"grad_norm": 15.831418991088867, |
|
"learning_rate": 1.5192246987791981e-06, |
|
"loss": 3.3129, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.17101325352714836, |
|
"grad_norm": 58.72257614135742, |
|
"learning_rate": 0.0, |
|
"loss": 3.4377, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.17101325352714836, |
|
"eval_loss": 3.1490554809570312, |
|
"eval_runtime": 4.6256, |
|
"eval_samples_per_second": 106.582, |
|
"eval_steps_per_second": 26.808, |
|
"step": 100 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2549851317338112.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|