|
{ |
|
"best_global_step": 200, |
|
"best_metric": 0.0776444673538208, |
|
"best_model_checkpoint": "./paligemma-clevr-finetuned/checkpoint-200", |
|
"epoch": 0.25396825396825395, |
|
"eval_steps": 100, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.012698412698412698, |
|
"grad_norm": 6.424576759338379, |
|
"learning_rate": 9.898477157360407e-05, |
|
"loss": 7.0764, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.025396825396825397, |
|
"grad_norm": 2.169109344482422, |
|
"learning_rate": 9.771573604060914e-05, |
|
"loss": 0.5921, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0380952380952381, |
|
"grad_norm": 1.0041604042053223, |
|
"learning_rate": 9.644670050761421e-05, |
|
"loss": 0.2141, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.050793650793650794, |
|
"grad_norm": 1.1403199434280396, |
|
"learning_rate": 9.51776649746193e-05, |
|
"loss": 0.1424, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.06349206349206349, |
|
"grad_norm": 1.898563265800476, |
|
"learning_rate": 9.390862944162437e-05, |
|
"loss": 0.1758, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0761904761904762, |
|
"grad_norm": 3.6021909713745117, |
|
"learning_rate": 9.263959390862943e-05, |
|
"loss": 0.1356, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.08888888888888889, |
|
"grad_norm": 7.687479496002197, |
|
"learning_rate": 9.137055837563452e-05, |
|
"loss": 0.1585, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.10158730158730159, |
|
"grad_norm": 1.7669498920440674, |
|
"learning_rate": 9.01015228426396e-05, |
|
"loss": 0.1537, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.11428571428571428, |
|
"grad_norm": 0.5084421634674072, |
|
"learning_rate": 8.883248730964467e-05, |
|
"loss": 0.118, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.12698412698412698, |
|
"grad_norm": 0.9338592886924744, |
|
"learning_rate": 8.756345177664976e-05, |
|
"loss": 0.0885, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12698412698412698, |
|
"eval_loss": 0.08724059909582138, |
|
"eval_runtime": 408.8329, |
|
"eval_samples_per_second": 3.424, |
|
"eval_steps_per_second": 0.856, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.13968253968253969, |
|
"grad_norm": 2.7368178367614746, |
|
"learning_rate": 8.629441624365483e-05, |
|
"loss": 0.1042, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.1523809523809524, |
|
"grad_norm": 2.3904881477355957, |
|
"learning_rate": 8.50253807106599e-05, |
|
"loss": 0.076, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.16507936507936508, |
|
"grad_norm": 1.2015010118484497, |
|
"learning_rate": 8.375634517766498e-05, |
|
"loss": 0.1791, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.17777777777777778, |
|
"grad_norm": 0.8636962175369263, |
|
"learning_rate": 8.248730964467005e-05, |
|
"loss": 0.0762, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.19047619047619047, |
|
"grad_norm": 3.2262871265411377, |
|
"learning_rate": 8.134517766497463e-05, |
|
"loss": 0.1217, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.20317460317460317, |
|
"grad_norm": 4.662909030914307, |
|
"learning_rate": 8.00761421319797e-05, |
|
"loss": 0.1213, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.21587301587301588, |
|
"grad_norm": 0.6849185824394226, |
|
"learning_rate": 7.880710659898477e-05, |
|
"loss": 0.1106, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.22857142857142856, |
|
"grad_norm": 0.5793686509132385, |
|
"learning_rate": 7.753807106598985e-05, |
|
"loss": 0.1171, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.24126984126984127, |
|
"grad_norm": 3.4240646362304688, |
|
"learning_rate": 7.626903553299492e-05, |
|
"loss": 0.0528, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.25396825396825395, |
|
"grad_norm": 1.2842432260513306, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 0.1149, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.25396825396825395, |
|
"eval_loss": 0.0776444673538208, |
|
"eval_runtime": 428.189, |
|
"eval_samples_per_second": 3.27, |
|
"eval_steps_per_second": 0.817, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 788, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.432387427328e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|