Qwen2.5-VL-7B-Open-R1-Distill / trainer_state.json
shawnzzzzz's picture
Model save
4d86664 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9978976874562018,
"eval_steps": 500,
"global_step": 267,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01868722261153936,
"grad_norm": 3.5830482905634797,
"learning_rate": 1.785714285714286e-05,
"loss": 0.8152,
"mean_token_accuracy": 0.7747266553342342,
"step": 5
},
{
"epoch": 0.03737444522307872,
"grad_norm": 1.6625011834876648,
"learning_rate": 3.571428571428572e-05,
"loss": 0.7025,
"mean_token_accuracy": 0.7917468748986721,
"step": 10
},
{
"epoch": 0.05606166783461808,
"grad_norm": 1.5899441749058187,
"learning_rate": 4.9998265374824964e-05,
"loss": 0.629,
"mean_token_accuracy": 0.8071001760661602,
"step": 15
},
{
"epoch": 0.07474889044615744,
"grad_norm": 1.5979853248536116,
"learning_rate": 4.993758157237536e-05,
"loss": 0.5972,
"mean_token_accuracy": 0.8143526442348957,
"step": 20
},
{
"epoch": 0.09343611305769679,
"grad_norm": 1.2674965103641067,
"learning_rate": 4.979043378581744e-05,
"loss": 0.5727,
"mean_token_accuracy": 0.8199357651174068,
"step": 25
},
{
"epoch": 0.11212333566923616,
"grad_norm": 1.3685254142570102,
"learning_rate": 4.9557389054153965e-05,
"loss": 0.5555,
"mean_token_accuracy": 0.8241050921380519,
"step": 30
},
{
"epoch": 0.1308105582807755,
"grad_norm": 0.9457691254500147,
"learning_rate": 4.923934542318854e-05,
"loss": 0.5409,
"mean_token_accuracy": 0.8273717932403087,
"step": 35
},
{
"epoch": 0.14949778089231489,
"grad_norm": 0.9189569415750541,
"learning_rate": 4.883752848487571e-05,
"loss": 0.5272,
"mean_token_accuracy": 0.8308477029204369,
"step": 40
},
{
"epoch": 0.16818500350385424,
"grad_norm": 0.7752642460093544,
"learning_rate": 4.835348665446049e-05,
"loss": 0.5213,
"mean_token_accuracy": 0.8322811633348465,
"step": 45
},
{
"epoch": 0.18687222611539359,
"grad_norm": 0.8083941553455664,
"learning_rate": 4.7789085203607664e-05,
"loss": 0.5118,
"mean_token_accuracy": 0.8346851594746113,
"step": 50
},
{
"epoch": 0.20555944872693296,
"grad_norm": 0.9161612194082712,
"learning_rate": 4.714649907251388e-05,
"loss": 0.5088,
"mean_token_accuracy": 0.8352511122822761,
"step": 55
},
{
"epoch": 0.22424667133847231,
"grad_norm": 0.6723034421711911,
"learning_rate": 4.6428204488701576e-05,
"loss": 0.5018,
"mean_token_accuracy": 0.837028643488884,
"step": 60
},
{
"epoch": 0.2429338939500117,
"grad_norm": 0.7871127694104725,
"learning_rate": 4.563696942479205e-05,
"loss": 0.5061,
"mean_token_accuracy": 0.8351837247610092,
"step": 65
},
{
"epoch": 0.261621116561551,
"grad_norm": 0.761272463434446,
"learning_rate": 4.477584293202868e-05,
"loss": 0.4939,
"mean_token_accuracy": 0.8390413090586663,
"step": 70
},
{
"epoch": 0.2803083391730904,
"grad_norm": 0.5083377211370889,
"learning_rate": 4.384814339065424e-05,
"loss": 0.4914,
"mean_token_accuracy": 0.8395063504576683,
"step": 75
},
{
"epoch": 0.29899556178462977,
"grad_norm": 0.908648503032802,
"learning_rate": 4.285744572241972e-05,
"loss": 0.4972,
"mean_token_accuracy": 0.8376469679176808,
"step": 80
},
{
"epoch": 0.3176827843961691,
"grad_norm": 0.5938541774391267,
"learning_rate": 4.180756761450171e-05,
"loss": 0.4816,
"mean_token_accuracy": 0.8424232035875321,
"step": 85
},
{
"epoch": 0.33637000700770847,
"grad_norm": 0.4849394515949123,
"learning_rate": 4.070255480791492e-05,
"loss": 0.491,
"mean_token_accuracy": 0.8394017495214939,
"step": 90
},
{
"epoch": 0.35505722961924785,
"grad_norm": 0.5718927262308711,
"learning_rate": 3.954666550711159e-05,
"loss": 0.4851,
"mean_token_accuracy": 0.8409382797777653,
"step": 95
},
{
"epoch": 0.37374445223078717,
"grad_norm": 0.5859959003534665,
"learning_rate": 3.8344353970845606e-05,
"loss": 0.4862,
"mean_token_accuracy": 0.8411718301475049,
"step": 100
},
{
"epoch": 0.39243167484232655,
"grad_norm": 0.3782337735273932,
"learning_rate": 3.710025334753495e-05,
"loss": 0.4834,
"mean_token_accuracy": 0.8412449143826961,
"step": 105
},
{
"epoch": 0.41111889745386593,
"grad_norm": 0.41111283052381054,
"learning_rate": 3.581915782126652e-05,
"loss": 0.476,
"mean_token_accuracy": 0.8432458408176899,
"step": 110
},
{
"epoch": 0.4298061200654053,
"grad_norm": 0.3851706170856147,
"learning_rate": 3.4506004137244676e-05,
"loss": 0.4851,
"mean_token_accuracy": 0.8405788190662861,
"step": 115
},
{
"epoch": 0.44849334267694463,
"grad_norm": 0.5062426249148336,
"learning_rate": 3.3165852577875546e-05,
"loss": 0.4785,
"mean_token_accuracy": 0.8426314078271389,
"step": 120
},
{
"epoch": 0.467180565288484,
"grad_norm": 0.40499027788768055,
"learning_rate": 3.180386746279663e-05,
"loss": 0.4747,
"mean_token_accuracy": 0.843725374341011,
"step": 125
},
{
"epoch": 0.4858677879000234,
"grad_norm": 0.47044740848341193,
"learning_rate": 3.04252972479953e-05,
"loss": 0.472,
"mean_token_accuracy": 0.8443724811077118,
"step": 130
},
{
"epoch": 0.5045550105115627,
"grad_norm": 0.4272272972180237,
"learning_rate": 2.90354543007051e-05,
"loss": 0.4725,
"mean_token_accuracy": 0.8439367160201072,
"step": 135
},
{
"epoch": 0.523242233123102,
"grad_norm": 0.4584455682048115,
"learning_rate": 2.7639694428017792e-05,
"loss": 0.4777,
"mean_token_accuracy": 0.842538620531559,
"step": 140
},
{
"epoch": 0.5419294557346415,
"grad_norm": 0.4589522708203329,
"learning_rate": 2.6243396238098518e-05,
"loss": 0.4693,
"mean_token_accuracy": 0.8448904320597649,
"step": 145
},
{
"epoch": 0.5606166783461808,
"grad_norm": 0.5239037802900407,
"learning_rate": 2.4851940413536174e-05,
"loss": 0.4697,
"mean_token_accuracy": 0.8447436839342117,
"step": 150
},
{
"epoch": 0.5793039009577201,
"grad_norm": 0.4866344062046232,
"learning_rate": 2.347068897669999e-05,
"loss": 0.4687,
"mean_token_accuracy": 0.8448469452559948,
"step": 155
},
{
"epoch": 0.5979911235692595,
"grad_norm": 0.3163374085877721,
"learning_rate": 2.2104964627003848e-05,
"loss": 0.4629,
"mean_token_accuracy": 0.846843034029007,
"step": 160
},
{
"epoch": 0.6166783461807989,
"grad_norm": 0.2747336073106856,
"learning_rate": 2.0760030229702972e-05,
"loss": 0.4612,
"mean_token_accuracy": 0.8469885870814323,
"step": 165
},
{
"epoch": 0.6353655687923382,
"grad_norm": 0.24722982457005138,
"learning_rate": 1.9441068535263564e-05,
"loss": 0.4596,
"mean_token_accuracy": 0.8476050347089767,
"step": 170
},
{
"epoch": 0.6540527914038776,
"grad_norm": 0.23852151209010736,
"learning_rate": 1.815316220745756e-05,
"loss": 0.4636,
"mean_token_accuracy": 0.8460029393434525,
"step": 175
},
{
"epoch": 0.6727400140154169,
"grad_norm": 0.2597676847867842,
"learning_rate": 1.6901274237144782e-05,
"loss": 0.4669,
"mean_token_accuracy": 0.8451244607567787,
"step": 180
},
{
"epoch": 0.6914272366269563,
"grad_norm": 0.8605153163863832,
"learning_rate": 1.5690228817218815e-05,
"loss": 0.4668,
"mean_token_accuracy": 0.8468827910721302,
"step": 185
},
{
"epoch": 0.7101144592384957,
"grad_norm": 0.24915841679008277,
"learning_rate": 1.4524692752415493e-05,
"loss": 0.4591,
"mean_token_accuracy": 0.8473225735127926,
"step": 190
},
{
"epoch": 0.728801681850035,
"grad_norm": 0.29798920762515824,
"learning_rate": 1.3409157475622094e-05,
"loss": 0.4576,
"mean_token_accuracy": 0.847739252448082,
"step": 195
},
{
"epoch": 0.7474889044615743,
"grad_norm": 0.26110601129292016,
"learning_rate": 1.2347921739987815e-05,
"loss": 0.4611,
"mean_token_accuracy": 0.8468295410275459,
"step": 200
},
{
"epoch": 0.7661761270731138,
"grad_norm": 0.2785413649007615,
"learning_rate": 1.1345075053532287e-05,
"loss": 0.4615,
"mean_token_accuracy": 0.846584790199995,
"step": 205
},
{
"epoch": 0.7848633496846531,
"grad_norm": 0.281588896295376,
"learning_rate": 1.0404481920087206e-05,
"loss": 0.4532,
"mean_token_accuracy": 0.8491566374897956,
"step": 210
},
{
"epoch": 0.8035505722961925,
"grad_norm": 0.26784943568942043,
"learning_rate": 9.529766947299371e-06,
"loss": 0.4555,
"mean_token_accuracy": 0.8485622465610504,
"step": 215
},
{
"epoch": 0.8222377949077319,
"grad_norm": 0.22085700610769077,
"learning_rate": 8.724300879081718e-06,
"loss": 0.4584,
"mean_token_accuracy": 0.8476461283862591,
"step": 220
},
{
"epoch": 0.8409250175192712,
"grad_norm": 0.1833370072353519,
"learning_rate": 7.991187606337009e-06,
"loss": 0.452,
"mean_token_accuracy": 0.8494263976812363,
"step": 225
},
{
"epoch": 0.8596122401308106,
"grad_norm": 0.20272051001866034,
"learning_rate": 7.333252206008559e-06,
"loss": 0.4538,
"mean_token_accuracy": 0.8487676382064819,
"step": 230
},
{
"epoch": 0.8782994627423499,
"grad_norm": 0.19595652872474512,
"learning_rate": 6.753030054550158e-06,
"loss": 0.4506,
"mean_token_accuracy": 0.8496683083474637,
"step": 235
},
{
"epoch": 0.8969866853538893,
"grad_norm": 0.19319039281954486,
"learning_rate": 6.25275705776658e-06,
"loss": 0.4519,
"mean_token_accuracy": 0.8493411011993885,
"step": 240
},
{
"epoch": 0.9156739079654287,
"grad_norm": 0.20035376740680347,
"learning_rate": 5.834361034674521e-06,
"loss": 0.4557,
"mean_token_accuracy": 0.8482660032808781,
"step": 245
},
{
"epoch": 0.934361130576968,
"grad_norm": 0.2010469617138832,
"learning_rate": 5.499454288586379e-06,
"loss": 0.453,
"mean_token_accuracy": 0.8490205124020577,
"step": 250
},
{
"epoch": 0.9530483531885073,
"grad_norm": 0.2001591137777418,
"learning_rate": 5.24932739404462e-06,
"loss": 0.4488,
"mean_token_accuracy": 0.8502446681261062,
"step": 255
},
{
"epoch": 0.9717355758000468,
"grad_norm": 0.18321355893669744,
"learning_rate": 5.08494422354882e-06,
"loss": 0.4518,
"mean_token_accuracy": 0.849391470849514,
"step": 260
},
{
"epoch": 0.9904227984115861,
"grad_norm": 0.18951844466307075,
"learning_rate": 5.006938233240212e-06,
"loss": 0.4554,
"mean_token_accuracy": 0.8482832841575145,
"step": 265
},
{
"epoch": 0.9978976874562018,
"mean_token_accuracy": 0.8498711809515953,
"step": 267,
"total_flos": 2841831180075008.0,
"train_loss": 0.4941176689519418,
"train_runtime": 26749.4727,
"train_samples_per_second": 1.28,
"train_steps_per_second": 0.01
}
],
"logging_steps": 5,
"max_steps": 267,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2841831180075008.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}