{
  "best_metric": 0.6936941742897034,
  "best_model_checkpoint": "miner_id_24/checkpoint-500",
  "epoch": 0.0357347055460263,
  "eval_steps": 50,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 7.14694110920526e-05,
      "eval_loss": 1.1522774696350098,
      "eval_runtime": 271.4524,
      "eval_samples_per_second": 21.705,
      "eval_steps_per_second": 5.426,
      "step": 1
    },
    {
      "epoch": 0.000714694110920526,
      "grad_norm": 0.6790405511856079,
      "learning_rate": 4.0600000000000004e-05,
      "loss": 1.8005,
      "step": 10
    },
    {
      "epoch": 0.001429388221841052,
      "grad_norm": 0.8249218463897705,
      "learning_rate": 8.120000000000001e-05,
      "loss": 1.7599,
      "step": 20
    },
    {
      "epoch": 0.002144082332761578,
      "grad_norm": 0.9275533556938171,
      "learning_rate": 0.00012179999999999999,
      "loss": 1.7436,
      "step": 30
    },
    {
      "epoch": 0.002858776443682104,
      "grad_norm": 1.2592201232910156,
      "learning_rate": 0.00016240000000000002,
      "loss": 1.9385,
      "step": 40
    },
    {
      "epoch": 0.00357347055460263,
      "grad_norm": 2.3246257305145264,
      "learning_rate": 0.000203,
      "loss": 2.1042,
      "step": 50
    },
    {
      "epoch": 0.00357347055460263,
      "eval_loss": 0.9425861835479736,
      "eval_runtime": 271.9521,
      "eval_samples_per_second": 21.666,
      "eval_steps_per_second": 5.416,
      "step": 50
    },
    {
      "epoch": 0.004288164665523156,
      "grad_norm": 0.5499325394630432,
      "learning_rate": 0.00020275275110137215,
      "loss": 1.6205,
      "step": 60
    },
    {
      "epoch": 0.005002858776443682,
      "grad_norm": 0.657888650894165,
      "learning_rate": 0.00020201220897726938,
      "loss": 1.5361,
      "step": 70
    },
    {
      "epoch": 0.005717552887364208,
      "grad_norm": 0.7335652112960815,
      "learning_rate": 0.00020078198147448128,
      "loss": 1.6822,
      "step": 80
    },
    {
      "epoch": 0.006432246998284734,
      "grad_norm": 1.0221036672592163,
      "learning_rate": 0.00019906806213773937,
      "loss": 1.7223,
      "step": 90
    },
    {
      "epoch": 0.00714694110920526,
      "grad_norm": 2.625779151916504,
      "learning_rate": 0.0001968788010097697,
      "loss": 1.8717,
      "step": 100
    },
    {
      "epoch": 0.00714694110920526,
      "eval_loss": 0.8531848788261414,
      "eval_runtime": 271.6576,
      "eval_samples_per_second": 21.689,
      "eval_steps_per_second": 5.422,
      "step": 100
    },
    {
      "epoch": 0.007861635220125786,
      "grad_norm": 0.5337308049201965,
      "learning_rate": 0.00019422486395072398,
      "loss": 1.5742,
      "step": 110
    },
    {
      "epoch": 0.008576329331046312,
      "grad_norm": 0.7009676694869995,
      "learning_rate": 0.0001911191806751811,
      "loss": 1.6358,
      "step": 120
    },
    {
      "epoch": 0.009291023441966839,
      "grad_norm": 0.66788649559021,
      "learning_rate": 0.00018757688175987723,
      "loss": 1.6009,
      "step": 130
    },
    {
      "epoch": 0.010005717552887363,
      "grad_norm": 1.010290265083313,
      "learning_rate": 0.00018361522492905716,
      "loss": 1.638,
      "step": 140
    },
    {
      "epoch": 0.01072041166380789,
      "grad_norm": 2.071993350982666,
      "learning_rate": 0.00017925351097657625,
      "loss": 1.6186,
      "step": 150
    },
    {
      "epoch": 0.01072041166380789,
      "eval_loss": 0.8275316953659058,
      "eval_runtime": 271.5389,
      "eval_samples_per_second": 21.699,
      "eval_steps_per_second": 5.425,
      "step": 150
    },
    {
      "epoch": 0.011435105774728416,
      "grad_norm": 0.48222827911376953,
      "learning_rate": 0.00017451298973437308,
      "loss": 1.3411,
      "step": 160
    },
    {
      "epoch": 0.012149799885648942,
      "grad_norm": 0.6215487122535706,
      "learning_rate": 0.0001694167565454241,
      "loss": 1.4259,
      "step": 170
    },
    {
      "epoch": 0.012864493996569469,
      "grad_norm": 0.8803397417068481,
      "learning_rate": 0.0001639896397455543,
      "loss": 1.4885,
      "step": 180
    },
    {
      "epoch": 0.013579188107489995,
      "grad_norm": 0.899418294429779,
      "learning_rate": 0.0001582580797022808,
      "loss": 1.7306,
      "step": 190
    },
    {
      "epoch": 0.01429388221841052,
      "grad_norm": 1.875759482383728,
      "learning_rate": 0.00015225,
      "loss": 1.7291,
      "step": 200
    },
    {
      "epoch": 0.01429388221841052,
      "eval_loss": 0.7944059371948242,
      "eval_runtime": 271.637,
      "eval_samples_per_second": 21.691,
      "eval_steps_per_second": 5.423,
      "step": 200
    },
    {
      "epoch": 0.015008576329331046,
      "grad_norm": 0.5201715230941772,
      "learning_rate": 0.00014599467139909136,
      "loss": 1.4731,
      "step": 210
    },
    {
      "epoch": 0.015723270440251572,
      "grad_norm": 0.6637020707130432,
      "learning_rate": 0.0001395225692317151,
      "loss": 1.4816,
      "step": 220
    },
    {
      "epoch": 0.016437964551172097,
      "grad_norm": 0.7978131175041199,
      "learning_rate": 0.00013286522492905717,
      "loss": 1.5038,
      "step": 230
    },
    {
      "epoch": 0.017152658662092625,
      "grad_norm": 0.9858562350273132,
      "learning_rate": 0.00012605507240336626,
      "loss": 1.6433,
      "step": 240
    },
    {
      "epoch": 0.01786735277301315,
      "grad_norm": 1.3631006479263306,
      "learning_rate": 0.00011912529003319345,
      "loss": 1.6774,
      "step": 250
    },
    {
      "epoch": 0.01786735277301315,
      "eval_loss": 0.7708129286766052,
      "eval_runtime": 272.7688,
      "eval_samples_per_second": 21.601,
      "eval_steps_per_second": 5.4,
      "step": 250
    },
    {
      "epoch": 0.018582046883933678,
      "grad_norm": 0.5159099698066711,
      "learning_rate": 0.00011210963902166683,
      "loss": 1.4165,
      "step": 260
    },
    {
      "epoch": 0.019296740994854202,
      "grad_norm": 0.6102124452590942,
      "learning_rate": 0.00010504229891530386,
      "loss": 1.3505,
      "step": 270
    },
    {
      "epoch": 0.020011435105774727,
      "grad_norm": 0.8650922179222107,
      "learning_rate": 9.795770108469618e-05,
      "loss": 1.513,
      "step": 280
    },
    {
      "epoch": 0.020726129216695255,
      "grad_norm": 1.0572717189788818,
      "learning_rate": 9.08903609783332e-05,
      "loss": 1.6069,
      "step": 290
    },
    {
      "epoch": 0.02144082332761578,
      "grad_norm": 1.9492979049682617,
      "learning_rate": 8.387470996680658e-05,
      "loss": 1.6611,
      "step": 300
    },
    {
      "epoch": 0.02144082332761578,
      "eval_loss": 0.7363295555114746,
      "eval_runtime": 271.6884,
      "eval_samples_per_second": 21.687,
      "eval_steps_per_second": 5.422,
      "step": 300
    },
    {
      "epoch": 0.022155517438536308,
      "grad_norm": 0.5079029202461243,
      "learning_rate": 7.694492759663374e-05,
      "loss": 1.3944,
      "step": 310
    },
    {
      "epoch": 0.022870211549456832,
      "grad_norm": 0.6057290434837341,
      "learning_rate": 7.013477507094284e-05,
      "loss": 1.3385,
      "step": 320
    },
    {
      "epoch": 0.02358490566037736,
      "grad_norm": 0.831353485584259,
      "learning_rate": 6.347743076828492e-05,
      "loss": 1.4873,
      "step": 330
    },
    {
      "epoch": 0.024299599771297885,
      "grad_norm": 0.8882721066474915,
      "learning_rate": 5.700532860090863e-05,
      "loss": 1.5831,
      "step": 340
    },
    {
      "epoch": 0.02501429388221841,
      "grad_norm": 1.5294054746627808,
      "learning_rate": 5.075000000000002e-05,
      "loss": 1.5363,
      "step": 350
    },
    {
      "epoch": 0.02501429388221841,
      "eval_loss": 0.7126619219779968,
      "eval_runtime": 271.693,
      "eval_samples_per_second": 21.686,
      "eval_steps_per_second": 5.422,
      "step": 350
    },
    {
      "epoch": 0.025728987993138937,
      "grad_norm": 0.5537640452384949,
      "learning_rate": 4.4741920297719214e-05,
      "loss": 1.3678,
      "step": 360
    },
    {
      "epoch": 0.026443682104059462,
      "grad_norm": 0.552107572555542,
      "learning_rate": 3.901036025444568e-05,
      "loss": 1.383,
      "step": 370
    },
    {
      "epoch": 0.02715837621497999,
      "grad_norm": 0.8851740956306458,
      "learning_rate": 3.358324345457592e-05,
      "loss": 1.4735,
      "step": 380
    },
    {
      "epoch": 0.027873070325900515,
      "grad_norm": 0.9548201560974121,
      "learning_rate": 2.8487010265626928e-05,
      "loss": 1.4667,
      "step": 390
    },
    {
      "epoch": 0.02858776443682104,
      "grad_norm": 1.7331346273422241,
      "learning_rate": 2.3746489023423744e-05,
      "loss": 1.5478,
      "step": 400
    },
    {
      "epoch": 0.02858776443682104,
      "eval_loss": 0.7001160979270935,
      "eval_runtime": 271.7977,
      "eval_samples_per_second": 21.678,
      "eval_steps_per_second": 5.419,
      "step": 400
    },
    {
      "epoch": 0.029302458547741567,
      "grad_norm": 0.5737258791923523,
      "learning_rate": 1.9384775070942844e-05,
      "loss": 1.3097,
      "step": 410
    },
    {
      "epoch": 0.030017152658662092,
      "grad_norm": 0.623741626739502,
      "learning_rate": 1.5423118240122765e-05,
      "loss": 1.2598,
      "step": 420
    },
    {
      "epoch": 0.03073184676958262,
      "grad_norm": 0.7869819402694702,
      "learning_rate": 1.188081932481891e-05,
      "loss": 1.4332,
      "step": 430
    },
    {
      "epoch": 0.031446540880503145,
      "grad_norm": 0.976081371307373,
      "learning_rate": 8.775136049276001e-06,
      "loss": 1.4363,
      "step": 440
    },
    {
      "epoch": 0.03216123499142367,
      "grad_norm": 1.7860438823699951,
      "learning_rate": 6.121198990230306e-06,
      "loss": 1.555,
      "step": 450
    },
    {
      "epoch": 0.03216123499142367,
      "eval_loss": 0.6946857571601868,
      "eval_runtime": 271.7817,
      "eval_samples_per_second": 21.679,
      "eval_steps_per_second": 5.42,
      "step": 450
    },
    {
      "epoch": 0.032875929102344194,
      "grad_norm": 0.5253943204879761,
      "learning_rate": 3.931937862260632e-06,
      "loss": 1.1963,
      "step": 460
    },
    {
      "epoch": 0.033590623213264725,
      "grad_norm": 0.5864456295967102,
      "learning_rate": 2.2180185255187225e-06,
      "loss": 1.3404,
      "step": 470
    },
    {
      "epoch": 0.03430531732418525,
      "grad_norm": 0.8669192790985107,
      "learning_rate": 9.877910227306082e-07,
      "loss": 1.4906,
      "step": 480
    },
    {
      "epoch": 0.035020011435105775,
      "grad_norm": 0.977800726890564,
      "learning_rate": 2.472488986278439e-07,
      "loss": 1.4202,
      "step": 490
    },
    {
      "epoch": 0.0357347055460263,
      "grad_norm": 1.68795907497406,
      "learning_rate": 0.0,
      "loss": 1.6401,
      "step": 500
    },
    {
      "epoch": 0.0357347055460263,
      "eval_loss": 0.6936941742897034,
      "eval_runtime": 273.0352,
      "eval_samples_per_second": 21.58,
      "eval_steps_per_second": 5.395,
      "step": 500
    }
  ],
  "logging_steps": 10,
  "max_steps": 500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.66313582395392e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}