{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 735,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.040878896269800714,
      "grad_norm": 6.032278537750244,
      "learning_rate": 0.00019755102040816327,
      "loss": 2.6293,
      "step": 10
    },
    {
      "epoch": 0.08175779253960143,
      "grad_norm": 3.237109899520874,
      "learning_rate": 0.0001948299319727891,
      "loss": 2.008,
      "step": 20
    },
    {
      "epoch": 0.12263668880940215,
      "grad_norm": 2.7581236362457275,
      "learning_rate": 0.000192108843537415,
      "loss": 1.855,
      "step": 30
    },
    {
      "epoch": 0.16351558507920286,
      "grad_norm": 3.083601474761963,
      "learning_rate": 0.00018938775510204083,
      "loss": 1.7576,
      "step": 40
    },
    {
      "epoch": 0.20439448134900357,
      "grad_norm": 2.7807652950286865,
      "learning_rate": 0.0001866666666666667,
      "loss": 1.7434,
      "step": 50
    },
    {
      "epoch": 0.2452733776188043,
      "grad_norm": 2.6213436126708984,
      "learning_rate": 0.00018394557823129252,
      "loss": 1.682,
      "step": 60
    },
    {
      "epoch": 0.286152273888605,
      "grad_norm": 2.6907846927642822,
      "learning_rate": 0.00018122448979591838,
      "loss": 1.6749,
      "step": 70
    },
    {
      "epoch": 0.3270311701584057,
      "grad_norm": 5.332385540008545,
      "learning_rate": 0.00017850340136054421,
      "loss": 1.5744,
      "step": 80
    },
    {
      "epoch": 0.36791006642820645,
      "grad_norm": 3.23760986328125,
      "learning_rate": 0.00017578231292517008,
      "loss": 1.5338,
      "step": 90
    },
    {
      "epoch": 0.40878896269800713,
      "grad_norm": 3.479809522628784,
      "learning_rate": 0.00017306122448979594,
      "loss": 1.4143,
      "step": 100
    },
    {
      "epoch": 0.44966785896780787,
      "grad_norm": 2.7317419052124023,
      "learning_rate": 0.0001703401360544218,
      "loss": 1.4603,
      "step": 110
    },
    {
      "epoch": 0.4905467552376086,
      "grad_norm": 2.8279190063476562,
      "learning_rate": 0.00016761904761904763,
      "loss": 1.5169,
      "step": 120
    },
    {
      "epoch": 0.5314256515074093,
      "grad_norm": 3.096827983856201,
      "learning_rate": 0.0001648979591836735,
      "loss": 1.4396,
      "step": 130
    },
    {
      "epoch": 0.57230454777721,
      "grad_norm": 2.9124717712402344,
      "learning_rate": 0.00016217687074829932,
      "loss": 1.3824,
      "step": 140
    },
    {
      "epoch": 0.6131834440470108,
      "grad_norm": 3.073765754699707,
      "learning_rate": 0.00015945578231292519,
      "loss": 1.3974,
      "step": 150
    },
    {
      "epoch": 0.6540623403168114,
      "grad_norm": 2.5229296684265137,
      "learning_rate": 0.00015673469387755102,
      "loss": 1.4211,
      "step": 160
    },
    {
      "epoch": 0.6949412365866121,
      "grad_norm": 2.8301663398742676,
      "learning_rate": 0.00015401360544217688,
      "loss": 1.3567,
      "step": 170
    },
    {
      "epoch": 0.7358201328564129,
      "grad_norm": 2.6760520935058594,
      "learning_rate": 0.00015129251700680274,
      "loss": 1.3552,
      "step": 180
    },
    {
      "epoch": 0.7766990291262136,
      "grad_norm": 2.741240978240967,
      "learning_rate": 0.00014857142857142857,
      "loss": 1.408,
      "step": 190
    },
    {
      "epoch": 0.8175779253960143,
      "grad_norm": 2.028970956802368,
      "learning_rate": 0.00014585034013605443,
      "loss": 1.3497,
      "step": 200
    },
    {
      "epoch": 0.858456821665815,
      "grad_norm": 2.6956677436828613,
      "learning_rate": 0.00014312925170068027,
      "loss": 1.3592,
      "step": 210
    },
    {
      "epoch": 0.8993357179356157,
      "grad_norm": 2.448338270187378,
      "learning_rate": 0.00014040816326530613,
      "loss": 1.3309,
      "step": 220
    },
    {
      "epoch": 0.9402146142054164,
      "grad_norm": 2.267707347869873,
      "learning_rate": 0.00013768707482993196,
      "loss": 1.329,
      "step": 230
    },
    {
      "epoch": 0.9810935104752172,
      "grad_norm": 2.4610676765441895,
      "learning_rate": 0.00013496598639455782,
      "loss": 1.3194,
      "step": 240
    },
    {
      "epoch": 1.0204394481349004,
      "grad_norm": 2.539577007293701,
      "learning_rate": 0.00013224489795918368,
      "loss": 1.2862,
      "step": 250
    },
    {
      "epoch": 1.0613183444047012,
      "grad_norm": 2.377258777618408,
      "learning_rate": 0.00012952380952380954,
      "loss": 1.2052,
      "step": 260
    },
    {
      "epoch": 1.1021972406745018,
      "grad_norm": 2.4516634941101074,
      "learning_rate": 0.00012680272108843538,
      "loss": 1.1859,
      "step": 270
    },
    {
      "epoch": 1.1430761369443025,
      "grad_norm": 3.4123401641845703,
      "learning_rate": 0.00012408163265306124,
      "loss": 1.2547,
      "step": 280
    },
    {
      "epoch": 1.1839550332141031,
      "grad_norm": 2.9211533069610596,
      "learning_rate": 0.00012136054421768707,
      "loss": 1.187,
      "step": 290
    },
    {
      "epoch": 1.224833929483904,
      "grad_norm": 2.8444571495056152,
      "learning_rate": 0.00011863945578231292,
      "loss": 1.1871,
      "step": 300
    },
    {
      "epoch": 1.2657128257537047,
      "grad_norm": 2.8576762676239014,
      "learning_rate": 0.00011591836734693877,
      "loss": 1.2033,
      "step": 310
    },
    {
      "epoch": 1.3065917220235055,
      "grad_norm": 2.8612818717956543,
      "learning_rate": 0.00011319727891156464,
      "loss": 1.1889,
      "step": 320
    },
    {
      "epoch": 1.347470618293306,
      "grad_norm": 2.666062355041504,
      "learning_rate": 0.00011047619047619049,
      "loss": 1.1122,
      "step": 330
    },
    {
      "epoch": 1.3883495145631068,
      "grad_norm": 2.7399775981903076,
      "learning_rate": 0.00010775510204081634,
      "loss": 1.2343,
      "step": 340
    },
    {
      "epoch": 1.4292284108329074,
      "grad_norm": 2.89231538772583,
      "learning_rate": 0.00010503401360544218,
      "loss": 1.2481,
      "step": 350
    },
    {
      "epoch": 1.4701073071027082,
      "grad_norm": 2.9003829956054688,
      "learning_rate": 0.00010231292517006803,
      "loss": 1.1994,
      "step": 360
    },
    {
      "epoch": 1.510986203372509,
      "grad_norm": 3.2669005393981934,
      "learning_rate": 9.959183673469388e-05,
      "loss": 1.1804,
      "step": 370
    },
    {
      "epoch": 1.5518650996423098,
      "grad_norm": 3.1723055839538574,
      "learning_rate": 9.687074829931974e-05,
      "loss": 1.1427,
      "step": 380
    },
    {
      "epoch": 1.5927439959121104,
      "grad_norm": 3.0546045303344727,
      "learning_rate": 9.414965986394559e-05,
      "loss": 1.1616,
      "step": 390
    },
    {
      "epoch": 1.633622892181911,
      "grad_norm": 3.060725450515747,
      "learning_rate": 9.142857142857143e-05,
      "loss": 1.2045,
      "step": 400
    },
    {
      "epoch": 1.6745017884517117,
      "grad_norm": 2.8336000442504883,
      "learning_rate": 8.870748299319729e-05,
      "loss": 1.1922,
      "step": 410
    },
    {
      "epoch": 1.7153806847215125,
      "grad_norm": 2.888324499130249,
      "learning_rate": 8.598639455782314e-05,
      "loss": 1.1646,
      "step": 420
    },
    {
      "epoch": 1.7562595809913133,
      "grad_norm": 2.559555768966675,
      "learning_rate": 8.326530612244899e-05,
      "loss": 1.2204,
      "step": 430
    },
    {
      "epoch": 1.797138477261114,
      "grad_norm": 3.291555643081665,
      "learning_rate": 8.054421768707483e-05,
      "loss": 1.1561,
      "step": 440
    },
    {
      "epoch": 1.8380173735309147,
      "grad_norm": 2.7448337078094482,
      "learning_rate": 7.782312925170068e-05,
      "loss": 1.1783,
      "step": 450
    },
    {
      "epoch": 1.8788962698007152,
      "grad_norm": 3.201742649078369,
      "learning_rate": 7.510204081632653e-05,
      "loss": 1.1499,
      "step": 460
    },
    {
      "epoch": 1.919775166070516,
      "grad_norm": 3.3023853302001953,
      "learning_rate": 7.238095238095238e-05,
      "loss": 1.1929,
      "step": 470
    },
    {
      "epoch": 1.9606540623403168,
      "grad_norm": 2.9392805099487305,
      "learning_rate": 6.965986394557822e-05,
      "loss": 1.2178,
      "step": 480
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.641012668609619,
      "learning_rate": 6.693877551020408e-05,
      "loss": 1.1721,
      "step": 490
    },
    {
      "epoch": 2.040878896269801,
      "grad_norm": 3.4774110317230225,
      "learning_rate": 6.421768707482993e-05,
      "loss": 1.0634,
      "step": 500
    },
    {
      "epoch": 2.0817577925396016,
      "grad_norm": 3.280494213104248,
      "learning_rate": 6.149659863945578e-05,
      "loss": 1.0549,
      "step": 510
    },
    {
      "epoch": 2.1226366888094024,
      "grad_norm": 3.7747931480407715,
      "learning_rate": 5.877551020408164e-05,
      "loss": 1.0331,
      "step": 520
    },
    {
      "epoch": 2.1635155850792027,
      "grad_norm": 3.1277899742126465,
      "learning_rate": 5.6054421768707486e-05,
      "loss": 1.0384,
      "step": 530
    },
    {
      "epoch": 2.2043944813490035,
      "grad_norm": 3.6484415531158447,
      "learning_rate": 5.333333333333333e-05,
      "loss": 1.0136,
      "step": 540
    },
    {
      "epoch": 2.2452733776188043,
      "grad_norm": 3.5595340728759766,
      "learning_rate": 5.061224489795918e-05,
      "loss": 1.0227,
      "step": 550
    },
    {
      "epoch": 2.286152273888605,
      "grad_norm": 3.5124669075012207,
      "learning_rate": 4.7891156462585034e-05,
      "loss": 1.0109,
      "step": 560
    },
    {
      "epoch": 2.327031170158406,
      "grad_norm": 3.5096631050109863,
      "learning_rate": 4.517006802721089e-05,
      "loss": 1.0131,
      "step": 570
    },
    {
      "epoch": 2.3679100664282062,
      "grad_norm": 3.6057543754577637,
      "learning_rate": 4.2448979591836735e-05,
      "loss": 1.0791,
      "step": 580
    },
    {
      "epoch": 2.408788962698007,
      "grad_norm": 3.254915714263916,
      "learning_rate": 3.972789115646259e-05,
      "loss": 0.9997,
      "step": 590
    },
    {
      "epoch": 2.449667858967808,
      "grad_norm": 4.024539947509766,
      "learning_rate": 3.7006802721088437e-05,
      "loss": 1.0364,
      "step": 600
    },
    {
      "epoch": 2.4905467552376086,
      "grad_norm": 3.6602048873901367,
      "learning_rate": 3.428571428571429e-05,
      "loss": 0.9388,
      "step": 610
    },
    {
      "epoch": 2.5314256515074094,
      "grad_norm": 3.5694384574890137,
      "learning_rate": 3.156462585034014e-05,
      "loss": 1.0256,
      "step": 620
    },
    {
      "epoch": 2.57230454777721,
      "grad_norm": 2.8324692249298096,
      "learning_rate": 2.8843537414965988e-05,
      "loss": 1.0396,
      "step": 630
    },
    {
      "epoch": 2.613183444047011,
      "grad_norm": 3.545496702194214,
      "learning_rate": 2.612244897959184e-05,
      "loss": 1.0569,
      "step": 640
    },
    {
      "epoch": 2.6540623403168113,
      "grad_norm": 3.602918863296509,
      "learning_rate": 2.3401360544217686e-05,
      "loss": 1.0485,
      "step": 650
    },
    {
      "epoch": 2.694941236586612,
      "grad_norm": 3.526660442352295,
      "learning_rate": 2.0680272108843536e-05,
      "loss": 1.0541,
      "step": 660
    },
    {
      "epoch": 2.735820132856413,
      "grad_norm": 3.0326685905456543,
      "learning_rate": 1.7959183673469387e-05,
      "loss": 0.9885,
      "step": 670
    },
    {
      "epoch": 2.7766990291262137,
      "grad_norm": 3.41005277633667,
      "learning_rate": 1.5238095238095241e-05,
      "loss": 1.0277,
      "step": 680
    },
    {
      "epoch": 2.817577925396014,
      "grad_norm": 3.461618423461914,
      "learning_rate": 1.251700680272109e-05,
      "loss": 1.0553,
      "step": 690
    },
    {
      "epoch": 2.858456821665815,
      "grad_norm": 3.59061861038208,
      "learning_rate": 9.795918367346939e-06,
      "loss": 1.0436,
      "step": 700
    },
    {
      "epoch": 2.8993357179356156,
      "grad_norm": 3.4869349002838135,
      "learning_rate": 7.07482993197279e-06,
      "loss": 1.0493,
      "step": 710
    },
    {
      "epoch": 2.9402146142054164,
      "grad_norm": 3.2588963508605957,
      "learning_rate": 4.353741496598639e-06,
      "loss": 1.0311,
      "step": 720
    },
    {
      "epoch": 2.981093510475217,
      "grad_norm": 3.391941785812378,
      "learning_rate": 1.63265306122449e-06,
      "loss": 0.9652,
      "step": 730
    }
  ],
  "logging_steps": 10,
  "max_steps": 735,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.5661456203854643e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}