|
{ |
|
"best_metric": 0.9635820582698403, |
|
"best_model_checkpoint": "./distilbert_multilingual_cased_greek_latin_classifiergreek/checkpoint-14160", |
|
"epoch": 8.0, |
|
"eval_steps": 500, |
|
"global_step": 14160, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05649717514124294, |
|
"grad_norm": 9.908761978149414, |
|
"learning_rate": 4.9858757062146896e-05, |
|
"loss": 0.4326, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11299435028248588, |
|
"grad_norm": 0.47730961441993713, |
|
"learning_rate": 4.971751412429379e-05, |
|
"loss": 0.2915, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1694915254237288, |
|
"grad_norm": 5.969715595245361, |
|
"learning_rate": 4.957627118644068e-05, |
|
"loss": 0.2426, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.22598870056497175, |
|
"grad_norm": 2.764862537384033, |
|
"learning_rate": 4.9435028248587575e-05, |
|
"loss": 0.256, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2824858757062147, |
|
"grad_norm": 12.726994514465332, |
|
"learning_rate": 4.929378531073446e-05, |
|
"loss": 0.2165, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3389830508474576, |
|
"grad_norm": 1.1394743919372559, |
|
"learning_rate": 4.915254237288136e-05, |
|
"loss": 0.2168, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.3954802259887006, |
|
"grad_norm": 4.321178913116455, |
|
"learning_rate": 4.9011299435028255e-05, |
|
"loss": 0.2059, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.4519774011299435, |
|
"grad_norm": 18.238351821899414, |
|
"learning_rate": 4.887005649717514e-05, |
|
"loss": 0.2294, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5084745762711864, |
|
"grad_norm": 7.210486888885498, |
|
"learning_rate": 4.8728813559322034e-05, |
|
"loss": 0.1904, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.5649717514124294, |
|
"grad_norm": 1.240628719329834, |
|
"learning_rate": 4.8587570621468934e-05, |
|
"loss": 0.1668, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.6214689265536724, |
|
"grad_norm": 9.029092788696289, |
|
"learning_rate": 4.844632768361582e-05, |
|
"loss": 0.18, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.6779661016949152, |
|
"grad_norm": 4.866886615753174, |
|
"learning_rate": 4.8305084745762714e-05, |
|
"loss": 0.2082, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.7344632768361582, |
|
"grad_norm": 7.6279778480529785, |
|
"learning_rate": 4.816384180790961e-05, |
|
"loss": 0.1818, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.7909604519774012, |
|
"grad_norm": 8.820233345031738, |
|
"learning_rate": 4.80225988700565e-05, |
|
"loss": 0.1853, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.847457627118644, |
|
"grad_norm": 22.571056365966797, |
|
"learning_rate": 4.788135593220339e-05, |
|
"loss": 0.1638, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.903954802259887, |
|
"grad_norm": 4.86266565322876, |
|
"learning_rate": 4.7740112994350286e-05, |
|
"loss": 0.1761, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.96045197740113, |
|
"grad_norm": 2.2481741905212402, |
|
"learning_rate": 4.759887005649718e-05, |
|
"loss": 0.1481, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9473554127533367, |
|
"eval_f1": 0.9471867274372374, |
|
"eval_loss": 0.15506704151630402, |
|
"eval_runtime": 0.99, |
|
"eval_samples_per_second": 4086.911, |
|
"eval_steps_per_second": 64.647, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.0169491525423728, |
|
"grad_norm": 0.16318374872207642, |
|
"learning_rate": 4.745762711864407e-05, |
|
"loss": 0.1303, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.073446327683616, |
|
"grad_norm": 2.8574020862579346, |
|
"learning_rate": 4.7316384180790966e-05, |
|
"loss": 0.0916, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.1299435028248588, |
|
"grad_norm": 7.220451831817627, |
|
"learning_rate": 4.717514124293785e-05, |
|
"loss": 0.1214, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.1864406779661016, |
|
"grad_norm": 0.23234207928180695, |
|
"learning_rate": 4.703389830508475e-05, |
|
"loss": 0.1005, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.2429378531073447, |
|
"grad_norm": 5.788926124572754, |
|
"learning_rate": 4.689265536723164e-05, |
|
"loss": 0.106, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.2994350282485876, |
|
"grad_norm": 14.127985954284668, |
|
"learning_rate": 4.675141242937853e-05, |
|
"loss": 0.1002, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.3559322033898304, |
|
"grad_norm": 8.628386497497559, |
|
"learning_rate": 4.6610169491525425e-05, |
|
"loss": 0.1228, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.4124293785310735, |
|
"grad_norm": 3.4723896980285645, |
|
"learning_rate": 4.646892655367232e-05, |
|
"loss": 0.1193, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.4689265536723164, |
|
"grad_norm": 5.112296104431152, |
|
"learning_rate": 4.632768361581921e-05, |
|
"loss": 0.0989, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.5254237288135593, |
|
"grad_norm": 8.718145370483398, |
|
"learning_rate": 4.6186440677966104e-05, |
|
"loss": 0.1291, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.5819209039548023, |
|
"grad_norm": 0.052204638719558716, |
|
"learning_rate": 4.6045197740113e-05, |
|
"loss": 0.0987, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.6384180790960452, |
|
"grad_norm": 11.152572631835938, |
|
"learning_rate": 4.590395480225989e-05, |
|
"loss": 0.113, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.694915254237288, |
|
"grad_norm": 3.002537965774536, |
|
"learning_rate": 4.5762711864406784e-05, |
|
"loss": 0.1215, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.7514124293785311, |
|
"grad_norm": 16.578323364257812, |
|
"learning_rate": 4.562146892655367e-05, |
|
"loss": 0.0998, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.807909604519774, |
|
"grad_norm": 4.660722255706787, |
|
"learning_rate": 4.548022598870056e-05, |
|
"loss": 0.1015, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.8644067796610169, |
|
"grad_norm": 0.32472193241119385, |
|
"learning_rate": 4.533898305084746e-05, |
|
"loss": 0.0982, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.92090395480226, |
|
"grad_norm": 12.544636726379395, |
|
"learning_rate": 4.519774011299435e-05, |
|
"loss": 0.1151, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.9774011299435028, |
|
"grad_norm": 0.20591090619564056, |
|
"learning_rate": 4.505649717514124e-05, |
|
"loss": 0.1109, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9540286702916461, |
|
"eval_f1": 0.9542995811514169, |
|
"eval_loss": 0.17088210582733154, |
|
"eval_runtime": 0.9919, |
|
"eval_samples_per_second": 4078.94, |
|
"eval_steps_per_second": 64.521, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 2.0338983050847457, |
|
"grad_norm": 1.156205177307129, |
|
"learning_rate": 4.491525423728814e-05, |
|
"loss": 0.0764, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.0903954802259888, |
|
"grad_norm": 4.644138336181641, |
|
"learning_rate": 4.477401129943503e-05, |
|
"loss": 0.0603, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.146892655367232, |
|
"grad_norm": 0.0709792822599411, |
|
"learning_rate": 4.463276836158192e-05, |
|
"loss": 0.0649, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.2033898305084745, |
|
"grad_norm": 0.13090333342552185, |
|
"learning_rate": 4.4491525423728816e-05, |
|
"loss": 0.076, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.2598870056497176, |
|
"grad_norm": 39.72850799560547, |
|
"learning_rate": 4.435028248587571e-05, |
|
"loss": 0.0827, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.3163841807909606, |
|
"grad_norm": 0.29564905166625977, |
|
"learning_rate": 4.42090395480226e-05, |
|
"loss": 0.0701, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.3728813559322033, |
|
"grad_norm": 0.23284725844860077, |
|
"learning_rate": 4.4067796610169495e-05, |
|
"loss": 0.0697, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.4293785310734464, |
|
"grad_norm": 0.059655264019966125, |
|
"learning_rate": 4.392655367231638e-05, |
|
"loss": 0.0803, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.4858757062146895, |
|
"grad_norm": 0.01876319944858551, |
|
"learning_rate": 4.378531073446328e-05, |
|
"loss": 0.0796, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.542372881355932, |
|
"grad_norm": 0.07363492995500565, |
|
"learning_rate": 4.3644067796610175e-05, |
|
"loss": 0.0573, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.598870056497175, |
|
"grad_norm": 0.26911139488220215, |
|
"learning_rate": 4.350282485875706e-05, |
|
"loss": 0.0732, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.655367231638418, |
|
"grad_norm": 0.045297879725694656, |
|
"learning_rate": 4.3361581920903954e-05, |
|
"loss": 0.0645, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 2.711864406779661, |
|
"grad_norm": 0.24285119771957397, |
|
"learning_rate": 4.3220338983050854e-05, |
|
"loss": 0.0852, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.768361581920904, |
|
"grad_norm": 70.39765930175781, |
|
"learning_rate": 4.307909604519774e-05, |
|
"loss": 0.0763, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 2.824858757062147, |
|
"grad_norm": 3.1160919666290283, |
|
"learning_rate": 4.2937853107344634e-05, |
|
"loss": 0.0618, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.8813559322033897, |
|
"grad_norm": 0.14466217160224915, |
|
"learning_rate": 4.279661016949153e-05, |
|
"loss": 0.0628, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 2.937853107344633, |
|
"grad_norm": 11.605415344238281, |
|
"learning_rate": 4.265536723163842e-05, |
|
"loss": 0.0539, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 2.994350282485876, |
|
"grad_norm": 0.03261380270123482, |
|
"learning_rate": 4.251412429378531e-05, |
|
"loss": 0.0349, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9574888779041029, |
|
"eval_f1": 0.9573208648613947, |
|
"eval_loss": 0.20843710005283356, |
|
"eval_runtime": 0.9688, |
|
"eval_samples_per_second": 4176.286, |
|
"eval_steps_per_second": 66.061, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 3.0508474576271185, |
|
"grad_norm": 0.007287267595529556, |
|
"learning_rate": 4.2372881355932206e-05, |
|
"loss": 0.0575, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 3.1073446327683616, |
|
"grad_norm": 19.718822479248047, |
|
"learning_rate": 4.22316384180791e-05, |
|
"loss": 0.0423, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 3.1638418079096047, |
|
"grad_norm": 78.35333251953125, |
|
"learning_rate": 4.209039548022599e-05, |
|
"loss": 0.0349, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 3.2203389830508473, |
|
"grad_norm": 0.025877630338072777, |
|
"learning_rate": 4.1949152542372886e-05, |
|
"loss": 0.0632, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 3.2768361581920904, |
|
"grad_norm": 14.864492416381836, |
|
"learning_rate": 4.180790960451977e-05, |
|
"loss": 0.0651, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 3.3333333333333335, |
|
"grad_norm": 0.002800008049234748, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.0225, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 3.389830508474576, |
|
"grad_norm": 0.10183978080749512, |
|
"learning_rate": 4.152542372881356e-05, |
|
"loss": 0.0467, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.446327683615819, |
|
"grad_norm": 30.69606590270996, |
|
"learning_rate": 4.138418079096045e-05, |
|
"loss": 0.0509, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 3.5028248587570623, |
|
"grad_norm": 0.43340635299682617, |
|
"learning_rate": 4.1242937853107345e-05, |
|
"loss": 0.0366, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 3.559322033898305, |
|
"grad_norm": 3.5696895122528076, |
|
"learning_rate": 4.110169491525424e-05, |
|
"loss": 0.0576, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 3.615819209039548, |
|
"grad_norm": 3.981534481048584, |
|
"learning_rate": 4.096045197740113e-05, |
|
"loss": 0.0574, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 3.672316384180791, |
|
"grad_norm": 0.020425381138920784, |
|
"learning_rate": 4.0819209039548024e-05, |
|
"loss": 0.0582, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.7288135593220337, |
|
"grad_norm": 1.6137280464172363, |
|
"learning_rate": 4.067796610169492e-05, |
|
"loss": 0.037, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 3.785310734463277, |
|
"grad_norm": 0.41225990653038025, |
|
"learning_rate": 4.053672316384181e-05, |
|
"loss": 0.039, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 3.84180790960452, |
|
"grad_norm": 0.02651926688849926, |
|
"learning_rate": 4.0395480225988704e-05, |
|
"loss": 0.052, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 3.898305084745763, |
|
"grad_norm": 4.174577713012695, |
|
"learning_rate": 4.025423728813559e-05, |
|
"loss": 0.0746, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 3.9548022598870056, |
|
"grad_norm": 0.06549729406833649, |
|
"learning_rate": 4.011299435028249e-05, |
|
"loss": 0.0682, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9619377162629758, |
|
"eval_f1": 0.961922353652765, |
|
"eval_loss": 0.19074885547161102, |
|
"eval_runtime": 0.9577, |
|
"eval_samples_per_second": 4224.832, |
|
"eval_steps_per_second": 66.829, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 4.011299435028248, |
|
"grad_norm": 0.09852942079305649, |
|
"learning_rate": 3.997175141242938e-05, |
|
"loss": 0.0573, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 4.067796610169491, |
|
"grad_norm": 0.010253222659230232, |
|
"learning_rate": 3.983050847457627e-05, |
|
"loss": 0.0268, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 4.124293785310734, |
|
"grad_norm": 0.05561167746782303, |
|
"learning_rate": 3.968926553672316e-05, |
|
"loss": 0.041, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 4.1807909604519775, |
|
"grad_norm": 0.020777329802513123, |
|
"learning_rate": 3.954802259887006e-05, |
|
"loss": 0.0428, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 4.237288135593221, |
|
"grad_norm": 0.011439072899520397, |
|
"learning_rate": 3.940677966101695e-05, |
|
"loss": 0.0281, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 4.293785310734464, |
|
"grad_norm": 0.29063406586647034, |
|
"learning_rate": 3.926553672316384e-05, |
|
"loss": 0.0347, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 4.350282485875706, |
|
"grad_norm": 0.008078676648437977, |
|
"learning_rate": 3.9124293785310735e-05, |
|
"loss": 0.0427, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 4.406779661016949, |
|
"grad_norm": 0.0378178134560585, |
|
"learning_rate": 3.898305084745763e-05, |
|
"loss": 0.0448, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 4.463276836158192, |
|
"grad_norm": 0.09035930037498474, |
|
"learning_rate": 3.884180790960452e-05, |
|
"loss": 0.0328, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 4.519774011299435, |
|
"grad_norm": 0.05402543023228645, |
|
"learning_rate": 3.8700564971751415e-05, |
|
"loss": 0.0345, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 4.576271186440678, |
|
"grad_norm": 0.01713019795715809, |
|
"learning_rate": 3.855932203389831e-05, |
|
"loss": 0.0358, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 4.632768361581921, |
|
"grad_norm": 0.0475781112909317, |
|
"learning_rate": 3.84180790960452e-05, |
|
"loss": 0.0532, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 4.6892655367231635, |
|
"grad_norm": 0.006405588239431381, |
|
"learning_rate": 3.8276836158192094e-05, |
|
"loss": 0.0324, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 4.745762711864407, |
|
"grad_norm": 4.002650260925293, |
|
"learning_rate": 3.813559322033898e-05, |
|
"loss": 0.0526, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 4.80225988700565, |
|
"grad_norm": 0.010295218788087368, |
|
"learning_rate": 3.799435028248588e-05, |
|
"loss": 0.0428, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 4.858757062146893, |
|
"grad_norm": 25.15464973449707, |
|
"learning_rate": 3.7853107344632774e-05, |
|
"loss": 0.0513, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 4.915254237288136, |
|
"grad_norm": 0.018476568162441254, |
|
"learning_rate": 3.771186440677966e-05, |
|
"loss": 0.0627, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 4.971751412429379, |
|
"grad_norm": 0.02234013006091118, |
|
"learning_rate": 3.7570621468926554e-05, |
|
"loss": 0.0436, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9594661393969353, |
|
"eval_f1": 0.9595618588245926, |
|
"eval_loss": 0.24679133296012878, |
|
"eval_runtime": 0.9625, |
|
"eval_samples_per_second": 4203.719, |
|
"eval_steps_per_second": 66.495, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 5.028248587570621, |
|
"grad_norm": 0.007325501646846533, |
|
"learning_rate": 3.7429378531073453e-05, |
|
"loss": 0.011, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 5.084745762711864, |
|
"grad_norm": 0.015004786662757397, |
|
"learning_rate": 3.728813559322034e-05, |
|
"loss": 0.0239, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 5.141242937853107, |
|
"grad_norm": 0.02809782139956951, |
|
"learning_rate": 3.714689265536723e-05, |
|
"loss": 0.0468, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 5.19774011299435, |
|
"grad_norm": 0.061971381306648254, |
|
"learning_rate": 3.7005649717514126e-05, |
|
"loss": 0.0369, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 5.254237288135593, |
|
"grad_norm": 0.028554769232869148, |
|
"learning_rate": 3.686440677966102e-05, |
|
"loss": 0.0254, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 5.3107344632768365, |
|
"grad_norm": 0.049820106476545334, |
|
"learning_rate": 3.672316384180791e-05, |
|
"loss": 0.0371, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 5.367231638418079, |
|
"grad_norm": 0.016609592363238335, |
|
"learning_rate": 3.6581920903954806e-05, |
|
"loss": 0.0184, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 5.423728813559322, |
|
"grad_norm": 0.05181876942515373, |
|
"learning_rate": 3.644067796610169e-05, |
|
"loss": 0.0414, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 5.480225988700565, |
|
"grad_norm": 0.05821879953145981, |
|
"learning_rate": 3.629943502824859e-05, |
|
"loss": 0.0308, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 5.536723163841808, |
|
"grad_norm": 0.010366985574364662, |
|
"learning_rate": 3.6158192090395485e-05, |
|
"loss": 0.0278, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 5.593220338983051, |
|
"grad_norm": 0.019191740080714226, |
|
"learning_rate": 3.601694915254237e-05, |
|
"loss": 0.0435, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 5.649717514124294, |
|
"grad_norm": 0.06532129645347595, |
|
"learning_rate": 3.587570621468927e-05, |
|
"loss": 0.0237, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 5.706214689265536, |
|
"grad_norm": 0.009392981417477131, |
|
"learning_rate": 3.573446327683616e-05, |
|
"loss": 0.0334, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 5.762711864406779, |
|
"grad_norm": 0.023171979933977127, |
|
"learning_rate": 3.559322033898305e-05, |
|
"loss": 0.0487, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 5.8192090395480225, |
|
"grad_norm": 0.055124878883361816, |
|
"learning_rate": 3.5451977401129944e-05, |
|
"loss": 0.0412, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 5.875706214689266, |
|
"grad_norm": 0.015424055978655815, |
|
"learning_rate": 3.531073446327684e-05, |
|
"loss": 0.0292, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 5.932203389830509, |
|
"grad_norm": 0.6497403979301453, |
|
"learning_rate": 3.516949152542373e-05, |
|
"loss": 0.039, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 5.988700564971752, |
|
"grad_norm": 18.98410415649414, |
|
"learning_rate": 3.5028248587570624e-05, |
|
"loss": 0.0322, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9614434008897677, |
|
"eval_f1": 0.9615565973130906, |
|
"eval_loss": 0.2411661297082901, |
|
"eval_runtime": 0.9629, |
|
"eval_samples_per_second": 4201.762, |
|
"eval_steps_per_second": 66.464, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 6.045197740112994, |
|
"grad_norm": 0.013564531691372395, |
|
"learning_rate": 3.488700564971752e-05, |
|
"loss": 0.0254, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 6.101694915254237, |
|
"grad_norm": 33.3035888671875, |
|
"learning_rate": 3.474576271186441e-05, |
|
"loss": 0.0295, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 6.15819209039548, |
|
"grad_norm": 0.12126260250806808, |
|
"learning_rate": 3.46045197740113e-05, |
|
"loss": 0.0275, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 6.214689265536723, |
|
"grad_norm": 0.03739802539348602, |
|
"learning_rate": 3.446327683615819e-05, |
|
"loss": 0.0267, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 6.271186440677966, |
|
"grad_norm": 0.03359340503811836, |
|
"learning_rate": 3.432203389830508e-05, |
|
"loss": 0.0389, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 6.327683615819209, |
|
"grad_norm": 0.003635927801951766, |
|
"learning_rate": 3.418079096045198e-05, |
|
"loss": 0.0255, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 6.3841807909604515, |
|
"grad_norm": 0.06124364957213402, |
|
"learning_rate": 3.403954802259887e-05, |
|
"loss": 0.0229, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 6.440677966101695, |
|
"grad_norm": 0.026170525699853897, |
|
"learning_rate": 3.389830508474576e-05, |
|
"loss": 0.0319, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 6.497175141242938, |
|
"grad_norm": 0.013875061646103859, |
|
"learning_rate": 3.375706214689266e-05, |
|
"loss": 0.0276, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 6.553672316384181, |
|
"grad_norm": 0.009600764140486717, |
|
"learning_rate": 3.361581920903955e-05, |
|
"loss": 0.0182, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 6.610169491525424, |
|
"grad_norm": 0.02147483266890049, |
|
"learning_rate": 3.347457627118644e-05, |
|
"loss": 0.0277, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 6.666666666666667, |
|
"grad_norm": 0.007301884237676859, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.0268, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 6.72316384180791, |
|
"grad_norm": 0.008684027940034866, |
|
"learning_rate": 3.319209039548023e-05, |
|
"loss": 0.0244, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 6.779661016949152, |
|
"grad_norm": 0.0058201453648507595, |
|
"learning_rate": 3.305084745762712e-05, |
|
"loss": 0.018, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 6.836158192090395, |
|
"grad_norm": 0.015645477920770645, |
|
"learning_rate": 3.2909604519774014e-05, |
|
"loss": 0.0389, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 6.892655367231638, |
|
"grad_norm": 0.013589623384177685, |
|
"learning_rate": 3.27683615819209e-05, |
|
"loss": 0.0271, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 6.9491525423728815, |
|
"grad_norm": 0.004052096512168646, |
|
"learning_rate": 3.26271186440678e-05, |
|
"loss": 0.012, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.963173504695996, |
|
"eval_f1": 0.9631919351432553, |
|
"eval_loss": 0.22085699439048767, |
|
"eval_runtime": 0.9623, |
|
"eval_samples_per_second": 4204.299, |
|
"eval_steps_per_second": 66.504, |
|
"step": 12390 |
|
}, |
|
{ |
|
"epoch": 7.005649717514125, |
|
"grad_norm": 0.06705684214830399, |
|
"learning_rate": 3.2485875706214694e-05, |
|
"loss": 0.0309, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 7.062146892655368, |
|
"grad_norm": 0.006240461952984333, |
|
"learning_rate": 3.234463276836158e-05, |
|
"loss": 0.0084, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 7.11864406779661, |
|
"grad_norm": 0.020344626158475876, |
|
"learning_rate": 3.2203389830508473e-05, |
|
"loss": 0.0168, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 7.175141242937853, |
|
"grad_norm": 0.003926662262529135, |
|
"learning_rate": 3.2062146892655373e-05, |
|
"loss": 0.022, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 7.231638418079096, |
|
"grad_norm": 0.0025492089334875345, |
|
"learning_rate": 3.192090395480226e-05, |
|
"loss": 0.0133, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 7.288135593220339, |
|
"grad_norm": 0.005623087752610445, |
|
"learning_rate": 3.177966101694915e-05, |
|
"loss": 0.0164, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 7.344632768361582, |
|
"grad_norm": 0.0032459620852023363, |
|
"learning_rate": 3.1638418079096046e-05, |
|
"loss": 0.0272, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 7.401129943502825, |
|
"grad_norm": 1.1293178796768188, |
|
"learning_rate": 3.149717514124294e-05, |
|
"loss": 0.0148, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 7.4576271186440675, |
|
"grad_norm": 0.0017996145179495215, |
|
"learning_rate": 3.135593220338983e-05, |
|
"loss": 0.0132, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 7.5141242937853105, |
|
"grad_norm": 0.008758709765970707, |
|
"learning_rate": 3.1214689265536726e-05, |
|
"loss": 0.0152, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 7.570621468926554, |
|
"grad_norm": 0.0038798090536147356, |
|
"learning_rate": 3.107344632768362e-05, |
|
"loss": 0.0106, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 7.627118644067797, |
|
"grad_norm": 0.005076746456325054, |
|
"learning_rate": 3.093220338983051e-05, |
|
"loss": 0.0158, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 7.68361581920904, |
|
"grad_norm": 0.003670661011710763, |
|
"learning_rate": 3.0790960451977405e-05, |
|
"loss": 0.0093, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 7.740112994350282, |
|
"grad_norm": 0.003522429848089814, |
|
"learning_rate": 3.064971751412429e-05, |
|
"loss": 0.0183, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 7.796610169491525, |
|
"grad_norm": 0.06700780242681503, |
|
"learning_rate": 3.050847457627119e-05, |
|
"loss": 0.0398, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 7.853107344632768, |
|
"grad_norm": 0.01462018396705389, |
|
"learning_rate": 3.036723163841808e-05, |
|
"loss": 0.0286, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 7.909604519774011, |
|
"grad_norm": 0.025290269404649734, |
|
"learning_rate": 3.022598870056497e-05, |
|
"loss": 0.037, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 7.966101694915254, |
|
"grad_norm": 0.011192042380571365, |
|
"learning_rate": 3.0084745762711864e-05, |
|
"loss": 0.0147, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9636678200692042, |
|
"eval_f1": 0.9635820582698403, |
|
"eval_loss": 0.24865780770778656, |
|
"eval_runtime": 0.9484, |
|
"eval_samples_per_second": 4266.086, |
|
"eval_steps_per_second": 67.481, |
|
"step": 14160 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 35400, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1172336478105600.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|