Amal17's picture
Upload folder using huggingface_hub
5849330 verified
{
"best_metric": 0.772007503464447,
"best_model_checkpoint": "./results/models/nusaparagraph_emot/nusabert-bigru-concate-8-mean\\checkpoint-12222",
"epoch": 7.0,
"eval_steps": 500,
"global_step": 12222,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0286368843069874,
"grad_norm": 4.918474197387695,
"learning_rate": 9.985681557846507e-06,
"loss": 1.918,
"step": 50
},
{
"epoch": 0.0572737686139748,
"grad_norm": 5.332164287567139,
"learning_rate": 9.971363115693013e-06,
"loss": 1.8503,
"step": 100
},
{
"epoch": 0.0859106529209622,
"grad_norm": 13.817869186401367,
"learning_rate": 9.95704467353952e-06,
"loss": 1.7392,
"step": 150
},
{
"epoch": 0.1145475372279496,
"grad_norm": 12.340777397155762,
"learning_rate": 9.942726231386026e-06,
"loss": 1.4836,
"step": 200
},
{
"epoch": 0.143184421534937,
"grad_norm": 17.78537940979004,
"learning_rate": 9.928407789232532e-06,
"loss": 1.4447,
"step": 250
},
{
"epoch": 0.1718213058419244,
"grad_norm": 15.181681632995605,
"learning_rate": 9.914089347079038e-06,
"loss": 1.1689,
"step": 300
},
{
"epoch": 0.2004581901489118,
"grad_norm": 12.248451232910156,
"learning_rate": 9.899770904925546e-06,
"loss": 1.229,
"step": 350
},
{
"epoch": 0.2290950744558992,
"grad_norm": 15.683427810668945,
"learning_rate": 9.885452462772052e-06,
"loss": 1.0561,
"step": 400
},
{
"epoch": 0.25773195876288657,
"grad_norm": 17.413002014160156,
"learning_rate": 9.871134020618558e-06,
"loss": 0.9704,
"step": 450
},
{
"epoch": 0.286368843069874,
"grad_norm": 21.993549346923828,
"learning_rate": 9.856815578465064e-06,
"loss": 1.0062,
"step": 500
},
{
"epoch": 0.3150057273768614,
"grad_norm": 15.140169143676758,
"learning_rate": 9.84249713631157e-06,
"loss": 0.9102,
"step": 550
},
{
"epoch": 0.3436426116838488,
"grad_norm": 25.413042068481445,
"learning_rate": 9.828178694158076e-06,
"loss": 0.8857,
"step": 600
},
{
"epoch": 0.3722794959908362,
"grad_norm": 13.888834953308105,
"learning_rate": 9.813860252004582e-06,
"loss": 0.9794,
"step": 650
},
{
"epoch": 0.4009163802978236,
"grad_norm": 18.061304092407227,
"learning_rate": 9.799541809851088e-06,
"loss": 0.9635,
"step": 700
},
{
"epoch": 0.42955326460481097,
"grad_norm": 16.933076858520508,
"learning_rate": 9.785223367697596e-06,
"loss": 0.8568,
"step": 750
},
{
"epoch": 0.4581901489117984,
"grad_norm": 15.249773979187012,
"learning_rate": 9.770904925544102e-06,
"loss": 0.8865,
"step": 800
},
{
"epoch": 0.4868270332187858,
"grad_norm": 16.965801239013672,
"learning_rate": 9.756586483390608e-06,
"loss": 0.776,
"step": 850
},
{
"epoch": 0.5154639175257731,
"grad_norm": 6.5678791999816895,
"learning_rate": 9.742268041237114e-06,
"loss": 0.8698,
"step": 900
},
{
"epoch": 0.5441008018327605,
"grad_norm": 11.134130477905273,
"learning_rate": 9.72794959908362e-06,
"loss": 0.7746,
"step": 950
},
{
"epoch": 0.572737686139748,
"grad_norm": 13.739683151245117,
"learning_rate": 9.713631156930127e-06,
"loss": 0.8256,
"step": 1000
},
{
"epoch": 0.6013745704467354,
"grad_norm": 13.819252967834473,
"learning_rate": 9.699312714776633e-06,
"loss": 0.8959,
"step": 1050
},
{
"epoch": 0.6300114547537228,
"grad_norm": 21.550695419311523,
"learning_rate": 9.68499427262314e-06,
"loss": 0.8417,
"step": 1100
},
{
"epoch": 0.6586483390607102,
"grad_norm": 15.80966854095459,
"learning_rate": 9.670675830469645e-06,
"loss": 0.9138,
"step": 1150
},
{
"epoch": 0.6872852233676976,
"grad_norm": 9.978912353515625,
"learning_rate": 9.656357388316153e-06,
"loss": 0.7363,
"step": 1200
},
{
"epoch": 0.715922107674685,
"grad_norm": 15.32123851776123,
"learning_rate": 9.642038946162659e-06,
"loss": 0.849,
"step": 1250
},
{
"epoch": 0.7445589919816724,
"grad_norm": 21.705232620239258,
"learning_rate": 9.627720504009165e-06,
"loss": 0.8068,
"step": 1300
},
{
"epoch": 0.7731958762886598,
"grad_norm": 23.230682373046875,
"learning_rate": 9.613402061855671e-06,
"loss": 0.7546,
"step": 1350
},
{
"epoch": 0.8018327605956472,
"grad_norm": 18.663450241088867,
"learning_rate": 9.599083619702177e-06,
"loss": 0.7417,
"step": 1400
},
{
"epoch": 0.8304696449026346,
"grad_norm": 17.77196502685547,
"learning_rate": 9.584765177548685e-06,
"loss": 0.7517,
"step": 1450
},
{
"epoch": 0.8591065292096219,
"grad_norm": 7.162426471710205,
"learning_rate": 9.57044673539519e-06,
"loss": 0.6956,
"step": 1500
},
{
"epoch": 0.8877434135166093,
"grad_norm": 11.551986694335938,
"learning_rate": 9.556128293241697e-06,
"loss": 0.7805,
"step": 1550
},
{
"epoch": 0.9163802978235968,
"grad_norm": 28.649534225463867,
"learning_rate": 9.541809851088203e-06,
"loss": 0.7907,
"step": 1600
},
{
"epoch": 0.9450171821305842,
"grad_norm": 24.329723358154297,
"learning_rate": 9.527491408934708e-06,
"loss": 0.662,
"step": 1650
},
{
"epoch": 0.9736540664375716,
"grad_norm": 8.512353897094727,
"learning_rate": 9.513172966781214e-06,
"loss": 0.7086,
"step": 1700
},
{
"epoch": 1.0,
"eval_accuracy": 0.7155204810221721,
"eval_f1": 0.7112801498610356,
"eval_loss": 0.8033041954040527,
"eval_runtime": 36.3098,
"eval_samples_per_second": 73.286,
"eval_steps_per_second": 9.171,
"step": 1746
},
{
"epoch": 1.002290950744559,
"grad_norm": 19.602813720703125,
"learning_rate": 9.49885452462772e-06,
"loss": 0.7832,
"step": 1750
},
{
"epoch": 1.0309278350515463,
"grad_norm": 8.915902137756348,
"learning_rate": 9.484536082474226e-06,
"loss": 0.5571,
"step": 1800
},
{
"epoch": 1.0595647193585338,
"grad_norm": 44.88418960571289,
"learning_rate": 9.470217640320734e-06,
"loss": 0.5963,
"step": 1850
},
{
"epoch": 1.088201603665521,
"grad_norm": 7.115338325500488,
"learning_rate": 9.45589919816724e-06,
"loss": 0.4956,
"step": 1900
},
{
"epoch": 1.1168384879725086,
"grad_norm": 16.892269134521484,
"learning_rate": 9.441580756013746e-06,
"loss": 0.4767,
"step": 1950
},
{
"epoch": 1.145475372279496,
"grad_norm": 30.78487205505371,
"learning_rate": 9.427262313860252e-06,
"loss": 0.5456,
"step": 2000
},
{
"epoch": 1.1741122565864834,
"grad_norm": 12.62244987487793,
"learning_rate": 9.41294387170676e-06,
"loss": 0.531,
"step": 2050
},
{
"epoch": 1.2027491408934707,
"grad_norm": 20.651899337768555,
"learning_rate": 9.398625429553266e-06,
"loss": 0.529,
"step": 2100
},
{
"epoch": 1.2313860252004583,
"grad_norm": 21.23448371887207,
"learning_rate": 9.384306987399772e-06,
"loss": 0.5307,
"step": 2150
},
{
"epoch": 1.2600229095074456,
"grad_norm": 6.756899356842041,
"learning_rate": 9.369988545246278e-06,
"loss": 0.5302,
"step": 2200
},
{
"epoch": 1.2886597938144329,
"grad_norm": 11.766059875488281,
"learning_rate": 9.355670103092784e-06,
"loss": 0.5169,
"step": 2250
},
{
"epoch": 1.3172966781214204,
"grad_norm": 12.042119026184082,
"learning_rate": 9.341351660939291e-06,
"loss": 0.4705,
"step": 2300
},
{
"epoch": 1.345933562428408,
"grad_norm": 13.262438774108887,
"learning_rate": 9.327033218785797e-06,
"loss": 0.5263,
"step": 2350
},
{
"epoch": 1.3745704467353952,
"grad_norm": 24.665464401245117,
"learning_rate": 9.312714776632303e-06,
"loss": 0.5039,
"step": 2400
},
{
"epoch": 1.4032073310423825,
"grad_norm": 24.94556427001953,
"learning_rate": 9.29839633447881e-06,
"loss": 0.5736,
"step": 2450
},
{
"epoch": 1.43184421534937,
"grad_norm": 18.88966941833496,
"learning_rate": 9.284077892325315e-06,
"loss": 0.5569,
"step": 2500
},
{
"epoch": 1.4604810996563573,
"grad_norm": 8.994138717651367,
"learning_rate": 9.269759450171823e-06,
"loss": 0.4279,
"step": 2550
},
{
"epoch": 1.4891179839633448,
"grad_norm": 8.518653869628906,
"learning_rate": 9.255441008018329e-06,
"loss": 0.4764,
"step": 2600
},
{
"epoch": 1.5177548682703321,
"grad_norm": 35.21455764770508,
"learning_rate": 9.241122565864835e-06,
"loss": 0.5318,
"step": 2650
},
{
"epoch": 1.5463917525773194,
"grad_norm": 13.781972885131836,
"learning_rate": 9.226804123711341e-06,
"loss": 0.4229,
"step": 2700
},
{
"epoch": 1.575028636884307,
"grad_norm": 7.166682720184326,
"learning_rate": 9.212485681557847e-06,
"loss": 0.511,
"step": 2750
},
{
"epoch": 1.6036655211912945,
"grad_norm": 20.155414581298828,
"learning_rate": 9.198167239404353e-06,
"loss": 0.5216,
"step": 2800
},
{
"epoch": 1.6323024054982818,
"grad_norm": 13.758686065673828,
"learning_rate": 9.183848797250859e-06,
"loss": 0.5359,
"step": 2850
},
{
"epoch": 1.660939289805269,
"grad_norm": 17.821603775024414,
"learning_rate": 9.169530355097367e-06,
"loss": 0.5762,
"step": 2900
},
{
"epoch": 1.6895761741122566,
"grad_norm": 22.8956356048584,
"learning_rate": 9.155211912943873e-06,
"loss": 0.5629,
"step": 2950
},
{
"epoch": 1.718213058419244,
"grad_norm": 3.841970443725586,
"learning_rate": 9.140893470790379e-06,
"loss": 0.4847,
"step": 3000
},
{
"epoch": 1.7468499427262314,
"grad_norm": 22.353851318359375,
"learning_rate": 9.126575028636885e-06,
"loss": 0.499,
"step": 3050
},
{
"epoch": 1.7754868270332187,
"grad_norm": 6.219491481781006,
"learning_rate": 9.11225658648339e-06,
"loss": 0.6343,
"step": 3100
},
{
"epoch": 1.8041237113402062,
"grad_norm": 26.25059700012207,
"learning_rate": 9.097938144329898e-06,
"loss": 0.5259,
"step": 3150
},
{
"epoch": 1.8327605956471937,
"grad_norm": 14.963705062866211,
"learning_rate": 9.083619702176404e-06,
"loss": 0.5091,
"step": 3200
},
{
"epoch": 1.861397479954181,
"grad_norm": 4.209954261779785,
"learning_rate": 9.06930126002291e-06,
"loss": 0.5591,
"step": 3250
},
{
"epoch": 1.8900343642611683,
"grad_norm": 12.075881958007812,
"learning_rate": 9.054982817869416e-06,
"loss": 0.4983,
"step": 3300
},
{
"epoch": 1.9186712485681556,
"grad_norm": 14.535738945007324,
"learning_rate": 9.040664375715922e-06,
"loss": 0.4013,
"step": 3350
},
{
"epoch": 1.9473081328751431,
"grad_norm": 12.017505645751953,
"learning_rate": 9.02634593356243e-06,
"loss": 0.5307,
"step": 3400
},
{
"epoch": 1.9759450171821307,
"grad_norm": 24.822385787963867,
"learning_rate": 9.012027491408936e-06,
"loss": 0.4848,
"step": 3450
},
{
"epoch": 2.0,
"eval_accuracy": 0.7576099210822999,
"eval_f1": 0.7572228179173918,
"eval_loss": 0.7472622394561768,
"eval_runtime": 36.2957,
"eval_samples_per_second": 73.314,
"eval_steps_per_second": 9.175,
"step": 3492
},
{
"epoch": 2.004581901489118,
"grad_norm": 0.584539532661438,
"learning_rate": 8.997709049255442e-06,
"loss": 0.4442,
"step": 3500
},
{
"epoch": 2.0332187857961053,
"grad_norm": 48.69373321533203,
"learning_rate": 8.983390607101948e-06,
"loss": 0.3488,
"step": 3550
},
{
"epoch": 2.0618556701030926,
"grad_norm": 6.41154670715332,
"learning_rate": 8.969072164948455e-06,
"loss": 0.3281,
"step": 3600
},
{
"epoch": 2.0904925544100803,
"grad_norm": 29.08100700378418,
"learning_rate": 8.954753722794961e-06,
"loss": 0.2916,
"step": 3650
},
{
"epoch": 2.1191294387170676,
"grad_norm": 9.118326187133789,
"learning_rate": 8.940435280641467e-06,
"loss": 0.2665,
"step": 3700
},
{
"epoch": 2.147766323024055,
"grad_norm": 3.8800840377807617,
"learning_rate": 8.926116838487973e-06,
"loss": 0.2697,
"step": 3750
},
{
"epoch": 2.176403207331042,
"grad_norm": 14.715743064880371,
"learning_rate": 8.91179839633448e-06,
"loss": 0.3374,
"step": 3800
},
{
"epoch": 2.20504009163803,
"grad_norm": 46.47842025756836,
"learning_rate": 8.897479954180985e-06,
"loss": 0.2725,
"step": 3850
},
{
"epoch": 2.2336769759450172,
"grad_norm": 25.640724182128906,
"learning_rate": 8.883161512027491e-06,
"loss": 0.2656,
"step": 3900
},
{
"epoch": 2.2623138602520045,
"grad_norm": 33.861568450927734,
"learning_rate": 8.868843069873997e-06,
"loss": 0.3202,
"step": 3950
},
{
"epoch": 2.290950744558992,
"grad_norm": 1.642096996307373,
"learning_rate": 8.854524627720505e-06,
"loss": 0.2686,
"step": 4000
},
{
"epoch": 2.319587628865979,
"grad_norm": 10.57680606842041,
"learning_rate": 8.840206185567011e-06,
"loss": 0.2817,
"step": 4050
},
{
"epoch": 2.348224513172967,
"grad_norm": 22.718547821044922,
"learning_rate": 8.825887743413517e-06,
"loss": 0.3209,
"step": 4100
},
{
"epoch": 2.376861397479954,
"grad_norm": 39.80268859863281,
"learning_rate": 8.811569301260023e-06,
"loss": 0.3598,
"step": 4150
},
{
"epoch": 2.4054982817869415,
"grad_norm": 13.368090629577637,
"learning_rate": 8.797250859106529e-06,
"loss": 0.3448,
"step": 4200
},
{
"epoch": 2.434135166093929,
"grad_norm": 55.12387466430664,
"learning_rate": 8.782932416953037e-06,
"loss": 0.2845,
"step": 4250
},
{
"epoch": 2.4627720504009165,
"grad_norm": 8.037142753601074,
"learning_rate": 8.768613974799543e-06,
"loss": 0.2482,
"step": 4300
},
{
"epoch": 2.491408934707904,
"grad_norm": 14.919239044189453,
"learning_rate": 8.754295532646049e-06,
"loss": 0.2222,
"step": 4350
},
{
"epoch": 2.520045819014891,
"grad_norm": 30.16111946105957,
"learning_rate": 8.739977090492555e-06,
"loss": 0.2577,
"step": 4400
},
{
"epoch": 2.5486827033218784,
"grad_norm": 18.18327522277832,
"learning_rate": 8.725658648339062e-06,
"loss": 0.2802,
"step": 4450
},
{
"epoch": 2.5773195876288657,
"grad_norm": 14.579641342163086,
"learning_rate": 8.711340206185568e-06,
"loss": 0.2528,
"step": 4500
},
{
"epoch": 2.6059564719358534,
"grad_norm": 17.266183853149414,
"learning_rate": 8.697021764032074e-06,
"loss": 0.3592,
"step": 4550
},
{
"epoch": 2.6345933562428407,
"grad_norm": 16.718862533569336,
"learning_rate": 8.68270332187858e-06,
"loss": 0.3184,
"step": 4600
},
{
"epoch": 2.663230240549828,
"grad_norm": 25.249649047851562,
"learning_rate": 8.668384879725086e-06,
"loss": 0.2697,
"step": 4650
},
{
"epoch": 2.691867124856816,
"grad_norm": 11.022406578063965,
"learning_rate": 8.654066437571594e-06,
"loss": 0.2776,
"step": 4700
},
{
"epoch": 2.720504009163803,
"grad_norm": 8.15123176574707,
"learning_rate": 8.6397479954181e-06,
"loss": 0.3462,
"step": 4750
},
{
"epoch": 2.7491408934707904,
"grad_norm": 11.458098411560059,
"learning_rate": 8.625429553264606e-06,
"loss": 0.3657,
"step": 4800
},
{
"epoch": 2.7777777777777777,
"grad_norm": 3.9528236389160156,
"learning_rate": 8.611111111111112e-06,
"loss": 0.3086,
"step": 4850
},
{
"epoch": 2.806414662084765,
"grad_norm": 39.1205940246582,
"learning_rate": 8.596792668957618e-06,
"loss": 0.3528,
"step": 4900
},
{
"epoch": 2.8350515463917527,
"grad_norm": 53.56584167480469,
"learning_rate": 8.582474226804124e-06,
"loss": 0.2909,
"step": 4950
},
{
"epoch": 2.86368843069874,
"grad_norm": 51.294132232666016,
"learning_rate": 8.56815578465063e-06,
"loss": 0.2571,
"step": 5000
},
{
"epoch": 2.8923253150057273,
"grad_norm": 48.5758056640625,
"learning_rate": 8.553837342497136e-06,
"loss": 0.2812,
"step": 5050
},
{
"epoch": 2.9209621993127146,
"grad_norm": 9.625508308410645,
"learning_rate": 8.539518900343643e-06,
"loss": 0.2606,
"step": 5100
},
{
"epoch": 2.9495990836197024,
"grad_norm": 22.97384262084961,
"learning_rate": 8.52520045819015e-06,
"loss": 0.3566,
"step": 5150
},
{
"epoch": 2.9782359679266897,
"grad_norm": 34.34775161743164,
"learning_rate": 8.510882016036655e-06,
"loss": 0.3251,
"step": 5200
},
{
"epoch": 3.0,
"eval_accuracy": 0.7493423524990604,
"eval_f1": 0.7488536960670391,
"eval_loss": 0.9042189121246338,
"eval_runtime": 36.3022,
"eval_samples_per_second": 73.301,
"eval_steps_per_second": 9.173,
"step": 5238
},
{
"epoch": 3.006872852233677,
"grad_norm": 0.47577229142189026,
"learning_rate": 8.496563573883161e-06,
"loss": 0.2671,
"step": 5250
},
{
"epoch": 3.0355097365406642,
"grad_norm": 36.310089111328125,
"learning_rate": 8.482245131729669e-06,
"loss": 0.1493,
"step": 5300
},
{
"epoch": 3.0641466208476515,
"grad_norm": 29.30960464477539,
"learning_rate": 8.467926689576175e-06,
"loss": 0.1603,
"step": 5350
},
{
"epoch": 3.0927835051546393,
"grad_norm": 2.112786054611206,
"learning_rate": 8.453608247422681e-06,
"loss": 0.1363,
"step": 5400
},
{
"epoch": 3.1214203894616266,
"grad_norm": 3.0346627235412598,
"learning_rate": 8.439289805269187e-06,
"loss": 0.1359,
"step": 5450
},
{
"epoch": 3.150057273768614,
"grad_norm": 22.331645965576172,
"learning_rate": 8.424971363115693e-06,
"loss": 0.1479,
"step": 5500
},
{
"epoch": 3.178694158075601,
"grad_norm": 13.065728187561035,
"learning_rate": 8.4106529209622e-06,
"loss": 0.1311,
"step": 5550
},
{
"epoch": 3.207331042382589,
"grad_norm": 3.163928270339966,
"learning_rate": 8.396334478808707e-06,
"loss": 0.1364,
"step": 5600
},
{
"epoch": 3.2359679266895762,
"grad_norm": 33.371849060058594,
"learning_rate": 8.382016036655213e-06,
"loss": 0.1485,
"step": 5650
},
{
"epoch": 3.2646048109965635,
"grad_norm": 24.721717834472656,
"learning_rate": 8.367697594501719e-06,
"loss": 0.155,
"step": 5700
},
{
"epoch": 3.293241695303551,
"grad_norm": 15.719419479370117,
"learning_rate": 8.353379152348225e-06,
"loss": 0.1539,
"step": 5750
},
{
"epoch": 3.3218785796105386,
"grad_norm": 41.62794876098633,
"learning_rate": 8.339060710194732e-06,
"loss": 0.1695,
"step": 5800
},
{
"epoch": 3.350515463917526,
"grad_norm": 1.7000266313552856,
"learning_rate": 8.324742268041238e-06,
"loss": 0.187,
"step": 5850
},
{
"epoch": 3.379152348224513,
"grad_norm": 9.574384689331055,
"learning_rate": 8.310423825887744e-06,
"loss": 0.1909,
"step": 5900
},
{
"epoch": 3.4077892325315005,
"grad_norm": 19.80113410949707,
"learning_rate": 8.29610538373425e-06,
"loss": 0.1656,
"step": 5950
},
{
"epoch": 3.436426116838488,
"grad_norm": 71.53827667236328,
"learning_rate": 8.281786941580758e-06,
"loss": 0.1116,
"step": 6000
},
{
"epoch": 3.4650630011454755,
"grad_norm": 65.89808654785156,
"learning_rate": 8.267468499427262e-06,
"loss": 0.188,
"step": 6050
},
{
"epoch": 3.493699885452463,
"grad_norm": 60.68288803100586,
"learning_rate": 8.253150057273768e-06,
"loss": 0.1315,
"step": 6100
},
{
"epoch": 3.52233676975945,
"grad_norm": 25.550884246826172,
"learning_rate": 8.238831615120276e-06,
"loss": 0.1194,
"step": 6150
},
{
"epoch": 3.5509736540664374,
"grad_norm": 6.416481971740723,
"learning_rate": 8.224513172966782e-06,
"loss": 0.134,
"step": 6200
},
{
"epoch": 3.579610538373425,
"grad_norm": 1.5923579931259155,
"learning_rate": 8.210194730813288e-06,
"loss": 0.1672,
"step": 6250
},
{
"epoch": 3.6082474226804124,
"grad_norm": 19.590898513793945,
"learning_rate": 8.195876288659794e-06,
"loss": 0.14,
"step": 6300
},
{
"epoch": 3.6368843069873997,
"grad_norm": 0.4376499652862549,
"learning_rate": 8.1815578465063e-06,
"loss": 0.1425,
"step": 6350
},
{
"epoch": 3.665521191294387,
"grad_norm": 11.566116333007812,
"learning_rate": 8.167239404352808e-06,
"loss": 0.205,
"step": 6400
},
{
"epoch": 3.6941580756013748,
"grad_norm": 62.71388626098633,
"learning_rate": 8.152920962199314e-06,
"loss": 0.184,
"step": 6450
},
{
"epoch": 3.722794959908362,
"grad_norm": 0.32115602493286133,
"learning_rate": 8.13860252004582e-06,
"loss": 0.1267,
"step": 6500
},
{
"epoch": 3.7514318442153494,
"grad_norm": 53.07960891723633,
"learning_rate": 8.124284077892326e-06,
"loss": 0.1796,
"step": 6550
},
{
"epoch": 3.7800687285223367,
"grad_norm": 2.265838861465454,
"learning_rate": 8.109965635738832e-06,
"loss": 0.2187,
"step": 6600
},
{
"epoch": 3.808705612829324,
"grad_norm": 0.6725891828536987,
"learning_rate": 8.09564719358534e-06,
"loss": 0.1518,
"step": 6650
},
{
"epoch": 3.8373424971363117,
"grad_norm": 53.60797882080078,
"learning_rate": 8.081328751431845e-06,
"loss": 0.1619,
"step": 6700
},
{
"epoch": 3.865979381443299,
"grad_norm": 22.341100692749023,
"learning_rate": 8.067010309278351e-06,
"loss": 0.1378,
"step": 6750
},
{
"epoch": 3.8946162657502863,
"grad_norm": 64.15727996826172,
"learning_rate": 8.052691867124857e-06,
"loss": 0.1445,
"step": 6800
},
{
"epoch": 3.923253150057274,
"grad_norm": 36.292484283447266,
"learning_rate": 8.038373424971365e-06,
"loss": 0.1612,
"step": 6850
},
{
"epoch": 3.9518900343642613,
"grad_norm": 4.331699848175049,
"learning_rate": 8.02405498281787e-06,
"loss": 0.2337,
"step": 6900
},
{
"epoch": 3.9805269186712486,
"grad_norm": 0.28292712569236755,
"learning_rate": 8.009736540664377e-06,
"loss": 0.1779,
"step": 6950
},
{
"epoch": 4.0,
"eval_accuracy": 0.7606163096580233,
"eval_f1": 0.7597313264211647,
"eval_loss": 0.9818114042282104,
"eval_runtime": 36.2859,
"eval_samples_per_second": 73.334,
"eval_steps_per_second": 9.177,
"step": 6984
},
{
"epoch": 4.009163802978236,
"grad_norm": 14.622065544128418,
"learning_rate": 7.995418098510883e-06,
"loss": 0.1005,
"step": 7000
},
{
"epoch": 4.037800687285223,
"grad_norm": 0.4035070836544037,
"learning_rate": 7.981099656357389e-06,
"loss": 0.0777,
"step": 7050
},
{
"epoch": 4.0664375715922105,
"grad_norm": 31.626924514770508,
"learning_rate": 7.966781214203895e-06,
"loss": 0.0581,
"step": 7100
},
{
"epoch": 4.095074455899198,
"grad_norm": 0.15928135812282562,
"learning_rate": 7.9524627720504e-06,
"loss": 0.0477,
"step": 7150
},
{
"epoch": 4.123711340206185,
"grad_norm": 49.666709899902344,
"learning_rate": 7.938144329896907e-06,
"loss": 0.0752,
"step": 7200
},
{
"epoch": 4.152348224513173,
"grad_norm": 0.6751652956008911,
"learning_rate": 7.923825887743414e-06,
"loss": 0.0807,
"step": 7250
},
{
"epoch": 4.180985108820161,
"grad_norm": 0.623131513595581,
"learning_rate": 7.90950744558992e-06,
"loss": 0.1205,
"step": 7300
},
{
"epoch": 4.209621993127148,
"grad_norm": 0.47417309880256653,
"learning_rate": 7.895189003436426e-06,
"loss": 0.0286,
"step": 7350
},
{
"epoch": 4.238258877434135,
"grad_norm": 0.37885400652885437,
"learning_rate": 7.880870561282932e-06,
"loss": 0.0747,
"step": 7400
},
{
"epoch": 4.2668957617411225,
"grad_norm": 1.7955352067947388,
"learning_rate": 7.866552119129438e-06,
"loss": 0.0544,
"step": 7450
},
{
"epoch": 4.29553264604811,
"grad_norm": 46.27180862426758,
"learning_rate": 7.852233676975946e-06,
"loss": 0.0447,
"step": 7500
},
{
"epoch": 4.324169530355097,
"grad_norm": 2.345630645751953,
"learning_rate": 7.837915234822452e-06,
"loss": 0.0599,
"step": 7550
},
{
"epoch": 4.352806414662084,
"grad_norm": 0.334553986787796,
"learning_rate": 7.823596792668958e-06,
"loss": 0.0494,
"step": 7600
},
{
"epoch": 4.381443298969073,
"grad_norm": 8.703739166259766,
"learning_rate": 7.809278350515464e-06,
"loss": 0.0523,
"step": 7650
},
{
"epoch": 4.41008018327606,
"grad_norm": 36.21126937866211,
"learning_rate": 7.794959908361972e-06,
"loss": 0.115,
"step": 7700
},
{
"epoch": 4.438717067583047,
"grad_norm": 0.7834786772727966,
"learning_rate": 7.780641466208478e-06,
"loss": 0.0602,
"step": 7750
},
{
"epoch": 4.4673539518900345,
"grad_norm": 1.5702382326126099,
"learning_rate": 7.766323024054984e-06,
"loss": 0.1341,
"step": 7800
},
{
"epoch": 4.495990836197022,
"grad_norm": 0.2708655595779419,
"learning_rate": 7.75200458190149e-06,
"loss": 0.1514,
"step": 7850
},
{
"epoch": 4.524627720504009,
"grad_norm": 0.48853299021720886,
"learning_rate": 7.737686139747996e-06,
"loss": 0.1418,
"step": 7900
},
{
"epoch": 4.553264604810996,
"grad_norm": 36.37514114379883,
"learning_rate": 7.723367697594503e-06,
"loss": 0.0663,
"step": 7950
},
{
"epoch": 4.581901489117984,
"grad_norm": 30.906522750854492,
"learning_rate": 7.70904925544101e-06,
"loss": 0.0655,
"step": 8000
},
{
"epoch": 4.610538373424971,
"grad_norm": 3.7403993606567383,
"learning_rate": 7.694730813287515e-06,
"loss": 0.0496,
"step": 8050
},
{
"epoch": 4.639175257731958,
"grad_norm": 0.10339858382940292,
"learning_rate": 7.680412371134021e-06,
"loss": 0.1213,
"step": 8100
},
{
"epoch": 4.6678121420389465,
"grad_norm": 0.03920993208885193,
"learning_rate": 7.666093928980529e-06,
"loss": 0.1407,
"step": 8150
},
{
"epoch": 4.696449026345934,
"grad_norm": 70.78038024902344,
"learning_rate": 7.651775486827033e-06,
"loss": 0.0922,
"step": 8200
},
{
"epoch": 4.725085910652921,
"grad_norm": 20.5271053314209,
"learning_rate": 7.63745704467354e-06,
"loss": 0.1784,
"step": 8250
},
{
"epoch": 4.753722794959908,
"grad_norm": 0.17482055723667145,
"learning_rate": 7.623138602520046e-06,
"loss": 0.1508,
"step": 8300
},
{
"epoch": 4.782359679266896,
"grad_norm": 33.39112091064453,
"learning_rate": 7.608820160366552e-06,
"loss": 0.1115,
"step": 8350
},
{
"epoch": 4.810996563573883,
"grad_norm": 0.4592705965042114,
"learning_rate": 7.594501718213059e-06,
"loss": 0.1188,
"step": 8400
},
{
"epoch": 4.83963344788087,
"grad_norm": 38.45854187011719,
"learning_rate": 7.580183276059565e-06,
"loss": 0.0788,
"step": 8450
},
{
"epoch": 4.868270332187858,
"grad_norm": 60.54808044433594,
"learning_rate": 7.565864833906072e-06,
"loss": 0.1159,
"step": 8500
},
{
"epoch": 4.896907216494846,
"grad_norm": 0.5872980952262878,
"learning_rate": 7.551546391752578e-06,
"loss": 0.0984,
"step": 8550
},
{
"epoch": 4.925544100801833,
"grad_norm": 0.09889842569828033,
"learning_rate": 7.5372279495990845e-06,
"loss": 0.1722,
"step": 8600
},
{
"epoch": 4.95418098510882,
"grad_norm": 0.07930275052785873,
"learning_rate": 7.5229095074455904e-06,
"loss": 0.0861,
"step": 8650
},
{
"epoch": 4.982817869415808,
"grad_norm": 9.635697364807129,
"learning_rate": 7.5085910652920964e-06,
"loss": 0.1452,
"step": 8700
},
{
"epoch": 5.0,
"eval_accuracy": 0.7602405110860578,
"eval_f1": 0.7599417916848927,
"eval_loss": 1.1236233711242676,
"eval_runtime": 36.3176,
"eval_samples_per_second": 73.27,
"eval_steps_per_second": 9.169,
"step": 8730
},
{
"epoch": 5.011454753722795,
"grad_norm": 0.5619956254959106,
"learning_rate": 7.494272623138603e-06,
"loss": 0.0687,
"step": 8750
},
{
"epoch": 5.040091638029782,
"grad_norm": 85.73548126220703,
"learning_rate": 7.479954180985109e-06,
"loss": 0.0478,
"step": 8800
},
{
"epoch": 5.0687285223367695,
"grad_norm": 13.715363502502441,
"learning_rate": 7.465635738831616e-06,
"loss": 0.0396,
"step": 8850
},
{
"epoch": 5.097365406643757,
"grad_norm": 70.08007049560547,
"learning_rate": 7.451317296678122e-06,
"loss": 0.0562,
"step": 8900
},
{
"epoch": 5.126002290950744,
"grad_norm": 0.8938388824462891,
"learning_rate": 7.436998854524629e-06,
"loss": 0.0974,
"step": 8950
},
{
"epoch": 5.154639175257732,
"grad_norm": 0.3734937012195587,
"learning_rate": 7.422680412371135e-06,
"loss": 0.0742,
"step": 9000
},
{
"epoch": 5.18327605956472,
"grad_norm": 10.571038246154785,
"learning_rate": 7.408361970217641e-06,
"loss": 0.021,
"step": 9050
},
{
"epoch": 5.211912943871707,
"grad_norm": 0.09976017475128174,
"learning_rate": 7.394043528064148e-06,
"loss": 0.0546,
"step": 9100
},
{
"epoch": 5.240549828178694,
"grad_norm": 0.07766138762235641,
"learning_rate": 7.379725085910654e-06,
"loss": 0.0827,
"step": 9150
},
{
"epoch": 5.2691867124856815,
"grad_norm": 0.021182745695114136,
"learning_rate": 7.3654066437571605e-06,
"loss": 0.0508,
"step": 9200
},
{
"epoch": 5.297823596792669,
"grad_norm": 0.08311637490987778,
"learning_rate": 7.3510882016036665e-06,
"loss": 0.0359,
"step": 9250
},
{
"epoch": 5.326460481099656,
"grad_norm": 10.934857368469238,
"learning_rate": 7.336769759450172e-06,
"loss": 0.0431,
"step": 9300
},
{
"epoch": 5.355097365406643,
"grad_norm": 0.061881110072135925,
"learning_rate": 7.3224513172966785e-06,
"loss": 0.0245,
"step": 9350
},
{
"epoch": 5.383734249713632,
"grad_norm": 0.09978004544973373,
"learning_rate": 7.3081328751431845e-06,
"loss": 0.0201,
"step": 9400
},
{
"epoch": 5.412371134020619,
"grad_norm": 0.04522474855184555,
"learning_rate": 7.293814432989691e-06,
"loss": 0.0554,
"step": 9450
},
{
"epoch": 5.441008018327606,
"grad_norm": 27.210500717163086,
"learning_rate": 7.279495990836197e-06,
"loss": 0.0289,
"step": 9500
},
{
"epoch": 5.4696449026345935,
"grad_norm": 88.04645538330078,
"learning_rate": 7.265177548682703e-06,
"loss": 0.0786,
"step": 9550
},
{
"epoch": 5.498281786941581,
"grad_norm": 9.045210838317871,
"learning_rate": 7.25085910652921e-06,
"loss": 0.0776,
"step": 9600
},
{
"epoch": 5.526918671248568,
"grad_norm": 0.031076449900865555,
"learning_rate": 7.236540664375716e-06,
"loss": 0.0917,
"step": 9650
},
{
"epoch": 5.555555555555555,
"grad_norm": 0.07342424243688583,
"learning_rate": 7.222222222222223e-06,
"loss": 0.067,
"step": 9700
},
{
"epoch": 5.584192439862543,
"grad_norm": 0.07349126785993576,
"learning_rate": 7.207903780068729e-06,
"loss": 0.0275,
"step": 9750
},
{
"epoch": 5.61282932416953,
"grad_norm": 0.4687565267086029,
"learning_rate": 7.193585337915236e-06,
"loss": 0.0308,
"step": 9800
},
{
"epoch": 5.641466208476518,
"grad_norm": 0.03109198622405529,
"learning_rate": 7.179266895761742e-06,
"loss": 0.0512,
"step": 9850
},
{
"epoch": 5.670103092783505,
"grad_norm": 2.063464403152466,
"learning_rate": 7.164948453608248e-06,
"loss": 0.0436,
"step": 9900
},
{
"epoch": 5.698739977090493,
"grad_norm": 0.7259678244590759,
"learning_rate": 7.1506300114547545e-06,
"loss": 0.0452,
"step": 9950
},
{
"epoch": 5.72737686139748,
"grad_norm": 22.87729263305664,
"learning_rate": 7.1363115693012605e-06,
"loss": 0.0871,
"step": 10000
},
{
"epoch": 5.756013745704467,
"grad_norm": 0.6141005754470825,
"learning_rate": 7.121993127147767e-06,
"loss": 0.0432,
"step": 10050
},
{
"epoch": 5.784650630011455,
"grad_norm": 0.29292017221450806,
"learning_rate": 7.107674684994273e-06,
"loss": 0.0424,
"step": 10100
},
{
"epoch": 5.813287514318442,
"grad_norm": 0.059430770576000214,
"learning_rate": 7.09335624284078e-06,
"loss": 0.0751,
"step": 10150
},
{
"epoch": 5.841924398625429,
"grad_norm": 10.410508155822754,
"learning_rate": 7.079037800687286e-06,
"loss": 0.1061,
"step": 10200
},
{
"epoch": 5.870561282932417,
"grad_norm": 1.1330084800720215,
"learning_rate": 7.064719358533792e-06,
"loss": 0.0421,
"step": 10250
},
{
"epoch": 5.899198167239405,
"grad_norm": 0.1360820233821869,
"learning_rate": 7.050400916380299e-06,
"loss": 0.0664,
"step": 10300
},
{
"epoch": 5.927835051546392,
"grad_norm": 1.3874069452285767,
"learning_rate": 7.036082474226805e-06,
"loss": 0.1134,
"step": 10350
},
{
"epoch": 5.956471935853379,
"grad_norm": 0.0816207155585289,
"learning_rate": 7.02176403207331e-06,
"loss": 0.052,
"step": 10400
},
{
"epoch": 5.985108820160367,
"grad_norm": 0.0460013747215271,
"learning_rate": 7.007445589919817e-06,
"loss": 0.0362,
"step": 10450
},
{
"epoch": 6.0,
"eval_accuracy": 0.7591131153701616,
"eval_f1": 0.7565525682738199,
"eval_loss": 1.2660281658172607,
"eval_runtime": 36.2808,
"eval_samples_per_second": 73.345,
"eval_steps_per_second": 9.178,
"step": 10476
},
{
"epoch": 6.013745704467354,
"grad_norm": 0.09728560596704483,
"learning_rate": 6.993127147766323e-06,
"loss": 0.0709,
"step": 10500
},
{
"epoch": 6.042382588774341,
"grad_norm": 0.10100292414426804,
"learning_rate": 6.97880870561283e-06,
"loss": 0.0754,
"step": 10550
},
{
"epoch": 6.0710194730813285,
"grad_norm": 0.19107265770435333,
"learning_rate": 6.964490263459336e-06,
"loss": 0.0408,
"step": 10600
},
{
"epoch": 6.099656357388316,
"grad_norm": 0.0762198194861412,
"learning_rate": 6.9501718213058426e-06,
"loss": 0.0228,
"step": 10650
},
{
"epoch": 6.128293241695303,
"grad_norm": 0.023416342213749886,
"learning_rate": 6.9358533791523485e-06,
"loss": 0.0326,
"step": 10700
},
{
"epoch": 6.156930126002291,
"grad_norm": 0.13443826138973236,
"learning_rate": 6.9215349369988545e-06,
"loss": 0.021,
"step": 10750
},
{
"epoch": 6.185567010309279,
"grad_norm": 47.05551528930664,
"learning_rate": 6.907216494845361e-06,
"loss": 0.0505,
"step": 10800
},
{
"epoch": 6.214203894616266,
"grad_norm": 0.19994887709617615,
"learning_rate": 6.892898052691867e-06,
"loss": 0.044,
"step": 10850
},
{
"epoch": 6.242840778923253,
"grad_norm": 6.800403118133545,
"learning_rate": 6.878579610538374e-06,
"loss": 0.0212,
"step": 10900
},
{
"epoch": 6.2714776632302405,
"grad_norm": 0.18680407106876373,
"learning_rate": 6.86426116838488e-06,
"loss": 0.04,
"step": 10950
},
{
"epoch": 6.300114547537228,
"grad_norm": 2.269585132598877,
"learning_rate": 6.849942726231387e-06,
"loss": 0.0385,
"step": 11000
},
{
"epoch": 6.328751431844215,
"grad_norm": 0.05255923420190811,
"learning_rate": 6.835624284077893e-06,
"loss": 0.0364,
"step": 11050
},
{
"epoch": 6.357388316151202,
"grad_norm": 0.026086222380399704,
"learning_rate": 6.821305841924399e-06,
"loss": 0.0462,
"step": 11100
},
{
"epoch": 6.3860252004581906,
"grad_norm": 0.026915445923805237,
"learning_rate": 6.806987399770906e-06,
"loss": 0.0473,
"step": 11150
},
{
"epoch": 6.414662084765178,
"grad_norm": 30.492359161376953,
"learning_rate": 6.792668957617412e-06,
"loss": 0.0508,
"step": 11200
},
{
"epoch": 6.443298969072165,
"grad_norm": 0.030431082472205162,
"learning_rate": 6.778350515463919e-06,
"loss": 0.0555,
"step": 11250
},
{
"epoch": 6.4719358533791524,
"grad_norm": 0.19676260650157928,
"learning_rate": 6.764032073310425e-06,
"loss": 0.0435,
"step": 11300
},
{
"epoch": 6.50057273768614,
"grad_norm": 0.3724329471588135,
"learning_rate": 6.7497136311569314e-06,
"loss": 0.0209,
"step": 11350
},
{
"epoch": 6.529209621993127,
"grad_norm": 0.07832983881235123,
"learning_rate": 6.735395189003437e-06,
"loss": 0.0368,
"step": 11400
},
{
"epoch": 6.557846506300114,
"grad_norm": 1.6286810636520386,
"learning_rate": 6.721076746849944e-06,
"loss": 0.0546,
"step": 11450
},
{
"epoch": 6.586483390607102,
"grad_norm": 10.142606735229492,
"learning_rate": 6.706758304696449e-06,
"loss": 0.0537,
"step": 11500
},
{
"epoch": 6.615120274914089,
"grad_norm": 0.03260861337184906,
"learning_rate": 6.692439862542955e-06,
"loss": 0.0801,
"step": 11550
},
{
"epoch": 6.643757159221077,
"grad_norm": 0.06774479895830154,
"learning_rate": 6.678121420389461e-06,
"loss": 0.0276,
"step": 11600
},
{
"epoch": 6.672394043528064,
"grad_norm": 0.10083146393299103,
"learning_rate": 6.663802978235968e-06,
"loss": 0.0355,
"step": 11650
},
{
"epoch": 6.701030927835052,
"grad_norm": 0.5465721487998962,
"learning_rate": 6.649484536082474e-06,
"loss": 0.0531,
"step": 11700
},
{
"epoch": 6.729667812142039,
"grad_norm": 0.36361464858055115,
"learning_rate": 6.635166093928981e-06,
"loss": 0.0534,
"step": 11750
},
{
"epoch": 6.758304696449026,
"grad_norm": 0.3213222622871399,
"learning_rate": 6.620847651775487e-06,
"loss": 0.021,
"step": 11800
},
{
"epoch": 6.786941580756014,
"grad_norm": 1.3060314655303955,
"learning_rate": 6.606529209621994e-06,
"loss": 0.0371,
"step": 11850
},
{
"epoch": 6.815578465063001,
"grad_norm": 15.44947338104248,
"learning_rate": 6.5922107674685e-06,
"loss": 0.0475,
"step": 11900
},
{
"epoch": 6.844215349369988,
"grad_norm": 0.1092597097158432,
"learning_rate": 6.577892325315006e-06,
"loss": 0.0549,
"step": 11950
},
{
"epoch": 6.872852233676976,
"grad_norm": 0.24638314545154572,
"learning_rate": 6.563573883161513e-06,
"loss": 0.044,
"step": 12000
},
{
"epoch": 6.901489117983964,
"grad_norm": 0.17163485288619995,
"learning_rate": 6.549255441008019e-06,
"loss": 0.0478,
"step": 12050
},
{
"epoch": 6.930126002290951,
"grad_norm": 0.5006637573242188,
"learning_rate": 6.5349369988545254e-06,
"loss": 0.0595,
"step": 12100
},
{
"epoch": 6.958762886597938,
"grad_norm": 0.09898879379034042,
"learning_rate": 6.520618556701031e-06,
"loss": 0.0415,
"step": 12150
},
{
"epoch": 6.987399770904926,
"grad_norm": 0.8853304386138916,
"learning_rate": 6.506300114547538e-06,
"loss": 0.023,
"step": 12200
},
{
"epoch": 7.0,
"eval_accuracy": 0.7718902668169861,
"eval_f1": 0.772007503464447,
"eval_loss": 1.2936097383499146,
"eval_runtime": 36.2937,
"eval_samples_per_second": 73.319,
"eval_steps_per_second": 9.175,
"step": 12222
}
],
"logging_steps": 50,
"max_steps": 34920,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.283848509009536e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}