{
"best_metric": 0.9842267397484589,
"best_model_checkpoint": "/home/alexanders/Workspaces/malexandersalazar/xlm-roberta-large-cls-toxicity/data/models/xlm-roberta-large-cls-toxicity/config_5/checkpoint-28610",
"epoch": 9.999737922599808,
"eval_steps": 500,
"global_step": 28610,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1747182667947934,
"grad_norm": 12.17603588104248,
"learning_rate": 6.934153502027121e-07,
"loss": 0.7119,
"step": 500
},
{
"epoch": 0.3494365335895868,
"grad_norm": 13.519432067871094,
"learning_rate": 1.3910247448622957e-06,
"loss": 0.3407,
"step": 1000
},
{
"epoch": 0.5241548003843802,
"grad_norm": 11.993674278259277,
"learning_rate": 2.090032154340836e-06,
"loss": 0.1725,
"step": 1500
},
{
"epoch": 0.6988730671791736,
"grad_norm": 13.306364059448242,
"learning_rate": 2.789039563819377e-06,
"loss": 0.1498,
"step": 2000
},
{
"epoch": 0.8735913339739669,
"grad_norm": 13.724279403686523,
"learning_rate": 3.488046973297917e-06,
"loss": 0.1368,
"step": 2500
},
{
"epoch": 0.9997379225998078,
"eval_accuracy": 0.9578529000698812,
"eval_f1": 0.9580195344026164,
"eval_loss": 0.14260601997375488,
"eval_precision": 0.9585943807196626,
"eval_recall": 0.9578529000698812,
"eval_runtime": 68.6764,
"eval_samples_per_second": 333.389,
"eval_steps_per_second": 20.837,
"step": 2861
},
{
"epoch": 1.0485716781689525,
"grad_norm": 15.129600524902344,
"learning_rate": 4.187054382776457e-06,
"loss": 0.1225,
"step": 3000
},
{
"epoch": 1.223289944963746,
"grad_norm": 18.516948699951172,
"learning_rate": 4.884663777436041e-06,
"loss": 0.111,
"step": 3500
},
{
"epoch": 1.3980082117585393,
"grad_norm": 3.170327663421631,
"learning_rate": 5.583671186914582e-06,
"loss": 0.109,
"step": 4000
},
{
"epoch": 1.5727264785533328,
"grad_norm": 9.642043113708496,
"learning_rate": 6.281280581574165e-06,
"loss": 0.1045,
"step": 4500
},
{
"epoch": 1.747444745348126,
"grad_norm": 12.047298431396484,
"learning_rate": 6.980287991052706e-06,
"loss": 0.0981,
"step": 5000
},
{
"epoch": 1.9221630121429194,
"grad_norm": 11.591704368591309,
"learning_rate": 7.679295400531246e-06,
"loss": 0.0983,
"step": 5500
},
{
"epoch": 1.9997379225998078,
"eval_accuracy": 0.9699074074074074,
"eval_f1": 0.9698689694737291,
"eval_loss": 0.09155124425888062,
"eval_precision": 0.9698681669937823,
"eval_recall": 0.9699074074074074,
"eval_runtime": 68.6299,
"eval_samples_per_second": 333.616,
"eval_steps_per_second": 20.851,
"step": 5722
},
{
"epoch": 2.097143356337905,
"grad_norm": 7.699872970581055,
"learning_rate": 8.378302810009785e-06,
"loss": 0.0818,
"step": 6000
},
{
"epoch": 2.2718616231326987,
"grad_norm": 2.9679672718048096,
"learning_rate": 9.077310219488328e-06,
"loss": 0.078,
"step": 6500
},
{
"epoch": 2.446579889927492,
"grad_norm": 5.794170379638672,
"learning_rate": 9.776317628966868e-06,
"loss": 0.0769,
"step": 7000
},
{
"epoch": 2.6212981567222853,
"grad_norm": 7.152242183685303,
"learning_rate": 9.842009600596542e-06,
"loss": 0.0719,
"step": 7500
},
{
"epoch": 2.7960164235170786,
"grad_norm": 3.8340728282928467,
"learning_rate": 9.608985412685837e-06,
"loss": 0.0708,
"step": 8000
},
{
"epoch": 2.970734690311872,
"grad_norm": 13.128349304199219,
"learning_rate": 9.375961224775132e-06,
"loss": 0.0748,
"step": 8500
},
{
"epoch": 2.9997379225998078,
"eval_accuracy": 0.9731394129979035,
"eval_f1": 0.9731494346588756,
"eval_loss": 0.09937231242656708,
"eval_precision": 0.9731628120366744,
"eval_recall": 0.9731394129979035,
"eval_runtime": 68.6513,
"eval_samples_per_second": 333.512,
"eval_steps_per_second": 20.844,
"step": 8583
},
{
"epoch": 3.145715034506858,
"grad_norm": 20.52392578125,
"learning_rate": 9.142937036864427e-06,
"loss": 0.0504,
"step": 9000
},
{
"epoch": 3.320433301301651,
"grad_norm": 33.67174530029297,
"learning_rate": 8.909912848953722e-06,
"loss": 0.049,
"step": 9500
},
{
"epoch": 3.4951515680964444,
"grad_norm": 0.8490325212478638,
"learning_rate": 8.676888661043017e-06,
"loss": 0.0464,
"step": 10000
},
{
"epoch": 3.6698698348912377,
"grad_norm": 13.017010688781738,
"learning_rate": 8.443864473132312e-06,
"loss": 0.0463,
"step": 10500
},
{
"epoch": 3.844588101686031,
"grad_norm": 0.6549157500267029,
"learning_rate": 8.210840285221606e-06,
"loss": 0.049,
"step": 11000
},
{
"epoch": 3.9997379225998078,
"eval_accuracy": 0.9785552061495457,
"eval_f1": 0.9785386610066998,
"eval_loss": 0.0886269360780716,
"eval_precision": 0.9785361579407956,
"eval_recall": 0.9785552061495457,
"eval_runtime": 68.6459,
"eval_samples_per_second": 333.538,
"eval_steps_per_second": 20.846,
"step": 11444
},
{
"epoch": 4.019568445881017,
"grad_norm": 40.50947952270508,
"learning_rate": 7.977816097310901e-06,
"loss": 0.0454,
"step": 11500
},
{
"epoch": 4.19428671267581,
"grad_norm": 1.1528542041778564,
"learning_rate": 7.744791909400196e-06,
"loss": 0.029,
"step": 12000
},
{
"epoch": 4.369004979470604,
"grad_norm": 42.926326751708984,
"learning_rate": 7.512233769865313e-06,
"loss": 0.0313,
"step": 12500
},
{
"epoch": 4.543723246265397,
"grad_norm": 13.328317642211914,
"learning_rate": 7.2792095819546074e-06,
"loss": 0.0326,
"step": 13000
},
{
"epoch": 4.71844151306019,
"grad_norm": 0.08160242438316345,
"learning_rate": 7.0461853940439014e-06,
"loss": 0.0293,
"step": 13500
},
{
"epoch": 4.893159779854984,
"grad_norm": 21.219942092895508,
"learning_rate": 6.813161206133197e-06,
"loss": 0.0281,
"step": 14000
},
{
"epoch": 4.999737922599808,
"eval_accuracy": 0.9791229909154437,
"eval_f1": 0.9791175700337718,
"eval_loss": 0.12131603062152863,
"eval_precision": 0.9791137606071209,
"eval_recall": 0.9791229909154437,
"eval_runtime": 68.6286,
"eval_samples_per_second": 333.622,
"eval_steps_per_second": 20.851,
"step": 14305
},
{
"epoch": 5.068140124049969,
"grad_norm": 8.010854721069336,
"learning_rate": 6.580137018222492e-06,
"loss": 0.0292,
"step": 14500
},
{
"epoch": 5.242858390844763,
"grad_norm": 4.4188337326049805,
"learning_rate": 6.347112830311787e-06,
"loss": 0.0215,
"step": 15000
},
{
"epoch": 5.4175766576395565,
"grad_norm": 65.55230712890625,
"learning_rate": 6.1145546907769025e-06,
"loss": 0.021,
"step": 15500
},
{
"epoch": 5.592294924434349,
"grad_norm": 0.20221485197544098,
"learning_rate": 5.881530502866198e-06,
"loss": 0.0225,
"step": 16000
},
{
"epoch": 5.767013191229143,
"grad_norm": 65.68180084228516,
"learning_rate": 5.648506314955493e-06,
"loss": 0.0199,
"step": 16500
},
{
"epoch": 5.941731458023936,
"grad_norm": 0.33296695351600647,
"learning_rate": 5.415482127044788e-06,
"loss": 0.0211,
"step": 17000
},
{
"epoch": 5.999737922599808,
"eval_accuracy": 0.9804769392033543,
"eval_f1": 0.98049806714974,
"eval_loss": 0.10157082974910736,
"eval_precision": 0.9805481970936862,
"eval_recall": 0.9804769392033543,
"eval_runtime": 68.666,
"eval_samples_per_second": 333.44,
"eval_steps_per_second": 20.84,
"step": 17166
},
{
"epoch": 6.116711802218922,
"grad_norm": 0.5054177045822144,
"learning_rate": 5.182457939134083e-06,
"loss": 0.0149,
"step": 17500
},
{
"epoch": 6.291430069013716,
"grad_norm": 0.08331651240587234,
"learning_rate": 4.9494337512233775e-06,
"loss": 0.0141,
"step": 18000
},
{
"epoch": 6.466148335808509,
"grad_norm": 0.04097016155719757,
"learning_rate": 4.716409563312672e-06,
"loss": 0.0107,
"step": 18500
},
{
"epoch": 6.640866602603302,
"grad_norm": 0.009394422173500061,
"learning_rate": 4.483385375401967e-06,
"loss": 0.0142,
"step": 19000
},
{
"epoch": 6.815584869398096,
"grad_norm": 55.671607971191406,
"learning_rate": 4.250361187491262e-06,
"loss": 0.0112,
"step": 19500
},
{
"epoch": 6.990303136192889,
"grad_norm": 37.646053314208984,
"learning_rate": 4.018269096332199e-06,
"loss": 0.0143,
"step": 20000
},
{
"epoch": 6.999737922599808,
"eval_accuracy": 0.9821366177498253,
"eval_f1": 0.9821391542473246,
"eval_loss": 0.12774688005447388,
"eval_precision": 0.9821421791956497,
"eval_recall": 0.9821366177498253,
"eval_runtime": 68.6239,
"eval_samples_per_second": 333.645,
"eval_steps_per_second": 20.853,
"step": 20027
},
{
"epoch": 7.165283480387875,
"grad_norm": 4.926445007324219,
"learning_rate": 3.7852449084214942e-06,
"loss": 0.0079,
"step": 20500
},
{
"epoch": 7.340001747182668,
"grad_norm": 8.786434173583984,
"learning_rate": 3.5522207205107895e-06,
"loss": 0.0089,
"step": 21000
},
{
"epoch": 7.5147200139774615,
"grad_norm": 0.1356939673423767,
"learning_rate": 3.319196532600084e-06,
"loss": 0.0074,
"step": 21500
},
{
"epoch": 7.689438280772254,
"grad_norm": 16.782020568847656,
"learning_rate": 3.0861723446893788e-06,
"loss": 0.0096,
"step": 22000
},
{
"epoch": 7.864156547567048,
"grad_norm": 0.0004071469884365797,
"learning_rate": 2.853148156778674e-06,
"loss": 0.007,
"step": 22500
},
{
"epoch": 7.999737922599808,
"eval_accuracy": 0.9819619147449337,
"eval_f1": 0.9819917414068494,
"eval_loss": 0.14060455560684204,
"eval_precision": 0.9820904217450387,
"eval_recall": 0.9819619147449337,
"eval_runtime": 68.6348,
"eval_samples_per_second": 333.592,
"eval_steps_per_second": 20.849,
"step": 22888
},
{
"epoch": 8.039136891762034,
"grad_norm": 0.003943814896047115,
"learning_rate": 2.6201239688679685e-06,
"loss": 0.008,
"step": 23000
},
{
"epoch": 8.213855158556827,
"grad_norm": 12.356912612915039,
"learning_rate": 2.3870997809572637e-06,
"loss": 0.0051,
"step": 23500
},
{
"epoch": 8.38857342535162,
"grad_norm": 0.018336663022637367,
"learning_rate": 2.1540755930465586e-06,
"loss": 0.0053,
"step": 24000
},
{
"epoch": 8.563291692146414,
"grad_norm": 0.031037895008921623,
"learning_rate": 1.9210514051358534e-06,
"loss": 0.0048,
"step": 24500
},
{
"epoch": 8.738009958941207,
"grad_norm": 0.0018096828134730458,
"learning_rate": 1.6884932656009695e-06,
"loss": 0.0041,
"step": 25000
},
{
"epoch": 8.912728225736,
"grad_norm": 0.006106176879256964,
"learning_rate": 1.455935126066086e-06,
"loss": 0.0043,
"step": 25500
},
{
"epoch": 8.999737922599808,
"eval_accuracy": 0.9835779175401816,
"eval_f1": 0.9835697887213877,
"eval_loss": 0.15101368725299835,
"eval_precision": 0.9835675135482098,
"eval_recall": 0.9835779175401816,
"eval_runtime": 68.619,
"eval_samples_per_second": 333.668,
"eval_steps_per_second": 20.854,
"step": 25749
},
{
"epoch": 9.087708569930987,
"grad_norm": 0.00021469616331160069,
"learning_rate": 1.2229109381553806e-06,
"loss": 0.0036,
"step": 26000
},
{
"epoch": 9.26242683672578,
"grad_norm": 0.008365228772163391,
"learning_rate": 9.898867502446755e-07,
"loss": 0.0027,
"step": 26500
},
{
"epoch": 9.437145103520573,
"grad_norm": 0.0006792581407353282,
"learning_rate": 7.568625623339704e-07,
"loss": 0.0028,
"step": 27000
},
{
"epoch": 9.611863370315366,
"grad_norm": 0.004307614639401436,
"learning_rate": 5.238383744232652e-07,
"loss": 0.0021,
"step": 27500
},
{
"epoch": 9.78658163711016,
"grad_norm": 0.0005787264672107995,
"learning_rate": 2.9081418651256006e-07,
"loss": 0.0031,
"step": 28000
},
{
"epoch": 9.961299903904953,
"grad_norm": 0.008150073699653149,
"learning_rate": 5.7789998601854876e-08,
"loss": 0.0019,
"step": 28500
},
{
"epoch": 9.999737922599808,
"eval_accuracy": 0.9842330538085255,
"eval_f1": 0.9842291652151357,
"eval_loss": 0.14871150255203247,
"eval_precision": 0.9842267397484589,
"eval_recall": 0.9842330538085255,
"eval_runtime": 68.5176,
"eval_samples_per_second": 334.162,
"eval_steps_per_second": 20.885,
"step": 28610
}
],
"logging_steps": 500,
"max_steps": 28610,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.001
},
"attributes": {
"early_stopping_patience_counter": 1
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.7066719997407396e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}