|
{ |
|
"best_metric": 0.886983632112237, |
|
"best_model_checkpoint": "./results/finetunes/20250129-172154__microsoft_phi-4__ft/checkpoint-432", |
|
"epoch": 0.1884961884961885, |
|
"eval_steps": 16, |
|
"global_step": 1088, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0003465003465003465, |
|
"grad_norm": 86.02561950683594, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.6643, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.000693000693000693, |
|
"grad_norm": 69.82108306884766, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.471, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0010395010395010396, |
|
"grad_norm": 56.067874908447266, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.6652, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.001386001386001386, |
|
"grad_norm": 20.953514099121094, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.7931, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0017325017325017325, |
|
"grad_norm": 66.40070343017578, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.3449, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.002079002079002079, |
|
"grad_norm": 19.0939998626709, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.4636, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0024255024255024253, |
|
"grad_norm": 7.584406852722168, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.4794, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.002772002772002772, |
|
"grad_norm": 114.57583618164062, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.6834, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.002772002772002772, |
|
"eval_1_ratio_diff": -0.1504286827747467, |
|
"eval_accuracy": 0.7778643803585347, |
|
"eval_f1": 0.7382920110192838, |
|
"eval_loss": 0.5040489435195923, |
|
"eval_precision": 0.8973214285714286, |
|
"eval_recall": 0.6271450858034321, |
|
"eval_runtime": 3804.9177, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0031185031185031187, |
|
"grad_norm": 83.26435089111328, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.5774, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.003465003465003465, |
|
"grad_norm": 59.174560546875, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.7834, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0038115038115038116, |
|
"grad_norm": 26.088254928588867, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.3587, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.004158004158004158, |
|
"grad_norm": 40.37193298339844, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.3989, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0045045045045045045, |
|
"grad_norm": 75.50234985351562, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.5288, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.004851004851004851, |
|
"grad_norm": 37.00468444824219, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.2342, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.005197505197505198, |
|
"grad_norm": 74.53498077392578, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.3931, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.005544005544005544, |
|
"grad_norm": 40.15735626220703, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.1786, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.005544005544005544, |
|
"eval_1_ratio_diff": 0.09586905689789549, |
|
"eval_accuracy": 0.8199532346063912, |
|
"eval_f1": 0.8355871886120997, |
|
"eval_loss": 0.44859689474105835, |
|
"eval_precision": 0.768324607329843, |
|
"eval_recall": 0.9157566302652106, |
|
"eval_runtime": 3804.4441, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.00589050589050589, |
|
"grad_norm": 17.47291374206543, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.628, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.006237006237006237, |
|
"grad_norm": 52.683101654052734, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.8133, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.006583506583506584, |
|
"grad_norm": 12.70147705078125, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.2731, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.00693000693000693, |
|
"grad_norm": 68.10077667236328, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.3009, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.007276507276507277, |
|
"grad_norm": 6.308539867401123, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.625, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.007623007623007623, |
|
"grad_norm": 9.908273696899414, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.0649, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.00796950796950797, |
|
"grad_norm": 42.916542053222656, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.2181, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.008316008316008316, |
|
"grad_norm": 13.416431427001953, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.1225, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.008316008316008316, |
|
"eval_1_ratio_diff": 0.13951675759937643, |
|
"eval_accuracy": 0.8043647700701481, |
|
"eval_f1": 0.8281998631074606, |
|
"eval_loss": 0.7583639025688171, |
|
"eval_precision": 0.7378048780487805, |
|
"eval_recall": 0.9438377535101404, |
|
"eval_runtime": 3804.8896, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.008662508662508662, |
|
"grad_norm": 118.25779724121094, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 1.0563, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.009009009009009009, |
|
"grad_norm": 0.019266022369265556, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.1503, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.009355509355509356, |
|
"grad_norm": 0.027064168825745583, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 1.3849, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.009702009702009701, |
|
"grad_norm": 0.009735088795423508, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 2.0022, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.010048510048510048, |
|
"grad_norm": 18.206382751464844, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.6092, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.010395010395010396, |
|
"grad_norm": 107.0077896118164, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 2.1483, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.010741510741510741, |
|
"grad_norm": 11.034400939941406, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.0388, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.011088011088011088, |
|
"grad_norm": 209.00735473632812, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 3.7767, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.011088011088011088, |
|
"eval_1_ratio_diff": 0.031956352299298496, |
|
"eval_accuracy": 0.8667186282151208, |
|
"eval_f1": 0.8707482993197279, |
|
"eval_loss": 0.5764707922935486, |
|
"eval_precision": 0.844574780058651, |
|
"eval_recall": 0.8985959438377535, |
|
"eval_runtime": 3804.7578, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.011434511434511435, |
|
"grad_norm": 5.225222110748291, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.8703, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.01178101178101178, |
|
"grad_norm": 106.7333984375, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 1.8151, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.012127512127512128, |
|
"grad_norm": 8.81540298461914, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.054, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.012474012474012475, |
|
"grad_norm": 1.4214346408843994, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.0035, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.01282051282051282, |
|
"grad_norm": 1.0482317209243774, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.0068, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.013167013167013167, |
|
"grad_norm": 107.0990219116211, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 2.6722, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.013513513513513514, |
|
"grad_norm": 10.672940254211426, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.3053, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.01386001386001386, |
|
"grad_norm": 99.93098449707031, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 2.6363, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.01386001386001386, |
|
"eval_1_ratio_diff": 0.15666406858924398, |
|
"eval_accuracy": 0.8121590023382697, |
|
"eval_f1": 0.837491571139582, |
|
"eval_loss": 0.9510833621025085, |
|
"eval_precision": 0.7375296912114014, |
|
"eval_recall": 0.968798751950078, |
|
"eval_runtime": 3804.6615, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.014206514206514207, |
|
"grad_norm": 104.2468490600586, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 1.3811, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.014553014553014554, |
|
"grad_norm": 8.673962593078613, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.052, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.0148995148995149, |
|
"grad_norm": 101.39873504638672, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 2.7375, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.015246015246015246, |
|
"grad_norm": 32.66648864746094, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.0983, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.015592515592515593, |
|
"grad_norm": 1.59808349609375, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.0137, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.01593901593901594, |
|
"grad_norm": 0.20981302857398987, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 1.6231, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.016285516285516284, |
|
"grad_norm": 102.21177673339844, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 1.4008, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.016632016632016633, |
|
"grad_norm": 0.24047495424747467, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.4377, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.016632016632016633, |
|
"eval_1_ratio_diff": -0.030397505845674244, |
|
"eval_accuracy": 0.8667186282151208, |
|
"eval_f1": 0.8624296057924377, |
|
"eval_loss": 0.5004476308822632, |
|
"eval_precision": 0.8903654485049833, |
|
"eval_recall": 0.8361934477379095, |
|
"eval_runtime": 3804.5287, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.01697851697851698, |
|
"grad_norm": 25.925752639770508, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 1.2565, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.017325017325017324, |
|
"grad_norm": 17.991100311279297, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.0732, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.017671517671517672, |
|
"grad_norm": 9.48592758178711, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 1.1476, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.018018018018018018, |
|
"grad_norm": 25.491506576538086, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.0673, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.018364518364518363, |
|
"grad_norm": 7.842948913574219, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.3734, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.018711018711018712, |
|
"grad_norm": 7.493876934051514, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.044, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.019057519057519057, |
|
"grad_norm": 1.7811343669891357, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.0072, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.019404019404019403, |
|
"grad_norm": 3.076145648956299, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.0114, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.019404019404019403, |
|
"eval_1_ratio_diff": -0.0093530787217459, |
|
"eval_accuracy": 0.8706157443491817, |
|
"eval_f1": 0.8692913385826772, |
|
"eval_loss": 0.4880940914154053, |
|
"eval_precision": 0.8775834658187599, |
|
"eval_recall": 0.8611544461778471, |
|
"eval_runtime": 3804.9107, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.01975051975051975, |
|
"grad_norm": 0.05920056626200676, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.0008, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.020097020097020097, |
|
"grad_norm": 0.1755896806716919, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.9493, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.020443520443520442, |
|
"grad_norm": 0.08734409511089325, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.0014, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.02079002079002079, |
|
"grad_norm": 0.04737528786063194, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 1.8342, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.021136521136521137, |
|
"grad_norm": 106.53982543945312, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 1.3666, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.021483021483021482, |
|
"grad_norm": 100.54167175292969, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 1.5401, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.02182952182952183, |
|
"grad_norm": 96.30493927001953, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.6086, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.022176022176022176, |
|
"grad_norm": 0.14051453769207, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.4729, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.022176022176022176, |
|
"eval_1_ratio_diff": -0.02883865939204988, |
|
"eval_accuracy": 0.8713951675759938, |
|
"eval_f1": 0.8674698795180723, |
|
"eval_loss": 0.6076126098632812, |
|
"eval_precision": 0.8940397350993378, |
|
"eval_recall": 0.8424336973478939, |
|
"eval_runtime": 3804.7525, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.02252252252252252, |
|
"grad_norm": 10.388425827026367, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.1845, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.02286902286902287, |
|
"grad_norm": 199.0683135986328, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 1.7664, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.023215523215523216, |
|
"grad_norm": 107.45567321777344, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 2.095, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.02356202356202356, |
|
"grad_norm": 0.16609467566013336, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 1.0607, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.02390852390852391, |
|
"grad_norm": 17.739356994628906, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.0496, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.024255024255024255, |
|
"grad_norm": 0.5711311101913452, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.652, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.0246015246015246, |
|
"grad_norm": 12.555821418762207, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.0351, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.02494802494802495, |
|
"grad_norm": 21.08517837524414, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.0624, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.02494802494802495, |
|
"eval_1_ratio_diff": 0.08106001558846454, |
|
"eval_accuracy": 0.8597038191738113, |
|
"eval_f1": 0.8701298701298701, |
|
"eval_loss": 0.7122698426246643, |
|
"eval_precision": 0.8093959731543624, |
|
"eval_recall": 0.9407176287051482, |
|
"eval_runtime": 3805.0376, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.025294525294525295, |
|
"grad_norm": 46.71989822387695, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.152, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.02564102564102564, |
|
"grad_norm": 2.8813726902008057, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.0211, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.02598752598752599, |
|
"grad_norm": 99.26583099365234, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 1.1805, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.026334026334026334, |
|
"grad_norm": 99.44942474365234, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 2.6296, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.02668052668052668, |
|
"grad_norm": 100.0189208984375, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 2.0272, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.02702702702702703, |
|
"grad_norm": 0.5884932279586792, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.0016, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.027373527373527374, |
|
"grad_norm": 0.00022523404913954437, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.0, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.02772002772002772, |
|
"grad_norm": 38.54767608642578, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.1184, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.02772002772002772, |
|
"eval_1_ratio_diff": -0.07560405300077944, |
|
"eval_accuracy": 0.848012470771629, |
|
"eval_f1": 0.8354430379746836, |
|
"eval_loss": 0.981860339641571, |
|
"eval_precision": 0.9099264705882353, |
|
"eval_recall": 0.7722308892355694, |
|
"eval_runtime": 3804.9612, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.028066528066528068, |
|
"grad_norm": 4.101857848581858e-05, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 2.4463, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.028413028413028413, |
|
"grad_norm": 0.3266747295856476, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.0015, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.02875952875952876, |
|
"grad_norm": 92.61214447021484, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 1.2523, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.029106029106029108, |
|
"grad_norm": 88.13154602050781, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.5995, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.029452529452529453, |
|
"grad_norm": 93.40657043457031, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.7273, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.0297990297990298, |
|
"grad_norm": 0.0034895113203674555, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.0041, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.030145530145530147, |
|
"grad_norm": 1.7105669975280762, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.0051, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.030492030492030493, |
|
"grad_norm": 15.390508651733398, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 1.8573, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.030492030492030493, |
|
"eval_1_ratio_diff": -0.016367887763055367, |
|
"eval_accuracy": 0.8745128604832424, |
|
"eval_f1": 0.8723235527359239, |
|
"eval_loss": 0.67668217420578, |
|
"eval_precision": 0.8870967741935484, |
|
"eval_recall": 0.858034321372855, |
|
"eval_runtime": 3804.3744, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.030838530838530838, |
|
"grad_norm": 0.000692114292178303, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.8897, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.031185031185031187, |
|
"grad_norm": 91.26844024658203, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.7938, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.03153153153153153, |
|
"grad_norm": 0.2341953068971634, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.0028, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.03187803187803188, |
|
"grad_norm": 16.670625686645508, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.0469, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.032224532224532226, |
|
"grad_norm": 94.17417907714844, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.7944, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.03257103257103257, |
|
"grad_norm": 97.2259292602539, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 3.8379, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.03291753291753292, |
|
"grad_norm": 0.2022901326417923, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.0015, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.033264033264033266, |
|
"grad_norm": 0.9778112173080444, |
|
"learning_rate": 2.345481288954896e-05, |
|
"loss": 0.4574, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.033264033264033266, |
|
"eval_1_ratio_diff": -0.01948558067030398, |
|
"eval_accuracy": 0.8791893998441154, |
|
"eval_f1": 0.8766905330151153, |
|
"eval_loss": 0.6556122303009033, |
|
"eval_precision": 0.8944805194805194, |
|
"eval_recall": 0.859594383775351, |
|
"eval_runtime": 3804.2343, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.03361053361053361, |
|
"grad_norm": 2.296571969985962, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.051, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.03395703395703396, |
|
"grad_norm": 95.68350982666016, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 1.1658, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.034303534303534305, |
|
"grad_norm": 0.33530309796333313, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0035, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.03465003465003465, |
|
"grad_norm": 0.3190847337245941, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0046, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.034996534996534996, |
|
"grad_norm": 0.22778594493865967, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0008, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.035343035343035345, |
|
"grad_norm": 61.27931594848633, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.2352, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.03568953568953569, |
|
"grad_norm": 0.07557390630245209, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.2882, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.036036036036036036, |
|
"grad_norm": 69.33809661865234, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.3218, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.036036036036036036, |
|
"eval_1_ratio_diff": -0.015588464536243185, |
|
"eval_accuracy": 0.8799688230709275, |
|
"eval_f1": 0.8779714738510301, |
|
"eval_loss": 0.6531423926353455, |
|
"eval_precision": 0.8921095008051529, |
|
"eval_recall": 0.8642745709828393, |
|
"eval_runtime": 3804.7065, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.036382536382536385, |
|
"grad_norm": 0.0005657664150930941, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.036729036729036726, |
|
"grad_norm": 196.73025512695312, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 4.7327, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.037075537075537075, |
|
"grad_norm": 0.0739484652876854, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0007, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.037422037422037424, |
|
"grad_norm": 45.3680305480957, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.1615, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.037768537768537766, |
|
"grad_norm": 0.040038663893938065, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.5819, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.038115038115038115, |
|
"grad_norm": 96.53976440429688, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 1.343, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.038461538461538464, |
|
"grad_norm": 0.06292600184679031, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.6704, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.038808038808038806, |
|
"grad_norm": 0.059508323669433594, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0004, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.038808038808038806, |
|
"eval_1_ratio_diff": -0.017926734216679674, |
|
"eval_accuracy": 0.8807482462977396, |
|
"eval_f1": 0.8784749801429707, |
|
"eval_loss": 0.6496536731719971, |
|
"eval_precision": 0.8948220064724919, |
|
"eval_recall": 0.8627145085803433, |
|
"eval_runtime": 3804.1312, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.039154539154539154, |
|
"grad_norm": 51.095741271972656, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.1843, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.0395010395010395, |
|
"grad_norm": 2.8822214603424072, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.024, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.039847539847539845, |
|
"grad_norm": 82.8652572631836, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.5421, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.040194040194040194, |
|
"grad_norm": 46.17012405395508, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 1.2965, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.04054054054054054, |
|
"grad_norm": 95.0387191772461, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.854, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.040887040887040885, |
|
"grad_norm": 25.17544174194336, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0751, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.041233541233541234, |
|
"grad_norm": 0.18485400080680847, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0016, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.04158004158004158, |
|
"grad_norm": 28.657712936401367, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0891, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.04158004158004158, |
|
"eval_1_ratio_diff": -0.017926734216679674, |
|
"eval_accuracy": 0.882307092751364, |
|
"eval_f1": 0.8800635424940428, |
|
"eval_loss": 0.6529919505119324, |
|
"eval_precision": 0.8964401294498382, |
|
"eval_recall": 0.8642745709828393, |
|
"eval_runtime": 3804.0239, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.041926541926541924, |
|
"grad_norm": 1.6055676937103271, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0042, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.04227304227304227, |
|
"grad_norm": 0.9823045134544373, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 2.1695, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.04261954261954262, |
|
"grad_norm": 0.0013798171421512961, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0006, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.042966042966042964, |
|
"grad_norm": 0.12650461494922638, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 1.4929, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.04331254331254331, |
|
"grad_norm": 1.4423363208770752, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0039, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.04365904365904366, |
|
"grad_norm": 0.10318942368030548, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0007, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.044005544005544, |
|
"grad_norm": 0.8829823136329651, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0025, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.04435204435204435, |
|
"grad_norm": 0.7373402714729309, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.002, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.04435204435204435, |
|
"eval_1_ratio_diff": -0.017147310989867492, |
|
"eval_accuracy": 0.8815276695245519, |
|
"eval_f1": 0.8793650793650793, |
|
"eval_loss": 0.645000696182251, |
|
"eval_precision": 0.8949919224555735, |
|
"eval_recall": 0.8642745709828393, |
|
"eval_runtime": 3801.6968, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.0446985446985447, |
|
"grad_norm": 10.895577430725098, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0309, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.04504504504504504, |
|
"grad_norm": 0.052276190370321274, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0007, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.04539154539154539, |
|
"grad_norm": 10.15628719329834, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0267, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.04573804573804574, |
|
"grad_norm": 99.17190551757812, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 1.4613, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.04608454608454608, |
|
"grad_norm": 0.14940449595451355, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0004, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.04643104643104643, |
|
"grad_norm": 0.054311566054821014, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.001, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.04677754677754678, |
|
"grad_norm": 175.11187744140625, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 2.7088, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.04712404712404712, |
|
"grad_norm": 99.04315185546875, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.3569, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.04712404712404712, |
|
"eval_1_ratio_diff": 0.0, |
|
"eval_accuracy": 0.8830865159781761, |
|
"eval_f1": 0.8829953198127926, |
|
"eval_loss": 0.6330925822257996, |
|
"eval_precision": 0.8829953198127926, |
|
"eval_recall": 0.8829953198127926, |
|
"eval_runtime": 3801.9691, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.04747054747054747, |
|
"grad_norm": 0.02164226956665516, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0002, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.04781704781704782, |
|
"grad_norm": 0.0005396510241553187, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.04816354816354816, |
|
"grad_norm": 0.11029522866010666, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0008, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.04851004851004851, |
|
"grad_norm": 0.056431207805871964, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0002, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.04885654885654886, |
|
"grad_norm": 0.05988427251577377, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0007, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.0492030492030492, |
|
"grad_norm": 0.038764405995607376, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0003, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.04954954954954955, |
|
"grad_norm": 0.042382217943668365, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0004, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.0498960498960499, |
|
"grad_norm": 0.08615046739578247, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 1.0097, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.0498960498960499, |
|
"eval_1_ratio_diff": 0.002338269680436489, |
|
"eval_accuracy": 0.882307092751364, |
|
"eval_f1": 0.8824902723735408, |
|
"eval_loss": 0.636997401714325, |
|
"eval_precision": 0.8804347826086957, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3802.1055, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.05024255024255024, |
|
"grad_norm": 0.03646495193243027, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 1.8997, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.05058905058905059, |
|
"grad_norm": 96.74433135986328, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 1.111, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.05093555093555094, |
|
"grad_norm": 0.34065139293670654, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 1.1679, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.05128205128205128, |
|
"grad_norm": 0.07861107587814331, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0003, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.05162855162855163, |
|
"grad_norm": 0.1301509290933609, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0006, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.05197505197505198, |
|
"grad_norm": 0.19169773161411285, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0006, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.05232155232155232, |
|
"grad_norm": 0.07935987412929535, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0007, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.05266805266805267, |
|
"grad_norm": 0.38771602511405945, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0037, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.05266805266805267, |
|
"eval_1_ratio_diff": -0.010911925175370263, |
|
"eval_accuracy": 0.8846453624318005, |
|
"eval_f1": 0.8832807570977917, |
|
"eval_loss": 0.6364841461181641, |
|
"eval_precision": 0.8931419457735247, |
|
"eval_recall": 0.8736349453978159, |
|
"eval_runtime": 3801.9684, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.05301455301455302, |
|
"grad_norm": 0.006659930571913719, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.05336105336105336, |
|
"grad_norm": 0.036822691559791565, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0001, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.05370755370755371, |
|
"grad_norm": 0.08990409970283508, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0004, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.05405405405405406, |
|
"grad_norm": 18.95056915283203, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0538, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.0544005544005544, |
|
"grad_norm": 0.5704998970031738, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0048, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.05474705474705475, |
|
"grad_norm": 0.26951637864112854, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0007, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.0550935550935551, |
|
"grad_norm": 0.029281673952937126, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0046, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.05544005544005544, |
|
"grad_norm": 0.11716895550489426, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0003, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.05544005544005544, |
|
"eval_1_ratio_diff": 0.000779423226812126, |
|
"eval_accuracy": 0.8838659392049883, |
|
"eval_f1": 0.8838659392049883, |
|
"eval_loss": 0.6468711495399475, |
|
"eval_precision": 0.883177570093458, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3801.9776, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.05578655578655579, |
|
"grad_norm": 0.0007287299376912415, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.056133056133056136, |
|
"grad_norm": 0.49943840503692627, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0016, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.05647955647955648, |
|
"grad_norm": 66.82319641113281, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.6704, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.05682605682605683, |
|
"grad_norm": 0.013491444289684296, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0001, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.057172557172557176, |
|
"grad_norm": 22.91457176208496, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0746, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.05751905751905752, |
|
"grad_norm": 0.12269003689289093, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0005, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.057865557865557866, |
|
"grad_norm": 0.27446281909942627, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.002, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.058212058212058215, |
|
"grad_norm": 0.0032145013101398945, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0001, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.058212058212058215, |
|
"eval_1_ratio_diff": -0.006235385814497285, |
|
"eval_accuracy": 0.8862042088854248, |
|
"eval_f1": 0.8854003139717426, |
|
"eval_loss": 0.6679523587226868, |
|
"eval_precision": 0.8909952606635071, |
|
"eval_recall": 0.8798751950078003, |
|
"eval_runtime": 3801.9487, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.05855855855855856, |
|
"grad_norm": 0.025797845795750618, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.7783, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.058905058905058906, |
|
"grad_norm": 46.18172073364258, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.1467, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.059251559251559255, |
|
"grad_norm": 14.45876407623291, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0406, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.0595980595980596, |
|
"grad_norm": 0.057107917964458466, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0016, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.059944559944559946, |
|
"grad_norm": 0.2026216834783554, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0008, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.060291060291060294, |
|
"grad_norm": 0.0018065160838887095, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.1469, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.060637560637560636, |
|
"grad_norm": 0.2094137966632843, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.004, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.060984060984060985, |
|
"grad_norm": 0.04397908225655556, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0003, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.060984060984060985, |
|
"eval_1_ratio_diff": 0.00623538581449723, |
|
"eval_accuracy": 0.8815276695245519, |
|
"eval_f1": 0.8821705426356589, |
|
"eval_loss": 0.6661366820335388, |
|
"eval_precision": 0.8767334360554699, |
|
"eval_recall": 0.8876755070202809, |
|
"eval_runtime": 3801.813, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.061330561330561334, |
|
"grad_norm": 0.3771282434463501, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.001, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.061677061677061676, |
|
"grad_norm": 0.006506125908344984, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0003, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.062023562023562025, |
|
"grad_norm": 0.04021889716386795, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0001, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.062370062370062374, |
|
"grad_norm": 0.03679339215159416, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0002, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.06271656271656272, |
|
"grad_norm": 1.2173601388931274, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0037, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.06306306306306306, |
|
"grad_norm": 0.01900528371334076, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0001, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.06340956340956341, |
|
"grad_norm": 2.7245795726776123, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 2.5894, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.06375606375606375, |
|
"grad_norm": 0.049179498106241226, |
|
"learning_rate": 2.3454812889548964e-06, |
|
"loss": 0.0003, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.06375606375606375, |
|
"eval_1_ratio_diff": 0.0038971161340607408, |
|
"eval_accuracy": 0.882307092751364, |
|
"eval_f1": 0.8826728826728827, |
|
"eval_loss": 0.6829211711883545, |
|
"eval_precision": 0.8792569659442725, |
|
"eval_recall": 0.8861154446177847, |
|
"eval_runtime": 3801.791, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.0641025641025641, |
|
"grad_norm": 0.01943264901638031, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0001, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.06444906444906445, |
|
"grad_norm": 0.01157083548605442, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 1.9679, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.0647955647955648, |
|
"grad_norm": 1.1828685998916626, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0031, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.06514206514206514, |
|
"grad_norm": 1.0368878841400146, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0026, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.06548856548856549, |
|
"grad_norm": 0.016720596700906754, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0001, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.06583506583506583, |
|
"grad_norm": 92.59363555908203, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.7584, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.06618156618156618, |
|
"grad_norm": 85.17770385742188, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 1.4196, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.06652806652806653, |
|
"grad_norm": 0.10658948123455048, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0003, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.06652806652806653, |
|
"eval_1_ratio_diff": 0.002338269680436489, |
|
"eval_accuracy": 0.882307092751364, |
|
"eval_f1": 0.8824902723735408, |
|
"eval_loss": 0.6842545866966248, |
|
"eval_precision": 0.8804347826086957, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3801.8143, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.06687456687456687, |
|
"grad_norm": 0.14093957841396332, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0004, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.06722106722106722, |
|
"grad_norm": 0.016837403178215027, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0001, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.06756756756756757, |
|
"grad_norm": 0.32615694403648376, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.001, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.06791406791406791, |
|
"grad_norm": 0.0006421082653105259, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0019, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.06826056826056826, |
|
"grad_norm": 0.19320330023765564, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0005, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.06860706860706861, |
|
"grad_norm": 0.11701953411102295, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 1.7289, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.06895356895356895, |
|
"grad_norm": 95.60856628417969, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.9569, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.0693000693000693, |
|
"grad_norm": 45.88581085205078, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.1621, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.0693000693000693, |
|
"eval_1_ratio_diff": 0.000779423226812126, |
|
"eval_accuracy": 0.8838659392049883, |
|
"eval_f1": 0.8838659392049883, |
|
"eval_loss": 0.6871820092201233, |
|
"eval_precision": 0.883177570093458, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3801.8519, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.06964656964656965, |
|
"grad_norm": 0.018819378688931465, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0001, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.06999306999306999, |
|
"grad_norm": 95.983642578125, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 1.1498, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.07033957033957033, |
|
"grad_norm": 0.26662153005599976, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0017, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.07068607068607069, |
|
"grad_norm": 14.524856567382812, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0997, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.07103257103257103, |
|
"grad_norm": 98.88104248046875, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 3.3431, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.07137907137907137, |
|
"grad_norm": 0.023577246814966202, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0008, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.07172557172557173, |
|
"grad_norm": 0.0023175834212452173, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 1.5407, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.07207207207207207, |
|
"grad_norm": 0.04252276197075844, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0002, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.07207207207207207, |
|
"eval_1_ratio_diff": -0.0015588464536243074, |
|
"eval_accuracy": 0.8846453624318005, |
|
"eval_f1": 0.884375, |
|
"eval_loss": 0.6888303756713867, |
|
"eval_precision": 0.8857589984350548, |
|
"eval_recall": 0.8829953198127926, |
|
"eval_runtime": 3802.0883, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.07241857241857241, |
|
"grad_norm": 0.016715947538614273, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0007, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.07276507276507277, |
|
"grad_norm": 1.2063138484954834, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0032, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.07311157311157311, |
|
"grad_norm": 79.85220336914062, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.389, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.07345807345807345, |
|
"grad_norm": 0.020562905818223953, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0001, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.07380457380457381, |
|
"grad_norm": 0.09144292026758194, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0003, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.07415107415107415, |
|
"grad_norm": 0.320324182510376, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 2.3083, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.07449757449757449, |
|
"grad_norm": 0.01271853782236576, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0001, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.07484407484407485, |
|
"grad_norm": 0.5471435785293579, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0014, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.07484407484407485, |
|
"eval_1_ratio_diff": -0.0038971161340607963, |
|
"eval_accuracy": 0.886983632112237, |
|
"eval_f1": 0.8864526233359437, |
|
"eval_loss": 0.6897381544113159, |
|
"eval_precision": 0.889937106918239, |
|
"eval_recall": 0.8829953198127926, |
|
"eval_runtime": 3802.2915, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.07519057519057519, |
|
"grad_norm": 48.383155822753906, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 3.1628, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.07553707553707553, |
|
"grad_norm": 0.08458105474710464, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 2.3924, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.07588357588357589, |
|
"grad_norm": 102.9222640991211, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 1.1745, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.07623007623007623, |
|
"grad_norm": 0.4161507785320282, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0012, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.07657657657657657, |
|
"grad_norm": 8.339736938476562, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0215, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.07692307692307693, |
|
"grad_norm": 13.342203140258789, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0368, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.07726957726957727, |
|
"grad_norm": 0.16003106534481049, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 1.1956, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.07761607761607761, |
|
"grad_norm": 0.4954013526439667, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0014, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.07761607761607761, |
|
"eval_1_ratio_diff": -0.0038971161340607963, |
|
"eval_accuracy": 0.886983632112237, |
|
"eval_f1": 0.8864526233359437, |
|
"eval_loss": 0.6851783990859985, |
|
"eval_precision": 0.889937106918239, |
|
"eval_recall": 0.8829953198127926, |
|
"eval_runtime": 3802.2925, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.07796257796257797, |
|
"grad_norm": 102.0475845336914, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 1.8356, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.07830907830907831, |
|
"grad_norm": 100.94009399414062, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 1.3047, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.07865557865557865, |
|
"grad_norm": 1.5232117176055908, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 1.9998, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.079002079002079, |
|
"grad_norm": 0.28479334712028503, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.3865, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.07934857934857935, |
|
"grad_norm": 0.4387876093387604, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0023, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.07969507969507969, |
|
"grad_norm": 0.09818091988563538, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0003, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.08004158004158005, |
|
"grad_norm": 15.28391170501709, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.1902, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.08038808038808039, |
|
"grad_norm": 0.02123742178082466, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0003, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.08038808038808039, |
|
"eval_1_ratio_diff": -0.0015588464536243074, |
|
"eval_accuracy": 0.8846453624318005, |
|
"eval_f1": 0.884375, |
|
"eval_loss": 0.6811160445213318, |
|
"eval_precision": 0.8857589984350548, |
|
"eval_recall": 0.8829953198127926, |
|
"eval_runtime": 3802.0709, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.08073458073458073, |
|
"grad_norm": 1.7175835371017456, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.1125, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.08108108108108109, |
|
"grad_norm": 0.018997719511389732, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0001, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.08142758142758143, |
|
"grad_norm": 0.009305741637945175, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.08177408177408177, |
|
"grad_norm": 0.016941837966442108, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0001, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.08212058212058213, |
|
"grad_norm": 0.039568159729242325, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0001, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.08246708246708247, |
|
"grad_norm": 0.0007386144134216011, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0096, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.08281358281358281, |
|
"grad_norm": 103.17326354980469, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 1.6978, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.08316008316008316, |
|
"grad_norm": 0.013824285939335823, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 1.9363, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.08316008316008316, |
|
"eval_1_ratio_diff": -0.0007794232268121815, |
|
"eval_accuracy": 0.8854247856586126, |
|
"eval_f1": 0.8852459016393442, |
|
"eval_loss": 0.6808629035949707, |
|
"eval_precision": 0.8859375, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3801.9206, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.0835065835065835, |
|
"grad_norm": 0.8375845551490784, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0107, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.08385308385308385, |
|
"grad_norm": 0.024480916559696198, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0002, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.0841995841995842, |
|
"grad_norm": 81.85368347167969, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.488, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.08454608454608455, |
|
"grad_norm": 0.019407041370868683, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.3867, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.08489258489258489, |
|
"grad_norm": 0.12843383848667145, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0005, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.08523908523908524, |
|
"grad_norm": 0.011416368186473846, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0004, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.08558558558558559, |
|
"grad_norm": 0.032860685139894485, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.2092, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.08593208593208593, |
|
"grad_norm": 92.4006576538086, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.765, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.08593208593208593, |
|
"eval_1_ratio_diff": -0.0015588464536243074, |
|
"eval_accuracy": 0.8846453624318005, |
|
"eval_f1": 0.884375, |
|
"eval_loss": 0.6817346811294556, |
|
"eval_precision": 0.8857589984350548, |
|
"eval_recall": 0.8829953198127926, |
|
"eval_runtime": 3802.0239, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.08627858627858628, |
|
"grad_norm": 0.006762477569282055, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0001, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.08662508662508663, |
|
"grad_norm": 0.021069064736366272, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0083, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08697158697158697, |
|
"grad_norm": 0.017759568989276886, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0003, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.08731808731808732, |
|
"grad_norm": 100.52206420898438, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 2.3802, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.08766458766458766, |
|
"grad_norm": 105.62979125976562, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 3.7172, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.088011088011088, |
|
"grad_norm": 0.34661927819252014, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.003, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.08835758835758836, |
|
"grad_norm": 99.1572265625, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 2.9664, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.0887040887040887, |
|
"grad_norm": 0.0003038525173906237, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0001, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.0887040887040887, |
|
"eval_1_ratio_diff": 0.0, |
|
"eval_accuracy": 0.8846453624318005, |
|
"eval_f1": 0.8845553822152886, |
|
"eval_loss": 0.6775835752487183, |
|
"eval_precision": 0.8845553822152886, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3802.2121, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.08905058905058905, |
|
"grad_norm": 0.016024667769670486, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 2.7406, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.0893970893970894, |
|
"grad_norm": 100.74127960205078, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 2.0749, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.08974358974358974, |
|
"grad_norm": 0.0011968504404649138, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.09009009009009009, |
|
"grad_norm": 2.873019218444824, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0077, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.09043659043659044, |
|
"grad_norm": 0.018814735114574432, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0001, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.09078309078309078, |
|
"grad_norm": 0.0029318886809051037, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.09112959112959113, |
|
"grad_norm": 0.009292243048548698, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0013, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.09147609147609148, |
|
"grad_norm": 0.013486395590007305, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0001, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.09147609147609148, |
|
"eval_1_ratio_diff": 0.000779423226812126, |
|
"eval_accuracy": 0.8838659392049883, |
|
"eval_f1": 0.8838659392049883, |
|
"eval_loss": 0.6737587451934814, |
|
"eval_precision": 0.883177570093458, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3802.0754, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.09182259182259182, |
|
"grad_norm": 0.030322887003421783, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 2.1096, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.09216909216909216, |
|
"grad_norm": 0.053928058594465256, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0004, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.09251559251559252, |
|
"grad_norm": 102.34916687011719, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 2.1453, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.09286209286209286, |
|
"grad_norm": 0.9909892678260803, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0034, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.0932085932085932, |
|
"grad_norm": 95.83029174804688, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.613, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.09355509355509356, |
|
"grad_norm": 108.8365707397461, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 2.8814, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.0939015939015939, |
|
"grad_norm": 1.0528830289840698, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.004, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.09424809424809424, |
|
"grad_norm": 0.0003182841173838824, |
|
"learning_rate": 2.3454812889548964e-07, |
|
"loss": 0.0, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.09424809424809424, |
|
"eval_1_ratio_diff": 0.0, |
|
"eval_accuracy": 0.8846453624318005, |
|
"eval_f1": 0.8845553822152886, |
|
"eval_loss": 0.6737085580825806, |
|
"eval_precision": 0.8845553822152886, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3801.9716, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.0945945945945946, |
|
"grad_norm": 0.010217499919235706, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0015, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.09494109494109494, |
|
"grad_norm": 0.3472006022930145, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0009, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.09528759528759528, |
|
"grad_norm": 92.27816772460938, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 1.5726, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.09563409563409564, |
|
"grad_norm": 0.005271604750305414, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.4792, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.09598059598059598, |
|
"grad_norm": 98.78565216064453, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 2.797, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.09632709632709632, |
|
"grad_norm": 43.5427131652832, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.1413, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.09667359667359668, |
|
"grad_norm": 0.0004461357893887907, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0002, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.09702009702009702, |
|
"grad_norm": 0.028169138357043266, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 2.1394, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.09702009702009702, |
|
"eval_1_ratio_diff": 0.0, |
|
"eval_accuracy": 0.8846453624318005, |
|
"eval_f1": 0.8845553822152886, |
|
"eval_loss": 0.6733962893486023, |
|
"eval_precision": 0.8845553822152886, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3802.0044, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.09736659736659736, |
|
"grad_norm": 0.0005732557619921863, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.09771309771309772, |
|
"grad_norm": 106.34477996826172, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 2.1237, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.09805959805959806, |
|
"grad_norm": 0.2676280438899994, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0008, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.0984060984060984, |
|
"grad_norm": 0.001712639699690044, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0032, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.09875259875259876, |
|
"grad_norm": 0.03806428983807564, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0001, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.0990990990990991, |
|
"grad_norm": 0.9465712308883667, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0024, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.09944559944559944, |
|
"grad_norm": 1.9612773656845093, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 1.4378, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.0997920997920998, |
|
"grad_norm": 100.2083969116211, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 1.7637, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.0997920997920998, |
|
"eval_1_ratio_diff": 0.0, |
|
"eval_accuracy": 0.8846453624318005, |
|
"eval_f1": 0.8845553822152886, |
|
"eval_loss": 0.6731467843055725, |
|
"eval_precision": 0.8845553822152886, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3803.3785, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.10013860013860014, |
|
"grad_norm": 91.13343811035156, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.6642, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.10048510048510048, |
|
"grad_norm": 0.021487416699528694, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0007, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.10083160083160084, |
|
"grad_norm": 0.017454765737056732, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.10117810117810118, |
|
"grad_norm": 0.001247262000106275, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.10152460152460152, |
|
"grad_norm": 0.01062464714050293, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0001, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.10187110187110188, |
|
"grad_norm": 111.11028289794922, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 5.5258, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.10221760221760222, |
|
"grad_norm": 0.04351111128926277, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0003, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.10256410256410256, |
|
"grad_norm": 0.013359926640987396, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0001, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.10256410256410256, |
|
"eval_1_ratio_diff": 0.0, |
|
"eval_accuracy": 0.8846453624318005, |
|
"eval_f1": 0.8845553822152886, |
|
"eval_loss": 0.6729430556297302, |
|
"eval_precision": 0.8845553822152886, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3802.5461, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.10291060291060292, |
|
"grad_norm": 0.05155809223651886, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0001, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.10325710325710326, |
|
"grad_norm": 99.85391998291016, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 1.0198, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.1036036036036036, |
|
"grad_norm": 10.50502872467041, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 1.4264, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.10395010395010396, |
|
"grad_norm": 0.01816392131149769, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 2.5265, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1042966042966043, |
|
"grad_norm": 38.740116119384766, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.2613, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.10464310464310464, |
|
"grad_norm": 0.011164786294102669, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0001, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.104989604989605, |
|
"grad_norm": 0.003219365607947111, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.10533610533610534, |
|
"grad_norm": 0.0027089028153568506, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0005, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.10533610533610534, |
|
"eval_1_ratio_diff": 0.0, |
|
"eval_accuracy": 0.8846453624318005, |
|
"eval_f1": 0.8845553822152886, |
|
"eval_loss": 0.6724082827568054, |
|
"eval_precision": 0.8845553822152886, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3802.7555, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.10568260568260568, |
|
"grad_norm": 3.065094232559204, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0082, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.10602910602910603, |
|
"grad_norm": 0.002183800796046853, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.10637560637560638, |
|
"grad_norm": 104.22856140136719, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 1.6673, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.10672210672210672, |
|
"grad_norm": 0.039993271231651306, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 2.5715, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.10706860706860707, |
|
"grad_norm": 81.30303955078125, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 1.5197, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.10741510741510742, |
|
"grad_norm": 0.011346640065312386, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0006, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.10776160776160776, |
|
"grad_norm": 6.598723411560059, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 1.8566, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.10810810810810811, |
|
"grad_norm": 31.924484252929688, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 2.4598, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.10810810810810811, |
|
"eval_1_ratio_diff": 0.0, |
|
"eval_accuracy": 0.8846453624318005, |
|
"eval_f1": 0.8845553822152886, |
|
"eval_loss": 0.6718815565109253, |
|
"eval_precision": 0.8845553822152886, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3803.0388, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.10845460845460846, |
|
"grad_norm": 0.2010572999715805, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0006, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.1088011088011088, |
|
"grad_norm": 0.026479966938495636, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0002, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.10914760914760915, |
|
"grad_norm": 95.62553405761719, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 1.2849, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.1094941094941095, |
|
"grad_norm": 0.10254587233066559, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.08, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.10984060984060984, |
|
"grad_norm": 102.76111602783203, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 1.4606, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.1101871101871102, |
|
"grad_norm": 0.008490975946187973, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0003, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.11053361053361054, |
|
"grad_norm": 0.08022568374872208, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0004, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.11088011088011088, |
|
"grad_norm": 79.22454833984375, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.8434, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.11088011088011088, |
|
"eval_1_ratio_diff": 0.0, |
|
"eval_accuracy": 0.8846453624318005, |
|
"eval_f1": 0.8845553822152886, |
|
"eval_loss": 0.6716776490211487, |
|
"eval_precision": 0.8845553822152886, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3802.9999, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.11122661122661123, |
|
"grad_norm": 0.44935956597328186, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0074, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.11157311157311157, |
|
"grad_norm": 0.178094282746315, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0005, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.11191961191961192, |
|
"grad_norm": 0.0018482008017599583, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0001, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.11226611226611227, |
|
"grad_norm": 0.659843385219574, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.6941, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.11261261261261261, |
|
"grad_norm": 104.63482666015625, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 2.2871, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.11295911295911296, |
|
"grad_norm": 0.0014776097377762198, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0001, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.11330561330561331, |
|
"grad_norm": 0.003828430315479636, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.11365211365211365, |
|
"grad_norm": 49.71247100830078, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 2.524, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.11365211365211365, |
|
"eval_1_ratio_diff": 0.0, |
|
"eval_accuracy": 0.8846453624318005, |
|
"eval_f1": 0.8845553822152886, |
|
"eval_loss": 0.671708881855011, |
|
"eval_precision": 0.8845553822152886, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3802.9149, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.113998613998614, |
|
"grad_norm": 91.23478698730469, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.5816, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.11434511434511435, |
|
"grad_norm": 0.015174830332398415, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0001, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.1146916146916147, |
|
"grad_norm": 0.04287717118859291, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0002, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.11503811503811504, |
|
"grad_norm": 101.90594482421875, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 3.0768, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.11538461538461539, |
|
"grad_norm": 0.49903520941734314, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0015, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.11573111573111573, |
|
"grad_norm": 7.688581943511963, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0208, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.11607761607761607, |
|
"grad_norm": 84.524169921875, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.45, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.11642411642411643, |
|
"grad_norm": 102.66691589355469, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 2.2867, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.11642411642411643, |
|
"eval_1_ratio_diff": 0.0, |
|
"eval_accuracy": 0.8846453624318005, |
|
"eval_f1": 0.8845553822152886, |
|
"eval_loss": 0.6711748242378235, |
|
"eval_precision": 0.8845553822152886, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3803.1931, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.11677061677061677, |
|
"grad_norm": 99.22567749023438, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 2.1032, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.11711711711711711, |
|
"grad_norm": 0.6447390913963318, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0024, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.11746361746361747, |
|
"grad_norm": 0.9273783564567566, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 1.8471, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.11781011781011781, |
|
"grad_norm": 0.02292543835937977, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 2.831, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.11815661815661815, |
|
"grad_norm": 0.0013903329381719232, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.11850311850311851, |
|
"grad_norm": 0.007690189406275749, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.11884961884961885, |
|
"grad_norm": 78.83476257324219, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.3904, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.1191961191961192, |
|
"grad_norm": 40.72224426269531, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.1344, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.1191961191961192, |
|
"eval_1_ratio_diff": 0.000779423226812126, |
|
"eval_accuracy": 0.8838659392049883, |
|
"eval_f1": 0.8838659392049883, |
|
"eval_loss": 0.6707317233085632, |
|
"eval_precision": 0.883177570093458, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3801.8243, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.11954261954261955, |
|
"grad_norm": 0.020250441506505013, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0034, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.11988911988911989, |
|
"grad_norm": 0.024622227996587753, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0001, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.12023562023562023, |
|
"grad_norm": 0.14477354288101196, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 1.8727, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.12058212058212059, |
|
"grad_norm": 8.177030563354492, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0608, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.12092862092862093, |
|
"grad_norm": 0.03661353141069412, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0004, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.12127512127512127, |
|
"grad_norm": 0.255051851272583, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0007, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.12162162162162163, |
|
"grad_norm": 0.0030992343090474606, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0002, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.12196812196812197, |
|
"grad_norm": 0.005417773965746164, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0024, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.12196812196812197, |
|
"eval_1_ratio_diff": 0.000779423226812126, |
|
"eval_accuracy": 0.8838659392049883, |
|
"eval_f1": 0.8838659392049883, |
|
"eval_loss": 0.6705958247184753, |
|
"eval_precision": 0.883177570093458, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3802.2575, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.12231462231462231, |
|
"grad_norm": 0.03200246021151543, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 1.3592, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.12266112266112267, |
|
"grad_norm": 99.44486236572266, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 1.044, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.12300762300762301, |
|
"grad_norm": 70.03866577148438, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.4255, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.12335412335412335, |
|
"grad_norm": 0.028057171031832695, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0001, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.12370062370062371, |
|
"grad_norm": 207.3563232421875, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 3.7546, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.12404712404712405, |
|
"grad_norm": 0.0021035911049693823, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.0001, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.12439362439362439, |
|
"grad_norm": 98.69009399414062, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 2.157, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.12474012474012475, |
|
"grad_norm": 47.113651275634766, |
|
"learning_rate": 2.3454812889548966e-08, |
|
"loss": 0.1673, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.12474012474012475, |
|
"eval_1_ratio_diff": 0.000779423226812126, |
|
"eval_accuracy": 0.8838659392049883, |
|
"eval_f1": 0.8838659392049883, |
|
"eval_loss": 0.6703583002090454, |
|
"eval_precision": 0.883177570093458, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3802.4945, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.12508662508662508, |
|
"grad_norm": 98.36994934082031, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 3.9073, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.12543312543312543, |
|
"grad_norm": 0.0256715826690197, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 1.0689, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.1257796257796258, |
|
"grad_norm": 0.0007734175305813551, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.12612612612612611, |
|
"grad_norm": 0.04009055346250534, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.1743, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.12647262647262647, |
|
"grad_norm": 0.00048400016385130584, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 1.6207, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.12681912681912683, |
|
"grad_norm": 96.37062072753906, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 1.0242, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.12716562716562715, |
|
"grad_norm": 101.28919982910156, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 2.2, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.1275121275121275, |
|
"grad_norm": 99.32202911376953, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 2.3346, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.1275121275121275, |
|
"eval_1_ratio_diff": 0.000779423226812126, |
|
"eval_accuracy": 0.8838659392049883, |
|
"eval_f1": 0.8838659392049883, |
|
"eval_loss": 0.6703282594680786, |
|
"eval_precision": 0.883177570093458, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3802.7507, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.12785862785862787, |
|
"grad_norm": 0.006389938294887543, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0001, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.1282051282051282, |
|
"grad_norm": 109.00679779052734, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 1.6368, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.12855162855162855, |
|
"grad_norm": 92.6392822265625, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 1.152, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.1288981288981289, |
|
"grad_norm": 0.07208535820245743, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0002, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.12924462924462923, |
|
"grad_norm": 0.04709033668041229, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0001, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.1295911295911296, |
|
"grad_norm": 0.021052101626992226, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0002, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.12993762993762994, |
|
"grad_norm": 0.29711541533470154, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 2.0028, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.13028413028413027, |
|
"grad_norm": 0.04428931698203087, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0004, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.13028413028413027, |
|
"eval_1_ratio_diff": 0.000779423226812126, |
|
"eval_accuracy": 0.8838659392049883, |
|
"eval_f1": 0.8838659392049883, |
|
"eval_loss": 0.6703124642372131, |
|
"eval_precision": 0.883177570093458, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3803.2777, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.13063063063063063, |
|
"grad_norm": 0.3277018666267395, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.7119, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.13097713097713098, |
|
"grad_norm": 0.014452395960688591, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0001, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.1313236313236313, |
|
"grad_norm": 0.006934754550457001, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.7701, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.13167013167013167, |
|
"grad_norm": 0.014140780083835125, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0019, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.13201663201663202, |
|
"grad_norm": 0.06956882029771805, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0002, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.13236313236313235, |
|
"grad_norm": 97.27932739257812, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 1.4485, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.1327096327096327, |
|
"grad_norm": 97.00716400146484, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 1.0216, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.13305613305613306, |
|
"grad_norm": 0.0003238619538024068, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0002, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.13305613305613306, |
|
"eval_1_ratio_diff": 0.000779423226812126, |
|
"eval_accuracy": 0.8838659392049883, |
|
"eval_f1": 0.8838659392049883, |
|
"eval_loss": 0.6702964305877686, |
|
"eval_precision": 0.883177570093458, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3803.644, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.1334026334026334, |
|
"grad_norm": 0.010110430419445038, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.13374913374913375, |
|
"grad_norm": 0.04101982340216637, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.1396, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.1340956340956341, |
|
"grad_norm": 0.004015587270259857, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.13444213444213443, |
|
"grad_norm": 0.25856757164001465, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.613, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.1347886347886348, |
|
"grad_norm": 17.537782669067383, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0506, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.13513513513513514, |
|
"grad_norm": 0.02263958565890789, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 1.7827, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.13548163548163547, |
|
"grad_norm": 0.00452468590810895, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.13582813582813583, |
|
"grad_norm": 97.92658233642578, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 1.7204, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.13582813582813583, |
|
"eval_1_ratio_diff": 0.000779423226812126, |
|
"eval_accuracy": 0.8838659392049883, |
|
"eval_f1": 0.8838659392049883, |
|
"eval_loss": 0.6702899932861328, |
|
"eval_precision": 0.883177570093458, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3803.7672, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.13617463617463618, |
|
"grad_norm": 5.811797142028809, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0154, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.1365211365211365, |
|
"grad_norm": 0.0009533663396723568, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 1.6813, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.13686763686763687, |
|
"grad_norm": 0.21574796736240387, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0006, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.13721413721413722, |
|
"grad_norm": 0.029973836615681648, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0027, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.13756063756063755, |
|
"grad_norm": 0.0021266574040055275, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.1379071379071379, |
|
"grad_norm": 1.722880482673645, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0046, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.13825363825363826, |
|
"grad_norm": 0.1858731210231781, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0415, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.1386001386001386, |
|
"grad_norm": 0.15411172807216644, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0006, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.1386001386001386, |
|
"eval_1_ratio_diff": 0.000779423226812126, |
|
"eval_accuracy": 0.8838659392049883, |
|
"eval_f1": 0.8838659392049883, |
|
"eval_loss": 0.6702878475189209, |
|
"eval_precision": 0.883177570093458, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3802.7764, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.13894663894663895, |
|
"grad_norm": 0.022141670808196068, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.033, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.1392931392931393, |
|
"grad_norm": 6.732369899749756, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0182, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.13963963963963963, |
|
"grad_norm": 0.018432870507240295, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0001, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.13998613998613998, |
|
"grad_norm": 45.47744369506836, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.1551, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.14033264033264034, |
|
"grad_norm": 99.37589263916016, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 2.0688, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.14067914067914067, |
|
"grad_norm": 96.54620361328125, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 1.242, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.14102564102564102, |
|
"grad_norm": 37.72175979614258, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.1738, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.14137214137214138, |
|
"grad_norm": 0.07114993035793304, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0005, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.14137214137214138, |
|
"eval_1_ratio_diff": 0.000779423226812126, |
|
"eval_accuracy": 0.8838659392049883, |
|
"eval_f1": 0.8838659392049883, |
|
"eval_loss": 0.6702972054481506, |
|
"eval_precision": 0.883177570093458, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3802.7247, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.1417186417186417, |
|
"grad_norm": 0.011865837499499321, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.6889, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.14206514206514206, |
|
"grad_norm": 0.008003904484212399, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0004, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.14241164241164242, |
|
"grad_norm": 0.012314215302467346, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0002, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.14275814275814275, |
|
"grad_norm": 14.464447021484375, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0418, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.1431046431046431, |
|
"grad_norm": 0.0005685996729880571, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0002, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.14345114345114346, |
|
"grad_norm": 0.08294668048620224, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0974, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.1437976437976438, |
|
"grad_norm": 0.041373468935489655, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0002, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.14414414414414414, |
|
"grad_norm": 0.04561450332403183, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0003, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.14414414414414414, |
|
"eval_1_ratio_diff": 0.000779423226812126, |
|
"eval_accuracy": 0.8838659392049883, |
|
"eval_f1": 0.8838659392049883, |
|
"eval_loss": 0.6702831387519836, |
|
"eval_precision": 0.883177570093458, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3802.3171, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.1444906444906445, |
|
"grad_norm": 0.10258769989013672, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0004, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.14483714483714483, |
|
"grad_norm": 0.01617802493274212, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0001, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.14518364518364518, |
|
"grad_norm": 1.6996452808380127, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.1919, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.14553014553014554, |
|
"grad_norm": 0.003590661333873868, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.14587664587664587, |
|
"grad_norm": 13.620148658752441, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0395, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.14622314622314622, |
|
"grad_norm": 1.1218122243881226, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 1.6076, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.14656964656964658, |
|
"grad_norm": 1.5158343315124512, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 1.3674, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.1469161469161469, |
|
"grad_norm": 0.009215708822011948, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0003, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.1469161469161469, |
|
"eval_1_ratio_diff": 0.000779423226812126, |
|
"eval_accuracy": 0.8838659392049883, |
|
"eval_f1": 0.8838659392049883, |
|
"eval_loss": 0.6702794432640076, |
|
"eval_precision": 0.883177570093458, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3802.8989, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.14726264726264726, |
|
"grad_norm": 90.3687973022461, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.6296, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.14760914760914762, |
|
"grad_norm": 0.01208855863660574, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0328, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.14795564795564795, |
|
"grad_norm": 0.09359738975763321, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 2.2666, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.1483021483021483, |
|
"grad_norm": 0.0012692202581092715, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0001, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.14864864864864866, |
|
"grad_norm": 0.02175830490887165, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0001, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.14899514899514898, |
|
"grad_norm": 60.99715042114258, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 1.9586, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.14934164934164934, |
|
"grad_norm": 0.07070188969373703, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.9788, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.1496881496881497, |
|
"grad_norm": 0.05645833909511566, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0004, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.1496881496881497, |
|
"eval_1_ratio_diff": 0.000779423226812126, |
|
"eval_accuracy": 0.8838659392049883, |
|
"eval_f1": 0.8838659392049883, |
|
"eval_loss": 0.6702664494514465, |
|
"eval_precision": 0.883177570093458, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3802.2679, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.15003465003465002, |
|
"grad_norm": 0.04621017724275589, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0001, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.15038115038115038, |
|
"grad_norm": 98.49105072021484, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 1.8225, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.15072765072765074, |
|
"grad_norm": 0.5321390628814697, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0015, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.15107415107415106, |
|
"grad_norm": 0.09718296676874161, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0006, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.15142065142065142, |
|
"grad_norm": 0.001202503452077508, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.7257, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.15176715176715178, |
|
"grad_norm": 0.02575511857867241, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0279, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.1521136521136521, |
|
"grad_norm": 0.4885219633579254, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0013, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.15246015246015246, |
|
"grad_norm": 0.019609682261943817, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0002, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.15246015246015246, |
|
"eval_1_ratio_diff": 0.000779423226812126, |
|
"eval_accuracy": 0.8838659392049883, |
|
"eval_f1": 0.8838659392049883, |
|
"eval_loss": 0.6702576279640198, |
|
"eval_precision": 0.883177570093458, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3802.8587, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.15280665280665282, |
|
"grad_norm": 0.0061721052043139935, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0001, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.15315315315315314, |
|
"grad_norm": 0.013560596853494644, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0007, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.1534996534996535, |
|
"grad_norm": 0.3585527837276459, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0009, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.15384615384615385, |
|
"grad_norm": 2.086585521697998, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.3566, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.15419265419265418, |
|
"grad_norm": 0.023040562868118286, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0001, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.15453915453915454, |
|
"grad_norm": 0.017716137692332268, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0099, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.1548856548856549, |
|
"grad_norm": 137.6589813232422, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 2.9727, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.15523215523215522, |
|
"grad_norm": 0.09003114700317383, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.674, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.15523215523215522, |
|
"eval_1_ratio_diff": 0.000779423226812126, |
|
"eval_accuracy": 0.8838659392049883, |
|
"eval_f1": 0.8838659392049883, |
|
"eval_loss": 0.6702753305435181, |
|
"eval_precision": 0.883177570093458, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3802.4603, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.15557865557865558, |
|
"grad_norm": 0.014279451221227646, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 1.1168, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.15592515592515593, |
|
"grad_norm": 3.479095220565796, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0089, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.15627165627165626, |
|
"grad_norm": 0.09242203831672668, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 1.8641, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.15661815661815662, |
|
"grad_norm": 0.01231786422431469, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0002, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.15696465696465697, |
|
"grad_norm": 0.10587727278470993, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0006, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.1573111573111573, |
|
"grad_norm": 70.37655639648438, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.2845, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.15765765765765766, |
|
"grad_norm": 19.5317325592041, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.3898, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.158004158004158, |
|
"grad_norm": 0.010684994980692863, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0001, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.158004158004158, |
|
"eval_1_ratio_diff": 0.000779423226812126, |
|
"eval_accuracy": 0.8838659392049883, |
|
"eval_f1": 0.8838659392049883, |
|
"eval_loss": 0.6702710390090942, |
|
"eval_precision": 0.883177570093458, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3803.9983, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.15835065835065834, |
|
"grad_norm": 47.22987365722656, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.3641, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.1586971586971587, |
|
"grad_norm": 0.014747441746294498, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0001, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.15904365904365905, |
|
"grad_norm": 57.34768295288086, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 6.9391, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.15939015939015938, |
|
"grad_norm": 0.011359051801264286, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.9423, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.15973665973665974, |
|
"grad_norm": 0.0191196296364069, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0001, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.1600831600831601, |
|
"grad_norm": 0.665374219417572, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0018, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.16042966042966042, |
|
"grad_norm": 0.0511230044066906, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0002, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.16077616077616078, |
|
"grad_norm": 0.04708551988005638, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0001, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.16077616077616078, |
|
"eval_1_ratio_diff": 0.000779423226812126, |
|
"eval_accuracy": 0.8838659392049883, |
|
"eval_f1": 0.8838659392049883, |
|
"eval_loss": 0.6702331900596619, |
|
"eval_precision": 0.883177570093458, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3805.3893, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.16112266112266113, |
|
"grad_norm": 102.17916870117188, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 1.7039, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.16146916146916146, |
|
"grad_norm": 0.40711769461631775, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0011, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.16181566181566182, |
|
"grad_norm": 90.60123443603516, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.5507, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.16216216216216217, |
|
"grad_norm": 0.001160971587523818, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0001, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.1625086625086625, |
|
"grad_norm": 4.936448574066162, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0126, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.16285516285516285, |
|
"grad_norm": 0.0048201605677604675, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.1632016632016632, |
|
"grad_norm": 0.0016580702504143119, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.8544, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.16354816354816354, |
|
"grad_norm": 0.019202932715415955, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0005, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.16354816354816354, |
|
"eval_1_ratio_diff": 0.000779423226812126, |
|
"eval_accuracy": 0.8838659392049883, |
|
"eval_f1": 0.8838659392049883, |
|
"eval_loss": 0.6702118515968323, |
|
"eval_precision": 0.883177570093458, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3805.2809, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.1638946638946639, |
|
"grad_norm": 0.028465600684285164, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0001, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.16424116424116425, |
|
"grad_norm": 0.009240957908332348, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0001, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.16458766458766458, |
|
"grad_norm": 101.55481719970703, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.9137, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.16493416493416493, |
|
"grad_norm": 0.03400309756398201, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0002, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.1652806652806653, |
|
"grad_norm": 0.03532765433192253, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0018, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.16562716562716562, |
|
"grad_norm": 0.0010142261162400246, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 1.0985, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.16597366597366597, |
|
"grad_norm": 0.0018879002891480923, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0004, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.16632016632016633, |
|
"grad_norm": 99.82229614257812, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 1.0414, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.16632016632016633, |
|
"eval_1_ratio_diff": 0.000779423226812126, |
|
"eval_accuracy": 0.8838659392049883, |
|
"eval_f1": 0.8838659392049883, |
|
"eval_loss": 0.6702096462249756, |
|
"eval_precision": 0.883177570093458, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3806.2291, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.16666666666666666, |
|
"grad_norm": 0.024355348199605942, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0001, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.167013167013167, |
|
"grad_norm": 0.0004158661758992821, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0001, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.16735966735966737, |
|
"grad_norm": 0.020543742924928665, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0001, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.1677061677061677, |
|
"grad_norm": 0.08691411465406418, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0002, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.16805266805266805, |
|
"grad_norm": 7.804852485656738, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.1703, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.1683991683991684, |
|
"grad_norm": 0.007481154054403305, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0002, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.16874566874566874, |
|
"grad_norm": 102.13117218017578, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 2.2312, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.1690921690921691, |
|
"grad_norm": 49.337467193603516, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.1639, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.1690921690921691, |
|
"eval_1_ratio_diff": 0.000779423226812126, |
|
"eval_accuracy": 0.8838659392049883, |
|
"eval_f1": 0.8838659392049883, |
|
"eval_loss": 0.6701992154121399, |
|
"eval_precision": 0.883177570093458, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3805.4338, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.16943866943866945, |
|
"grad_norm": 91.43649291992188, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.643, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.16978516978516978, |
|
"grad_norm": 0.013101785443723202, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0001, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.17013167013167013, |
|
"grad_norm": 0.19214844703674316, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 1.8884, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.1704781704781705, |
|
"grad_norm": 99.21739959716797, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 2.0275, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.17082467082467082, |
|
"grad_norm": 0.042131222784519196, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0002, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.17117117117117117, |
|
"grad_norm": 0.005484807770699263, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.1135, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.17151767151767153, |
|
"grad_norm": 72.73301696777344, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.31, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.17186417186417186, |
|
"grad_norm": 102.85774230957031, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 1.533, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.17186417186417186, |
|
"eval_1_ratio_diff": 0.000779423226812126, |
|
"eval_accuracy": 0.8838659392049883, |
|
"eval_f1": 0.8838659392049883, |
|
"eval_loss": 0.6701642870903015, |
|
"eval_precision": 0.883177570093458, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3802.1444, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.1722106722106722, |
|
"grad_norm": 5.119304656982422, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.1034, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.17255717255717257, |
|
"grad_norm": 0.05094608664512634, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0011, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.1729036729036729, |
|
"grad_norm": 0.018024293705821037, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0002, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.17325017325017325, |
|
"grad_norm": 0.012675195932388306, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.1735966735966736, |
|
"grad_norm": 101.12162780761719, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 2.4301, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 0.17394317394317393, |
|
"grad_norm": 0.08050279319286346, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0004, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 0.1742896742896743, |
|
"grad_norm": 0.004985997918993235, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0001, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 0.17463617463617465, |
|
"grad_norm": 0.0335320308804512, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0001, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.17463617463617465, |
|
"eval_1_ratio_diff": 0.000779423226812126, |
|
"eval_accuracy": 0.8838659392049883, |
|
"eval_f1": 0.8838659392049883, |
|
"eval_loss": 0.6701443195343018, |
|
"eval_precision": 0.883177570093458, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3803.1697, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.17498267498267497, |
|
"grad_norm": 58.16429138183594, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.2296, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.17532917532917533, |
|
"grad_norm": 0.015778416767716408, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0024, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 0.17567567567567569, |
|
"grad_norm": 0.007109211757779121, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0001, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 0.176022176022176, |
|
"grad_norm": 0.001206032931804657, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 0.17636867636867637, |
|
"grad_norm": 0.0016432058764621615, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 2.3836, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 0.17671517671517672, |
|
"grad_norm": 0.0008347645052708685, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.17706167706167705, |
|
"grad_norm": 0.03109140321612358, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0004, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 0.1774081774081774, |
|
"grad_norm": 0.0419655367732048, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.5482, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.1774081774081774, |
|
"eval_1_ratio_diff": 0.000779423226812126, |
|
"eval_accuracy": 0.8838659392049883, |
|
"eval_f1": 0.8838659392049883, |
|
"eval_loss": 0.6701238751411438, |
|
"eval_precision": 0.883177570093458, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3804.5554, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.17775467775467776, |
|
"grad_norm": 0.032015491276979446, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0001, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 0.1781011781011781, |
|
"grad_norm": 0.0072747585363686085, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0011, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 0.17844767844767845, |
|
"grad_norm": 0.22382956743240356, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0142, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.1787941787941788, |
|
"grad_norm": 9.476286888122559, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.041, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.17914067914067913, |
|
"grad_norm": 61.077667236328125, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 4.2437, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 0.1794871794871795, |
|
"grad_norm": 0.008154085837304592, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 1.9777, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 0.17983367983367984, |
|
"grad_norm": 0.6985426545143127, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.8364, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 0.18018018018018017, |
|
"grad_norm": 0.012879273854196072, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0002, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.18018018018018017, |
|
"eval_1_ratio_diff": 0.000779423226812126, |
|
"eval_accuracy": 0.8838659392049883, |
|
"eval_f1": 0.8838659392049883, |
|
"eval_loss": 0.6700866222381592, |
|
"eval_precision": 0.883177570093458, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3806.3018, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.18052668052668053, |
|
"grad_norm": 98.58509826660156, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 1.8793, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 0.18087318087318088, |
|
"grad_norm": 0.18119540810585022, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0005, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 0.1812196812196812, |
|
"grad_norm": 0.866092324256897, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 1.6971, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 0.18156618156618157, |
|
"grad_norm": 105.97624206542969, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 2.1413, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 0.18191268191268192, |
|
"grad_norm": 20.15652084350586, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.9196, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.18225918225918225, |
|
"grad_norm": 0.022677650675177574, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.2512, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 0.1826056826056826, |
|
"grad_norm": 0.011547055095434189, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 2.2487, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 0.18295218295218296, |
|
"grad_norm": 0.012869571335613728, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0001, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.18295218295218296, |
|
"eval_1_ratio_diff": 0.000779423226812126, |
|
"eval_accuracy": 0.8838659392049883, |
|
"eval_f1": 0.8838659392049883, |
|
"eval_loss": 0.6700414419174194, |
|
"eval_precision": 0.883177570093458, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3803.3482, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.1832986832986833, |
|
"grad_norm": 0.03256943076848984, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0001, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 0.18364518364518365, |
|
"grad_norm": 0.031881775707006454, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0076, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.183991683991684, |
|
"grad_norm": 2.0490903854370117, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.3692, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 0.18433818433818433, |
|
"grad_norm": 103.4950180053711, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 1.1407, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 0.18468468468468469, |
|
"grad_norm": 17.134321212768555, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0473, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 0.18503118503118504, |
|
"grad_norm": 0.029201500117778778, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0009, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 0.18537768537768537, |
|
"grad_norm": 0.01404550950974226, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0005, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.18572418572418573, |
|
"grad_norm": 0.020491065457463264, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0457, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 0.18572418572418573, |
|
"eval_1_ratio_diff": 0.000779423226812126, |
|
"eval_accuracy": 0.8838659392049883, |
|
"eval_f1": 0.8838659392049883, |
|
"eval_loss": 0.6700317859649658, |
|
"eval_precision": 0.883177570093458, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3804.41, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 0.18607068607068608, |
|
"grad_norm": 0.025645218789577484, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0027, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 0.1864171864171864, |
|
"grad_norm": 0.014836100861430168, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0001, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 0.18676368676368676, |
|
"grad_norm": 0.026932615786790848, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 1.992, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 0.18711018711018712, |
|
"grad_norm": 6.111782550811768, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0163, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.18745668745668745, |
|
"grad_norm": 106.83539581298828, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 2.4242, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 0.1878031878031878, |
|
"grad_norm": 0.020867686718702316, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0007, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 0.18814968814968816, |
|
"grad_norm": 0.005603364668786526, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 0.0, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 0.1884961884961885, |
|
"grad_norm": 0.012470322661101818, |
|
"learning_rate": 2.345481288954897e-09, |
|
"loss": 1.0732, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.1884961884961885, |
|
"eval_1_ratio_diff": 0.000779423226812126, |
|
"eval_accuracy": 0.8838659392049883, |
|
"eval_f1": 0.8838659392049883, |
|
"eval_loss": 0.6700374484062195, |
|
"eval_precision": 0.883177570093458, |
|
"eval_recall": 0.8845553822152886, |
|
"eval_runtime": 3802.6858, |
|
"eval_samples_per_second": 0.337, |
|
"eval_steps_per_second": 0.169, |
|
"step": 1088 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 46176, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 8, |
|
"save_steps": 64, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 100, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 41 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.20498247008256e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|