Samuel J. Huskey committed
Commit 9ccfaf8 · 1 Parent(s): 679d600

add: model files

.gitignore ADDED
@@ -0,0 +1 @@
+ *.DS_Store
config.json ADDED
@@ -0,0 +1,34 @@
+ {
+ "_name_or_path": "distilbert_multilingual_cased_greek_latin_classifier",
+ "activation": "gelu",
+ "architectures": [
+ "DistilBertForSequenceClassification"
+ ],
+ "attention_dropout": 0.1,
+ "dim": 768,
+ "dropout": 0.1,
+ "hidden_dim": 3072,
+ "id2label": {
+ "0": "Greek",
+ "1": "Latin"
+ },
+ "initializer_range": 0.02,
+ "label2id": {
+ "Greek": 0,
+ "Latin": 1
+ },
+ "max_position_embeddings": 512,
+ "model_type": "distilbert",
+ "n_heads": 12,
+ "n_layers": 6,
+ "output_past": true,
+ "pad_token_id": 0,
+ "problem_type": "single_label_classification",
+ "qa_dropout": 0.1,
+ "seq_classif_dropout": 0.2,
+ "sinusoidal_pos_embds": false,
+ "tie_weights_": true,
+ "torch_dtype": "float32",
+ "transformers_version": "4.44.1",
+ "vocab_size": 119547
+ }
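
Note: config.json above describes a two-label DistilBERT sequence classifier (6 layers, 768-dim hidden states, multilingual cased vocabulary of 119,547 tokens) mapping id 0 to Greek and id 1 to Latin. A minimal usage sketch, assuming the files in this commit are available as a local directory or Hub repo; the model path below is an assumption, not taken from the commit:

from transformers import pipeline

# Load the classifier described by config.json; swap in the real repo id or
# a local checkout path for the placeholder string below.
clf = pipeline(
    "text-classification",
    model="distilbert_multilingual_cased_greek_latin_classifier",  # assumed path / repo id
)
print(clf(["arma virumque cano", "μῆνιν ἄειδε θεά"]))
# Each result carries a "label" of "Greek" or "Latin" (from id2label above) and a score.
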
greek-latin-emissions.csv ADDED
@@ -0,0 +1,2 @@
+ timestamp,project_name,run_id,experiment_id,duration,emissions,emissions_rate,cpu_power,gpu_power,ram_power,cpu_energy,gpu_energy,ram_energy,energy_consumed,country_name,country_iso_code,region,cloud_provider,cloud_region,os,python_version,codecarbon_version,cpu_count,cpu_model,gpu_count,gpu_model,longitude,latitude,ram_total_size,tracking_mode,on_cloud,pue
+ 2024-12-25T15:16:45,codecarbon,f658b237-20c1-45cf-a8ee-cdb5e8521351,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,591.3640720729998,0.017871522337723503,3.0220845637570942e-05,42.5,105.04535891243202,31.30389261245728,0.006974275640934743,0.025850867347344004,0.005136130757212294,0.03796127374549103,Singapore,SGP,,,,Linux-6.1.85+-x86_64-with-glibc2.35,3.10.12,2.8.2,12,Intel(R) Xeon(R) CPU @ 2.20GHz,1,1 x NVIDIA A100-SXM4-40GB,103.8503,1.2868,83.47704696655273,machine,N,1.0
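
Note: greek-latin-emissions.csv is CodeCarbon output (codecarbon 2.8.2 per the version column) recording about 0.018 kg CO2eq for a ~591-second run on a single NVIDIA A100. A minimal sketch of how such a row is typically produced; only project_name and output_file mirror the CSV above, the training call itself is a placeholder:

from codecarbon import EmissionsTracker

# Track energy and emissions around a training run and append one row to the CSV.
tracker = EmissionsTracker(
    project_name="codecarbon",
    output_file="greek-latin-emissions.csv",
)
tracker.start()
try:
    pass  # trainer.train() would go here
finally:
    kg_co2 = tracker.stop()  # writes a row like the one above
    print(f"{kg_co2:.6f} kg CO2eq")
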
logs/events.out.tfevents.1735139166.49c9ca38522d.6642.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ec7414fa22502f5ef0d7c5f52d365638b066dd1919a6b3a93629d482d49a2c66
+ size 4184
logs/events.out.tfevents.1735139214.49c9ca38522d.7518.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0f838b6e8c8504aef5e7c251628fabfd9a900cf61dc48cc2d905e5a6564c1595
+ size 50378
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2c9296003fd0fb8618db1ed16d87b534346cc0ac4b6124cff8ed59299c5a04e3
+ size 541317368
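
Note: model.safetensors (like the other large binaries in this commit) is stored with Git LFS, so the diff shows only the pointer file, an oid plus a size (about 541 MB here), rather than the weights. Once the actual file is pulled, the tensors can be inspected directly; a minimal sketch:

from safetensors.torch import load_file

# Load the checkpoint weights (requires the real file, not the LFS pointer).
state_dict = load_file("model.safetensors")
print(f"{len(state_dict)} tensors")
print(next(iter(state_dict)))  # name of one parameter tensor
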
optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:21941a651fa3476118e1cdf74e425cc957ab413ac16623c4e0c6b8f4c7b5230f
+ size 1082696890
rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f87605e6c67d5a5d9a11aa1efb02af19902881be7fdf3b3e1c6bbca0fd808e5c
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6f311cf2c6b954a71898a0497794c30a06bad24230f99e1c5541cecdc5621e10
+ size 1064
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "cls_token": "[CLS]",
+ "mask_token": "[MASK]",
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "unk_token": "[UNK]"
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
+ {
+ "added_tokens_decoder": {
+ "0": {
+ "content": "[PAD]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "100": {
+ "content": "[UNK]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "101": {
+ "content": "[CLS]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "102": {
+ "content": "[SEP]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "103": {
+ "content": "[MASK]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "clean_up_tokenization_spaces": false,
+ "cls_token": "[CLS]",
+ "do_lower_case": false,
+ "extra_special_tokens": {},
+ "mask_token": "[MASK]",
+ "model_max_length": 512,
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "strip_accents": null,
+ "tokenize_chinese_chars": true,
+ "tokenizer_class": "DistilBertTokenizer",
+ "unk_token": "[UNK]"
+ }
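
Note: special_tokens_map.json, tokenizer.json, tokenizer_config.json, and vocab.txt together define the cased multilingual WordPiece tokenizer (DistilBertTokenizer, model_max_length 512) used by the classifier. A minimal sketch of loading it from this commit; the local path is an assumption:

from transformers import AutoTokenizer

# AutoTokenizer selects the DistilBert tokenizer class from tokenizer_config.json above.
tok = AutoTokenizer.from_pretrained(".")  # assumed: run inside a checkout of this repo
ids = tok("In principio erat Verbum")["input_ids"]
print(tok.convert_ids_to_tokens(ids))
# The sequence is wrapped in [CLS] (id 101) ... [SEP] (id 102),
# matching added_tokens_decoder above.
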
trainer_state.json ADDED
@@ -0,0 +1,1109 @@
1
+ {
2
+ "best_metric": 0.9635820582698403,
3
+ "best_model_checkpoint": "./distilbert_multilingual_cased_greek_latin_classifiergreek/checkpoint-14160",
4
+ "epoch": 8.0,
5
+ "eval_steps": 500,
6
+ "global_step": 14160,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.05649717514124294,
13
+ "grad_norm": 9.908761978149414,
14
+ "learning_rate": 4.9858757062146896e-05,
15
+ "loss": 0.4326,
16
+ "step": 100
17
+ },
18
+ {
19
+ "epoch": 0.11299435028248588,
20
+ "grad_norm": 0.47730961441993713,
21
+ "learning_rate": 4.971751412429379e-05,
22
+ "loss": 0.2915,
23
+ "step": 200
24
+ },
25
+ {
26
+ "epoch": 0.1694915254237288,
27
+ "grad_norm": 5.969715595245361,
28
+ "learning_rate": 4.957627118644068e-05,
29
+ "loss": 0.2426,
30
+ "step": 300
31
+ },
32
+ {
33
+ "epoch": 0.22598870056497175,
34
+ "grad_norm": 2.764862537384033,
35
+ "learning_rate": 4.9435028248587575e-05,
36
+ "loss": 0.256,
37
+ "step": 400
38
+ },
39
+ {
40
+ "epoch": 0.2824858757062147,
41
+ "grad_norm": 12.726994514465332,
42
+ "learning_rate": 4.929378531073446e-05,
43
+ "loss": 0.2165,
44
+ "step": 500
45
+ },
46
+ {
47
+ "epoch": 0.3389830508474576,
48
+ "grad_norm": 1.1394743919372559,
49
+ "learning_rate": 4.915254237288136e-05,
50
+ "loss": 0.2168,
51
+ "step": 600
52
+ },
53
+ {
54
+ "epoch": 0.3954802259887006,
55
+ "grad_norm": 4.321178913116455,
56
+ "learning_rate": 4.9011299435028255e-05,
57
+ "loss": 0.2059,
58
+ "step": 700
59
+ },
60
+ {
61
+ "epoch": 0.4519774011299435,
62
+ "grad_norm": 18.238351821899414,
63
+ "learning_rate": 4.887005649717514e-05,
64
+ "loss": 0.2294,
65
+ "step": 800
66
+ },
67
+ {
68
+ "epoch": 0.5084745762711864,
69
+ "grad_norm": 7.210486888885498,
70
+ "learning_rate": 4.8728813559322034e-05,
71
+ "loss": 0.1904,
72
+ "step": 900
73
+ },
74
+ {
75
+ "epoch": 0.5649717514124294,
76
+ "grad_norm": 1.240628719329834,
77
+ "learning_rate": 4.8587570621468934e-05,
78
+ "loss": 0.1668,
79
+ "step": 1000
80
+ },
81
+ {
82
+ "epoch": 0.6214689265536724,
83
+ "grad_norm": 9.029092788696289,
84
+ "learning_rate": 4.844632768361582e-05,
85
+ "loss": 0.18,
86
+ "step": 1100
87
+ },
88
+ {
89
+ "epoch": 0.6779661016949152,
90
+ "grad_norm": 4.866886615753174,
91
+ "learning_rate": 4.8305084745762714e-05,
92
+ "loss": 0.2082,
93
+ "step": 1200
94
+ },
95
+ {
96
+ "epoch": 0.7344632768361582,
97
+ "grad_norm": 7.6279778480529785,
98
+ "learning_rate": 4.816384180790961e-05,
99
+ "loss": 0.1818,
100
+ "step": 1300
101
+ },
102
+ {
103
+ "epoch": 0.7909604519774012,
104
+ "grad_norm": 8.820233345031738,
105
+ "learning_rate": 4.80225988700565e-05,
106
+ "loss": 0.1853,
107
+ "step": 1400
108
+ },
109
+ {
110
+ "epoch": 0.847457627118644,
111
+ "grad_norm": 22.571056365966797,
112
+ "learning_rate": 4.788135593220339e-05,
113
+ "loss": 0.1638,
114
+ "step": 1500
115
+ },
116
+ {
117
+ "epoch": 0.903954802259887,
118
+ "grad_norm": 4.86266565322876,
119
+ "learning_rate": 4.7740112994350286e-05,
120
+ "loss": 0.1761,
121
+ "step": 1600
122
+ },
123
+ {
124
+ "epoch": 0.96045197740113,
125
+ "grad_norm": 2.2481741905212402,
126
+ "learning_rate": 4.759887005649718e-05,
127
+ "loss": 0.1481,
128
+ "step": 1700
129
+ },
130
+ {
131
+ "epoch": 1.0,
132
+ "eval_accuracy": 0.9473554127533367,
133
+ "eval_f1": 0.9471867274372374,
134
+ "eval_loss": 0.15506704151630402,
135
+ "eval_runtime": 0.99,
136
+ "eval_samples_per_second": 4086.911,
137
+ "eval_steps_per_second": 64.647,
138
+ "step": 1770
139
+ },
140
+ {
141
+ "epoch": 1.0169491525423728,
142
+ "grad_norm": 0.16318374872207642,
143
+ "learning_rate": 4.745762711864407e-05,
144
+ "loss": 0.1303,
145
+ "step": 1800
146
+ },
147
+ {
148
+ "epoch": 1.073446327683616,
149
+ "grad_norm": 2.8574020862579346,
150
+ "learning_rate": 4.7316384180790966e-05,
151
+ "loss": 0.0916,
152
+ "step": 1900
153
+ },
154
+ {
155
+ "epoch": 1.1299435028248588,
156
+ "grad_norm": 7.220451831817627,
157
+ "learning_rate": 4.717514124293785e-05,
158
+ "loss": 0.1214,
159
+ "step": 2000
160
+ },
161
+ {
162
+ "epoch": 1.1864406779661016,
163
+ "grad_norm": 0.23234207928180695,
164
+ "learning_rate": 4.703389830508475e-05,
165
+ "loss": 0.1005,
166
+ "step": 2100
167
+ },
168
+ {
169
+ "epoch": 1.2429378531073447,
170
+ "grad_norm": 5.788926124572754,
171
+ "learning_rate": 4.689265536723164e-05,
172
+ "loss": 0.106,
173
+ "step": 2200
174
+ },
175
+ {
176
+ "epoch": 1.2994350282485876,
177
+ "grad_norm": 14.127985954284668,
178
+ "learning_rate": 4.675141242937853e-05,
179
+ "loss": 0.1002,
180
+ "step": 2300
181
+ },
182
+ {
183
+ "epoch": 1.3559322033898304,
184
+ "grad_norm": 8.628386497497559,
185
+ "learning_rate": 4.6610169491525425e-05,
186
+ "loss": 0.1228,
187
+ "step": 2400
188
+ },
189
+ {
190
+ "epoch": 1.4124293785310735,
191
+ "grad_norm": 3.4723896980285645,
192
+ "learning_rate": 4.646892655367232e-05,
193
+ "loss": 0.1193,
194
+ "step": 2500
195
+ },
196
+ {
197
+ "epoch": 1.4689265536723164,
198
+ "grad_norm": 5.112296104431152,
199
+ "learning_rate": 4.632768361581921e-05,
200
+ "loss": 0.0989,
201
+ "step": 2600
202
+ },
203
+ {
204
+ "epoch": 1.5254237288135593,
205
+ "grad_norm": 8.718145370483398,
206
+ "learning_rate": 4.6186440677966104e-05,
207
+ "loss": 0.1291,
208
+ "step": 2700
209
+ },
210
+ {
211
+ "epoch": 1.5819209039548023,
212
+ "grad_norm": 0.052204638719558716,
213
+ "learning_rate": 4.6045197740113e-05,
214
+ "loss": 0.0987,
215
+ "step": 2800
216
+ },
217
+ {
218
+ "epoch": 1.6384180790960452,
219
+ "grad_norm": 11.152572631835938,
220
+ "learning_rate": 4.590395480225989e-05,
221
+ "loss": 0.113,
222
+ "step": 2900
223
+ },
224
+ {
225
+ "epoch": 1.694915254237288,
226
+ "grad_norm": 3.002537965774536,
227
+ "learning_rate": 4.5762711864406784e-05,
228
+ "loss": 0.1215,
229
+ "step": 3000
230
+ },
231
+ {
232
+ "epoch": 1.7514124293785311,
233
+ "grad_norm": 16.578323364257812,
234
+ "learning_rate": 4.562146892655367e-05,
235
+ "loss": 0.0998,
236
+ "step": 3100
237
+ },
238
+ {
239
+ "epoch": 1.807909604519774,
240
+ "grad_norm": 4.660722255706787,
241
+ "learning_rate": 4.548022598870056e-05,
242
+ "loss": 0.1015,
243
+ "step": 3200
244
+ },
245
+ {
246
+ "epoch": 1.8644067796610169,
247
+ "grad_norm": 0.32472193241119385,
248
+ "learning_rate": 4.533898305084746e-05,
249
+ "loss": 0.0982,
250
+ "step": 3300
251
+ },
252
+ {
253
+ "epoch": 1.92090395480226,
254
+ "grad_norm": 12.544636726379395,
255
+ "learning_rate": 4.519774011299435e-05,
256
+ "loss": 0.1151,
257
+ "step": 3400
258
+ },
259
+ {
260
+ "epoch": 1.9774011299435028,
261
+ "grad_norm": 0.20591090619564056,
262
+ "learning_rate": 4.505649717514124e-05,
263
+ "loss": 0.1109,
264
+ "step": 3500
265
+ },
266
+ {
267
+ "epoch": 2.0,
268
+ "eval_accuracy": 0.9540286702916461,
269
+ "eval_f1": 0.9542995811514169,
270
+ "eval_loss": 0.17088210582733154,
271
+ "eval_runtime": 0.9919,
272
+ "eval_samples_per_second": 4078.94,
273
+ "eval_steps_per_second": 64.521,
274
+ "step": 3540
275
+ },
276
+ {
277
+ "epoch": 2.0338983050847457,
278
+ "grad_norm": 1.156205177307129,
279
+ "learning_rate": 4.491525423728814e-05,
280
+ "loss": 0.0764,
281
+ "step": 3600
282
+ },
283
+ {
284
+ "epoch": 2.0903954802259888,
285
+ "grad_norm": 4.644138336181641,
286
+ "learning_rate": 4.477401129943503e-05,
287
+ "loss": 0.0603,
288
+ "step": 3700
289
+ },
290
+ {
291
+ "epoch": 2.146892655367232,
292
+ "grad_norm": 0.0709792822599411,
293
+ "learning_rate": 4.463276836158192e-05,
294
+ "loss": 0.0649,
295
+ "step": 3800
296
+ },
297
+ {
298
+ "epoch": 2.2033898305084745,
299
+ "grad_norm": 0.13090333342552185,
300
+ "learning_rate": 4.4491525423728816e-05,
301
+ "loss": 0.076,
302
+ "step": 3900
303
+ },
304
+ {
305
+ "epoch": 2.2598870056497176,
306
+ "grad_norm": 39.72850799560547,
307
+ "learning_rate": 4.435028248587571e-05,
308
+ "loss": 0.0827,
309
+ "step": 4000
310
+ },
311
+ {
312
+ "epoch": 2.3163841807909606,
313
+ "grad_norm": 0.29564905166625977,
314
+ "learning_rate": 4.42090395480226e-05,
315
+ "loss": 0.0701,
316
+ "step": 4100
317
+ },
318
+ {
319
+ "epoch": 2.3728813559322033,
320
+ "grad_norm": 0.23284725844860077,
321
+ "learning_rate": 4.4067796610169495e-05,
322
+ "loss": 0.0697,
323
+ "step": 4200
324
+ },
325
+ {
326
+ "epoch": 2.4293785310734464,
327
+ "grad_norm": 0.059655264019966125,
328
+ "learning_rate": 4.392655367231638e-05,
329
+ "loss": 0.0803,
330
+ "step": 4300
331
+ },
332
+ {
333
+ "epoch": 2.4858757062146895,
334
+ "grad_norm": 0.01876319944858551,
335
+ "learning_rate": 4.378531073446328e-05,
336
+ "loss": 0.0796,
337
+ "step": 4400
338
+ },
339
+ {
340
+ "epoch": 2.542372881355932,
341
+ "grad_norm": 0.07363492995500565,
342
+ "learning_rate": 4.3644067796610175e-05,
343
+ "loss": 0.0573,
344
+ "step": 4500
345
+ },
346
+ {
347
+ "epoch": 2.598870056497175,
348
+ "grad_norm": 0.26911139488220215,
349
+ "learning_rate": 4.350282485875706e-05,
350
+ "loss": 0.0732,
351
+ "step": 4600
352
+ },
353
+ {
354
+ "epoch": 2.655367231638418,
355
+ "grad_norm": 0.045297879725694656,
356
+ "learning_rate": 4.3361581920903954e-05,
357
+ "loss": 0.0645,
358
+ "step": 4700
359
+ },
360
+ {
361
+ "epoch": 2.711864406779661,
362
+ "grad_norm": 0.24285119771957397,
363
+ "learning_rate": 4.3220338983050854e-05,
364
+ "loss": 0.0852,
365
+ "step": 4800
366
+ },
367
+ {
368
+ "epoch": 2.768361581920904,
369
+ "grad_norm": 70.39765930175781,
370
+ "learning_rate": 4.307909604519774e-05,
371
+ "loss": 0.0763,
372
+ "step": 4900
373
+ },
374
+ {
375
+ "epoch": 2.824858757062147,
376
+ "grad_norm": 3.1160919666290283,
377
+ "learning_rate": 4.2937853107344634e-05,
378
+ "loss": 0.0618,
379
+ "step": 5000
380
+ },
381
+ {
382
+ "epoch": 2.8813559322033897,
383
+ "grad_norm": 0.14466217160224915,
384
+ "learning_rate": 4.279661016949153e-05,
385
+ "loss": 0.0628,
386
+ "step": 5100
387
+ },
388
+ {
389
+ "epoch": 2.937853107344633,
390
+ "grad_norm": 11.605415344238281,
391
+ "learning_rate": 4.265536723163842e-05,
392
+ "loss": 0.0539,
393
+ "step": 5200
394
+ },
395
+ {
396
+ "epoch": 2.994350282485876,
397
+ "grad_norm": 0.03261380270123482,
398
+ "learning_rate": 4.251412429378531e-05,
399
+ "loss": 0.0349,
400
+ "step": 5300
401
+ },
402
+ {
403
+ "epoch": 3.0,
404
+ "eval_accuracy": 0.9574888779041029,
405
+ "eval_f1": 0.9573208648613947,
406
+ "eval_loss": 0.20843710005283356,
407
+ "eval_runtime": 0.9688,
408
+ "eval_samples_per_second": 4176.286,
409
+ "eval_steps_per_second": 66.061,
410
+ "step": 5310
411
+ },
412
+ {
413
+ "epoch": 3.0508474576271185,
414
+ "grad_norm": 0.007287267595529556,
415
+ "learning_rate": 4.2372881355932206e-05,
416
+ "loss": 0.0575,
417
+ "step": 5400
418
+ },
419
+ {
420
+ "epoch": 3.1073446327683616,
421
+ "grad_norm": 19.718822479248047,
422
+ "learning_rate": 4.22316384180791e-05,
423
+ "loss": 0.0423,
424
+ "step": 5500
425
+ },
426
+ {
427
+ "epoch": 3.1638418079096047,
428
+ "grad_norm": 78.35333251953125,
429
+ "learning_rate": 4.209039548022599e-05,
430
+ "loss": 0.0349,
431
+ "step": 5600
432
+ },
433
+ {
434
+ "epoch": 3.2203389830508473,
435
+ "grad_norm": 0.025877630338072777,
436
+ "learning_rate": 4.1949152542372886e-05,
437
+ "loss": 0.0632,
438
+ "step": 5700
439
+ },
440
+ {
441
+ "epoch": 3.2768361581920904,
442
+ "grad_norm": 14.864492416381836,
443
+ "learning_rate": 4.180790960451977e-05,
444
+ "loss": 0.0651,
445
+ "step": 5800
446
+ },
447
+ {
448
+ "epoch": 3.3333333333333335,
449
+ "grad_norm": 0.002800008049234748,
450
+ "learning_rate": 4.166666666666667e-05,
451
+ "loss": 0.0225,
452
+ "step": 5900
453
+ },
454
+ {
455
+ "epoch": 3.389830508474576,
456
+ "grad_norm": 0.10183978080749512,
457
+ "learning_rate": 4.152542372881356e-05,
458
+ "loss": 0.0467,
459
+ "step": 6000
460
+ },
461
+ {
462
+ "epoch": 3.446327683615819,
463
+ "grad_norm": 30.69606590270996,
464
+ "learning_rate": 4.138418079096045e-05,
465
+ "loss": 0.0509,
466
+ "step": 6100
467
+ },
468
+ {
469
+ "epoch": 3.5028248587570623,
470
+ "grad_norm": 0.43340635299682617,
471
+ "learning_rate": 4.1242937853107345e-05,
472
+ "loss": 0.0366,
473
+ "step": 6200
474
+ },
475
+ {
476
+ "epoch": 3.559322033898305,
477
+ "grad_norm": 3.5696895122528076,
478
+ "learning_rate": 4.110169491525424e-05,
479
+ "loss": 0.0576,
480
+ "step": 6300
481
+ },
482
+ {
483
+ "epoch": 3.615819209039548,
484
+ "grad_norm": 3.981534481048584,
485
+ "learning_rate": 4.096045197740113e-05,
486
+ "loss": 0.0574,
487
+ "step": 6400
488
+ },
489
+ {
490
+ "epoch": 3.672316384180791,
491
+ "grad_norm": 0.020425381138920784,
492
+ "learning_rate": 4.0819209039548024e-05,
493
+ "loss": 0.0582,
494
+ "step": 6500
495
+ },
496
+ {
497
+ "epoch": 3.7288135593220337,
498
+ "grad_norm": 1.6137280464172363,
499
+ "learning_rate": 4.067796610169492e-05,
500
+ "loss": 0.037,
501
+ "step": 6600
502
+ },
503
+ {
504
+ "epoch": 3.785310734463277,
505
+ "grad_norm": 0.41225990653038025,
506
+ "learning_rate": 4.053672316384181e-05,
507
+ "loss": 0.039,
508
+ "step": 6700
509
+ },
510
+ {
511
+ "epoch": 3.84180790960452,
512
+ "grad_norm": 0.02651926688849926,
513
+ "learning_rate": 4.0395480225988704e-05,
514
+ "loss": 0.052,
515
+ "step": 6800
516
+ },
517
+ {
518
+ "epoch": 3.898305084745763,
519
+ "grad_norm": 4.174577713012695,
520
+ "learning_rate": 4.025423728813559e-05,
521
+ "loss": 0.0746,
522
+ "step": 6900
523
+ },
524
+ {
525
+ "epoch": 3.9548022598870056,
526
+ "grad_norm": 0.06549729406833649,
527
+ "learning_rate": 4.011299435028249e-05,
528
+ "loss": 0.0682,
529
+ "step": 7000
530
+ },
531
+ {
532
+ "epoch": 4.0,
533
+ "eval_accuracy": 0.9619377162629758,
534
+ "eval_f1": 0.961922353652765,
535
+ "eval_loss": 0.19074885547161102,
536
+ "eval_runtime": 0.9577,
537
+ "eval_samples_per_second": 4224.832,
538
+ "eval_steps_per_second": 66.829,
539
+ "step": 7080
540
+ },
541
+ {
542
+ "epoch": 4.011299435028248,
543
+ "grad_norm": 0.09852942079305649,
544
+ "learning_rate": 3.997175141242938e-05,
545
+ "loss": 0.0573,
546
+ "step": 7100
547
+ },
548
+ {
549
+ "epoch": 4.067796610169491,
550
+ "grad_norm": 0.010253222659230232,
551
+ "learning_rate": 3.983050847457627e-05,
552
+ "loss": 0.0268,
553
+ "step": 7200
554
+ },
555
+ {
556
+ "epoch": 4.124293785310734,
557
+ "grad_norm": 0.05561167746782303,
558
+ "learning_rate": 3.968926553672316e-05,
559
+ "loss": 0.041,
560
+ "step": 7300
561
+ },
562
+ {
563
+ "epoch": 4.1807909604519775,
564
+ "grad_norm": 0.020777329802513123,
565
+ "learning_rate": 3.954802259887006e-05,
566
+ "loss": 0.0428,
567
+ "step": 7400
568
+ },
569
+ {
570
+ "epoch": 4.237288135593221,
571
+ "grad_norm": 0.011439072899520397,
572
+ "learning_rate": 3.940677966101695e-05,
573
+ "loss": 0.0281,
574
+ "step": 7500
575
+ },
576
+ {
577
+ "epoch": 4.293785310734464,
578
+ "grad_norm": 0.29063406586647034,
579
+ "learning_rate": 3.926553672316384e-05,
580
+ "loss": 0.0347,
581
+ "step": 7600
582
+ },
583
+ {
584
+ "epoch": 4.350282485875706,
585
+ "grad_norm": 0.008078676648437977,
586
+ "learning_rate": 3.9124293785310735e-05,
587
+ "loss": 0.0427,
588
+ "step": 7700
589
+ },
590
+ {
591
+ "epoch": 4.406779661016949,
592
+ "grad_norm": 0.0378178134560585,
593
+ "learning_rate": 3.898305084745763e-05,
594
+ "loss": 0.0448,
595
+ "step": 7800
596
+ },
597
+ {
598
+ "epoch": 4.463276836158192,
599
+ "grad_norm": 0.09035930037498474,
600
+ "learning_rate": 3.884180790960452e-05,
601
+ "loss": 0.0328,
602
+ "step": 7900
603
+ },
604
+ {
605
+ "epoch": 4.519774011299435,
606
+ "grad_norm": 0.05402543023228645,
607
+ "learning_rate": 3.8700564971751415e-05,
608
+ "loss": 0.0345,
609
+ "step": 8000
610
+ },
611
+ {
612
+ "epoch": 4.576271186440678,
613
+ "grad_norm": 0.01713019795715809,
614
+ "learning_rate": 3.855932203389831e-05,
615
+ "loss": 0.0358,
616
+ "step": 8100
617
+ },
618
+ {
619
+ "epoch": 4.632768361581921,
620
+ "grad_norm": 0.0475781112909317,
621
+ "learning_rate": 3.84180790960452e-05,
622
+ "loss": 0.0532,
623
+ "step": 8200
624
+ },
625
+ {
626
+ "epoch": 4.6892655367231635,
627
+ "grad_norm": 0.006405588239431381,
628
+ "learning_rate": 3.8276836158192094e-05,
629
+ "loss": 0.0324,
630
+ "step": 8300
631
+ },
632
+ {
633
+ "epoch": 4.745762711864407,
634
+ "grad_norm": 4.002650260925293,
635
+ "learning_rate": 3.813559322033898e-05,
636
+ "loss": 0.0526,
637
+ "step": 8400
638
+ },
639
+ {
640
+ "epoch": 4.80225988700565,
641
+ "grad_norm": 0.010295218788087368,
642
+ "learning_rate": 3.799435028248588e-05,
643
+ "loss": 0.0428,
644
+ "step": 8500
645
+ },
646
+ {
647
+ "epoch": 4.858757062146893,
648
+ "grad_norm": 25.15464973449707,
649
+ "learning_rate": 3.7853107344632774e-05,
650
+ "loss": 0.0513,
651
+ "step": 8600
652
+ },
653
+ {
654
+ "epoch": 4.915254237288136,
655
+ "grad_norm": 0.018476568162441254,
656
+ "learning_rate": 3.771186440677966e-05,
657
+ "loss": 0.0627,
658
+ "step": 8700
659
+ },
660
+ {
661
+ "epoch": 4.971751412429379,
662
+ "grad_norm": 0.02234013006091118,
663
+ "learning_rate": 3.7570621468926554e-05,
664
+ "loss": 0.0436,
665
+ "step": 8800
666
+ },
667
+ {
668
+ "epoch": 5.0,
669
+ "eval_accuracy": 0.9594661393969353,
670
+ "eval_f1": 0.9595618588245926,
671
+ "eval_loss": 0.24679133296012878,
672
+ "eval_runtime": 0.9625,
673
+ "eval_samples_per_second": 4203.719,
674
+ "eval_steps_per_second": 66.495,
675
+ "step": 8850
676
+ },
677
+ {
678
+ "epoch": 5.028248587570621,
679
+ "grad_norm": 0.007325501646846533,
680
+ "learning_rate": 3.7429378531073453e-05,
681
+ "loss": 0.011,
682
+ "step": 8900
683
+ },
684
+ {
685
+ "epoch": 5.084745762711864,
686
+ "grad_norm": 0.015004786662757397,
687
+ "learning_rate": 3.728813559322034e-05,
688
+ "loss": 0.0239,
689
+ "step": 9000
690
+ },
691
+ {
692
+ "epoch": 5.141242937853107,
693
+ "grad_norm": 0.02809782139956951,
694
+ "learning_rate": 3.714689265536723e-05,
695
+ "loss": 0.0468,
696
+ "step": 9100
697
+ },
698
+ {
699
+ "epoch": 5.19774011299435,
700
+ "grad_norm": 0.061971381306648254,
701
+ "learning_rate": 3.7005649717514126e-05,
702
+ "loss": 0.0369,
703
+ "step": 9200
704
+ },
705
+ {
706
+ "epoch": 5.254237288135593,
707
+ "grad_norm": 0.028554769232869148,
708
+ "learning_rate": 3.686440677966102e-05,
709
+ "loss": 0.0254,
710
+ "step": 9300
711
+ },
712
+ {
713
+ "epoch": 5.3107344632768365,
714
+ "grad_norm": 0.049820106476545334,
715
+ "learning_rate": 3.672316384180791e-05,
716
+ "loss": 0.0371,
717
+ "step": 9400
718
+ },
719
+ {
720
+ "epoch": 5.367231638418079,
721
+ "grad_norm": 0.016609592363238335,
722
+ "learning_rate": 3.6581920903954806e-05,
723
+ "loss": 0.0184,
724
+ "step": 9500
725
+ },
726
+ {
727
+ "epoch": 5.423728813559322,
728
+ "grad_norm": 0.05181876942515373,
729
+ "learning_rate": 3.644067796610169e-05,
730
+ "loss": 0.0414,
731
+ "step": 9600
732
+ },
733
+ {
734
+ "epoch": 5.480225988700565,
735
+ "grad_norm": 0.05821879953145981,
736
+ "learning_rate": 3.629943502824859e-05,
737
+ "loss": 0.0308,
738
+ "step": 9700
739
+ },
740
+ {
741
+ "epoch": 5.536723163841808,
742
+ "grad_norm": 0.010366985574364662,
743
+ "learning_rate": 3.6158192090395485e-05,
744
+ "loss": 0.0278,
745
+ "step": 9800
746
+ },
747
+ {
748
+ "epoch": 5.593220338983051,
749
+ "grad_norm": 0.019191740080714226,
750
+ "learning_rate": 3.601694915254237e-05,
751
+ "loss": 0.0435,
752
+ "step": 9900
753
+ },
754
+ {
755
+ "epoch": 5.649717514124294,
756
+ "grad_norm": 0.06532129645347595,
757
+ "learning_rate": 3.587570621468927e-05,
758
+ "loss": 0.0237,
759
+ "step": 10000
760
+ },
761
+ {
762
+ "epoch": 5.706214689265536,
763
+ "grad_norm": 0.009392981417477131,
764
+ "learning_rate": 3.573446327683616e-05,
765
+ "loss": 0.0334,
766
+ "step": 10100
767
+ },
768
+ {
769
+ "epoch": 5.762711864406779,
770
+ "grad_norm": 0.023171979933977127,
771
+ "learning_rate": 3.559322033898305e-05,
772
+ "loss": 0.0487,
773
+ "step": 10200
774
+ },
775
+ {
776
+ "epoch": 5.8192090395480225,
777
+ "grad_norm": 0.055124878883361816,
778
+ "learning_rate": 3.5451977401129944e-05,
779
+ "loss": 0.0412,
780
+ "step": 10300
781
+ },
782
+ {
783
+ "epoch": 5.875706214689266,
784
+ "grad_norm": 0.015424055978655815,
785
+ "learning_rate": 3.531073446327684e-05,
786
+ "loss": 0.0292,
787
+ "step": 10400
788
+ },
789
+ {
790
+ "epoch": 5.932203389830509,
791
+ "grad_norm": 0.6497403979301453,
792
+ "learning_rate": 3.516949152542373e-05,
793
+ "loss": 0.039,
794
+ "step": 10500
795
+ },
796
+ {
797
+ "epoch": 5.988700564971752,
798
+ "grad_norm": 18.98410415649414,
799
+ "learning_rate": 3.5028248587570624e-05,
800
+ "loss": 0.0322,
801
+ "step": 10600
802
+ },
803
+ {
804
+ "epoch": 6.0,
805
+ "eval_accuracy": 0.9614434008897677,
806
+ "eval_f1": 0.9615565973130906,
807
+ "eval_loss": 0.2411661297082901,
808
+ "eval_runtime": 0.9629,
809
+ "eval_samples_per_second": 4201.762,
810
+ "eval_steps_per_second": 66.464,
811
+ "step": 10620
812
+ },
813
+ {
814
+ "epoch": 6.045197740112994,
815
+ "grad_norm": 0.013564531691372395,
816
+ "learning_rate": 3.488700564971752e-05,
817
+ "loss": 0.0254,
818
+ "step": 10700
819
+ },
820
+ {
821
+ "epoch": 6.101694915254237,
822
+ "grad_norm": 33.3035888671875,
823
+ "learning_rate": 3.474576271186441e-05,
824
+ "loss": 0.0295,
825
+ "step": 10800
826
+ },
827
+ {
828
+ "epoch": 6.15819209039548,
829
+ "grad_norm": 0.12126260250806808,
830
+ "learning_rate": 3.46045197740113e-05,
831
+ "loss": 0.0275,
832
+ "step": 10900
833
+ },
834
+ {
835
+ "epoch": 6.214689265536723,
836
+ "grad_norm": 0.03739802539348602,
837
+ "learning_rate": 3.446327683615819e-05,
838
+ "loss": 0.0267,
839
+ "step": 11000
840
+ },
841
+ {
842
+ "epoch": 6.271186440677966,
843
+ "grad_norm": 0.03359340503811836,
844
+ "learning_rate": 3.432203389830508e-05,
845
+ "loss": 0.0389,
846
+ "step": 11100
847
+ },
848
+ {
849
+ "epoch": 6.327683615819209,
850
+ "grad_norm": 0.003635927801951766,
851
+ "learning_rate": 3.418079096045198e-05,
852
+ "loss": 0.0255,
853
+ "step": 11200
854
+ },
855
+ {
856
+ "epoch": 6.3841807909604515,
857
+ "grad_norm": 0.06124364957213402,
858
+ "learning_rate": 3.403954802259887e-05,
859
+ "loss": 0.0229,
860
+ "step": 11300
861
+ },
862
+ {
863
+ "epoch": 6.440677966101695,
864
+ "grad_norm": 0.026170525699853897,
865
+ "learning_rate": 3.389830508474576e-05,
866
+ "loss": 0.0319,
867
+ "step": 11400
868
+ },
869
+ {
870
+ "epoch": 6.497175141242938,
871
+ "grad_norm": 0.013875061646103859,
872
+ "learning_rate": 3.375706214689266e-05,
873
+ "loss": 0.0276,
874
+ "step": 11500
875
+ },
876
+ {
877
+ "epoch": 6.553672316384181,
878
+ "grad_norm": 0.009600764140486717,
879
+ "learning_rate": 3.361581920903955e-05,
880
+ "loss": 0.0182,
881
+ "step": 11600
882
+ },
883
+ {
884
+ "epoch": 6.610169491525424,
885
+ "grad_norm": 0.02147483266890049,
886
+ "learning_rate": 3.347457627118644e-05,
887
+ "loss": 0.0277,
888
+ "step": 11700
889
+ },
890
+ {
891
+ "epoch": 6.666666666666667,
892
+ "grad_norm": 0.007301884237676859,
893
+ "learning_rate": 3.3333333333333335e-05,
894
+ "loss": 0.0268,
895
+ "step": 11800
896
+ },
897
+ {
898
+ "epoch": 6.72316384180791,
899
+ "grad_norm": 0.008684027940034866,
900
+ "learning_rate": 3.319209039548023e-05,
901
+ "loss": 0.0244,
902
+ "step": 11900
903
+ },
904
+ {
905
+ "epoch": 6.779661016949152,
906
+ "grad_norm": 0.0058201453648507595,
907
+ "learning_rate": 3.305084745762712e-05,
908
+ "loss": 0.018,
909
+ "step": 12000
910
+ },
911
+ {
912
+ "epoch": 6.836158192090395,
913
+ "grad_norm": 0.015645477920770645,
914
+ "learning_rate": 3.2909604519774014e-05,
915
+ "loss": 0.0389,
916
+ "step": 12100
917
+ },
918
+ {
919
+ "epoch": 6.892655367231638,
920
+ "grad_norm": 0.013589623384177685,
921
+ "learning_rate": 3.27683615819209e-05,
922
+ "loss": 0.0271,
923
+ "step": 12200
924
+ },
925
+ {
926
+ "epoch": 6.9491525423728815,
927
+ "grad_norm": 0.004052096512168646,
928
+ "learning_rate": 3.26271186440678e-05,
929
+ "loss": 0.012,
930
+ "step": 12300
931
+ },
932
+ {
933
+ "epoch": 7.0,
934
+ "eval_accuracy": 0.963173504695996,
935
+ "eval_f1": 0.9631919351432553,
936
+ "eval_loss": 0.22085699439048767,
937
+ "eval_runtime": 0.9623,
938
+ "eval_samples_per_second": 4204.299,
939
+ "eval_steps_per_second": 66.504,
940
+ "step": 12390
941
+ },
942
+ {
943
+ "epoch": 7.005649717514125,
944
+ "grad_norm": 0.06705684214830399,
945
+ "learning_rate": 3.2485875706214694e-05,
946
+ "loss": 0.0309,
947
+ "step": 12400
948
+ },
949
+ {
950
+ "epoch": 7.062146892655368,
951
+ "grad_norm": 0.006240461952984333,
952
+ "learning_rate": 3.234463276836158e-05,
953
+ "loss": 0.0084,
954
+ "step": 12500
955
+ },
956
+ {
957
+ "epoch": 7.11864406779661,
958
+ "grad_norm": 0.020344626158475876,
959
+ "learning_rate": 3.2203389830508473e-05,
960
+ "loss": 0.0168,
961
+ "step": 12600
962
+ },
963
+ {
964
+ "epoch": 7.175141242937853,
965
+ "grad_norm": 0.003926662262529135,
966
+ "learning_rate": 3.2062146892655373e-05,
967
+ "loss": 0.022,
968
+ "step": 12700
969
+ },
970
+ {
971
+ "epoch": 7.231638418079096,
972
+ "grad_norm": 0.0025492089334875345,
973
+ "learning_rate": 3.192090395480226e-05,
974
+ "loss": 0.0133,
975
+ "step": 12800
976
+ },
977
+ {
978
+ "epoch": 7.288135593220339,
979
+ "grad_norm": 0.005623087752610445,
980
+ "learning_rate": 3.177966101694915e-05,
981
+ "loss": 0.0164,
982
+ "step": 12900
983
+ },
984
+ {
985
+ "epoch": 7.344632768361582,
986
+ "grad_norm": 0.0032459620852023363,
987
+ "learning_rate": 3.1638418079096046e-05,
988
+ "loss": 0.0272,
989
+ "step": 13000
990
+ },
991
+ {
992
+ "epoch": 7.401129943502825,
993
+ "grad_norm": 1.1293178796768188,
994
+ "learning_rate": 3.149717514124294e-05,
995
+ "loss": 0.0148,
996
+ "step": 13100
997
+ },
998
+ {
999
+ "epoch": 7.4576271186440675,
1000
+ "grad_norm": 0.0017996145179495215,
1001
+ "learning_rate": 3.135593220338983e-05,
1002
+ "loss": 0.0132,
1003
+ "step": 13200
1004
+ },
1005
+ {
1006
+ "epoch": 7.5141242937853105,
1007
+ "grad_norm": 0.008758709765970707,
1008
+ "learning_rate": 3.1214689265536726e-05,
1009
+ "loss": 0.0152,
1010
+ "step": 13300
1011
+ },
1012
+ {
1013
+ "epoch": 7.570621468926554,
1014
+ "grad_norm": 0.0038798090536147356,
1015
+ "learning_rate": 3.107344632768362e-05,
1016
+ "loss": 0.0106,
1017
+ "step": 13400
1018
+ },
1019
+ {
1020
+ "epoch": 7.627118644067797,
1021
+ "grad_norm": 0.005076746456325054,
1022
+ "learning_rate": 3.093220338983051e-05,
1023
+ "loss": 0.0158,
1024
+ "step": 13500
1025
+ },
1026
+ {
1027
+ "epoch": 7.68361581920904,
1028
+ "grad_norm": 0.003670661011710763,
1029
+ "learning_rate": 3.0790960451977405e-05,
1030
+ "loss": 0.0093,
1031
+ "step": 13600
1032
+ },
1033
+ {
1034
+ "epoch": 7.740112994350282,
1035
+ "grad_norm": 0.003522429848089814,
1036
+ "learning_rate": 3.064971751412429e-05,
1037
+ "loss": 0.0183,
1038
+ "step": 13700
1039
+ },
1040
+ {
1041
+ "epoch": 7.796610169491525,
1042
+ "grad_norm": 0.06700780242681503,
1043
+ "learning_rate": 3.050847457627119e-05,
1044
+ "loss": 0.0398,
1045
+ "step": 13800
1046
+ },
1047
+ {
1048
+ "epoch": 7.853107344632768,
1049
+ "grad_norm": 0.01462018396705389,
1050
+ "learning_rate": 3.036723163841808e-05,
1051
+ "loss": 0.0286,
1052
+ "step": 13900
1053
+ },
1054
+ {
1055
+ "epoch": 7.909604519774011,
1056
+ "grad_norm": 0.025290269404649734,
1057
+ "learning_rate": 3.022598870056497e-05,
1058
+ "loss": 0.037,
1059
+ "step": 14000
1060
+ },
1061
+ {
1062
+ "epoch": 7.966101694915254,
1063
+ "grad_norm": 0.011192042380571365,
1064
+ "learning_rate": 3.0084745762711864e-05,
1065
+ "loss": 0.0147,
1066
+ "step": 14100
1067
+ },
1068
+ {
1069
+ "epoch": 8.0,
1070
+ "eval_accuracy": 0.9636678200692042,
1071
+ "eval_f1": 0.9635820582698403,
1072
+ "eval_loss": 0.24865780770778656,
1073
+ "eval_runtime": 0.9484,
1074
+ "eval_samples_per_second": 4266.086,
1075
+ "eval_steps_per_second": 67.481,
1076
+ "step": 14160
1077
+ }
1078
+ ],
1079
+ "logging_steps": 100,
1080
+ "max_steps": 35400,
1081
+ "num_input_tokens_seen": 0,
1082
+ "num_train_epochs": 20,
1083
+ "save_steps": 500,
1084
+ "stateful_callbacks": {
1085
+ "EarlyStoppingCallback": {
1086
+ "args": {
1087
+ "early_stopping_patience": 3,
1088
+ "early_stopping_threshold": 0.0
1089
+ },
1090
+ "attributes": {
1091
+ "early_stopping_patience_counter": 0
1092
+ }
1093
+ },
1094
+ "TrainerControl": {
1095
+ "args": {
1096
+ "should_epoch_stop": false,
1097
+ "should_evaluate": false,
1098
+ "should_log": false,
1099
+ "should_save": true,
1100
+ "should_training_stop": false
1101
+ },
1102
+ "attributes": {}
1103
+ }
1104
+ },
1105
+ "total_flos": 1172336478105600.0,
1106
+ "train_batch_size": 16,
1107
+ "trial_name": null,
1108
+ "trial_params": null
1109
+ }
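
Note: trainer_state.json records 8 completed epochs (14,160 of a scheduled 35,400 steps at train batch size 16 over 20 epochs), per-epoch evaluation, and a best checkpoint selected at epoch 8 with eval_f1 ≈ 0.9636; EarlyStoppingCallback is armed with patience 3. A hedged reconstruction of a TrainingArguments setup consistent with this state; values not visible in the state, such as the learning rate, are inferred or assumed:

from transformers import EarlyStoppingCallback, TrainingArguments

# Settings mirrored from trainer_state.json: 20 epochs, batch size 16,
# logging every 100 steps, per-epoch eval/save, best model tracked on F1.
args = TrainingArguments(
    output_dir="./distilbert_multilingual_cased_greek_latin_classifiergreek",  # from best_model_checkpoint
    num_train_epochs=20,
    per_device_train_batch_size=16,
    learning_rate=5e-5,          # inferred from the linear decay in log_history
    logging_steps=100,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="f1",  # best_metric above equals eval_f1 at epoch 8
)
early_stopping = EarlyStoppingCallback(early_stopping_patience=3)
# These would be passed to Trainer(..., args=args, callbacks=[early_stopping]);
# trainer.train() then writes a trainer_state.json like the one above, and
# trainer.train(resume_from_checkpoint=True) would pick up from optimizer.pt,
# scheduler.pt, and rng_state.pth in the latest checkpoint.
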
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a911aa07d9ca1f76618367adcad9ebf5a951be0ecd3ebd50dbcf89c173eadc77
+ size 5304
vocab.txt ADDED
The diff for this file is too large to render. See raw diff