Upload folder using huggingface_hub
Browse files- checkpoint-5628/config.json +35 -0
- checkpoint-5628/model.safetensors +3 -0
- checkpoint-5628/optimizer.pt +3 -0
- checkpoint-5628/rng_state.pth +3 -0
- checkpoint-5628/scheduler.pt +3 -0
- checkpoint-5628/trainer_state.json +108 -0
- checkpoint-5628/training_args.bin +3 -0
- checkpoint-7504/config.json +35 -0
- checkpoint-7504/model.safetensors +3 -0
- checkpoint-7504/optimizer.pt +3 -0
- checkpoint-7504/rng_state.pth +3 -0
- checkpoint-7504/scheduler.pt +3 -0
- checkpoint-7504/trainer_state.json +130 -0
- checkpoint-7504/training_args.bin +3 -0
- checkpoint-9380/config.json +35 -0
- checkpoint-9380/model.safetensors +3 -0
- checkpoint-9380/optimizer.pt +3 -0
- checkpoint-9380/rng_state.pth +3 -0
- checkpoint-9380/scheduler.pt +3 -0
- checkpoint-9380/trainer_state.json +152 -0
- checkpoint-9380/training_args.bin +3 -0
- config.json +35 -0
- events.out.tfevents.1740349787.d7086afb14ac.759.3 +3 -0
- events.out.tfevents.1740350033.d7086afb14ac.759.4 +3 -0
- events.out.tfevents.1740350707.d7086afb14ac.759.5 +3 -0
- model.safetensors +3 -0
- runs/Feb22_13-02-01_c4da036077fd/events.out.tfevents.1740229322.c4da036077fd.6169.0 +3 -0
- runs/Feb22_13-05-37_c4da036077fd/events.out.tfevents.1740229538.c4da036077fd.6169.1 +3 -0
- runs/Feb22_13-06-15_c4da036077fd/events.out.tfevents.1740229576.c4da036077fd.6169.2 +3 -0
- runs/Feb22_13-07-10_c4da036077fd/events.out.tfevents.1740229631.c4da036077fd.6169.3 +3 -0
- runs/Feb22_13-08-22_c4da036077fd/events.out.tfevents.1740229703.c4da036077fd.6169.4 +3 -0
- runs/Feb22_13-10-35_c4da036077fd/events.out.tfevents.1740229837.c4da036077fd.6169.5 +3 -0
- runs/Feb22_13-11-22_c4da036077fd/events.out.tfevents.1740229884.c4da036077fd.6169.6 +3 -0
- runs/Feb22_13-12-47_c4da036077fd/events.out.tfevents.1740229969.c4da036077fd.6169.7 +3 -0
- runs/Feb22_13-16-26_c4da036077fd/events.out.tfevents.1740230187.c4da036077fd.6169.8 +3 -0
- runs/Feb22_13-30-31_c4da036077fd/events.out.tfevents.1740231033.c4da036077fd.9367.0 +3 -0
- runs/Feb22_13-34-19_c4da036077fd/events.out.tfevents.1740231261.c4da036077fd.9367.1 +3 -0
- runs/Feb22_13-35-13_c4da036077fd/events.out.tfevents.1740231315.c4da036077fd.9367.2 +3 -0
- runs/Feb22_13-40-16_c4da036077fd/events.out.tfevents.1740231618.c4da036077fd.9367.3 +3 -0
- runs/Feb22_13-40-16_c4da036077fd/events.out.tfevents.1740232282.c4da036077fd.9367.4 +3 -0
- training_args.bin +3 -0
checkpoint-5628/config.json
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "distilbert/distilbert-base-multilingual-cased",
|
3 |
+
"activation": "gelu",
|
4 |
+
"architectures": [
|
5 |
+
"DistilBertForSequenceClassification"
|
6 |
+
],
|
7 |
+
"attention_dropout": 0.1,
|
8 |
+
"dim": 768,
|
9 |
+
"dropout": 0.1,
|
10 |
+
"hidden_dim": 3072,
|
11 |
+
"id2label": {
|
12 |
+
"0": "LABEL_0",
|
13 |
+
"1": "LABEL_1",
|
14 |
+
"2": "LABEL_2"
|
15 |
+
},
|
16 |
+
"initializer_range": 0.02,
|
17 |
+
"label2id": {
|
18 |
+
"LABEL_0": 0,
|
19 |
+
"LABEL_1": 1,
|
20 |
+
"LABEL_2": 2
|
21 |
+
},
|
22 |
+
"max_position_embeddings": 512,
|
23 |
+
"model_type": "distilbert",
|
24 |
+
"n_heads": 12,
|
25 |
+
"n_layers": 6,
|
26 |
+
"output_past": true,
|
27 |
+
"pad_token_id": 0,
|
28 |
+
"qa_dropout": 0.1,
|
29 |
+
"seq_classif_dropout": 0.2,
|
30 |
+
"sinusoidal_pos_embds": false,
|
31 |
+
"tie_weights_": true,
|
32 |
+
"torch_dtype": "float32",
|
33 |
+
"transformers_version": "4.49.0",
|
34 |
+
"vocab_size": 119547
|
35 |
+
}
|
checkpoint-5628/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0963cd6fac43ba582b0e2aec7c473eba8ecb1bec1ade10985cb3181333c3ea49
|
3 |
+
size 541320452
|
checkpoint-5628/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7a2e3f7c7acfbaa29be8fa176655032f13e4a2cc4c3410221eb39c83053e9f47
|
3 |
+
size 1082703034
|
checkpoint-5628/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:32d5b2ab33ba699de67499a86896add23c725c5b337505deccb47263b5728e71
|
3 |
+
size 14244
|
checkpoint-5628/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0f8189f8adcf760ed2f809786703eec9f3cb051b1d3fce373553e9e709dcfc97
|
3 |
+
size 1064
|
checkpoint-5628/trainer_state.json
ADDED
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.5992706992706992,
|
3 |
+
"best_model_checkpoint": "model/checkpoint-3752",
|
4 |
+
"epoch": 3.0,
|
5 |
+
"eval_steps": 50,
|
6 |
+
"global_step": 5628,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 1.0,
|
13 |
+
"grad_norm": 0.232726588845253,
|
14 |
+
"learning_rate": 9.084300622945162e-06,
|
15 |
+
"loss": 0.0817,
|
16 |
+
"step": 1876
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 1.0,
|
20 |
+
"eval_accuracy": 0.8464818763326226,
|
21 |
+
"eval_f1_macro": 0.4624149659863946,
|
22 |
+
"eval_f1_weighted": 0.8011493190026544,
|
23 |
+
"eval_loss": 0.06743289530277252,
|
24 |
+
"eval_precision_macro": 0.7010764171889492,
|
25 |
+
"eval_precision_weighted": 0.8229165574281427,
|
26 |
+
"eval_recall_macro": 0.44028979481565683,
|
27 |
+
"eval_recall_weighted": 0.8464818763326226,
|
28 |
+
"eval_runtime": 5.0821,
|
29 |
+
"eval_samples_per_second": 184.568,
|
30 |
+
"eval_steps_per_second": 46.24,
|
31 |
+
"step": 1876
|
32 |
+
},
|
33 |
+
{
|
34 |
+
"epoch": 2.0,
|
35 |
+
"grad_norm": 0.18798664212226868,
|
36 |
+
"learning_rate": 6.593041210714562e-06,
|
37 |
+
"loss": 0.0588,
|
38 |
+
"step": 3752
|
39 |
+
},
|
40 |
+
{
|
41 |
+
"epoch": 2.0,
|
42 |
+
"eval_accuracy": 0.8251599147121536,
|
43 |
+
"eval_f1_macro": 0.5992706992706992,
|
44 |
+
"eval_f1_weighted": 0.829400158611246,
|
45 |
+
"eval_loss": 0.06970743834972382,
|
46 |
+
"eval_precision_macro": 0.6140893452063056,
|
47 |
+
"eval_precision_weighted": 0.8365234937694226,
|
48 |
+
"eval_recall_macro": 0.5961441974373008,
|
49 |
+
"eval_recall_weighted": 0.8251599147121536,
|
50 |
+
"eval_runtime": 5.0788,
|
51 |
+
"eval_samples_per_second": 184.689,
|
52 |
+
"eval_steps_per_second": 46.271,
|
53 |
+
"step": 3752
|
54 |
+
},
|
55 |
+
{
|
56 |
+
"epoch": 3.0,
|
57 |
+
"grad_norm": 0.03722580894827843,
|
58 |
+
"learning_rate": 3.486973746877771e-06,
|
59 |
+
"loss": 0.0395,
|
60 |
+
"step": 5628
|
61 |
+
},
|
62 |
+
{
|
63 |
+
"epoch": 3.0,
|
64 |
+
"eval_accuracy": 0.849680170575693,
|
65 |
+
"eval_f1_macro": 0.5773129404708353,
|
66 |
+
"eval_f1_weighted": 0.8345452465837384,
|
67 |
+
"eval_loss": 0.08055932819843292,
|
68 |
+
"eval_precision_macro": 0.6424363946177801,
|
69 |
+
"eval_precision_weighted": 0.8274003556291218,
|
70 |
+
"eval_recall_macro": 0.5397910877436739,
|
71 |
+
"eval_recall_weighted": 0.849680170575693,
|
72 |
+
"eval_runtime": 5.0482,
|
73 |
+
"eval_samples_per_second": 185.807,
|
74 |
+
"eval_steps_per_second": 46.551,
|
75 |
+
"step": 5628
|
76 |
+
}
|
77 |
+
],
|
78 |
+
"logging_steps": 50,
|
79 |
+
"max_steps": 9380,
|
80 |
+
"num_input_tokens_seen": 0,
|
81 |
+
"num_train_epochs": 5,
|
82 |
+
"save_steps": 50,
|
83 |
+
"stateful_callbacks": {
|
84 |
+
"EarlyStoppingCallback": {
|
85 |
+
"args": {
|
86 |
+
"early_stopping_patience": 2,
|
87 |
+
"early_stopping_threshold": 0.0
|
88 |
+
},
|
89 |
+
"attributes": {
|
90 |
+
"early_stopping_patience_counter": 1
|
91 |
+
}
|
92 |
+
},
|
93 |
+
"TrainerControl": {
|
94 |
+
"args": {
|
95 |
+
"should_epoch_stop": false,
|
96 |
+
"should_evaluate": false,
|
97 |
+
"should_log": false,
|
98 |
+
"should_save": true,
|
99 |
+
"should_training_stop": false
|
100 |
+
},
|
101 |
+
"attributes": {}
|
102 |
+
}
|
103 |
+
},
|
104 |
+
"total_flos": 1490682220803072.0,
|
105 |
+
"train_batch_size": 4,
|
106 |
+
"trial_name": null,
|
107 |
+
"trial_params": null
|
108 |
+
}
|
checkpoint-5628/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:715c96820c45effa2b6e5bb29121868a8187d8c31c98bf6d56350dbbfcd41a1b
|
3 |
+
size 5240
|
checkpoint-7504/config.json
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "distilbert/distilbert-base-multilingual-cased",
|
3 |
+
"activation": "gelu",
|
4 |
+
"architectures": [
|
5 |
+
"DistilBertForSequenceClassification"
|
6 |
+
],
|
7 |
+
"attention_dropout": 0.1,
|
8 |
+
"dim": 768,
|
9 |
+
"dropout": 0.1,
|
10 |
+
"hidden_dim": 3072,
|
11 |
+
"id2label": {
|
12 |
+
"0": "LABEL_0",
|
13 |
+
"1": "LABEL_1",
|
14 |
+
"2": "LABEL_2"
|
15 |
+
},
|
16 |
+
"initializer_range": 0.02,
|
17 |
+
"label2id": {
|
18 |
+
"LABEL_0": 0,
|
19 |
+
"LABEL_1": 1,
|
20 |
+
"LABEL_2": 2
|
21 |
+
},
|
22 |
+
"max_position_embeddings": 512,
|
23 |
+
"model_type": "distilbert",
|
24 |
+
"n_heads": 12,
|
25 |
+
"n_layers": 6,
|
26 |
+
"output_past": true,
|
27 |
+
"pad_token_id": 0,
|
28 |
+
"qa_dropout": 0.1,
|
29 |
+
"seq_classif_dropout": 0.2,
|
30 |
+
"sinusoidal_pos_embds": false,
|
31 |
+
"tie_weights_": true,
|
32 |
+
"torch_dtype": "float32",
|
33 |
+
"transformers_version": "4.49.0",
|
34 |
+
"vocab_size": 119547
|
35 |
+
}
|
checkpoint-7504/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4fa0daa19a98b896474d8829711aba4f1fd1851b1c4255f4ef69dd5d3eb102c1
|
3 |
+
size 541320452
|
checkpoint-7504/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5321247d7246325e84a33fc274ccf1b4051888bcaafe2e00cbce2509ba659500
|
3 |
+
size 1082703034
|
checkpoint-7504/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1994c0acf7b2b0a8e2ec7ec0412f7e58dba29f484c5d6adbbf73047d792f9baa
|
3 |
+
size 14244
|
checkpoint-7504/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:232e1e07cc83243f2cc6b60588afdfb89921234cd56328a8f6a1396fb5048933
|
3 |
+
size 1064
|
checkpoint-7504/trainer_state.json
ADDED
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.6007231820008035,
|
3 |
+
"best_model_checkpoint": "model/checkpoint-7504",
|
4 |
+
"epoch": 4.0,
|
5 |
+
"eval_steps": 50,
|
6 |
+
"global_step": 7504,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 1.0,
|
13 |
+
"grad_norm": 0.232726588845253,
|
14 |
+
"learning_rate": 9.084300622945162e-06,
|
15 |
+
"loss": 0.0817,
|
16 |
+
"step": 1876
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 1.0,
|
20 |
+
"eval_accuracy": 0.8464818763326226,
|
21 |
+
"eval_f1_macro": 0.4624149659863946,
|
22 |
+
"eval_f1_weighted": 0.8011493190026544,
|
23 |
+
"eval_loss": 0.06743289530277252,
|
24 |
+
"eval_precision_macro": 0.7010764171889492,
|
25 |
+
"eval_precision_weighted": 0.8229165574281427,
|
26 |
+
"eval_recall_macro": 0.44028979481565683,
|
27 |
+
"eval_recall_weighted": 0.8464818763326226,
|
28 |
+
"eval_runtime": 5.0821,
|
29 |
+
"eval_samples_per_second": 184.568,
|
30 |
+
"eval_steps_per_second": 46.24,
|
31 |
+
"step": 1876
|
32 |
+
},
|
33 |
+
{
|
34 |
+
"epoch": 2.0,
|
35 |
+
"grad_norm": 0.18798664212226868,
|
36 |
+
"learning_rate": 6.593041210714562e-06,
|
37 |
+
"loss": 0.0588,
|
38 |
+
"step": 3752
|
39 |
+
},
|
40 |
+
{
|
41 |
+
"epoch": 2.0,
|
42 |
+
"eval_accuracy": 0.8251599147121536,
|
43 |
+
"eval_f1_macro": 0.5992706992706992,
|
44 |
+
"eval_f1_weighted": 0.829400158611246,
|
45 |
+
"eval_loss": 0.06970743834972382,
|
46 |
+
"eval_precision_macro": 0.6140893452063056,
|
47 |
+
"eval_precision_weighted": 0.8365234937694226,
|
48 |
+
"eval_recall_macro": 0.5961441974373008,
|
49 |
+
"eval_recall_weighted": 0.8251599147121536,
|
50 |
+
"eval_runtime": 5.0788,
|
51 |
+
"eval_samples_per_second": 184.689,
|
52 |
+
"eval_steps_per_second": 46.271,
|
53 |
+
"step": 3752
|
54 |
+
},
|
55 |
+
{
|
56 |
+
"epoch": 3.0,
|
57 |
+
"grad_norm": 0.03722580894827843,
|
58 |
+
"learning_rate": 3.486973746877771e-06,
|
59 |
+
"loss": 0.0395,
|
60 |
+
"step": 5628
|
61 |
+
},
|
62 |
+
{
|
63 |
+
"epoch": 3.0,
|
64 |
+
"eval_accuracy": 0.849680170575693,
|
65 |
+
"eval_f1_macro": 0.5773129404708353,
|
66 |
+
"eval_f1_weighted": 0.8345452465837384,
|
67 |
+
"eval_loss": 0.08055932819843292,
|
68 |
+
"eval_precision_macro": 0.6424363946177801,
|
69 |
+
"eval_precision_weighted": 0.8274003556291218,
|
70 |
+
"eval_recall_macro": 0.5397910877436739,
|
71 |
+
"eval_recall_weighted": 0.849680170575693,
|
72 |
+
"eval_runtime": 5.0482,
|
73 |
+
"eval_samples_per_second": 185.807,
|
74 |
+
"eval_steps_per_second": 46.551,
|
75 |
+
"step": 5628
|
76 |
+
},
|
77 |
+
{
|
78 |
+
"epoch": 4.0,
|
79 |
+
"grad_norm": 0.22263826429843903,
|
80 |
+
"learning_rate": 9.648338779170968e-07,
|
81 |
+
"loss": 0.0239,
|
82 |
+
"step": 7504
|
83 |
+
},
|
84 |
+
{
|
85 |
+
"epoch": 4.0,
|
86 |
+
"eval_accuracy": 0.8336886993603412,
|
87 |
+
"eval_f1_macro": 0.6007231820008035,
|
88 |
+
"eval_f1_weighted": 0.8327353646735044,
|
89 |
+
"eval_loss": 0.0956118032336235,
|
90 |
+
"eval_precision_macro": 0.6171054012879108,
|
91 |
+
"eval_precision_weighted": 0.8329684712250969,
|
92 |
+
"eval_recall_macro": 0.5898380302115934,
|
93 |
+
"eval_recall_weighted": 0.8336886993603412,
|
94 |
+
"eval_runtime": 5.0679,
|
95 |
+
"eval_samples_per_second": 185.087,
|
96 |
+
"eval_steps_per_second": 46.371,
|
97 |
+
"step": 7504
|
98 |
+
}
|
99 |
+
],
|
100 |
+
"logging_steps": 50,
|
101 |
+
"max_steps": 9380,
|
102 |
+
"num_input_tokens_seen": 0,
|
103 |
+
"num_train_epochs": 5,
|
104 |
+
"save_steps": 50,
|
105 |
+
"stateful_callbacks": {
|
106 |
+
"EarlyStoppingCallback": {
|
107 |
+
"args": {
|
108 |
+
"early_stopping_patience": 2,
|
109 |
+
"early_stopping_threshold": 0.0
|
110 |
+
},
|
111 |
+
"attributes": {
|
112 |
+
"early_stopping_patience_counter": 0
|
113 |
+
}
|
114 |
+
},
|
115 |
+
"TrainerControl": {
|
116 |
+
"args": {
|
117 |
+
"should_epoch_stop": false,
|
118 |
+
"should_evaluate": false,
|
119 |
+
"should_log": false,
|
120 |
+
"should_save": true,
|
121 |
+
"should_training_stop": false
|
122 |
+
},
|
123 |
+
"attributes": {}
|
124 |
+
}
|
125 |
+
},
|
126 |
+
"total_flos": 1987576294404096.0,
|
127 |
+
"train_batch_size": 4,
|
128 |
+
"trial_name": null,
|
129 |
+
"trial_params": null
|
130 |
+
}
|
checkpoint-7504/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:715c96820c45effa2b6e5bb29121868a8187d8c31c98bf6d56350dbbfcd41a1b
|
3 |
+
size 5240
|
checkpoint-9380/config.json
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "distilbert/distilbert-base-multilingual-cased",
|
3 |
+
"activation": "gelu",
|
4 |
+
"architectures": [
|
5 |
+
"DistilBertForSequenceClassification"
|
6 |
+
],
|
7 |
+
"attention_dropout": 0.1,
|
8 |
+
"dim": 768,
|
9 |
+
"dropout": 0.1,
|
10 |
+
"hidden_dim": 3072,
|
11 |
+
"id2label": {
|
12 |
+
"0": "LABEL_0",
|
13 |
+
"1": "LABEL_1",
|
14 |
+
"2": "LABEL_2"
|
15 |
+
},
|
16 |
+
"initializer_range": 0.02,
|
17 |
+
"label2id": {
|
18 |
+
"LABEL_0": 0,
|
19 |
+
"LABEL_1": 1,
|
20 |
+
"LABEL_2": 2
|
21 |
+
},
|
22 |
+
"max_position_embeddings": 512,
|
23 |
+
"model_type": "distilbert",
|
24 |
+
"n_heads": 12,
|
25 |
+
"n_layers": 6,
|
26 |
+
"output_past": true,
|
27 |
+
"pad_token_id": 0,
|
28 |
+
"qa_dropout": 0.1,
|
29 |
+
"seq_classif_dropout": 0.2,
|
30 |
+
"sinusoidal_pos_embds": false,
|
31 |
+
"tie_weights_": true,
|
32 |
+
"torch_dtype": "float32",
|
33 |
+
"transformers_version": "4.49.0",
|
34 |
+
"vocab_size": 119547
|
35 |
+
}
|
checkpoint-9380/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a4a5ead72a2594f36b1057a7e9b143980d2aa440c42a278eac54996835532002
|
3 |
+
size 541320452
|
checkpoint-9380/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4fddbb5e1314edd61875e1d554350b01b8ffcaa3549e28c1dea54020bd0ca03c
|
3 |
+
size 1082703034
|
checkpoint-9380/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bcfbff5bc84a8ebf0204467cc036c44a9325bd1e3df59fa8df328d90be66f86c
|
3 |
+
size 14244
|
checkpoint-9380/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fbed8c49a5fc15cc0d6078ec60fa0ae98198507be7a37a106b7a52fb78ec09a6
|
3 |
+
size 1064
|
checkpoint-9380/trainer_state.json
ADDED
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.6007231820008035,
|
3 |
+
"best_model_checkpoint": "model/checkpoint-7504",
|
4 |
+
"epoch": 5.0,
|
5 |
+
"eval_steps": 50,
|
6 |
+
"global_step": 9380,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 1.0,
|
13 |
+
"grad_norm": 0.232726588845253,
|
14 |
+
"learning_rate": 9.084300622945162e-06,
|
15 |
+
"loss": 0.0817,
|
16 |
+
"step": 1876
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 1.0,
|
20 |
+
"eval_accuracy": 0.8464818763326226,
|
21 |
+
"eval_f1_macro": 0.4624149659863946,
|
22 |
+
"eval_f1_weighted": 0.8011493190026544,
|
23 |
+
"eval_loss": 0.06743289530277252,
|
24 |
+
"eval_precision_macro": 0.7010764171889492,
|
25 |
+
"eval_precision_weighted": 0.8229165574281427,
|
26 |
+
"eval_recall_macro": 0.44028979481565683,
|
27 |
+
"eval_recall_weighted": 0.8464818763326226,
|
28 |
+
"eval_runtime": 5.0821,
|
29 |
+
"eval_samples_per_second": 184.568,
|
30 |
+
"eval_steps_per_second": 46.24,
|
31 |
+
"step": 1876
|
32 |
+
},
|
33 |
+
{
|
34 |
+
"epoch": 2.0,
|
35 |
+
"grad_norm": 0.18798664212226868,
|
36 |
+
"learning_rate": 6.593041210714562e-06,
|
37 |
+
"loss": 0.0588,
|
38 |
+
"step": 3752
|
39 |
+
},
|
40 |
+
{
|
41 |
+
"epoch": 2.0,
|
42 |
+
"eval_accuracy": 0.8251599147121536,
|
43 |
+
"eval_f1_macro": 0.5992706992706992,
|
44 |
+
"eval_f1_weighted": 0.829400158611246,
|
45 |
+
"eval_loss": 0.06970743834972382,
|
46 |
+
"eval_precision_macro": 0.6140893452063056,
|
47 |
+
"eval_precision_weighted": 0.8365234937694226,
|
48 |
+
"eval_recall_macro": 0.5961441974373008,
|
49 |
+
"eval_recall_weighted": 0.8251599147121536,
|
50 |
+
"eval_runtime": 5.0788,
|
51 |
+
"eval_samples_per_second": 184.689,
|
52 |
+
"eval_steps_per_second": 46.271,
|
53 |
+
"step": 3752
|
54 |
+
},
|
55 |
+
{
|
56 |
+
"epoch": 3.0,
|
57 |
+
"grad_norm": 0.03722580894827843,
|
58 |
+
"learning_rate": 3.486973746877771e-06,
|
59 |
+
"loss": 0.0395,
|
60 |
+
"step": 5628
|
61 |
+
},
|
62 |
+
{
|
63 |
+
"epoch": 3.0,
|
64 |
+
"eval_accuracy": 0.849680170575693,
|
65 |
+
"eval_f1_macro": 0.5773129404708353,
|
66 |
+
"eval_f1_weighted": 0.8345452465837384,
|
67 |
+
"eval_loss": 0.08055932819843292,
|
68 |
+
"eval_precision_macro": 0.6424363946177801,
|
69 |
+
"eval_precision_weighted": 0.8274003556291218,
|
70 |
+
"eval_recall_macro": 0.5397910877436739,
|
71 |
+
"eval_recall_weighted": 0.849680170575693,
|
72 |
+
"eval_runtime": 5.0482,
|
73 |
+
"eval_samples_per_second": 185.807,
|
74 |
+
"eval_steps_per_second": 46.551,
|
75 |
+
"step": 5628
|
76 |
+
},
|
77 |
+
{
|
78 |
+
"epoch": 4.0,
|
79 |
+
"grad_norm": 0.22263826429843903,
|
80 |
+
"learning_rate": 9.648338779170968e-07,
|
81 |
+
"loss": 0.0239,
|
82 |
+
"step": 7504
|
83 |
+
},
|
84 |
+
{
|
85 |
+
"epoch": 4.0,
|
86 |
+
"eval_accuracy": 0.8336886993603412,
|
87 |
+
"eval_f1_macro": 0.6007231820008035,
|
88 |
+
"eval_f1_weighted": 0.8327353646735044,
|
89 |
+
"eval_loss": 0.0956118032336235,
|
90 |
+
"eval_precision_macro": 0.6171054012879108,
|
91 |
+
"eval_precision_weighted": 0.8329684712250969,
|
92 |
+
"eval_recall_macro": 0.5898380302115934,
|
93 |
+
"eval_recall_weighted": 0.8336886993603412,
|
94 |
+
"eval_runtime": 5.0679,
|
95 |
+
"eval_samples_per_second": 185.087,
|
96 |
+
"eval_steps_per_second": 46.371,
|
97 |
+
"step": 7504
|
98 |
+
},
|
99 |
+
{
|
100 |
+
"epoch": 5.0,
|
101 |
+
"grad_norm": 0.006782012525945902,
|
102 |
+
"learning_rate": 0.0,
|
103 |
+
"loss": 0.0168,
|
104 |
+
"step": 9380
|
105 |
+
},
|
106 |
+
{
|
107 |
+
"epoch": 5.0,
|
108 |
+
"eval_accuracy": 0.8390191897654584,
|
109 |
+
"eval_f1_macro": 0.5986856629739029,
|
110 |
+
"eval_f1_weighted": 0.8356391273708549,
|
111 |
+
"eval_loss": 0.09994609653949738,
|
112 |
+
"eval_precision_macro": 0.6148482050686072,
|
113 |
+
"eval_precision_weighted": 0.8329834413768394,
|
114 |
+
"eval_recall_macro": 0.5854920143713247,
|
115 |
+
"eval_recall_weighted": 0.8390191897654584,
|
116 |
+
"eval_runtime": 5.1031,
|
117 |
+
"eval_samples_per_second": 183.809,
|
118 |
+
"eval_steps_per_second": 46.05,
|
119 |
+
"step": 9380
|
120 |
+
}
|
121 |
+
],
|
122 |
+
"logging_steps": 50,
|
123 |
+
"max_steps": 9380,
|
124 |
+
"num_input_tokens_seen": 0,
|
125 |
+
"num_train_epochs": 5,
|
126 |
+
"save_steps": 50,
|
127 |
+
"stateful_callbacks": {
|
128 |
+
"EarlyStoppingCallback": {
|
129 |
+
"args": {
|
130 |
+
"early_stopping_patience": 2,
|
131 |
+
"early_stopping_threshold": 0.0
|
132 |
+
},
|
133 |
+
"attributes": {
|
134 |
+
"early_stopping_patience_counter": 1
|
135 |
+
}
|
136 |
+
},
|
137 |
+
"TrainerControl": {
|
138 |
+
"args": {
|
139 |
+
"should_epoch_stop": false,
|
140 |
+
"should_evaluate": false,
|
141 |
+
"should_log": false,
|
142 |
+
"should_save": true,
|
143 |
+
"should_training_stop": true
|
144 |
+
},
|
145 |
+
"attributes": {}
|
146 |
+
}
|
147 |
+
},
|
148 |
+
"total_flos": 2484470368005120.0,
|
149 |
+
"train_batch_size": 4,
|
150 |
+
"trial_name": null,
|
151 |
+
"trial_params": null
|
152 |
+
}
|
checkpoint-9380/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:715c96820c45effa2b6e5bb29121868a8187d8c31c98bf6d56350dbbfcd41a1b
|
3 |
+
size 5240
|
config.json
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "distilbert/distilbert-base-multilingual-cased",
|
3 |
+
"activation": "gelu",
|
4 |
+
"architectures": [
|
5 |
+
"DistilBertForSequenceClassification"
|
6 |
+
],
|
7 |
+
"attention_dropout": 0.1,
|
8 |
+
"dim": 768,
|
9 |
+
"dropout": 0.1,
|
10 |
+
"hidden_dim": 3072,
|
11 |
+
"id2label": {
|
12 |
+
"0": "LABEL_0",
|
13 |
+
"1": "LABEL_1",
|
14 |
+
"2": "LABEL_2"
|
15 |
+
},
|
16 |
+
"initializer_range": 0.02,
|
17 |
+
"label2id": {
|
18 |
+
"LABEL_0": 0,
|
19 |
+
"LABEL_1": 1,
|
20 |
+
"LABEL_2": 2
|
21 |
+
},
|
22 |
+
"max_position_embeddings": 512,
|
23 |
+
"model_type": "distilbert",
|
24 |
+
"n_heads": 12,
|
25 |
+
"n_layers": 6,
|
26 |
+
"output_past": true,
|
27 |
+
"pad_token_id": 0,
|
28 |
+
"qa_dropout": 0.1,
|
29 |
+
"seq_classif_dropout": 0.2,
|
30 |
+
"sinusoidal_pos_embds": false,
|
31 |
+
"tie_weights_": true,
|
32 |
+
"torch_dtype": "float32",
|
33 |
+
"transformers_version": "4.49.0",
|
34 |
+
"vocab_size": 119547
|
35 |
+
}
|
events.out.tfevents.1740349787.d7086afb14ac.759.3
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aaaeb8b3efa39e2d2efd8a29fb90567e053185a5e770465c514fab7e86b7383b
|
3 |
+
size 5764
|
events.out.tfevents.1740350033.d7086afb14ac.759.4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4f5b6397ad8a6d79d51d8368946d12081fe863c6c09563d541ffd354c9d95e56
|
3 |
+
size 5764
|
events.out.tfevents.1740350707.d7086afb14ac.759.5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:21a6b2cc67220dd0fc0f96637bd3ac88b702149f5c517ea978aaaa8188046bb0
|
3 |
+
size 9820
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4fa0daa19a98b896474d8829711aba4f1fd1851b1c4255f4ef69dd5d3eb102c1
|
3 |
+
size 541320452
|
runs/Feb22_13-02-01_c4da036077fd/events.out.tfevents.1740229322.c4da036077fd.6169.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6f6e11dd354bca679e52c1fb04eb65d3c8a1bf5bf9089dcc63716b0bda6e43ef
|
3 |
+
size 5260
|
runs/Feb22_13-05-37_c4da036077fd/events.out.tfevents.1740229538.c4da036077fd.6169.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c2a9dd7ce9f2b2ff96880e633ca103ae353c14486fee1c9d3034b54602ce9fbf
|
3 |
+
size 5258
|
runs/Feb22_13-06-15_c4da036077fd/events.out.tfevents.1740229576.c4da036077fd.6169.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:450eaf33ecdfa9b8ecb07dce4b18a558db3056de39888149297150c62814f04b
|
3 |
+
size 5261
|
runs/Feb22_13-07-10_c4da036077fd/events.out.tfevents.1740229631.c4da036077fd.6169.3
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:58bcf68632f1f04dd9a9f72316889bc5078184c1e25efe7fc70623882e3c1086
|
3 |
+
size 5261
|
runs/Feb22_13-08-22_c4da036077fd/events.out.tfevents.1740229703.c4da036077fd.6169.4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a93d9b038e1536a43909d796324c56074b2bc8a6b18b7d5318b12453933f1f89
|
3 |
+
size 5260
|
runs/Feb22_13-10-35_c4da036077fd/events.out.tfevents.1740229837.c4da036077fd.6169.5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba102fc5e8886c888a39079779d0d8c6dc3e936d6cf05c6d6cb03baa0df0b9d5
|
3 |
+
size 5261
|
runs/Feb22_13-11-22_c4da036077fd/events.out.tfevents.1740229884.c4da036077fd.6169.6
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5dee085d1b0493107185c7ac923233568b6990751842be14d4ade6c3455d7f0d
|
3 |
+
size 5259
|
runs/Feb22_13-12-47_c4da036077fd/events.out.tfevents.1740229969.c4da036077fd.6169.7
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c9b0cb488c66afa4bb81efbd2a2842a55b227416750240ef15327d1489b3327c
|
3 |
+
size 5689
|
runs/Feb22_13-16-26_c4da036077fd/events.out.tfevents.1740230187.c4da036077fd.6169.8
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c68bf3056cc3b51ee82f7ad24a138d8e28e611852aed7034b0fcd34398af08ff
|
3 |
+
size 5259
|
runs/Feb22_13-30-31_c4da036077fd/events.out.tfevents.1740231033.c4da036077fd.9367.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7e1ad23f38788764778c6dc40b99d6543fff155aa1948d2419fb719eb7e24c79
|
3 |
+
size 5689
|
runs/Feb22_13-34-19_c4da036077fd/events.out.tfevents.1740231261.c4da036077fd.9367.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c5ac60542d39ed33a6e1567ec80aacf77ac2fb1e2f6b89cdd7f2d63a167a1454
|
3 |
+
size 5263
|
runs/Feb22_13-35-13_c4da036077fd/events.out.tfevents.1740231315.c4da036077fd.9367.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a3a8bf45edb3efc01beb4465d323a506f0685ff64b744cc78bfc5f9aa037fe00
|
3 |
+
size 6214
|
runs/Feb22_13-40-16_c4da036077fd/events.out.tfevents.1740231618.c4da036077fd.9367.3
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2a2c520c78ca0876056d46eaa91cd4361383e9c5d69312765ddf0c212d9b2302
|
3 |
+
size 9628
|
runs/Feb22_13-40-16_c4da036077fd/events.out.tfevents.1740232282.c4da036077fd.9367.4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fe811108afd5a92a7bac36f41385aa9063276fabaed4724f35e321071ba4253a
|
3 |
+
size 1037
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:715c96820c45effa2b6e5bb29121868a8187d8c31c98bf6d56350dbbfcd41a1b
|
3 |
+
size 5240
|