Model save
Browse files
README.md
CHANGED
@@ -5,24 +5,24 @@ base_model: vinai/phobert-base
|
|
5 |
tags:
|
6 |
- generated_from_trainer
|
7 |
model-index:
|
8 |
-
- name:
|
9 |
results: []
|
10 |
---
|
11 |
|
12 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
13 |
should probably proofread and complete it, then remove this comment. -->
|
14 |
|
15 |
-
#
|
16 |
|
17 |
This model is a fine-tuned version of [vinai/phobert-base](https://huggingface.co/vinai/phobert-base) on the None dataset.
|
18 |
It achieves the following results on the evaluation set:
|
19 |
-
- Loss: 1.
|
20 |
-
- Micro F1:
|
21 |
-
- Micro Precision:
|
22 |
-
- Micro Recall:
|
23 |
-
- Macro F1:
|
24 |
-
- Macro Precision:
|
25 |
-
- Macro Recall:
|
26 |
|
27 |
## Model description
|
28 |
|
@@ -51,14 +51,18 @@ The following hyperparameters were used during training:
|
|
51 |
- optimizer: Use adamw_torch_fused with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
|
52 |
- lr_scheduler_type: cosine
|
53 |
- lr_scheduler_warmup_ratio: 0.01
|
54 |
-
- num_epochs:
|
55 |
- mixed_precision_training: Native AMP
|
56 |
|
57 |
### Training results
|
58 |
|
59 |
| Training Loss | Epoch | Step | Validation Loss | Micro F1 | Micro Precision | Micro Recall | Macro F1 | Macro Precision | Macro Recall |
|
60 |
|:-------------:|:------:|:----:|:---------------:|:--------:|:---------------:|:------------:|:--------:|:---------------:|:------------:|
|
61 |
-
| 1.9456 | 0
|
|
|
|
|
|
|
|
|
62 |
|
63 |
|
64 |
### Framework versions
|
|
|
5 |
tags:
|
6 |
- generated_from_trainer
|
7 |
model-index:
|
8 |
+
- name: roberta-base-multilingual-sentiment
|
9 |
results: []
|
10 |
---
|
11 |
|
12 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
13 |
should probably proofread and complete it, then remove this comment. -->
|
14 |
|
15 |
+
# roberta-base-multilingual-sentiment
|
16 |
|
17 |
This model is a fine-tuned version of [vinai/phobert-base](https://huggingface.co/vinai/phobert-base) on the None dataset.
|
18 |
It achieves the following results on the evaluation set:
|
19 |
+
- Loss: 1.7158
|
20 |
+
- Micro F1: 31.1953
|
21 |
+
- Micro Precision: 31.1953
|
22 |
+
- Micro Recall: 31.1953
|
23 |
+
- Macro F1: 6.7937
|
24 |
+
- Macro Precision: 4.4565
|
25 |
+
- Macro Recall: 14.2857
|
26 |
|
27 |
## Model description
|
28 |
|
|
|
51 |
- optimizer: Use adamw_torch_fused with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
|
52 |
- lr_scheduler_type: cosine
|
53 |
- lr_scheduler_warmup_ratio: 0.01
|
54 |
+
- num_epochs: 5.0
|
55 |
- mixed_precision_training: Native AMP
|
56 |
|
57 |
### Training results
|
58 |
|
59 |
| Training Loss | Epoch | Step | Validation Loss | Micro F1 | Micro Precision | Micro Recall | Macro F1 | Macro Precision | Macro Recall |
|
60 |
|:-------------:|:------:|:----:|:---------------:|:--------:|:---------------:|:------------:|:--------:|:---------------:|:------------:|
|
61 |
+
| 1.9456 | 1.0 | 6 | 1.9434 | 18.2216 | 18.2216 | 18.2216 | 9.4327 | 17.2520 | 13.4610 |
|
62 |
+
| 1.7519 | 2.0 | 12 | 1.9434 | 18.2216 | 18.2216 | 18.2216 | 9.4327 | 17.2520 | 13.4610 |
|
63 |
+
| 1.7517 | 3.0 | 18 | 1.9434 | 18.2216 | 18.2216 | 18.2216 | 9.4327 | 17.2520 | 13.4610 |
|
64 |
+
| 1.7491 | 4.0 | 24 | 1.7236 | 31.1953 | 31.1953 | 31.1953 | 6.7937 | 4.4565 | 14.2857 |
|
65 |
+
| 1.6352 | 4.1818 | 25 | 1.7158 | 31.1953 | 31.1953 | 31.1953 | 6.7937 | 4.4565 | 14.2857 |
|
66 |
|
67 |
|
68 |
### Framework versions
|
all_results.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
"eval_f1": 0.09432731429418081,
|
4 |
"eval_loss": 1.943359375,
|
5 |
"eval_macro_f1": 9.432731429418082,
|
@@ -26,8 +26,8 @@
|
|
26 |
"test_runtime": 0.3027,
|
27 |
"test_samples_per_second": 2289.139,
|
28 |
"test_steps_per_second": 6.606,
|
29 |
-
"train_loss": 1.
|
30 |
-
"train_runtime":
|
31 |
-
"train_samples_per_second":
|
32 |
-
"train_steps_per_second": 0.
|
33 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 4.181818181818182,
|
3 |
"eval_f1": 0.09432731429418081,
|
4 |
"eval_loss": 1.943359375,
|
5 |
"eval_macro_f1": 9.432731429418082,
|
|
|
26 |
"test_runtime": 0.3027,
|
27 |
"test_samples_per_second": 2289.139,
|
28 |
"test_steps_per_second": 6.606,
|
29 |
+
"train_loss": 1.7666796875,
|
30 |
+
"train_runtime": 35.9676,
|
31 |
+
"train_samples_per_second": 771.25,
|
32 |
+
"train_steps_per_second": 0.695
|
33 |
}
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 270031446
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9fbc9ff253665d600c1d25222ca96bc4c818f7dc1a2e4c071fd0a7e36e13cb23
|
3 |
size 270031446
|
runs/May08_07-42-18_hn-fornix-testing-gpu-platform-2/events.out.tfevents.1746690161.hn-fornix-testing-gpu-platform-2.2694805.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:540208cefc1933f6678ef01d47dd8d6430c84a8f4c48e5ac87c465c76f604ccc
|
3 |
+
size 9986
|
train_results.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"train_loss": 1.
|
4 |
-
"train_runtime":
|
5 |
-
"train_samples_per_second":
|
6 |
-
"train_steps_per_second": 0.
|
7 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 4.181818181818182,
|
3 |
+
"train_loss": 1.7666796875,
|
4 |
+
"train_runtime": 35.9676,
|
5 |
+
"train_samples_per_second": 771.25,
|
6 |
+
"train_steps_per_second": 0.695
|
7 |
}
|
trainer_state.json
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
{
|
2 |
-
"best_global_step":
|
3 |
-
"best_metric":
|
4 |
-
"best_model_checkpoint": "/data/hungnm/unisentiment/roberta-base-sentiment/checkpoint-
|
5 |
-
"epoch":
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -17,7 +17,7 @@
|
|
17 |
"step": 5
|
18 |
},
|
19 |
{
|
20 |
-
"epoch": 0
|
21 |
"eval_loss": 1.943359375,
|
22 |
"eval_macro_f1": 9.432731429418082,
|
23 |
"eval_macro_precision": 17.25200652415304,
|
@@ -25,25 +25,109 @@
|
|
25 |
"eval_micro_f1": 18.221574344023324,
|
26 |
"eval_micro_precision": 18.221574344023324,
|
27 |
"eval_micro_recall": 18.221574344023324,
|
28 |
-
"eval_runtime": 0.
|
29 |
-
"eval_samples_per_second":
|
30 |
-
"eval_steps_per_second":
|
31 |
-
"step":
|
32 |
},
|
33 |
{
|
34 |
-
"epoch":
|
35 |
-
"
|
36 |
-
"
|
37 |
-
"
|
38 |
-
"
|
39 |
-
|
40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
}
|
42 |
],
|
43 |
"logging_steps": 5,
|
44 |
-
"max_steps":
|
45 |
"num_input_tokens_seen": 0,
|
46 |
-
"num_train_epochs":
|
47 |
"save_steps": 5.0,
|
48 |
"stateful_callbacks": {
|
49 |
"TrainerControl": {
|
@@ -57,7 +141,7 @@
|
|
57 |
"attributes": {}
|
58 |
}
|
59 |
},
|
60 |
-
"total_flos":
|
61 |
"train_batch_size": 512,
|
62 |
"trial_name": null,
|
63 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_global_step": 24,
|
3 |
+
"best_metric": 31.195335276967928,
|
4 |
+
"best_model_checkpoint": "/data/hungnm/unisentiment/roberta-base-sentiment/checkpoint-24",
|
5 |
+
"epoch": 4.181818181818182,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 25,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
17 |
"step": 5
|
18 |
},
|
19 |
{
|
20 |
+
"epoch": 1.0,
|
21 |
"eval_loss": 1.943359375,
|
22 |
"eval_macro_f1": 9.432731429418082,
|
23 |
"eval_macro_precision": 17.25200652415304,
|
|
|
25 |
"eval_micro_f1": 18.221574344023324,
|
26 |
"eval_micro_precision": 18.221574344023324,
|
27 |
"eval_micro_recall": 18.221574344023324,
|
28 |
+
"eval_runtime": 0.1703,
|
29 |
+
"eval_samples_per_second": 4027.509,
|
30 |
+
"eval_steps_per_second": 11.742,
|
31 |
+
"step": 6
|
32 |
},
|
33 |
{
|
34 |
+
"epoch": 1.7272727272727273,
|
35 |
+
"grad_norm": 0.0,
|
36 |
+
"learning_rate": 0.0,
|
37 |
+
"loss": 1.7519,
|
38 |
+
"step": 10
|
39 |
+
},
|
40 |
+
{
|
41 |
+
"epoch": 2.0,
|
42 |
+
"eval_loss": 1.943359375,
|
43 |
+
"eval_macro_f1": 9.432731429418082,
|
44 |
+
"eval_macro_precision": 17.25200652415304,
|
45 |
+
"eval_macro_recall": 13.460980331549635,
|
46 |
+
"eval_micro_f1": 18.221574344023324,
|
47 |
+
"eval_micro_precision": 18.221574344023324,
|
48 |
+
"eval_micro_recall": 18.221574344023324,
|
49 |
+
"eval_runtime": 0.1648,
|
50 |
+
"eval_samples_per_second": 4161.491,
|
51 |
+
"eval_steps_per_second": 12.133,
|
52 |
+
"step": 12
|
53 |
+
},
|
54 |
+
{
|
55 |
+
"epoch": 2.5454545454545454,
|
56 |
+
"grad_norm": 0.0,
|
57 |
+
"learning_rate": 0.0,
|
58 |
+
"loss": 1.7517,
|
59 |
+
"step": 15
|
60 |
+
},
|
61 |
+
{
|
62 |
+
"epoch": 3.0,
|
63 |
+
"eval_loss": 1.943359375,
|
64 |
+
"eval_macro_f1": 9.432731429418082,
|
65 |
+
"eval_macro_precision": 17.25200652415304,
|
66 |
+
"eval_macro_recall": 13.460980331549635,
|
67 |
+
"eval_micro_f1": 18.221574344023324,
|
68 |
+
"eval_micro_precision": 18.221574344023324,
|
69 |
+
"eval_micro_recall": 18.221574344023324,
|
70 |
+
"eval_runtime": 0.1698,
|
71 |
+
"eval_samples_per_second": 4040.415,
|
72 |
+
"eval_steps_per_second": 11.78,
|
73 |
+
"step": 18
|
74 |
+
},
|
75 |
+
{
|
76 |
+
"epoch": 3.3636363636363638,
|
77 |
+
"grad_norm": 1.8663069009780884,
|
78 |
+
"learning_rate": 5e-05,
|
79 |
+
"loss": 1.7491,
|
80 |
+
"step": 20
|
81 |
+
},
|
82 |
+
{
|
83 |
+
"epoch": 4.0,
|
84 |
+
"eval_loss": 1.7236328125,
|
85 |
+
"eval_macro_f1": 6.7936507936507935,
|
86 |
+
"eval_macro_precision": 4.456476468138275,
|
87 |
+
"eval_macro_recall": 14.285714285714285,
|
88 |
+
"eval_micro_f1": 31.195335276967928,
|
89 |
+
"eval_micro_precision": 31.195335276967928,
|
90 |
+
"eval_micro_recall": 31.195335276967928,
|
91 |
+
"eval_runtime": 0.1645,
|
92 |
+
"eval_samples_per_second": 4170.418,
|
93 |
+
"eval_steps_per_second": 12.159,
|
94 |
+
"step": 24
|
95 |
+
},
|
96 |
+
{
|
97 |
+
"epoch": 4.181818181818182,
|
98 |
+
"grad_norm": 2.6474199295043945,
|
99 |
+
"learning_rate": 4.4833833507280884e-05,
|
100 |
+
"loss": 1.6352,
|
101 |
+
"step": 25
|
102 |
+
},
|
103 |
+
{
|
104 |
+
"epoch": 4.181818181818182,
|
105 |
+
"eval_loss": 1.7158203125,
|
106 |
+
"eval_macro_f1": 6.7936507936507935,
|
107 |
+
"eval_macro_precision": 4.456476468138275,
|
108 |
+
"eval_macro_recall": 14.285714285714285,
|
109 |
+
"eval_micro_f1": 31.195335276967928,
|
110 |
+
"eval_micro_precision": 31.195335276967928,
|
111 |
+
"eval_micro_recall": 31.195335276967928,
|
112 |
+
"eval_runtime": 0.1665,
|
113 |
+
"eval_samples_per_second": 4120.855,
|
114 |
+
"eval_steps_per_second": 12.014,
|
115 |
+
"step": 25
|
116 |
+
},
|
117 |
+
{
|
118 |
+
"epoch": 4.181818181818182,
|
119 |
+
"step": 25,
|
120 |
+
"total_flos": 1301189864980480.0,
|
121 |
+
"train_loss": 1.7666796875,
|
122 |
+
"train_runtime": 35.9676,
|
123 |
+
"train_samples_per_second": 771.25,
|
124 |
+
"train_steps_per_second": 0.695
|
125 |
}
|
126 |
],
|
127 |
"logging_steps": 5,
|
128 |
+
"max_steps": 25,
|
129 |
"num_input_tokens_seen": 0,
|
130 |
+
"num_train_epochs": 5,
|
131 |
"save_steps": 5.0,
|
132 |
"stateful_callbacks": {
|
133 |
"TrainerControl": {
|
|
|
141 |
"attributes": {}
|
142 |
}
|
143 |
},
|
144 |
+
"total_flos": 1301189864980480.0,
|
145 |
"train_batch_size": 512,
|
146 |
"trial_name": null,
|
147 |
"trial_params": null
|