phongv7104 committed
Commit 0901c5e · verified · 1 Parent(s): dfe0860

Upload trained model
Files changed (7)
  1. config.json +29 -0
  2. model.safetensors +3 -0
  3. optimizer.pt +3 -0
  4. rng_state.pth +3 -0
  5. scheduler.pt +3 -0
  6. trainer_state.json +185 -0
  7. training_args.bin +3 -0
config.json ADDED
@@ -0,0 +1,29 @@
+{
+  "_name_or_path": "nguyenvulebinh/vi-mrc-large",
+  "architectures": [
+    "RobertaForQuestionAnswering"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "roberta",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "output_past": true,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.47.0",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 250002
+}
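
The config above describes a 24-layer RoBERTa encoder (XLM-R-sized vocabulary of 250,002 tokens) with an extractive question-answering head, fine-tuned from nguyenvulebinh/vi-mrc-large. A minimal loading sketch follows; the Hub repo id is a placeholder, and since this commit ships no tokenizer files, the tokenizer is assumed to come from the base model.

# Minimal sketch: load the uploaded checkpoint for extractive QA.
# "phongv7104/reader_model" is a placeholder -- substitute the actual Hub repo id.
# The tokenizer is not part of this commit, so it is taken from "_name_or_path".
from transformers import AutoTokenizer, AutoModelForQuestionAnswering, pipeline

repo_id = "phongv7104/reader_model"  # assumption: replace with the real repo id
tokenizer = AutoTokenizer.from_pretrained("nguyenvulebinh/vi-mrc-large")
model = AutoModelForQuestionAnswering.from_pretrained(repo_id)

qa = pipeline("question-answering", model=model, tokenizer=tokenizer)
result = qa(question="...", context="...")  # fill in a Vietnamese question/context pair
print(result)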
model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4121b1fee3e80eb1e560fd8912d092f2e6056ad037a080a7558122e38e15ae2
+size 2235420048
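
The entry above is a Git LFS pointer rather than the weights themselves; the ~2.2 GB model.safetensors blob is resolved on clone or download and can be checked against the recorded digest. A small sketch of that check, assuming the file has already been fetched locally:

# Sketch: verify a downloaded LFS object against the sha256 recorded in its pointer file.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "c4121b1fee3e80eb1e560fd8912d092f2e6056ad037a080a7558122e38e15ae2"
assert sha256_of("model.safetensors") == expected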
optimizer.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a18d54d8054789dff6f06542d4f16916220d7632585a78d83c268624dd14aece
+size 4471073855
rng_state.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:133ea15aa50a6e6209338fc82b56e2c6ff4b4604152b4b075c74ee582942a61b
+size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0818c2db39a12247f3a5aeef5aec0b169c515576ed57912eac8323d6d8266a19
+size 1064
trainer_state.json ADDED
@@ -0,0 +1,185 @@
+{
+  "best_metric": 0.4843710611445973,
+  "best_model_checkpoint": "./reader_model/checkpoint-8730",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 8730,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.1718213058419244,
+      "grad_norm": 75.05482482910156,
+      "learning_rate": 4.713631156930126e-05,
+      "loss": 2.9216,
+      "step": 500
+    },
+    {
+      "epoch": 0.3436426116838488,
+      "grad_norm": 21.536182403564453,
+      "learning_rate": 4.427262313860252e-05,
+      "loss": 2.3951,
+      "step": 1000
+    },
+    {
+      "epoch": 0.5154639175257731,
+      "grad_norm": 185.8365020751953,
+      "learning_rate": 4.140893470790378e-05,
+      "loss": 2.1697,
+      "step": 1500
+    },
+    {
+      "epoch": 0.6872852233676976,
+      "grad_norm": 48.49885559082031,
+      "learning_rate": 3.854524627720504e-05,
+      "loss": 2.2481,
+      "step": 2000
+    },
+    {
+      "epoch": 0.8591065292096219,
+      "grad_norm": 56.232383728027344,
+      "learning_rate": 3.5681557846506306e-05,
+      "loss": 1.9251,
+      "step": 2500
+    },
+    {
+      "epoch": 1.0,
+      "eval_bertscore": 0.9379365905851414,
+      "eval_bleu1": 0.0,
+      "eval_f1": 0.44161785163619677,
+      "eval_loss": 1.82194983959198,
+      "eval_runtime": 87.956,
+      "eval_samples_per_second": 16.44,
+      "eval_steps_per_second": 4.116,
+      "step": 2910
+    },
+    {
+      "epoch": 1.0309278350515463,
+      "grad_norm": 71.09529876708984,
+      "learning_rate": 3.2817869415807564e-05,
+      "loss": 1.8373,
+      "step": 3000
+    },
+    {
+      "epoch": 1.2027491408934707,
+      "grad_norm": 58.3028450012207,
+      "learning_rate": 2.9954180985108822e-05,
+      "loss": 1.5451,
+      "step": 3500
+    },
+    {
+      "epoch": 1.3745704467353952,
+      "grad_norm": 84.97217559814453,
+      "learning_rate": 2.709049255441008e-05,
+      "loss": 1.461,
+      "step": 4000
+    },
+    {
+      "epoch": 1.5463917525773194,
+      "grad_norm": 41.77628707885742,
+      "learning_rate": 2.422680412371134e-05,
+      "loss": 1.3964,
+      "step": 4500
+    },
+    {
+      "epoch": 1.718213058419244,
+      "grad_norm": 4.4470438957214355,
+      "learning_rate": 2.13631156930126e-05,
+      "loss": 1.3665,
+      "step": 5000
+    },
+    {
+      "epoch": 1.8900343642611683,
+      "grad_norm": 43.41935729980469,
+      "learning_rate": 1.849942726231386e-05,
+      "loss": 1.3783,
+      "step": 5500
+    },
+    {
+      "epoch": 2.0,
+      "eval_bertscore": 0.9420511256774596,
+      "eval_bleu1": 0.0,
+      "eval_f1": 0.47942977396358094,
+      "eval_loss": 1.7279274463653564,
+      "eval_runtime": 81.3708,
+      "eval_samples_per_second": 17.77,
+      "eval_steps_per_second": 4.449,
+      "step": 5820
+    },
+    {
+      "epoch": 2.0618556701030926,
+      "grad_norm": 29.33169937133789,
+      "learning_rate": 1.5635738831615122e-05,
+      "loss": 1.2364,
+      "step": 6000
+    },
+    {
+      "epoch": 2.2336769759450172,
+      "grad_norm": 101.00111389160156,
+      "learning_rate": 1.277205040091638e-05,
+      "loss": 0.952,
+      "step": 6500
+    },
+    {
+      "epoch": 2.4054982817869415,
+      "grad_norm": 163.68463134765625,
+      "learning_rate": 9.90836197021764e-06,
+      "loss": 0.9447,
+      "step": 7000
+    },
+    {
+      "epoch": 2.5773195876288657,
+      "grad_norm": 105.01953125,
+      "learning_rate": 7.0446735395189e-06,
+      "loss": 0.8667,
+      "step": 7500
+    },
+    {
+      "epoch": 2.7491408934707904,
+      "grad_norm": 41.25511169433594,
+      "learning_rate": 4.18098510882016e-06,
+      "loss": 0.8508,
+      "step": 8000
+    },
+    {
+      "epoch": 2.9209621993127146,
+      "grad_norm": 22.426847457885742,
+      "learning_rate": 1.3172966781214204e-06,
+      "loss": 0.8592,
+      "step": 8500
+    },
+    {
+      "epoch": 3.0,
+      "eval_bertscore": 0.9449925996868113,
+      "eval_bleu1": 0.0,
+      "eval_f1": 0.4843710611445973,
+      "eval_loss": 1.917205572128296,
+      "eval_runtime": 81.3736,
+      "eval_samples_per_second": 17.77,
+      "eval_steps_per_second": 4.449,
+      "step": 8730
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 8730,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3.242487592315699e+16,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
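
The trainer state records three full epochs (8,730 optimizer steps at train batch size 4), logging every 500 steps, a linear learning-rate decay consistent with a 5e-5 starting rate and no warmup, per-epoch evaluation (steps 2910 / 5820 / 8730) with F1, BLEU-1, and BERTScore, and the epoch-3 checkpoint kept as best by eval F1. Below is a hedged sketch of TrainingArguments that would yield a state of this shape; anything not visible in this file (save strategy, warmup, weight decay, mixed precision) is an assumption.

# Hedged sketch only: arguments consistent with the recorded trainer_state,
# not the author's actual configuration. Unlisted values are left at defaults.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./reader_model",      # matches best_model_checkpoint in the state
    num_train_epochs=3,
    per_device_train_batch_size=4,
    learning_rate=5e-5,               # implied by the linear decay in log_history
    lr_scheduler_type="linear",
    logging_steps=500,
    eval_strategy="epoch",            # evals logged at steps 2910 / 5820 / 8730
    save_strategy="epoch",            # assumption: checkpoints kept at epoch ends
    load_best_model_at_end=True,
    metric_for_best_model="f1",       # assumption: best_metric tracks eval_f1
    greater_is_better=True,
)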
training_args.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70ae6d8f432b3bcbf3955b42f8070a6cfeb79eab9fdcf845793fb5670d923d62
+size 5368
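
training_args.bin is the pickled TrainingArguments object that Trainer writes with torch.save, so the exact hyperparameters behind the sketch above can be recovered from it directly. A short inspection sketch, assuming the file has been downloaded and a compatible transformers version is installed:

# Sketch: recover the exact hyperparameters from training_args.bin.
# The file holds a pickled TrainingArguments object, so weights_only=False is
# required (PyTorch >= 2.6 defaults torch.load to weights_only=True).
import torch

args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.num_train_epochs, args.per_device_train_batch_size)
print(args.lr_scheduler_type, args.warmup_steps, args.weight_decay)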