mapama247 committed on
Commit
7c15122
1 Parent(s): d2e083d

upload ipc_level1_G model

Browse files
config.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "../models/roberta-large/",
3
+ "architectures": [
4
+ "RobertaForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "eos_token_id": 2,
9
+ "finetuning_task": "ipc1",
10
+ "gradient_checkpointing": false,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 1024,
14
+ "id2label": {
15
+ "0": "01",
16
+ "1": "02",
17
+ "2": "03",
18
+ "3": "04",
19
+ "4": "05",
20
+ "5": "06",
21
+ "6": "07",
22
+ "7": "08",
23
+ "8": "09",
24
+ "9": "10",
25
+ "10": "11",
26
+ "11": "12",
27
+ "12": "16",
28
+ "13": "21",
29
+ "14": "99"
30
+ },
31
+ "initializer_range": 0.02,
32
+ "intermediate_size": 4096,
33
+ "label2id": {
34
+ "01": 0,
35
+ "02": 1,
36
+ "03": 2,
37
+ "04": 3,
38
+ "05": 4,
39
+ "06": 5,
40
+ "07": 6,
41
+ "08": 7,
42
+ "09": 8,
43
+ "10": 9,
44
+ "11": 10,
45
+ "12": 11,
46
+ "16": 12,
47
+ "21": 13,
48
+ "99": 14
49
+ },
50
+ "layer_norm_eps": 1e-05,
51
+ "max_position_embeddings": 514,
52
+ "model_type": "roberta",
53
+ "num_attention_heads": 16,
54
+ "num_hidden_layers": 24,
55
+ "pad_token_id": 1,
56
+ "position_embedding_type": "absolute",
57
+ "problem_type": "multi_label_classification",
58
+ "torch_dtype": "float32",
59
+ "transformers_version": "4.9.2",
60
+ "type_vocab_size": 1,
61
+ "use_cache": true,
62
+ "vocab_size": 50265
63
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da3a92db5bc9dfb55bf0b56cba33d7c91c72b198598f9819fbaa6ed580903b7b
3
+ size 1421664557
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46df9fa0b0f11fbf3401de806fc1a967c7fd25b901534d06ed530221bf803d03
3
+ size 15523
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "add_prefix_space": false, "errors": "replace", "sep_token": "</s>", "cls_token": "<s>", "pad_token": "<pad>", "mask_token": "<mask>", "special_tokens_map_file": null, "name_or_path": "../models/roberta-large/", "tokenizer_class": "RobertaTokenizer"}
trainer_state.json ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8754267260214113,
3
+ "best_model_checkpoint": "./output//roberta-large_ipc1_G_5_32_5e-6_0.01_0.06_07-08-22_06-40/checkpoint-24000",
4
+ "epoch": 0.14484881405033495,
5
+ "global_step": 24000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.01,
12
+ "learning_rate": 2.0117890840324302e-07,
13
+ "loss": 0.4247,
14
+ "step": 2000
15
+ },
16
+ {
17
+ "epoch": 0.01,
18
+ "eval_accuracy": 0.03380152270937254,
19
+ "eval_f1": 0.0619012340183338,
20
+ "eval_loss": 0.20814624428749084,
21
+ "eval_roc_auc": 0.5159319239399485,
22
+ "eval_runtime": 8197.4985,
23
+ "eval_samples_per_second": 33.455,
24
+ "eval_steps_per_second": 2.091,
25
+ "step": 2000
26
+ },
27
+ {
28
+ "epoch": 0.02,
29
+ "learning_rate": 4.0235781680648604e-07,
30
+ "loss": 0.1716,
31
+ "step": 4000
32
+ },
33
+ {
34
+ "epoch": 0.02,
35
+ "eval_accuracy": 0.6045477086432718,
36
+ "eval_f1": 0.7149013381719211,
37
+ "eval_loss": 0.12680654227733612,
38
+ "eval_roc_auc": 0.7994218657046591,
39
+ "eval_runtime": 8196.5733,
40
+ "eval_samples_per_second": 33.459,
41
+ "eval_steps_per_second": 2.091,
42
+ "step": 4000
43
+ },
44
+ {
45
+ "epoch": 0.04,
46
+ "learning_rate": 6.03536725209729e-07,
47
+ "loss": 0.114,
48
+ "step": 6000
49
+ },
50
+ {
51
+ "epoch": 0.04,
52
+ "eval_accuracy": 0.7269478719990665,
53
+ "eval_f1": 0.8064969810911458,
54
+ "eval_loss": 0.09042555838823318,
55
+ "eval_roc_auc": 0.863152127136236,
56
+ "eval_runtime": 8195.1585,
57
+ "eval_samples_per_second": 33.465,
58
+ "eval_steps_per_second": 2.092,
59
+ "step": 6000
60
+ },
61
+ {
62
+ "epoch": 0.05,
63
+ "learning_rate": 8.047156336129721e-07,
64
+ "loss": 0.0865,
65
+ "step": 8000
66
+ },
67
+ {
68
+ "epoch": 0.05,
69
+ "eval_accuracy": 0.774762258977276,
70
+ "eval_f1": 0.8402673463726624,
71
+ "eval_loss": 0.07436466217041016,
72
+ "eval_roc_auc": 0.8920166546037435,
73
+ "eval_runtime": 8191.8248,
74
+ "eval_samples_per_second": 33.478,
75
+ "eval_steps_per_second": 2.092,
76
+ "step": 8000
77
+ },
78
+ {
79
+ "epoch": 0.06,
80
+ "learning_rate": 1.005894542016215e-06,
81
+ "loss": 0.0743,
82
+ "step": 10000
83
+ },
84
+ {
85
+ "epoch": 0.06,
86
+ "eval_accuracy": 0.7829883900703013,
87
+ "eval_f1": 0.8487029462020401,
88
+ "eval_loss": 0.06654931604862213,
89
+ "eval_roc_auc": 0.9001618402078589,
90
+ "eval_runtime": 8196.3415,
91
+ "eval_samples_per_second": 33.46,
92
+ "eval_steps_per_second": 2.091,
93
+ "step": 10000
94
+ },
95
+ {
96
+ "epoch": 0.07,
97
+ "learning_rate": 1.207073450419458e-06,
98
+ "loss": 0.0664,
99
+ "step": 12000
100
+ },
101
+ {
102
+ "epoch": 0.07,
103
+ "eval_accuracy": 0.794452466380794,
104
+ "eval_f1": 0.859163976123391,
105
+ "eval_loss": 0.061000920832157135,
106
+ "eval_roc_auc": 0.9083440937813021,
107
+ "eval_runtime": 8197.7585,
108
+ "eval_samples_per_second": 33.454,
109
+ "eval_steps_per_second": 2.091,
110
+ "step": 12000
111
+ },
112
+ {
113
+ "epoch": 0.08,
114
+ "learning_rate": 1.4082523588227012e-06,
115
+ "loss": 0.062,
116
+ "step": 14000
117
+ },
118
+ {
119
+ "epoch": 0.08,
120
+ "eval_accuracy": 0.7962172923777019,
121
+ "eval_f1": 0.8624403660379281,
122
+ "eval_loss": 0.05871045961976051,
123
+ "eval_roc_auc": 0.9133217347648154,
124
+ "eval_runtime": 8199.4678,
125
+ "eval_samples_per_second": 33.447,
126
+ "eval_steps_per_second": 2.091,
127
+ "step": 14000
128
+ },
129
+ {
130
+ "epoch": 0.1,
131
+ "learning_rate": 1.6094312672259442e-06,
132
+ "loss": 0.0599,
133
+ "step": 16000
134
+ },
135
+ {
136
+ "epoch": 0.1,
137
+ "eval_accuracy": 0.7979055453458184,
138
+ "eval_f1": 0.8656510165555269,
139
+ "eval_loss": 0.05596928298473358,
140
+ "eval_roc_auc": 0.9150237468241987,
141
+ "eval_runtime": 8200.3089,
142
+ "eval_samples_per_second": 33.444,
143
+ "eval_steps_per_second": 2.09,
144
+ "step": 16000
145
+ },
146
+ {
147
+ "epoch": 0.11,
148
+ "learning_rate": 1.8106101756291871e-06,
149
+ "loss": 0.0569,
150
+ "step": 18000
151
+ },
152
+ {
153
+ "epoch": 0.11,
154
+ "eval_accuracy": 0.8009939908404072,
155
+ "eval_f1": 0.8665697356924421,
156
+ "eval_loss": 0.05469416454434395,
157
+ "eval_roc_auc": 0.9135503362863645,
158
+ "eval_runtime": 8201.2258,
159
+ "eval_samples_per_second": 33.44,
160
+ "eval_steps_per_second": 2.09,
161
+ "step": 18000
162
+ },
163
+ {
164
+ "epoch": 0.12,
165
+ "learning_rate": 2.01178908403243e-06,
166
+ "loss": 0.0554,
167
+ "step": 20000
168
+ },
169
+ {
170
+ "epoch": 0.12,
171
+ "eval_accuracy": 0.8071891135031066,
172
+ "eval_f1": 0.8723540166082954,
173
+ "eval_loss": 0.052826616913080215,
174
+ "eval_roc_auc": 0.9192588608785235,
175
+ "eval_runtime": 8204.8057,
176
+ "eval_samples_per_second": 33.425,
177
+ "eval_steps_per_second": 2.089,
178
+ "step": 20000
179
+ },
180
+ {
181
+ "epoch": 0.13,
182
+ "learning_rate": 2.2129679924356733e-06,
183
+ "loss": 0.0541,
184
+ "step": 22000
185
+ },
186
+ {
187
+ "epoch": 0.13,
188
+ "eval_accuracy": 0.8054935678655816,
189
+ "eval_f1": 0.8706366827585739,
190
+ "eval_loss": 0.05298588052392006,
191
+ "eval_roc_auc": 0.9176729146649489,
192
+ "eval_runtime": 8206.5949,
193
+ "eval_samples_per_second": 33.418,
194
+ "eval_steps_per_second": 2.089,
195
+ "step": 22000
196
+ },
197
+ {
198
+ "epoch": 0.14,
199
+ "learning_rate": 2.414146900838916e-06,
200
+ "loss": 0.0522,
201
+ "step": 24000
202
+ },
203
+ {
204
+ "epoch": 0.14,
205
+ "eval_accuracy": 0.8137306379627198,
206
+ "eval_f1": 0.8754267260214113,
207
+ "eval_loss": 0.05081520974636078,
208
+ "eval_roc_auc": 0.9180724092422241,
209
+ "eval_runtime": 8275.6912,
210
+ "eval_samples_per_second": 33.139,
211
+ "eval_steps_per_second": 2.071,
212
+ "step": 24000
213
+ }
214
+ ],
215
+ "max_steps": 828450,
216
+ "num_train_epochs": 5,
217
+ "total_flos": 7.15754724655104e+17,
218
+ "trial_name": null,
219
+ "trial_params": null
220
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34c37f67c21e6efe2882d0cdfa94321def51beccd3525adee20f2d06e4cb5a3b
3
+ size 2735
vocab.json ADDED
The diff for this file is too large to render. See raw diff