Transformers
PyTorch
avijit commited on
Commit
5d06d18
·
verified ·
1 Parent(s): c2dcd55

Upload folder using huggingface_hub

Browse files
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89573e8136980abe922819c2a69a20e355467a119741643c67699d65f66b6a71
3
+ size 1033285754
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7151ef54d19a4977dff2907f4abc5e3116c4cf74e83cf6ad99f66d3ac7734c72
3
+ size 516633490
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f437d73f01af5dca7457e8b1abc58c3639216f9d818679806cac982bcc8c644a
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d339b3af84f62003fcbc2f7413fb6663506d299c4b8cfb91bf6acd2921942dbf
3
+ size 1064
special_tokens_map.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": "<|endoftext|>",
10
+ "mask_token": {
11
+ "content": "<mask>",
12
+ "lstrip": true,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
+ "pad_token": "<|endoftext|>",
18
+ "unk_token": {
19
+ "content": "<|endoftext|>",
20
+ "lstrip": false,
21
+ "normalized": false,
22
+ "rstrip": false,
23
+ "single_word": false
24
+ }
25
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<|padding|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "50254": {
21
+ "content": "<mask>",
22
+ "lstrip": true,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "50255": {
29
+ "content": " ",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": false
35
+ },
36
+ "50256": {
37
+ "content": " ",
38
+ "lstrip": false,
39
+ "normalized": true,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": false
43
+ },
44
+ "50257": {
45
+ "content": " ",
46
+ "lstrip": false,
47
+ "normalized": true,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": false
51
+ },
52
+ "50258": {
53
+ "content": " ",
54
+ "lstrip": false,
55
+ "normalized": true,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": false
59
+ },
60
+ "50259": {
61
+ "content": " ",
62
+ "lstrip": false,
63
+ "normalized": true,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": false
67
+ },
68
+ "50260": {
69
+ "content": " ",
70
+ "lstrip": false,
71
+ "normalized": true,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": false
75
+ },
76
+ "50261": {
77
+ "content": " ",
78
+ "lstrip": false,
79
+ "normalized": true,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": false
83
+ },
84
+ "50262": {
85
+ "content": " ",
86
+ "lstrip": false,
87
+ "normalized": true,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": false
91
+ },
92
+ "50263": {
93
+ "content": " ",
94
+ "lstrip": false,
95
+ "normalized": true,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": false
99
+ },
100
+ "50264": {
101
+ "content": " ",
102
+ "lstrip": false,
103
+ "normalized": true,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": false
107
+ },
108
+ "50265": {
109
+ "content": " ",
110
+ "lstrip": false,
111
+ "normalized": true,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": false
115
+ },
116
+ "50266": {
117
+ "content": " ",
118
+ "lstrip": false,
119
+ "normalized": true,
120
+ "rstrip": false,
121
+ "single_word": false,
122
+ "special": false
123
+ },
124
+ "50267": {
125
+ "content": " ",
126
+ "lstrip": false,
127
+ "normalized": true,
128
+ "rstrip": false,
129
+ "single_word": false,
130
+ "special": false
131
+ },
132
+ "50268": {
133
+ "content": " ",
134
+ "lstrip": false,
135
+ "normalized": true,
136
+ "rstrip": false,
137
+ "single_word": false,
138
+ "special": false
139
+ },
140
+ "50269": {
141
+ "content": " ",
142
+ "lstrip": false,
143
+ "normalized": true,
144
+ "rstrip": false,
145
+ "single_word": false,
146
+ "special": false
147
+ },
148
+ "50270": {
149
+ "content": " ",
150
+ "lstrip": false,
151
+ "normalized": true,
152
+ "rstrip": false,
153
+ "single_word": false,
154
+ "special": false
155
+ },
156
+ "50271": {
157
+ "content": " ",
158
+ "lstrip": false,
159
+ "normalized": true,
160
+ "rstrip": false,
161
+ "single_word": false,
162
+ "special": false
163
+ },
164
+ "50272": {
165
+ "content": " ",
166
+ "lstrip": false,
167
+ "normalized": true,
168
+ "rstrip": false,
169
+ "single_word": false,
170
+ "special": false
171
+ },
172
+ "50273": {
173
+ "content": " ",
174
+ "lstrip": false,
175
+ "normalized": true,
176
+ "rstrip": false,
177
+ "single_word": false,
178
+ "special": false
179
+ },
180
+ "50274": {
181
+ "content": " ",
182
+ "lstrip": false,
183
+ "normalized": true,
184
+ "rstrip": false,
185
+ "single_word": false,
186
+ "special": false
187
+ },
188
+ "50275": {
189
+ "content": " ",
190
+ "lstrip": false,
191
+ "normalized": true,
192
+ "rstrip": false,
193
+ "single_word": false,
194
+ "special": false
195
+ },
196
+ "50276": {
197
+ "content": " ",
198
+ "lstrip": false,
199
+ "normalized": true,
200
+ "rstrip": false,
201
+ "single_word": false,
202
+ "special": false
203
+ }
204
+ },
205
+ "bos_token": "<|endoftext|>",
206
+ "clean_up_tokenization_spaces": true,
207
+ "eos_token": "<|endoftext|>",
208
+ "mask_token": "<mask>",
209
+ "model_max_length": 1000000000000000019884624838656,
210
+ "pad_token": "<|endoftext|>",
211
+ "tokenizer_class": "GPTNeoXTokenizer",
212
+ "unk_token": "<|endoftext|>"
213
+ }
trainer_state.json ADDED
@@ -0,0 +1,1199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9134487053983307,
3
+ "best_model_checkpoint": "./saved_models/mamba_prompt_sbdh_gpt4_v2_0/checkpoint-912",
4
+ "epoch": 38.0,
5
+ "eval_steps": 500,
6
+ "global_step": 912,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "grad_norm": 8.483881950378418,
14
+ "learning_rate": 0.0003,
15
+ "loss": 1.2055,
16
+ "step": 24
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_acc_macro": 0.07039666648893216,
21
+ "eval_acc_micro": 0.17493620640770696,
22
+ "eval_auc_macro": 0.7109566412757288,
23
+ "eval_auc_micro": 0.6509757698362547,
24
+ "eval_f1_at_5": 0.2568182822325969,
25
+ "eval_f1_at_8": 0.22372279922068555,
26
+ "eval_f1_macro": 0.11555043034927992,
27
+ "eval_f1_micro": 0.2977799227799084,
28
+ "eval_loss": 0.4126754105091095,
29
+ "eval_prec_at_5": 0.16506849315068495,
30
+ "eval_prec_at_8": 0.1317066210045662,
31
+ "eval_prec_macro": 0.0712520271567465,
32
+ "eval_prec_micro": 0.21000680735193294,
33
+ "eval_rec_at_5": 0.5781963470319634,
34
+ "eval_rec_at_8": 0.7423896499238966,
35
+ "eval_rec_macro": 0.3187634216175277,
36
+ "eval_rec_micro": 0.5116086235488796,
37
+ "eval_runtime": 6.0156,
38
+ "eval_samples_per_second": 145.621,
39
+ "eval_steps_per_second": 18.286,
40
+ "step": 24
41
+ },
42
+ {
43
+ "epoch": 2.0,
44
+ "grad_norm": 2.6864304542541504,
45
+ "learning_rate": 0.0003,
46
+ "loss": 0.3514,
47
+ "step": 48
48
+ },
49
+ {
50
+ "epoch": 2.0,
51
+ "eval_acc_macro": 0.5158272708489711,
52
+ "eval_acc_micro": 0.5578747628083138,
53
+ "eval_auc_macro": 0.94707367087266,
54
+ "eval_auc_micro": 0.913787855621242,
55
+ "eval_f1_at_5": 0.3898187899179248,
56
+ "eval_f1_at_8": 0.2799604575624912,
57
+ "eval_f1_macro": 0.646736079674674,
58
+ "eval_f1_micro": 0.7161997563945827,
59
+ "eval_loss": 0.2387184202671051,
60
+ "eval_prec_at_5": 0.24908675799086763,
61
+ "eval_prec_at_8": 0.1643835616438356,
62
+ "eval_prec_macro": 0.6802826117671307,
63
+ "eval_prec_micro": 0.7016706443913523,
64
+ "eval_rec_at_5": 0.8961187214611872,
65
+ "eval_rec_at_8": 0.9429223744292238,
66
+ "eval_rec_macro": 0.6832268849511623,
67
+ "eval_rec_micro": 0.7313432835820289,
68
+ "eval_runtime": 6.048,
69
+ "eval_samples_per_second": 144.841,
70
+ "eval_steps_per_second": 18.188,
71
+ "step": 48
72
+ },
73
+ {
74
+ "epoch": 3.0,
75
+ "grad_norm": 2.4849371910095215,
76
+ "learning_rate": 0.0003,
77
+ "loss": 0.1682,
78
+ "step": 72
79
+ },
80
+ {
81
+ "epoch": 3.0,
82
+ "eval_acc_macro": 0.7840879005682694,
83
+ "eval_acc_micro": 0.8028064992613883,
84
+ "eval_auc_macro": 0.9886749124239317,
85
+ "eval_auc_micro": 0.990273843904626,
86
+ "eval_f1_at_5": 0.42751438858977425,
87
+ "eval_f1_at_8": 0.2925113305987028,
88
+ "eval_f1_macro": 0.8710656054515781,
89
+ "eval_f1_micro": 0.8906185989347897,
90
+ "eval_loss": 0.07425953447818756,
91
+ "eval_prec_at_5": 0.27374429223744295,
92
+ "eval_prec_at_8": 0.17194634703196346,
93
+ "eval_prec_macro": 0.878761745264005,
94
+ "eval_prec_micro": 0.8801619433197667,
95
+ "eval_rec_at_5": 0.9754566210045662,
96
+ "eval_rec_at_8": 0.9788812785388128,
97
+ "eval_rec_macro": 0.8815695470069095,
98
+ "eval_rec_micro": 0.9013266998340878,
99
+ "eval_runtime": 6.0775,
100
+ "eval_samples_per_second": 144.138,
101
+ "eval_steps_per_second": 18.1,
102
+ "step": 72
103
+ },
104
+ {
105
+ "epoch": 4.0,
106
+ "grad_norm": 1.911177396774292,
107
+ "learning_rate": 0.0003,
108
+ "loss": 0.0646,
109
+ "step": 96
110
+ },
111
+ {
112
+ "epoch": 4.0,
113
+ "eval_acc_macro": 0.8321415173591932,
114
+ "eval_acc_micro": 0.8384442782347914,
115
+ "eval_auc_macro": 0.9920630589905021,
116
+ "eval_auc_micro": 0.9932832791412234,
117
+ "eval_f1_at_5": 0.4278476261962846,
118
+ "eval_f1_at_8": 0.29274329072668936,
119
+ "eval_f1_macro": 0.9063058610149609,
120
+ "eval_f1_micro": 0.9121236777867442,
121
+ "eval_loss": 0.06308761239051819,
122
+ "eval_prec_at_5": 0.27397260273972607,
123
+ "eval_prec_at_8": 0.1720890410958904,
124
+ "eval_prec_macro": 0.8935275833247776,
125
+ "eval_prec_micro": 0.8953674121405035,
126
+ "eval_rec_at_5": 0.976027397260274,
127
+ "eval_rec_at_8": 0.9794520547945206,
128
+ "eval_rec_macro": 0.9215877913582228,
129
+ "eval_rec_micro": 0.929519071310039,
130
+ "eval_runtime": 6.085,
131
+ "eval_samples_per_second": 143.962,
132
+ "eval_steps_per_second": 18.077,
133
+ "step": 96
134
+ },
135
+ {
136
+ "epoch": 5.0,
137
+ "grad_norm": 0.14926180243492126,
138
+ "learning_rate": 0.0003,
139
+ "loss": 0.0416,
140
+ "step": 120
141
+ },
142
+ {
143
+ "epoch": 5.0,
144
+ "eval_acc_macro": 0.835711620798801,
145
+ "eval_acc_micro": 0.844599844599779,
146
+ "eval_auc_macro": 0.99367524750647,
147
+ "eval_auc_micro": 0.9950181038357179,
148
+ "eval_f1_at_5": 0.428217417200729,
149
+ "eval_f1_at_8": 0.2925113305987028,
150
+ "eval_f1_macro": 0.9086207273252621,
151
+ "eval_f1_micro": 0.9157540016848428,
152
+ "eval_loss": 0.056059833616018295,
153
+ "eval_prec_at_5": 0.2742009132420091,
154
+ "eval_prec_at_8": 0.17194634703196346,
155
+ "eval_prec_macro": 0.9238859033605576,
156
+ "eval_prec_micro": 0.9306506849314271,
157
+ "eval_rec_at_5": 0.9769786910197868,
158
+ "eval_rec_at_8": 0.9788812785388128,
159
+ "eval_rec_macro": 0.8949097880182088,
160
+ "eval_rec_micro": 0.9013266998340878,
161
+ "eval_runtime": 6.0515,
162
+ "eval_samples_per_second": 144.758,
163
+ "eval_steps_per_second": 18.177,
164
+ "step": 120
165
+ },
166
+ {
167
+ "epoch": 6.0,
168
+ "grad_norm": 0.24564415216445923,
169
+ "learning_rate": 0.0003,
170
+ "loss": 0.0281,
171
+ "step": 144
172
+ },
173
+ {
174
+ "epoch": 6.0,
175
+ "eval_acc_macro": 0.8427241930303886,
176
+ "eval_acc_micro": 0.8486486486485831,
177
+ "eval_auc_macro": 0.9930039223823353,
178
+ "eval_auc_micro": 0.9943083137218841,
179
+ "eval_f1_at_5": 0.42721766176318765,
180
+ "eval_f1_at_8": 0.2925113305987028,
181
+ "eval_f1_macro": 0.9126603684260771,
182
+ "eval_f1_micro": 0.9181286549706835,
183
+ "eval_loss": 0.05612677335739136,
184
+ "eval_prec_at_5": 0.27351598173515984,
185
+ "eval_prec_at_8": 0.17194634703196346,
186
+ "eval_prec_macro": 0.9258898765719902,
187
+ "eval_prec_micro": 0.9250841750840971,
188
+ "eval_rec_at_5": 0.9752663622526635,
189
+ "eval_rec_at_8": 0.9788812785388128,
190
+ "eval_rec_macro": 0.9012770790636472,
191
+ "eval_rec_micro": 0.9112769485903058,
192
+ "eval_runtime": 6.0637,
193
+ "eval_samples_per_second": 144.466,
194
+ "eval_steps_per_second": 18.141,
195
+ "step": 144
196
+ },
197
+ {
198
+ "epoch": 7.0,
199
+ "grad_norm": 0.17717961966991425,
200
+ "learning_rate": 0.0003,
201
+ "loss": 0.0163,
202
+ "step": 168
203
+ },
204
+ {
205
+ "epoch": 7.0,
206
+ "eval_acc_macro": 0.8320704316636002,
207
+ "eval_acc_micro": 0.8395155185464921,
208
+ "eval_auc_macro": 0.9927596537595381,
209
+ "eval_auc_micro": 0.9942892457976611,
210
+ "eval_f1_at_5": 0.4287923569488756,
211
+ "eval_f1_at_8": 0.29274329072668936,
212
+ "eval_f1_macro": 0.9054968756763103,
213
+ "eval_f1_micro": 0.9127572016460155,
214
+ "eval_loss": 0.07535412162542343,
215
+ "eval_prec_at_5": 0.27465753424657535,
216
+ "eval_prec_at_8": 0.1720890410958904,
217
+ "eval_prec_macro": 0.895837475867586,
218
+ "eval_prec_micro": 0.9060457516339129,
219
+ "eval_rec_at_5": 0.9771689497716894,
220
+ "eval_rec_at_8": 0.9794520547945206,
221
+ "eval_rec_macro": 0.9167258374697121,
222
+ "eval_rec_micro": 0.9195688225538209,
223
+ "eval_runtime": 6.0576,
224
+ "eval_samples_per_second": 144.612,
225
+ "eval_steps_per_second": 18.159,
226
+ "step": 168
227
+ },
228
+ {
229
+ "epoch": 8.0,
230
+ "grad_norm": 0.27908530831336975,
231
+ "learning_rate": 0.0003,
232
+ "loss": 0.0109,
233
+ "step": 192
234
+ },
235
+ {
236
+ "epoch": 8.0,
237
+ "eval_acc_macro": 0.8295268992370044,
238
+ "eval_acc_micro": 0.832209737827653,
239
+ "eval_auc_macro": 0.9930334009743317,
240
+ "eval_auc_micro": 0.9938628905761329,
241
+ "eval_f1_at_5": 0.42918048633589306,
242
+ "eval_f1_at_8": 0.29274329072668936,
243
+ "eval_f1_macro": 0.9041576767954438,
244
+ "eval_f1_micro": 0.908421913327808,
245
+ "eval_loss": 0.07922064512968063,
246
+ "eval_prec_at_5": 0.27488584474885847,
247
+ "eval_prec_at_8": 0.1720890410958904,
248
+ "eval_prec_macro": 0.8907484139672496,
249
+ "eval_prec_micro": 0.8959677419354116,
250
+ "eval_rec_at_5": 0.978310502283105,
251
+ "eval_rec_at_8": 0.9794520547945206,
252
+ "eval_rec_macro": 0.9213085265888775,
253
+ "eval_rec_micro": 0.9212271973465239,
254
+ "eval_runtime": 6.0722,
255
+ "eval_samples_per_second": 144.265,
256
+ "eval_steps_per_second": 18.115,
257
+ "step": 192
258
+ },
259
+ {
260
+ "epoch": 9.0,
261
+ "grad_norm": 0.14036260545253754,
262
+ "learning_rate": 0.0003,
263
+ "loss": 0.0074,
264
+ "step": 216
265
+ },
266
+ {
267
+ "epoch": 9.0,
268
+ "eval_acc_macro": 0.8385687066413228,
269
+ "eval_acc_micro": 0.844961240310012,
270
+ "eval_auc_macro": 0.9927809510576332,
271
+ "eval_auc_micro": 0.9942359625327764,
272
+ "eval_f1_at_5": 0.4295136788854873,
273
+ "eval_f1_at_8": 0.29274329072668936,
274
+ "eval_f1_macro": 0.9094505520360644,
275
+ "eval_f1_micro": 0.915966386554545,
276
+ "eval_loss": 0.08278516680002213,
277
+ "eval_prec_at_5": 0.2751141552511416,
278
+ "eval_prec_at_8": 0.1720890410958904,
279
+ "eval_prec_macro": 0.9210993789406117,
280
+ "eval_prec_micro": 0.928449744463294,
281
+ "eval_rec_at_5": 0.9788812785388128,
282
+ "eval_rec_at_8": 0.9794520547945206,
283
+ "eval_rec_macro": 0.8989352557922053,
284
+ "eval_rec_micro": 0.9038142620231423,
285
+ "eval_runtime": 6.0864,
286
+ "eval_samples_per_second": 143.928,
287
+ "eval_steps_per_second": 18.073,
288
+ "step": 216
289
+ },
290
+ {
291
+ "epoch": 10.0,
292
+ "grad_norm": 0.192245751619339,
293
+ "learning_rate": 0.0003,
294
+ "loss": 0.0066,
295
+ "step": 240
296
+ },
297
+ {
298
+ "epoch": 10.0,
299
+ "eval_acc_macro": 0.841830141960627,
300
+ "eval_acc_micro": 0.8498475609755449,
301
+ "eval_auc_macro": 0.9923313106695196,
302
+ "eval_auc_micro": 0.9933376316355039,
303
+ "eval_f1_at_5": 0.4275509226486327,
304
+ "eval_f1_at_8": 0.2922793659426448,
305
+ "eval_f1_macro": 0.9117612718437879,
306
+ "eval_f1_micro": 0.9188298310670853,
307
+ "eval_loss": 0.0772981271147728,
308
+ "eval_prec_at_5": 0.27374429223744295,
309
+ "eval_prec_at_8": 0.17180365296803654,
310
+ "eval_prec_macro": 0.9114916513520281,
311
+ "eval_prec_micro": 0.9131859131858384,
312
+ "eval_rec_at_5": 0.9758371385083713,
313
+ "eval_rec_at_8": 0.978310502283105,
314
+ "eval_rec_macro": 0.9136710504527534,
315
+ "eval_rec_micro": 0.92454394693193,
316
+ "eval_runtime": 5.9768,
317
+ "eval_samples_per_second": 146.566,
318
+ "eval_steps_per_second": 18.404,
319
+ "step": 240
320
+ },
321
+ {
322
+ "epoch": 11.0,
323
+ "grad_norm": 0.11837983131408691,
324
+ "learning_rate": 0.0003,
325
+ "loss": 0.0057,
326
+ "step": 264
327
+ },
328
+ {
329
+ "epoch": 11.0,
330
+ "eval_acc_macro": 0.8320358706720068,
331
+ "eval_acc_micro": 0.836842105263095,
332
+ "eval_auc_macro": 0.990286728874436,
333
+ "eval_auc_micro": 0.9913156297458193,
334
+ "eval_f1_at_5": 0.42747783226694747,
335
+ "eval_f1_at_8": 0.2920219218917755,
336
+ "eval_f1_macro": 0.9058723874403682,
337
+ "eval_f1_micro": 0.911174785100212,
338
+ "eval_loss": 0.10789646208286285,
339
+ "eval_prec_at_5": 0.27374429223744295,
340
+ "eval_prec_at_8": 0.1716609589041096,
341
+ "eval_prec_macro": 0.8914304927691732,
342
+ "eval_prec_micro": 0.8997574777687227,
343
+ "eval_rec_at_5": 0.975076103500761,
344
+ "eval_rec_at_8": 0.9771689497716894,
345
+ "eval_rec_macro": 0.9231550617780325,
346
+ "eval_rec_micro": 0.922885572139227,
347
+ "eval_runtime": 6.0106,
348
+ "eval_samples_per_second": 145.742,
349
+ "eval_steps_per_second": 18.301,
350
+ "step": 264
351
+ },
352
+ {
353
+ "epoch": 12.0,
354
+ "grad_norm": 0.16925422847270966,
355
+ "learning_rate": 0.0003,
356
+ "loss": 0.005,
357
+ "step": 288
358
+ },
359
+ {
360
+ "epoch": 12.0,
361
+ "eval_acc_macro": 0.8364454962531706,
362
+ "eval_acc_micro": 0.8397581254723477,
363
+ "eval_auc_macro": 0.9908748071505288,
364
+ "eval_auc_micro": 0.9917851996554229,
365
+ "eval_f1_at_5": 0.42708974483795964,
366
+ "eval_f1_at_8": 0.2925113305987028,
367
+ "eval_f1_macro": 0.9084368033771916,
368
+ "eval_f1_micro": 0.9129005751848058,
369
+ "eval_loss": 0.09441287070512772,
370
+ "eval_prec_at_5": 0.27351598173515984,
371
+ "eval_prec_at_8": 0.17194634703196346,
372
+ "eval_prec_macro": 0.8995515858307809,
373
+ "eval_prec_micro": 0.9047231270357569,
374
+ "eval_rec_at_5": 0.9739345509893454,
375
+ "eval_rec_at_8": 0.9788812785388128,
376
+ "eval_rec_macro": 0.9188760380015673,
377
+ "eval_rec_micro": 0.9212271973465239,
378
+ "eval_runtime": 6.0148,
379
+ "eval_samples_per_second": 145.64,
380
+ "eval_steps_per_second": 18.288,
381
+ "step": 288
382
+ },
383
+ {
384
+ "epoch": 13.0,
385
+ "grad_norm": 0.25462788343429565,
386
+ "learning_rate": 0.0003,
387
+ "loss": 0.0045,
388
+ "step": 312
389
+ },
390
+ {
391
+ "epoch": 13.0,
392
+ "eval_acc_macro": 0.8219219437469505,
393
+ "eval_acc_micro": 0.8327165062915741,
394
+ "eval_auc_macro": 0.9892036337462419,
395
+ "eval_auc_micro": 0.9914230873000853,
396
+ "eval_f1_at_5": 0.42851407428878907,
397
+ "eval_f1_at_8": 0.2918154230125642,
398
+ "eval_f1_macro": 0.8990693006867976,
399
+ "eval_f1_micro": 0.9087237479805405,
400
+ "eval_loss": 0.11245805770158768,
401
+ "eval_prec_at_5": 0.2744292237442923,
402
+ "eval_prec_at_8": 0.17151826484018265,
403
+ "eval_prec_macro": 0.872997498581981,
404
+ "eval_prec_micro": 0.8858267716534736,
405
+ "eval_rec_at_5": 0.9771689497716894,
406
+ "eval_rec_at_8": 0.9771689497716894,
407
+ "eval_rec_macro": 0.9280030786669166,
408
+ "eval_rec_micro": 0.932835820895445,
409
+ "eval_runtime": 6.2398,
410
+ "eval_samples_per_second": 140.389,
411
+ "eval_steps_per_second": 17.629,
412
+ "step": 312
413
+ },
414
+ {
415
+ "epoch": 14.0,
416
+ "grad_norm": 0.13963262736797333,
417
+ "learning_rate": 0.0003,
418
+ "loss": 0.0029,
419
+ "step": 336
420
+ },
421
+ {
422
+ "epoch": 14.0,
423
+ "eval_acc_macro": 0.8398618464555664,
424
+ "eval_acc_micro": 0.8437499999999356,
425
+ "eval_auc_macro": 0.9909268418920506,
426
+ "eval_auc_micro": 0.9918402649693007,
427
+ "eval_f1_at_5": 0.42740465274793293,
428
+ "eval_f1_at_8": 0.2920473967500829,
429
+ "eval_f1_macro": 0.910421950810437,
430
+ "eval_f1_micro": 0.9152542372880599,
431
+ "eval_loss": 0.13447707891464233,
432
+ "eval_prec_at_5": 0.27374429223744295,
433
+ "eval_prec_at_8": 0.1716609589041096,
434
+ "eval_prec_macro": 0.907525335697792,
435
+ "eval_prec_micro": 0.9126133553173196,
436
+ "eval_rec_at_5": 0.9743150684931506,
437
+ "eval_rec_at_8": 0.9777397260273972,
438
+ "eval_rec_macro": 0.9137157897519602,
439
+ "eval_rec_micro": 0.9179104477611179,
440
+ "eval_runtime": 6.2625,
441
+ "eval_samples_per_second": 139.881,
442
+ "eval_steps_per_second": 17.565,
443
+ "step": 336
444
+ },
445
+ {
446
+ "epoch": 15.0,
447
+ "grad_norm": 0.1901983767747879,
448
+ "learning_rate": 0.0003,
449
+ "loss": 0.0032,
450
+ "step": 360
451
+ },
452
+ {
453
+ "epoch": 15.0,
454
+ "eval_acc_macro": 0.8357224650035576,
455
+ "eval_acc_micro": 0.8455098934550346,
456
+ "eval_auc_macro": 0.9892990457525107,
457
+ "eval_auc_micro": 0.990561377509615,
458
+ "eval_f1_at_5": 0.4254418080308462,
459
+ "eval_f1_at_8": 0.2922793659426448,
460
+ "eval_f1_macro": 0.9080581868434588,
461
+ "eval_f1_micro": 0.9162886597937389,
462
+ "eval_loss": 0.10820472985506058,
463
+ "eval_prec_at_5": 0.27237442922374433,
464
+ "eval_prec_at_8": 0.17180365296803654,
465
+ "eval_prec_macro": 0.9057447684648601,
466
+ "eval_prec_micro": 0.9114027891713772,
467
+ "eval_rec_at_5": 0.9712709284627092,
468
+ "eval_rec_at_8": 0.978310502283105,
469
+ "eval_rec_macro": 0.9111621041819294,
470
+ "eval_rec_micro": 0.9212271973465239,
471
+ "eval_runtime": 6.3038,
472
+ "eval_samples_per_second": 138.963,
473
+ "eval_steps_per_second": 17.45,
474
+ "step": 360
475
+ },
476
+ {
477
+ "epoch": 16.0,
478
+ "grad_norm": 0.17108500003814697,
479
+ "learning_rate": 0.0003,
480
+ "loss": 0.0027,
481
+ "step": 384
482
+ },
483
+ {
484
+ "epoch": 16.0,
485
+ "eval_acc_macro": 0.8297448303381417,
486
+ "eval_acc_micro": 0.8398169336383798,
487
+ "eval_auc_macro": 0.9902632756647023,
488
+ "eval_auc_micro": 0.9922601157120051,
489
+ "eval_f1_at_5": 0.4277927620668724,
490
+ "eval_f1_at_8": 0.2922793659426448,
491
+ "eval_f1_macro": 0.9041211387604378,
492
+ "eval_f1_micro": 0.912935323383009,
493
+ "eval_loss": 0.12706510722637177,
494
+ "eval_prec_at_5": 0.273972602739726,
495
+ "eval_prec_at_8": 0.17180365296803654,
496
+ "eval_prec_macro": 0.9060353096841967,
497
+ "eval_prec_micro": 0.9129353233830089,
498
+ "eval_rec_at_5": 0.9754566210045662,
499
+ "eval_rec_at_8": 0.978310502283105,
500
+ "eval_rec_macro": 0.9032650308288451,
501
+ "eval_rec_micro": 0.9129353233830089,
502
+ "eval_runtime": 6.7247,
503
+ "eval_samples_per_second": 130.266,
504
+ "eval_steps_per_second": 16.358,
505
+ "step": 384
506
+ },
507
+ {
508
+ "epoch": 17.0,
509
+ "grad_norm": 0.06544364243745804,
510
+ "learning_rate": 0.0003,
511
+ "loss": 0.0029,
512
+ "step": 408
513
+ },
514
+ {
515
+ "epoch": 17.0,
516
+ "eval_acc_macro": 0.829765891496947,
517
+ "eval_acc_micro": 0.8365384615383996,
518
+ "eval_auc_macro": 0.9893010896141785,
519
+ "eval_auc_micro": 0.9914586391774918,
520
+ "eval_f1_at_5": 0.42714459980714947,
521
+ "eval_f1_at_8": 0.2922793659426448,
522
+ "eval_f1_macro": 0.9037391479019817,
523
+ "eval_f1_micro": 0.9109947643978323,
524
+ "eval_loss": 0.10541332513093948,
525
+ "eval_prec_at_5": 0.27351598173515984,
526
+ "eval_prec_at_8": 0.17180365296803654,
527
+ "eval_prec_macro": 0.8801020034653725,
528
+ "eval_prec_micro": 0.8856695379795704,
529
+ "eval_rec_at_5": 0.9745053272450532,
530
+ "eval_rec_at_8": 0.978310502283105,
531
+ "eval_rec_macro": 0.9310175234124923,
532
+ "eval_rec_micro": 0.937810945273554,
533
+ "eval_runtime": 6.3322,
534
+ "eval_samples_per_second": 138.34,
535
+ "eval_steps_per_second": 17.371,
536
+ "step": 408
537
+ },
538
+ {
539
+ "epoch": 18.0,
540
+ "grad_norm": 0.12380898743867874,
541
+ "learning_rate": 0.0003,
542
+ "loss": 0.0028,
543
+ "step": 432
544
+ },
545
+ {
546
+ "epoch": 18.0,
547
+ "eval_acc_macro": 0.8295264706506825,
548
+ "eval_acc_micro": 0.8390718562873624,
549
+ "eval_auc_macro": 0.9886105199219366,
550
+ "eval_auc_micro": 0.9908788495376829,
551
+ "eval_f1_at_5": 0.42679308597884263,
552
+ "eval_f1_at_8": 0.2918154230125642,
553
+ "eval_f1_macro": 0.9042231261610075,
554
+ "eval_f1_micro": 0.9124949124948383,
555
+ "eval_loss": 0.11824628710746765,
556
+ "eval_prec_at_5": 0.2732876712328767,
557
+ "eval_prec_at_8": 0.17151826484018265,
558
+ "eval_prec_macro": 0.8888519630940449,
559
+ "eval_prec_micro": 0.8960831334931337,
560
+ "eval_rec_at_5": 0.973744292237443,
561
+ "eval_rec_at_8": 0.9771689497716894,
562
+ "eval_rec_macro": 0.9215852252593782,
563
+ "eval_rec_micro": 0.929519071310039,
564
+ "eval_runtime": 6.3155,
565
+ "eval_samples_per_second": 138.707,
566
+ "eval_steps_per_second": 17.418,
567
+ "step": 432
568
+ },
569
+ {
570
+ "epoch": 19.0,
571
+ "grad_norm": 0.21812734007835388,
572
+ "learning_rate": 0.0003,
573
+ "loss": 0.0024,
574
+ "step": 456
575
+ },
576
+ {
577
+ "epoch": 19.0,
578
+ "eval_acc_macro": 0.829661993373597,
579
+ "eval_acc_micro": 0.8388554216866838,
580
+ "eval_auc_macro": 0.9894816577253542,
581
+ "eval_auc_micro": 0.991705185655646,
582
+ "eval_f1_at_5": 0.42714459980714947,
583
+ "eval_f1_at_8": 0.2922793659426448,
584
+ "eval_f1_macro": 0.9036099130579357,
585
+ "eval_f1_micro": 0.9123669123668376,
586
+ "eval_loss": 0.1079055592417717,
587
+ "eval_prec_at_5": 0.27351598173515984,
588
+ "eval_prec_at_8": 0.17180365296803654,
589
+ "eval_prec_macro": 0.8959752170714762,
590
+ "eval_prec_micro": 0.9012944983818041,
591
+ "eval_rec_at_5": 0.9745053272450532,
592
+ "eval_rec_at_8": 0.978310502283105,
593
+ "eval_rec_macro": 0.9119091133829933,
594
+ "eval_rec_micro": 0.9237147595355785,
595
+ "eval_runtime": 6.2636,
596
+ "eval_samples_per_second": 139.855,
597
+ "eval_steps_per_second": 17.562,
598
+ "step": 456
599
+ },
600
+ {
601
+ "epoch": 20.0,
602
+ "grad_norm": 0.14500826597213745,
603
+ "learning_rate": 0.0003,
604
+ "loss": 0.0026,
605
+ "step": 480
606
+ },
607
+ {
608
+ "epoch": 20.0,
609
+ "eval_acc_macro": 0.8388928681043534,
610
+ "eval_acc_micro": 0.8470764617690519,
611
+ "eval_auc_macro": 0.9914346196687225,
612
+ "eval_auc_micro": 0.9929207212736375,
613
+ "eval_f1_at_5": 0.42845916764894976,
614
+ "eval_f1_at_8": 0.2925113305987028,
615
+ "eval_f1_macro": 0.9099987722528428,
616
+ "eval_f1_micro": 0.9172077922077178,
617
+ "eval_loss": 0.12154436111450195,
618
+ "eval_prec_at_5": 0.2744292237442923,
619
+ "eval_prec_at_8": 0.17194634703196346,
620
+ "eval_prec_macro": 0.8884541003489376,
621
+ "eval_prec_micro": 0.898251192368768,
622
+ "eval_rec_at_5": 0.9765981735159818,
623
+ "eval_rec_at_8": 0.9788812785388128,
624
+ "eval_rec_macro": 0.9349802408736304,
625
+ "eval_rec_micro": 0.9369817578772025,
626
+ "eval_runtime": 6.1697,
627
+ "eval_samples_per_second": 141.984,
628
+ "eval_steps_per_second": 17.829,
629
+ "step": 480
630
+ },
631
+ {
632
+ "epoch": 21.0,
633
+ "grad_norm": 0.12059065699577332,
634
+ "learning_rate": 0.0003,
635
+ "loss": 0.002,
636
+ "step": 504
637
+ },
638
+ {
639
+ "epoch": 21.0,
640
+ "eval_acc_macro": 0.840415915936909,
641
+ "eval_acc_micro": 0.8496978851963104,
642
+ "eval_auc_macro": 0.9902023505450398,
643
+ "eval_auc_micro": 0.9918164746152466,
644
+ "eval_f1_at_5": 0.42708974483795964,
645
+ "eval_f1_at_8": 0.2922793659426448,
646
+ "eval_f1_macro": 0.9110529776466078,
647
+ "eval_f1_micro": 0.9187423438137264,
648
+ "eval_loss": 0.1368735283613205,
649
+ "eval_prec_at_5": 0.27351598173515984,
650
+ "eval_prec_at_8": 0.17180365296803654,
651
+ "eval_prec_macro": 0.8969397087072427,
652
+ "eval_prec_micro": 0.9050683829444163,
653
+ "eval_rec_at_5": 0.9739345509893454,
654
+ "eval_rec_at_8": 0.978310502283105,
655
+ "eval_rec_macro": 0.9266197213944322,
656
+ "eval_rec_micro": 0.932835820895445,
657
+ "eval_runtime": 6.2964,
658
+ "eval_samples_per_second": 139.126,
659
+ "eval_steps_per_second": 17.47,
660
+ "step": 504
661
+ },
662
+ {
663
+ "epoch": 22.0,
664
+ "grad_norm": 0.1459818333387375,
665
+ "learning_rate": 0.0003,
666
+ "loss": 0.0022,
667
+ "step": 528
668
+ },
669
+ {
670
+ "epoch": 22.0,
671
+ "eval_acc_macro": 0.8299074532155761,
672
+ "eval_acc_micro": 0.8387573964496421,
673
+ "eval_auc_macro": 0.9912484493432677,
674
+ "eval_auc_micro": 0.9923215964022568,
675
+ "eval_f1_at_5": 0.42714459980714947,
676
+ "eval_f1_at_8": 0.29205588274802374,
677
+ "eval_f1_macro": 0.9036433853106406,
678
+ "eval_f1_micro": 0.9123089300079716,
679
+ "eval_loss": 0.12988413870334625,
680
+ "eval_prec_at_5": 0.27351598173515984,
681
+ "eval_prec_at_8": 0.1716609589041096,
682
+ "eval_prec_macro": 0.8778252010823914,
683
+ "eval_prec_micro": 0.8859374999999308,
684
+ "eval_rec_at_5": 0.9745053272450532,
685
+ "eval_rec_at_8": 0.9779299847792998,
686
+ "eval_rec_macro": 0.9334072472404634,
687
+ "eval_rec_micro": 0.9402985074626086,
688
+ "eval_runtime": 6.2795,
689
+ "eval_samples_per_second": 139.501,
690
+ "eval_steps_per_second": 17.517,
691
+ "step": 528
692
+ },
693
+ {
694
+ "epoch": 23.0,
695
+ "grad_norm": 0.23651210963726044,
696
+ "learning_rate": 0.0003,
697
+ "loss": 0.0023,
698
+ "step": 552
699
+ },
700
+ {
701
+ "epoch": 23.0,
702
+ "eval_acc_macro": 0.8331982367002823,
703
+ "eval_acc_micro": 0.8412213740457373,
704
+ "eval_auc_macro": 0.9907246610096498,
705
+ "eval_auc_micro": 0.9922808765827714,
706
+ "eval_f1_at_5": 0.42686614175859805,
707
+ "eval_f1_at_8": 0.2922793659426448,
708
+ "eval_f1_macro": 0.9057038011597318,
709
+ "eval_f1_micro": 0.9137645107793605,
710
+ "eval_loss": 0.1514243185520172,
711
+ "eval_prec_at_5": 0.2732876712328767,
712
+ "eval_prec_at_8": 0.17180365296803654,
713
+ "eval_prec_macro": 0.9057572791903493,
714
+ "eval_prec_micro": 0.9137645107793604,
715
+ "eval_rec_at_5": 0.9745053272450532,
716
+ "eval_rec_at_8": 0.978310502283105,
717
+ "eval_rec_macro": 0.9065147380552868,
718
+ "eval_rec_micro": 0.9137645107793604,
719
+ "eval_runtime": 6.3102,
720
+ "eval_samples_per_second": 138.822,
721
+ "eval_steps_per_second": 17.432,
722
+ "step": 552
723
+ },
724
+ {
725
+ "epoch": 24.0,
726
+ "grad_norm": 0.09079229086637497,
727
+ "learning_rate": 0.0003,
728
+ "loss": 0.0026,
729
+ "step": 576
730
+ },
731
+ {
732
+ "epoch": 24.0,
733
+ "eval_acc_macro": 0.8361929136599193,
734
+ "eval_acc_micro": 0.8474446987032153,
735
+ "eval_auc_macro": 0.990970820997881,
736
+ "eval_auc_micro": 0.9927761079978714,
737
+ "eval_f1_at_5": 0.4275326584009282,
738
+ "eval_f1_at_8": 0.2922793659426448,
739
+ "eval_f1_macro": 0.9082187775109158,
740
+ "eval_f1_micro": 0.9174236168455063,
741
+ "eval_loss": 0.1161712110042572,
742
+ "eval_prec_at_5": 0.27374429223744295,
743
+ "eval_prec_at_8": 0.17180365296803654,
744
+ "eval_prec_macro": 0.9024833914958587,
745
+ "eval_prec_micro": 0.9136513157893985,
746
+ "eval_rec_at_5": 0.9756468797564688,
747
+ "eval_rec_at_8": 0.978310502283105,
748
+ "eval_rec_macro": 0.9147050110934346,
749
+ "eval_rec_micro": 0.9212271973465239,
750
+ "eval_runtime": 6.2478,
751
+ "eval_samples_per_second": 140.209,
752
+ "eval_steps_per_second": 17.606,
753
+ "step": 576
754
+ },
755
+ {
756
+ "epoch": 25.0,
757
+ "grad_norm": 0.1555357277393341,
758
+ "learning_rate": 0.0003,
759
+ "loss": 0.0011,
760
+ "step": 600
761
+ },
762
+ {
763
+ "epoch": 25.0,
764
+ "eval_acc_macro": 0.8261213062726463,
765
+ "eval_acc_micro": 0.8319763138415373,
766
+ "eval_auc_macro": 0.990040119063344,
767
+ "eval_auc_micro": 0.9911265543476828,
768
+ "eval_f1_at_5": 0.4265511126863068,
769
+ "eval_f1_at_8": 0.2920473967500829,
770
+ "eval_f1_macro": 0.9014179315917096,
771
+ "eval_f1_micro": 0.9082828282827549,
772
+ "eval_loss": 0.17262162268161774,
773
+ "eval_prec_at_5": 0.2730593607305936,
774
+ "eval_prec_at_8": 0.1716609589041096,
775
+ "eval_prec_macro": 0.8792666911124373,
776
+ "eval_prec_micro": 0.8857368006303479,
777
+ "eval_rec_at_5": 0.974124809741248,
778
+ "eval_rec_at_8": 0.9777397260273972,
779
+ "eval_rec_macro": 0.9306207402078307,
780
+ "eval_rec_micro": 0.9320066334990935,
781
+ "eval_runtime": 6.1427,
782
+ "eval_samples_per_second": 142.608,
783
+ "eval_steps_per_second": 17.907,
784
+ "step": 600
785
+ },
786
+ {
787
+ "epoch": 26.0,
788
+ "grad_norm": 0.054534025490283966,
789
+ "learning_rate": 0.0003,
790
+ "loss": 0.0016,
791
+ "step": 624
792
+ },
793
+ {
794
+ "epoch": 26.0,
795
+ "eval_acc_macro": 0.826194090509476,
796
+ "eval_acc_micro": 0.8362004487658312,
797
+ "eval_auc_macro": 0.9894373225891121,
798
+ "eval_auc_micro": 0.9909356077981039,
799
+ "eval_f1_at_5": 0.42686614175859805,
800
+ "eval_f1_at_8": 0.2918154230125642,
801
+ "eval_f1_macro": 0.9018300611095432,
802
+ "eval_f1_micro": 0.910794297352268,
803
+ "eval_loss": 0.1341952532529831,
804
+ "eval_prec_at_5": 0.2732876712328767,
805
+ "eval_prec_at_8": 0.17151826484018265,
806
+ "eval_prec_macro": 0.8863083621122576,
807
+ "eval_prec_micro": 0.8951160928742278,
808
+ "eval_rec_at_5": 0.9745053272450532,
809
+ "eval_rec_at_8": 0.9771689497716894,
810
+ "eval_rec_macro": 0.9193115078475559,
811
+ "eval_rec_micro": 0.9270315091209844,
812
+ "eval_runtime": 6.2441,
813
+ "eval_samples_per_second": 140.292,
814
+ "eval_steps_per_second": 17.617,
815
+ "step": 624
816
+ },
817
+ {
818
+ "epoch": 27.0,
819
+ "grad_norm": 0.18318401277065277,
820
+ "learning_rate": 0.0003,
821
+ "loss": 0.0017,
822
+ "step": 648
823
+ },
824
+ {
825
+ "epoch": 27.0,
826
+ "eval_acc_macro": 0.8331625409086375,
827
+ "eval_acc_micro": 0.844547563805039,
828
+ "eval_auc_macro": 0.9903700036429086,
829
+ "eval_auc_micro": 0.9911557799511647,
830
+ "eval_f1_at_5": 0.4260900570497148,
831
+ "eval_f1_at_8": 0.2915919235644548,
832
+ "eval_f1_macro": 0.9059994264183651,
833
+ "eval_f1_micro": 0.9157232704401748,
834
+ "eval_loss": 0.16900603473186493,
835
+ "eval_prec_at_5": 0.27283105022831056,
836
+ "eval_prec_at_8": 0.1713755707762557,
837
+ "eval_prec_macro": 0.9152116568385661,
838
+ "eval_prec_micro": 0.9262086513994126,
839
+ "eval_rec_at_5": 0.9722222222222222,
840
+ "eval_rec_at_8": 0.9767884322678843,
841
+ "eval_rec_macro": 0.8973109240252097,
842
+ "eval_rec_micro": 0.9054726368158453,
843
+ "eval_runtime": 6.3369,
844
+ "eval_samples_per_second": 138.239,
845
+ "eval_steps_per_second": 17.359,
846
+ "step": 648
847
+ },
848
+ {
849
+ "epoch": 28.0,
850
+ "grad_norm": 0.09100370109081268,
851
+ "learning_rate": 0.0003,
852
+ "loss": 0.002,
853
+ "step": 672
854
+ },
855
+ {
856
+ "epoch": 28.0,
857
+ "eval_acc_macro": 0.834589729212194,
858
+ "eval_acc_micro": 0.8436781609194756,
859
+ "eval_auc_macro": 0.9901591404278554,
860
+ "eval_auc_micro": 0.9904570385410867,
861
+ "eval_f1_at_5": 0.4261448479661988,
862
+ "eval_f1_at_8": 0.29135993712848907,
863
+ "eval_f1_macro": 0.906536458365489,
864
+ "eval_f1_micro": 0.9152119700747368,
865
+ "eval_loss": 0.12356158345937729,
866
+ "eval_prec_at_5": 0.27283105022831056,
867
+ "eval_prec_at_8": 0.17123287671232876,
868
+ "eval_prec_macro": 0.9101814375182885,
869
+ "eval_prec_micro": 0.9174999999999235,
870
+ "eval_rec_at_5": 0.97279299847793,
871
+ "eval_rec_at_8": 0.9762176560121765,
872
+ "eval_rec_macro": 0.9036660250083727,
873
+ "eval_rec_micro": 0.9129353233830089,
874
+ "eval_runtime": 6.2292,
875
+ "eval_samples_per_second": 140.629,
876
+ "eval_steps_per_second": 17.659,
877
+ "step": 672
878
+ },
879
+ {
880
+ "epoch": 29.0,
881
+ "grad_norm": 0.031184401363134384,
882
+ "learning_rate": 0.0003,
883
+ "loss": 0.0013,
884
+ "step": 696
885
+ },
886
+ {
887
+ "epoch": 29.0,
888
+ "eval_acc_macro": 0.8341647633718688,
889
+ "eval_acc_micro": 0.8435321456234822,
890
+ "eval_auc_macro": 0.9909608628516886,
891
+ "eval_auc_micro": 0.9916707030076353,
892
+ "eval_f1_at_5": 0.42647810767423283,
893
+ "eval_f1_at_8": 0.2915834447216144,
894
+ "eval_f1_macro": 0.9066166365653049,
895
+ "eval_f1_micro": 0.9151260504200911,
896
+ "eval_loss": 0.1419263482093811,
897
+ "eval_prec_at_5": 0.27305936073059367,
898
+ "eval_prec_at_8": 0.1713755707762557,
899
+ "eval_prec_macro": 0.9206967590925624,
900
+ "eval_prec_micro": 0.9275979557069056,
901
+ "eval_rec_at_5": 0.9733637747336377,
902
+ "eval_rec_at_8": 0.9765981735159818,
903
+ "eval_rec_macro": 0.8938908104868867,
904
+ "eval_rec_micro": 0.9029850746267908,
905
+ "eval_runtime": 6.2406,
906
+ "eval_samples_per_second": 140.372,
907
+ "eval_steps_per_second": 17.627,
908
+ "step": 696
909
+ },
910
+ {
911
+ "epoch": 30.0,
912
+ "grad_norm": 0.12148793786764145,
913
+ "learning_rate": 0.0003,
914
+ "loss": 0.001,
915
+ "step": 720
916
+ },
917
+ {
918
+ "epoch": 30.0,
919
+ "eval_acc_macro": 0.8301734612803756,
920
+ "eval_acc_micro": 0.8415007656967196,
921
+ "eval_auc_macro": 0.9904576570433402,
922
+ "eval_auc_micro": 0.9915527313643118,
923
+ "eval_f1_at_5": 0.42708974483795964,
924
+ "eval_f1_at_8": 0.2918154230125642,
925
+ "eval_f1_macro": 0.9043513567183213,
926
+ "eval_f1_micro": 0.9139293139292379,
927
+ "eval_loss": 0.1600634753704071,
928
+ "eval_prec_at_5": 0.27351598173515984,
929
+ "eval_prec_at_8": 0.17151826484018265,
930
+ "eval_prec_macro": 0.9050210447593683,
931
+ "eval_prec_micro": 0.9165971643035098,
932
+ "eval_rec_at_5": 0.9739345509893454,
933
+ "eval_rec_at_8": 0.9771689497716894,
934
+ "eval_rec_macro": 0.9048079473209437,
935
+ "eval_rec_micro": 0.9112769485903058,
936
+ "eval_runtime": 6.2514,
937
+ "eval_samples_per_second": 140.128,
938
+ "eval_steps_per_second": 17.596,
939
+ "step": 720
940
+ },
941
+ {
942
+ "epoch": 31.0,
943
+ "grad_norm": 0.11479064077138901,
944
+ "learning_rate": 0.0003,
945
+ "loss": 0.0016,
946
+ "step": 744
947
+ },
948
+ {
949
+ "epoch": 31.0,
950
+ "eval_acc_macro": 0.833818894129361,
951
+ "eval_acc_micro": 0.8425787106446144,
952
+ "eval_auc_macro": 0.9902376130208236,
953
+ "eval_auc_micro": 0.9914569462309486,
954
+ "eval_f1_at_5": 0.4271994046239887,
955
+ "eval_f1_at_8": 0.2920473967500829,
956
+ "eval_f1_macro": 0.9060990360615312,
957
+ "eval_f1_micro": 0.9145646867371103,
958
+ "eval_loss": 0.1426621824502945,
959
+ "eval_prec_at_5": 0.27351598173515984,
960
+ "eval_prec_at_8": 0.1716609589041096,
961
+ "eval_prec_macro": 0.8877458031473394,
962
+ "eval_prec_micro": 0.8977635782746887,
963
+ "eval_rec_at_5": 0.975076103500761,
964
+ "eval_rec_at_8": 0.9777397260273972,
965
+ "eval_rec_macro": 0.9271635027618895,
966
+ "eval_rec_micro": 0.9320066334990935,
967
+ "eval_runtime": 6.1703,
968
+ "eval_samples_per_second": 141.971,
969
+ "eval_steps_per_second": 17.827,
970
+ "step": 744
971
+ },
972
+ {
973
+ "epoch": 32.0,
974
+ "grad_norm": 0.0649285838007927,
975
+ "learning_rate": 0.0003,
976
+ "loss": 0.0011,
977
+ "step": 768
978
+ },
979
+ {
980
+ "epoch": 32.0,
981
+ "eval_acc_macro": 0.830982026700095,
982
+ "eval_acc_micro": 0.8421862971515902,
983
+ "eval_auc_macro": 0.9898071389389859,
984
+ "eval_auc_micro": 0.9906281152443956,
985
+ "eval_f1_at_5": 0.4261448479661988,
986
+ "eval_f1_at_8": 0.2918154230125642,
987
+ "eval_f1_macro": 0.9044437561510542,
988
+ "eval_f1_micro": 0.9143334726284233,
989
+ "eval_loss": 0.15522228181362152,
990
+ "eval_prec_at_5": 0.27283105022831056,
991
+ "eval_prec_at_8": 0.17151826484018265,
992
+ "eval_prec_macro": 0.914549760838821,
993
+ "eval_prec_micro": 0.9216512215668979,
994
+ "eval_rec_at_5": 0.97279299847793,
995
+ "eval_rec_at_8": 0.9771689497716894,
996
+ "eval_rec_macro": 0.89634766919242,
997
+ "eval_rec_micro": 0.9071310116085483,
998
+ "eval_runtime": 6.2856,
999
+ "eval_samples_per_second": 139.366,
1000
+ "eval_steps_per_second": 17.5,
1001
+ "step": 768
1002
+ },
1003
+ {
1004
+ "epoch": 33.0,
1005
+ "grad_norm": 0.10257603228092194,
1006
+ "learning_rate": 0.0003,
1007
+ "loss": 0.001,
1008
+ "step": 792
1009
+ },
1010
+ {
1011
+ "epoch": 33.0,
1012
+ "eval_acc_macro": 0.8321851395971892,
1013
+ "eval_acc_micro": 0.8429878048779845,
1014
+ "eval_auc_macro": 0.9889132300057429,
1015
+ "eval_auc_micro": 0.9893601873860157,
1016
+ "eval_f1_at_5": 0.42647810767423283,
1017
+ "eval_f1_at_8": 0.2915919235644548,
1018
+ "eval_f1_macro": 0.9051919207399707,
1019
+ "eval_f1_micro": 0.9148056244829681,
1020
+ "eval_loss": 0.149822399020195,
1021
+ "eval_prec_at_5": 0.27305936073059367,
1022
+ "eval_prec_at_8": 0.1713755707762557,
1023
+ "eval_prec_macro": 0.9067171865229405,
1024
+ "eval_prec_micro": 0.9125412541253373,
1025
+ "eval_rec_at_5": 0.9733637747336377,
1026
+ "eval_rec_at_8": 0.9767884322678843,
1027
+ "eval_rec_macro": 0.906988014574614,
1028
+ "eval_rec_micro": 0.9170812603647663,
1029
+ "eval_runtime": 6.3392,
1030
+ "eval_samples_per_second": 138.188,
1031
+ "eval_steps_per_second": 17.352,
1032
+ "step": 792
1033
+ },
1034
+ {
1035
+ "epoch": 34.0,
1036
+ "grad_norm": 0.06269343197345734,
1037
+ "learning_rate": 0.0003,
1038
+ "loss": 0.0012,
1039
+ "step": 816
1040
+ },
1041
+ {
1042
+ "epoch": 34.0,
1043
+ "eval_acc_macro": 0.8356570074540155,
1044
+ "eval_acc_micro": 0.8457446808509995,
1045
+ "eval_auc_macro": 0.9885679122519199,
1046
+ "eval_auc_micro": 0.9892555811101381,
1047
+ "eval_f1_at_5": 0.42581157914042916,
1048
+ "eval_f1_at_8": 0.29135993712848907,
1049
+ "eval_f1_macro": 0.907980034255221,
1050
+ "eval_f1_micro": 0.9164265129682243,
1051
+ "eval_loss": 0.18450404703617096,
1052
+ "eval_prec_at_5": 0.2726027397260274,
1053
+ "eval_prec_at_8": 0.17123287671232876,
1054
+ "eval_prec_macro": 0.9065894385720981,
1055
+ "eval_prec_micro": 0.9100572363040956,
1056
+ "eval_rec_at_5": 0.9722222222222222,
1057
+ "eval_rec_at_8": 0.9762176560121765,
1058
+ "eval_rec_macro": 0.9109305984894259,
1059
+ "eval_rec_micro": 0.922885572139227,
1060
+ "eval_runtime": 6.2222,
1061
+ "eval_samples_per_second": 140.786,
1062
+ "eval_steps_per_second": 17.679,
1063
+ "step": 816
1064
+ },
1065
+ {
1066
+ "epoch": 35.0,
1067
+ "grad_norm": 0.060569193214178085,
1068
+ "learning_rate": 0.0003,
1069
+ "loss": 0.0015,
1070
+ "step": 840
1071
+ },
1072
+ {
1073
+ "epoch": 35.0,
1074
+ "eval_acc_macro": 0.8271139319864672,
1075
+ "eval_acc_micro": 0.8386855862583391,
1076
+ "eval_auc_macro": 0.9891685604004818,
1077
+ "eval_auc_micro": 0.9908738152492784,
1078
+ "eval_f1_at_5": 0.4271994046239887,
1079
+ "eval_f1_at_8": 0.29205588274802374,
1080
+ "eval_f1_macro": 0.9021260521405651,
1081
+ "eval_f1_micro": 0.9122664500405433,
1082
+ "eval_loss": 0.13544484972953796,
1083
+ "eval_prec_at_5": 0.27351598173515984,
1084
+ "eval_prec_at_8": 0.1716609589041096,
1085
+ "eval_prec_macro": 0.8873506319173318,
1086
+ "eval_prec_micro": 0.8941082802547059,
1087
+ "eval_rec_at_5": 0.975076103500761,
1088
+ "eval_rec_at_8": 0.9779299847792998,
1089
+ "eval_rec_macro": 0.9204235623584355,
1090
+ "eval_rec_micro": 0.931177446102742,
1091
+ "eval_runtime": 6.3105,
1092
+ "eval_samples_per_second": 138.815,
1093
+ "eval_steps_per_second": 17.431,
1094
+ "step": 840
1095
+ },
1096
+ {
1097
+ "epoch": 36.0,
1098
+ "grad_norm": 0.011365901678800583,
1099
+ "learning_rate": 0.0003,
1100
+ "loss": 0.0013,
1101
+ "step": 864
1102
+ },
1103
+ {
1104
+ "epoch": 36.0,
1105
+ "eval_acc_macro": 0.8432467559973965,
1106
+ "eval_acc_micro": 0.8506097560974961,
1107
+ "eval_auc_macro": 0.9889068205774477,
1108
+ "eval_auc_micro": 0.9905521999573019,
1109
+ "eval_f1_at_5": 0.42684788616938474,
1110
+ "eval_f1_at_8": 0.2922793659426448,
1111
+ "eval_f1_macro": 0.9125863753805709,
1112
+ "eval_f1_micro": 0.9192751235584086,
1113
+ "eval_loss": 0.1388498842716217,
1114
+ "eval_prec_at_5": 0.2732876712328767,
1115
+ "eval_prec_at_8": 0.17180365296803654,
1116
+ "eval_prec_macro": 0.9106178664851478,
1117
+ "eval_prec_micro": 0.9132569558100725,
1118
+ "eval_rec_at_5": 0.9743150684931506,
1119
+ "eval_rec_at_8": 0.978310502283105,
1120
+ "eval_rec_macro": 0.9154554431692706,
1121
+ "eval_rec_micro": 0.9253731343282815,
1122
+ "eval_runtime": 6.1794,
1123
+ "eval_samples_per_second": 141.761,
1124
+ "eval_steps_per_second": 17.801,
1125
+ "step": 864
1126
+ },
1127
+ {
1128
+ "epoch": 37.0,
1129
+ "grad_norm": 0.00758476834744215,
1130
+ "learning_rate": 0.0003,
1131
+ "loss": 0.0009,
1132
+ "step": 888
1133
+ },
1134
+ {
1135
+ "epoch": 37.0,
1136
+ "eval_acc_macro": 0.8416703156465917,
1137
+ "eval_acc_micro": 0.848985725018719,
1138
+ "eval_auc_macro": 0.9891403369644753,
1139
+ "eval_auc_micro": 0.9905162916700971,
1140
+ "eval_f1_at_5": 0.4261995886925304,
1141
+ "eval_f1_at_8": 0.2915919235644548,
1142
+ "eval_f1_macro": 0.9111295247504767,
1143
+ "eval_f1_micro": 0.9183258837870039,
1144
+ "eval_loss": 0.13994979858398438,
1145
+ "eval_prec_at_5": 0.27283105022831056,
1146
+ "eval_prec_at_8": 0.1713755707762557,
1147
+ "eval_prec_macro": 0.8945120800094545,
1148
+ "eval_prec_micro": 0.9003984063744302,
1149
+ "eval_rec_at_5": 0.9733637747336377,
1150
+ "eval_rec_at_8": 0.9767884322678843,
1151
+ "eval_rec_macro": 0.9295761415839934,
1152
+ "eval_rec_micro": 0.9369817578772025,
1153
+ "eval_runtime": 6.299,
1154
+ "eval_samples_per_second": 139.071,
1155
+ "eval_steps_per_second": 17.463,
1156
+ "step": 888
1157
+ },
1158
+ {
1159
+ "epoch": 38.0,
1160
+ "grad_norm": 0.005984355695545673,
1161
+ "learning_rate": 0.0003,
1162
+ "loss": 0.0009,
1163
+ "step": 912
1164
+ },
1165
+ {
1166
+ "epoch": 38.0,
1167
+ "eval_acc_macro": 0.8453744008028368,
1168
+ "eval_acc_micro": 0.8551617873651113,
1169
+ "eval_auc_macro": 0.9890338148789555,
1170
+ "eval_auc_micro": 0.9911242376839919,
1171
+ "eval_f1_at_5": 0.4271994046239887,
1172
+ "eval_f1_at_8": 0.29182390543479947,
1173
+ "eval_f1_macro": 0.9134487053983307,
1174
+ "eval_f1_micro": 0.9219269102989267,
1175
+ "eval_loss": 0.13640232384204865,
1176
+ "eval_prec_at_5": 0.27351598173515984,
1177
+ "eval_prec_at_8": 0.17151826484018265,
1178
+ "eval_prec_macro": 0.9185694511313961,
1179
+ "eval_prec_micro": 0.923460898502419,
1180
+ "eval_rec_at_5": 0.975076103500761,
1181
+ "eval_rec_at_8": 0.977359208523592,
1182
+ "eval_rec_macro": 0.9098787791583011,
1183
+ "eval_rec_micro": 0.9203980099501724,
1184
+ "eval_runtime": 6.2788,
1185
+ "eval_samples_per_second": 139.517,
1186
+ "eval_steps_per_second": 17.519,
1187
+ "step": 912
1188
+ }
1189
+ ],
1190
+ "logging_steps": 500,
1191
+ "max_steps": 960,
1192
+ "num_input_tokens_seen": 0,
1193
+ "num_train_epochs": 40,
1194
+ "save_steps": 500,
1195
+ "total_flos": 0.0,
1196
+ "train_batch_size": 32,
1197
+ "trial_name": null,
1198
+ "trial_params": null
1199
+ }