maximuspowers commited on
Commit
6d4f9a8
·
verified ·
1 Parent(s): 5e73f53

End of training

Browse files
README.md CHANGED
@@ -16,15 +16,15 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  This model is a fine-tuned version of [maximuspowers/bert-philosophy-adapted](https://huggingface.co/maximuspowers/bert-philosophy-adapted) on the None dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 0.7948
20
- - Exact Match Accuracy: 0.225
21
- - Macro Precision: 0.2908
22
- - Macro Recall: 0.1502
23
- - Macro F1: 0.1930
24
- - Micro Precision: 0.7083
25
- - Micro Recall: 0.2982
26
- - Micro F1: 0.4198
27
- - Hamming Loss: 0.0691
28
 
29
  ## Model description
30
 
 
16
 
17
  This model is a fine-tuned version of [maximuspowers/bert-philosophy-adapted](https://huggingface.co/maximuspowers/bert-philosophy-adapted) on the None dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 0.4468
20
+ - Exact Match Accuracy: 0.425
21
+ - Macro Precision: 0.3078
22
+ - Macro Recall: 0.2221
23
+ - Macro F1: 0.2248
24
+ - Micro Precision: 0.8966
25
+ - Micro Recall: 0.4561
26
+ - Micro F1: 0.6047
27
+ - Hamming Loss: 0.05
28
 
29
  ## Model description
30
 
all_results.json CHANGED
@@ -1,17 +1,17 @@
1
  {
2
  "epoch": 50.0,
3
- "eval_exact_match_accuracy": 0.4,
4
- "eval_hamming_loss": 0.052941176470588235,
5
- "eval_loss": 0.5290737152099609,
6
- "eval_macro_f1": 0.14097904608067482,
7
- "eval_macro_precision": 0.1657754010695187,
8
- "eval_macro_recall": 0.1264705882352941,
9
- "eval_micro_f1": 0.5609756097560976,
10
- "eval_micro_precision": 0.92,
11
- "eval_micro_recall": 0.40350877192982454,
12
- "eval_runtime": 0.2121,
13
- "eval_samples_per_second": 188.615,
14
- "eval_steps_per_second": 23.577,
15
  "total_flos": 0.0,
16
  "train_loss": 0.8574352493286133,
17
  "train_runtime": 257.7927,
 
1
  {
2
  "epoch": 50.0,
3
+ "eval_exact_match_accuracy": 0.425,
4
+ "eval_hamming_loss": 0.05,
5
+ "eval_loss": 0.44675666093826294,
6
+ "eval_macro_f1": 0.22477092910529442,
7
+ "eval_macro_precision": 0.307843137254902,
8
+ "eval_macro_recall": 0.22205882352941175,
9
+ "eval_micro_f1": 0.6046511627906976,
10
+ "eval_micro_precision": 0.896551724137931,
11
+ "eval_micro_recall": 0.45614035087719296,
12
+ "eval_runtime": 0.2085,
13
+ "eval_samples_per_second": 191.847,
14
+ "eval_steps_per_second": 23.981,
15
  "total_flos": 0.0,
16
  "train_loss": 0.8574352493286133,
17
  "train_runtime": 257.7927,
runs/Jun15_00-58-48_92b2e0e6fb20/events.out.tfevents.1749949423.92b2e0e6fb20.2194.14 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5681d8937701cc06c9ca22fe68ebe13b29d8ca175164cd965fa953de1ae2047
3
+ size 5853
test_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "epoch": 45.0,
3
- "eval_exact_match_accuracy": 0.4,
4
- "eval_hamming_loss": 0.052941176470588235,
5
- "eval_loss": 0.5290737152099609,
6
- "eval_macro_f1": 0.14097904608067482,
7
- "eval_macro_precision": 0.1657754010695187,
8
- "eval_macro_recall": 0.1264705882352941,
9
- "eval_micro_f1": 0.5609756097560976,
10
- "eval_micro_precision": 0.92,
11
- "eval_micro_recall": 0.40350877192982454,
12
- "eval_runtime": 0.2121,
13
- "eval_samples_per_second": 188.615,
14
- "eval_steps_per_second": 23.577
15
  }
 
1
  {
2
+ "epoch": 50.0,
3
+ "eval_exact_match_accuracy": 0.425,
4
+ "eval_hamming_loss": 0.05,
5
+ "eval_loss": 0.44675666093826294,
6
+ "eval_macro_f1": 0.22477092910529442,
7
+ "eval_macro_precision": 0.307843137254902,
8
+ "eval_macro_recall": 0.22205882352941175,
9
+ "eval_micro_f1": 0.6046511627906976,
10
+ "eval_micro_precision": 0.896551724137931,
11
+ "eval_micro_recall": 0.45614035087719296,
12
+ "eval_runtime": 0.2085,
13
+ "eval_samples_per_second": 191.847,
14
+ "eval_steps_per_second": 23.981
15
  }
trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 600,
3
  "best_metric": 0.42105263157894735,
4
  "best_model_checkpoint": null,
5
- "epoch": 45.0,
6
  "eval_steps": 100,
7
- "global_step": 900,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -12,1179 +12,1308 @@
12
  {
13
  "epoch": 0,
14
  "step": 0,
15
- "train/classification_loss": 0.7007833123207092,
16
- "train/contrastive_loss": 9.572936058044434,
17
- "train/negative_loss": 9.572856903076172,
18
  "train/num_negatives": 46,
19
  "train/num_positives": 10,
20
- "train/positive_loss": 7.908708357717842e-05,
21
- "train/total_loss": 2.615370512008667
22
  },
23
  {
24
  "epoch": 0,
25
  "step": 0,
26
- "train/classification_loss": 0.7040252089500427,
27
- "train/contrastive_loss": 9.725648880004883,
28
- "train/negative_loss": 9.725597381591797,
29
  "train/num_negatives": 46,
30
  "train/num_positives": 10,
31
- "train/positive_loss": 5.152364246896468e-05,
32
- "train/total_loss": 2.6491549015045166
33
  },
34
  {
35
  "epoch": 2.5,
36
- "grad_norm": 18.165754318237305,
37
- "learning_rate": 9.600000000000001e-06,
38
- "loss": 3.7884,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 2.5,
43
  "step": 50,
44
- "train/classification_loss": 0.6516271233558655,
45
- "train/contrastive_loss": 1.8623473644256592,
46
- "train/negative_loss": 1.4998806715011597,
47
  "train/num_negatives": 38,
48
  "train/num_positives": 18,
49
- "train/positive_loss": 0.36246663331985474,
50
- "train/total_loss": 1.0240966081619263
51
  },
52
  {
53
  "epoch": 2.5,
54
  "step": 50,
55
- "train/classification_loss": 0.6451266407966614,
56
- "train/contrastive_loss": 1.7446768283843994,
57
- "train/negative_loss": 1.668500304222107,
58
  "train/num_negatives": 50,
59
  "train/num_positives": 6,
60
- "train/positive_loss": 0.07617650926113129,
61
- "train/total_loss": 0.9940620064735413
62
  },
63
  {
64
  "epoch": 5.0,
65
- "grad_norm": 16.30499839782715,
66
- "learning_rate": 1.9600000000000002e-05,
67
- "loss": 1.7889,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 5.0,
72
  "step": 100,
73
- "train/classification_loss": 0.4927652180194855,
74
- "train/contrastive_loss": 2.06559419631958,
75
- "train/negative_loss": 1.8223981857299805,
76
  "train/num_negatives": 44,
77
  "train/num_positives": 12,
78
- "train/positive_loss": 0.24319612979888916,
79
- "train/total_loss": 0.9058840274810791
80
  },
81
  {
82
  "epoch": 5.0,
83
  "step": 100,
84
- "train/classification_loss": 0.5166366100311279,
85
- "train/contrastive_loss": 3.1131491661071777,
86
- "train/negative_loss": 3.112612724304199,
87
  "train/num_negatives": 44,
88
  "train/num_positives": 12,
89
- "train/positive_loss": 0.0005365243996493518,
90
- "train/total_loss": 1.1392664909362793
91
  },
92
  {
93
  "epoch": 5.0,
94
  "step": 100,
95
- "train/classification_loss": 0.4922243058681488,
96
- "train/contrastive_loss": 1.7773994207382202,
97
- "train/negative_loss": 1.7757622003555298,
98
  "train/num_negatives": 52,
99
  "train/num_positives": 4,
100
- "train/positive_loss": 0.0016372093232348561,
101
- "train/total_loss": 0.8477041721343994
102
  },
103
  {
104
  "epoch": 5.0,
105
  "step": 100,
106
- "train/classification_loss": 0.5021852850914001,
107
- "train/contrastive_loss": 3.2608611583709717,
108
- "train/negative_loss": 2.6507816314697266,
109
  "train/num_negatives": 48,
110
  "train/num_positives": 8,
111
- "train/positive_loss": 0.6100795269012451,
112
- "train/total_loss": 1.1543575525283813
113
  },
114
  {
115
  "epoch": 5.0,
116
  "step": 100,
117
- "train/classification_loss": 0.4944652020931244,
118
- "train/contrastive_loss": 2.343458652496338,
119
- "train/negative_loss": 1.1261494159698486,
120
  "train/num_negatives": 46,
121
  "train/num_positives": 8,
122
- "train/positive_loss": 1.2173092365264893,
123
- "train/total_loss": 0.9631569385528564
124
  },
125
  {
126
  "epoch": 5.0,
127
  "eval_exact_match_accuracy": 0.0,
128
- "eval_hamming_loss": 0.08529411764705883,
129
- "eval_loss": 1.002073884010315,
130
- "eval_macro_f1": 0.0,
131
- "eval_macro_precision": 0.0,
132
- "eval_macro_recall": 0.0,
133
- "eval_micro_f1": 0.0,
134
- "eval_micro_precision": 0.0,
135
- "eval_micro_recall": 0.0,
136
- "eval_runtime": 0.2122,
137
- "eval_samples_per_second": 188.524,
138
- "eval_steps_per_second": 23.566,
139
  "step": 100
140
  },
141
  {
142
  "epoch": 5.0,
143
  "step": 100,
144
- "train/classification_loss": 0.5136489272117615,
145
- "train/contrastive_loss": 1.3931580781936646,
146
- "train/negative_loss": 1.0857487916946411,
147
  "train/num_negatives": 42,
148
  "train/num_positives": 12,
149
- "train/positive_loss": 0.30740925669670105,
150
- "train/total_loss": 0.7922805547714233
151
  },
152
  {
153
  "epoch": 5.0,
154
  "step": 100,
155
- "train/classification_loss": 0.5019432902336121,
156
- "train/contrastive_loss": 0.7777740955352783,
157
- "train/negative_loss": 0.5738040804862976,
158
  "train/num_negatives": 40,
159
  "train/num_positives": 16,
160
- "train/positive_loss": 0.20396998524665833,
161
- "train/total_loss": 0.6574981212615967
162
  },
163
  {
164
  "epoch": 7.5,
165
- "grad_norm": 12.862069129943848,
166
- "learning_rate": 1.8933333333333334e-05,
167
- "loss": 1.396,
168
  "step": 150
169
  },
170
  {
171
  "epoch": 7.5,
172
  "step": 150,
173
- "train/classification_loss": 0.3993939161300659,
174
- "train/contrastive_loss": 1.1298128366470337,
175
- "train/negative_loss": 0.8979541659355164,
176
  "train/num_negatives": 44,
177
  "train/num_positives": 8,
178
- "train/positive_loss": 0.23185870051383972,
179
- "train/total_loss": 0.6253564953804016
180
  },
181
  {
182
  "epoch": 7.5,
183
  "step": 150,
184
- "train/classification_loss": 0.393746018409729,
185
- "train/contrastive_loss": 1.2817302942276,
186
- "train/negative_loss": 1.1390491724014282,
187
  "train/num_negatives": 44,
188
  "train/num_positives": 10,
189
- "train/positive_loss": 0.14268112182617188,
190
- "train/total_loss": 0.6500921249389648
191
  },
192
  {
193
  "epoch": 10.0,
194
- "grad_norm": 23.76511573791504,
195
- "learning_rate": 1.782222222222222e-05,
196
- "loss": 1.156,
197
  "step": 200
198
  },
199
  {
200
  "epoch": 10.0,
201
  "step": 200,
202
- "train/classification_loss": 0.30127307772636414,
203
- "train/contrastive_loss": 1.8966163396835327,
204
- "train/negative_loss": 1.7765212059020996,
205
  "train/num_negatives": 44,
206
  "train/num_positives": 12,
207
- "train/positive_loss": 0.12009509652853012,
208
- "train/total_loss": 0.6805963516235352
209
  },
210
  {
211
  "epoch": 10.0,
212
  "step": 200,
213
- "train/classification_loss": 0.3545684218406677,
214
- "train/contrastive_loss": 3.0673155784606934,
215
- "train/negative_loss": 3.067002058029175,
216
  "train/num_negatives": 44,
217
  "train/num_positives": 12,
218
- "train/positive_loss": 0.0003136020968668163,
219
- "train/total_loss": 0.9680315256118774
220
  },
221
  {
222
  "epoch": 10.0,
223
  "step": 200,
224
- "train/classification_loss": 0.32993215322494507,
225
- "train/contrastive_loss": 2.723947048187256,
226
- "train/negative_loss": 2.7232000827789307,
227
  "train/num_negatives": 52,
228
  "train/num_positives": 4,
229
- "train/positive_loss": 0.0007470683194696903,
230
- "train/total_loss": 0.8747215867042542
231
  },
232
  {
233
  "epoch": 10.0,
234
  "step": 200,
235
- "train/classification_loss": 0.32662659883499146,
236
- "train/contrastive_loss": 3.6181204319000244,
237
- "train/negative_loss": 3.4868171215057373,
238
  "train/num_negatives": 48,
239
  "train/num_positives": 8,
240
- "train/positive_loss": 0.1313033103942871,
241
- "train/total_loss": 1.050250768661499
242
  },
243
  {
244
  "epoch": 10.0,
245
  "step": 200,
246
- "train/classification_loss": 0.33877384662628174,
247
- "train/contrastive_loss": 2.0167577266693115,
248
- "train/negative_loss": 1.4009835720062256,
249
  "train/num_negatives": 46,
250
  "train/num_positives": 8,
251
- "train/positive_loss": 0.6157740950584412,
252
- "train/total_loss": 0.742125391960144
253
  },
254
  {
255
  "epoch": 10.0,
256
  "eval_exact_match_accuracy": 0.0,
257
- "eval_hamming_loss": 0.0838235294117647,
258
- "eval_loss": 0.8631451725959778,
259
- "eval_macro_f1": 0.0,
260
- "eval_macro_precision": 0.0,
261
- "eval_macro_recall": 0.0,
262
- "eval_micro_f1": 0.0,
263
- "eval_micro_precision": 0.0,
264
- "eval_micro_recall": 0.0,
265
- "eval_runtime": 0.202,
266
- "eval_samples_per_second": 198.018,
267
- "eval_steps_per_second": 24.752,
268
  "step": 200
269
  },
270
  {
271
  "epoch": 10.0,
272
  "step": 200,
273
- "train/classification_loss": 0.34425634145736694,
274
- "train/contrastive_loss": 1.217971682548523,
275
- "train/negative_loss": 0.8478565216064453,
276
  "train/num_negatives": 42,
277
  "train/num_positives": 14,
278
- "train/positive_loss": 0.37011516094207764,
279
- "train/total_loss": 0.5878506898880005
280
  },
281
  {
282
  "epoch": 10.0,
283
  "step": 200,
284
- "train/classification_loss": 0.3122542202472687,
285
- "train/contrastive_loss": 0.6875693798065186,
286
- "train/negative_loss": 0.686568558216095,
287
  "train/num_negatives": 42,
288
  "train/num_positives": 14,
289
- "train/positive_loss": 0.0010008324170485139,
290
- "train/total_loss": 0.4497680962085724
291
  },
292
  {
293
  "epoch": 12.5,
294
- "grad_norm": 7.754025459289551,
295
- "learning_rate": 1.6711111111111112e-05,
296
- "loss": 1.0042,
297
  "step": 250
298
  },
299
  {
300
  "epoch": 12.5,
301
  "step": 250,
302
- "train/classification_loss": 0.24464763700962067,
303
- "train/contrastive_loss": 0.6364108920097351,
304
- "train/negative_loss": 0.4732590615749359,
305
  "train/num_negatives": 44,
306
  "train/num_positives": 12,
307
- "train/positive_loss": 0.163151815533638,
308
- "train/total_loss": 0.3719298243522644
309
  },
310
  {
311
  "epoch": 12.5,
312
  "step": 250,
313
- "train/classification_loss": 0.3077278137207031,
314
- "train/contrastive_loss": 0.4854884147644043,
315
- "train/negative_loss": 0.33287519216537476,
316
  "train/num_negatives": 42,
317
  "train/num_positives": 8,
318
- "train/positive_loss": 0.15261322259902954,
319
- "train/total_loss": 0.40482550859451294
320
  },
321
  {
322
  "epoch": 15.0,
323
- "grad_norm": 16.385419845581055,
324
- "learning_rate": 1.5600000000000003e-05,
325
- "loss": 0.8775,
326
  "step": 300
327
  },
328
  {
329
  "epoch": 15.0,
330
  "step": 300,
331
- "train/classification_loss": 0.23176752030849457,
332
- "train/contrastive_loss": 3.0300073623657227,
333
- "train/negative_loss": 2.5657095909118652,
334
  "train/num_negatives": 44,
335
  "train/num_positives": 12,
336
- "train/positive_loss": 0.4642978310585022,
337
- "train/total_loss": 0.8377690315246582
338
  },
339
  {
340
  "epoch": 15.0,
341
  "step": 300,
342
- "train/classification_loss": 0.3002067804336548,
343
- "train/contrastive_loss": 3.911193609237671,
344
- "train/negative_loss": 3.911159038543701,
345
  "train/num_negatives": 44,
346
  "train/num_positives": 12,
347
- "train/positive_loss": 3.4572090953588486e-05,
348
- "train/total_loss": 1.082445502281189
349
  },
350
  {
351
  "epoch": 15.0,
352
  "step": 300,
353
- "train/classification_loss": 0.2759508788585663,
354
- "train/contrastive_loss": 2.758004665374756,
355
- "train/negative_loss": 2.7472095489501953,
356
  "train/num_negatives": 52,
357
  "train/num_positives": 4,
358
- "train/positive_loss": 0.010795066133141518,
359
- "train/total_loss": 0.8275518417358398
360
  },
361
  {
362
  "epoch": 15.0,
363
  "step": 300,
364
- "train/classification_loss": 0.27436333894729614,
365
- "train/contrastive_loss": 4.6799540519714355,
366
- "train/negative_loss": 4.251977920532227,
367
  "train/num_negatives": 48,
368
  "train/num_positives": 8,
369
- "train/positive_loss": 0.4279760420322418,
370
- "train/total_loss": 1.2103540897369385
371
  },
372
  {
373
  "epoch": 15.0,
374
  "step": 300,
375
- "train/classification_loss": 0.2907729744911194,
376
- "train/contrastive_loss": 2.066659927368164,
377
- "train/negative_loss": 1.4751646518707275,
378
  "train/num_negatives": 46,
379
  "train/num_positives": 8,
380
- "train/positive_loss": 0.5914952754974365,
381
- "train/total_loss": 0.7041049599647522
382
  },
383
  {
384
  "epoch": 15.0,
385
  "eval_exact_match_accuracy": 0.05,
386
- "eval_hamming_loss": 0.07647058823529412,
387
- "eval_loss": 0.9324451684951782,
388
- "eval_macro_f1": 0.03676470588235294,
389
- "eval_macro_precision": 0.058823529411764705,
390
  "eval_macro_recall": 0.026737967914438502,
391
- "eval_micro_f1": 0.16129032258064516,
392
- "eval_micro_precision": 1.0,
393
  "eval_micro_recall": 0.08771929824561403,
394
- "eval_runtime": 0.207,
395
- "eval_samples_per_second": 193.242,
396
- "eval_steps_per_second": 24.155,
397
  "step": 300
398
  },
399
  {
400
  "epoch": 15.0,
401
  "step": 300,
402
- "train/classification_loss": 0.26088976860046387,
403
- "train/contrastive_loss": 0.5876651406288147,
404
- "train/negative_loss": 0.5801823139190674,
405
  "train/num_negatives": 32,
406
  "train/num_positives": 20,
407
- "train/positive_loss": 0.007482839282602072,
408
- "train/total_loss": 0.3784227967262268
409
  },
410
  {
411
  "epoch": 15.0,
412
  "step": 300,
413
- "train/classification_loss": 0.2548993229866028,
414
- "train/contrastive_loss": 1.50383722782135,
415
- "train/negative_loss": 1.4029262065887451,
416
  "train/num_negatives": 46,
417
  "train/num_positives": 10,
418
- "train/positive_loss": 0.10091102123260498,
419
- "train/total_loss": 0.5556668043136597
420
  },
421
  {
422
  "epoch": 17.5,
423
- "grad_norm": 16.8145809173584,
424
- "learning_rate": 1.448888888888889e-05,
425
- "loss": 0.827,
426
  "step": 350
427
  },
428
  {
429
  "epoch": 17.5,
430
  "step": 350,
431
- "train/classification_loss": 0.24073848128318787,
432
- "train/contrastive_loss": 0.6602705717086792,
433
- "train/negative_loss": 0.4261236786842346,
434
  "train/num_negatives": 42,
435
  "train/num_positives": 14,
436
- "train/positive_loss": 0.23414692282676697,
437
- "train/total_loss": 0.3727926015853882
438
  },
439
  {
440
  "epoch": 17.5,
441
  "step": 350,
442
- "train/classification_loss": 0.2786425054073334,
443
- "train/contrastive_loss": 0.29787567257881165,
444
- "train/negative_loss": 0.2970171868801117,
445
  "train/num_negatives": 44,
446
  "train/num_positives": 6,
447
- "train/positive_loss": 0.0008584868628531694,
448
- "train/total_loss": 0.3382176458835602
449
  },
450
  {
451
  "epoch": 20.0,
452
- "grad_norm": 18.7548828125,
453
- "learning_rate": 1.3377777777777778e-05,
454
- "loss": 0.7747,
455
  "step": 400
456
  },
457
  {
458
  "epoch": 20.0,
459
  "step": 400,
460
- "train/classification_loss": 0.20234902203083038,
461
- "train/contrastive_loss": 1.7551348209381104,
462
- "train/negative_loss": 1.1972665786743164,
463
  "train/num_negatives": 44,
464
  "train/num_positives": 12,
465
- "train/positive_loss": 0.557868242263794,
466
- "train/total_loss": 0.5533760190010071
467
  },
468
  {
469
  "epoch": 20.0,
470
  "step": 400,
471
- "train/classification_loss": 0.27940884232521057,
472
- "train/contrastive_loss": 3.1547234058380127,
473
- "train/negative_loss": 3.1508476734161377,
474
  "train/num_negatives": 44,
475
  "train/num_positives": 12,
476
- "train/positive_loss": 0.0038756858557462692,
477
- "train/total_loss": 0.9103535413742065
478
  },
479
  {
480
  "epoch": 20.0,
481
  "step": 400,
482
- "train/classification_loss": 0.2563062608242035,
483
- "train/contrastive_loss": 3.433103084564209,
484
- "train/negative_loss": 3.146430253982544,
485
  "train/num_negatives": 52,
486
  "train/num_positives": 4,
487
- "train/positive_loss": 0.2866727411746979,
488
- "train/total_loss": 0.9429268836975098
489
  },
490
  {
491
  "epoch": 20.0,
492
  "step": 400,
493
- "train/classification_loss": 0.24611052870750427,
494
- "train/contrastive_loss": 3.2940289974212646,
495
- "train/negative_loss": 2.822859525680542,
496
  "train/num_negatives": 48,
497
  "train/num_positives": 8,
498
- "train/positive_loss": 0.47116944193840027,
499
- "train/total_loss": 0.9049162864685059
500
  },
501
  {
502
  "epoch": 20.0,
503
  "step": 400,
504
- "train/classification_loss": 0.26658472418785095,
505
- "train/contrastive_loss": 0.9518164992332458,
506
- "train/negative_loss": 0.90028977394104,
507
  "train/num_negatives": 46,
508
  "train/num_positives": 8,
509
- "train/positive_loss": 0.051526736468076706,
510
- "train/total_loss": 0.45694804191589355
511
  },
512
  {
513
  "epoch": 20.0,
514
  "eval_exact_match_accuracy": 0.1,
515
  "eval_hamming_loss": 0.075,
516
- "eval_loss": 0.7537041902542114,
517
- "eval_macro_f1": 0.0784313725490196,
518
- "eval_macro_precision": 0.1092436974789916,
519
- "eval_macro_recall": 0.06149732620320855,
520
- "eval_micro_f1": 0.2153846153846154,
521
- "eval_micro_precision": 0.875,
522
- "eval_micro_recall": 0.12280701754385964,
523
- "eval_runtime": 0.2075,
524
- "eval_samples_per_second": 192.749,
525
- "eval_steps_per_second": 24.094,
526
  "step": 400
527
  },
528
  {
529
  "epoch": 20.0,
530
  "step": 400,
531
- "train/classification_loss": 0.24341967701911926,
532
- "train/contrastive_loss": 0.7751690149307251,
533
- "train/negative_loss": 0.517412543296814,
534
  "train/num_negatives": 42,
535
  "train/num_positives": 10,
536
- "train/positive_loss": 0.25775647163391113,
537
- "train/total_loss": 0.3984534740447998
538
  },
539
  {
540
  "epoch": 20.0,
541
  "step": 400,
542
- "train/classification_loss": 0.2490834891796112,
543
- "train/contrastive_loss": 0.44699349999427795,
544
- "train/negative_loss": 0.4466739594936371,
545
  "train/num_negatives": 52,
546
  "train/num_positives": 4,
547
- "train/positive_loss": 0.0003195433528162539,
548
- "train/total_loss": 0.33848220109939575
549
  },
550
  {
551
  "epoch": 22.5,
552
- "grad_norm": 3.832901954650879,
553
- "learning_rate": 1.2266666666666667e-05,
554
- "loss": 0.6929,
555
  "step": 450
556
  },
557
  {
558
  "epoch": 22.5,
559
  "step": 450,
560
- "train/classification_loss": 0.23125219345092773,
561
- "train/contrastive_loss": 0.7171761393547058,
562
- "train/negative_loss": 0.7147800922393799,
563
  "train/num_negatives": 50,
564
  "train/num_positives": 4,
565
- "train/positive_loss": 0.0023960734251886606,
566
- "train/total_loss": 0.37468743324279785
567
  },
568
  {
569
  "epoch": 22.5,
570
  "step": 450,
571
- "train/classification_loss": 0.20611771941184998,
572
- "train/contrastive_loss": 0.7421404123306274,
573
- "train/negative_loss": 0.4822021722793579,
574
  "train/num_negatives": 50,
575
  "train/num_positives": 6,
576
- "train/positive_loss": 0.2599382698535919,
577
- "train/total_loss": 0.35454580187797546
578
  },
579
  {
580
  "epoch": 25.0,
581
- "grad_norm": 11.741353988647461,
582
- "learning_rate": 1.1155555555555556e-05,
583
- "loss": 0.7074,
584
  "step": 500
585
  },
586
  {
587
  "epoch": 25.0,
588
  "step": 500,
589
- "train/classification_loss": 0.1859707236289978,
590
- "train/contrastive_loss": 2.448401689529419,
591
- "train/negative_loss": 1.6338316202163696,
592
  "train/num_negatives": 44,
593
  "train/num_positives": 12,
594
- "train/positive_loss": 0.8145700693130493,
595
- "train/total_loss": 0.6756510734558105
596
  },
597
  {
598
  "epoch": 25.0,
599
  "step": 500,
600
- "train/classification_loss": 0.2645534574985504,
601
- "train/contrastive_loss": 2.718876838684082,
602
- "train/negative_loss": 2.718696355819702,
603
  "train/num_negatives": 44,
604
  "train/num_positives": 12,
605
- "train/positive_loss": 0.00018060117145068944,
606
- "train/total_loss": 0.8083288669586182
607
  },
608
  {
609
  "epoch": 25.0,
610
  "step": 500,
611
- "train/classification_loss": 0.244839608669281,
612
- "train/contrastive_loss": 4.527173042297363,
613
- "train/negative_loss": 2.9302120208740234,
614
  "train/num_negatives": 52,
615
  "train/num_positives": 4,
616
- "train/positive_loss": 1.5969611406326294,
617
- "train/total_loss": 1.1502742767333984
618
  },
619
  {
620
  "epoch": 25.0,
621
  "step": 500,
622
- "train/classification_loss": 0.22313973307609558,
623
- "train/contrastive_loss": 3.153029203414917,
624
- "train/negative_loss": 2.193068504333496,
625
  "train/num_negatives": 48,
626
  "train/num_positives": 8,
627
- "train/positive_loss": 0.9599607586860657,
628
- "train/total_loss": 0.8537455797195435
629
  },
630
  {
631
  "epoch": 25.0,
632
  "step": 500,
633
- "train/classification_loss": 0.25405386090278625,
634
- "train/contrastive_loss": 1.7682042121887207,
635
- "train/negative_loss": 0.7665292620658875,
636
  "train/num_negatives": 46,
637
  "train/num_positives": 8,
638
- "train/positive_loss": 1.001675009727478,
639
- "train/total_loss": 0.6076947450637817
640
  },
641
  {
642
  "epoch": 25.0,
643
  "eval_exact_match_accuracy": 0.175,
644
- "eval_hamming_loss": 0.07205882352941176,
645
- "eval_loss": 0.8191388845443726,
646
- "eval_macro_f1": 0.10560224089635854,
647
- "eval_macro_precision": 0.14869281045751634,
648
- "eval_macro_recall": 0.08449197860962566,
649
- "eval_micro_f1": 0.30985915492957744,
650
- "eval_micro_precision": 0.7857142857142857,
651
- "eval_micro_recall": 0.19298245614035087,
652
- "eval_runtime": 0.2062,
653
- "eval_samples_per_second": 193.983,
654
- "eval_steps_per_second": 24.248,
655
  "step": 500
656
  },
657
  {
658
  "epoch": 25.0,
659
  "step": 500,
660
- "train/classification_loss": 0.16842614114284515,
661
- "train/contrastive_loss": 0.2924913763999939,
662
- "train/negative_loss": 0.28081196546554565,
663
  "train/num_negatives": 42,
664
  "train/num_positives": 14,
665
- "train/positive_loss": 0.011679417453706264,
666
- "train/total_loss": 0.22692441940307617
667
  },
668
  {
669
  "epoch": 25.0,
670
  "step": 500,
671
- "train/classification_loss": 0.24865391850471497,
672
- "train/contrastive_loss": 1.3489311933517456,
673
- "train/negative_loss": 0.7319411635398865,
674
  "train/num_negatives": 44,
675
  "train/num_positives": 12,
676
- "train/positive_loss": 0.6169900298118591,
677
- "train/total_loss": 0.5184401273727417
678
  },
679
  {
680
  "epoch": 27.5,
681
- "grad_norm": 6.354713439941406,
682
- "learning_rate": 1.0044444444444446e-05,
683
- "loss": 0.6366,
684
  "step": 550
685
  },
686
  {
687
  "epoch": 27.5,
688
  "step": 550,
689
- "train/classification_loss": 0.23189660906791687,
690
- "train/contrastive_loss": 0.8256056308746338,
691
- "train/negative_loss": 0.8255752325057983,
692
  "train/num_negatives": 54,
693
  "train/num_positives": 2,
694
- "train/positive_loss": 3.039883085875772e-05,
695
- "train/total_loss": 0.3970177173614502
696
  },
697
  {
698
  "epoch": 27.5,
699
  "step": 550,
700
- "train/classification_loss": 0.2541985809803009,
701
- "train/contrastive_loss": 1.3727329969406128,
702
- "train/negative_loss": 1.0316259860992432,
703
  "train/num_negatives": 40,
704
  "train/num_positives": 12,
705
- "train/positive_loss": 0.34110698103904724,
706
- "train/total_loss": 0.528745174407959
707
  },
708
  {
709
  "epoch": 30.0,
710
- "grad_norm": 12.662666320800781,
711
- "learning_rate": 8.933333333333333e-06,
712
- "loss": 0.6281,
713
  "step": 600
714
  },
715
  {
716
  "epoch": 30.0,
717
  "step": 600,
718
- "train/classification_loss": 0.17969225347042084,
719
- "train/contrastive_loss": 2.1998844146728516,
720
- "train/negative_loss": 1.262956976890564,
721
  "train/num_negatives": 44,
722
  "train/num_positives": 12,
723
- "train/positive_loss": 0.9369274973869324,
724
- "train/total_loss": 0.6196691393852234
725
  },
726
  {
727
  "epoch": 30.0,
728
  "step": 600,
729
- "train/classification_loss": 0.2561105787754059,
730
- "train/contrastive_loss": 3.2061843872070312,
731
- "train/negative_loss": 3.2013988494873047,
732
  "train/num_negatives": 44,
733
  "train/num_positives": 12,
734
- "train/positive_loss": 0.00478551909327507,
735
- "train/total_loss": 0.8973474502563477
736
  },
737
  {
738
  "epoch": 30.0,
739
  "step": 600,
740
- "train/classification_loss": 0.24141749739646912,
741
- "train/contrastive_loss": 4.734986305236816,
742
- "train/negative_loss": 3.0736501216888428,
743
  "train/num_negatives": 52,
744
  "train/num_positives": 4,
745
- "train/positive_loss": 1.6613364219665527,
746
- "train/total_loss": 1.1884148120880127
747
  },
748
  {
749
  "epoch": 30.0,
750
  "step": 600,
751
- "train/classification_loss": 0.22462235391139984,
752
- "train/contrastive_loss": 4.2080254554748535,
753
- "train/negative_loss": 3.6183528900146484,
754
  "train/num_negatives": 48,
755
  "train/num_positives": 8,
756
- "train/positive_loss": 0.5896727442741394,
757
- "train/total_loss": 1.0662274360656738
758
  },
759
  {
760
  "epoch": 30.0,
761
  "step": 600,
762
- "train/classification_loss": 0.24234618246555328,
763
- "train/contrastive_loss": 1.1968506574630737,
764
- "train/negative_loss": 0.8532204031944275,
765
  "train/num_negatives": 46,
766
  "train/num_positives": 8,
767
- "train/positive_loss": 0.34363028407096863,
768
- "train/total_loss": 0.4817163348197937
769
  },
770
  {
771
  "epoch": 30.0,
772
- "eval_exact_match_accuracy": 0.275,
773
- "eval_hamming_loss": 0.06470588235294118,
774
- "eval_loss": 0.8506749868392944,
775
- "eval_macro_f1": 0.12978524743230624,
776
- "eval_macro_precision": 0.15735294117647058,
777
- "eval_macro_recall": 0.11336898395721925,
778
- "eval_micro_f1": 0.42105263157894735,
779
- "eval_micro_precision": 0.8421052631578947,
780
- "eval_micro_recall": 0.2807017543859649,
781
- "eval_runtime": 0.2062,
782
- "eval_samples_per_second": 193.977,
783
- "eval_steps_per_second": 24.247,
784
  "step": 600
785
  },
786
  {
787
  "epoch": 30.0,
788
  "step": 600,
789
- "train/classification_loss": 0.16461151838302612,
790
- "train/contrastive_loss": 0.5128712058067322,
791
- "train/negative_loss": 0.23724108934402466,
792
  "train/num_negatives": 38,
793
  "train/num_positives": 18,
794
- "train/positive_loss": 0.2756301164627075,
795
- "train/total_loss": 0.2671857476234436
796
  },
797
  {
798
  "epoch": 30.0,
799
  "step": 600,
800
- "train/classification_loss": 0.2038976103067398,
801
- "train/contrastive_loss": 1.0636850595474243,
802
- "train/negative_loss": 0.5897871255874634,
803
  "train/num_negatives": 48,
804
  "train/num_positives": 8,
805
- "train/positive_loss": 0.47389790415763855,
806
- "train/total_loss": 0.41663461923599243
807
  },
808
  {
809
  "epoch": 32.5,
810
- "grad_norm": 11.408817291259766,
811
- "learning_rate": 7.822222222222224e-06,
812
- "loss": 0.5854,
813
  "step": 650
814
  },
815
  {
816
  "epoch": 32.5,
817
  "step": 650,
818
- "train/classification_loss": 0.1786508709192276,
819
- "train/contrastive_loss": 0.5145746469497681,
820
- "train/negative_loss": 0.14455223083496094,
821
  "train/num_negatives": 36,
822
  "train/num_positives": 18,
823
- "train/positive_loss": 0.3700224459171295,
824
- "train/total_loss": 0.28156578540802
825
  },
826
  {
827
  "epoch": 32.5,
828
  "step": 650,
829
- "train/classification_loss": 0.2088967263698578,
830
- "train/contrastive_loss": 1.686006784439087,
831
- "train/negative_loss": 1.5359704494476318,
832
  "train/num_negatives": 50,
833
  "train/num_positives": 6,
834
- "train/positive_loss": 0.1500363051891327,
835
- "train/total_loss": 0.5460981130599976
836
  },
837
  {
838
  "epoch": 35.0,
839
- "grad_norm": 5.847558975219727,
840
- "learning_rate": 6.711111111111111e-06,
841
- "loss": 0.5506,
842
  "step": 700
843
  },
844
  {
845
  "epoch": 35.0,
846
  "step": 700,
847
- "train/classification_loss": 0.17107558250427246,
848
- "train/contrastive_loss": 2.287914276123047,
849
- "train/negative_loss": 1.0503000020980835,
850
  "train/num_negatives": 44,
851
  "train/num_positives": 12,
852
- "train/positive_loss": 1.237614393234253,
853
- "train/total_loss": 0.6286584138870239
854
  },
855
  {
856
  "epoch": 35.0,
857
  "step": 700,
858
- "train/classification_loss": 0.2500559389591217,
859
- "train/contrastive_loss": 2.9361915588378906,
860
- "train/negative_loss": 2.824162721633911,
861
  "train/num_negatives": 44,
862
  "train/num_positives": 12,
863
- "train/positive_loss": 0.1120288297533989,
864
- "train/total_loss": 0.8372942209243774
865
  },
866
  {
867
  "epoch": 35.0,
868
  "step": 700,
869
- "train/classification_loss": 0.23473431169986725,
870
- "train/contrastive_loss": 4.627296447753906,
871
- "train/negative_loss": 3.135880708694458,
872
  "train/num_negatives": 52,
873
  "train/num_positives": 4,
874
- "train/positive_loss": 1.4914155006408691,
875
- "train/total_loss": 1.160193681716919
876
  },
877
  {
878
  "epoch": 35.0,
879
  "step": 700,
880
- "train/classification_loss": 0.20871424674987793,
881
- "train/contrastive_loss": 2.308566093444824,
882
- "train/negative_loss": 1.4599714279174805,
883
  "train/num_negatives": 48,
884
  "train/num_positives": 8,
885
- "train/positive_loss": 0.8485947251319885,
886
- "train/total_loss": 0.6704274415969849
887
  },
888
  {
889
  "epoch": 35.0,
890
  "step": 700,
891
- "train/classification_loss": 0.23494853079319,
892
- "train/contrastive_loss": 0.9399895071983337,
893
- "train/negative_loss": 0.4285624623298645,
894
  "train/num_negatives": 46,
895
  "train/num_positives": 8,
896
- "train/positive_loss": 0.5114270448684692,
897
- "train/total_loss": 0.4229464530944824
898
  },
899
  {
900
  "epoch": 35.0,
901
  "eval_exact_match_accuracy": 0.25,
902
- "eval_hamming_loss": 0.0661764705882353,
903
- "eval_loss": 0.7439039945602417,
904
- "eval_macro_f1": 0.12555610479485912,
905
- "eval_macro_precision": 0.15630252100840336,
906
- "eval_macro_recall": 0.10748663101604278,
907
- "eval_micro_f1": 0.4,
908
- "eval_micro_precision": 0.8333333333333334,
909
- "eval_micro_recall": 0.2631578947368421,
910
- "eval_runtime": 0.2043,
911
- "eval_samples_per_second": 195.76,
912
- "eval_steps_per_second": 24.47,
913
  "step": 700
914
  },
915
  {
916
  "epoch": 35.0,
917
  "step": 700,
918
- "train/classification_loss": 0.18580235540866852,
919
- "train/contrastive_loss": 0.5447431802749634,
920
- "train/negative_loss": 0.26964136958122253,
921
  "train/num_negatives": 40,
922
  "train/num_positives": 16,
923
- "train/positive_loss": 0.27510178089141846,
924
- "train/total_loss": 0.29475098848342896
925
  },
926
  {
927
  "epoch": 35.0,
928
  "step": 700,
929
- "train/classification_loss": 0.19202794134616852,
930
- "train/contrastive_loss": 1.1511749029159546,
931
- "train/negative_loss": 0.8905836343765259,
932
  "train/num_negatives": 40,
933
  "train/num_positives": 16,
934
- "train/positive_loss": 0.2605912685394287,
935
- "train/total_loss": 0.42226290702819824
936
  },
937
  {
938
  "epoch": 37.5,
939
- "grad_norm": 5.041801452636719,
940
- "learning_rate": 5.600000000000001e-06,
941
- "loss": 0.5486,
942
  "step": 750
943
  },
944
  {
945
  "epoch": 37.5,
946
  "step": 750,
947
- "train/classification_loss": 0.18688128888607025,
948
- "train/contrastive_loss": 0.7704952955245972,
949
- "train/negative_loss": 0.7704557180404663,
950
  "train/num_negatives": 48,
951
  "train/num_positives": 8,
952
- "train/positive_loss": 3.9578346331836656e-05,
953
- "train/total_loss": 0.3409803509712219
954
  },
955
  {
956
  "epoch": 37.5,
957
  "step": 750,
958
- "train/classification_loss": 0.19349302351474762,
959
- "train/contrastive_loss": 0.4310402274131775,
960
- "train/negative_loss": 0.4191313683986664,
961
  "train/num_negatives": 46,
962
  "train/num_positives": 10,
963
- "train/positive_loss": 0.011908866465091705,
964
- "train/total_loss": 0.2797010540962219
965
  },
966
  {
967
  "epoch": 40.0,
968
- "grad_norm": 8.75462532043457,
969
- "learning_rate": 4.488888888888889e-06,
970
- "loss": 0.5091,
971
  "step": 800
972
  },
973
  {
974
  "epoch": 40.0,
975
  "step": 800,
976
- "train/classification_loss": 0.1726197898387909,
977
- "train/contrastive_loss": 1.8750531673431396,
978
- "train/negative_loss": 0.9292571544647217,
979
  "train/num_negatives": 44,
980
  "train/num_positives": 12,
981
- "train/positive_loss": 0.9457959532737732,
982
- "train/total_loss": 0.5476304292678833
983
  },
984
  {
985
  "epoch": 40.0,
986
  "step": 800,
987
- "train/classification_loss": 0.24574041366577148,
988
- "train/contrastive_loss": 3.074068784713745,
989
- "train/negative_loss": 3.055785894393921,
990
  "train/num_negatives": 44,
991
  "train/num_positives": 12,
992
- "train/positive_loss": 0.018282821401953697,
993
- "train/total_loss": 0.8605541586875916
994
  },
995
  {
996
  "epoch": 40.0,
997
  "step": 800,
998
- "train/classification_loss": 0.2323407232761383,
999
- "train/contrastive_loss": 4.7350592613220215,
1000
- "train/negative_loss": 2.995204210281372,
1001
  "train/num_negatives": 52,
1002
  "train/num_positives": 4,
1003
- "train/positive_loss": 1.739855170249939,
1004
- "train/total_loss": 1.1793526411056519
1005
  },
1006
  {
1007
  "epoch": 40.0,
1008
  "step": 800,
1009
- "train/classification_loss": 0.20833879709243774,
1010
- "train/contrastive_loss": 3.4109816551208496,
1011
- "train/negative_loss": 2.4856531620025635,
1012
  "train/num_negatives": 48,
1013
  "train/num_positives": 8,
1014
- "train/positive_loss": 0.9253284931182861,
1015
- "train/total_loss": 0.8905351161956787
1016
  },
1017
  {
1018
  "epoch": 40.0,
1019
  "step": 800,
1020
- "train/classification_loss": 0.23090128600597382,
1021
- "train/contrastive_loss": 1.3861477375030518,
1022
- "train/negative_loss": 0.5846720933914185,
1023
  "train/num_negatives": 46,
1024
  "train/num_positives": 8,
1025
- "train/positive_loss": 0.8014755845069885,
1026
- "train/total_loss": 0.5081308484077454
1027
  },
1028
  {
1029
  "epoch": 40.0,
1030
- "eval_exact_match_accuracy": 0.275,
1031
  "eval_hamming_loss": 0.06470588235294118,
1032
- "eval_loss": 0.7972406148910522,
1033
- "eval_macro_f1": 0.12978524743230624,
1034
- "eval_macro_precision": 0.15735294117647058,
1035
- "eval_macro_recall": 0.11336898395721925,
1036
  "eval_micro_f1": 0.42105263157894735,
1037
  "eval_micro_precision": 0.8421052631578947,
1038
  "eval_micro_recall": 0.2807017543859649,
1039
- "eval_runtime": 0.2048,
1040
- "eval_samples_per_second": 195.328,
1041
- "eval_steps_per_second": 24.416,
1042
  "step": 800
1043
  },
1044
  {
1045
  "epoch": 40.0,
1046
  "step": 800,
1047
- "train/classification_loss": 0.16540196537971497,
1048
- "train/contrastive_loss": 0.28263404965400696,
1049
- "train/negative_loss": 0.28260505199432373,
1050
  "train/num_negatives": 44,
1051
  "train/num_positives": 12,
1052
- "train/positive_loss": 2.9008200726821087e-05,
1053
- "train/total_loss": 0.22192877531051636
1054
  },
1055
  {
1056
  "epoch": 40.0,
1057
  "step": 800,
1058
- "train/classification_loss": 0.12730641663074493,
1059
- "train/contrastive_loss": 0.2957398295402527,
1060
- "train/negative_loss": 0.29565563797950745,
1061
  "train/num_negatives": 30,
1062
  "train/num_positives": 26,
1063
- "train/positive_loss": 8.41914297780022e-05,
1064
- "train/total_loss": 0.1864543855190277
1065
  },
1066
  {
1067
  "epoch": 42.5,
1068
- "grad_norm": 6.981760501861572,
1069
- "learning_rate": 3.377777777777778e-06,
1070
- "loss": 0.4948,
1071
  "step": 850
1072
  },
1073
  {
1074
  "epoch": 42.5,
1075
  "step": 850,
1076
- "train/classification_loss": 0.14926917850971222,
1077
- "train/contrastive_loss": 0.2675209641456604,
1078
- "train/negative_loss": 0.22713389992713928,
1079
  "train/num_negatives": 40,
1080
  "train/num_positives": 16,
1081
- "train/positive_loss": 0.04038705304265022,
1082
- "train/total_loss": 0.20277337729930878
1083
  },
1084
  {
1085
  "epoch": 42.5,
1086
  "step": 850,
1087
- "train/classification_loss": 0.17404112219810486,
1088
- "train/contrastive_loss": 0.08631884306669235,
1089
- "train/negative_loss": 0.08508215099573135,
1090
  "train/num_negatives": 40,
1091
  "train/num_positives": 16,
1092
- "train/positive_loss": 0.0012366925366222858,
1093
- "train/total_loss": 0.19130489230155945
1094
  },
1095
  {
1096
  "epoch": 45.0,
1097
- "grad_norm": 3.230358839035034,
1098
- "learning_rate": 2.266666666666667e-06,
1099
- "loss": 0.5038,
1100
  "step": 900
1101
  },
1102
  {
1103
  "epoch": 45.0,
1104
  "step": 900,
1105
- "train/classification_loss": 0.17026303708553314,
1106
- "train/contrastive_loss": 2.131587266921997,
1107
- "train/negative_loss": 0.9242226481437683,
1108
  "train/num_negatives": 44,
1109
  "train/num_positives": 12,
1110
- "train/positive_loss": 1.207364559173584,
1111
- "train/total_loss": 0.5965805053710938
1112
  },
1113
  {
1114
  "epoch": 45.0,
1115
  "step": 900,
1116
- "train/classification_loss": 0.24070139229297638,
1117
- "train/contrastive_loss": 3.244175910949707,
1118
- "train/negative_loss": 3.2388288974761963,
1119
  "train/num_negatives": 44,
1120
  "train/num_positives": 12,
1121
- "train/positive_loss": 0.005346930585801601,
1122
- "train/total_loss": 0.8895365595817566
1123
  },
1124
  {
1125
  "epoch": 45.0,
1126
  "step": 900,
1127
- "train/classification_loss": 0.2271755486726761,
1128
- "train/contrastive_loss": 4.544618129730225,
1129
- "train/negative_loss": 2.900235891342163,
1130
  "train/num_negatives": 52,
1131
  "train/num_positives": 4,
1132
- "train/positive_loss": 1.6443822383880615,
1133
- "train/total_loss": 1.1360992193222046
1134
  },
1135
  {
1136
  "epoch": 45.0,
1137
  "step": 900,
1138
- "train/classification_loss": 0.20477482676506042,
1139
- "train/contrastive_loss": 3.36401104927063,
1140
- "train/negative_loss": 2.652341365814209,
1141
  "train/num_negatives": 48,
1142
  "train/num_positives": 8,
1143
- "train/positive_loss": 0.7116697430610657,
1144
- "train/total_loss": 0.8775770664215088
1145
  },
1146
  {
1147
  "epoch": 45.0,
1148
  "step": 900,
1149
- "train/classification_loss": 0.22650422155857086,
1150
- "train/contrastive_loss": 1.7576167583465576,
1151
- "train/negative_loss": 0.546459436416626,
1152
  "train/num_negatives": 46,
1153
  "train/num_positives": 8,
1154
- "train/positive_loss": 1.2111573219299316,
1155
- "train/total_loss": 0.578027606010437
1156
  },
1157
  {
1158
  "epoch": 45.0,
1159
- "eval_exact_match_accuracy": 0.275,
1160
- "eval_hamming_loss": 0.06470588235294118,
1161
- "eval_loss": 0.8155641555786133,
1162
- "eval_macro_f1": 0.12978524743230624,
1163
- "eval_macro_precision": 0.15735294117647058,
1164
- "eval_macro_recall": 0.11336898395721925,
1165
- "eval_micro_f1": 0.42105263157894735,
1166
- "eval_micro_precision": 0.8421052631578947,
1167
  "eval_micro_recall": 0.2807017543859649,
1168
- "eval_runtime": 0.2057,
1169
- "eval_samples_per_second": 194.486,
1170
- "eval_steps_per_second": 24.311,
1171
  "step": 900
1172
  },
1173
  {
1174
  "epoch": 45.0,
1175
  "step": 900,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1176
  "total_flos": 0.0,
1177
- "train_loss": 0.9705644819471572,
1178
- "train_runtime": 232.6541,
1179
- "train_samples_per_second": 67.912,
1180
- "train_steps_per_second": 4.298
1181
  }
1182
  ],
1183
  "logging_steps": 50,
1184
- "max_steps": 1000,
1185
  "num_input_tokens_seen": 0,
1186
- "num_train_epochs": 50,
1187
- "save_steps": 50000,
1188
  "stateful_callbacks": {
1189
  "EarlyStoppingCallback": {
1190
  "args": {
 
1
  {
2
+ "best_global_step": 700,
3
  "best_metric": 0.42105263157894735,
4
  "best_model_checkpoint": null,
5
+ "epoch": 50.0,
6
  "eval_steps": 100,
7
+ "global_step": 1000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
12
  {
13
  "epoch": 0,
14
  "step": 0,
15
+ "train/classification_loss": 0.6896063685417175,
16
+ "train/contrastive_loss": 9.115899085998535,
17
+ "train/negative_loss": 9.115803718566895,
18
  "train/num_negatives": 46,
19
  "train/num_positives": 10,
20
+ "train/positive_loss": 9.557702287565917e-05,
21
+ "train/total_loss": 2.5127861499786377
22
  },
23
  {
24
  "epoch": 0,
25
  "step": 0,
26
+ "train/classification_loss": 0.7036023736000061,
27
+ "train/contrastive_loss": 8.648970603942871,
28
+ "train/negative_loss": 8.648796081542969,
29
  "train/num_negatives": 46,
30
  "train/num_positives": 10,
31
+ "train/positive_loss": 0.0001749610819388181,
32
+ "train/total_loss": 2.433396577835083
33
  },
34
  {
35
  "epoch": 2.5,
36
+ "grad_norm": 18.205059051513672,
37
+ "learning_rate": 9.4e-06,
38
+ "loss": 3.6074,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 2.5,
43
  "step": 50,
44
+ "train/classification_loss": 0.6570730805397034,
45
+ "train/contrastive_loss": 1.7841553688049316,
46
+ "train/negative_loss": 1.5581305027008057,
47
  "train/num_negatives": 38,
48
  "train/num_positives": 18,
49
+ "train/positive_loss": 0.22602489590644836,
50
+ "train/total_loss": 1.013904094696045
51
  },
52
  {
53
  "epoch": 2.5,
54
  "step": 50,
55
+ "train/classification_loss": 0.6499872207641602,
56
+ "train/contrastive_loss": 1.9876362085342407,
57
+ "train/negative_loss": 1.9320745468139648,
58
  "train/num_negatives": 50,
59
  "train/num_positives": 6,
60
+ "train/positive_loss": 0.05556164309382439,
61
+ "train/total_loss": 1.0475144386291504
62
  },
63
  {
64
  "epoch": 5.0,
65
+ "grad_norm": 13.659893035888672,
66
+ "learning_rate": 1.94e-05,
67
+ "loss": 1.796,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 5.0,
72
  "step": 100,
73
+ "train/classification_loss": 0.49852314591407776,
74
+ "train/contrastive_loss": 2.1901540756225586,
75
+ "train/negative_loss": 1.469272494316101,
76
  "train/num_negatives": 44,
77
  "train/num_positives": 12,
78
+ "train/positive_loss": 0.7208815813064575,
79
+ "train/total_loss": 0.936553955078125
80
  },
81
  {
82
  "epoch": 5.0,
83
  "step": 100,
84
+ "train/classification_loss": 0.5119871497154236,
85
+ "train/contrastive_loss": 2.697293996810913,
86
+ "train/negative_loss": 2.695932388305664,
87
  "train/num_negatives": 44,
88
  "train/num_positives": 12,
89
+ "train/positive_loss": 0.0013614993076771498,
90
+ "train/total_loss": 1.0514459609985352
91
  },
92
  {
93
  "epoch": 5.0,
94
  "step": 100,
95
+ "train/classification_loss": 0.49341386556625366,
96
+ "train/contrastive_loss": 1.4812039136886597,
97
+ "train/negative_loss": 1.4763706922531128,
98
  "train/num_negatives": 52,
99
  "train/num_positives": 4,
100
+ "train/positive_loss": 0.004833280108869076,
101
+ "train/total_loss": 0.7896546125411987
102
  },
103
  {
104
  "epoch": 5.0,
105
  "step": 100,
106
+ "train/classification_loss": 0.49600034952163696,
107
+ "train/contrastive_loss": 3.470733165740967,
108
+ "train/negative_loss": 2.635651111602783,
109
  "train/num_negatives": 48,
110
  "train/num_positives": 8,
111
+ "train/positive_loss": 0.835081934928894,
112
+ "train/total_loss": 1.1901469230651855
113
  },
114
  {
115
  "epoch": 5.0,
116
  "step": 100,
117
+ "train/classification_loss": 0.49302685260772705,
118
+ "train/contrastive_loss": 1.5160608291625977,
119
+ "train/negative_loss": 1.3313394784927368,
120
  "train/num_negatives": 46,
121
  "train/num_positives": 8,
122
+ "train/positive_loss": 0.18472139537334442,
123
+ "train/total_loss": 0.7962390184402466
124
  },
125
  {
126
  "epoch": 5.0,
127
  "eval_exact_match_accuracy": 0.0,
128
+ "eval_hamming_loss": 0.08235294117647059,
129
+ "eval_loss": 0.9528080821037292,
130
+ "eval_macro_f1": 0.00980392156862745,
131
+ "eval_macro_precision": 0.058823529411764705,
132
+ "eval_macro_recall": 0.005347593582887701,
133
+ "eval_micro_f1": 0.034482758620689655,
134
+ "eval_micro_precision": 1.0,
135
+ "eval_micro_recall": 0.017543859649122806,
136
+ "eval_runtime": 0.2063,
137
+ "eval_samples_per_second": 193.875,
138
+ "eval_steps_per_second": 24.234,
139
  "step": 100
140
  },
141
  {
142
  "epoch": 5.0,
143
  "step": 100,
144
+ "train/classification_loss": 0.5103878378868103,
145
+ "train/contrastive_loss": 1.4061696529388428,
146
+ "train/negative_loss": 0.860652506351471,
147
  "train/num_negatives": 42,
148
  "train/num_positives": 12,
149
+ "train/positive_loss": 0.5455171465873718,
150
+ "train/total_loss": 0.7916218042373657
151
  },
152
  {
153
  "epoch": 5.0,
154
  "step": 100,
155
+ "train/classification_loss": 0.49409669637680054,
156
+ "train/contrastive_loss": 0.6720238327980042,
157
+ "train/negative_loss": 0.5605456233024597,
158
  "train/num_negatives": 40,
159
  "train/num_positives": 16,
160
+ "train/positive_loss": 0.11147819459438324,
161
+ "train/total_loss": 0.6285014748573303
162
  },
163
  {
164
  "epoch": 7.5,
165
+ "grad_norm": 14.211859703063965,
166
+ "learning_rate": 1.9905050505050507e-05,
167
+ "loss": 1.3708,
168
  "step": 150
169
  },
170
  {
171
  "epoch": 7.5,
172
  "step": 150,
173
+ "train/classification_loss": 0.40184715390205383,
174
+ "train/contrastive_loss": 1.165258765220642,
175
+ "train/negative_loss": 0.698477566242218,
176
  "train/num_negatives": 44,
177
  "train/num_positives": 8,
178
+ "train/positive_loss": 0.4667811989784241,
179
+ "train/total_loss": 0.6348989009857178
180
  },
181
  {
182
  "epoch": 7.5,
183
  "step": 150,
184
+ "train/classification_loss": 0.3879987299442291,
185
+ "train/contrastive_loss": 0.8239786028862,
186
+ "train/negative_loss": 0.7370874285697937,
187
  "train/num_negatives": 44,
188
  "train/num_positives": 10,
189
+ "train/positive_loss": 0.08689115941524506,
190
+ "train/total_loss": 0.5527944564819336
191
  },
192
  {
193
  "epoch": 10.0,
194
+ "grad_norm": 25.8126277923584,
195
+ "learning_rate": 1.9804040404040406e-05,
196
+ "loss": 1.142,
197
  "step": 200
198
  },
199
  {
200
  "epoch": 10.0,
201
  "step": 200,
202
+ "train/classification_loss": 0.2806307375431061,
203
+ "train/contrastive_loss": 2.156310558319092,
204
+ "train/negative_loss": 2.146817445755005,
205
  "train/num_negatives": 44,
206
  "train/num_positives": 12,
207
+ "train/positive_loss": 0.009493212215602398,
208
+ "train/total_loss": 0.71189284324646
209
  },
210
  {
211
  "epoch": 10.0,
212
  "step": 200,
213
+ "train/classification_loss": 0.3437887132167816,
214
+ "train/contrastive_loss": 3.3835480213165283,
215
+ "train/negative_loss": 3.3831517696380615,
216
  "train/num_negatives": 44,
217
  "train/num_positives": 12,
218
+ "train/positive_loss": 0.00039613869739696383,
219
+ "train/total_loss": 1.020498275756836
220
  },
221
  {
222
  "epoch": 10.0,
223
  "step": 200,
224
+ "train/classification_loss": 0.31499814987182617,
225
+ "train/contrastive_loss": 2.7768659591674805,
226
+ "train/negative_loss": 2.7635276317596436,
227
  "train/num_negatives": 52,
228
  "train/num_positives": 4,
229
+ "train/positive_loss": 0.013338344171643257,
230
+ "train/total_loss": 0.8703713417053223
231
  },
232
  {
233
  "epoch": 10.0,
234
  "step": 200,
235
+ "train/classification_loss": 0.3134444057941437,
236
+ "train/contrastive_loss": 3.8373122215270996,
237
+ "train/negative_loss": 2.7304084300994873,
238
  "train/num_negatives": 48,
239
  "train/num_positives": 8,
240
+ "train/positive_loss": 1.1069039106369019,
241
+ "train/total_loss": 1.080906867980957
242
  },
243
  {
244
  "epoch": 10.0,
245
  "step": 200,
246
+ "train/classification_loss": 0.3265349864959717,
247
+ "train/contrastive_loss": 1.5294461250305176,
248
+ "train/negative_loss": 1.4643787145614624,
249
  "train/num_negatives": 46,
250
  "train/num_positives": 8,
251
+ "train/positive_loss": 0.06506740301847458,
252
+ "train/total_loss": 0.6324242353439331
253
  },
254
  {
255
  "epoch": 10.0,
256
  "eval_exact_match_accuracy": 0.0,
257
+ "eval_hamming_loss": 0.08235294117647059,
258
+ "eval_loss": 0.8632186651229858,
259
+ "eval_macro_f1": 0.00980392156862745,
260
+ "eval_macro_precision": 0.058823529411764705,
261
+ "eval_macro_recall": 0.005347593582887701,
262
+ "eval_micro_f1": 0.034482758620689655,
263
+ "eval_micro_precision": 1.0,
264
+ "eval_micro_recall": 0.017543859649122806,
265
+ "eval_runtime": 0.2085,
266
+ "eval_samples_per_second": 191.856,
267
+ "eval_steps_per_second": 23.982,
268
  "step": 200
269
  },
270
  {
271
  "epoch": 10.0,
272
  "step": 200,
273
+ "train/classification_loss": 0.3289062976837158,
274
+ "train/contrastive_loss": 1.158113956451416,
275
+ "train/negative_loss": 0.9101917147636414,
276
  "train/num_negatives": 42,
277
  "train/num_positives": 14,
278
+ "train/positive_loss": 0.24792218208312988,
279
+ "train/total_loss": 0.5605291128158569
280
  },
281
  {
282
  "epoch": 10.0,
283
  "step": 200,
284
+ "train/classification_loss": 0.28807589411735535,
285
+ "train/contrastive_loss": 1.227806568145752,
286
+ "train/negative_loss": 1.2268885374069214,
287
  "train/num_negatives": 42,
288
  "train/num_positives": 14,
289
+ "train/positive_loss": 0.0009180314373224974,
290
+ "train/total_loss": 0.5336372256278992
291
  },
292
  {
293
  "epoch": 12.5,
294
+ "grad_norm": 7.029545307159424,
295
+ "learning_rate": 1.9703030303030302e-05,
296
+ "loss": 1.0178,
297
  "step": 250
298
  },
299
  {
300
  "epoch": 12.5,
301
  "step": 250,
302
+ "train/classification_loss": 0.24251216650009155,
303
+ "train/contrastive_loss": 1.3313517570495605,
304
+ "train/negative_loss": 1.1213274002075195,
305
  "train/num_negatives": 44,
306
  "train/num_positives": 12,
307
+ "train/positive_loss": 0.21002434194087982,
308
+ "train/total_loss": 0.5087825059890747
309
  },
310
  {
311
  "epoch": 12.5,
312
  "step": 250,
313
+ "train/classification_loss": 0.2935709059238434,
314
+ "train/contrastive_loss": 0.40547823905944824,
315
+ "train/negative_loss": 0.3695138692855835,
316
  "train/num_negatives": 42,
317
  "train/num_positives": 8,
318
+ "train/positive_loss": 0.03596435487270355,
319
+ "train/total_loss": 0.37466657161712646
320
  },
321
  {
322
  "epoch": 15.0,
323
+ "grad_norm": 10.424208641052246,
324
+ "learning_rate": 1.9602020202020205e-05,
325
+ "loss": 0.8805,
326
  "step": 300
327
  },
328
  {
329
  "epoch": 15.0,
330
  "step": 300,
331
+ "train/classification_loss": 0.22187121212482452,
332
+ "train/contrastive_loss": 2.8647470474243164,
333
+ "train/negative_loss": 2.7984812259674072,
334
  "train/num_negatives": 44,
335
  "train/num_positives": 12,
336
+ "train/positive_loss": 0.0662657767534256,
337
+ "train/total_loss": 0.7948206067085266
338
  },
339
  {
340
  "epoch": 15.0,
341
  "step": 300,
342
+ "train/classification_loss": 0.2955247759819031,
343
+ "train/contrastive_loss": 4.269000053405762,
344
+ "train/negative_loss": 4.268981456756592,
345
  "train/num_negatives": 44,
346
  "train/num_positives": 12,
347
+ "train/positive_loss": 1.871622953331098e-05,
348
+ "train/total_loss": 1.1493247747421265
349
  },
350
  {
351
  "epoch": 15.0,
352
  "step": 300,
353
+ "train/classification_loss": 0.26941171288490295,
354
+ "train/contrastive_loss": 4.073124885559082,
355
+ "train/negative_loss": 2.919600486755371,
356
  "train/num_negatives": 52,
357
  "train/num_positives": 4,
358
+ "train/positive_loss": 1.1535242795944214,
359
+ "train/total_loss": 1.0840367078781128
360
  },
361
  {
362
  "epoch": 15.0,
363
  "step": 300,
364
+ "train/classification_loss": 0.2642236351966858,
365
+ "train/contrastive_loss": 5.231786727905273,
366
+ "train/negative_loss": 4.430575370788574,
367
  "train/num_negatives": 48,
368
  "train/num_positives": 8,
369
+ "train/positive_loss": 0.8012115359306335,
370
+ "train/total_loss": 1.3105809688568115
371
  },
372
  {
373
  "epoch": 15.0,
374
  "step": 300,
375
+ "train/classification_loss": 0.2807493805885315,
376
+ "train/contrastive_loss": 1.4644469022750854,
377
+ "train/negative_loss": 1.4178317785263062,
378
  "train/num_negatives": 46,
379
  "train/num_positives": 8,
380
+ "train/positive_loss": 0.04661515727639198,
381
+ "train/total_loss": 0.5736387968063354
382
  },
383
  {
384
  "epoch": 15.0,
385
  "eval_exact_match_accuracy": 0.05,
386
+ "eval_hamming_loss": 0.07794117647058824,
387
+ "eval_loss": 0.9824804067611694,
388
+ "eval_macro_f1": 0.03460207612456748,
389
+ "eval_macro_precision": 0.049019607843137254,
390
  "eval_macro_recall": 0.026737967914438502,
391
+ "eval_micro_f1": 0.15873015873015872,
392
+ "eval_micro_precision": 0.8333333333333334,
393
  "eval_micro_recall": 0.08771929824561403,
394
+ "eval_runtime": 0.2153,
395
+ "eval_samples_per_second": 185.81,
396
+ "eval_steps_per_second": 23.226,
397
  "step": 300
398
  },
399
  {
400
  "epoch": 15.0,
401
  "step": 300,
402
+ "train/classification_loss": 0.24673022329807281,
403
+ "train/contrastive_loss": 0.4545953869819641,
404
+ "train/negative_loss": 0.443155974149704,
405
  "train/num_negatives": 32,
406
  "train/num_positives": 20,
407
+ "train/positive_loss": 0.011439427733421326,
408
+ "train/total_loss": 0.33764928579330444
409
  },
410
  {
411
  "epoch": 15.0,
412
  "step": 300,
413
+ "train/classification_loss": 0.2520601153373718,
414
+ "train/contrastive_loss": 1.4950696229934692,
415
+ "train/negative_loss": 1.429970145225525,
416
  "train/num_negatives": 46,
417
  "train/num_positives": 10,
418
+ "train/positive_loss": 0.06509946286678314,
419
+ "train/total_loss": 0.5510740280151367
420
  },
421
  {
422
  "epoch": 17.5,
423
+ "grad_norm": 9.645092964172363,
424
+ "learning_rate": 1.95010101010101e-05,
425
+ "loss": 0.7785,
426
  "step": 350
427
  },
428
  {
429
  "epoch": 17.5,
430
  "step": 350,
431
+ "train/classification_loss": 0.2225627303123474,
432
+ "train/contrastive_loss": 0.3660619258880615,
433
+ "train/negative_loss": 0.36337095499038696,
434
  "train/num_negatives": 42,
435
  "train/num_positives": 14,
436
+ "train/positive_loss": 0.0026909795124083757,
437
+ "train/total_loss": 0.2957751154899597
438
  },
439
  {
440
  "epoch": 17.5,
441
  "step": 350,
442
+ "train/classification_loss": 0.2583811581134796,
443
+ "train/contrastive_loss": 0.25796303153038025,
444
+ "train/negative_loss": 0.25655531883239746,
445
  "train/num_negatives": 44,
446
  "train/num_positives": 6,
447
+ "train/positive_loss": 0.0014077159576117992,
448
+ "train/total_loss": 0.3099737763404846
449
  },
450
  {
451
  "epoch": 20.0,
452
+ "grad_norm": 5.90708065032959,
453
+ "learning_rate": 1.94e-05,
454
+ "loss": 0.7442,
455
  "step": 400
456
  },
457
  {
458
  "epoch": 20.0,
459
  "step": 400,
460
+ "train/classification_loss": 0.1851058006286621,
461
+ "train/contrastive_loss": 2.0512137413024902,
462
+ "train/negative_loss": 1.7761945724487305,
463
  "train/num_negatives": 44,
464
  "train/num_positives": 12,
465
+ "train/positive_loss": 0.27501922845840454,
466
+ "train/total_loss": 0.595348596572876
467
  },
468
  {
469
  "epoch": 20.0,
470
  "step": 400,
471
+ "train/classification_loss": 0.2684085965156555,
472
+ "train/contrastive_loss": 3.0632779598236084,
473
+ "train/negative_loss": 3.05769419670105,
474
  "train/num_negatives": 44,
475
  "train/num_positives": 12,
476
+ "train/positive_loss": 0.0055836960673332214,
477
+ "train/total_loss": 0.8810641765594482
478
  },
479
  {
480
  "epoch": 20.0,
481
  "step": 400,
482
+ "train/classification_loss": 0.25380370020866394,
483
+ "train/contrastive_loss": 4.407430648803711,
484
+ "train/negative_loss": 3.715952157974243,
485
  "train/num_negatives": 52,
486
  "train/num_positives": 4,
487
+ "train/positive_loss": 0.6914786100387573,
488
+ "train/total_loss": 1.1352897882461548
489
  },
490
  {
491
  "epoch": 20.0,
492
  "step": 400,
493
+ "train/classification_loss": 0.23351122438907623,
494
+ "train/contrastive_loss": 2.3350398540496826,
495
+ "train/negative_loss": 1.7342523336410522,
496
  "train/num_negatives": 48,
497
  "train/num_positives": 8,
498
+ "train/positive_loss": 0.6007875800132751,
499
+ "train/total_loss": 0.7005192041397095
500
  },
501
  {
502
  "epoch": 20.0,
503
  "step": 400,
504
+ "train/classification_loss": 0.2567104995250702,
505
+ "train/contrastive_loss": 1.2914104461669922,
506
+ "train/negative_loss": 1.1075626611709595,
507
  "train/num_negatives": 46,
508
  "train/num_positives": 8,
509
+ "train/positive_loss": 0.18384775519371033,
510
+ "train/total_loss": 0.5149925947189331
511
  },
512
  {
513
  "epoch": 20.0,
514
  "eval_exact_match_accuracy": 0.1,
515
  "eval_hamming_loss": 0.075,
516
+ "eval_loss": 0.7654428482055664,
517
+ "eval_macro_f1": 0.0803921568627451,
518
+ "eval_macro_precision": 0.1045751633986928,
519
+ "eval_macro_recall": 0.06684491978609625,
520
+ "eval_micro_f1": 0.23880597014925373,
521
+ "eval_micro_precision": 0.8,
522
+ "eval_micro_recall": 0.14035087719298245,
523
+ "eval_runtime": 0.2071,
524
+ "eval_samples_per_second": 193.116,
525
+ "eval_steps_per_second": 24.139,
526
  "step": 400
527
  },
528
  {
529
  "epoch": 20.0,
530
  "step": 400,
531
+ "train/classification_loss": 0.23103763163089752,
532
+ "train/contrastive_loss": 0.9073829650878906,
533
+ "train/negative_loss": 0.5319927930831909,
534
  "train/num_negatives": 42,
535
  "train/num_positives": 10,
536
+ "train/positive_loss": 0.3753901422023773,
537
+ "train/total_loss": 0.41251420974731445
538
  },
539
  {
540
  "epoch": 20.0,
541
  "step": 400,
542
+ "train/classification_loss": 0.2296096831560135,
543
+ "train/contrastive_loss": 0.5736224055290222,
544
+ "train/negative_loss": 0.573104977607727,
545
  "train/num_negatives": 52,
546
  "train/num_positives": 4,
547
+ "train/positive_loss": 0.0005174159305170178,
548
+ "train/total_loss": 0.3443341553211212
549
  },
550
  {
551
  "epoch": 22.5,
552
+ "grad_norm": 9.208708763122559,
553
+ "learning_rate": 1.92989898989899e-05,
554
+ "loss": 0.6661,
555
  "step": 450
556
  },
557
  {
558
  "epoch": 22.5,
559
  "step": 450,
560
+ "train/classification_loss": 0.21061192452907562,
561
+ "train/contrastive_loss": 1.1704968214035034,
562
+ "train/negative_loss": 1.170390248298645,
563
  "train/num_negatives": 50,
564
  "train/num_positives": 4,
565
+ "train/positive_loss": 0.00010655130608938634,
566
+ "train/total_loss": 0.444711297750473
567
  },
568
  {
569
  "epoch": 22.5,
570
  "step": 450,
571
+ "train/classification_loss": 0.18421001732349396,
572
+ "train/contrastive_loss": 1.406832218170166,
573
+ "train/negative_loss": 0.8008975982666016,
574
  "train/num_negatives": 50,
575
  "train/num_positives": 6,
576
+ "train/positive_loss": 0.6059346795082092,
577
+ "train/total_loss": 0.4655764698982239
578
  },
579
  {
580
  "epoch": 25.0,
581
+ "grad_norm": 9.534709930419922,
582
+ "learning_rate": 1.91979797979798e-05,
583
+ "loss": 0.6332,
584
  "step": 500
585
  },
586
  {
587
  "epoch": 25.0,
588
  "step": 500,
589
+ "train/classification_loss": 0.17407798767089844,
590
+ "train/contrastive_loss": 3.3778998851776123,
591
+ "train/negative_loss": 3.208540678024292,
592
  "train/num_negatives": 44,
593
  "train/num_positives": 12,
594
+ "train/positive_loss": 0.16935913264751434,
595
+ "train/total_loss": 0.8496579527854919
596
  },
597
  {
598
  "epoch": 25.0,
599
  "step": 500,
600
+ "train/classification_loss": 0.25022628903388977,
601
+ "train/contrastive_loss": 2.501643180847168,
602
+ "train/negative_loss": 2.5011374950408936,
603
  "train/num_negatives": 44,
604
  "train/num_positives": 12,
605
+ "train/positive_loss": 0.0005057163070887327,
606
+ "train/total_loss": 0.7505549192428589
607
  },
608
  {
609
  "epoch": 25.0,
610
  "step": 500,
611
+ "train/classification_loss": 0.24351510405540466,
612
+ "train/contrastive_loss": 5.5227251052856445,
613
+ "train/negative_loss": 3.6524746417999268,
614
  "train/num_negatives": 52,
615
  "train/num_positives": 4,
616
+ "train/positive_loss": 1.8702502250671387,
617
+ "train/total_loss": 1.348060131072998
618
  },
619
  {
620
  "epoch": 25.0,
621
  "step": 500,
622
+ "train/classification_loss": 0.20675694942474365,
623
+ "train/contrastive_loss": 2.3911428451538086,
624
+ "train/negative_loss": 1.8613694906234741,
625
  "train/num_negatives": 48,
626
  "train/num_positives": 8,
627
+ "train/positive_loss": 0.5297732949256897,
628
+ "train/total_loss": 0.6849855184555054
629
  },
630
  {
631
  "epoch": 25.0,
632
  "step": 500,
633
+ "train/classification_loss": 0.24148425459861755,
634
+ "train/contrastive_loss": 1.3859646320343018,
635
+ "train/negative_loss": 0.7059023380279541,
636
  "train/num_negatives": 46,
637
  "train/num_positives": 8,
638
+ "train/positive_loss": 0.6800622940063477,
639
+ "train/total_loss": 0.5186771750450134
640
  },
641
  {
642
  "epoch": 25.0,
643
  "eval_exact_match_accuracy": 0.175,
644
+ "eval_hamming_loss": 0.07352941176470588,
645
+ "eval_loss": 0.8303871154785156,
646
+ "eval_macro_f1": 0.10802139037433156,
647
+ "eval_macro_precision": 0.14331550802139037,
648
+ "eval_macro_recall": 0.09037433155080213,
649
+ "eval_micro_f1": 0.32432432432432434,
650
+ "eval_micro_precision": 0.7058823529411765,
651
+ "eval_micro_recall": 0.21052631578947367,
652
+ "eval_runtime": 0.2078,
653
+ "eval_samples_per_second": 192.449,
654
+ "eval_steps_per_second": 24.056,
655
  "step": 500
656
  },
657
  {
658
  "epoch": 25.0,
659
  "step": 500,
660
+ "train/classification_loss": 0.14580048620700836,
661
+ "train/contrastive_loss": 0.2984220087528229,
662
+ "train/negative_loss": 0.29449352622032166,
663
  "train/num_negatives": 42,
664
  "train/num_positives": 14,
665
+ "train/positive_loss": 0.003928476478904486,
666
+ "train/total_loss": 0.20548489689826965
667
  },
668
  {
669
  "epoch": 25.0,
670
  "step": 500,
671
+ "train/classification_loss": 0.23653770983219147,
672
+ "train/contrastive_loss": 0.7606168389320374,
673
+ "train/negative_loss": 0.737991988658905,
674
  "train/num_negatives": 44,
675
  "train/num_positives": 12,
676
+ "train/positive_loss": 0.022624850273132324,
677
+ "train/total_loss": 0.38866108655929565
678
  },
679
  {
680
  "epoch": 27.5,
681
+ "grad_norm": 6.362896919250488,
682
+ "learning_rate": 1.90969696969697e-05,
683
+ "loss": 0.5843,
684
  "step": 550
685
  },
686
  {
687
  "epoch": 27.5,
688
  "step": 550,
689
+ "train/classification_loss": 0.2056518942117691,
690
+ "train/contrastive_loss": 0.9691342711448669,
691
+ "train/negative_loss": 0.9691179394721985,
692
  "train/num_negatives": 54,
693
  "train/num_positives": 2,
694
+ "train/positive_loss": 1.633180545468349e-05,
695
+ "train/total_loss": 0.3994787335395813
696
  },
697
  {
698
  "epoch": 27.5,
699
  "step": 550,
700
+ "train/classification_loss": 0.22270001471042633,
701
+ "train/contrastive_loss": 1.5889058113098145,
702
+ "train/negative_loss": 1.251386046409607,
703
  "train/num_negatives": 40,
704
  "train/num_positives": 12,
705
+ "train/positive_loss": 0.3375198245048523,
706
+ "train/total_loss": 0.5404812097549438
707
  },
708
  {
709
  "epoch": 30.0,
710
+ "grad_norm": 9.474464416503906,
711
+ "learning_rate": 1.8995959595959595e-05,
712
+ "loss": 0.5572,
713
  "step": 600
714
  },
715
  {
716
  "epoch": 30.0,
717
  "step": 600,
718
+ "train/classification_loss": 0.16965512931346893,
719
+ "train/contrastive_loss": 1.8685212135314941,
720
+ "train/negative_loss": 1.4877853393554688,
721
  "train/num_negatives": 44,
722
  "train/num_positives": 12,
723
+ "train/positive_loss": 0.3807358741760254,
724
+ "train/total_loss": 0.5433593988418579
725
  },
726
  {
727
  "epoch": 30.0,
728
  "step": 600,
729
+ "train/classification_loss": 0.2326871156692505,
730
+ "train/contrastive_loss": 3.803422451019287,
731
+ "train/negative_loss": 3.799872875213623,
732
  "train/num_negatives": 44,
733
  "train/num_positives": 12,
734
+ "train/positive_loss": 0.003549614455550909,
735
+ "train/total_loss": 0.9933716058731079
736
  },
737
  {
738
  "epoch": 30.0,
739
  "step": 600,
740
+ "train/classification_loss": 0.21336901187896729,
741
+ "train/contrastive_loss": 2.9807915687561035,
742
+ "train/negative_loss": 2.322481155395508,
743
  "train/num_negatives": 52,
744
  "train/num_positives": 4,
745
+ "train/positive_loss": 0.6583104133605957,
746
+ "train/total_loss": 0.8095273375511169
747
  },
748
  {
749
  "epoch": 30.0,
750
  "step": 600,
751
+ "train/classification_loss": 0.21435928344726562,
752
+ "train/contrastive_loss": 4.913402080535889,
753
+ "train/negative_loss": 4.489163875579834,
754
  "train/num_negatives": 48,
755
  "train/num_positives": 8,
756
+ "train/positive_loss": 0.4242383539676666,
757
+ "train/total_loss": 1.1970397233963013
758
  },
759
  {
760
  "epoch": 30.0,
761
  "step": 600,
762
+ "train/classification_loss": 0.22257567942142487,
763
+ "train/contrastive_loss": 0.9281859993934631,
764
+ "train/negative_loss": 0.7518287301063538,
765
  "train/num_negatives": 46,
766
  "train/num_positives": 8,
767
+ "train/positive_loss": 0.17635725438594818,
768
+ "train/total_loss": 0.40821290016174316
769
  },
770
  {
771
  "epoch": 30.0,
772
+ "eval_exact_match_accuracy": 0.225,
773
+ "eval_hamming_loss": 0.06764705882352941,
774
+ "eval_loss": 0.7903022170066833,
775
+ "eval_macro_f1": 0.11995386389850057,
776
+ "eval_macro_precision": 0.1596638655462185,
777
+ "eval_macro_recall": 0.09679144385026739,
778
+ "eval_micro_f1": 0.3611111111111111,
779
+ "eval_micro_precision": 0.8666666666666667,
780
+ "eval_micro_recall": 0.22807017543859648,
781
+ "eval_runtime": 0.2099,
782
+ "eval_samples_per_second": 190.603,
783
+ "eval_steps_per_second": 23.825,
784
  "step": 600
785
  },
786
  {
787
  "epoch": 30.0,
788
  "step": 600,
789
+ "train/classification_loss": 0.14723831415176392,
790
+ "train/contrastive_loss": 0.6149296760559082,
791
+ "train/negative_loss": 0.4006352722644806,
792
  "train/num_negatives": 38,
793
  "train/num_positives": 18,
794
+ "train/positive_loss": 0.21429438889026642,
795
+ "train/total_loss": 0.2702242434024811
796
  },
797
  {
798
  "epoch": 30.0,
799
  "step": 600,
800
+ "train/classification_loss": 0.18867409229278564,
801
+ "train/contrastive_loss": 0.7789785265922546,
802
+ "train/negative_loss": 0.6380741000175476,
803
  "train/num_negatives": 48,
804
  "train/num_positives": 8,
805
+ "train/positive_loss": 0.14090441167354584,
806
+ "train/total_loss": 0.3444697856903076
807
  },
808
  {
809
  "epoch": 32.5,
810
+ "grad_norm": 7.496462821960449,
811
+ "learning_rate": 1.8894949494949498e-05,
812
+ "loss": 0.5177,
813
  "step": 650
814
  },
815
  {
816
  "epoch": 32.5,
817
  "step": 650,
818
+ "train/classification_loss": 0.1460404396057129,
819
+ "train/contrastive_loss": 0.8524841070175171,
820
+ "train/negative_loss": 0.623662531375885,
821
  "train/num_negatives": 36,
822
  "train/num_positives": 18,
823
+ "train/positive_loss": 0.22882159054279327,
824
+ "train/total_loss": 0.3165372610092163
825
  },
826
  {
827
  "epoch": 32.5,
828
  "step": 650,
829
+ "train/classification_loss": 0.18225882947444916,
830
+ "train/contrastive_loss": 1.4429905414581299,
831
+ "train/negative_loss": 1.4358431100845337,
832
  "train/num_negatives": 50,
833
  "train/num_positives": 6,
834
+ "train/positive_loss": 0.007147490046918392,
835
+ "train/total_loss": 0.4708569645881653
836
  },
837
  {
838
  "epoch": 35.0,
839
+ "grad_norm": 6.565901756286621,
840
+ "learning_rate": 1.8793939393939394e-05,
841
+ "loss": 0.4788,
842
  "step": 700
843
  },
844
  {
845
  "epoch": 35.0,
846
  "step": 700,
847
+ "train/classification_loss": 0.16156421601772308,
848
+ "train/contrastive_loss": 3.9083566665649414,
849
+ "train/negative_loss": 2.979792594909668,
850
  "train/num_negatives": 44,
851
  "train/num_positives": 12,
852
+ "train/positive_loss": 0.9285640716552734,
853
+ "train/total_loss": 0.9432355761528015
854
  },
855
  {
856
  "epoch": 35.0,
857
  "step": 700,
858
+ "train/classification_loss": 0.22515544295310974,
859
+ "train/contrastive_loss": 3.891486167907715,
860
+ "train/negative_loss": 3.8570120334625244,
861
  "train/num_negatives": 44,
862
  "train/num_positives": 12,
863
+ "train/positive_loss": 0.034474026411771774,
864
+ "train/total_loss": 1.0034526586532593
865
  },
866
  {
867
  "epoch": 35.0,
868
  "step": 700,
869
+ "train/classification_loss": 0.22171786427497864,
870
+ "train/contrastive_loss": 4.330800533294678,
871
+ "train/negative_loss": 3.1184730529785156,
872
  "train/num_negatives": 52,
873
  "train/num_positives": 4,
874
+ "train/positive_loss": 1.2123275995254517,
875
+ "train/total_loss": 1.0878779888153076
876
  },
877
  {
878
  "epoch": 35.0,
879
  "step": 700,
880
+ "train/classification_loss": 0.1916349083185196,
881
+ "train/contrastive_loss": 2.0170962810516357,
882
+ "train/negative_loss": 1.338474154472351,
883
  "train/num_negatives": 48,
884
  "train/num_positives": 8,
885
+ "train/positive_loss": 0.6786221861839294,
886
+ "train/total_loss": 0.5950541496276855
887
  },
888
  {
889
  "epoch": 35.0,
890
  "step": 700,
891
+ "train/classification_loss": 0.20504897832870483,
892
+ "train/contrastive_loss": 0.6244919896125793,
893
+ "train/negative_loss": 0.39054688811302185,
894
  "train/num_negatives": 46,
895
  "train/num_positives": 8,
896
+ "train/positive_loss": 0.2339451164007187,
897
+ "train/total_loss": 0.3299473822116852
898
  },
899
  {
900
  "epoch": 35.0,
901
  "eval_exact_match_accuracy": 0.25,
902
+ "eval_hamming_loss": 0.06470588235294118,
903
+ "eval_loss": 0.7919135689735413,
904
+ "eval_macro_f1": 0.14236282748393475,
905
+ "eval_macro_precision": 0.21512605042016808,
906
+ "eval_macro_recall": 0.11729055258467024,
907
+ "eval_micro_f1": 0.42105263157894735,
908
+ "eval_micro_precision": 0.8421052631578947,
909
+ "eval_micro_recall": 0.2807017543859649,
910
+ "eval_runtime": 0.2075,
911
+ "eval_samples_per_second": 192.814,
912
+ "eval_steps_per_second": 24.102,
913
  "step": 700
914
  },
915
  {
916
  "epoch": 35.0,
917
  "step": 700,
918
+ "train/classification_loss": 0.14606674015522003,
919
+ "train/contrastive_loss": 0.6812934875488281,
920
+ "train/negative_loss": 0.45186591148376465,
921
  "train/num_negatives": 40,
922
  "train/num_positives": 16,
923
+ "train/positive_loss": 0.2294275462627411,
924
+ "train/total_loss": 0.2823254466056824
925
  },
926
  {
927
  "epoch": 35.0,
928
  "step": 700,
929
+ "train/classification_loss": 0.1429990828037262,
930
+ "train/contrastive_loss": 0.5248066782951355,
931
+ "train/negative_loss": 0.3964087963104248,
932
  "train/num_negatives": 40,
933
  "train/num_positives": 16,
934
+ "train/positive_loss": 0.1283978968858719,
935
+ "train/total_loss": 0.2479604184627533
936
  },
937
  {
938
  "epoch": 37.5,
939
+ "grad_norm": 3.286250114440918,
940
+ "learning_rate": 1.8692929292929294e-05,
941
+ "loss": 0.4698,
942
  "step": 750
943
  },
944
  {
945
  "epoch": 37.5,
946
  "step": 750,
947
+ "train/classification_loss": 0.15292038023471832,
948
+ "train/contrastive_loss": 0.8152571320533752,
949
+ "train/negative_loss": 0.8152315616607666,
950
  "train/num_negatives": 48,
951
  "train/num_positives": 8,
952
+ "train/positive_loss": 2.557095831434708e-05,
953
+ "train/total_loss": 0.3159717917442322
954
  },
955
  {
956
  "epoch": 37.5,
957
  "step": 750,
958
+ "train/classification_loss": 0.15736867487430573,
959
+ "train/contrastive_loss": 0.38875800371170044,
960
+ "train/negative_loss": 0.26881521940231323,
961
  "train/num_negatives": 46,
962
  "train/num_positives": 10,
963
+ "train/positive_loss": 0.11994278430938721,
964
+ "train/total_loss": 0.2351202666759491
965
  },
966
  {
967
  "epoch": 40.0,
968
+ "grad_norm": 8.739147186279297,
969
+ "learning_rate": 1.8591919191919193e-05,
970
+ "loss": 0.418,
971
  "step": 800
972
  },
973
  {
974
  "epoch": 40.0,
975
  "step": 800,
976
+ "train/classification_loss": 0.16104553639888763,
977
+ "train/contrastive_loss": 3.0951993465423584,
978
+ "train/negative_loss": 1.8000407218933105,
979
  "train/num_negatives": 44,
980
  "train/num_positives": 12,
981
+ "train/positive_loss": 1.2951586246490479,
982
+ "train/total_loss": 0.7800854444503784
983
  },
984
  {
985
  "epoch": 40.0,
986
  "step": 800,
987
+ "train/classification_loss": 0.22088098526000977,
988
+ "train/contrastive_loss": 3.92935848236084,
989
+ "train/negative_loss": 3.9107954502105713,
990
  "train/num_negatives": 44,
991
  "train/num_positives": 12,
992
+ "train/positive_loss": 0.018563104793429375,
993
+ "train/total_loss": 1.0067527294158936
994
  },
995
  {
996
  "epoch": 40.0,
997
  "step": 800,
998
+ "train/classification_loss": 0.20756281912326813,
999
+ "train/contrastive_loss": 5.228389739990234,
1000
+ "train/negative_loss": 2.9686391353607178,
1001
  "train/num_negatives": 52,
1002
  "train/num_positives": 4,
1003
+ "train/positive_loss": 2.2597508430480957,
1004
+ "train/total_loss": 1.2532408237457275
1005
  },
1006
  {
1007
  "epoch": 40.0,
1008
  "step": 800,
1009
+ "train/classification_loss": 0.1891981065273285,
1010
+ "train/contrastive_loss": 1.6292307376861572,
1011
+ "train/negative_loss": 1.227111577987671,
1012
  "train/num_negatives": 48,
1013
  "train/num_positives": 8,
1014
+ "train/positive_loss": 0.40211910009384155,
1015
+ "train/total_loss": 0.5150442719459534
1016
  },
1017
  {
1018
  "epoch": 40.0,
1019
  "step": 800,
1020
+ "train/classification_loss": 0.19581612944602966,
1021
+ "train/contrastive_loss": 0.958620548248291,
1022
+ "train/negative_loss": 0.6886929869651794,
1023
  "train/num_negatives": 46,
1024
  "train/num_positives": 8,
1025
+ "train/positive_loss": 0.2699275612831116,
1026
+ "train/total_loss": 0.38754022121429443
1027
  },
1028
  {
1029
  "epoch": 40.0,
1030
+ "eval_exact_match_accuracy": 0.2,
1031
  "eval_hamming_loss": 0.06470588235294118,
1032
+ "eval_loss": 0.7885327339172363,
1033
+ "eval_macro_f1": 0.18102240896358546,
1034
+ "eval_macro_precision": 0.3300653594771242,
1035
+ "eval_macro_recall": 0.1354723707664884,
1036
  "eval_micro_f1": 0.42105263157894735,
1037
  "eval_micro_precision": 0.8421052631578947,
1038
  "eval_micro_recall": 0.2807017543859649,
1039
+ "eval_runtime": 0.2094,
1040
+ "eval_samples_per_second": 191.056,
1041
+ "eval_steps_per_second": 23.882,
1042
  "step": 800
1043
  },
1044
  {
1045
  "epoch": 40.0,
1046
  "step": 800,
1047
+ "train/classification_loss": 0.12362033128738403,
1048
+ "train/contrastive_loss": 0.3485001027584076,
1049
+ "train/negative_loss": 0.3484821319580078,
1050
  "train/num_negatives": 44,
1051
  "train/num_positives": 12,
1052
+ "train/positive_loss": 1.7980994016397744e-05,
1053
+ "train/total_loss": 0.1933203637599945
1054
  },
1055
  {
1056
  "epoch": 40.0,
1057
  "step": 800,
1058
+ "train/classification_loss": 0.09559500962495804,
1059
+ "train/contrastive_loss": 0.22876113653182983,
1060
+ "train/negative_loss": 0.2287520468235016,
1061
  "train/num_negatives": 30,
1062
  "train/num_positives": 26,
1063
+ "train/positive_loss": 9.087464604817796e-06,
1064
+ "train/total_loss": 0.1413472294807434
1065
  },
1066
  {
1067
  "epoch": 42.5,
1068
+ "grad_norm": 3.746946334838867,
1069
+ "learning_rate": 1.8490909090909093e-05,
1070
+ "loss": 0.3927,
1071
  "step": 850
1072
  },
1073
  {
1074
  "epoch": 42.5,
1075
  "step": 850,
1076
+ "train/classification_loss": 0.09173674136400223,
1077
+ "train/contrastive_loss": 0.13494296371936798,
1078
+ "train/negative_loss": 0.13212107121944427,
1079
  "train/num_negatives": 40,
1080
  "train/num_positives": 16,
1081
+ "train/positive_loss": 0.002821887144818902,
1082
+ "train/total_loss": 0.11872533708810806
1083
  },
1084
  {
1085
  "epoch": 42.5,
1086
  "step": 850,
1087
+ "train/classification_loss": 0.12884384393692017,
1088
+ "train/contrastive_loss": 0.07590682804584503,
1089
+ "train/negative_loss": 0.07587877660989761,
1090
  "train/num_negatives": 40,
1091
  "train/num_positives": 16,
1092
+ "train/positive_loss": 2.8053931600879878e-05,
1093
+ "train/total_loss": 0.14402520656585693
1094
  },
1095
  {
1096
  "epoch": 45.0,
1097
+ "grad_norm": 2.8717594146728516,
1098
+ "learning_rate": 1.8389898989898992e-05,
1099
+ "loss": 0.3975,
1100
  "step": 900
1101
  },
1102
  {
1103
  "epoch": 45.0,
1104
  "step": 900,
1105
+ "train/classification_loss": 0.16052784025669098,
1106
+ "train/contrastive_loss": 4.337796688079834,
1107
+ "train/negative_loss": 3.431094169616699,
1108
  "train/num_negatives": 44,
1109
  "train/num_positives": 12,
1110
+ "train/positive_loss": 0.9067023992538452,
1111
+ "train/total_loss": 1.0280872583389282
1112
  },
1113
  {
1114
  "epoch": 45.0,
1115
  "step": 900,
1116
+ "train/classification_loss": 0.2030077874660492,
1117
+ "train/contrastive_loss": 3.2337257862091064,
1118
+ "train/negative_loss": 3.2324299812316895,
1119
  "train/num_negatives": 44,
1120
  "train/num_positives": 12,
1121
+ "train/positive_loss": 0.001295788330025971,
1122
+ "train/total_loss": 0.8497529029846191
1123
  },
1124
  {
1125
  "epoch": 45.0,
1126
  "step": 900,
1127
+ "train/classification_loss": 0.2113606035709381,
1128
+ "train/contrastive_loss": 4.936408996582031,
1129
+ "train/negative_loss": 3.0232598781585693,
1130
  "train/num_negatives": 52,
1131
  "train/num_positives": 4,
1132
+ "train/positive_loss": 1.913149356842041,
1133
+ "train/total_loss": 1.198642373085022
1134
  },
1135
  {
1136
  "epoch": 45.0,
1137
  "step": 900,
1138
+ "train/classification_loss": 0.17921508848667145,
1139
+ "train/contrastive_loss": 2.6765177249908447,
1140
+ "train/negative_loss": 1.9601576328277588,
1141
  "train/num_negatives": 48,
1142
  "train/num_positives": 8,
1143
+ "train/positive_loss": 0.7163600921630859,
1144
+ "train/total_loss": 0.7145186066627502
1145
  },
1146
  {
1147
  "epoch": 45.0,
1148
  "step": 900,
1149
+ "train/classification_loss": 0.18929758667945862,
1150
+ "train/contrastive_loss": 0.7077760696411133,
1151
+ "train/negative_loss": 0.6388620138168335,
1152
  "train/num_negatives": 46,
1153
  "train/num_positives": 8,
1154
+ "train/positive_loss": 0.06891404092311859,
1155
+ "train/total_loss": 0.33085280656814575
1156
  },
1157
  {
1158
  "epoch": 45.0,
1159
+ "eval_exact_match_accuracy": 0.225,
1160
+ "eval_hamming_loss": 0.06911764705882353,
1161
+ "eval_loss": 0.8243707418441772,
1162
+ "eval_macro_f1": 0.15536244800950683,
1163
+ "eval_macro_precision": 0.2290552584670232,
1164
+ "eval_macro_recall": 0.12611408199643495,
1165
+ "eval_micro_f1": 0.4050632911392405,
1166
+ "eval_micro_precision": 0.7272727272727273,
1167
  "eval_micro_recall": 0.2807017543859649,
1168
+ "eval_runtime": 0.2089,
1169
+ "eval_samples_per_second": 191.499,
1170
+ "eval_steps_per_second": 23.937,
1171
  "step": 900
1172
  },
1173
  {
1174
  "epoch": 45.0,
1175
  "step": 900,
1176
+ "train/classification_loss": 0.11641532182693481,
1177
+ "train/contrastive_loss": 1.0102875232696533,
1178
+ "train/negative_loss": 1.0102651119232178,
1179
+ "train/num_negatives": 54,
1180
+ "train/num_positives": 2,
1181
+ "train/positive_loss": 2.2411597456084564e-05,
1182
+ "train/total_loss": 0.31847283244132996
1183
+ },
1184
+ {
1185
+ "epoch": 45.0,
1186
+ "step": 900,
1187
+ "train/classification_loss": 0.11128353327512741,
1188
+ "train/contrastive_loss": 0.24437068402767181,
1189
+ "train/negative_loss": 0.24322275817394257,
1190
+ "train/num_negatives": 46,
1191
+ "train/num_positives": 10,
1192
+ "train/positive_loss": 0.0011479274835437536,
1193
+ "train/total_loss": 0.16015766561031342
1194
+ },
1195
+ {
1196
+ "epoch": 47.5,
1197
+ "grad_norm": 2.6908442974090576,
1198
+ "learning_rate": 1.8288888888888888e-05,
1199
+ "loss": 0.353,
1200
+ "step": 950
1201
+ },
1202
+ {
1203
+ "epoch": 47.5,
1204
+ "step": 950,
1205
+ "train/classification_loss": 0.10373832285404205,
1206
+ "train/contrastive_loss": 0.37015679478645325,
1207
+ "train/negative_loss": 0.3693355917930603,
1208
+ "train/num_negatives": 46,
1209
+ "train/num_positives": 8,
1210
+ "train/positive_loss": 0.000821194495074451,
1211
+ "train/total_loss": 0.17776969075202942
1212
+ },
1213
+ {
1214
+ "epoch": 47.5,
1215
+ "step": 950,
1216
+ "train/classification_loss": 0.1416180431842804,
1217
+ "train/contrastive_loss": 0.16073152422904968,
1218
+ "train/negative_loss": 0.15973255038261414,
1219
+ "train/num_negatives": 36,
1220
+ "train/num_positives": 16,
1221
+ "train/positive_loss": 0.0009989773388952017,
1222
+ "train/total_loss": 0.17376434803009033
1223
+ },
1224
+ {
1225
+ "epoch": 50.0,
1226
+ "grad_norm": 5.927994251251221,
1227
+ "learning_rate": 1.818787878787879e-05,
1228
+ "loss": 0.3431,
1229
+ "step": 1000
1230
+ },
1231
+ {
1232
+ "epoch": 50.0,
1233
+ "step": 1000,
1234
+ "train/classification_loss": 0.16533198952674866,
1235
+ "train/contrastive_loss": 3.354642868041992,
1236
+ "train/negative_loss": 2.9447507858276367,
1237
+ "train/num_negatives": 44,
1238
+ "train/num_positives": 12,
1239
+ "train/positive_loss": 0.409892201423645,
1240
+ "train/total_loss": 0.8362605571746826
1241
+ },
1242
+ {
1243
+ "epoch": 50.0,
1244
+ "step": 1000,
1245
+ "train/classification_loss": 0.1996230185031891,
1246
+ "train/contrastive_loss": 2.7300727367401123,
1247
+ "train/negative_loss": 2.6814076900482178,
1248
+ "train/num_negatives": 44,
1249
+ "train/num_positives": 12,
1250
+ "train/positive_loss": 0.04866510629653931,
1251
+ "train/total_loss": 0.7456375360488892
1252
+ },
1253
+ {
1254
+ "epoch": 50.0,
1255
+ "step": 1000,
1256
+ "train/classification_loss": 0.22982969880104065,
1257
+ "train/contrastive_loss": 5.4518961906433105,
1258
+ "train/negative_loss": 3.708037853240967,
1259
+ "train/num_negatives": 52,
1260
+ "train/num_positives": 4,
1261
+ "train/positive_loss": 1.7438582181930542,
1262
+ "train/total_loss": 1.3202089071273804
1263
+ },
1264
+ {
1265
+ "epoch": 50.0,
1266
+ "step": 1000,
1267
+ "train/classification_loss": 0.19913451373577118,
1268
+ "train/contrastive_loss": 2.374002456665039,
1269
+ "train/negative_loss": 1.768825888633728,
1270
+ "train/num_negatives": 48,
1271
+ "train/num_positives": 8,
1272
+ "train/positive_loss": 0.6051765084266663,
1273
+ "train/total_loss": 0.6739349961280823
1274
+ },
1275
+ {
1276
+ "epoch": 50.0,
1277
+ "step": 1000,
1278
+ "train/classification_loss": 0.1891087144613266,
1279
+ "train/contrastive_loss": 1.0433825254440308,
1280
+ "train/negative_loss": 0.5633068680763245,
1281
+ "train/num_negatives": 46,
1282
+ "train/num_positives": 8,
1283
+ "train/positive_loss": 0.4800756871700287,
1284
+ "train/total_loss": 0.3977852165699005
1285
+ },
1286
+ {
1287
+ "epoch": 50.0,
1288
+ "eval_exact_match_accuracy": 0.225,
1289
+ "eval_hamming_loss": 0.06911764705882353,
1290
+ "eval_loss": 0.7947654128074646,
1291
+ "eval_macro_f1": 0.1929738562091503,
1292
+ "eval_macro_precision": 0.2908496732026144,
1293
+ "eval_macro_recall": 0.15017825311942956,
1294
+ "eval_micro_f1": 0.41975308641975306,
1295
+ "eval_micro_precision": 0.7083333333333334,
1296
+ "eval_micro_recall": 0.2982456140350877,
1297
+ "eval_runtime": 0.2051,
1298
+ "eval_samples_per_second": 194.982,
1299
+ "eval_steps_per_second": 24.373,
1300
+ "step": 1000
1301
+ },
1302
+ {
1303
+ "epoch": 50.0,
1304
+ "step": 1000,
1305
  "total_flos": 0.0,
1306
+ "train_loss": 0.8574352493286133,
1307
+ "train_runtime": 257.7927,
1308
+ "train_samples_per_second": 612.896,
1309
+ "train_steps_per_second": 38.791
1310
  }
1311
  ],
1312
  "logging_steps": 50,
1313
+ "max_steps": 10000,
1314
  "num_input_tokens_seen": 0,
1315
+ "num_train_epochs": 500,
1316
+ "save_steps": 10000,
1317
  "stateful_callbacks": {
1318
  "EarlyStoppingCallback": {
1319
  "args": {