bcai001 commited on
Commit
0bbe439
·
verified ·
1 Parent(s): 993e076

Upload trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +451 -0
trainer_state.json ADDED
@@ -0,0 +1,451 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8770001309177381,
3
+ "best_model_checkpoint": "/mnt/nfs-storage-pvc-n28/user_codes/rizeJin/wzl/projects/20240825.sts-semantic-contribution-degree/outputs/sumcse/5401.reimple-robert-large",
4
+ "epoch": 3.0,
5
+ "global_step": 6462,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.06,
12
+ "eval_avg_sts": 0.8284010538709128,
13
+ "eval_sickr_spearman": 0.8087322680313793,
14
+ "eval_stsb_spearman": 0.8480698397104464,
15
+ "step": 125
16
+ },
17
+ {
18
+ "epoch": 0.12,
19
+ "eval_avg_sts": 0.8484297373111971,
20
+ "eval_sickr_spearman": 0.8391122278775942,
21
+ "eval_stsb_spearman": 0.8577472467448001,
22
+ "step": 250
23
+ },
24
+ {
25
+ "epoch": 0.17,
26
+ "eval_avg_sts": 0.8522751763001004,
27
+ "eval_sickr_spearman": 0.8446321541792678,
28
+ "eval_stsb_spearman": 0.859918198420933,
29
+ "step": 375
30
+ },
31
+ {
32
+ "epoch": 0.23,
33
+ "learning_rate": 9.226245744351595e-06,
34
+ "loss": 0.9779,
35
+ "step": 500
36
+ },
37
+ {
38
+ "epoch": 0.23,
39
+ "eval_avg_sts": 0.8529384795629767,
40
+ "eval_sickr_spearman": 0.8425804096191359,
41
+ "eval_stsb_spearman": 0.8632965495068176,
42
+ "step": 500
43
+ },
44
+ {
45
+ "epoch": 0.29,
46
+ "eval_avg_sts": 0.854393833444526,
47
+ "eval_sickr_spearman": 0.8428722668204894,
48
+ "eval_stsb_spearman": 0.8659154000685627,
49
+ "step": 625
50
+ },
51
+ {
52
+ "epoch": 0.35,
53
+ "eval_avg_sts": 0.8530765893420603,
54
+ "eval_sickr_spearman": 0.8375766255335162,
55
+ "eval_stsb_spearman": 0.8685765531506044,
56
+ "step": 750
57
+ },
58
+ {
59
+ "epoch": 0.41,
60
+ "eval_avg_sts": 0.8598428877676623,
61
+ "eval_sickr_spearman": 0.8496016440584608,
62
+ "eval_stsb_spearman": 0.8700841314768637,
63
+ "step": 875
64
+ },
65
+ {
66
+ "epoch": 0.46,
67
+ "learning_rate": 8.452491488703189e-06,
68
+ "loss": 0.5021,
69
+ "step": 1000
70
+ },
71
+ {
72
+ "epoch": 0.46,
73
+ "eval_avg_sts": 0.8553352649068404,
74
+ "eval_sickr_spearman": 0.8414943783845188,
75
+ "eval_stsb_spearman": 0.8691761514291622,
76
+ "step": 1000
77
+ },
78
+ {
79
+ "epoch": 0.52,
80
+ "eval_avg_sts": 0.8611558909552111,
81
+ "eval_sickr_spearman": 0.8500423294141554,
82
+ "eval_stsb_spearman": 0.8722694524962668,
83
+ "step": 1125
84
+ },
85
+ {
86
+ "epoch": 0.58,
87
+ "eval_avg_sts": 0.8470077761608523,
88
+ "eval_sickr_spearman": 0.824938778185323,
89
+ "eval_stsb_spearman": 0.8690767741363816,
90
+ "step": 1250
91
+ },
92
+ {
93
+ "epoch": 0.64,
94
+ "eval_avg_sts": 0.8523760853013311,
95
+ "eval_sickr_spearman": 0.8380687041677498,
96
+ "eval_stsb_spearman": 0.8666834664349123,
97
+ "step": 1375
98
+ },
99
+ {
100
+ "epoch": 0.7,
101
+ "learning_rate": 7.678737233054782e-06,
102
+ "loss": 0.4518,
103
+ "step": 1500
104
+ },
105
+ {
106
+ "epoch": 0.7,
107
+ "eval_avg_sts": 0.8575383621060841,
108
+ "eval_sickr_spearman": 0.8451627057257532,
109
+ "eval_stsb_spearman": 0.869914018486415,
110
+ "step": 1500
111
+ },
112
+ {
113
+ "epoch": 0.75,
114
+ "eval_avg_sts": 0.8567243246034306,
115
+ "eval_sickr_spearman": 0.8404439381960673,
116
+ "eval_stsb_spearman": 0.8730047110107938,
117
+ "step": 1625
118
+ },
119
+ {
120
+ "epoch": 0.81,
121
+ "eval_avg_sts": 0.855931320312622,
122
+ "eval_sickr_spearman": 0.8433319042322368,
123
+ "eval_stsb_spearman": 0.8685307363930072,
124
+ "step": 1750
125
+ },
126
+ {
127
+ "epoch": 0.87,
128
+ "eval_avg_sts": 0.8568209870234867,
129
+ "eval_sickr_spearman": 0.8408516742161756,
130
+ "eval_stsb_spearman": 0.8727902998307979,
131
+ "step": 1875
132
+ },
133
+ {
134
+ "epoch": 0.93,
135
+ "learning_rate": 6.904982977406376e-06,
136
+ "loss": 0.4282,
137
+ "step": 2000
138
+ },
139
+ {
140
+ "epoch": 0.93,
141
+ "eval_avg_sts": 0.8540671502188022,
142
+ "eval_sickr_spearman": 0.8388130421467362,
143
+ "eval_stsb_spearman": 0.869321258290868,
144
+ "step": 2000
145
+ },
146
+ {
147
+ "epoch": 0.99,
148
+ "eval_avg_sts": 0.8558945239823259,
149
+ "eval_sickr_spearman": 0.8423186881473997,
150
+ "eval_stsb_spearman": 0.869470359817252,
151
+ "step": 2125
152
+ },
153
+ {
154
+ "epoch": 1.04,
155
+ "eval_avg_sts": 0.8555080041232501,
156
+ "eval_sickr_spearman": 0.8408939896165424,
157
+ "eval_stsb_spearman": 0.870122018629958,
158
+ "step": 2250
159
+ },
160
+ {
161
+ "epoch": 1.1,
162
+ "eval_avg_sts": 0.8568942691339498,
163
+ "eval_sickr_spearman": 0.8418429400876551,
164
+ "eval_stsb_spearman": 0.8719455981802446,
165
+ "step": 2375
166
+ },
167
+ {
168
+ "epoch": 1.16,
169
+ "learning_rate": 6.13122872175797e-06,
170
+ "loss": 0.3707,
171
+ "step": 2500
172
+ },
173
+ {
174
+ "epoch": 1.16,
175
+ "eval_avg_sts": 0.8557911670636686,
176
+ "eval_sickr_spearman": 0.8392199616381198,
177
+ "eval_stsb_spearman": 0.8723623724892173,
178
+ "step": 2500
179
+ },
180
+ {
181
+ "epoch": 1.22,
182
+ "eval_avg_sts": 0.85588148387946,
183
+ "eval_sickr_spearman": 0.83787566717107,
184
+ "eval_stsb_spearman": 0.8738873005878499,
185
+ "step": 2625
186
+ },
187
+ {
188
+ "epoch": 1.28,
189
+ "eval_avg_sts": 0.8622210733781358,
190
+ "eval_sickr_spearman": 0.8489977970511808,
191
+ "eval_stsb_spearman": 0.8754443497050908,
192
+ "step": 2750
193
+ },
194
+ {
195
+ "epoch": 1.33,
196
+ "eval_avg_sts": 0.8624794115602034,
197
+ "eval_sickr_spearman": 0.8479586922026686,
198
+ "eval_stsb_spearman": 0.8770001309177381,
199
+ "step": 2875
200
+ },
201
+ {
202
+ "epoch": 1.39,
203
+ "learning_rate": 5.357474466109565e-06,
204
+ "loss": 0.3458,
205
+ "step": 3000
206
+ },
207
+ {
208
+ "epoch": 1.39,
209
+ "eval_avg_sts": 0.8548885349566437,
210
+ "eval_sickr_spearman": 0.8383297051729646,
211
+ "eval_stsb_spearman": 0.871447364740323,
212
+ "step": 3000
213
+ },
214
+ {
215
+ "epoch": 1.45,
216
+ "eval_avg_sts": 0.8562784353766513,
217
+ "eval_sickr_spearman": 0.841656867600684,
218
+ "eval_stsb_spearman": 0.8709000031526186,
219
+ "step": 3125
220
+ },
221
+ {
222
+ "epoch": 1.51,
223
+ "eval_avg_sts": 0.8578722811343277,
224
+ "eval_sickr_spearman": 0.8429833425291002,
225
+ "eval_stsb_spearman": 0.8727612197395551,
226
+ "step": 3250
227
+ },
228
+ {
229
+ "epoch": 1.57,
230
+ "eval_avg_sts": 0.8600916280834878,
231
+ "eval_sickr_spearman": 0.8445091945595862,
232
+ "eval_stsb_spearman": 0.8756740616073895,
233
+ "step": 3375
234
+ },
235
+ {
236
+ "epoch": 1.62,
237
+ "learning_rate": 4.583720210461158e-06,
238
+ "loss": 0.3435,
239
+ "step": 3500
240
+ },
241
+ {
242
+ "epoch": 1.62,
243
+ "eval_avg_sts": 0.8593220407937633,
244
+ "eval_sickr_spearman": 0.8451772111183875,
245
+ "eval_stsb_spearman": 0.8734668704691391,
246
+ "step": 3500
247
+ },
248
+ {
249
+ "epoch": 1.68,
250
+ "eval_avg_sts": 0.861320493545486,
251
+ "eval_sickr_spearman": 0.8473822229232085,
252
+ "eval_stsb_spearman": 0.8752587641677635,
253
+ "step": 3625
254
+ },
255
+ {
256
+ "epoch": 1.74,
257
+ "eval_avg_sts": 0.860298885852978,
258
+ "eval_sickr_spearman": 0.8464292858706762,
259
+ "eval_stsb_spearman": 0.8741684858352797,
260
+ "step": 3750
261
+ },
262
+ {
263
+ "epoch": 1.8,
264
+ "eval_avg_sts": 0.8550809080819418,
265
+ "eval_sickr_spearman": 0.8354855435013065,
266
+ "eval_stsb_spearman": 0.8746762726625771,
267
+ "step": 3875
268
+ },
269
+ {
270
+ "epoch": 1.86,
271
+ "learning_rate": 3.8099659548127517e-06,
272
+ "loss": 0.3442,
273
+ "step": 4000
274
+ },
275
+ {
276
+ "epoch": 1.86,
277
+ "eval_avg_sts": 0.8559180414679683,
278
+ "eval_sickr_spearman": 0.8384507435485884,
279
+ "eval_stsb_spearman": 0.8733853393873484,
280
+ "step": 4000
281
+ },
282
+ {
283
+ "epoch": 1.92,
284
+ "eval_avg_sts": 0.8571707122552362,
285
+ "eval_sickr_spearman": 0.8428135045544151,
286
+ "eval_stsb_spearman": 0.8715279199560574,
287
+ "step": 4125
288
+ },
289
+ {
290
+ "epoch": 1.97,
291
+ "eval_avg_sts": 0.8560277042936426,
292
+ "eval_sickr_spearman": 0.8397806286852069,
293
+ "eval_stsb_spearman": 0.8722747799020784,
294
+ "step": 4250
295
+ },
296
+ {
297
+ "epoch": 2.03,
298
+ "eval_avg_sts": 0.8578281901982555,
299
+ "eval_sickr_spearman": 0.8433445364119149,
300
+ "eval_stsb_spearman": 0.8723118439845962,
301
+ "step": 4375
302
+ },
303
+ {
304
+ "epoch": 2.09,
305
+ "learning_rate": 3.036211699164346e-06,
306
+ "loss": 0.3131,
307
+ "step": 4500
308
+ },
309
+ {
310
+ "epoch": 2.09,
311
+ "eval_avg_sts": 0.8546342996226579,
312
+ "eval_sickr_spearman": 0.8383026636628548,
313
+ "eval_stsb_spearman": 0.8709659355824609,
314
+ "step": 4500
315
+ },
316
+ {
317
+ "epoch": 2.15,
318
+ "eval_avg_sts": 0.8559652822309323,
319
+ "eval_sickr_spearman": 0.840341391794497,
320
+ "eval_stsb_spearman": 0.8715891726673677,
321
+ "step": 4625
322
+ },
323
+ {
324
+ "epoch": 2.21,
325
+ "eval_avg_sts": 0.8546617687967435,
326
+ "eval_sickr_spearman": 0.8391962823051108,
327
+ "eval_stsb_spearman": 0.8701272552883762,
328
+ "step": 4750
329
+ },
330
+ {
331
+ "epoch": 2.26,
332
+ "eval_avg_sts": 0.858705271470795,
333
+ "eval_sickr_spearman": 0.8465492195309672,
334
+ "eval_stsb_spearman": 0.8708613234106228,
335
+ "step": 4875
336
+ },
337
+ {
338
+ "epoch": 2.32,
339
+ "learning_rate": 2.2624574435159395e-06,
340
+ "loss": 0.2846,
341
+ "step": 5000
342
+ },
343
+ {
344
+ "epoch": 2.32,
345
+ "eval_avg_sts": 0.8597192689340263,
346
+ "eval_sickr_spearman": 0.8464290457151691,
347
+ "eval_stsb_spearman": 0.8730094921528834,
348
+ "step": 5000
349
+ },
350
+ {
351
+ "epoch": 2.38,
352
+ "eval_avg_sts": 0.8606101916960156,
353
+ "eval_sickr_spearman": 0.8477548001770637,
354
+ "eval_stsb_spearman": 0.8734655832149676,
355
+ "step": 5125
356
+ },
357
+ {
358
+ "epoch": 2.44,
359
+ "eval_avg_sts": 0.859970718351403,
360
+ "eval_sickr_spearman": 0.8465689603136584,
361
+ "eval_stsb_spearman": 0.8733724763891477,
362
+ "step": 5250
363
+ },
364
+ {
365
+ "epoch": 2.5,
366
+ "eval_avg_sts": 0.8611164416935826,
367
+ "eval_sickr_spearman": 0.8490797381102341,
368
+ "eval_stsb_spearman": 0.873153145276931,
369
+ "step": 5375
370
+ },
371
+ {
372
+ "epoch": 2.55,
373
+ "learning_rate": 1.4887031878675335e-06,
374
+ "loss": 0.2804,
375
+ "step": 5500
376
+ },
377
+ {
378
+ "epoch": 2.55,
379
+ "eval_avg_sts": 0.860553080718742,
380
+ "eval_sickr_spearman": 0.8488836271230623,
381
+ "eval_stsb_spearman": 0.8722225343144216,
382
+ "step": 5500
383
+ },
384
+ {
385
+ "epoch": 2.61,
386
+ "eval_avg_sts": 0.8602837879281291,
387
+ "eval_sickr_spearman": 0.8481850627837464,
388
+ "eval_stsb_spearman": 0.8723825130725117,
389
+ "step": 5625
390
+ },
391
+ {
392
+ "epoch": 2.67,
393
+ "eval_avg_sts": 0.857967378909783,
394
+ "eval_sickr_spearman": 0.8440642824669651,
395
+ "eval_stsb_spearman": 0.8718704753526009,
396
+ "step": 5750
397
+ },
398
+ {
399
+ "epoch": 2.73,
400
+ "eval_avg_sts": 0.8601815553282466,
401
+ "eval_sickr_spearman": 0.8467775593872041,
402
+ "eval_stsb_spearman": 0.873585551269289,
403
+ "step": 5875
404
+ },
405
+ {
406
+ "epoch": 2.79,
407
+ "learning_rate": 7.149489322191272e-07,
408
+ "loss": 0.2824,
409
+ "step": 6000
410
+ },
411
+ {
412
+ "epoch": 2.79,
413
+ "eval_avg_sts": 0.860910326239195,
414
+ "eval_sickr_spearman": 0.8483497614305777,
415
+ "eval_stsb_spearman": 0.8734708910478122,
416
+ "step": 6000
417
+ },
418
+ {
419
+ "epoch": 2.84,
420
+ "eval_avg_sts": 0.860010166556528,
421
+ "eval_sickr_spearman": 0.8470775616467865,
422
+ "eval_stsb_spearman": 0.8729427714662694,
423
+ "step": 6125
424
+ },
425
+ {
426
+ "epoch": 2.9,
427
+ "eval_avg_sts": 0.8590174618570985,
428
+ "eval_sickr_spearman": 0.8456035832058536,
429
+ "eval_stsb_spearman": 0.8724313405083433,
430
+ "step": 6250
431
+ },
432
+ {
433
+ "epoch": 2.96,
434
+ "eval_avg_sts": 0.8593514591951028,
435
+ "eval_sickr_spearman": 0.8458110775640663,
436
+ "eval_stsb_spearman": 0.8728918408261394,
437
+ "step": 6375
438
+ },
439
+ {
440
+ "epoch": 3.0,
441
+ "step": 6462,
442
+ "train_runtime": 8943.5159,
443
+ "train_samples_per_second": 0.723
444
+ }
445
+ ],
446
+ "max_steps": 6462,
447
+ "num_train_epochs": 3,
448
+ "total_flos": 169236001393016832,
449
+ "trial_name": null,
450
+ "trial_params": null
451
+ }