DaniilOr commited on
Commit
abfa868
·
verified ·
1 Parent(s): 87390a9

Upload 12 files

Browse files
Files changed (5) hide show
  1. model.safetensors +1 -1
  2. optimizer.pt +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +93 -3
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef88acd7b940c0a42e85cb82aad1441811e9cad6a2a30dbe70b1e6dfd39899d4
3
  size 1115283588
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd48e3380c56b58e2149650ba8529096ec45c2d8a4d341e757dbac15836cab9c
3
  size 1115283588
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e95ab3a0e177ebef1a730ea6c25411246adf47a11f94abb1102b76fae92c729a
3
  size 2230686277
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f976b1edf0530e7271e36b457726806574321cfc8fb25126461bbda3ddddd1b3
3
  size 2230686277
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de71cb290b64bc136ac49050f7bce13d74391577dd275bda3c8142bd03f8bfb1
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6db6ae44865facac041c5d35025e75900dcf566f91bdfcbbe8128b143e2cd9a6
3
  size 14575
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b6716fb4bc72b67621113830aa02a129248ffd9a4100203fff469ffeaeccbf5
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d23ecd5f364728c2633cde01dd1980cb8e93cd41ca1a08aca9672958fd115a4
3
  size 627
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.9327729940414429,
3
  "best_model_checkpoint": "./results/checkpoint-322",
4
- "epoch": 16.0,
5
  "eval_steps": 500,
6
- "global_step": 5152,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -243,6 +243,96 @@
243
  "eval_samples_per_second": 91.207,
244
  "eval_steps_per_second": 0.462,
245
  "step": 5152
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
  }
247
  ],
248
  "logging_steps": 500,
@@ -250,7 +340,7 @@
250
  "num_input_tokens_seen": 0,
251
  "num_train_epochs": 50,
252
  "save_steps": 500,
253
- "total_flos": 2.7107193386223206e+17,
254
  "train_batch_size": 200,
255
  "trial_name": null,
256
  "trial_params": null
 
1
  {
2
  "best_metric": 1.9327729940414429,
3
  "best_model_checkpoint": "./results/checkpoint-322",
4
+ "epoch": 22.0,
5
  "eval_steps": 500,
6
+ "global_step": 7084,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
243
  "eval_samples_per_second": 91.207,
244
  "eval_steps_per_second": 0.462,
245
  "step": 5152
246
+ },
247
+ {
248
+ "epoch": 17.0,
249
+ "eval_accuracy": 0.3341034103410341,
250
+ "eval_loss": 5.483065605163574,
251
+ "eval_precision": 0.352613586631227,
252
+ "eval_recall": 0.3341034103410341,
253
+ "eval_runtime": 98.2468,
254
+ "eval_samples_per_second": 92.522,
255
+ "eval_steps_per_second": 0.468,
256
+ "step": 5474
257
+ },
258
+ {
259
+ "epoch": 17.08,
260
+ "learning_rate": 6.58385093167702e-05,
261
+ "loss": 0.0173,
262
+ "step": 5500
263
+ },
264
+ {
265
+ "epoch": 18.0,
266
+ "eval_accuracy": 0.3547854785478548,
267
+ "eval_loss": 5.973876476287842,
268
+ "eval_precision": 0.37074034210656404,
269
+ "eval_recall": 0.3547854785478548,
270
+ "eval_runtime": 99.9918,
271
+ "eval_samples_per_second": 90.907,
272
+ "eval_steps_per_second": 0.46,
273
+ "step": 5796
274
+ },
275
+ {
276
+ "epoch": 18.63,
277
+ "learning_rate": 6.273291925465838e-05,
278
+ "loss": 0.017,
279
+ "step": 6000
280
+ },
281
+ {
282
+ "epoch": 19.0,
283
+ "eval_accuracy": 0.3471947194719472,
284
+ "eval_loss": 5.270116329193115,
285
+ "eval_precision": 0.3540165595958133,
286
+ "eval_recall": 0.3471947194719472,
287
+ "eval_runtime": 98.3519,
288
+ "eval_samples_per_second": 92.423,
289
+ "eval_steps_per_second": 0.468,
290
+ "step": 6118
291
+ },
292
+ {
293
+ "epoch": 20.0,
294
+ "eval_accuracy": 0.34994499449944994,
295
+ "eval_loss": 6.121872901916504,
296
+ "eval_precision": 0.3608309273966228,
297
+ "eval_recall": 0.34994499449944994,
298
+ "eval_runtime": 98.8185,
299
+ "eval_samples_per_second": 91.987,
300
+ "eval_steps_per_second": 0.465,
301
+ "step": 6440
302
+ },
303
+ {
304
+ "epoch": 20.19,
305
+ "learning_rate": 5.962732919254659e-05,
306
+ "loss": 0.0152,
307
+ "step": 6500
308
+ },
309
+ {
310
+ "epoch": 21.0,
311
+ "eval_accuracy": 0.3448844884488449,
312
+ "eval_loss": 6.195789337158203,
313
+ "eval_precision": 0.3597641027560754,
314
+ "eval_recall": 0.3448844884488449,
315
+ "eval_runtime": 98.2972,
316
+ "eval_samples_per_second": 92.475,
317
+ "eval_steps_per_second": 0.468,
318
+ "step": 6762
319
+ },
320
+ {
321
+ "epoch": 21.74,
322
+ "learning_rate": 5.652173913043478e-05,
323
+ "loss": 0.011,
324
+ "step": 7000
325
+ },
326
+ {
327
+ "epoch": 22.0,
328
+ "eval_accuracy": 0.35687568756875687,
329
+ "eval_loss": 5.533013820648193,
330
+ "eval_precision": 0.3680252789908563,
331
+ "eval_recall": 0.35687568756875687,
332
+ "eval_runtime": 97.9019,
333
+ "eval_samples_per_second": 92.848,
334
+ "eval_steps_per_second": 0.47,
335
+ "step": 7084
336
  }
337
  ],
338
  "logging_steps": 500,
 
340
  "num_input_tokens_seen": 0,
341
  "num_train_epochs": 50,
342
  "save_steps": 500,
343
+ "total_flos": 3.727239090605691e+17,
344
  "train_batch_size": 200,
345
  "trial_name": null,
346
  "trial_params": null