wcyat commited on
Commit
33b5c40
·
verified ·
1 Parent(s): 09466d7

Training in progress, step 650, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e0b88df280fa0f30bf5e5a5d5f23ce00dd2bb9235aa6fd79cb616e1b88b7c43
3
  size 1304192904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4540ca1235af2c875e539ac3b5e586e4e382144e0ba3c5f3becf22bd60877612
3
  size 1304192904
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b81e573972743b4058f7f1f776861f288637e51f5ffb0c5bf72fdbd0a6e38976
3
  size 2608620781
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a698da9caeff532a802010e3682120ee85a9c61532b1612dffea9783e6c3601
3
  size 2608620781
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86301a0f7cc62f6260b9478c1f43594e34ae98b6bb8fef981eb0359e36b65a43
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a4f106ba33756e9a0a48fca5987af21381f69fdf3950392b72e934885bb98ba
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c4dfa613499337ec4457a58c90204e2eecf20f93384b8c9c5881b6fea8e4327e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f059210d151648230eef71b7e1b4e4d9ec6e64c9baba4310c209aeb20d3735f3
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.37029457092285156,
3
  "best_model_checkpoint": "./results/checkpoint-360",
4
- "epoch": 1.5384615384615383,
5
  "eval_steps": 20,
6
- "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -407,6 +407,118 @@
407
  "eval_samples_per_second": 10.355,
408
  "eval_steps_per_second": 2.623,
409
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
410
  }
411
  ],
412
  "logging_steps": 20,
@@ -421,12 +533,12 @@
421
  "should_evaluate": false,
422
  "should_log": false,
423
  "should_save": true,
424
- "should_training_stop": false
425
  },
426
  "attributes": {}
427
  }
428
  },
429
- "total_flos": 1631797255759776.0,
430
  "train_batch_size": 4,
431
  "trial_name": null,
432
  "trial_params": null
 
1
  {
2
  "best_metric": 0.37029457092285156,
3
  "best_model_checkpoint": "./results/checkpoint-360",
4
+ "epoch": 2.0,
5
  "eval_steps": 20,
6
+ "global_step": 650,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
407
  "eval_samples_per_second": 10.355,
408
  "eval_steps_per_second": 2.623,
409
  "step": 500
410
+ },
411
+ {
412
+ "epoch": 1.6,
413
+ "grad_norm": 0.36008283495903015,
414
+ "learning_rate": 4.000000000000001e-06,
415
+ "loss": 0.4698,
416
+ "step": 520
417
+ },
418
+ {
419
+ "epoch": 1.6,
420
+ "eval_accuracy": 0.8933333333333333,
421
+ "eval_loss": 0.4106966555118561,
422
+ "eval_runtime": 14.9453,
423
+ "eval_samples_per_second": 10.037,
424
+ "eval_steps_per_second": 2.543,
425
+ "step": 520
426
+ },
427
+ {
428
+ "epoch": 1.6615384615384614,
429
+ "grad_norm": 0.17318658530712128,
430
+ "learning_rate": 3.384615384615385e-06,
431
+ "loss": 0.2827,
432
+ "step": 540
433
+ },
434
+ {
435
+ "epoch": 1.6615384615384614,
436
+ "eval_accuracy": 0.86,
437
+ "eval_loss": 0.5434665083885193,
438
+ "eval_runtime": 14.2953,
439
+ "eval_samples_per_second": 10.493,
440
+ "eval_steps_per_second": 2.658,
441
+ "step": 540
442
+ },
443
+ {
444
+ "epoch": 1.7230769230769232,
445
+ "grad_norm": 55.85834884643555,
446
+ "learning_rate": 2.7692307692307697e-06,
447
+ "loss": 0.3331,
448
+ "step": 560
449
+ },
450
+ {
451
+ "epoch": 1.7230769230769232,
452
+ "eval_accuracy": 0.8733333333333333,
453
+ "eval_loss": 0.5320577621459961,
454
+ "eval_runtime": 14.6346,
455
+ "eval_samples_per_second": 10.25,
456
+ "eval_steps_per_second": 2.597,
457
+ "step": 560
458
+ },
459
+ {
460
+ "epoch": 1.7846153846153845,
461
+ "grad_norm": 0.38822901248931885,
462
+ "learning_rate": 2.153846153846154e-06,
463
+ "loss": 0.1841,
464
+ "step": 580
465
+ },
466
+ {
467
+ "epoch": 1.7846153846153845,
468
+ "eval_accuracy": 0.8866666666666667,
469
+ "eval_loss": 0.42079824209213257,
470
+ "eval_runtime": 14.3758,
471
+ "eval_samples_per_second": 10.434,
472
+ "eval_steps_per_second": 2.643,
473
+ "step": 580
474
+ },
475
+ {
476
+ "epoch": 1.8461538461538463,
477
+ "grad_norm": 0.18515652418136597,
478
+ "learning_rate": 1.5384615384615387e-06,
479
+ "loss": 0.2626,
480
+ "step": 600
481
+ },
482
+ {
483
+ "epoch": 1.8461538461538463,
484
+ "eval_accuracy": 0.8933333333333333,
485
+ "eval_loss": 0.4386911988258362,
486
+ "eval_runtime": 14.3806,
487
+ "eval_samples_per_second": 10.431,
488
+ "eval_steps_per_second": 2.642,
489
+ "step": 600
490
+ },
491
+ {
492
+ "epoch": 1.9076923076923076,
493
+ "grad_norm": 1.0158417224884033,
494
+ "learning_rate": 9.230769230769232e-07,
495
+ "loss": 0.3547,
496
+ "step": 620
497
+ },
498
+ {
499
+ "epoch": 1.9076923076923076,
500
+ "eval_accuracy": 0.8866666666666667,
501
+ "eval_loss": 0.43599483370780945,
502
+ "eval_runtime": 14.4539,
503
+ "eval_samples_per_second": 10.378,
504
+ "eval_steps_per_second": 2.629,
505
+ "step": 620
506
+ },
507
+ {
508
+ "epoch": 1.9692307692307693,
509
+ "grad_norm": 0.07184287160634995,
510
+ "learning_rate": 3.0769230769230774e-07,
511
+ "loss": 0.3534,
512
+ "step": 640
513
+ },
514
+ {
515
+ "epoch": 1.9692307692307693,
516
+ "eval_accuracy": 0.8866666666666667,
517
+ "eval_loss": 0.4359683096408844,
518
+ "eval_runtime": 14.4689,
519
+ "eval_samples_per_second": 10.367,
520
+ "eval_steps_per_second": 2.626,
521
+ "step": 640
522
  }
523
  ],
524
  "logging_steps": 20,
 
533
  "should_evaluate": false,
534
  "should_log": false,
535
  "should_save": true,
536
+ "should_training_stop": true
537
  },
538
  "attributes": {}
539
  }
540
  },
541
+ "total_flos": 2124286577709744.0,
542
  "train_batch_size": 4,
543
  "trial_name": null,
544
  "trial_params": null