tryingpro commited on
Commit
4cc3f04
·
verified ·
1 Parent(s): b1ef46c

Training in progress, step 100, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:350f4ad4951edb55c18c7b67f3bea1315179542755ad34f4347dd9e4b7e1ba0a
3
  size 100966336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3ce26b2833b28803c8aab9ef27885305339358f453f9374089eff208aa943c9
3
  size 100966336
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75713f15c37cd28c11dc59de161ca5cb088721401f311c23767c073d1790f08f
3
  size 202110330
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e9d12b28ba1e071c6cda15ed53d26da5d977cf291c9026ab54c9d4599d25a4e
3
  size 202110330
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b8c6bb9c8f6083e00444ec19c8fde0355452bb525efe07516fc2fd1912e2fb4
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:deb0faa09832f00a697bad032c8f4da1c80c1ae05de14fb01ee2fd76432202ec
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7041fa0be7432c15ac468c1425bc3b5d595e4b5b769cfa7728743ab80ef12291
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a60c7d771c1fd156acee762fba03c724cb41829a3f71df370ecd1d20b134982
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.1308874566187407,
5
  "eval_steps": 25,
6
- "global_step": 99,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -732,6 +732,21 @@
732
  "learning_rate": 7.709637592770991e-08,
733
  "loss": 1.3893,
734
  "step": 99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
735
  }
736
  ],
737
  "logging_steps": 1,
@@ -746,12 +761,12 @@
746
  "should_evaluate": false,
747
  "should_log": false,
748
  "should_save": true,
749
- "should_training_stop": false
750
  },
751
  "attributes": {}
752
  }
753
  },
754
- "total_flos": 8.25082228160594e+16,
755
  "train_batch_size": 2,
756
  "trial_name": null,
757
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.13220955214014213,
5
  "eval_steps": 25,
6
+ "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
732
  "learning_rate": 7.709637592770991e-08,
733
  "loss": 1.3893,
734
  "step": 99
735
+ },
736
+ {
737
+ "epoch": 0.13220955214014213,
738
+ "grad_norm": 0.27371945977211,
739
+ "learning_rate": 0.0,
740
+ "loss": 1.459,
741
+ "step": 100
742
+ },
743
+ {
744
+ "epoch": 0.13220955214014213,
745
+ "eval_loss": 1.4851912260055542,
746
+ "eval_runtime": 186.5599,
747
+ "eval_samples_per_second": 13.658,
748
+ "eval_steps_per_second": 6.829,
749
+ "step": 100
750
  }
751
  ],
752
  "logging_steps": 1,
 
761
  "should_evaluate": false,
762
  "should_log": false,
763
  "should_save": true,
764
+ "should_training_stop": true
765
  },
766
  "attributes": {}
767
  }
768
  },
769
+ "total_flos": 8.33416392081408e+16,
770
  "train_batch_size": 2,
771
  "trial_name": null,
772
  "trial_params": null