suhani-sarvam committed on
Commit
5170cb1
·
verified ·
1 Parent(s): 1991b6e

Training in progress, step 200, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40afff962f83d25242ae886d301c4ec413d496314897aef7825a0fcf5eeaecfc
3
  size 399673504
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0107738a3fc62d785cac3110b92ee540cdb6876b0a2d78e0c3fb2de49de1c85d
3
  size 399673504
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:939d17523ab726ce2ddcb344be8b053d797c6f2780cea931524280bb3efd9753
3
  size 35917596
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96d2be8f0dbf1c12e884583a5c0138b66bb30ef6ca537a84b482ac914f357865
3
  size 35917596
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c02148ac9ea527d4e3ffbba1429252cae43c93d94b6a876879af84d3a387d4ac
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9908c65f888932a672ea89ea1f3e3f2ba8a2341258f8e1b8d57f7777b0c1416
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f58ba35c8b651c13de09248709fe177fe61d200042e5942ab821d19bb8977c9
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:684c3e2167c5a4e88fb6c84465f9d1b861ad3145efebcf9adf0596790d98dbaa
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.17271157167530224,
5
  "eval_steps": 1000,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -14,6 +14,13 @@
14
  "learning_rate": 1e-05,
15
  "loss": 1.8574,
16
  "step": 100
 
 
 
 
 
 
 
17
  }
18
  ],
19
  "logging_steps": 100,
@@ -33,7 +40,7 @@
33
  "attributes": {}
34
  }
35
  },
36
- "total_flos": 5937091156377600.0,
37
  "train_batch_size": 16,
38
  "trial_name": null,
39
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.3454231433506045,
5
  "eval_steps": 1000,
6
+ "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
14
  "learning_rate": 1e-05,
15
  "loss": 1.8574,
16
  "step": 100
17
+ },
18
+ {
19
+ "epoch": 0.3454231433506045,
20
+ "grad_norm": 3.3716881275177,
21
+ "learning_rate": 2e-05,
22
+ "loss": 1.8131,
23
+ "step": 200
24
  }
25
  ],
26
  "logging_steps": 100,
 
40
  "attributes": {}
41
  }
42
  },
43
+ "total_flos": 1.18741823127552e+16,
44
  "train_batch_size": 16,
45
  "trial_name": null,
46
  "trial_params": null