vaibhav1 commited on
Commit
5da3e36
·
verified ·
1 Parent(s): 5d26904

Training in progress, step 300, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4853732d5c5187f500f15cbe605a09c1f694db0b89d14223ed94327a34fe118
3
  size 27280152
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eba7056fd399e1059a89eabd49a56be2ed8f342d50bb40715053752c47047dc9
3
  size 27280152
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a1fe25557fe1cdae29d9cd56ed99d0de30b337173d1401d64e357843aba2b2be
3
  size 54633978
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0f52fe00bc97bd174d591edb500019cf5ee2c5aa778a33cd64699eaaba661ed
3
  size 54633978
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16cc509c9606ba9de627d27965415804a7297fe216a129b8cb0b83ce075eaccd
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d03ccbe83e3e708cdc550a3d9c0d87bf08436a7e1a6b3f8de5d854497b9be5e6
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:393afb4cbd250bf22d5d5ace359604e7205b76612aad4125a76131e274b10710
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92873c8c89778fe11b8eeb338a181eefdf056f2f8096c36bf259c3fd791afb34
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c27036b199a4c30a73639d9297e51f7392b3e00ab56964e4ff5bf53b55c162dc
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62db676ea589f2e897f3ed22ee3133a534ed12d0dd978bfaec8bc59572ea976b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.49777777777777776,
6
  "eval_steps": 20,
7
- "global_step": 280,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -246,6 +246,23 @@
246
  "eval_samples_per_second": 3.3,
247
  "eval_steps_per_second": 0.413,
248
  "step": 280
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
249
  }
250
  ],
251
  "logging_steps": 20,
@@ -265,7 +282,7 @@
265
  "attributes": {}
266
  }
267
  },
268
- "total_flos": 5.943388413394944e+16,
269
  "train_batch_size": 4,
270
  "trial_name": null,
271
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.5333333333333333,
6
  "eval_steps": 20,
7
+ "global_step": 300,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
246
  "eval_samples_per_second": 3.3,
247
  "eval_steps_per_second": 0.413,
248
  "step": 280
249
+ },
250
+ {
251
+ "epoch": 0.5333333333333333,
252
+ "grad_norm": 1.1581426858901978,
253
+ "learning_rate": 0.00016438896236023375,
254
+ "loss": 1.6868,
255
+ "step": 300
256
+ },
257
+ {
258
+ "epoch": 0.5333333333333333,
259
+ "eval_loss": 1.8817518949508667,
260
+ "eval_mean_token_accuracy": 0.5664287745952606,
261
+ "eval_num_tokens": 1429137.0,
262
+ "eval_runtime": 303.9322,
263
+ "eval_samples_per_second": 3.29,
264
+ "eval_steps_per_second": 0.411,
265
+ "step": 300
266
  }
267
  ],
268
  "logging_steps": 20,
 
282
  "attributes": {}
283
  }
284
  },
285
+ "total_flos": 6.137234291392512e+16,
286
  "train_batch_size": 4,
287
  "trial_name": null,
288
  "trial_params": null