suhani-sarvam commited on
Commit
d8aeab9
·
verified ·
1 Parent(s): f6503dd

Training in progress, step 3474, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:49f933b839b39d4ea5b711a39c3d96e083669ab787323d2c3a263e59741c3418
3
  size 678616072
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:357ae939fcb410e6b99ec86035610dd8f49a84dae7f3c1295d149581f4b6ffb7
3
  size 678616072
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e31cce8802518823cbba6378d69472af5cc920225c511639f6968237303ab8ca
3
  size 35918620
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72fcabc7f0556c4da51ab570b31d9ac89d55fdf7aa1fe0bd18bfdabd1928148b
3
  size 35918620
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e35f27c0e810ea2c4295eb8b934526418c1259a8125586d1614dbc4bfed6cb5
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc1b6b8e868013db57c0396ae7239b53f297d3c4d8ffddd8e9eb89d144075c78
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e4dd0bd6be37524d79857f9676dd6ba4b10d38ff5759d1470f3d5532961ebb3
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfd16bd47d2a528ae4a16d4e28b182d543a83fa480feb06c902c23c3b7635e47
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.181347150259067,
5
  "eval_steps": 1000,
6
- "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -238,6 +238,34 @@
238
  "eval_samples_per_second": 7.731,
239
  "eval_steps_per_second": 0.969,
240
  "step": 3000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
  }
242
  ],
243
  "logging_steps": 100,
@@ -252,12 +280,12 @@
252
  "should_evaluate": false,
253
  "should_log": false,
254
  "should_save": true,
255
- "should_training_stop": false
256
  },
257
  "attributes": {}
258
  }
259
  },
260
- "total_flos": 8.896375584768e+16,
261
  "train_batch_size": 16,
262
  "trial_name": null,
263
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.0,
5
  "eval_steps": 1000,
6
+ "global_step": 3474,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
238
  "eval_samples_per_second": 7.731,
239
  "eval_steps_per_second": 0.969,
240
  "step": 3000
241
+ },
242
+ {
243
+ "epoch": 5.354058721934369,
244
+ "grad_norm": 3.369074821472168,
245
+ "learning_rate": 6.287827841291191e-06,
246
+ "loss": 2.342,
247
+ "step": 3100
248
+ },
249
+ {
250
+ "epoch": 5.526770293609672,
251
+ "grad_norm": 3.664003610610962,
252
+ "learning_rate": 4.606590450571621e-06,
253
+ "loss": 2.3859,
254
+ "step": 3200
255
+ },
256
+ {
257
+ "epoch": 5.699481865284974,
258
+ "grad_norm": 3.5953192710876465,
259
+ "learning_rate": 2.9253530598520513e-06,
260
+ "loss": 2.3374,
261
+ "step": 3300
262
+ },
263
+ {
264
+ "epoch": 5.872193436960276,
265
+ "grad_norm": 3.2417404651641846,
266
+ "learning_rate": 1.2441156691324815e-06,
267
+ "loss": 2.3414,
268
+ "step": 3400
269
  }
270
  ],
271
  "logging_steps": 100,
 
280
  "should_evaluate": false,
281
  "should_log": false,
282
  "should_save": true,
283
+ "should_training_stop": true
284
  },
285
  "attributes": {}
286
  }
287
  },
288
+ "total_flos": 1.030161330487296e+17,
289
  "train_batch_size": 16,
290
  "trial_name": null,
291
  "trial_params": null