suhani commited on
Commit
083e673
·
verified ·
1 Parent(s): 0881a2a

Training in progress, step 4740, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c99faddfad100fa77c0039f881c84ac5f3a3a476e17a63ba466ba08fe8ab8e4c
3
  size 678616072
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d77bee8e521431d0d810269afca006ef1827174939871728823f4302637331d
3
  size 678616072
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d50c75c95f9950fcf39e3323c3040ae8a2ba627959291a9b84e2e2ebccac1cc3
3
  size 35918620
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd8a49ff38db4a28ca2cd50fd6b91f6e6123e2285bb7a1642faf86e84ac899d7
3
  size 35918620
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc5e30f0122c3006101bb791664dadef26bc6297649ce424c7f6635c0291cc9e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dda5d5e3fb4873072a3f6c6dbcb5ae3abb4df64a040f016331e928413ed9ae3c
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cbf353364c20b461a84ee85e28a511189bdf113495f91172acf4273106ef6546
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0087a623fb1ae91ceff851544989b0b5e0ae3009ea2753b5d119d172a1937c7f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9493670886075949,
5
  "eval_steps": 500,
6
- "global_step": 4500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -322,6 +322,20 @@
322
  "learning_rate": 2.830188679245283e-06,
323
  "loss": 2.609,
324
  "step": 4500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
325
  }
326
  ],
327
  "logging_steps": 100,
@@ -336,12 +350,12 @@
336
  "should_evaluate": false,
337
  "should_log": false,
338
  "should_save": true,
339
- "should_training_stop": false
340
  },
341
  "attributes": {}
342
  }
343
  },
344
- "total_flos": 1.335847845888e+17,
345
  "train_batch_size": 16,
346
  "trial_name": null,
347
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 4740,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
322
  "learning_rate": 2.830188679245283e-06,
323
  "loss": 2.609,
324
  "step": 4500
325
+ },
326
+ {
327
+ "epoch": 0.9704641350210971,
328
+ "grad_norm": 4.100802898406982,
329
+ "learning_rate": 1.650943396226415e-06,
330
+ "loss": 2.6422,
331
+ "step": 4600
332
+ },
333
+ {
334
+ "epoch": 0.9915611814345991,
335
+ "grad_norm": 3.709977149963379,
336
+ "learning_rate": 4.7169811320754717e-07,
337
+ "loss": 2.6155,
338
+ "step": 4700
339
  }
340
  ],
341
  "logging_steps": 100,
 
350
  "should_evaluate": false,
351
  "should_log": false,
352
  "should_save": true,
353
+ "should_training_stop": true
354
  },
355
  "attributes": {}
356
  }
357
  },
358
+ "total_flos": 1.406870423027712e+17,
359
  "train_batch_size": 16,
360
  "trial_name": null,
361
  "trial_params": null