JoshMe1 commited on
Commit
13473e0
·
verified ·
1 Parent(s): 0173546

Training in progress, step 500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:daa43241d6bbe1b8688d4b3d389cb19ce25aa440ea5237cf9c9d6223655991c2
3
  size 37762064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa5f8a7cd68528767b439d0b894c881a41126f67afd9d793cf381c4dd083c2e8
3
  size 37762064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e0c1d1cac268ffb1aa4041c772c1044868f894ccf93ae99499f84f8f7a17796
3
  size 75554618
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5650fc7d3e852c12db5d5486177c4d10e906e6125b8f3ede3046568ca1c08dee
3
  size 75554618
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d877db9ecc482231270e060af265e5ab3a05c4da102ab8c9891457049722d35
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fc274fc8b34494ca7e848ad7db694a9a80d044675d4931e9e0d05b8bbbbb8b3
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b255f2d1c973cfa51ba1fda92e29986986fe92677ef2d8675dbfd3f56dee4fb
3
  size 1192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d9e8319412821b43121efd2a24b1f933f272e0b3d457c9e0efc6e16a87d08f4
3
  size 1192
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 5.1122517585754395,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-400",
4
- "epoch": 0.03385311977656941,
5
  "eval_steps": 100,
6
- "global_step": 400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -327,6 +327,84 @@
327
  "eval_samples_per_second": 42.698,
328
  "eval_steps_per_second": 10.677,
329
  "step": 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
330
  }
331
  ],
332
  "logging_steps": 10,
@@ -350,12 +428,12 @@
350
  "should_evaluate": false,
351
  "should_log": false,
352
  "should_save": true,
353
- "should_training_stop": false
354
  },
355
  "attributes": {}
356
  }
357
  },
358
- "total_flos": 1.04695062331392e+16,
359
  "train_batch_size": 4,
360
  "trial_name": null,
361
  "trial_params": null
 
1
  {
2
+ "best_metric": 4.980154037475586,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-500",
4
+ "epoch": 0.04231639972071176,
5
  "eval_steps": 100,
6
+ "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
327
  "eval_samples_per_second": 42.698,
328
  "eval_steps_per_second": 10.677,
329
  "step": 400
330
+ },
331
+ {
332
+ "epoch": 0.034699447770983646,
333
+ "grad_norm": 379.8584289550781,
334
+ "learning_rate": 0.0001,
335
+ "loss": 20.2979,
336
+ "step": 410
337
+ },
338
+ {
339
+ "epoch": 0.03554577576539788,
340
+ "grad_norm": 289.8234558105469,
341
+ "learning_rate": 0.0001,
342
+ "loss": 22.648,
343
+ "step": 420
344
+ },
345
+ {
346
+ "epoch": 0.03639210375981212,
347
+ "grad_norm": 171.6800079345703,
348
+ "learning_rate": 0.0001,
349
+ "loss": 20.6801,
350
+ "step": 430
351
+ },
352
+ {
353
+ "epoch": 0.03723843175422635,
354
+ "grad_norm": 210.1263427734375,
355
+ "learning_rate": 0.0001,
356
+ "loss": 20.6733,
357
+ "step": 440
358
+ },
359
+ {
360
+ "epoch": 0.03808475974864058,
361
+ "grad_norm": 193.63482666015625,
362
+ "learning_rate": 0.0001,
363
+ "loss": 20.3255,
364
+ "step": 450
365
+ },
366
+ {
367
+ "epoch": 0.03893108774305482,
368
+ "grad_norm": 141.79612731933594,
369
+ "learning_rate": 0.0001,
370
+ "loss": 20.8687,
371
+ "step": 460
372
+ },
373
+ {
374
+ "epoch": 0.039777415737469055,
375
+ "grad_norm": 349.568603515625,
376
+ "learning_rate": 0.0001,
377
+ "loss": 20.5823,
378
+ "step": 470
379
+ },
380
+ {
381
+ "epoch": 0.040623743731883294,
382
+ "grad_norm": 530.2523193359375,
383
+ "learning_rate": 0.0001,
384
+ "loss": 20.1998,
385
+ "step": 480
386
+ },
387
+ {
388
+ "epoch": 0.041470071726297526,
389
+ "grad_norm": 237.16026306152344,
390
+ "learning_rate": 0.0001,
391
+ "loss": 20.5495,
392
+ "step": 490
393
+ },
394
+ {
395
+ "epoch": 0.04231639972071176,
396
+ "grad_norm": 135.14451599121094,
397
+ "learning_rate": 0.0001,
398
+ "loss": 20.3705,
399
+ "step": 500
400
+ },
401
+ {
402
+ "epoch": 0.04231639972071176,
403
+ "eval_loss": 4.980154037475586,
404
+ "eval_runtime": 233.3643,
405
+ "eval_samples_per_second": 42.637,
406
+ "eval_steps_per_second": 10.661,
407
+ "step": 500
408
  }
409
  ],
410
  "logging_steps": 10,
 
428
  "should_evaluate": false,
429
  "should_log": false,
430
  "should_save": true,
431
+ "should_training_stop": true
432
  },
433
  "attributes": {}
434
  }
435
  },
436
+ "total_flos": 1.3086882791424e+16,
437
  "train_batch_size": 4,
438
  "trial_name": null,
439
  "trial_params": null