romainnn commited on
Commit
c1301d2
·
verified ·
1 Parent(s): ba0e71e

Training in progress, step 1344, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d428d0fcbd28adbb948bf89771cac30bc1be8468636fe48cbec5bd96726d703f
3
  size 159967880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d381fc9d2baf1537400899407212ebb9a4d0bfe09fb947104079ee02fbc31282
3
  size 159967880
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:093077c817c8ec77b7d913173bb54ed6bc38a1faf3eab86f346d2a21243606db
3
  size 81730644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d2daa0b731a3407eabb4ad05b7706b9214c2a2d905533de698a8df123909280
3
  size 81730644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9be6563fd9bac8b0f5354c2a89cc3e2af9d56ec377cec6cc13d1b649d2bac02c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c69e020a9e400b6d738a8ec17a4d6c33a7f2337aa9121cbdc73f0efaaa46392
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5fc481c1c7314024438320e4d4df85c394e6afa7a5f85f07beec6af0275441f4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:463032c4d55d0ec912965e262645f4cf323e9bf452edfe308ba59def68123cc6
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.6225559711456299,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-1300",
4
- "epoch": 0.053874844591794445,
5
  "eval_steps": 100,
6
- "global_step": 1300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -9219,6 +9219,314 @@
9219
  "eval_samples_per_second": 7.295,
9220
  "eval_steps_per_second": 1.824,
9221
  "step": 1300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9222
  }
9223
  ],
9224
  "logging_steps": 1,
@@ -9242,12 +9550,12 @@
9242
  "should_evaluate": false,
9243
  "should_log": false,
9244
  "should_save": true,
9245
- "should_training_stop": false
9246
  },
9247
  "attributes": {}
9248
  }
9249
  },
9250
- "total_flos": 3.4438208526452e+18,
9251
  "train_batch_size": 4,
9252
  "trial_name": null,
9253
  "trial_params": null
 
1
  {
2
  "best_metric": 0.6225559711456299,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-1300",
4
+ "epoch": 0.055698300870285954,
5
  "eval_steps": 100,
6
+ "global_step": 1344,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
9219
  "eval_samples_per_second": 7.295,
9220
  "eval_steps_per_second": 1.824,
9221
  "step": 1300
9222
+ },
9223
+ {
9224
+ "epoch": 0.05391628677994198,
9225
+ "grad_norm": 0.27076980471611023,
9226
+ "learning_rate": 5.122993894501326e-07,
9227
+ "loss": 0.6658,
9228
+ "step": 1301
9229
+ },
9230
+ {
9231
+ "epoch": 0.053957728968089515,
9232
+ "grad_norm": 0.29129520058631897,
9233
+ "learning_rate": 4.887677832549287e-07,
9234
+ "loss": 0.6478,
9235
+ "step": 1302
9236
+ },
9237
+ {
9238
+ "epoch": 0.05399917115623705,
9239
+ "grad_norm": 0.2577592730522156,
9240
+ "learning_rate": 4.6578807654562575e-07,
9241
+ "loss": 0.6443,
9242
+ "step": 1303
9243
+ },
9244
+ {
9245
+ "epoch": 0.054040613344384585,
9246
+ "grad_norm": 0.30056890845298767,
9247
+ "learning_rate": 4.4336039677002063e-07,
9248
+ "loss": 0.6037,
9249
+ "step": 1304
9250
+ },
9251
+ {
9252
+ "epoch": 0.05408205553253212,
9253
+ "grad_norm": 0.30648863315582275,
9254
+ "learning_rate": 4.214848683143258e-07,
9255
+ "loss": 0.6497,
9256
+ "step": 1305
9257
+ },
9258
+ {
9259
+ "epoch": 0.054123497720679654,
9260
+ "grad_norm": 0.26266247034072876,
9261
+ "learning_rate": 4.00161612502481e-07,
9262
+ "loss": 0.6044,
9263
+ "step": 1306
9264
+ },
9265
+ {
9266
+ "epoch": 0.05416493990882719,
9267
+ "grad_norm": 0.30877622961997986,
9268
+ "learning_rate": 3.793907475954206e-07,
9269
+ "loss": 0.5746,
9270
+ "step": 1307
9271
+ },
9272
+ {
9273
+ "epoch": 0.05420638209697472,
9274
+ "grad_norm": 0.294593870639801,
9275
+ "learning_rate": 3.591723887904963e-07,
9276
+ "loss": 0.5445,
9277
+ "step": 1308
9278
+ },
9279
+ {
9280
+ "epoch": 0.05424782428512225,
9281
+ "grad_norm": 0.2581484019756317,
9282
+ "learning_rate": 3.395066482208109e-07,
9283
+ "loss": 0.5654,
9284
+ "step": 1309
9285
+ },
9286
+ {
9287
+ "epoch": 0.05428926647326979,
9288
+ "grad_norm": 0.3082786500453949,
9289
+ "learning_rate": 3.203936349545522e-07,
9290
+ "loss": 0.8039,
9291
+ "step": 1310
9292
+ },
9293
+ {
9294
+ "epoch": 0.05433070866141732,
9295
+ "grad_norm": 0.2795157730579376,
9296
+ "learning_rate": 3.0183345499447124e-07,
9297
+ "loss": 0.6605,
9298
+ "step": 1311
9299
+ },
9300
+ {
9301
+ "epoch": 0.05437215084956486,
9302
+ "grad_norm": 0.2830980122089386,
9303
+ "learning_rate": 2.838262112772161e-07,
9304
+ "loss": 0.6486,
9305
+ "step": 1312
9306
+ },
9307
+ {
9308
+ "epoch": 0.05441359303771239,
9309
+ "grad_norm": 0.2507122755050659,
9310
+ "learning_rate": 2.66372003672799e-07,
9311
+ "loss": 0.7049,
9312
+ "step": 1313
9313
+ },
9314
+ {
9315
+ "epoch": 0.05445503522585993,
9316
+ "grad_norm": 0.26423487067222595,
9317
+ "learning_rate": 2.4947092898406356e-07,
9318
+ "loss": 0.6777,
9319
+ "step": 1314
9320
+ },
9321
+ {
9322
+ "epoch": 0.05449647741400746,
9323
+ "grad_norm": 0.2655765414237976,
9324
+ "learning_rate": 2.3312308094607382e-07,
9325
+ "loss": 0.5595,
9326
+ "step": 1315
9327
+ },
9328
+ {
9329
+ "epoch": 0.054537919602155,
9330
+ "grad_norm": 0.26185721158981323,
9331
+ "learning_rate": 2.1732855022570388e-07,
9332
+ "loss": 0.7014,
9333
+ "step": 1316
9334
+ },
9335
+ {
9336
+ "epoch": 0.054579361790302525,
9337
+ "grad_norm": 0.27304011583328247,
9338
+ "learning_rate": 2.020874244210047e-07,
9339
+ "loss": 0.6617,
9340
+ "step": 1317
9341
+ },
9342
+ {
9343
+ "epoch": 0.05462080397845006,
9344
+ "grad_norm": 0.2839515805244446,
9345
+ "learning_rate": 1.8739978806082691e-07,
9346
+ "loss": 0.5651,
9347
+ "step": 1318
9348
+ },
9349
+ {
9350
+ "epoch": 0.054662246166597594,
9351
+ "grad_norm": 0.25976067781448364,
9352
+ "learning_rate": 1.7326572260433216e-07,
9353
+ "loss": 0.5801,
9354
+ "step": 1319
9355
+ },
9356
+ {
9357
+ "epoch": 0.05470368835474513,
9358
+ "grad_norm": 0.28524279594421387,
9359
+ "learning_rate": 1.596853064404602e-07,
9360
+ "loss": 0.665,
9361
+ "step": 1320
9362
+ },
9363
+ {
9364
+ "epoch": 0.054745130542892664,
9365
+ "grad_norm": 0.28713470697402954,
9366
+ "learning_rate": 1.4665861488761813e-07,
9367
+ "loss": 0.6184,
9368
+ "step": 1321
9369
+ },
9370
+ {
9371
+ "epoch": 0.0547865727310402,
9372
+ "grad_norm": 0.24788232147693634,
9373
+ "learning_rate": 1.3418572019314736e-07,
9374
+ "loss": 0.5796,
9375
+ "step": 1322
9376
+ },
9377
+ {
9378
+ "epoch": 0.054828014919187734,
9379
+ "grad_norm": 0.2749631404876709,
9380
+ "learning_rate": 1.2226669153302395e-07,
9381
+ "loss": 0.625,
9382
+ "step": 1323
9383
+ },
9384
+ {
9385
+ "epoch": 0.05486945710733527,
9386
+ "grad_norm": 0.27082398533821106,
9387
+ "learning_rate": 1.109015950113812e-07,
9388
+ "loss": 0.5955,
9389
+ "step": 1324
9390
+ },
9391
+ {
9392
+ "epoch": 0.054910899295482804,
9393
+ "grad_norm": 0.23720403015613556,
9394
+ "learning_rate": 1.0009049366022094e-07,
9395
+ "loss": 0.6055,
9396
+ "step": 1325
9397
+ },
9398
+ {
9399
+ "epoch": 0.05495234148363034,
9400
+ "grad_norm": 0.254350483417511,
9401
+ "learning_rate": 8.98334474390139e-08,
9402
+ "loss": 0.6254,
9403
+ "step": 1326
9404
+ },
9405
+ {
9406
+ "epoch": 0.05499378367177787,
9407
+ "grad_norm": 0.2699224352836609,
9408
+ "learning_rate": 8.01305132343999e-08,
9409
+ "loss": 0.5862,
9410
+ "step": 1327
9411
+ },
9412
+ {
9413
+ "epoch": 0.0550352258599254,
9414
+ "grad_norm": 0.26495426893234253,
9415
+ "learning_rate": 7.098174485982156e-08,
9416
+ "loss": 0.7193,
9417
+ "step": 1328
9418
+ },
9419
+ {
9420
+ "epoch": 0.05507666804807294,
9421
+ "grad_norm": 0.2615576386451721,
9422
+ "learning_rate": 6.238719305530216e-08,
9423
+ "loss": 0.6821,
9424
+ "step": 1329
9425
+ },
9426
+ {
9427
+ "epoch": 0.05511811023622047,
9428
+ "grad_norm": 0.27609899640083313,
9429
+ "learning_rate": 5.4346905487101526e-08,
9430
+ "loss": 0.5864,
9431
+ "step": 1330
9432
+ },
9433
+ {
9434
+ "epoch": 0.05515955242436801,
9435
+ "grad_norm": 0.29252058267593384,
9436
+ "learning_rate": 4.686092674748288e-08,
9437
+ "loss": 0.7111,
9438
+ "step": 1331
9439
+ },
9440
+ {
9441
+ "epoch": 0.05520099461251554,
9442
+ "grad_norm": 0.31968146562576294,
9443
+ "learning_rate": 3.9929298354446365e-08,
9444
+ "loss": 0.6327,
9445
+ "step": 1332
9446
+ },
9447
+ {
9448
+ "epoch": 0.055242436800663076,
9449
+ "grad_norm": 0.2793462574481964,
9450
+ "learning_rate": 3.3552058751518124e-08,
9451
+ "loss": 0.7731,
9452
+ "step": 1333
9453
+ },
9454
+ {
9455
+ "epoch": 0.05528387898881061,
9456
+ "grad_norm": 0.2697681486606598,
9457
+ "learning_rate": 2.772924330751714e-08,
9458
+ "loss": 0.6196,
9459
+ "step": 1334
9460
+ },
9461
+ {
9462
+ "epoch": 0.055325321176958146,
9463
+ "grad_norm": 0.271357923746109,
9464
+ "learning_rate": 2.2460884316377607e-08,
9465
+ "loss": 0.5377,
9466
+ "step": 1335
9467
+ },
9468
+ {
9469
+ "epoch": 0.055366763365105674,
9470
+ "grad_norm": 0.26375240087509155,
9471
+ "learning_rate": 1.7747010996949087e-08,
9472
+ "loss": 0.6309,
9473
+ "step": 1336
9474
+ },
9475
+ {
9476
+ "epoch": 0.05540820555325321,
9477
+ "grad_norm": 0.25772351026535034,
9478
+ "learning_rate": 1.3587649492874388e-08,
9479
+ "loss": 0.7001,
9480
+ "step": 1337
9481
+ },
9482
+ {
9483
+ "epoch": 0.055449647741400744,
9484
+ "grad_norm": 0.29098233580589294,
9485
+ "learning_rate": 9.982822872378617e-09,
9486
+ "loss": 0.6397,
9487
+ "step": 1338
9488
+ },
9489
+ {
9490
+ "epoch": 0.05549108992954828,
9491
+ "grad_norm": 0.29976314306259155,
9492
+ "learning_rate": 6.932551128202569e-09,
9493
+ "loss": 0.5868,
9494
+ "step": 1339
9495
+ },
9496
+ {
9497
+ "epoch": 0.055532532117695814,
9498
+ "grad_norm": 0.26712578535079956,
9499
+ "learning_rate": 4.436851177480605e-09,
9500
+ "loss": 0.603,
9501
+ "step": 1340
9502
+ },
9503
+ {
9504
+ "epoch": 0.05557397430584335,
9505
+ "grad_norm": 0.2561587393283844,
9506
+ "learning_rate": 2.495736861607423e-09,
9507
+ "loss": 0.6869,
9508
+ "step": 1341
9509
+ },
9510
+ {
9511
+ "epoch": 0.055615416493990884,
9512
+ "grad_norm": 0.3008159399032593,
9513
+ "learning_rate": 1.1092189462047486e-09,
9514
+ "loss": 0.6112,
9515
+ "step": 1342
9516
+ },
9517
+ {
9518
+ "epoch": 0.05565685868213842,
9519
+ "grad_norm": 0.2835893929004669,
9520
+ "learning_rate": 2.7730512104362506e-10,
9521
+ "loss": 0.6679,
9522
+ "step": 1343
9523
+ },
9524
+ {
9525
+ "epoch": 0.055698300870285954,
9526
+ "grad_norm": 0.3091060519218445,
9527
+ "learning_rate": 0.0,
9528
+ "loss": 0.7057,
9529
+ "step": 1344
9530
  }
9531
  ],
9532
  "logging_steps": 1,
 
9550
  "should_evaluate": false,
9551
  "should_log": false,
9552
  "should_save": true,
9553
+ "should_training_stop": true
9554
  },
9555
  "attributes": {}
9556
  }
9557
  },
9558
+ "total_flos": 3.559845444924211e+18,
9559
  "train_batch_size": 4,
9560
  "trial_name": null,
9561
  "trial_params": null