MohamedAhmedAE commited on
Commit
24605c3
·
verified ·
1 Parent(s): 3378e04

Training in progress, step 161000, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -23,13 +23,13 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
 
 
26
  "up_proj",
 
27
  "gate_proj",
28
  "k_proj",
29
- "v_proj",
30
- "down_proj",
31
- "o_proj",
32
- "q_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
+ "down_proj",
27
+ "v_proj",
28
  "up_proj",
29
+ "q_proj",
30
  "gate_proj",
31
  "k_proj",
32
+ "o_proj"
 
 
 
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:521de62bb4838c45dec76e6c34ea11fd647aecdff9a953b901524a65d131b4f8
3
  size 1556140392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:776b57e8cf86af8f45ec851c7f40dff293a2260f466d76c2635a529669ea7be1
3
  size 1556140392
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eab9a0bc30d111a8d214bac8a9f22289389b5c8dba44e3e3060691bceccf3d9f
3
  size 791683586
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e35e6b330681427d92536d5b866820fd3210e6f1045fb36af35a966e2802ccfa
3
  size 791683586
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be061cbaa280e39435db7c14f80119164fc3f359a50ce457db220691644cb576
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cd19186e1a1c47e5da80a5032267ec74bc9c4149fe528d2a46b67a9b8d64752
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.10501783564500076,
5
  "eval_steps": 500,
6
- "global_step": 151000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -5292,6 +5292,356 @@
5292
  "learning_rate": 1.9904972598324345e-05,
5293
  "loss": 1.8064,
5294
  "step": 151000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5295
  }
5296
  ],
5297
  "logging_steps": 200,
@@ -5311,7 +5661,7 @@
5311
  "attributes": {}
5312
  }
5313
  },
5314
- "total_flos": 8.25450882511233e+17,
5315
  "train_batch_size": 1,
5316
  "trial_name": null,
5317
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.11197265919764983,
5
  "eval_steps": 500,
6
+ "global_step": 161000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
5292
  "learning_rate": 1.9904972598324345e-05,
5293
  "loss": 1.8064,
5294
  "step": 151000
5295
+ },
5296
+ {
5297
+ "epoch": 0.10515693211605374,
5298
+ "grad_norm": 0.6552620530128479,
5299
+ "learning_rate": 1.9904721276394122e-05,
5300
+ "loss": 1.7752,
5301
+ "step": 151200
5302
+ },
5303
+ {
5304
+ "epoch": 0.10529602858710672,
5305
+ "grad_norm": 0.8357005715370178,
5306
+ "learning_rate": 1.990446962484043e-05,
5307
+ "loss": 1.7828,
5308
+ "step": 151400
5309
+ },
5310
+ {
5311
+ "epoch": 0.10543512505815972,
5312
+ "grad_norm": 1.1673591136932373,
5313
+ "learning_rate": 1.9904217643675287e-05,
5314
+ "loss": 1.8316,
5315
+ "step": 151600
5316
+ },
5317
+ {
5318
+ "epoch": 0.1055742215292127,
5319
+ "grad_norm": 3.075096845626831,
5320
+ "learning_rate": 1.9903965332910706e-05,
5321
+ "loss": 1.7649,
5322
+ "step": 151800
5323
+ },
5324
+ {
5325
+ "epoch": 0.10571331800026568,
5326
+ "grad_norm": 1.600201964378357,
5327
+ "learning_rate": 1.990371269255875e-05,
5328
+ "loss": 1.815,
5329
+ "step": 152000
5330
+ },
5331
+ {
5332
+ "epoch": 0.10585241447131866,
5333
+ "grad_norm": 1.1825553178787231,
5334
+ "learning_rate": 1.9903459722631466e-05,
5335
+ "loss": 1.8219,
5336
+ "step": 152200
5337
+ },
5338
+ {
5339
+ "epoch": 0.10599151094237164,
5340
+ "grad_norm": 0.8359824419021606,
5341
+ "learning_rate": 1.9903206423140936e-05,
5342
+ "loss": 1.7919,
5343
+ "step": 152400
5344
+ },
5345
+ {
5346
+ "epoch": 0.10613060741342462,
5347
+ "grad_norm": 1.7081679105758667,
5348
+ "learning_rate": 1.9902952794099257e-05,
5349
+ "loss": 1.7764,
5350
+ "step": 152600
5351
+ },
5352
+ {
5353
+ "epoch": 0.1062697038844776,
5354
+ "grad_norm": 0.9274424314498901,
5355
+ "learning_rate": 1.9902698835518533e-05,
5356
+ "loss": 1.7792,
5357
+ "step": 152800
5358
+ },
5359
+ {
5360
+ "epoch": 0.10640880035553058,
5361
+ "grad_norm": 1.007084608078003,
5362
+ "learning_rate": 1.9902444547410883e-05,
5363
+ "loss": 1.7766,
5364
+ "step": 153000
5365
+ },
5366
+ {
5367
+ "epoch": 0.10654789682658355,
5368
+ "grad_norm": 0.8499106764793396,
5369
+ "learning_rate": 1.9902189929788453e-05,
5370
+ "loss": 1.7173,
5371
+ "step": 153200
5372
+ },
5373
+ {
5374
+ "epoch": 0.10668699329763655,
5375
+ "grad_norm": 0.8728858232498169,
5376
+ "learning_rate": 1.9901934982663393e-05,
5377
+ "loss": 1.7235,
5378
+ "step": 153400
5379
+ },
5380
+ {
5381
+ "epoch": 0.10682608976868953,
5382
+ "grad_norm": 1.5445681810379028,
5383
+ "learning_rate": 1.990167970604788e-05,
5384
+ "loss": 1.8182,
5385
+ "step": 153600
5386
+ },
5387
+ {
5388
+ "epoch": 0.10696518623974251,
5389
+ "grad_norm": 1.0307021141052246,
5390
+ "learning_rate": 1.9901424099954094e-05,
5391
+ "loss": 1.7935,
5392
+ "step": 153800
5393
+ },
5394
+ {
5395
+ "epoch": 0.10710428271079549,
5396
+ "grad_norm": 0.4203742444515228,
5397
+ "learning_rate": 1.9901168164394242e-05,
5398
+ "loss": 1.7632,
5399
+ "step": 154000
5400
+ },
5401
+ {
5402
+ "epoch": 0.10724337918184847,
5403
+ "grad_norm": 1.2339669466018677,
5404
+ "learning_rate": 1.990091189938054e-05,
5405
+ "loss": 1.7986,
5406
+ "step": 154200
5407
+ },
5408
+ {
5409
+ "epoch": 0.10738247565290145,
5410
+ "grad_norm": 0.9977461099624634,
5411
+ "learning_rate": 1.9900655304925225e-05,
5412
+ "loss": 1.7751,
5413
+ "step": 154400
5414
+ },
5415
+ {
5416
+ "epoch": 0.10752157212395443,
5417
+ "grad_norm": 0.8145340085029602,
5418
+ "learning_rate": 1.990039838104054e-05,
5419
+ "loss": 1.7175,
5420
+ "step": 154600
5421
+ },
5422
+ {
5423
+ "epoch": 0.1076606685950074,
5424
+ "grad_norm": 0.8700305819511414,
5425
+ "learning_rate": 1.9900141127738757e-05,
5426
+ "loss": 1.8136,
5427
+ "step": 154800
5428
+ },
5429
+ {
5430
+ "epoch": 0.10779976506606039,
5431
+ "grad_norm": 1.0328384637832642,
5432
+ "learning_rate": 1.989988354503215e-05,
5433
+ "loss": 1.8334,
5434
+ "step": 155000
5435
+ },
5436
+ {
5437
+ "epoch": 0.10793886153711338,
5438
+ "grad_norm": 1.1774046421051025,
5439
+ "learning_rate": 1.9899625632933027e-05,
5440
+ "loss": 1.7683,
5441
+ "step": 155200
5442
+ },
5443
+ {
5444
+ "epoch": 0.10807795800816636,
5445
+ "grad_norm": 1.0147098302841187,
5446
+ "learning_rate": 1.989936739145369e-05,
5447
+ "loss": 1.8265,
5448
+ "step": 155400
5449
+ },
5450
+ {
5451
+ "epoch": 0.10821705447921934,
5452
+ "grad_norm": 1.3206279277801514,
5453
+ "learning_rate": 1.989910882060647e-05,
5454
+ "loss": 1.7738,
5455
+ "step": 155600
5456
+ },
5457
+ {
5458
+ "epoch": 0.10835615095027232,
5459
+ "grad_norm": 0.8004774451255798,
5460
+ "learning_rate": 1.9898849920403708e-05,
5461
+ "loss": 1.8271,
5462
+ "step": 155800
5463
+ },
5464
+ {
5465
+ "epoch": 0.1084952474213253,
5466
+ "grad_norm": 0.9754022359848022,
5467
+ "learning_rate": 1.9898590690857774e-05,
5468
+ "loss": 1.7882,
5469
+ "step": 156000
5470
+ },
5471
+ {
5472
+ "epoch": 0.10863434389237828,
5473
+ "grad_norm": 1.2540456056594849,
5474
+ "learning_rate": 1.989833113198103e-05,
5475
+ "loss": 1.8104,
5476
+ "step": 156200
5477
+ },
5478
+ {
5479
+ "epoch": 0.10877344036343126,
5480
+ "grad_norm": 0.7259592413902283,
5481
+ "learning_rate": 1.9898071243785876e-05,
5482
+ "loss": 1.7974,
5483
+ "step": 156400
5484
+ },
5485
+ {
5486
+ "epoch": 0.10891253683448424,
5487
+ "grad_norm": 1.9316837787628174,
5488
+ "learning_rate": 1.9897811026284718e-05,
5489
+ "loss": 1.7933,
5490
+ "step": 156600
5491
+ },
5492
+ {
5493
+ "epoch": 0.10905163330553722,
5494
+ "grad_norm": 0.8343173861503601,
5495
+ "learning_rate": 1.989755047948997e-05,
5496
+ "loss": 1.7924,
5497
+ "step": 156800
5498
+ },
5499
+ {
5500
+ "epoch": 0.10919072977659021,
5501
+ "grad_norm": 0.7665181159973145,
5502
+ "learning_rate": 1.989728960341408e-05,
5503
+ "loss": 1.7123,
5504
+ "step": 157000
5505
+ },
5506
+ {
5507
+ "epoch": 0.10932982624764319,
5508
+ "grad_norm": 1.001929759979248,
5509
+ "learning_rate": 1.9897028398069503e-05,
5510
+ "loss": 1.8386,
5511
+ "step": 157200
5512
+ },
5513
+ {
5514
+ "epoch": 0.10946892271869617,
5515
+ "grad_norm": 1.0837798118591309,
5516
+ "learning_rate": 1.98967668634687e-05,
5517
+ "loss": 1.7943,
5518
+ "step": 157400
5519
+ },
5520
+ {
5521
+ "epoch": 0.10960801918974915,
5522
+ "grad_norm": 0.9598066806793213,
5523
+ "learning_rate": 1.9896504999624163e-05,
5524
+ "loss": 1.7959,
5525
+ "step": 157600
5526
+ },
5527
+ {
5528
+ "epoch": 0.10974711566080213,
5529
+ "grad_norm": 0.9790207743644714,
5530
+ "learning_rate": 1.989624280654839e-05,
5531
+ "loss": 1.7813,
5532
+ "step": 157800
5533
+ },
5534
+ {
5535
+ "epoch": 0.10988621213185511,
5536
+ "grad_norm": 0.6536526679992676,
5537
+ "learning_rate": 1.98959802842539e-05,
5538
+ "loss": 1.7947,
5539
+ "step": 158000
5540
+ },
5541
+ {
5542
+ "epoch": 0.11002530860290809,
5543
+ "grad_norm": 1.2658668756484985,
5544
+ "learning_rate": 1.9895717432753222e-05,
5545
+ "loss": 1.7699,
5546
+ "step": 158200
5547
+ },
5548
+ {
5549
+ "epoch": 0.11016440507396107,
5550
+ "grad_norm": 2.004922866821289,
5551
+ "learning_rate": 1.9895454252058903e-05,
5552
+ "loss": 1.7686,
5553
+ "step": 158400
5554
+ },
5555
+ {
5556
+ "epoch": 0.11030350154501405,
5557
+ "grad_norm": 1.3296949863433838,
5558
+ "learning_rate": 1.9895190742183518e-05,
5559
+ "loss": 1.7689,
5560
+ "step": 158600
5561
+ },
5562
+ {
5563
+ "epoch": 0.11044259801606704,
5564
+ "grad_norm": 1.112878680229187,
5565
+ "learning_rate": 1.9894926903139633e-05,
5566
+ "loss": 1.8022,
5567
+ "step": 158800
5568
+ },
5569
+ {
5570
+ "epoch": 0.11058169448712002,
5571
+ "grad_norm": 1.8036686182022095,
5572
+ "learning_rate": 1.9894662734939847e-05,
5573
+ "loss": 1.7359,
5574
+ "step": 159000
5575
+ },
5576
+ {
5577
+ "epoch": 0.110720790958173,
5578
+ "grad_norm": 1.7240679264068604,
5579
+ "learning_rate": 1.989439823759678e-05,
5580
+ "loss": 1.794,
5581
+ "step": 159200
5582
+ },
5583
+ {
5584
+ "epoch": 0.11085988742922598,
5585
+ "grad_norm": 0.8310656547546387,
5586
+ "learning_rate": 1.9894133411123047e-05,
5587
+ "loss": 1.7825,
5588
+ "step": 159400
5589
+ },
5590
+ {
5591
+ "epoch": 0.11099898390027896,
5592
+ "grad_norm": 1.0748811960220337,
5593
+ "learning_rate": 1.9893868255531295e-05,
5594
+ "loss": 1.7347,
5595
+ "step": 159600
5596
+ },
5597
+ {
5598
+ "epoch": 0.11113808037133194,
5599
+ "grad_norm": 0.7499359250068665,
5600
+ "learning_rate": 1.989360277083419e-05,
5601
+ "loss": 1.8134,
5602
+ "step": 159800
5603
+ },
5604
+ {
5605
+ "epoch": 0.11127717684238492,
5606
+ "grad_norm": 0.793464183807373,
5607
+ "learning_rate": 1.9893336957044394e-05,
5608
+ "loss": 1.7876,
5609
+ "step": 160000
5610
+ },
5611
+ {
5612
+ "epoch": 0.1114162733134379,
5613
+ "grad_norm": 1.079917073249817,
5614
+ "learning_rate": 1.9893070814174604e-05,
5615
+ "loss": 1.8132,
5616
+ "step": 160200
5617
+ },
5618
+ {
5619
+ "epoch": 0.11155536978449088,
5620
+ "grad_norm": 1.041785717010498,
5621
+ "learning_rate": 1.9892804342237518e-05,
5622
+ "loss": 1.7536,
5623
+ "step": 160400
5624
+ },
5625
+ {
5626
+ "epoch": 0.11169446625554387,
5627
+ "grad_norm": 0.9491516351699829,
5628
+ "learning_rate": 1.9892537541245865e-05,
5629
+ "loss": 1.7811,
5630
+ "step": 160600
5631
+ },
5632
+ {
5633
+ "epoch": 0.11183356272659685,
5634
+ "grad_norm": 1.1019930839538574,
5635
+ "learning_rate": 1.989227041121238e-05,
5636
+ "loss": 1.7485,
5637
+ "step": 160800
5638
+ },
5639
+ {
5640
+ "epoch": 0.11197265919764983,
5641
+ "grad_norm": 1.0586832761764526,
5642
+ "learning_rate": 1.9892002952149815e-05,
5643
+ "loss": 1.7691,
5644
+ "step": 161000
5645
  }
5646
  ],
5647
  "logging_steps": 200,
 
5661
  "attributes": {}
5662
  }
5663
  },
5664
+ "total_flos": 8.806317924606628e+17,
5665
  "train_batch_size": 1,
5666
  "trial_name": null,
5667
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4a6ea4c36d16b3575db891122bc12b5aa423f1afb93579d54d6fe7412f2e22c
3
  size 6776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d151f8446fd7799d24b0e9a99d447d7d465b181c00e55fa7d90224c183012544
3
  size 6776