MohamedAhmedAE commited on
Commit
56b28ea
·
verified ·
1 Parent(s): 8320523

Training in progress, step 325800

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c868a77233ef8b4ffccf85da2925d507b66fc046e34d4b9fc2f6e440b88a6e39
3
  size 778096664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5a877bc1ced1c90857fb89b0d39b0f5b375a46377a51f59e1855144d87589c3
3
  size 778096664
last-checkpoint/adapter_config.json CHANGED
@@ -23,13 +23,13 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "up_proj",
27
- "q_proj",
28
- "gate_proj",
29
  "k_proj",
 
30
  "o_proj",
 
31
  "v_proj",
32
- "down_proj"
 
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
 
 
 
26
  "k_proj",
27
+ "q_proj",
28
  "o_proj",
29
+ "down_proj",
30
  "v_proj",
31
+ "gate_proj",
32
+ "up_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c868a77233ef8b4ffccf85da2925d507b66fc046e34d4b9fc2f6e440b88a6e39
3
  size 778096664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b2960647651f74a021854aa83d2f1e44e7edf833c93d30f3f0898a3b62998f8
3
  size 778096664
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4c385aaaa6fe54c1f3155d2d1568d637b549fc2e0574e8e48d3469802a32964
3
  size 396582274
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1edc103979e14bdcf4aecb105646466ee2930c03c3298174482e60391794d7b7
3
  size 396582274
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3571f55c7fa54aa6d4a15b56b980812efeacc7637bba147c6a1430e2b591b85
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f617a484c0b6df4d5c6eea4305a692f5553b5d1313d4e6a74ecee2702d9ed2da
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca27a8a440d342b66d03ff4f15a2f748adf85ff99e3b3c24a325f044d3abf679
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d177f6580f087efd416cbcb9dd0a3c522d49c6ad1fc91a3bc8e0586cfb577f5
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9057955895321632,
5
  "eval_steps": 500,
6
- "global_step": 325600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -11060,349 +11060,6 @@
11060
  "learning_rate": 1.432751373441014e-05,
11061
  "loss": 1.715,
11062
  "step": 315800
11063
- },
11064
- {
11065
- "epoch": 0.8790890856638931,
11066
- "grad_norm": 2.656094551086426,
11067
- "learning_rate": 1.4321506470902243e-05,
11068
- "loss": 1.6757,
11069
- "step": 316000
11070
- },
11071
- {
11072
- "epoch": 0.8796454711611487,
11073
- "grad_norm": 2.0576250553131104,
11074
- "learning_rate": 1.4315498198000568e-05,
11075
- "loss": 1.695,
11076
- "step": 316200
11077
- },
11078
- {
11079
- "epoch": 0.8802018566584043,
11080
- "grad_norm": 1.9753005504608154,
11081
- "learning_rate": 1.4309488920294356e-05,
11082
- "loss": 1.7,
11083
- "step": 316400
11084
- },
11085
- {
11086
- "epoch": 0.88075824215566,
11087
- "grad_norm": 1.6092278957366943,
11088
- "learning_rate": 1.430347864237362e-05,
11089
- "loss": 1.6588,
11090
- "step": 316600
11091
- },
11092
- {
11093
- "epoch": 0.8813146276529156,
11094
- "grad_norm": 1.910270094871521,
11095
- "learning_rate": 1.4297467368829136e-05,
11096
- "loss": 1.6998,
11097
- "step": 316800
11098
- },
11099
- {
11100
- "epoch": 0.8818710131501712,
11101
- "grad_norm": 3.1925556659698486,
11102
- "learning_rate": 1.4291455104252431e-05,
11103
- "loss": 1.6877,
11104
- "step": 317000
11105
- },
11106
- {
11107
- "epoch": 0.8824273986474268,
11108
- "grad_norm": 2.2833807468414307,
11109
- "learning_rate": 1.4285441853235804e-05,
11110
- "loss": 1.6788,
11111
- "step": 317200
11112
- },
11113
- {
11114
- "epoch": 0.8829837841446825,
11115
- "grad_norm": 2.9970617294311523,
11116
- "learning_rate": 1.4279427620372295e-05,
11117
- "loss": 1.6621,
11118
- "step": 317400
11119
- },
11120
- {
11121
- "epoch": 0.8835401696419382,
11122
- "grad_norm": 3.0393621921539307,
11123
- "learning_rate": 1.4273412410255701e-05,
11124
- "loss": 1.69,
11125
- "step": 317600
11126
- },
11127
- {
11128
- "epoch": 0.8840965551391937,
11129
- "grad_norm": 1.8881953954696655,
11130
- "learning_rate": 1.4267396227480556e-05,
11131
- "loss": 1.6958,
11132
- "step": 317800
11133
- },
11134
- {
11135
- "epoch": 0.8846529406364494,
11136
- "grad_norm": 2.3096697330474854,
11137
- "learning_rate": 1.4261379076642155e-05,
11138
- "loss": 1.6904,
11139
- "step": 318000
11140
- },
11141
- {
11142
- "epoch": 0.885209326133705,
11143
- "grad_norm": 2.055274486541748,
11144
- "learning_rate": 1.4255360962336515e-05,
11145
- "loss": 1.6806,
11146
- "step": 318200
11147
- },
11148
- {
11149
- "epoch": 0.8857657116309606,
11150
- "grad_norm": 2.894618511199951,
11151
- "learning_rate": 1.4249341889160393e-05,
11152
- "loss": 1.691,
11153
- "step": 318400
11154
- },
11155
- {
11156
- "epoch": 0.8863220971282163,
11157
- "grad_norm": 2.6434152126312256,
11158
- "learning_rate": 1.4243321861711285e-05,
11159
- "loss": 1.707,
11160
- "step": 318600
11161
- },
11162
- {
11163
- "epoch": 0.8868784826254719,
11164
- "grad_norm": 1.936525821685791,
11165
- "learning_rate": 1.423730088458741e-05,
11166
- "loss": 1.7093,
11167
- "step": 318800
11168
- },
11169
- {
11170
- "epoch": 0.8874348681227275,
11171
- "grad_norm": 1.9834107160568237,
11172
- "learning_rate": 1.4231278962387715e-05,
11173
- "loss": 1.713,
11174
- "step": 319000
11175
- },
11176
- {
11177
- "epoch": 0.8879912536199831,
11178
- "grad_norm": 2.6799023151397705,
11179
- "learning_rate": 1.4225256099711869e-05,
11180
- "loss": 1.6735,
11181
- "step": 319200
11182
- },
11183
- {
11184
- "epoch": 0.8885476391172388,
11185
- "grad_norm": 2.4906699657440186,
11186
- "learning_rate": 1.4219232301160252e-05,
11187
- "loss": 1.6612,
11188
- "step": 319400
11189
- },
11190
- {
11191
- "epoch": 0.8891040246144944,
11192
- "grad_norm": 3.0069377422332764,
11193
- "learning_rate": 1.4213207571333971e-05,
11194
- "loss": 1.6597,
11195
- "step": 319600
11196
- },
11197
- {
11198
- "epoch": 0.88966041011175,
11199
- "grad_norm": 2.4262261390686035,
11200
- "learning_rate": 1.4207181914834838e-05,
11201
- "loss": 1.6934,
11202
- "step": 319800
11203
- },
11204
- {
11205
- "epoch": 0.8902167956090057,
11206
- "grad_norm": 1.9308116436004639,
11207
- "learning_rate": 1.420115533626537e-05,
11208
- "loss": 1.7004,
11209
- "step": 320000
11210
- },
11211
- {
11212
- "epoch": 0.8907731811062612,
11213
- "grad_norm": 2.419318199157715,
11214
- "learning_rate": 1.4195127840228795e-05,
11215
- "loss": 1.6635,
11216
- "step": 320200
11217
- },
11218
- {
11219
- "epoch": 0.8913295666035169,
11220
- "grad_norm": 2.2832937240600586,
11221
- "learning_rate": 1.4189099431329036e-05,
11222
- "loss": 1.6958,
11223
- "step": 320400
11224
- },
11225
- {
11226
- "epoch": 0.8918859521007726,
11227
- "grad_norm": 2.273939609527588,
11228
- "learning_rate": 1.4183070114170717e-05,
11229
- "loss": 1.6445,
11230
- "step": 320600
11231
- },
11232
- {
11233
- "epoch": 0.8924423375980282,
11234
- "grad_norm": 2.2276997566223145,
11235
- "learning_rate": 1.4177039893359151e-05,
11236
- "loss": 1.6742,
11237
- "step": 320800
11238
- },
11239
- {
11240
- "epoch": 0.8929987230952838,
11241
- "grad_norm": 2.669675350189209,
11242
- "learning_rate": 1.4171008773500346e-05,
11243
- "loss": 1.7064,
11244
- "step": 321000
11245
- },
11246
- {
11247
- "epoch": 0.8935551085925394,
11248
- "grad_norm": 1.8450345993041992,
11249
- "learning_rate": 1.4164976759200998e-05,
11250
- "loss": 1.6997,
11251
- "step": 321200
11252
- },
11253
- {
11254
- "epoch": 0.8941114940897951,
11255
- "grad_norm": 2.5424864292144775,
11256
- "learning_rate": 1.4158943855068478e-05,
11257
- "loss": 1.6985,
11258
- "step": 321400
11259
- },
11260
- {
11261
- "epoch": 0.8946678795870506,
11262
- "grad_norm": 2.899928569793701,
11263
- "learning_rate": 1.415291006571085e-05,
11264
- "loss": 1.688,
11265
- "step": 321600
11266
- },
11267
- {
11268
- "epoch": 0.8952242650843063,
11269
- "grad_norm": 2.4306631088256836,
11270
- "learning_rate": 1.4146875395736839e-05,
11271
- "loss": 1.7061,
11272
- "step": 321800
11273
- },
11274
- {
11275
- "epoch": 0.895780650581562,
11276
- "grad_norm": 3.6513545513153076,
11277
- "learning_rate": 1.4140839849755852e-05,
11278
- "loss": 1.7021,
11279
- "step": 322000
11280
- },
11281
- {
11282
- "epoch": 0.8963370360788175,
11283
- "grad_norm": 2.4351377487182617,
11284
- "learning_rate": 1.4134803432377963e-05,
11285
- "loss": 1.6831,
11286
- "step": 322200
11287
- },
11288
- {
11289
- "epoch": 0.8968934215760732,
11290
- "grad_norm": 2.7318243980407715,
11291
- "learning_rate": 1.4128766148213911e-05,
11292
- "loss": 1.7039,
11293
- "step": 322400
11294
- },
11295
- {
11296
- "epoch": 0.8974498070733288,
11297
- "grad_norm": 2.675609827041626,
11298
- "learning_rate": 1.4122728001875102e-05,
11299
- "loss": 1.6713,
11300
- "step": 322600
11301
- },
11302
- {
11303
- "epoch": 0.8980061925705844,
11304
- "grad_norm": 2.1647613048553467,
11305
- "learning_rate": 1.4116688997973589e-05,
11306
- "loss": 1.678,
11307
- "step": 322800
11308
- },
11309
- {
11310
- "epoch": 0.8985625780678401,
11311
- "grad_norm": 2.230194330215454,
11312
- "learning_rate": 1.411064914112209e-05,
11313
- "loss": 1.6854,
11314
- "step": 323000
11315
- },
11316
- {
11317
- "epoch": 0.8991189635650957,
11318
- "grad_norm": 2.459803819656372,
11319
- "learning_rate": 1.4104608435933977e-05,
11320
- "loss": 1.6688,
11321
- "step": 323200
11322
- },
11323
- {
11324
- "epoch": 0.8996753490623514,
11325
- "grad_norm": 2.098407030105591,
11326
- "learning_rate": 1.4098566887023254e-05,
11327
- "loss": 1.6779,
11328
- "step": 323400
11329
- },
11330
- {
11331
- "epoch": 0.9002317345596069,
11332
- "grad_norm": 2.0758612155914307,
11333
- "learning_rate": 1.4092524499004592e-05,
11334
- "loss": 1.6629,
11335
- "step": 323600
11336
- },
11337
- {
11338
- "epoch": 0.9007881200568626,
11339
- "grad_norm": 4.269808769226074,
11340
- "learning_rate": 1.4086481276493285e-05,
11341
- "loss": 1.6618,
11342
- "step": 323800
11343
- },
11344
- {
11345
- "epoch": 0.9013445055541183,
11346
- "grad_norm": 2.4955685138702393,
11347
- "learning_rate": 1.4080437224105268e-05,
11348
- "loss": 1.6878,
11349
- "step": 324000
11350
- },
11351
- {
11352
- "epoch": 0.9019008910513738,
11353
- "grad_norm": 1.8088123798370361,
11354
- "learning_rate": 1.4074392346457116e-05,
11355
- "loss": 1.6641,
11356
- "step": 324200
11357
- },
11358
- {
11359
- "epoch": 0.9024572765486295,
11360
- "grad_norm": 4.655933380126953,
11361
- "learning_rate": 1.4068346648166026e-05,
11362
- "loss": 1.6958,
11363
- "step": 324400
11364
- },
11365
- {
11366
- "epoch": 0.9030136620458851,
11367
- "grad_norm": 2.517047643661499,
11368
- "learning_rate": 1.4062300133849833e-05,
11369
- "loss": 1.6641,
11370
- "step": 324600
11371
- },
11372
- {
11373
- "epoch": 0.9035700475431407,
11374
- "grad_norm": 2.3092081546783447,
11375
- "learning_rate": 1.4056252808126984e-05,
11376
- "loss": 1.7042,
11377
- "step": 324800
11378
- },
11379
- {
11380
- "epoch": 0.9041264330403964,
11381
- "grad_norm": 1.8197965621948242,
11382
- "learning_rate": 1.4050204675616556e-05,
11383
- "loss": 1.6906,
11384
- "step": 325000
11385
- },
11386
- {
11387
- "epoch": 0.904682818537652,
11388
- "grad_norm": 2.1050660610198975,
11389
- "learning_rate": 1.4044155740938232e-05,
11390
- "loss": 1.667,
11391
- "step": 325200
11392
- },
11393
- {
11394
- "epoch": 0.9052392040349077,
11395
- "grad_norm": 2.001034736633301,
11396
- "learning_rate": 1.4038106008712313e-05,
11397
- "loss": 1.6856,
11398
- "step": 325400
11399
- },
11400
- {
11401
- "epoch": 0.9057955895321632,
11402
- "grad_norm": 3.069322109222412,
11403
- "learning_rate": 1.403205548355971e-05,
11404
- "loss": 1.6637,
11405
- "step": 325600
11406
  }
11407
  ],
11408
  "logging_steps": 200,
@@ -11422,7 +11079,7 @@
11422
  "attributes": {}
11423
  }
11424
  },
11425
- "total_flos": 6.949085056264888e+18,
11426
  "train_batch_size": 4,
11427
  "trial_name": null,
11428
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.8785327001666374,
5
  "eval_steps": 500,
6
+ "global_step": 315800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
11060
  "learning_rate": 1.432751373441014e-05,
11061
  "loss": 1.715,
11062
  "step": 315800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11063
  }
11064
  ],
11065
  "logging_steps": 200,
 
11079
  "attributes": {}
11080
  }
11081
  },
11082
+ "total_flos": 6.739425054956544e+18,
11083
  "train_batch_size": 4,
11084
  "trial_name": null,
11085
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd16de5adef1a3d8e5dda5ca857dfcae914dc1f4288fd4d554e06a2ef4a21584
3
  size 6840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73f1bca4a88b95f7d26d58c9cb2c2e4f7a79a6873effa8be0378f7f0ebfb8ceb
3
  size 6840