Training in progress, step 937, checkpoint

Browse files

Files changed (5) hide show

checkpoint-937/adapter_config.json +3 -3
checkpoint-937/adapter_model.safetensors +1 -1
checkpoint-937/optimizer.pt +1 -1
checkpoint-937/trainer_state.json +54 -54
checkpoint-937/training_args.bin +1 -1

checkpoint-937/adapter_config.json CHANGED Viewed

@@ -23,12 +23,12 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "o_proj",
-    "up_proj",
     "k_proj",
-    "down_proj",
     "v_proj",
     "q_proj",
     "gate_proj"
   ],
   "task_type": "CAUSAL_LM",

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "k_proj",
+    "up_proj",
     "v_proj",
+    "o_proj",
     "q_proj",
+    "down_proj",
     "gate_proj"
   ],
   "task_type": "CAUSAL_LM",

checkpoint-937/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f91bd11ebfcd099bb482988fc62b4f7e281a00ad77ba968e111870cd412356da
 size 45118424

 version https://git-lfs.github.com/spec/v1
+oid sha256:40443a6f16112604e5549b7887209b1c83fdfa7d6ef2aa902ec429fcf991cc9b
 size 45118424

checkpoint-937/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a3b619f2756f60ff0f6127a4dd02328968973551f06ddb7a913668ec747773d2
 size 23159546

 version https://git-lfs.github.com/spec/v1
+oid sha256:3948c74c44757b375f6e71036bbdf0e3114274345f46479bec86cdbc4b9c6d9f
 size 23159546

checkpoint-937/trainer_state.json CHANGED Viewed

@@ -10,322 +10,322 @@
   "log_history": [
     {
       "epoch": 0.021333333333333333,
-      "grad_norm": 0.47001057863235474,
       "learning_rate": 0.00019678111587982831,
-      "loss": 1.2773,
       "step": 20
     },
     {
       "epoch": 0.042666666666666665,
-      "grad_norm": 0.3469043970108032,
       "learning_rate": 0.0001924892703862661,
       "loss": 0.8918,
       "step": 40
     },
     {
       "epoch": 0.064,
-      "grad_norm": 0.4485608637332916,
       "learning_rate": 0.00018819742489270387,
       "loss": 0.8588,
       "step": 60
     },
     {
       "epoch": 0.08533333333333333,
-      "grad_norm": 0.4924505054950714,
       "learning_rate": 0.00018390557939914164,
       "loss": 0.7577,
       "step": 80
     },
     {
       "epoch": 0.10666666666666667,
-      "grad_norm": 0.3011874854564667,
       "learning_rate": 0.00017961373390557942,
       "loss": 0.7796,
       "step": 100
     },
     {
       "epoch": 0.128,
-      "grad_norm": 0.35178664326667786,
       "learning_rate": 0.00017532188841201717,
       "loss": 0.7647,
       "step": 120
     },
     {
       "epoch": 0.14933333333333335,
-      "grad_norm": 0.29895663261413574,
       "learning_rate": 0.00017103004291845494,
-      "loss": 0.6741,
       "step": 140
     },
     {
       "epoch": 0.17066666666666666,
-      "grad_norm": 0.265635222196579,
       "learning_rate": 0.00016673819742489272,
       "loss": 0.7586,
       "step": 160
     },
     {
       "epoch": 0.192,
-      "grad_norm": 0.29633283615112305,
       "learning_rate": 0.0001624463519313305,
       "loss": 0.7364,
       "step": 180
     },
     {
       "epoch": 0.21333333333333335,
-      "grad_norm": 0.3618737757205963,
       "learning_rate": 0.00015815450643776824,
       "loss": 0.7943,
       "step": 200
     },
     {
       "epoch": 0.23466666666666666,
-      "grad_norm": 0.2392752766609192,
       "learning_rate": 0.000153862660944206,
-      "loss": 0.7054,
       "step": 220
     },
     {
       "epoch": 0.256,
-      "grad_norm": 0.3207932114601135,
       "learning_rate": 0.00014957081545064377,
       "loss": 0.7495,
       "step": 240
     },
     {
       "epoch": 0.2773333333333333,
-      "grad_norm": 0.3499705493450165,
       "learning_rate": 0.00014527896995708155,
       "loss": 0.7739,
       "step": 260
     },
     {
       "epoch": 0.2986666666666667,
-      "grad_norm": 0.27541521191596985,
       "learning_rate": 0.00014098712446351932,
       "loss": 0.7125,
       "step": 280
     },
     {
       "epoch": 0.32,
-      "grad_norm": 0.27608659863471985,
       "learning_rate": 0.0001366952789699571,
-      "loss": 0.7307,
       "step": 300
     },
     {
       "epoch": 0.3413333333333333,
-      "grad_norm": 0.27710551023483276,
       "learning_rate": 0.00013240343347639485,
       "loss": 0.6974,
       "step": 320
     },
     {
       "epoch": 0.3626666666666667,
-      "grad_norm": 0.334416002035141,
       "learning_rate": 0.00012811158798283262,
       "loss": 0.7555,
       "step": 340
     },
     {
       "epoch": 0.384,
-      "grad_norm": 0.288921594619751,
       "learning_rate": 0.0001238197424892704,
-      "loss": 0.7222,
       "step": 360
     },
     {
       "epoch": 0.4053333333333333,
-      "grad_norm": 0.3428654074668884,
       "learning_rate": 0.00011952789699570816,
       "loss": 0.7466,
       "step": 380
     },
     {
       "epoch": 0.4266666666666667,
-      "grad_norm": 0.42291027307510376,
       "learning_rate": 0.00011523605150214594,
       "loss": 0.6999,
       "step": 400
     },
     {
       "epoch": 0.448,
-      "grad_norm": 0.32916492223739624,
       "learning_rate": 0.0001109442060085837,
       "loss": 0.7554,
       "step": 420
     },
     {
       "epoch": 0.4693333333333333,
-      "grad_norm": 0.343192458152771,
       "learning_rate": 0.00010665236051502145,
       "loss": 0.7347,
       "step": 440
     },
     {
       "epoch": 0.49066666666666664,
-      "grad_norm": 0.35772615671157837,
       "learning_rate": 0.00010236051502145923,
       "loss": 0.7075,
       "step": 460
     },
     {
       "epoch": 0.512,
-      "grad_norm": 0.34257206320762634,
       "learning_rate": 9.8068669527897e-05,
-      "loss": 0.7338,
       "step": 480
     },
     {
       "epoch": 0.5333333333333333,
-      "grad_norm": 0.34853076934814453,
       "learning_rate": 9.377682403433476e-05,
       "loss": 0.7269,
       "step": 500
     },
     {
       "epoch": 0.5546666666666666,
-      "grad_norm": 0.3989846706390381,
       "learning_rate": 8.948497854077254e-05,
       "loss": 0.7505,
       "step": 520
     },
     {
       "epoch": 0.576,
-      "grad_norm": 0.3323940634727478,
       "learning_rate": 8.51931330472103e-05,
       "loss": 0.7085,
       "step": 540
     },
     {
       "epoch": 0.5973333333333334,
-      "grad_norm": 0.31621086597442627,
       "learning_rate": 8.090128755364808e-05,
       "loss": 0.6989,
       "step": 560
     },
     {
       "epoch": 0.6186666666666667,
-      "grad_norm": 0.2995954155921936,
       "learning_rate": 7.660944206008584e-05,
       "loss": 0.7368,
       "step": 580
     },
     {
       "epoch": 0.64,
-      "grad_norm": 0.325448215007782,
       "learning_rate": 7.23175965665236e-05,
       "loss": 0.7167,
       "step": 600
     },
     {
       "epoch": 0.6613333333333333,
-      "grad_norm": 0.29876643419265747,
       "learning_rate": 6.802575107296138e-05,
       "loss": 0.7022,
       "step": 620
     },
     {
       "epoch": 0.6826666666666666,
-      "grad_norm": 0.3770740330219269,
       "learning_rate": 6.373390557939914e-05,
-      "loss": 0.7497,
       "step": 640
     },
     {
       "epoch": 0.704,
-      "grad_norm": 0.34811219573020935,
       "learning_rate": 5.944206008583692e-05,
       "loss": 0.7509,
       "step": 660
     },
     {
       "epoch": 0.7253333333333334,
-      "grad_norm": 0.2978745996952057,
       "learning_rate": 5.515021459227469e-05,
       "loss": 0.6887,
       "step": 680
     },
     {
       "epoch": 0.7466666666666667,
-      "grad_norm": 0.3408530652523041,
       "learning_rate": 5.085836909871244e-05,
       "loss": 0.7296,
       "step": 700
     },
     {
       "epoch": 0.768,
-      "grad_norm": 0.3775036036968231,
       "learning_rate": 4.656652360515021e-05,
-      "loss": 0.7759,
       "step": 720
     },
     {
       "epoch": 0.7893333333333333,
-      "grad_norm": 0.3197150230407715,
       "learning_rate": 4.227467811158798e-05,
       "loss": 0.7324,
       "step": 740
     },
     {
       "epoch": 0.8106666666666666,
-      "grad_norm": 0.25818583369255066,
       "learning_rate": 3.798283261802575e-05,
       "loss": 0.7155,
       "step": 760
     },
     {
       "epoch": 0.832,
-      "grad_norm": 0.319762647151947,
       "learning_rate": 3.369098712446352e-05,
       "loss": 0.7063,
       "step": 780
     },
     {
       "epoch": 0.8533333333333334,
-      "grad_norm": 0.35238417983055115,
       "learning_rate": 2.939914163090129e-05,
       "loss": 0.6556,
       "step": 800
     },
     {
       "epoch": 0.8746666666666667,
-      "grad_norm": 0.35853707790374756,
       "learning_rate": 2.510729613733906e-05,
       "loss": 0.7319,
       "step": 820
     },
     {
       "epoch": 0.896,
-      "grad_norm": 0.2910785973072052,
       "learning_rate": 2.0815450643776825e-05,
       "loss": 0.6889,
       "step": 840
     },
     {
       "epoch": 0.9173333333333333,
-      "grad_norm": 0.41235440969467163,
       "learning_rate": 1.6523605150214594e-05,
       "loss": 0.7442,
       "step": 860
     },
     {
       "epoch": 0.9386666666666666,
-      "grad_norm": 0.32353946566581726,
       "learning_rate": 1.2231759656652362e-05,
       "loss": 0.7174,
       "step": 880
     },
     {
       "epoch": 0.96,
-      "grad_norm": 0.28284719586372375,
       "learning_rate": 7.93991416309013e-06,
       "loss": 0.6603,
       "step": 900
     },
     {
       "epoch": 0.9813333333333333,
-      "grad_norm": 0.3192315101623535,
       "learning_rate": 3.648068669527897e-06,
       "loss": 0.7313,
       "step": 920

   "log_history": [
     {
       "epoch": 0.021333333333333333,
+      "grad_norm": 0.4694526791572571,
       "learning_rate": 0.00019678111587982831,
+      "loss": 1.2772,
       "step": 20
     },
     {
       "epoch": 0.042666666666666665,
+      "grad_norm": 0.34691280126571655,
       "learning_rate": 0.0001924892703862661,
       "loss": 0.8918,
       "step": 40
     },
     {
       "epoch": 0.064,
+      "grad_norm": 0.44894590973854065,
       "learning_rate": 0.00018819742489270387,
       "loss": 0.8588,
       "step": 60
     },
     {
       "epoch": 0.08533333333333333,
+      "grad_norm": 0.4901750981807709,
       "learning_rate": 0.00018390557939914164,
       "loss": 0.7577,
       "step": 80
     },
     {
       "epoch": 0.10666666666666667,
+      "grad_norm": 0.3013491630554199,
       "learning_rate": 0.00017961373390557942,
       "loss": 0.7796,
       "step": 100
     },
     {
       "epoch": 0.128,
+      "grad_norm": 0.35143589973449707,
       "learning_rate": 0.00017532188841201717,
       "loss": 0.7647,
       "step": 120
     },
     {
       "epoch": 0.14933333333333335,
+      "grad_norm": 0.29885634779930115,
       "learning_rate": 0.00017103004291845494,
+      "loss": 0.674,
       "step": 140
     },
     {
       "epoch": 0.17066666666666666,
+      "grad_norm": 0.26554301381111145,
       "learning_rate": 0.00016673819742489272,
       "loss": 0.7586,
       "step": 160
     },
     {
       "epoch": 0.192,
+      "grad_norm": 0.2963835895061493,
       "learning_rate": 0.0001624463519313305,
       "loss": 0.7364,
       "step": 180
     },
     {
       "epoch": 0.21333333333333335,
+      "grad_norm": 0.36166927218437195,
       "learning_rate": 0.00015815450643776824,
       "loss": 0.7943,
       "step": 200
     },
     {
       "epoch": 0.23466666666666666,
+      "grad_norm": 0.23920877277851105,
       "learning_rate": 0.000153862660944206,
+      "loss": 0.7055,
       "step": 220
     },
     {
       "epoch": 0.256,
+      "grad_norm": 0.32076919078826904,
       "learning_rate": 0.00014957081545064377,
       "loss": 0.7495,
       "step": 240
     },
     {
       "epoch": 0.2773333333333333,
+      "grad_norm": 0.34986230731010437,
       "learning_rate": 0.00014527896995708155,
       "loss": 0.7739,
       "step": 260
     },
     {
       "epoch": 0.2986666666666667,
+      "grad_norm": 0.27509021759033203,
       "learning_rate": 0.00014098712446351932,
       "loss": 0.7125,
       "step": 280
     },
     {
       "epoch": 0.32,
+      "grad_norm": 0.2761971652507782,
       "learning_rate": 0.0001366952789699571,
+      "loss": 0.7306,
       "step": 300
     },
     {
       "epoch": 0.3413333333333333,
+      "grad_norm": 0.27699899673461914,
       "learning_rate": 0.00013240343347639485,
       "loss": 0.6974,
       "step": 320
     },
     {
       "epoch": 0.3626666666666667,
+      "grad_norm": 0.33432355523109436,
       "learning_rate": 0.00012811158798283262,
       "loss": 0.7555,
       "step": 340
     },
     {
       "epoch": 0.384,
+      "grad_norm": 0.2890004515647888,
       "learning_rate": 0.0001238197424892704,
+      "loss": 0.7221,
       "step": 360
     },
     {
       "epoch": 0.4053333333333333,
+      "grad_norm": 0.3435133397579193,
       "learning_rate": 0.00011952789699570816,
       "loss": 0.7466,
       "step": 380
     },
     {
       "epoch": 0.4266666666666667,
+      "grad_norm": 0.42325925827026367,
       "learning_rate": 0.00011523605150214594,
       "loss": 0.6999,
       "step": 400
     },
     {
       "epoch": 0.448,
+      "grad_norm": 0.3292069733142853,
       "learning_rate": 0.0001109442060085837,
       "loss": 0.7554,
       "step": 420
     },
     {
       "epoch": 0.4693333333333333,
+      "grad_norm": 0.3431546092033386,
       "learning_rate": 0.00010665236051502145,
       "loss": 0.7347,
       "step": 440
     },
     {
       "epoch": 0.49066666666666664,
+      "grad_norm": 0.35773923993110657,
       "learning_rate": 0.00010236051502145923,
       "loss": 0.7075,
       "step": 460
     },
     {
       "epoch": 0.512,
+      "grad_norm": 0.34283700585365295,
       "learning_rate": 9.8068669527897e-05,
+      "loss": 0.7339,
       "step": 480
     },
     {
       "epoch": 0.5333333333333333,
+      "grad_norm": 0.3486020267009735,
       "learning_rate": 9.377682403433476e-05,
       "loss": 0.7269,
       "step": 500
     },
     {
       "epoch": 0.5546666666666666,
+      "grad_norm": 0.3991217315196991,
       "learning_rate": 8.948497854077254e-05,
       "loss": 0.7505,
       "step": 520
     },
     {
       "epoch": 0.576,
+      "grad_norm": 0.33239027857780457,
       "learning_rate": 8.51931330472103e-05,
       "loss": 0.7085,
       "step": 540
     },
     {
       "epoch": 0.5973333333333334,
+      "grad_norm": 0.31634777784347534,
       "learning_rate": 8.090128755364808e-05,
       "loss": 0.6989,
       "step": 560
     },
     {
       "epoch": 0.6186666666666667,
+      "grad_norm": 0.29959481954574585,
       "learning_rate": 7.660944206008584e-05,
       "loss": 0.7368,
       "step": 580
     },
     {
       "epoch": 0.64,
+      "grad_norm": 0.32558491826057434,
       "learning_rate": 7.23175965665236e-05,
       "loss": 0.7167,
       "step": 600
     },
     {
       "epoch": 0.6613333333333333,
+      "grad_norm": 0.29890871047973633,
       "learning_rate": 6.802575107296138e-05,
       "loss": 0.7022,
       "step": 620
     },
     {
       "epoch": 0.6826666666666666,
+      "grad_norm": 0.37701210379600525,
       "learning_rate": 6.373390557939914e-05,
+      "loss": 0.7496,
       "step": 640
     },
     {
       "epoch": 0.704,
+      "grad_norm": 0.3479043245315552,
       "learning_rate": 5.944206008583692e-05,
       "loss": 0.7509,
       "step": 660
     },
     {
       "epoch": 0.7253333333333334,
+      "grad_norm": 0.297861784696579,
       "learning_rate": 5.515021459227469e-05,
       "loss": 0.6887,
       "step": 680
     },
     {
       "epoch": 0.7466666666666667,
+      "grad_norm": 0.34087368845939636,
       "learning_rate": 5.085836909871244e-05,
       "loss": 0.7296,
       "step": 700
     },
     {
       "epoch": 0.768,
+      "grad_norm": 0.3773903548717499,
       "learning_rate": 4.656652360515021e-05,
+      "loss": 0.776,
       "step": 720
     },
     {
       "epoch": 0.7893333333333333,
+      "grad_norm": 0.3205181360244751,
       "learning_rate": 4.227467811158798e-05,
       "loss": 0.7324,
       "step": 740
     },
     {
       "epoch": 0.8106666666666666,
+      "grad_norm": 0.2584711015224457,
       "learning_rate": 3.798283261802575e-05,
       "loss": 0.7155,
       "step": 760
     },
     {
       "epoch": 0.832,
+      "grad_norm": 0.3200976252555847,
       "learning_rate": 3.369098712446352e-05,
       "loss": 0.7063,
       "step": 780
     },
     {
       "epoch": 0.8533333333333334,
+      "grad_norm": 0.35253649950027466,
       "learning_rate": 2.939914163090129e-05,
       "loss": 0.6556,
       "step": 800
     },
     {
       "epoch": 0.8746666666666667,
+      "grad_norm": 0.358339786529541,
       "learning_rate": 2.510729613733906e-05,
       "loss": 0.7319,
       "step": 820
     },
     {
       "epoch": 0.896,
+      "grad_norm": 0.2910996973514557,
       "learning_rate": 2.0815450643776825e-05,
       "loss": 0.6889,
       "step": 840
     },
     {
       "epoch": 0.9173333333333333,
+      "grad_norm": 0.4119875431060791,
       "learning_rate": 1.6523605150214594e-05,
       "loss": 0.7442,
       "step": 860
     },
     {
       "epoch": 0.9386666666666666,
+      "grad_norm": 0.3234633505344391,
       "learning_rate": 1.2231759656652362e-05,
       "loss": 0.7174,
       "step": 880
     },
     {
       "epoch": 0.96,
+      "grad_norm": 0.2828710377216339,
       "learning_rate": 7.93991416309013e-06,
       "loss": 0.6603,
       "step": 900
     },
     {
       "epoch": 0.9813333333333333,
+      "grad_norm": 0.3194393813610077,
       "learning_rate": 3.648068669527897e-06,
       "loss": 0.7313,
       "step": 920

checkpoint-937/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4651d06185709dd3d181207d567d0b3ed653efb01f01015cb44b8df4eebee657
 size 5560

 version https://git-lfs.github.com/spec/v1
+oid sha256:69617c1bea4a27e99b6c6729498e45489dcd01641dbfbfbf0cc0b118ad579975
 size 5560