Training in progress, step 161000, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/adapter_config.json +4 -4
last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3
last-checkpoint/training_args.bin +1 -1

last-checkpoint/adapter_config.json CHANGED Viewed

@@ -23,13 +23,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "up_proj",
     "gate_proj",
     "k_proj",
-    "v_proj",
-    "down_proj",
-    "o_proj",
-    "q_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "down_proj",
+    "v_proj",
     "up_proj",
+    "q_proj",
     "gate_proj",
     "k_proj",
+    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:521de62bb4838c45dec76e6c34ea11fd647aecdff9a953b901524a65d131b4f8
 size 1556140392

 version https://git-lfs.github.com/spec/v1
+oid sha256:776b57e8cf86af8f45ec851c7f40dff293a2260f466d76c2635a529669ea7be1
 size 1556140392

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eab9a0bc30d111a8d214bac8a9f22289389b5c8dba44e3e3060691bceccf3d9f
 size 791683586

 version https://git-lfs.github.com/spec/v1
+oid sha256:e35e6b330681427d92536d5b866820fd3210e6f1045fb36af35a966e2802ccfa
 size 791683586

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:be061cbaa280e39435db7c14f80119164fc3f359a50ce457db220691644cb576
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:1cd19186e1a1c47e5da80a5032267ec74bc9c4149fe528d2a46b67a9b8d64752
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.10501783564500076,
   "eval_steps": 500,
-  "global_step": 151000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -5292,6 +5292,356 @@
       "learning_rate": 1.9904972598324345e-05,
       "loss": 1.8064,
       "step": 151000
     }
   ],
   "logging_steps": 200,
@@ -5311,7 +5661,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 8.25450882511233e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.11197265919764983,
   "eval_steps": 500,
+  "global_step": 161000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.9904972598324345e-05,
       "loss": 1.8064,
       "step": 151000
+    },
+    {
+      "epoch": 0.10515693211605374,
+      "grad_norm": 0.6552620530128479,
+      "learning_rate": 1.9904721276394122e-05,
+      "loss": 1.7752,
+      "step": 151200
+    },
+    {
+      "epoch": 0.10529602858710672,
+      "grad_norm": 0.8357005715370178,
+      "learning_rate": 1.990446962484043e-05,
+      "loss": 1.7828,
+      "step": 151400
+    },
+    {
+      "epoch": 0.10543512505815972,
+      "grad_norm": 1.1673591136932373,
+      "learning_rate": 1.9904217643675287e-05,
+      "loss": 1.8316,
+      "step": 151600
+    },
+    {
+      "epoch": 0.1055742215292127,
+      "grad_norm": 3.075096845626831,
+      "learning_rate": 1.9903965332910706e-05,
+      "loss": 1.7649,
+      "step": 151800
+    },
+    {
+      "epoch": 0.10571331800026568,
+      "grad_norm": 1.600201964378357,
+      "learning_rate": 1.990371269255875e-05,
+      "loss": 1.815,
+      "step": 152000
+    },
+    {
+      "epoch": 0.10585241447131866,
+      "grad_norm": 1.1825553178787231,
+      "learning_rate": 1.9903459722631466e-05,
+      "loss": 1.8219,
+      "step": 152200
+    },
+    {
+      "epoch": 0.10599151094237164,
+      "grad_norm": 0.8359824419021606,
+      "learning_rate": 1.9903206423140936e-05,
+      "loss": 1.7919,
+      "step": 152400
+    },
+    {
+      "epoch": 0.10613060741342462,
+      "grad_norm": 1.7081679105758667,
+      "learning_rate": 1.9902952794099257e-05,
+      "loss": 1.7764,
+      "step": 152600
+    },
+    {
+      "epoch": 0.1062697038844776,
+      "grad_norm": 0.9274424314498901,
+      "learning_rate": 1.9902698835518533e-05,
+      "loss": 1.7792,
+      "step": 152800
+    },
+    {
+      "epoch": 0.10640880035553058,
+      "grad_norm": 1.007084608078003,
+      "learning_rate": 1.9902444547410883e-05,
+      "loss": 1.7766,
+      "step": 153000
+    },
+    {
+      "epoch": 0.10654789682658355,
+      "grad_norm": 0.8499106764793396,
+      "learning_rate": 1.9902189929788453e-05,
+      "loss": 1.7173,
+      "step": 153200
+    },
+    {
+      "epoch": 0.10668699329763655,
+      "grad_norm": 0.8728858232498169,
+      "learning_rate": 1.9901934982663393e-05,
+      "loss": 1.7235,
+      "step": 153400
+    },
+    {
+      "epoch": 0.10682608976868953,
+      "grad_norm": 1.5445681810379028,
+      "learning_rate": 1.990167970604788e-05,
+      "loss": 1.8182,
+      "step": 153600
+    },
+    {
+      "epoch": 0.10696518623974251,
+      "grad_norm": 1.0307021141052246,
+      "learning_rate": 1.9901424099954094e-05,
+      "loss": 1.7935,
+      "step": 153800
+    },
+    {
+      "epoch": 0.10710428271079549,
+      "grad_norm": 0.4203742444515228,
+      "learning_rate": 1.9901168164394242e-05,
+      "loss": 1.7632,
+      "step": 154000
+    },
+    {
+      "epoch": 0.10724337918184847,
+      "grad_norm": 1.2339669466018677,
+      "learning_rate": 1.990091189938054e-05,
+      "loss": 1.7986,
+      "step": 154200
+    },
+    {
+      "epoch": 0.10738247565290145,
+      "grad_norm": 0.9977461099624634,
+      "learning_rate": 1.9900655304925225e-05,
+      "loss": 1.7751,
+      "step": 154400
+    },
+    {
+      "epoch": 0.10752157212395443,
+      "grad_norm": 0.8145340085029602,
+      "learning_rate": 1.990039838104054e-05,
+      "loss": 1.7175,
+      "step": 154600
+    },
+    {
+      "epoch": 0.1076606685950074,
+      "grad_norm": 0.8700305819511414,
+      "learning_rate": 1.9900141127738757e-05,
+      "loss": 1.8136,
+      "step": 154800
+    },
+    {
+      "epoch": 0.10779976506606039,
+      "grad_norm": 1.0328384637832642,
+      "learning_rate": 1.989988354503215e-05,
+      "loss": 1.8334,
+      "step": 155000
+    },
+    {
+      "epoch": 0.10793886153711338,
+      "grad_norm": 1.1774046421051025,
+      "learning_rate": 1.9899625632933027e-05,
+      "loss": 1.7683,
+      "step": 155200
+    },
+    {
+      "epoch": 0.10807795800816636,
+      "grad_norm": 1.0147098302841187,
+      "learning_rate": 1.989936739145369e-05,
+      "loss": 1.8265,
+      "step": 155400
+    },
+    {
+      "epoch": 0.10821705447921934,
+      "grad_norm": 1.3206279277801514,
+      "learning_rate": 1.989910882060647e-05,
+      "loss": 1.7738,
+      "step": 155600
+    },
+    {
+      "epoch": 0.10835615095027232,
+      "grad_norm": 0.8004774451255798,
+      "learning_rate": 1.9898849920403708e-05,
+      "loss": 1.8271,
+      "step": 155800
+    },
+    {
+      "epoch": 0.1084952474213253,
+      "grad_norm": 0.9754022359848022,
+      "learning_rate": 1.9898590690857774e-05,
+      "loss": 1.7882,
+      "step": 156000
+    },
+    {
+      "epoch": 0.10863434389237828,
+      "grad_norm": 1.2540456056594849,
+      "learning_rate": 1.989833113198103e-05,
+      "loss": 1.8104,
+      "step": 156200
+    },
+    {
+      "epoch": 0.10877344036343126,
+      "grad_norm": 0.7259592413902283,
+      "learning_rate": 1.9898071243785876e-05,
+      "loss": 1.7974,
+      "step": 156400
+    },
+    {
+      "epoch": 0.10891253683448424,
+      "grad_norm": 1.9316837787628174,
+      "learning_rate": 1.9897811026284718e-05,
+      "loss": 1.7933,
+      "step": 156600
+    },
+    {
+      "epoch": 0.10905163330553722,
+      "grad_norm": 0.8343173861503601,
+      "learning_rate": 1.989755047948997e-05,
+      "loss": 1.7924,
+      "step": 156800
+    },
+    {
+      "epoch": 0.10919072977659021,
+      "grad_norm": 0.7665181159973145,
+      "learning_rate": 1.989728960341408e-05,
+      "loss": 1.7123,
+      "step": 157000
+    },
+    {
+      "epoch": 0.10932982624764319,
+      "grad_norm": 1.001929759979248,
+      "learning_rate": 1.9897028398069503e-05,
+      "loss": 1.8386,
+      "step": 157200
+    },
+    {
+      "epoch": 0.10946892271869617,
+      "grad_norm": 1.0837798118591309,
+      "learning_rate": 1.98967668634687e-05,
+      "loss": 1.7943,
+      "step": 157400
+    },
+    {
+      "epoch": 0.10960801918974915,
+      "grad_norm": 0.9598066806793213,
+      "learning_rate": 1.9896504999624163e-05,
+      "loss": 1.7959,
+      "step": 157600
+    },
+    {
+      "epoch": 0.10974711566080213,
+      "grad_norm": 0.9790207743644714,
+      "learning_rate": 1.989624280654839e-05,
+      "loss": 1.7813,
+      "step": 157800
+    },
+    {
+      "epoch": 0.10988621213185511,
+      "grad_norm": 0.6536526679992676,
+      "learning_rate": 1.98959802842539e-05,
+      "loss": 1.7947,
+      "step": 158000
+    },
+    {
+      "epoch": 0.11002530860290809,
+      "grad_norm": 1.2658668756484985,
+      "learning_rate": 1.9895717432753222e-05,
+      "loss": 1.7699,
+      "step": 158200
+    },
+    {
+      "epoch": 0.11016440507396107,
+      "grad_norm": 2.004922866821289,
+      "learning_rate": 1.9895454252058903e-05,
+      "loss": 1.7686,
+      "step": 158400
+    },
+    {
+      "epoch": 0.11030350154501405,
+      "grad_norm": 1.3296949863433838,
+      "learning_rate": 1.9895190742183518e-05,
+      "loss": 1.7689,
+      "step": 158600
+    },
+    {
+      "epoch": 0.11044259801606704,
+      "grad_norm": 1.112878680229187,
+      "learning_rate": 1.9894926903139633e-05,
+      "loss": 1.8022,
+      "step": 158800
+    },
+    {
+      "epoch": 0.11058169448712002,
+      "grad_norm": 1.8036686182022095,
+      "learning_rate": 1.9894662734939847e-05,
+      "loss": 1.7359,
+      "step": 159000
+    },
+    {
+      "epoch": 0.110720790958173,
+      "grad_norm": 1.7240679264068604,
+      "learning_rate": 1.989439823759678e-05,
+      "loss": 1.794,
+      "step": 159200
+    },
+    {
+      "epoch": 0.11085988742922598,
+      "grad_norm": 0.8310656547546387,
+      "learning_rate": 1.9894133411123047e-05,
+      "loss": 1.7825,
+      "step": 159400
+    },
+    {
+      "epoch": 0.11099898390027896,
+      "grad_norm": 1.0748811960220337,
+      "learning_rate": 1.9893868255531295e-05,
+      "loss": 1.7347,
+      "step": 159600
+    },
+    {
+      "epoch": 0.11113808037133194,
+      "grad_norm": 0.7499359250068665,
+      "learning_rate": 1.989360277083419e-05,
+      "loss": 1.8134,
+      "step": 159800
+    },
+    {
+      "epoch": 0.11127717684238492,
+      "grad_norm": 0.793464183807373,
+      "learning_rate": 1.9893336957044394e-05,
+      "loss": 1.7876,
+      "step": 160000
+    },
+    {
+      "epoch": 0.1114162733134379,
+      "grad_norm": 1.079917073249817,
+      "learning_rate": 1.9893070814174604e-05,
+      "loss": 1.8132,
+      "step": 160200
+    },
+    {
+      "epoch": 0.11155536978449088,
+      "grad_norm": 1.041785717010498,
+      "learning_rate": 1.9892804342237518e-05,
+      "loss": 1.7536,
+      "step": 160400
+    },
+    {
+      "epoch": 0.11169446625554387,
+      "grad_norm": 0.9491516351699829,
+      "learning_rate": 1.9892537541245865e-05,
+      "loss": 1.7811,
+      "step": 160600
+    },
+    {
+      "epoch": 0.11183356272659685,
+      "grad_norm": 1.1019930839538574,
+      "learning_rate": 1.989227041121238e-05,
+      "loss": 1.7485,
+      "step": 160800
+    },
+    {
+      "epoch": 0.11197265919764983,
+      "grad_norm": 1.0586832761764526,
+      "learning_rate": 1.9892002952149815e-05,
+      "loss": 1.7691,
+      "step": 161000
     }
   ],
   "logging_steps": 200,
       "attributes": {}
     }
   },
+  "total_flos": 8.806317924606628e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e4a6ea4c36d16b3575db891122bc12b5aa423f1afb93579d54d6fe7412f2e22c
 size 6776

 version https://git-lfs.github.com/spec/v1
+oid sha256:d151f8446fd7799d24b0e9a99d447d7d465b181c00e55fa7d90224c183012544
 size 6776