Model save

Files changed (9) hide show

README.md CHANGED Viewed

@@ -27,7 +27,7 @@ print(output["generated_text"])
 ## Training procedure
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/yueming-lai-eigent/huggingface/runs/x4ct4ix2)
 This model was trained with SFT.

 ## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/yueming-lai-eigent/huggingface/runs/1fj4mkk2)
 This model was trained with SFT.

all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "total_flos": 4154970079232.0,
-    "train_loss": 0.8046708256006241,
-    "train_runtime": 206.0813,
     "train_samples": 1000,
-    "train_samples_per_second": 3.052,
-    "train_steps_per_second": 0.019
 }

 {
     "total_flos": 4154970079232.0,
+    "train_loss": 0.8020447641611099,
+    "train_runtime": 203.4558,
     "train_samples": 1000,
+    "train_samples_per_second": 3.092,
+    "train_steps_per_second": 0.02
 }

model-00001-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:00aa7904552db9edf2f439973390b21887ce53b435100d6e51f79557e6868864
 size 4874815080

 version https://git-lfs.github.com/spec/v1
+oid sha256:a3266fc879a73ef07ddb194997ab8466d2d67a866a9f72c495eb105aa94aa454
 size 4874815080

model-00002-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9f14ecfbbf69996a9d69a4e4115af60efaeed9009a7a1e151989796babc1054a
 size 4932751008

 version https://git-lfs.github.com/spec/v1
+oid sha256:21070b1b511338865273e1cf33071e62d6b809a7073d2ac7587ee6bc8b7e5edf
 size 4932751008

model-00003-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:503ef6a1da798c16ce847b92fb3552e0dc40b55177fb289b8b95ca7a615b754c
 size 4330865200

 version https://git-lfs.github.com/spec/v1
+oid sha256:3063c581ecf2ae83dfa0159f09b800e1463591ae2bcea3fc6c7a42e25f8f9448
 size 4330865200

model-00004-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2dab9574ecba20d3fb3df0c4deb3b05cb345f8d658a1729b49181a895df67c66
 size 1087149184

 version https://git-lfs.github.com/spec/v1
+oid sha256:ab8f7d3f9b87358bde8608a97442496420982639484f43bbf79e042bc0a3460c
 size 1087149184

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "total_flos": 4154970079232.0,
-    "train_loss": 0.8046708256006241,
-    "train_runtime": 206.0813,
     "train_samples": 1000,
-    "train_samples_per_second": 3.052,
-    "train_steps_per_second": 0.019
 }

 {
     "total_flos": 4154970079232.0,
+    "train_loss": 0.8020447641611099,
+    "train_runtime": 203.4558,
     "train_samples": 1000,
+    "train_samples_per_second": 3.092,
+    "train_steps_per_second": 0.02
 }

trainer_state.json CHANGED Viewed

@@ -11,7 +11,7 @@
   "log_history": [
     {
       "epoch": 0.20253164556962025,
-      "grad_norm": 6.907372507860453,
       "learning_rate": 2e-05,
       "loss": 0.9126,
       "mean_token_accuracy": 0.766769690439105,
@@ -20,28 +20,28 @@
     },
     {
       "epoch": 0.4050632911392405,
-      "grad_norm": 4.793664107629484,
       "learning_rate": 1.5000000000000002e-05,
-      "loss": 0.8216,
-      "mean_token_accuracy": 0.7697268016636372,
       "num_tokens": 1048576.0,
       "step": 2
     },
     {
       "epoch": 0.6075949367088608,
-      "grad_norm": 83.76859612614284,
       "learning_rate": 1e-05,
-      "loss": 0.7596,
-      "mean_token_accuracy": 0.7874771114438772,
       "num_tokens": 1572864.0,
       "step": 3
     },
     {
       "epoch": 0.810126582278481,
-      "grad_norm": 3.547629428093402,
       "learning_rate": 5e-06,
-      "loss": 0.7249,
-      "mean_token_accuracy": 0.7918228395283222,
       "num_tokens": 2094002.0,
       "step": 4
     },
@@ -49,10 +49,10 @@
       "epoch": 0.810126582278481,
       "step": 4,
       "total_flos": 4154970079232.0,
-      "train_loss": 0.8046708256006241,
-      "train_runtime": 206.0813,
-      "train_samples_per_second": 3.052,
-      "train_steps_per_second": 0.019
     }
   ],
   "logging_steps": 1.0,

   "log_history": [
     {
       "epoch": 0.20253164556962025,
+      "grad_norm": 6.9072078083746264,
       "learning_rate": 2e-05,
       "loss": 0.9126,
       "mean_token_accuracy": 0.766769690439105,
     },
     {
       "epoch": 0.4050632911392405,
+      "grad_norm": 4.753814194553616,
       "learning_rate": 1.5000000000000002e-05,
+      "loss": 0.8217,
+      "mean_token_accuracy": 0.7695875316858292,
       "num_tokens": 1048576.0,
       "step": 2
     },
     {
       "epoch": 0.6075949367088608,
+      "grad_norm": 78.58612697641571,
       "learning_rate": 1e-05,
+      "loss": 0.7497,
+      "mean_token_accuracy": 0.7892876267433167,
       "num_tokens": 1572864.0,
       "step": 3
     },
     {
       "epoch": 0.810126582278481,
+      "grad_norm": 3.129716830110077,
       "learning_rate": 5e-06,
+      "loss": 0.7242,
+      "mean_token_accuracy": 0.7917388044297695,
       "num_tokens": 2094002.0,
       "step": 4
     },
       "epoch": 0.810126582278481,
       "step": 4,
       "total_flos": 4154970079232.0,
+      "train_loss": 0.8020447641611099,
+      "train_runtime": 203.4558,
+      "train_samples_per_second": 3.092,
+      "train_steps_per_second": 0.02
     }
   ],
   "logging_steps": 1.0,

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:50ac9aa3f7cd957f362193c39aa821494f7268439417fd9cf7c03f5cf105e096
 size 7224

 version https://git-lfs.github.com/spec/v1
+oid sha256:2db49c9e84efc3b9db18731cdc5067d6e2d617a62caf24c4a5106a7336580334
 size 7224