Rixhabh committed on
Commit
ac9333a
·
verified ·
1 Parent(s): 7d16825

Model save

Browse files
README.md CHANGED
@@ -27,7 +27,7 @@ print(output["generated_text"])
27
 
28
  ## Training procedure
29
 
30
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/yueming-lai-eigent/huggingface/runs/x4ct4ix2)
31
 
32
 
33
  This model was trained with SFT.
 
27
 
28
  ## Training procedure
29
 
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/yueming-lai-eigent/huggingface/runs/1fj4mkk2)
31
 
32
 
33
  This model was trained with SFT.
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 4154970079232.0,
3
- "train_loss": 0.8046708256006241,
4
- "train_runtime": 206.0813,
5
  "train_samples": 1000,
6
- "train_samples_per_second": 3.052,
7
- "train_steps_per_second": 0.019
8
  }
 
1
  {
2
  "total_flos": 4154970079232.0,
3
+ "train_loss": 0.8020447641611099,
4
+ "train_runtime": 203.4558,
5
  "train_samples": 1000,
6
+ "train_samples_per_second": 3.092,
7
+ "train_steps_per_second": 0.02
8
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:00aa7904552db9edf2f439973390b21887ce53b435100d6e51f79557e6868864
3
  size 4874815080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3266fc879a73ef07ddb194997ab8466d2d67a866a9f72c495eb105aa94aa454
3
  size 4874815080
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f14ecfbbf69996a9d69a4e4115af60efaeed9009a7a1e151989796babc1054a
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21070b1b511338865273e1cf33071e62d6b809a7073d2ac7587ee6bc8b7e5edf
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:503ef6a1da798c16ce847b92fb3552e0dc40b55177fb289b8b95ca7a615b754c
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3063c581ecf2ae83dfa0159f09b800e1463591ae2bcea3fc6c7a42e25f8f9448
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2dab9574ecba20d3fb3df0c4deb3b05cb345f8d658a1729b49181a895df67c66
3
  size 1087149184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab8f7d3f9b87358bde8608a97442496420982639484f43bbf79e042bc0a3460c
3
  size 1087149184
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 4154970079232.0,
3
- "train_loss": 0.8046708256006241,
4
- "train_runtime": 206.0813,
5
  "train_samples": 1000,
6
- "train_samples_per_second": 3.052,
7
- "train_steps_per_second": 0.019
8
  }
 
1
  {
2
  "total_flos": 4154970079232.0,
3
+ "train_loss": 0.8020447641611099,
4
+ "train_runtime": 203.4558,
5
  "train_samples": 1000,
6
+ "train_samples_per_second": 3.092,
7
+ "train_steps_per_second": 0.02
8
  }
trainer_state.json CHANGED
@@ -11,7 +11,7 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.20253164556962025,
14
- "grad_norm": 6.907372507860453,
15
  "learning_rate": 2e-05,
16
  "loss": 0.9126,
17
  "mean_token_accuracy": 0.766769690439105,
@@ -20,28 +20,28 @@
20
  },
21
  {
22
  "epoch": 0.4050632911392405,
23
- "grad_norm": 4.793664107629484,
24
  "learning_rate": 1.5000000000000002e-05,
25
- "loss": 0.8216,
26
- "mean_token_accuracy": 0.7697268016636372,
27
  "num_tokens": 1048576.0,
28
  "step": 2
29
  },
30
  {
31
  "epoch": 0.6075949367088608,
32
- "grad_norm": 83.76859612614284,
33
  "learning_rate": 1e-05,
34
- "loss": 0.7596,
35
- "mean_token_accuracy": 0.7874771114438772,
36
  "num_tokens": 1572864.0,
37
  "step": 3
38
  },
39
  {
40
  "epoch": 0.810126582278481,
41
- "grad_norm": 3.547629428093402,
42
  "learning_rate": 5e-06,
43
- "loss": 0.7249,
44
- "mean_token_accuracy": 0.7918228395283222,
45
  "num_tokens": 2094002.0,
46
  "step": 4
47
  },
@@ -49,10 +49,10 @@
49
  "epoch": 0.810126582278481,
50
  "step": 4,
51
  "total_flos": 4154970079232.0,
52
- "train_loss": 0.8046708256006241,
53
- "train_runtime": 206.0813,
54
- "train_samples_per_second": 3.052,
55
- "train_steps_per_second": 0.019
56
  }
57
  ],
58
  "logging_steps": 1.0,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.20253164556962025,
14
+ "grad_norm": 6.9072078083746264,
15
  "learning_rate": 2e-05,
16
  "loss": 0.9126,
17
  "mean_token_accuracy": 0.766769690439105,
 
20
  },
21
  {
22
  "epoch": 0.4050632911392405,
23
+ "grad_norm": 4.753814194553616,
24
  "learning_rate": 1.5000000000000002e-05,
25
+ "loss": 0.8217,
26
+ "mean_token_accuracy": 0.7695875316858292,
27
  "num_tokens": 1048576.0,
28
  "step": 2
29
  },
30
  {
31
  "epoch": 0.6075949367088608,
32
+ "grad_norm": 78.58612697641571,
33
  "learning_rate": 1e-05,
34
+ "loss": 0.7497,
35
+ "mean_token_accuracy": 0.7892876267433167,
36
  "num_tokens": 1572864.0,
37
  "step": 3
38
  },
39
  {
40
  "epoch": 0.810126582278481,
41
+ "grad_norm": 3.129716830110077,
42
  "learning_rate": 5e-06,
43
+ "loss": 0.7242,
44
+ "mean_token_accuracy": 0.7917388044297695,
45
  "num_tokens": 2094002.0,
46
  "step": 4
47
  },
 
49
  "epoch": 0.810126582278481,
50
  "step": 4,
51
  "total_flos": 4154970079232.0,
52
+ "train_loss": 0.8020447641611099,
53
+ "train_runtime": 203.4558,
54
+ "train_samples_per_second": 3.092,
55
+ "train_steps_per_second": 0.02
56
  }
57
  ],
58
  "logging_steps": 1.0,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50ac9aa3f7cd957f362193c39aa821494f7268439417fd9cf7c03f5cf105e096
3
  size 7224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2db49c9e84efc3b9db18731cdc5067d6e2d617a62caf24c4a5106a7336580334
3
  size 7224