Model save
Browse files- README.md +1 -1
- all_results.json +4 -4
- model-00001-of-00004.safetensors +1 -1
- model-00002-of-00004.safetensors +1 -1
- model-00003-of-00004.safetensors +1 -1
- model-00004-of-00004.safetensors +1 -1
- train_results.json +4 -4
- trainer_state.json +14 -14
- training_args.bin +1 -1
README.md
CHANGED
@@ -27,7 +27,7 @@ print(output["generated_text"])
|
|
27 |
|
28 |
## Training procedure
|
29 |
|
30 |
-
[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/yueming-lai-eigent/huggingface/runs/
|
31 |
|
32 |
|
33 |
This model was trained with SFT.
|
|
|
27 |
|
28 |
## Training procedure
|
29 |
|
30 |
+
[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/yueming-lai-eigent/huggingface/runs/1fj4mkk2)
|
31 |
|
32 |
|
33 |
This model was trained with SFT.
|
all_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"total_flos": 4154970079232.0,
|
3 |
-
"train_loss": 0.
|
4 |
-
"train_runtime":
|
5 |
"train_samples": 1000,
|
6 |
-
"train_samples_per_second": 3.
|
7 |
-
"train_steps_per_second": 0.
|
8 |
}
|
|
|
1 |
{
|
2 |
"total_flos": 4154970079232.0,
|
3 |
+
"train_loss": 0.8020447641611099,
|
4 |
+
"train_runtime": 203.4558,
|
5 |
"train_samples": 1000,
|
6 |
+
"train_samples_per_second": 3.092,
|
7 |
+
"train_steps_per_second": 0.02
|
8 |
}
|
model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4874815080
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a3266fc879a73ef07ddb194997ab8466d2d67a866a9f72c495eb105aa94aa454
|
3 |
size 4874815080
|
model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4932751008
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:21070b1b511338865273e1cf33071e62d6b809a7073d2ac7587ee6bc8b7e5edf
|
3 |
size 4932751008
|
model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4330865200
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3063c581ecf2ae83dfa0159f09b800e1463591ae2bcea3fc6c7a42e25f8f9448
|
3 |
size 4330865200
|
model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1087149184
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab8f7d3f9b87358bde8608a97442496420982639484f43bbf79e042bc0a3460c
|
3 |
size 1087149184
|
train_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"total_flos": 4154970079232.0,
|
3 |
-
"train_loss": 0.
|
4 |
-
"train_runtime":
|
5 |
"train_samples": 1000,
|
6 |
-
"train_samples_per_second": 3.
|
7 |
-
"train_steps_per_second": 0.
|
8 |
}
|
|
|
1 |
{
|
2 |
"total_flos": 4154970079232.0,
|
3 |
+
"train_loss": 0.8020447641611099,
|
4 |
+
"train_runtime": 203.4558,
|
5 |
"train_samples": 1000,
|
6 |
+
"train_samples_per_second": 3.092,
|
7 |
+
"train_steps_per_second": 0.02
|
8 |
}
|
trainer_state.json
CHANGED
@@ -11,7 +11,7 @@
|
|
11 |
"log_history": [
|
12 |
{
|
13 |
"epoch": 0.20253164556962025,
|
14 |
-
"grad_norm": 6.
|
15 |
"learning_rate": 2e-05,
|
16 |
"loss": 0.9126,
|
17 |
"mean_token_accuracy": 0.766769690439105,
|
@@ -20,28 +20,28 @@
|
|
20 |
},
|
21 |
{
|
22 |
"epoch": 0.4050632911392405,
|
23 |
-
"grad_norm": 4.
|
24 |
"learning_rate": 1.5000000000000002e-05,
|
25 |
-
"loss": 0.
|
26 |
-
"mean_token_accuracy": 0.
|
27 |
"num_tokens": 1048576.0,
|
28 |
"step": 2
|
29 |
},
|
30 |
{
|
31 |
"epoch": 0.6075949367088608,
|
32 |
-
"grad_norm":
|
33 |
"learning_rate": 1e-05,
|
34 |
-
"loss": 0.
|
35 |
-
"mean_token_accuracy": 0.
|
36 |
"num_tokens": 1572864.0,
|
37 |
"step": 3
|
38 |
},
|
39 |
{
|
40 |
"epoch": 0.810126582278481,
|
41 |
-
"grad_norm": 3.
|
42 |
"learning_rate": 5e-06,
|
43 |
-
"loss": 0.
|
44 |
-
"mean_token_accuracy": 0.
|
45 |
"num_tokens": 2094002.0,
|
46 |
"step": 4
|
47 |
},
|
@@ -49,10 +49,10 @@
|
|
49 |
"epoch": 0.810126582278481,
|
50 |
"step": 4,
|
51 |
"total_flos": 4154970079232.0,
|
52 |
-
"train_loss": 0.
|
53 |
-
"train_runtime":
|
54 |
-
"train_samples_per_second": 3.
|
55 |
-
"train_steps_per_second": 0.
|
56 |
}
|
57 |
],
|
58 |
"logging_steps": 1.0,
|
|
|
11 |
"log_history": [
|
12 |
{
|
13 |
"epoch": 0.20253164556962025,
|
14 |
+
"grad_norm": 6.9072078083746264,
|
15 |
"learning_rate": 2e-05,
|
16 |
"loss": 0.9126,
|
17 |
"mean_token_accuracy": 0.766769690439105,
|
|
|
20 |
},
|
21 |
{
|
22 |
"epoch": 0.4050632911392405,
|
23 |
+
"grad_norm": 4.753814194553616,
|
24 |
"learning_rate": 1.5000000000000002e-05,
|
25 |
+
"loss": 0.8217,
|
26 |
+
"mean_token_accuracy": 0.7695875316858292,
|
27 |
"num_tokens": 1048576.0,
|
28 |
"step": 2
|
29 |
},
|
30 |
{
|
31 |
"epoch": 0.6075949367088608,
|
32 |
+
"grad_norm": 78.58612697641571,
|
33 |
"learning_rate": 1e-05,
|
34 |
+
"loss": 0.7497,
|
35 |
+
"mean_token_accuracy": 0.7892876267433167,
|
36 |
"num_tokens": 1572864.0,
|
37 |
"step": 3
|
38 |
},
|
39 |
{
|
40 |
"epoch": 0.810126582278481,
|
41 |
+
"grad_norm": 3.129716830110077,
|
42 |
"learning_rate": 5e-06,
|
43 |
+
"loss": 0.7242,
|
44 |
+
"mean_token_accuracy": 0.7917388044297695,
|
45 |
"num_tokens": 2094002.0,
|
46 |
"step": 4
|
47 |
},
|
|
|
49 |
"epoch": 0.810126582278481,
|
50 |
"step": 4,
|
51 |
"total_flos": 4154970079232.0,
|
52 |
+
"train_loss": 0.8020447641611099,
|
53 |
+
"train_runtime": 203.4558,
|
54 |
+
"train_samples_per_second": 3.092,
|
55 |
+
"train_steps_per_second": 0.02
|
56 |
}
|
57 |
],
|
58 |
"logging_steps": 1.0,
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 7224
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2db49c9e84efc3b9db18731cdc5067d6e2d617a62caf24c4a5106a7336580334
|
3 |
size 7224
|