Training in progress, step 2132, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 147770496
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:26522d5ea558b7b30e432a3e8326c772da331af1345d7d2385595a8e092514f6
|
3 |
size 147770496
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 75455810
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f606e7bf0d69e20856ff02233f6c65f5d5d774b35c8e19ea35f2f4123eb47bc7
|
3 |
size 75455810
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e73a6957b047016025e7645447d5032c71a249e8c0df387b0746129826f0864d
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:17cac372009eea4e7cef6a84a78c5a8084c9295f985d63cf718cd0dcfd001889
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -14903,6 +14903,34 @@
|
|
14903 |
"learning_rate": 7.682691173428503e-07,
|
14904 |
"loss": 1.3673,
|
14905 |
"step": 2128
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14906 |
}
|
14907 |
],
|
14908 |
"logging_steps": 1.0,
|
@@ -14922,7 +14950,7 @@
|
|
14922 |
"attributes": {}
|
14923 |
}
|
14924 |
},
|
14925 |
-
"total_flos": 2.
|
14926 |
"train_batch_size": 1,
|
14927 |
"trial_name": null,
|
14928 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.826095182453469,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 2132,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
14903 |
"learning_rate": 7.682691173428503e-07,
|
14904 |
"loss": 1.3673,
|
14905 |
"step": 2128
|
14906 |
+
},
|
14907 |
+
{
|
14908 |
+
"epoch": 0.8249327595888535,
|
14909 |
+
"grad_norm": 0.21315915882587433,
|
14910 |
+
"learning_rate": 7.649628197340931e-07,
|
14911 |
+
"loss": 1.3686,
|
14912 |
+
"step": 2129
|
14913 |
+
},
|
14914 |
+
{
|
14915 |
+
"epoch": 0.8253202338770587,
|
14916 |
+
"grad_norm": 0.1892424076795578,
|
14917 |
+
"learning_rate": 7.616630625022609e-07,
|
14918 |
+
"loss": 1.3537,
|
14919 |
+
"step": 2130
|
14920 |
+
},
|
14921 |
+
{
|
14922 |
+
"epoch": 0.8257077081652638,
|
14923 |
+
"grad_norm": 0.17962083220481873,
|
14924 |
+
"learning_rate": 7.58369850743334e-07,
|
14925 |
+
"loss": 1.3944,
|
14926 |
+
"step": 2131
|
14927 |
+
},
|
14928 |
+
{
|
14929 |
+
"epoch": 0.826095182453469,
|
14930 |
+
"grad_norm": 0.18284156918525696,
|
14931 |
+
"learning_rate": 7.550831895431799e-07,
|
14932 |
+
"loss": 1.3092,
|
14933 |
+
"step": 2132
|
14934 |
}
|
14935 |
],
|
14936 |
"logging_steps": 1.0,
|
|
|
14950 |
"attributes": {}
|
14951 |
}
|
14952 |
},
|
14953 |
+
"total_flos": 2.1091234518819717e+18,
|
14954 |
"train_batch_size": 1,
|
14955 |
"trial_name": null,
|
14956 |
"trial_params": null
|