Training in progress, step 2136, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 147770496
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1c4b48605b7908c6c3cb92661a446438803c1acd9c09849363fbd525f536e574
|
3 |
size 147770496
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 75455810
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fadc7ddaa7c8d0042358e2a5baa607920153a3514ff6ac69482a291d9a05ddda
|
3 |
size 75455810
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d443a2e4e38986fdfbce09bb15fb1f520183aec59744ce156f4604030fd8c589
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7461be29c782b65290c8ea4037877ee044396194b7fab404fd344a9cb3ecddbe
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -14931,6 +14931,34 @@
|
|
14931 |
"learning_rate": 7.550831895431799e-07,
|
14932 |
"loss": 1.3092,
|
14933 |
"step": 2132
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14934 |
}
|
14935 |
],
|
14936 |
"logging_steps": 1.0,
|
@@ -14950,7 +14978,7 @@
|
|
14950 |
"attributes": {}
|
14951 |
}
|
14952 |
},
|
14953 |
-
"total_flos": 2.
|
14954 |
"train_batch_size": 1,
|
14955 |
"trial_name": null,
|
14956 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.8276450796062897,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 2136,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
14931 |
"learning_rate": 7.550831895431799e-07,
|
14932 |
"loss": 1.3092,
|
14933 |
"step": 2132
|
14934 |
+
},
|
14935 |
+
{
|
14936 |
+
"epoch": 0.8264826567416742,
|
14937 |
+
"grad_norm": 0.1986715942621231,
|
14938 |
+
"learning_rate": 7.518030839775536e-07,
|
14939 |
+
"loss": 1.3829,
|
14940 |
+
"step": 2133
|
14941 |
+
},
|
14942 |
+
{
|
14943 |
+
"epoch": 0.8268701310298794,
|
14944 |
+
"grad_norm": 0.18789222836494446,
|
14945 |
+
"learning_rate": 7.485295391120823e-07,
|
14946 |
+
"loss": 1.313,
|
14947 |
+
"step": 2134
|
14948 |
+
},
|
14949 |
+
{
|
14950 |
+
"epoch": 0.8272576053180846,
|
14951 |
+
"grad_norm": 0.18721547722816467,
|
14952 |
+
"learning_rate": 7.452625600022629e-07,
|
14953 |
+
"loss": 1.3803,
|
14954 |
+
"step": 2135
|
14955 |
+
},
|
14956 |
+
{
|
14957 |
+
"epoch": 0.8276450796062897,
|
14958 |
+
"grad_norm": 0.20958904922008514,
|
14959 |
+
"learning_rate": 7.420021516934539e-07,
|
14960 |
+
"loss": 1.3736,
|
14961 |
+
"step": 2136
|
14962 |
}
|
14963 |
],
|
14964 |
"logging_steps": 1.0,
|
|
|
14978 |
"attributes": {}
|
14979 |
}
|
14980 |
},
|
14981 |
+
"total_flos": 2.1129038541824717e+18,
|
14982 |
"train_batch_size": 1,
|
14983 |
"trial_name": null,
|
14984 |
"trial_params": null
|