Training in progress, step 4800, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 35668592
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a5ea8d63209da0362125cd6447d2de658e74f5fea775b2f305ca382e4f3a5042
|
3 |
size 35668592
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 18257163
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bfb87c5ff52c997cb909c5bed0b628103fd402ac227d83df5d436e438df6570a
|
3 |
size 18257163
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1383
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b429070a564985551cfca2e541b4c4fca20d998c67cc7cb6e2b59f638df425a3
|
3 |
size 1383
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1465
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:80a0690e68ff79fcaeb99618671437e762b40ceead1e7b8bcf2edd5ec6620941
|
3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 1.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -7058,6 +7058,156 @@
|
|
7058 |
"rewards/margins": 16.355287551879883,
|
7059 |
"rewards/rejected": -14.908761978149414,
|
7060 |
"step": 4700
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7061 |
}
|
7062 |
],
|
7063 |
"logging_steps": 10,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 1.1436826115446477,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 4800,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
7058 |
"rewards/margins": 16.355287551879883,
|
7059 |
"rewards/rejected": -14.908761978149414,
|
7060 |
"step": 4700
|
7061 |
+
},
|
7062 |
+
{
|
7063 |
+
"epoch": 1.1222374456424615,
|
7064 |
+
"grad_norm": 2.21546338252665e-06,
|
7065 |
+
"learning_rate": 3.9414545881179945e-05,
|
7066 |
+
"logits/chosen": 9.5210599899292,
|
7067 |
+
"logits/rejected": 8.838404655456543,
|
7068 |
+
"logps/chosen": -3.8852012157440186,
|
7069 |
+
"logps/rejected": -168.62191772460938,
|
7070 |
+
"loss": 0.0,
|
7071 |
+
"rewards/accuracies": 1.0,
|
7072 |
+
"rewards/chosen": 1.363797664642334,
|
7073 |
+
"rewards/margins": 16.466297149658203,
|
7074 |
+
"rewards/rejected": -15.102502822875977,
|
7075 |
+
"step": 4710
|
7076 |
+
},
|
7077 |
+
{
|
7078 |
+
"epoch": 1.1246202418538154,
|
7079 |
+
"grad_norm": 7.269867637660354e-06,
|
7080 |
+
"learning_rate": 3.935785819277189e-05,
|
7081 |
+
"logits/chosen": 8.97862434387207,
|
7082 |
+
"logits/rejected": 8.629661560058594,
|
7083 |
+
"logps/chosen": -3.853353500366211,
|
7084 |
+
"logps/rejected": -170.79342651367188,
|
7085 |
+
"loss": 0.0,
|
7086 |
+
"rewards/accuracies": 1.0,
|
7087 |
+
"rewards/chosen": 1.3444960117340088,
|
7088 |
+
"rewards/margins": 16.734460830688477,
|
7089 |
+
"rewards/rejected": -15.389966011047363,
|
7090 |
+
"step": 4720
|
7091 |
+
},
|
7092 |
+
{
|
7093 |
+
"epoch": 1.1270030380651694,
|
7094 |
+
"grad_norm": 1.5713922039140016e-05,
|
7095 |
+
"learning_rate": 3.930106013395591e-05,
|
7096 |
+
"logits/chosen": 9.156143188476562,
|
7097 |
+
"logits/rejected": 8.84025764465332,
|
7098 |
+
"logps/chosen": -3.664874315261841,
|
7099 |
+
"logps/rejected": -169.4293670654297,
|
7100 |
+
"loss": 0.0,
|
7101 |
+
"rewards/accuracies": 1.0,
|
7102 |
+
"rewards/chosen": 1.40371572971344,
|
7103 |
+
"rewards/margins": 16.51462173461914,
|
7104 |
+
"rewards/rejected": -15.110905647277832,
|
7105 |
+
"step": 4730
|
7106 |
+
},
|
7107 |
+
{
|
7108 |
+
"epoch": 1.1293858342765235,
|
7109 |
+
"grad_norm": 1.7429217677999986e-06,
|
7110 |
+
"learning_rate": 3.924415214134479e-05,
|
7111 |
+
"logits/chosen": 9.136969566345215,
|
7112 |
+
"logits/rejected": 8.276262283325195,
|
7113 |
+
"logps/chosen": -4.189261436462402,
|
7114 |
+
"logps/rejected": -169.33648681640625,
|
7115 |
+
"loss": 0.0,
|
7116 |
+
"rewards/accuracies": 1.0,
|
7117 |
+
"rewards/chosen": 1.3977916240692139,
|
7118 |
+
"rewards/margins": 16.54281997680664,
|
7119 |
+
"rewards/rejected": -15.145029067993164,
|
7120 |
+
"step": 4740
|
7121 |
+
},
|
7122 |
+
{
|
7123 |
+
"epoch": 1.1317686304878776,
|
7124 |
+
"grad_norm": 1.802428232622333e-05,
|
7125 |
+
"learning_rate": 3.9187134652396454e-05,
|
7126 |
+
"logits/chosen": 8.582501411437988,
|
7127 |
+
"logits/rejected": 8.448382377624512,
|
7128 |
+
"logps/chosen": -4.113119602203369,
|
7129 |
+
"logps/rejected": -168.79092407226562,
|
7130 |
+
"loss": 0.0,
|
7131 |
+
"rewards/accuracies": 1.0,
|
7132 |
+
"rewards/chosen": 1.4219516515731812,
|
7133 |
+
"rewards/margins": 16.33131980895996,
|
7134 |
+
"rewards/rejected": -14.909370422363281,
|
7135 |
+
"step": 4750
|
7136 |
+
},
|
7137 |
+
{
|
7138 |
+
"epoch": 1.1341514266992316,
|
7139 |
+
"grad_norm": 5.4121765060699545e-06,
|
7140 |
+
"learning_rate": 3.913000810541049e-05,
|
7141 |
+
"logits/chosen": 8.93317699432373,
|
7142 |
+
"logits/rejected": 8.46564769744873,
|
7143 |
+
"logps/chosen": -3.915933132171631,
|
7144 |
+
"logps/rejected": -168.9101104736328,
|
7145 |
+
"loss": 0.0,
|
7146 |
+
"rewards/accuracies": 1.0,
|
7147 |
+
"rewards/chosen": 1.3572721481323242,
|
7148 |
+
"rewards/margins": 16.39585304260254,
|
7149 |
+
"rewards/rejected": -15.038583755493164,
|
7150 |
+
"step": 4760
|
7151 |
+
},
|
7152 |
+
{
|
7153 |
+
"epoch": 1.1365342229105855,
|
7154 |
+
"grad_norm": 3.48685034623486e-06,
|
7155 |
+
"learning_rate": 3.907277293952483e-05,
|
7156 |
+
"logits/chosen": 8.73112964630127,
|
7157 |
+
"logits/rejected": 8.221918106079102,
|
7158 |
+
"logps/chosen": -3.9176087379455566,
|
7159 |
+
"logps/rejected": -170.79434204101562,
|
7160 |
+
"loss": 0.0,
|
7161 |
+
"rewards/accuracies": 1.0,
|
7162 |
+
"rewards/chosen": 1.3888115882873535,
|
7163 |
+
"rewards/margins": 16.5882511138916,
|
7164 |
+
"rewards/rejected": -15.199438095092773,
|
7165 |
+
"step": 4770
|
7166 |
+
},
|
7167 |
+
{
|
7168 |
+
"epoch": 1.1389170191219395,
|
7169 |
+
"grad_norm": 2.7596881864155876e-06,
|
7170 |
+
"learning_rate": 3.90154295947124e-05,
|
7171 |
+
"logits/chosen": 9.215482711791992,
|
7172 |
+
"logits/rejected": 8.320622444152832,
|
7173 |
+
"logps/chosen": -4.540711879730225,
|
7174 |
+
"logps/rejected": -170.30410766601562,
|
7175 |
+
"loss": 0.0,
|
7176 |
+
"rewards/accuracies": 1.0,
|
7177 |
+
"rewards/chosen": 1.3221644163131714,
|
7178 |
+
"rewards/margins": 16.54549789428711,
|
7179 |
+
"rewards/rejected": -15.223333358764648,
|
7180 |
+
"step": 4780
|
7181 |
+
},
|
7182 |
+
{
|
7183 |
+
"epoch": 1.1412998153332936,
|
7184 |
+
"grad_norm": 5.95591291130404e-06,
|
7185 |
+
"learning_rate": 3.895797851177767e-05,
|
7186 |
+
"logits/chosen": 9.725410461425781,
|
7187 |
+
"logits/rejected": 8.952868461608887,
|
7188 |
+
"logps/chosen": -4.404567718505859,
|
7189 |
+
"logps/rejected": -168.9754180908203,
|
7190 |
+
"loss": 0.0,
|
7191 |
+
"rewards/accuracies": 1.0,
|
7192 |
+
"rewards/chosen": 1.4016621112823486,
|
7193 |
+
"rewards/margins": 16.36920738220215,
|
7194 |
+
"rewards/rejected": -14.967544555664062,
|
7195 |
+
"step": 4790
|
7196 |
+
},
|
7197 |
+
{
|
7198 |
+
"epoch": 1.1436826115446477,
|
7199 |
+
"grad_norm": 1.5033992895041592e-05,
|
7200 |
+
"learning_rate": 3.890042013235334e-05,
|
7201 |
+
"logits/chosen": 9.249860763549805,
|
7202 |
+
"logits/rejected": 8.731368064880371,
|
7203 |
+
"logps/chosen": -3.9462552070617676,
|
7204 |
+
"logps/rejected": -168.36050415039062,
|
7205 |
+
"loss": 0.0,
|
7206 |
+
"rewards/accuracies": 1.0,
|
7207 |
+
"rewards/chosen": 1.422627329826355,
|
7208 |
+
"rewards/margins": 16.3912410736084,
|
7209 |
+
"rewards/rejected": -14.968612670898438,
|
7210 |
+
"step": 4800
|
7211 |
}
|
7212 |
],
|
7213 |
"logging_steps": 10,
|