amos1088 commited on
Commit
a2c02cc
·
verified ·
1 Parent(s): 79786f5

Training in progress, step 4800, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b6127712206b95791c918214f16b785d763b67224b1b0e443a55295ce29d047
3
  size 35668592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5ea8d63209da0362125cd6447d2de658e74f5fea775b2f305ca382e4f3a5042
3
  size 35668592
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50e558348acae36b84be93a81ed01c8ccfa115577debf3c6ace9465c91fc8a89
3
  size 18257163
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfb87c5ff52c997cb909c5bed0b628103fd402ac227d83df5d436e438df6570a
3
  size 18257163
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a34fec6a0d847723344253412c25b018523e427d3c1952286ab62a2afd2b427
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b429070a564985551cfca2e541b4c4fca20d998c67cc7cb6e2b59f638df425a3
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f12e6df1a2c888055dea6dcde09c6337c5435ca6c858dee81b0362f43b35c4b
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80a0690e68ff79fcaeb99618671437e762b40ceead1e7b8bcf2edd5ec6620941
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.1198546494311075,
6
  "eval_steps": 500,
7
- "global_step": 4700,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -7058,6 +7058,156 @@
7058
  "rewards/margins": 16.355287551879883,
7059
  "rewards/rejected": -14.908761978149414,
7060
  "step": 4700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7061
  }
7062
  ],
7063
  "logging_steps": 10,
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.1436826115446477,
6
  "eval_steps": 500,
7
+ "global_step": 4800,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
7058
  "rewards/margins": 16.355287551879883,
7059
  "rewards/rejected": -14.908761978149414,
7060
  "step": 4700
7061
+ },
7062
+ {
7063
+ "epoch": 1.1222374456424615,
7064
+ "grad_norm": 2.21546338252665e-06,
7065
+ "learning_rate": 3.9414545881179945e-05,
7066
+ "logits/chosen": 9.5210599899292,
7067
+ "logits/rejected": 8.838404655456543,
7068
+ "logps/chosen": -3.8852012157440186,
7069
+ "logps/rejected": -168.62191772460938,
7070
+ "loss": 0.0,
7071
+ "rewards/accuracies": 1.0,
7072
+ "rewards/chosen": 1.363797664642334,
7073
+ "rewards/margins": 16.466297149658203,
7074
+ "rewards/rejected": -15.102502822875977,
7075
+ "step": 4710
7076
+ },
7077
+ {
7078
+ "epoch": 1.1246202418538154,
7079
+ "grad_norm": 7.269867637660354e-06,
7080
+ "learning_rate": 3.935785819277189e-05,
7081
+ "logits/chosen": 8.97862434387207,
7082
+ "logits/rejected": 8.629661560058594,
7083
+ "logps/chosen": -3.853353500366211,
7084
+ "logps/rejected": -170.79342651367188,
7085
+ "loss": 0.0,
7086
+ "rewards/accuracies": 1.0,
7087
+ "rewards/chosen": 1.3444960117340088,
7088
+ "rewards/margins": 16.734460830688477,
7089
+ "rewards/rejected": -15.389966011047363,
7090
+ "step": 4720
7091
+ },
7092
+ {
7093
+ "epoch": 1.1270030380651694,
7094
+ "grad_norm": 1.5713922039140016e-05,
7095
+ "learning_rate": 3.930106013395591e-05,
7096
+ "logits/chosen": 9.156143188476562,
7097
+ "logits/rejected": 8.84025764465332,
7098
+ "logps/chosen": -3.664874315261841,
7099
+ "logps/rejected": -169.4293670654297,
7100
+ "loss": 0.0,
7101
+ "rewards/accuracies": 1.0,
7102
+ "rewards/chosen": 1.40371572971344,
7103
+ "rewards/margins": 16.51462173461914,
7104
+ "rewards/rejected": -15.110905647277832,
7105
+ "step": 4730
7106
+ },
7107
+ {
7108
+ "epoch": 1.1293858342765235,
7109
+ "grad_norm": 1.7429217677999986e-06,
7110
+ "learning_rate": 3.924415214134479e-05,
7111
+ "logits/chosen": 9.136969566345215,
7112
+ "logits/rejected": 8.276262283325195,
7113
+ "logps/chosen": -4.189261436462402,
7114
+ "logps/rejected": -169.33648681640625,
7115
+ "loss": 0.0,
7116
+ "rewards/accuracies": 1.0,
7117
+ "rewards/chosen": 1.3977916240692139,
7118
+ "rewards/margins": 16.54281997680664,
7119
+ "rewards/rejected": -15.145029067993164,
7120
+ "step": 4740
7121
+ },
7122
+ {
7123
+ "epoch": 1.1317686304878776,
7124
+ "grad_norm": 1.802428232622333e-05,
7125
+ "learning_rate": 3.9187134652396454e-05,
7126
+ "logits/chosen": 8.582501411437988,
7127
+ "logits/rejected": 8.448382377624512,
7128
+ "logps/chosen": -4.113119602203369,
7129
+ "logps/rejected": -168.79092407226562,
7130
+ "loss": 0.0,
7131
+ "rewards/accuracies": 1.0,
7132
+ "rewards/chosen": 1.4219516515731812,
7133
+ "rewards/margins": 16.33131980895996,
7134
+ "rewards/rejected": -14.909370422363281,
7135
+ "step": 4750
7136
+ },
7137
+ {
7138
+ "epoch": 1.1341514266992316,
7139
+ "grad_norm": 5.4121765060699545e-06,
7140
+ "learning_rate": 3.913000810541049e-05,
7141
+ "logits/chosen": 8.93317699432373,
7142
+ "logits/rejected": 8.46564769744873,
7143
+ "logps/chosen": -3.915933132171631,
7144
+ "logps/rejected": -168.9101104736328,
7145
+ "loss": 0.0,
7146
+ "rewards/accuracies": 1.0,
7147
+ "rewards/chosen": 1.3572721481323242,
7148
+ "rewards/margins": 16.39585304260254,
7149
+ "rewards/rejected": -15.038583755493164,
7150
+ "step": 4760
7151
+ },
7152
+ {
7153
+ "epoch": 1.1365342229105855,
7154
+ "grad_norm": 3.48685034623486e-06,
7155
+ "learning_rate": 3.907277293952483e-05,
7156
+ "logits/chosen": 8.73112964630127,
7157
+ "logits/rejected": 8.221918106079102,
7158
+ "logps/chosen": -3.9176087379455566,
7159
+ "logps/rejected": -170.79434204101562,
7160
+ "loss": 0.0,
7161
+ "rewards/accuracies": 1.0,
7162
+ "rewards/chosen": 1.3888115882873535,
7163
+ "rewards/margins": 16.5882511138916,
7164
+ "rewards/rejected": -15.199438095092773,
7165
+ "step": 4770
7166
+ },
7167
+ {
7168
+ "epoch": 1.1389170191219395,
7169
+ "grad_norm": 2.7596881864155876e-06,
7170
+ "learning_rate": 3.90154295947124e-05,
7171
+ "logits/chosen": 9.215482711791992,
7172
+ "logits/rejected": 8.320622444152832,
7173
+ "logps/chosen": -4.540711879730225,
7174
+ "logps/rejected": -170.30410766601562,
7175
+ "loss": 0.0,
7176
+ "rewards/accuracies": 1.0,
7177
+ "rewards/chosen": 1.3221644163131714,
7178
+ "rewards/margins": 16.54549789428711,
7179
+ "rewards/rejected": -15.223333358764648,
7180
+ "step": 4780
7181
+ },
7182
+ {
7183
+ "epoch": 1.1412998153332936,
7184
+ "grad_norm": 5.95591291130404e-06,
7185
+ "learning_rate": 3.895797851177767e-05,
7186
+ "logits/chosen": 9.725410461425781,
7187
+ "logits/rejected": 8.952868461608887,
7188
+ "logps/chosen": -4.404567718505859,
7189
+ "logps/rejected": -168.9754180908203,
7190
+ "loss": 0.0,
7191
+ "rewards/accuracies": 1.0,
7192
+ "rewards/chosen": 1.4016621112823486,
7193
+ "rewards/margins": 16.36920738220215,
7194
+ "rewards/rejected": -14.967544555664062,
7195
+ "step": 4790
7196
+ },
7197
+ {
7198
+ "epoch": 1.1436826115446477,
7199
+ "grad_norm": 1.5033992895041592e-05,
7200
+ "learning_rate": 3.890042013235334e-05,
7201
+ "logits/chosen": 9.249860763549805,
7202
+ "logits/rejected": 8.731368064880371,
7203
+ "logps/chosen": -3.9462552070617676,
7204
+ "logps/rejected": -168.36050415039062,
7205
+ "loss": 0.0,
7206
+ "rewards/accuracies": 1.0,
7207
+ "rewards/chosen": 1.422627329826355,
7208
+ "rewards/margins": 16.3912410736084,
7209
+ "rewards/rejected": -14.968612670898438,
7210
+ "step": 4800
7211
  }
7212
  ],
7213
  "logging_steps": 10,