Training in progress, step 30, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 167832240
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f77efe7c696f1f83c1ba8383fe1b4abe439a331ad0a46072e39ea36c9855caf2
|
3 |
size 167832240
|
last-checkpoint/optimizer.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 335945362
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9bed1df47aa29b52b0bf53c0fbabaacf9695c33b46c93b1877635d3dceffa91c
|
3 |
size 335945362
|
last-checkpoint/pytorch_model_fsdp.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 167939550
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:161deca60bc23044836845b3011cdcd2cee90523878c3a442ac389bfa9a782ed
|
3 |
size 167939550
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2def2cd24154d8cecbaa07c36ae27e5ebb9b7273a78abfea27aa67c480e4ae2b
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -307,6 +307,156 @@
|
|
307 |
"rewards/margins": 1.7580511569976807,
|
308 |
"rewards/rejected": -3.8936402797698975,
|
309 |
"step": 20
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
310 |
}
|
311 |
],
|
312 |
"logging_steps": 1,
|
@@ -321,7 +471,7 @@
|
|
321 |
"should_evaluate": false,
|
322 |
"should_log": false,
|
323 |
"should_save": true,
|
324 |
-
"should_training_stop":
|
325 |
},
|
326 |
"attributes": {}
|
327 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.013530273988048258,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 30,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
307 |
"rewards/margins": 1.7580511569976807,
|
308 |
"rewards/rejected": -3.8936402797698975,
|
309 |
"step": 20
|
310 |
+
},
|
311 |
+
{
|
312 |
+
"epoch": 0.009471191791633781,
|
313 |
+
"grad_norm": 29.850914001464844,
|
314 |
+
"learning_rate": 5.7422070843492734e-05,
|
315 |
+
"logits/chosen": -2.274322986602783,
|
316 |
+
"logits/rejected": -2.284355401992798,
|
317 |
+
"logps/chosen": -80.8134994506836,
|
318 |
+
"logps/rejected": -135.7605743408203,
|
319 |
+
"loss": 0.2564,
|
320 |
+
"rewards/accuracies": 1.0,
|
321 |
+
"rewards/chosen": -2.499776840209961,
|
322 |
+
"rewards/margins": 2.5717291831970215,
|
323 |
+
"rewards/rejected": -5.071506023406982,
|
324 |
+
"step": 21
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.009922200924568723,
|
328 |
+
"grad_norm": 37.985252380371094,
|
329 |
+
"learning_rate": 4.6417320502100316e-05,
|
330 |
+
"logits/chosen": -2.089611530303955,
|
331 |
+
"logits/rejected": -2.0775773525238037,
|
332 |
+
"logps/chosen": -100.17402648925781,
|
333 |
+
"logps/rejected": -148.91050720214844,
|
334 |
+
"loss": 0.1301,
|
335 |
+
"rewards/accuracies": 1.0,
|
336 |
+
"rewards/chosen": -4.1707353591918945,
|
337 |
+
"rewards/margins": 2.9915812015533447,
|
338 |
+
"rewards/rejected": -7.16231632232666,
|
339 |
+
"step": 22
|
340 |
+
},
|
341 |
+
{
|
342 |
+
"epoch": 0.010373210057503664,
|
343 |
+
"grad_norm": 59.194644927978516,
|
344 |
+
"learning_rate": 3.6257601025131026e-05,
|
345 |
+
"logits/chosen": -2.485840320587158,
|
346 |
+
"logits/rejected": -2.474033832550049,
|
347 |
+
"logps/chosen": -94.62644958496094,
|
348 |
+
"logps/rejected": -156.5,
|
349 |
+
"loss": 0.2311,
|
350 |
+
"rewards/accuracies": 0.875,
|
351 |
+
"rewards/chosen": -3.4503886699676514,
|
352 |
+
"rewards/margins": 3.574744939804077,
|
353 |
+
"rewards/rejected": -7.025134086608887,
|
354 |
+
"step": 23
|
355 |
+
},
|
356 |
+
{
|
357 |
+
"epoch": 0.010824219190438607,
|
358 |
+
"grad_norm": 62.3109130859375,
|
359 |
+
"learning_rate": 2.7103137257858868e-05,
|
360 |
+
"logits/chosen": -1.6875534057617188,
|
361 |
+
"logits/rejected": -1.3186326026916504,
|
362 |
+
"logps/chosen": -124.91173553466797,
|
363 |
+
"logps/rejected": -129.53883361816406,
|
364 |
+
"loss": 0.4622,
|
365 |
+
"rewards/accuracies": 0.625,
|
366 |
+
"rewards/chosen": -4.360774993896484,
|
367 |
+
"rewards/margins": 0.7096513509750366,
|
368 |
+
"rewards/rejected": -5.070426940917969,
|
369 |
+
"step": 24
|
370 |
+
},
|
371 |
+
{
|
372 |
+
"epoch": 0.011275228323373548,
|
373 |
+
"grad_norm": 56.34513473510742,
|
374 |
+
"learning_rate": 1.9098300562505266e-05,
|
375 |
+
"logits/chosen": -2.1966428756713867,
|
376 |
+
"logits/rejected": -2.1575562953948975,
|
377 |
+
"logps/chosen": -163.96261596679688,
|
378 |
+
"logps/rejected": -189.7874298095703,
|
379 |
+
"loss": 0.4808,
|
380 |
+
"rewards/accuracies": 0.75,
|
381 |
+
"rewards/chosen": -6.970208644866943,
|
382 |
+
"rewards/margins": 1.3194499015808105,
|
383 |
+
"rewards/rejected": -8.289658546447754,
|
384 |
+
"step": 25
|
385 |
+
},
|
386 |
+
{
|
387 |
+
"epoch": 0.01172623745630849,
|
388 |
+
"grad_norm": 69.01769256591797,
|
389 |
+
"learning_rate": 1.2369331995613665e-05,
|
390 |
+
"logits/chosen": -2.300128936767578,
|
391 |
+
"logits/rejected": -2.3753676414489746,
|
392 |
+
"logps/chosen": -119.34609985351562,
|
393 |
+
"logps/rejected": -164.77468872070312,
|
394 |
+
"loss": 0.4093,
|
395 |
+
"rewards/accuracies": 0.75,
|
396 |
+
"rewards/chosen": -4.722940921783447,
|
397 |
+
"rewards/margins": 1.6089292764663696,
|
398 |
+
"rewards/rejected": -6.331870079040527,
|
399 |
+
"step": 26
|
400 |
+
},
|
401 |
+
{
|
402 |
+
"epoch": 0.012177246589243433,
|
403 |
+
"grad_norm": 66.55928802490234,
|
404 |
+
"learning_rate": 7.022351411174866e-06,
|
405 |
+
"logits/chosen": -2.0684125423431396,
|
406 |
+
"logits/rejected": -2.194251537322998,
|
407 |
+
"logps/chosen": -105.67616271972656,
|
408 |
+
"logps/rejected": -146.2047882080078,
|
409 |
+
"loss": 0.5255,
|
410 |
+
"rewards/accuracies": 0.625,
|
411 |
+
"rewards/chosen": -4.283657550811768,
|
412 |
+
"rewards/margins": 1.8610727787017822,
|
413 |
+
"rewards/rejected": -6.144730091094971,
|
414 |
+
"step": 27
|
415 |
+
},
|
416 |
+
{
|
417 |
+
"epoch": 0.012628255722178374,
|
418 |
+
"grad_norm": 80.93476104736328,
|
419 |
+
"learning_rate": 3.1416838871368924e-06,
|
420 |
+
"logits/chosen": -1.8295559883117676,
|
421 |
+
"logits/rejected": -1.727657437324524,
|
422 |
+
"logps/chosen": -101.6170883178711,
|
423 |
+
"logps/rejected": -151.7958984375,
|
424 |
+
"loss": 0.6077,
|
425 |
+
"rewards/accuracies": 0.5,
|
426 |
+
"rewards/chosen": -4.0400214195251465,
|
427 |
+
"rewards/margins": 1.7221603393554688,
|
428 |
+
"rewards/rejected": -5.762181758880615,
|
429 |
+
"step": 28
|
430 |
+
},
|
431 |
+
{
|
432 |
+
"epoch": 0.013079264855113315,
|
433 |
+
"grad_norm": 63.70692443847656,
|
434 |
+
"learning_rate": 7.885298685522235e-07,
|
435 |
+
"logits/chosen": -2.315255880355835,
|
436 |
+
"logits/rejected": -2.3140575885772705,
|
437 |
+
"logps/chosen": -125.00370788574219,
|
438 |
+
"logps/rejected": -164.20457458496094,
|
439 |
+
"loss": 0.4891,
|
440 |
+
"rewards/accuracies": 0.75,
|
441 |
+
"rewards/chosen": -5.734856128692627,
|
442 |
+
"rewards/margins": 2.0604777336120605,
|
443 |
+
"rewards/rejected": -7.7953338623046875,
|
444 |
+
"step": 29
|
445 |
+
},
|
446 |
+
{
|
447 |
+
"epoch": 0.013530273988048258,
|
448 |
+
"grad_norm": 73.19666290283203,
|
449 |
+
"learning_rate": 0.0,
|
450 |
+
"logits/chosen": -1.289480209350586,
|
451 |
+
"logits/rejected": -1.168398380279541,
|
452 |
+
"logps/chosen": -81.09046936035156,
|
453 |
+
"logps/rejected": -124.95771789550781,
|
454 |
+
"loss": 0.4664,
|
455 |
+
"rewards/accuracies": 0.75,
|
456 |
+
"rewards/chosen": -2.711899757385254,
|
457 |
+
"rewards/margins": 3.076801300048828,
|
458 |
+
"rewards/rejected": -5.788701057434082,
|
459 |
+
"step": 30
|
460 |
}
|
461 |
],
|
462 |
"logging_steps": 1,
|
|
|
471 |
"should_evaluate": false,
|
472 |
"should_log": false,
|
473 |
"should_save": true,
|
474 |
+
"should_training_stop": true
|
475 |
},
|
476 |
"attributes": {}
|
477 |
}
|