|
{ |
|
"best_metric": 0.012590945698320866, |
|
"best_model_checkpoint": "./Zephyr/28-03-24-Weni-WeniGPT-QA-Zephyr-7B-4.0.2-KTO_WeniGPT Experiment using KTO trainer with no collator, Zephyr model and random system prompt.-2_max_steps-786_batch_32_2024-03-28_ppid_9/checkpoint-200", |
|
"epoch": 1.5223596574690772, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 3.973484516143799, |
|
"kl": 0.07447954267263412, |
|
"learning_rate": 0.00015833333333333332, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.3654, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 2.1150100231170654, |
|
"kl": 0.24954533576965332, |
|
"learning_rate": 0.00019606299212598428, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.1911, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -157.132080078125, |
|
"eval_logps/rejected": -431.1063232421875, |
|
"eval_loss": 0.019795970991253853, |
|
"eval_rewards/chosen": 5.025023460388184, |
|
"eval_rewards/margins": 28.0367374420166, |
|
"eval_rewards/rejected": -23.0117130279541, |
|
"eval_runtime": 209.568, |
|
"eval_samples_per_second": 2.252, |
|
"eval_steps_per_second": 0.563, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.8193832039833069, |
|
"kl": 0.0, |
|
"learning_rate": 0.00019081364829396326, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.1212, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.1438875198364258, |
|
"kl": 0.0, |
|
"learning_rate": 0.00018556430446194227, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0613, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 1.5549229383468628, |
|
"kl": 0.30086809396743774, |
|
"learning_rate": 0.00018031496062992125, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0525, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -151.0825958251953, |
|
"eval_logps/rejected": -502.1325378417969, |
|
"eval_loss": 0.015516282990574837, |
|
"eval_rewards/chosen": 5.629973411560059, |
|
"eval_rewards/margins": 35.744300842285156, |
|
"eval_rewards/rejected": -30.11433219909668, |
|
"eval_runtime": 209.5342, |
|
"eval_samples_per_second": 2.253, |
|
"eval_steps_per_second": 0.563, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.16322359442710876, |
|
"kl": 0.0, |
|
"learning_rate": 0.0001750656167979003, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.037, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.8122725486755371, |
|
"kl": 0.0, |
|
"learning_rate": 0.00016981627296587927, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0684, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -145.42347717285156, |
|
"eval_logps/rejected": -530.3358154296875, |
|
"eval_loss": 0.010358058847486973, |
|
"eval_rewards/chosen": 6.19588565826416, |
|
"eval_rewards/margins": 39.130550384521484, |
|
"eval_rewards/rejected": -32.934661865234375, |
|
"eval_runtime": 209.5283, |
|
"eval_samples_per_second": 2.253, |
|
"eval_steps_per_second": 0.563, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 0.11464398354291916, |
|
"kl": 0.0, |
|
"learning_rate": 0.00016456692913385828, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0167, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 5.260560989379883, |
|
"kl": 0.0, |
|
"learning_rate": 0.00015931758530183726, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.131, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 0.15409517288208008, |
|
"kl": 0.0, |
|
"learning_rate": 0.0001540682414698163, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0253, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -147.8525390625, |
|
"eval_logps/rejected": -489.1932678222656, |
|
"eval_loss": 0.012590945698320866, |
|
"eval_rewards/chosen": 5.952979564666748, |
|
"eval_rewards/margins": 34.77338790893555, |
|
"eval_rewards/rejected": -28.82040786743164, |
|
"eval_runtime": 209.5044, |
|
"eval_samples_per_second": 2.253, |
|
"eval_steps_per_second": 0.563, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 786, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 6, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|