|
{ |
|
"best_metric": 0.015516282990574837, |
|
"best_model_checkpoint": "./Zephyr/28-03-24-Weni-WeniGPT-QA-Zephyr-7B-4.0.2-KTO_WeniGPT Experiment using KTO trainer with no collator, Zephyr model and random system prompt.-2_max_steps-786_batch_32_2024-03-28_ppid_9/checkpoint-100", |
|
"epoch": 0.7611798287345385, |
|
"eval_steps": 50, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 3.973484516143799, |
|
"kl": 0.07447954267263412, |
|
"learning_rate": 0.00015833333333333332, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.3654, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 2.1150100231170654, |
|
"kl": 0.24954533576965332, |
|
"learning_rate": 0.00019606299212598428, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.1911, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -157.132080078125, |
|
"eval_logps/rejected": -431.1063232421875, |
|
"eval_loss": 0.019795970991253853, |
|
"eval_rewards/chosen": 5.025023460388184, |
|
"eval_rewards/margins": 28.0367374420166, |
|
"eval_rewards/rejected": -23.0117130279541, |
|
"eval_runtime": 209.568, |
|
"eval_samples_per_second": 2.252, |
|
"eval_steps_per_second": 0.563, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.8193832039833069, |
|
"kl": 0.0, |
|
"learning_rate": 0.00019081364829396326, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.1212, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.1438875198364258, |
|
"kl": 0.0, |
|
"learning_rate": 0.00018556430446194227, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0613, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 1.5549229383468628, |
|
"kl": 0.30086809396743774, |
|
"learning_rate": 0.00018031496062992125, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.0525, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -151.0825958251953, |
|
"eval_logps/rejected": -502.1325378417969, |
|
"eval_loss": 0.015516282990574837, |
|
"eval_rewards/chosen": 5.629973411560059, |
|
"eval_rewards/margins": 35.744300842285156, |
|
"eval_rewards/rejected": -30.11433219909668, |
|
"eval_runtime": 209.5342, |
|
"eval_samples_per_second": 2.253, |
|
"eval_steps_per_second": 0.563, |
|
"step": 100 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 786, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 6, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|