|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 274, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2191780821917808, |
|
"grad_norm": 5.121945381164551, |
|
"learning_rate": 9.997441533484527e-05, |
|
"log_odds_chosen": 7.0944390296936035, |
|
"log_odds_ratio": -0.043758511543273926, |
|
"logits/chosen": -11.323039054870605, |
|
"logits/rejected": -11.33891487121582, |
|
"logps/chosen": -0.5756699442863464, |
|
"logps/rejected": -6.692573547363281, |
|
"loss": 0.721, |
|
"nll_loss": 0.716595470905304, |
|
"rewards/accuracies": 0.9791666865348816, |
|
"rewards/chosen": -0.057566989213228226, |
|
"rewards/margins": 0.6116903424263, |
|
"rewards/rejected": -0.6692573428153992, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.4383561643835616, |
|
"grad_norm": 6.375761032104492, |
|
"learning_rate": 9.573779290555039e-05, |
|
"log_odds_chosen": 7.421543598175049, |
|
"log_odds_ratio": -0.0334482379257679, |
|
"logits/chosen": -12.235654830932617, |
|
"logits/rejected": -12.264952659606934, |
|
"logps/chosen": -0.5262126326560974, |
|
"logps/rejected": -6.922983169555664, |
|
"loss": 0.6832, |
|
"nll_loss": 0.679860532283783, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.0526212677359581, |
|
"rewards/margins": 0.6396770477294922, |
|
"rewards/rejected": -0.6922982931137085, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.6575342465753424, |
|
"grad_norm": 5.337998390197754, |
|
"learning_rate": 8.484270686572078e-05, |
|
"log_odds_chosen": 7.344568729400635, |
|
"log_odds_ratio": -0.047614481300115585, |
|
"logits/chosen": -11.1071138381958, |
|
"logits/rejected": -11.16305160522461, |
|
"logps/chosen": -0.6026201844215393, |
|
"logps/rejected": -7.023316860198975, |
|
"loss": 0.7666, |
|
"nll_loss": 0.7618839144706726, |
|
"rewards/accuracies": 0.9791666865348816, |
|
"rewards/chosen": -0.06026201695203781, |
|
"rewards/margins": 0.6420697569847107, |
|
"rewards/rejected": -0.7023317217826843, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.8767123287671232, |
|
"grad_norm": 7.20003080368042, |
|
"learning_rate": 6.887525329304994e-05, |
|
"log_odds_chosen": 6.544267177581787, |
|
"log_odds_ratio": -0.08398038893938065, |
|
"logits/chosen": -9.961735725402832, |
|
"logits/rejected": -9.985981941223145, |
|
"logps/chosen": -0.7504870295524597, |
|
"logps/rejected": -6.561529636383057, |
|
"loss": 0.8312, |
|
"nll_loss": 0.8227914571762085, |
|
"rewards/accuracies": 0.9583333134651184, |
|
"rewards/chosen": -0.07504869997501373, |
|
"rewards/margins": 0.5811043381690979, |
|
"rewards/rejected": -0.6561529636383057, |
|
"step": 240 |
|
} |
|
], |
|
"logging_steps": 60, |
|
"max_steps": 546, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|