File size: 3,296 Bytes
5c260bd 26b7788 5c260bd 26b7788 5c260bd 2ff57a0 5c260bd 26b7788 5c260bd 26b7788 5c260bd 2ff57a0 5c260bd 26b7788 5c260bd 26b7788 5c260bd 26b7788 2ff57a0 5c260bd 26b7788 5c260bd 26b7788 5c260bd 26b7788 2ff57a0 5c260bd 26b7788 5c260bd 26b7788 5c260bd 26b7788 2ff57a0 5c260bd 26b7788 3b3c2ea 2ff57a0 50e6f0f 5c260bd 26b7788 5c260bd 2ff57a0 5c260bd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
{
"best_global_step": 5,
"best_metric": 0.6269411444664001,
"best_model_checkpoint": "results/Qwen3-1.7B-Base-SFT/checkpoint-5",
"epoch": 5.0,
"eval_steps": 500,
"global_step": 5,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 1.1494814157485962,
"learning_rate": 0.0,
"loss": 0.6374,
"num_tokens": 22235.0,
"step": 1
},
{
"epoch": 1.0,
"eval_loss": 0.6593228578567505,
"eval_num_tokens": 22235.0,
"eval_runtime": 1.2252,
"eval_samples_per_second": 4.081,
"eval_steps_per_second": 4.081,
"step": 1
},
{
"epoch": 2.0,
"grad_norm": 1.1454317569732666,
"learning_rate": 4e-05,
"loss": 0.6374,
"num_tokens": 44470.0,
"step": 2
},
{
"epoch": 2.0,
"eval_loss": 0.6507495641708374,
"eval_num_tokens": 44470.0,
"eval_runtime": 1.0538,
"eval_samples_per_second": 4.745,
"eval_steps_per_second": 4.745,
"step": 2
},
{
"epoch": 3.0,
"grad_norm": 0.9575749039649963,
"learning_rate": 3.472792206135786e-05,
"loss": 0.628,
"num_tokens": 66705.0,
"step": 3
},
{
"epoch": 3.0,
"eval_loss": 0.6390534043312073,
"eval_num_tokens": 66705.0,
"eval_runtime": 1.0605,
"eval_samples_per_second": 4.715,
"eval_steps_per_second": 4.715,
"step": 3
},
{
"epoch": 4.0,
"grad_norm": 0.6721516847610474,
"learning_rate": 2.2000000000000003e-05,
"loss": 0.6143,
"num_tokens": 88940.0,
"step": 4
},
{
"epoch": 4.0,
"eval_loss": 0.6301368474960327,
"eval_num_tokens": 88940.0,
"eval_runtime": 1.0549,
"eval_samples_per_second": 4.74,
"eval_steps_per_second": 4.74,
"step": 4
},
{
"epoch": 5.0,
"grad_norm": 0.5215439796447754,
"learning_rate": 9.272077938642147e-06,
"loss": 0.6061,
"num_tokens": 111175.0,
"step": 5
},
{
"epoch": 5.0,
"eval_loss": 0.6269411444664001,
"eval_num_tokens": 111175.0,
"eval_runtime": 1.0457,
"eval_samples_per_second": 4.782,
"eval_steps_per_second": 4.782,
"step": 5
},
{
"epoch": 5.0,
"step": 5,
"total_flos": 1052153750937600.0,
"train_loss": 0.0,
"train_runtime": 2.3303,
"train_samples_per_second": 10.728,
"train_steps_per_second": 2.146
}
],
"logging_steps": 1,
"max_steps": 5,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1052153750937600.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
|