End of training

Browse files

Files changed (5) hide show

README.md +2 -1
all_results.json +8 -0
train_results.json +8 -0
trainer_state.json +287 -0
training_loss.png +0 -0

README.md CHANGED Viewed

@@ -4,6 +4,7 @@ license: apache-2.0
 base_model: Qwen/Qwen2.5-7B-Instruct
 tags:
 - llama-factory
 - generated_from_trainer
 model-index:
 - name: 316_globalbatchsize64_lr2e5_epochs7
@@ -15,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 # 316_globalbatchsize64_lr2e5_epochs7
-This model is a fine-tuned version of [Qwen/Qwen2.5-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct) on an unknown dataset.
 ## Model description

 base_model: Qwen/Qwen2.5-7B-Instruct
 tags:
 - llama-factory
+- full
 - generated_from_trainer
 model-index:
 - name: 316_globalbatchsize64_lr2e5_epochs7
 # 316_globalbatchsize64_lr2e5_epochs7
+This model is a fine-tuned version of [Qwen/Qwen2.5-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct) on the mlfoundations-dev/openthoughts_316 dataset.
 ## Model description

all_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 7.0,
+    "total_flos": 4.553463888976282e+16,
+    "train_loss": 0.5663332402706146,
+    "train_runtime": 2937.4683,
+    "train_samples_per_second": 0.753,
+    "train_steps_per_second": 0.012
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 7.0,
+    "total_flos": 4.553463888976282e+16,
+    "train_loss": 0.5663332402706146,
+    "train_runtime": 2937.4683,
+    "train_samples_per_second": 0.753,
+    "train_steps_per_second": 0.012
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,287 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 7.0,
+  "eval_steps": 500,
+  "global_step": 35,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.2,
+      "grad_norm": 5.582163022991408,
+      "learning_rate": 5e-06,
+      "loss": 0.832,
+      "step": 1
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 6.008039731835684,
+      "learning_rate": 1e-05,
+      "loss": 0.8909,
+      "step": 2
+    },
+    {
+      "epoch": 0.6,
+      "grad_norm": 4.475038963879312,
+      "learning_rate": 1.5000000000000002e-05,
+      "loss": 0.863,
+      "step": 3
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 2.222784034655452,
+      "learning_rate": 2e-05,
+      "loss": 0.827,
+      "step": 4
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 6.135999073483918,
+      "learning_rate": 1.994869323391895e-05,
+      "loss": 0.825,
+      "step": 5
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 11.764101726522851,
+      "learning_rate": 1.9795299412524948e-05,
+      "loss": 0.8796,
+      "step": 6
+    },
+    {
+      "epoch": 1.4,
+      "grad_norm": 7.354508676830827,
+      "learning_rate": 1.954139256400049e-05,
+      "loss": 0.8366,
+      "step": 7
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 4.238919320085481,
+      "learning_rate": 1.918957811620231e-05,
+      "loss": 0.7735,
+      "step": 8
+    },
+    {
+      "epoch": 1.8,
+      "grad_norm": 2.9298549318791065,
+      "learning_rate": 1.8743466161445823e-05,
+      "loss": 0.7437,
+      "step": 9
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 2.1068759966644612,
+      "learning_rate": 1.8207634412072765e-05,
+      "loss": 0.6851,
+      "step": 10
+    },
+    {
+      "epoch": 2.2,
+      "grad_norm": 1.8972968776086048,
+      "learning_rate": 1.758758122692791e-05,
+      "loss": 0.66,
+      "step": 11
+    },
+    {
+      "epoch": 2.4,
+      "grad_norm": 1.4067227955996648,
+      "learning_rate": 1.688966919075687e-05,
+      "loss": 0.6103,
+      "step": 12
+    },
+    {
+      "epoch": 2.6,
+      "grad_norm": 1.0132739186425044,
+      "learning_rate": 1.612105982547663e-05,
+      "loss": 0.5918,
+      "step": 13
+    },
+    {
+      "epoch": 2.8,
+      "grad_norm": 1.160603701917004,
+      "learning_rate": 1.5289640103269626e-05,
+      "loss": 0.6134,
+      "step": 14
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 1.0266169507236687,
+      "learning_rate": 1.4403941515576344e-05,
+      "loss": 0.5866,
+      "step": 15
+    },
+    {
+      "epoch": 3.2,
+      "grad_norm": 0.9000334611175349,
+      "learning_rate": 1.3473052528448203e-05,
+      "loss": 0.5074,
+      "step": 16
+    },
+    {
+      "epoch": 3.4,
+      "grad_norm": 0.7749288343944741,
+      "learning_rate": 1.2506525322587207e-05,
+      "loss": 0.5702,
+      "step": 17
+    },
+    {
+      "epoch": 3.6,
+      "grad_norm": 0.840264032378784,
+      "learning_rate": 1.1514277775045768e-05,
+      "loss": 0.5198,
+      "step": 18
+    },
+    {
+      "epoch": 3.8,
+      "grad_norm": 3.9423980762897046,
+      "learning_rate": 1.0506491688387128e-05,
+      "loss": 0.5072,
+      "step": 19
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 0.7627830974838458,
+      "learning_rate": 9.493508311612874e-06,
+      "loss": 0.4521,
+      "step": 20
+    },
+    {
+      "epoch": 4.2,
+      "grad_norm": 0.7093550273288285,
+      "learning_rate": 8.485722224954237e-06,
+      "loss": 0.4783,
+      "step": 21
+    },
+    {
+      "epoch": 4.4,
+      "grad_norm": 0.6326012475160535,
+      "learning_rate": 7.493474677412795e-06,
+      "loss": 0.4241,
+      "step": 22
+    },
+    {
+      "epoch": 4.6,
+      "grad_norm": 0.6043409226395319,
+      "learning_rate": 6.526947471551799e-06,
+      "loss": 0.4358,
+      "step": 23
+    },
+    {
+      "epoch": 4.8,
+      "grad_norm": 0.5886128932973004,
+      "learning_rate": 5.5960584844236565e-06,
+      "loss": 0.4386,
+      "step": 24
+    },
+    {
+      "epoch": 5.0,
+      "grad_norm": 0.5091039499506712,
+      "learning_rate": 4.710359896730379e-06,
+      "loss": 0.4265,
+      "step": 25
+    },
+    {
+      "epoch": 5.2,
+      "grad_norm": 0.5883683891918668,
+      "learning_rate": 3.878940174523371e-06,
+      "loss": 0.4426,
+      "step": 26
+    },
+    {
+      "epoch": 5.4,
+      "grad_norm": 0.5216083512199737,
+      "learning_rate": 3.110330809243134e-06,
+      "loss": 0.4035,
+      "step": 27
+    },
+    {
+      "epoch": 5.6,
+      "grad_norm": 0.4654121175946387,
+      "learning_rate": 2.4124187730720916e-06,
+      "loss": 0.3976,
+      "step": 28
+    },
+    {
+      "epoch": 5.8,
+      "grad_norm": 0.47214671112473383,
+      "learning_rate": 1.7923655879272395e-06,
+      "loss": 0.366,
+      "step": 29
+    },
+    {
+      "epoch": 6.0,
+      "grad_norm": 0.4239005273256747,
+      "learning_rate": 1.2565338385541792e-06,
+      "loss": 0.3635,
+      "step": 30
+    },
+    {
+      "epoch": 6.2,
+      "grad_norm": 0.4179041263780273,
+      "learning_rate": 8.10421883797694e-07,
+      "loss": 0.3764,
+      "step": 31
+    },
+    {
+      "epoch": 6.4,
+      "grad_norm": 0.4172072920004715,
+      "learning_rate": 4.5860743599951186e-07,
+      "loss": 0.3811,
+      "step": 32
+    },
+    {
+      "epoch": 6.6,
+      "grad_norm": 0.3919556118133461,
+      "learning_rate": 2.0470058747505516e-07,
+      "loss": 0.3653,
+      "step": 33
+    },
+    {
+      "epoch": 6.8,
+      "grad_norm": 0.41569714283764153,
+      "learning_rate": 5.1306766081048456e-08,
+      "loss": 0.3892,
+      "step": 34
+    },
+    {
+      "epoch": 7.0,
+      "grad_norm": 0.34824589761366104,
+      "learning_rate": 0.0,
+      "loss": 0.3582,
+      "step": 35
+    },
+    {
+      "epoch": 7.0,
+      "step": 35,
+      "total_flos": 4.553463888976282e+16,
+      "train_loss": 0.5663332402706146,
+      "train_runtime": 2937.4683,
+      "train_samples_per_second": 0.753,
+      "train_steps_per_second": 0.012
+    }
+  ],
+  "logging_steps": 1.0,
+  "max_steps": 35,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 7,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 4.553463888976282e+16,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

training_loss.png ADDED Viewed