End of training

Files changed (4) hide show

README.md CHANGED Viewed

@@ -1,17 +1,20 @@
 ---
 base_model: mistralai/Mistral-7B-v0.1
 library_name: transformers
-model_name: math_ultrachatmistral5
 tags:
 - generated_from_trainer
 - trl
 - dpo
 licence: license
 ---
-# Model Card for math_ultrachatmistral5
-This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start

 ---
 base_model: mistralai/Mistral-7B-v0.1
+datasets: HuggingFaceH4/ultrafeedback_binarized
 library_name: transformers
+model_name: mistralai/Mistral-7B-v0.1
 tags:
 - generated_from_trainer
+- alignment-handbook
+- HuggingFaceH4/ultrafeedback_binarized
 - trl
 - dpo
 licence: license
 ---
+# Model Card for mistralai/Mistral-7B-v0.1
+This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the [HuggingFaceH4/ultrafeedback_binarized](https://huggingface.co/datasets/HuggingFaceH4/ultrafeedback_binarized) dataset.
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start

all_results.json CHANGED Viewed

@@ -1,5 +1,18 @@
 {
     "epoch": 1.0,
     "total_flos": 0.0,
     "train_loss": 0.6713338216145833,
     "train_runtime": 13324.2114,

 {
     "epoch": 1.0,
+    "eval_logits/chosen": -3.0115926265716553,
+    "eval_logits/rejected": -3.022681474685669,
+    "eval_logps/chosen": -266.7742004394531,
+    "eval_logps/rejected": -253.80645751953125,
+    "eval_loss": 0.6836252808570862,
+    "eval_rewards/accuracies": 0.5225093364715576,
+    "eval_rewards/chosen": -0.020729802548885345,
+    "eval_rewards/margins": 0.020151691511273384,
+    "eval_rewards/rejected": -0.040862053632736206,
+    "eval_runtime": 165.8484,
+    "eval_samples": 2000,
+    "eval_samples_per_second": 12.21,
+    "eval_steps_per_second": 0.386,
     "total_flos": 0.0,
     "train_loss": 0.6713338216145833,
     "train_runtime": 13324.2114,

eval_results.json ADDED Viewed

+{
+    "epoch": 1.0,
+    "eval_logits/chosen": -3.0115926265716553,
+    "eval_logits/rejected": -3.022681474685669,
+    "eval_logps/chosen": -266.7742004394531,
+    "eval_logps/rejected": -253.80645751953125,
+    "eval_loss": 0.6836252808570862,
+    "eval_rewards/accuracies": 0.5225093364715576,
+    "eval_rewards/chosen": -0.020729802548885345,
+    "eval_rewards/margins": 0.020151691511273384,
+    "eval_rewards/rejected": -0.040862053632736206,
+    "eval_runtime": 165.8484,
+    "eval_samples": 2000,
+    "eval_samples_per_second": 12.21,
+    "eval_steps_per_second": 0.386
+}

runs/May25_22-42-33_cn-g009.server.mila.quebec/events.out.tfevents.1748242384.cn-g009.server.mila.quebec.3277230.1 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:e2760dec0e9d94f9e27fb889aba097dc13697229c09137188360215ed2a77de5
+size 815