Model save

Files changed (11) hide show

README.md CHANGED Viewed

@@ -27,7 +27,7 @@ print(output["generated_text"])
 ## Training procedure
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/bootpin/huggingface/runs/lg4lg76q)
 This model was trained with SFT.

 ## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/bootpin/huggingface/runs/q94wllqj)
 This model was trained with SFT.

all_results.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-    "epoch": 1.7142857142857144,
-    "total_flos": 1113549778714624.0,
-    "train_loss": 19.26901610692342,
-    "train_runtime": 77.567,
-    "train_samples": 460341,
-    "train_samples_per_second": 1.444,
-    "train_steps_per_second": 0.077
 }

 {
+    "epoch": 1.9995261786306564,
+    "total_flos": 1.174779724933628e+18,
+    "train_loss": 1.0698493373337514,
+    "train_runtime": 77836.8266,
+    "train_samples": 460142,
+    "train_samples_per_second": 1.301,
+    "train_steps_per_second": 0.081
 }

config.json CHANGED Viewed

@@ -5,8 +5,8 @@
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
-  "bos_token_id": 0,
-  "eos_token_id": 0,
   "head_dim": 64,
   "hidden_act": "silu",
   "hidden_size": 960,
@@ -19,6 +19,7 @@
   "num_attention_heads": 15,
   "num_hidden_layers": 32,
   "num_key_value_heads": 5,
   "pretraining_tp": 1,
   "rms_norm_eps": 1e-05,
   "rope_interleaved": false,

   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
   "head_dim": 64,
   "hidden_act": "silu",
   "hidden_size": 960,
   "num_attention_heads": 15,
   "num_hidden_layers": 32,
   "num_key_value_heads": 5,
+  "pad_token_id": 2,
   "pretraining_tp": 1,
   "rms_norm_eps": 1e-05,
   "rope_interleaved": false,

generation_config.json CHANGED Viewed

@@ -1,6 +1,7 @@
 {
   "_from_model_config": true,
-  "bos_token_id": 0,
-  "eos_token_id": 0,
   "transformers_version": "4.46.3"
 }

 {
   "_from_model_config": true,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "pad_token_id": 2,
   "transformers_version": "4.46.3"
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aa4a3d79f65de914ec12916186e80c8c6aa99a03a6d8075c215a713c8705cf07
 size 723674912

 version https://git-lfs.github.com/spec/v1
+oid sha256:05abbb502b69a004c24f32e108475eeeb2694852026cd58bdcfe3319c9665216
 size 723674912

runs/Apr03_23-32-22_afc374fd6ab1/events.out.tfevents.1743723195.afc374fd6ab1.20983.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:0791cd12441ccb2769383cd599bc8ca762779a44c2e9fc379e9b29e17e0f6b3c
+size 273720

special_tokens_map.json CHANGED Viewed

@@ -1,29 +1,23 @@
 {
   "additional_special_tokens": [
-    "<|im_start|>",
-    "<|im_end|>"
   ],
-  "bos_token": {
-    "content": "<|im_start|>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "content": "<|im_end|>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": {
-    "content": "<|im_end|>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
   "unk_token": {
     "content": "<|endoftext|>",
     "lstrip": false,

 {
   "additional_special_tokens": [
+    {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    }
   ],
+  "bos_token": "<|im_start|>",
+  "eos_token": "<|im_end|>",
+  "pad_token": "<|im_end|>",
   "unk_token": {
     "content": "<|endoftext|>",
     "lstrip": false,

tokenizer_config.json CHANGED Viewed

@@ -143,7 +143,7 @@
     "<|im_end|>"
   ],
   "bos_token": "<|im_start|>",
-  "chat_template": "",
   "clean_up_tokenization_spaces": false,
   "eos_token": "<|im_end|>",
   "model_max_length": 8192,

     "<|im_end|>"
   ],
   "bos_token": "<|im_start|>",
+  "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "<|im_end|>",
   "model_max_length": 8192,

train_results.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-    "epoch": 1.7142857142857144,
-    "total_flos": 1113549778714624.0,
-    "train_loss": 19.26901610692342,
-    "train_runtime": 77.567,
-    "train_samples": 460341,
-    "train_samples_per_second": 1.444,
-    "train_steps_per_second": 0.077
 }

 {
+    "epoch": 1.9995261786306564,
+    "total_flos": 1.174779724933628e+18,
+    "train_loss": 1.0698493373337514,
+    "train_runtime": 77836.8266,
+    "train_samples": 460142,
+    "train_samples_per_second": 1.301,
+    "train_steps_per_second": 0.081
 }

trainer_state.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:faa19ffaf1e2ddc9132de277ead37eb9ac3104cc1baaf3c94cbde2c2fa8781b7
-size 7160

 version https://git-lfs.github.com/spec/v1
+oid sha256:d9807ad48581fd1f16b2eb8e994fec16526cdca9d9f278e4cbce52b1e8fd15cf
+size 6904