Felladrin committed dcd3a7c (1 parent: dc16f28)

Update model

Files changed (4):
  1. README.md +5 -6
  2. config.json +2 -3
  3. model.safetensors +1 -1
  4. tokenizer.json +1 -6
README.md CHANGED
@@ -73,7 +73,7 @@ widget:
 - [[ChatML](https://huggingface.co/datasets/Felladrin/ChatML-reddit-instruct-curated)] [euclaise/reddit-instruct-curated](https://huggingface.co/datasets/euclaise/reddit-instruct-curated)
 - [[ChatML](https://huggingface.co/datasets/Felladrin/ChatML-aya_dataset)] [CohereForAI/aya_dataset](https://huggingface.co/datasets/CohereForAI/aya_dataset)
 - [HuggingFaceH4/ultrafeedback_binarized](https://huggingface.co/datasets/HuggingFaceH4/ultrafeedback_binarized)
-- License: [Apache License 2.0](https://huggingface.co/Felladrin/TinyMistral-248M-Chat-v3/resolve/main/license.txt)
+- License: [Apache License 2.0](https://huggingface.co/Felladrin/TinyMistral-248M-Chat-v4/resolve/main/license.txt)
 
 ## Recommended Prompt Format
 
@@ -91,7 +91,7 @@ widget:
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
 import torch
 
-model_path = "Felladrin/TinyMistral-248M-Chat-v3"
+model_path = "Felladrin/TinyMistral-248M-Chat-v4"
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 tokenizer = AutoTokenizer.from_pretrained(model_path)
 model = AutoModelForCausalLM.from_pretrained(model_path).to(device)
@@ -178,7 +178,6 @@ llamafactory-cli train \
 --preprocessing_num_workers $(python -c "import os; print(max(1, os.cpu_count() - 2))") \
 --dataloader_num_workers $(python -c "import os; print(max(1, os.cpu_count() - 2))") \
 --finetuning_type full \
---template default \
 --flash_attn auto \
 --enable_liger_kernel True \
 --dataset_dir data \
@@ -188,15 +187,15 @@
 --num_train_epochs 2.0 \
 --per_device_train_batch_size 4 \
 --gradient_accumulation_steps 4 \
---lr_scheduler_type cosine \
+--lr_scheduler_type linear \
 --max_grad_norm 1.0 \
 --logging_steps 10 \
 --save_steps 50 \
 --save_total_limit 1 \
 --warmup_ratio 0.1 \
 --packing False \
---report_to none \
---output_dir ~/TinyMistral-248M-Chat-v3 \
+--report_to tensorboard \
+--output_dir ~/TinyMistral-248M-Chat-v4 \
 --pure_bf16 True \
 --plot_loss True \
 --trust_remote_code True \
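
A side note on the --lr_scheduler_type change above: the cosine and linear schedules differ only in the shape of the post-warmup decay. Below is a minimal sketch of that difference, using the transformers scheduler factory with hypothetical step counts (LLaMA-Factory derives the real ones from the dataset, epochs, and batch settings; nothing here is taken from the commit besides the warmup ratio and scheduler names):

import torch
from transformers import get_scheduler

total_steps = 1000                     # hypothetical; not from the commit
warmup_steps = int(0.1 * total_steps)  # matches --warmup_ratio 0.1

for name in ("cosine", "linear"):      # old vs. new --lr_scheduler_type
    param = torch.nn.Parameter(torch.zeros(1))
    optimizer = torch.optim.AdamW([param], lr=5e-4)
    scheduler = get_scheduler(
        name,
        optimizer=optimizer,
        num_warmup_steps=warmup_steps,
        num_training_steps=total_steps,
    )
    lrs = []
    for _ in range(total_steps):
        lrs.append(scheduler.get_last_lr()[0])
        optimizer.step()   # no gradients; we only trace the LR curve
        scheduler.step()
    # Both warm up identically; cosine then stays above linear through the
    # first half of the decay, crosses it at the midpoint, and sits below
    # it in the second half.
    print(f"{name}: lr@250={lrs[250]:.2e} lr@500={lrs[500]:.2e} lr@750={lrs[750]:.2e}")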
config.json CHANGED
@@ -1,5 +1,4 @@
 {
-  "_name_or_path": "Felladrin/TinyMistral-248M-Chat-v3",
   "architectures": ["MistralForCausalLM"],
   "attention_dropout": 0.0,
   "bos_token_id": 32000,
@@ -19,8 +18,8 @@
   "sliding_window": null,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.49.0",
-  "use_cache": true,
+  "transformers_version": "4.50.0",
+  "use_cache": false,
   "use_sliding_window": false,
   "vocab_size": 32005
 }
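
Worth noting: the committed config drops _name_or_path, bumps the recorded transformers version, and flips use_cache to false, which is typically a training-time leftover (the KV cache is disabled during training, e.g. when gradient checkpointing is on). Inference code can still opt back in per call; a minimal sketch, with a placeholder prompt standing in for the README's recommended format:

from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = "Felladrin/TinyMistral-248M-Chat-v4"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)

inputs = tokenizer("Hello!", return_tensors="pt")  # placeholder prompt
# use_cache=True overrides the "use_cache": false stored in config.json
# for this generate call only.
outputs = model.generate(**inputs, max_new_tokens=32, use_cache=True)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))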
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ddf0ded71ab5a315f90bc932d018a2b20e81987f49f8c9c6efcaf612b2d5a4d6
+oid sha256:da09172da13d6da1727beb0cef6c42e3fbc99bd3d9bdfedc0df8f5b2746c02a0
 size 496060688
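
Only the Git LFS pointer changed here: the payload size is identical (496060688 bytes) while the sha256 differs, consistent with retrained weights for the same architecture. A quick sketch for verifying a local download against the new oid (the file path is hypothetical):

import hashlib

EXPECTED = "da09172da13d6da1727beb0cef6c42e3fbc99bd3d9bdfedc0df8f5b2746c02a0"

h = hashlib.sha256()
with open("model.safetensors", "rb") as f:            # hypothetical local path
    for chunk in iter(lambda: f.read(1 << 20), b""):  # read in 1 MiB chunks
        h.update(chunk)
assert h.hexdigest() == EXPECTED, "hash mismatch: stale or corrupt download"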
tokenizer.json CHANGED
@@ -1,11 +1,6 @@
 {
   "version": "1.0",
-  "truncation": {
-    "direction": "Right",
-    "max_length": 1536,
-    "strategy": "LongestFirst",
-    "stride": 0
-  },
+  "truncation": null,
   "padding": null,
   "added_tokens": [
     {
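
This change removes the truncation rule that was baked into tokenizer.json (right-side truncation at 1536 tokens), so encoding no longer truncates by default. Callers that relied on the old behavior can request it per call; a minimal sketch, reusing the 1536 limit from the removed block:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Felladrin/TinyMistral-248M-Chat-v4")
encoded = tokenizer(
    "some very long text ... " * 1000,  # placeholder input
    truncation=True,                    # opt back in at call time
    max_length=1536,                    # value from the removed truncation block
)
print(len(encoded["input_ids"]))        # capped at 1536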