Update README.md
Browse files
README.md
CHANGED
@@ -46,7 +46,63 @@ Utilized HF.Accelerator
46   --==[MyLLM](https://github.com/Raumberg/myllm)==--
47
48   ### Model configuration (MyLLM Framework)
49 -
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50
51   ### Using the model / Как запустить?
52
|
|
46   --==[MyLLM](https://github.com/Raumberg/myllm)==--
47
48   ### Model configuration (MyLLM Framework)
49  + Full SFT finetuning
50  + ```toml
51  + [model]
52  + model_name_or_path = "yandex/YandexGPT-5-Lite-8B-pretrain"
53  +
54  + [datasets]
55  + dataset = "attn-signs/gromov-0"
56  + conversation_field = "conversation"
57  + generate_eval_examples = false
58  + evaluation_strategy = "steps"
59  + eval_steps = 100
60  + dataloader_num_workers = 2
61  + remove_unused_columns = true
62  + test_size = 0.05
63  +
64  + [run]
65  + save_strategy = "steps"
66  + save_steps = 300
67  + save_total_limit = 3
68  + run_name = "sft-gptr-8-run2"
69  + report_to = "wandb"
70  + logging_first_step = true
71  + logging_steps = 1
72  + output_dir = "models/attn-signs-gptr-8-run2"
73  + project_name = "sft-gptr"
74  +
75  + [training]
76  + train_only_on_completions = true
77  + per_device_train_batch_size = 1
78  + per_device_eval_batch_size = 1
79  + num_train_epochs = 3
80  + learning_rate = 0.000009
81  + max_seq_length = 8192
82  + gradient_accumulation_steps = 8
83  + gradient_checkpointing = true
84  + warmup_steps = 10
85  + bf16 = true
86  + seed = 42
87  + use_peft = false
88  +
89  + [fusion]
90  + attn_implementation = "flash_attention_2"
91  +
92  + [tokenizer]
93  + assistant_message_template = "<s>assistant\n"
94  + eos_token = "</s>"
95  + pad_token = "<unk>"
96  + chat_template = "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<s>' + message['role'] + '\n' + message['content'] + '</s>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<s>assistant\n' }}{% endif %}"
97  + force_chat_template = true
98  + added_special_tokens = [
99  +     "<think>",
100 +     "</think>"
101 + ]
102 + system_prompt = """
103 + [MODE: Reflection]
104 + """
105 + ```
106
107   ### Using the model / Как запустить?
108