attn-signs committed on
Commit
cf97c43
·
verified ·
1 Parent(s): b9393e5

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +57 -1
README.md CHANGED
@@ -46,7 +46,63 @@ Utilized HF.Accelerator
46
  --==[MyLLM](https://github.com/Raumberg/myllm)==--
47
 
48
  ### Model configuration (MyLLM Framework)
49
- TO BE DISCLOSED
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
  ### Using the model / Как запустить?
52
 
 
46
  --==[MyLLM](https://github.com/Raumberg/myllm)==--
47
 
48
  ### Model configuration (MyLLM Framework)
49
+ Full SFT fine-tuning
50
+ ```toml
51
+ [model]
52
+ model_name_or_path = "yandex/YandexGPT-5-Lite-8B-pretrain"
53
+
54
+ [datasets]
55
+ dataset = "attn-signs/gromov-0"
56
+ conversation_field = "conversation"
57
+ generate_eval_examples = false
58
+ evaluation_strategy = "steps"
59
+ eval_steps = 100
60
+ dataloader_num_workers = 2
61
+ remove_unused_columns = true
62
+ test_size = 0.05
63
+
64
+ [run]
65
+ save_strategy = "steps"
66
+ save_steps = 300
67
+ save_total_limit = 3
68
+ run_name = "sft-gptr-8-run2"
69
+ report_to = "wandb"
70
+ logging_first_step = true
71
+ logging_steps = 1
72
+ output_dir = "models/attn-signs-gptr-8-run2"
73
+ project_name = "sft-gptr"
74
+
75
+ [training]
76
+ train_only_on_completions = true
77
+ per_device_train_batch_size = 1
78
+ per_device_eval_batch_size = 1
79
+ num_train_epochs = 3
80
+ learning_rate = 0.000009
81
+ max_seq_length = 8192
82
+ gradient_accumulation_steps = 8
83
+ gradient_checkpointing = true
84
+ warmup_steps = 10
85
+ bf16 = true
86
+ seed = 42
87
+ use_peft = false
88
+
89
+ [fusion]
90
+ attn_implementation = "flash_attention_2"
91
+
92
+ [tokenizer]
93
+ assistant_message_template = "<s>assistant\n"
94
+ eos_token = "</s>"
95
+ pad_token = "<unk>"
96
+ chat_template = "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<s>' + message['role'] + '\n' + message['content'] + '</s>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<s>assistant\n' }}{% endif %}"
97
+ force_chat_template = true
98
+ added_special_tokens = [
99
+ "<think>",
100
+ "</think>"
101
+ ]
102
+ system_prompt = """
103
+ [MODE: Reflection]
104
+ """
105
+ ```
106
 
107
  ### Using the model / Как запустить?
108