Files changed (1)
  1. README.md +118 -104
README.md CHANGED
@@ -1,105 +1,119 @@
- ---
- license: apache-2.0
- base_model:
- - Qwen/Qwen2.5-7B
- library_name: transformers
- ---
-
- ```
- base_model: Qwen/Qwen2.5-7B
- model_type: AutoModelForCausalLM
- tokenizer_type: AutoTokenizer
-
- load_in_8bit: false
- load_in_4bit: false
- strict: false
-
- datasets:
- - path: PocketDoc/Dans-MemoryCore-CoreCurriculum-Small
-   type: sharegpt
-   conversation: chatml
- - path: NewEden/Kalo-Opus-Instruct-22k-Refusal-Murdered
-   type: sharegpt
-   conversation: chatml
- - path: Epiculous/Synthstruct-Gens-v1.1-Filtered-n-Cleaned
-   type: sharegpt
-   conversation: chatml
- - path: NewEden/Gryphe-Sonnet-3.5-35k-Subset
-   type: sharegpt
-   conversation: chatml
- - path: Nitral-AI/Reasoning-1shot_ShareGPT
-   type: sharegpt
-   conversation: chatml
- - path: Nitral-AI/GU_Instruct-ShareGPT
-   type: sharegpt
-   conversation: chatml
- - path: Nitral-AI/Medical_Instruct-ShareGPT
-   type: sharegpt
-   conversation: chatml
-
- chat_template: chatml
-
- val_set_size: 0.01
- output_dir: ./outputs/out
-
- adapter:
- lora_r:
- lora_alpha:
- lora_dropout:
- lora_target_linear:
-
- sequence_len: 8192
- # sequence_len: 32768
- sample_packing: true
- eval_sample_packing: false
- pad_to_sequence_len: true
-
- plugins:
- - axolotl.integrations.liger.LigerPlugin
- liger_rope: true
- liger_rms_norm: true
- liger_swiglu: true
- liger_fused_linear_cross_entropy: true
-
- wandb_project: qwen7B
- wandb_entity:
- wandb_watch:
- wandb_name: qwen7B
- wandb_log_model:
-
- gradient_accumulation_steps: 32
- micro_batch_size: 1
- num_epochs: 2
- optimizer: adamw_bnb_8bit
- lr_scheduler: cosine
- learning_rate: 0.00001
- weight_decay: 0.05
-
- train_on_inputs: false
- group_by_length: false
- bf16: auto
- fp16:
- tf32: true
-
- gradient_checkpointing: true
- early_stopping_patience:
- resume_from_checkpoint:
- local_rank:
- logging_steps: 1
- xformers_attention:
- flash_attention: true
-
- warmup_ratio: 0.1
- evals_per_epoch: 4
- eval_table_size:
- eval_max_new_tokens: 128
- saves_per_epoch: 2
-
- debug:
- deepspeed:
- fsdp:
- fsdp_config:
-
- special_tokens:
-   pad_token: <pad>
+ ---
+ license: apache-2.0
+ base_model:
+ - Qwen/Qwen2.5-7B
+ library_name: transformers
+ language:
+ - zho
+ - eng
+ - fra
+ - spa
+ - por
+ - deu
+ - ita
+ - rus
+ - jpn
+ - kor
+ - vie
+ - tha
+ - ara
+ ---
+
+ ```
+ base_model: Qwen/Qwen2.5-7B
+ model_type: AutoModelForCausalLM
+ tokenizer_type: AutoTokenizer
+
+ load_in_8bit: false
+ load_in_4bit: false
+ strict: false
+
+ datasets:
+ - path: PocketDoc/Dans-MemoryCore-CoreCurriculum-Small
+   type: sharegpt
+   conversation: chatml
+ - path: NewEden/Kalo-Opus-Instruct-22k-Refusal-Murdered
+   type: sharegpt
+   conversation: chatml
+ - path: Epiculous/Synthstruct-Gens-v1.1-Filtered-n-Cleaned
+   type: sharegpt
+   conversation: chatml
+ - path: NewEden/Gryphe-Sonnet-3.5-35k-Subset
+   type: sharegpt
+   conversation: chatml
+ - path: Nitral-AI/Reasoning-1shot_ShareGPT
+   type: sharegpt
+   conversation: chatml
+ - path: Nitral-AI/GU_Instruct-ShareGPT
+   type: sharegpt
+   conversation: chatml
+ - path: Nitral-AI/Medical_Instruct-ShareGPT
+   type: sharegpt
+   conversation: chatml
+
+ chat_template: chatml
+
+ val_set_size: 0.01
+ output_dir: ./outputs/out
+
+ adapter:
+ lora_r:
+ lora_alpha:
+ lora_dropout:
+ lora_target_linear:
+
+ sequence_len: 8192
+ # sequence_len: 32768
+ sample_packing: true
+ eval_sample_packing: false
+ pad_to_sequence_len: true
+
+ plugins:
+ - axolotl.integrations.liger.LigerPlugin
+ liger_rope: true
+ liger_rms_norm: true
+ liger_swiglu: true
+ liger_fused_linear_cross_entropy: true
+
+ wandb_project: qwen7B
+ wandb_entity:
+ wandb_watch:
+ wandb_name: qwen7B
+ wandb_log_model:
+
+ gradient_accumulation_steps: 32
+ micro_batch_size: 1
+ num_epochs: 2
+ optimizer: adamw_bnb_8bit
+ lr_scheduler: cosine
+ learning_rate: 0.00001
+ weight_decay: 0.05
+
+ train_on_inputs: false
+ group_by_length: false
+ bf16: auto
+ fp16:
+ tf32: true
+
+ gradient_checkpointing: true
+ early_stopping_patience:
+ resume_from_checkpoint:
+ local_rank:
+ logging_steps: 1
+ xformers_attention:
+ flash_attention: true
+
+ warmup_ratio: 0.1
+ evals_per_epoch: 4
+ eval_table_size:
+ eval_max_new_tokens: 128
+ saves_per_epoch: 2
+
+ debug:
+ deepspeed:
+ fsdp:
+ fsdp_config:
+
+ special_tokens:
+   pad_token: <pad>
  ```
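
With Axolotl installed, a config like the one above is typically launched with `accelerate launch -m axolotl.cli.train config.yml`. Note the effective batch size works out to 32 sequences per device (micro_batch_size 1 × gradient_accumulation_steps 32). Since the run trains with `chat_template: chatml`, inference should go through the tokenizer's chat template; below is a minimal sketch using the standard transformers API, where `your-org/your-qwen2.5-7b-finetune` is a placeholder for this model's actual repository id, which the card does not state.

```
# Minimal ChatML inference sketch (Python / transformers).
# NOTE: the model id below is a placeholder, not the real repo path.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "your-org/your-qwen2.5-7b-finetune"  # placeholder id
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="auto"
)

# The config saves the model with chat_template: chatml, so
# apply_chat_template wraps turns in <|im_start|>/<|im_end|> markers.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Explain what sample packing does during training."},
]
inputs = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

outputs = model.generate(inputs, max_new_tokens=256)
# Decode only the newly generated tokens, skipping the prompt.
print(tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True))
```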