nightmedia committed on
Commit
bfe8288
·
verified ·
1 Parent(s): f99162d

Add files using upload-large-folder tool

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: llama4
3
+ library_name: mlx
4
+ base_model: deepcogito/cogito-v2-preview-llama-109B-MoE
5
+ tags:
6
+ - mlx
7
+ pipeline_tag: text-generation
8
+ ---
9
+
10
+ # cogito-v2-preview-llama-109B-MoE-q4-hi-mlx
11
+
12
+ This model [nightmedia/cogito-v2-preview-llama-109B-MoE-q4-hi-mlx](https://huggingface.co/nightmedia/cogito-v2-preview-llama-109B-MoE-q4-hi-mlx) was
13
+ converted to MLX format from [deepcogito/cogito-v2-preview-llama-109B-MoE](https://huggingface.co/deepcogito/cogito-v2-preview-llama-109B-MoE)
14
+ using mlx-lm version **0.26.1**.
15
+
16
+ ## Use with mlx
17
+
18
+ ```bash
19
+ pip install mlx-lm
20
+ ```
21
+
22
+ ```python
23
+ from mlx_lm import load, generate
24
+
25
+ model, tokenizer = load("nightmedia/cogito-v2-preview-llama-109B-MoE-q4-hi-mlx")
26
+
27
+ prompt = "hello"
28
+
29
+ if tokenizer.chat_template is not None:
30
+ messages = [{"role": "user", "content": prompt}]
31
+ prompt = tokenizer.apply_chat_template(
32
+ messages, add_generation_prompt=True
33
+ )
34
+
35
+ response = generate(model, tokenizer, prompt=prompt, verbose=True)
36
+ ```
chat_template.jinja ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{- bos_token }}
2
+ {%- if not tools is defined %}
3
+ {%- set tools = none %}
4
+ {%- endif %}
5
+ {%- if not enable_thinking is defined %}
6
+ {%- set enable_thinking = false %}
7
+ {%- endif %}
8
+
9
+ {#- This block extracts the system message, so we can slot it into the right place. #}
10
+ {%- if messages[0]['role'] == 'system' %}
11
+ {%- if messages[0]['content'] is string %}
12
+ {%- set system_message = messages[0]['content']|trim %}
13
+ {%- else %}
14
+ {%- set system_message = messages[0]['content'][0]['text']|trim %}
15
+ {%- endif %}
16
+ {%- set messages = messages[1:] %}
17
+ {%- else %}
18
+ {%- set system_message = "" %}
19
+ {%- endif %}
20
+
21
+ {#- Set the system message. If enable_thinking is true, add the "Enable deep thinking subroutine." #}
22
+ {%- if enable_thinking %}
23
+ {%- if system_message != "" %}
24
+ {%- set system_message = "Enable deep thinking subroutine.
25
+
26
+ " ~ system_message %}
27
+ {%- else %}
28
+ {%- set system_message = "Enable deep thinking subroutine." %}
29
+ {%- endif %}
30
+ {%- endif %}
31
+
32
+ {#- System message + tools #}
33
+ {%- if tools is not none or system_message != '' %}
34
+ {{- "<|header_start|>system<|header_end|>
35
+
36
+ " }}
37
+ {{- system_message }}
38
+ {%- if tools is not none %}
39
+ {%- if system_message != "" %}
40
+ {{- "
41
+
42
+ " }}
43
+ {%- endif %}
44
+ {{- "Available Tools:
45
+ " }}
46
+ {%- for t in tools %}
47
+ {{- t | tojson(indent=4) }}
48
+ {{- "
49
+
50
+ " }}
51
+ {%- endfor %}
52
+ {%- endif %}
53
+ {{- "<|eot|>" }}
54
+ {%- endif %}
55
+
56
+ {#- Rest of the messages #}
57
+ {%- for message in messages %}
58
+ {#- Case 1 - Usual, non tool related message. #}
59
+ {%- if not (message.role == "ipython" or message.role == "tool" or message.role == "tool_results" or (message.tool_calls is defined and message.tool_calls is not none)) %}
60
+ {{- '<|header_start|>' + message['role'] + '<|header_end|>
61
+
62
+ ' }}
63
+ {%- if message['content'] is string %}
64
+ {{- message['content'] }}
65
+ {%- else %}
66
+ {%- for content in message['content'] %}
67
+ {%- if content['type'] == 'image' %}
68
+ {{- '<|image|>' }}
69
+ {%- elif content['type'] == 'text' %}
70
+ {{- content['text'] }}
71
+ {%- endif %}
72
+ {%- endfor %}
73
+ {%- endif %}
74
+ {{- "<|eot|>" }}
75
+
76
+ {#- Case 2 - the response is from the assistant, but has a tool call returned. #}
77
+ {%- elif message.tool_calls is defined and message.tool_calls is not none %}
78
+ {{- "<|header_start|>assistant<|header_end|>
79
+
80
+ " }}
81
+ {%- if message['content'] is string %}
82
+ {{- message['content'] }}
83
+ {%- if message['content'] | trim != "" %}
84
+ {{- "
85
+
86
+ " }}
87
+ {%- endif %}
88
+ {%- else %}
89
+ {%- for content in message['content'] %}
90
+ {%- if content['type'] == 'image' %}
91
+ {{- '<|image|>' }}
92
+ {%- elif content['type'] == 'text' %}
93
+ {{- content['text'] }}
94
+ {%- if content['text'] | trim != "" %}
95
+ {{- "
96
+
97
+ " }}
98
+ {%- endif %}
99
+ {%- endif %}
100
+ {%- endfor %}
101
+ {%- endif %}
102
+ {{- "[" }}
103
+ {%- for tool_call in message.tool_calls %}
104
+ {%- if tool_call.function is defined %}
105
+ {%- set out = tool_call.function|tojson %}
106
+ {%- if not tool_call.id is defined %}
107
+ {{- out }}
108
+ {%- else %}
109
+ {{- out[:-1] }}
110
+ {{- ', "id": "' + tool_call.id + '"}' }}
111
+ {%- endif %}
112
+ {%- else %}
113
+ {{- tool_call|tojson }}
114
+ {%- endif %}
115
+ {%- if not loop.last %}
116
+ {{- ", " }}
117
+ {%- else %}
118
+ {{- "]<|eot|>" }}
119
+ {%- endif %}
120
+ {%- endfor %}
121
+
122
+ {#- Case 3 - the response is from a tool call. #}
123
+ {%- elif message.role == "ipython" or message["role"] == "tool_results" or message["role"] == "tool" %}
124
+ {{- "<|header_start|>ipython<|header_end|>
125
+
126
+ " }}
127
+ {%- if message.tool_call_id is defined and message.tool_call_id != '' %}
128
+ {{- '{"content": ' }}
129
+ {%- if message.content is mapping or (message.content is iterable and not message.content is string) %}
130
+ {{- message.content | tojson }}
131
+ {%- else %}
132
+ {{- '"' ~ message.content ~ '"' }}
133
+ {%- endif %}
134
+ {{- ', "call_id": "' ~ message.tool_call_id ~ '"}' }}
135
+ {%- else %}
136
+ {%- if message.content is mapping or (message.content is iterable and not message.content is string) %}
137
+ {{- message.content | tojson }}
138
+ {%- else %}
139
+ {{- message.content }}
140
+ {%- endif %}
141
+ {%- endif %}
142
+ {{- "<|eot|>" }}
143
+ {%- endif %}
144
+ {%- endfor %}
145
+ {%- if add_generation_prompt %}
146
+ {{- '<|header_start|>assistant<|header_end|>\n\n' }}
147
+ {%- if enable_thinking %}
148
+ {{- '<think>\n' }}
149
+ {%- endif %}
150
+ {%- endif %}
config.json ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Llama4ForConditionalGeneration"
4
+ ],
5
+ "boi_token_index": 200080,
6
+ "eoi_token_index": 200081,
7
+ "image_token_index": 200092,
8
+ "model_type": "llama4",
9
+ "quantization": {
10
+ "group_size": 32,
11
+ "bits": 4
12
+ },
13
+ "quantization_config": {
14
+ "group_size": 32,
15
+ "bits": 4
16
+ },
17
+ "text_config": {
18
+ "attention_bias": false,
19
+ "attention_chunk_size": 8192,
20
+ "attention_dropout": 0.0,
21
+ "attn_scale": 0.1,
22
+ "attn_temperature_tuning": true,
23
+ "bos_token_id": 200000,
24
+ "eos_token_id": [
25
+ 200001,
26
+ 200007,
27
+ 200008
28
+ ],
29
+ "floor_scale": 8192,
30
+ "for_llm_compressor": false,
31
+ "head_dim": 128,
32
+ "hidden_act": "silu",
33
+ "hidden_size": 5120,
34
+ "initializer_range": 0.02,
35
+ "interleave_moe_layer_step": 1,
36
+ "intermediate_size": 8192,
37
+ "intermediate_size_mlp": 16384,
38
+ "layer_types": [
39
+ "chunked_attention",
40
+ "chunked_attention",
41
+ "chunked_attention",
42
+ "full_attention",
43
+ "chunked_attention",
44
+ "chunked_attention",
45
+ "chunked_attention",
46
+ "full_attention",
47
+ "chunked_attention",
48
+ "chunked_attention",
49
+ "chunked_attention",
50
+ "full_attention",
51
+ "chunked_attention",
52
+ "chunked_attention",
53
+ "chunked_attention",
54
+ "full_attention",
55
+ "chunked_attention",
56
+ "chunked_attention",
57
+ "chunked_attention",
58
+ "full_attention",
59
+ "chunked_attention",
60
+ "chunked_attention",
61
+ "chunked_attention",
62
+ "full_attention",
63
+ "chunked_attention",
64
+ "chunked_attention",
65
+ "chunked_attention",
66
+ "full_attention",
67
+ "chunked_attention",
68
+ "chunked_attention",
69
+ "chunked_attention",
70
+ "full_attention",
71
+ "chunked_attention",
72
+ "chunked_attention",
73
+ "chunked_attention",
74
+ "full_attention",
75
+ "chunked_attention",
76
+ "chunked_attention",
77
+ "chunked_attention",
78
+ "full_attention",
79
+ "chunked_attention",
80
+ "chunked_attention",
81
+ "chunked_attention",
82
+ "full_attention",
83
+ "chunked_attention",
84
+ "chunked_attention",
85
+ "chunked_attention",
86
+ "full_attention"
87
+ ],
88
+ "max_position_embeddings": 262144,
89
+ "model_type": "llama4_text",
90
+ "moe_layers": [
91
+ 0,
92
+ 1,
93
+ 2,
94
+ 3,
95
+ 4,
96
+ 5,
97
+ 6,
98
+ 7,
99
+ 8,
100
+ 9,
101
+ 10,
102
+ 11,
103
+ 12,
104
+ 13,
105
+ 14,
106
+ 15,
107
+ 16,
108
+ 17,
109
+ 18,
110
+ 19,
111
+ 20,
112
+ 21,
113
+ 22,
114
+ 23,
115
+ 24,
116
+ 25,
117
+ 26,
118
+ 27,
119
+ 28,
120
+ 29,
121
+ 30,
122
+ 31,
123
+ 32,
124
+ 33,
125
+ 34,
126
+ 35,
127
+ 36,
128
+ 37,
129
+ 38,
130
+ 39,
131
+ 40,
132
+ 41,
133
+ 42,
134
+ 43,
135
+ 44,
136
+ 45,
137
+ 46,
138
+ 47
139
+ ],
140
+ "no_rope_layers": [
141
+ 1,
142
+ 1,
143
+ 1,
144
+ 0,
145
+ 1,
146
+ 1,
147
+ 1,
148
+ 0,
149
+ 1,
150
+ 1,
151
+ 1,
152
+ 0,
153
+ 1,
154
+ 1,
155
+ 1,
156
+ 0,
157
+ 1,
158
+ 1,
159
+ 1,
160
+ 0,
161
+ 1,
162
+ 1,
163
+ 1,
164
+ 0,
165
+ 1,
166
+ 1,
167
+ 1,
168
+ 0,
169
+ 1,
170
+ 1,
171
+ 1,
172
+ 0,
173
+ 1,
174
+ 1,
175
+ 1,
176
+ 0,
177
+ 1,
178
+ 1,
179
+ 1,
180
+ 0,
181
+ 1,
182
+ 1,
183
+ 1,
184
+ 0,
185
+ 1,
186
+ 1,
187
+ 1,
188
+ 0
189
+ ],
190
+ "num_attention_heads": 40,
191
+ "num_experts_per_tok": 1,
192
+ "num_hidden_layers": 48,
193
+ "num_key_value_heads": 8,
194
+ "num_local_experts": 16,
195
+ "output_router_logits": false,
196
+ "pad_token_id": 200018,
197
+ "rms_norm_eps": 1e-05,
198
+ "rope_scaling": {
199
+ "factor": 16.0,
200
+ "high_freq_factor": 1.0,
201
+ "low_freq_factor": 1.0,
202
+ "original_max_position_embeddings": 8192,
203
+ "rope_type": "llama3"
204
+ },
205
+ "rope_theta": 500000.0,
206
+ "router_aux_loss_coef": 0.001,
207
+ "router_jitter_noise": 0.0,
208
+ "torch_dtype": "bfloat16",
209
+ "use_cache": true,
210
+ "use_qk_norm": true,
211
+ "vocab_size": 201135
212
+ },
213
+ "tie_word_embeddings": false,
214
+ "torch_dtype": "bfloat16",
215
+ "transformers_version": "4.53.0"
216
+ }
model-00001-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13a7f2c886faf3281458d85606d225e3446832384284dac9a6f9f89a9d13de6a
3
+ size 5231384095
model-00002-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c0e677c5bb94ea68f950c8350308017e3aba8821e62bc2cfaa29d89db1770e3
3
+ size 5085899397
model-00003-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1d6fda61a001b08ed6b4b50ba07da71d77643df3696756115cf8ec79fd31f9b
3
+ size 5345347101
model-00004-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1162d083ecef505caa973654adc4c1643b6caf06bdbfd1f222fa34e71bfd546d
3
+ size 5127842756
model-00005-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a722a053e0c949665a8c48d4c6c2cc6821726b513166b3519b98a59995e77d5b
3
+ size 5085899552
model-00006-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25ae3dcd526d432b7454237b4e1edc31f467743c99f3026b020d618f6ccc0abd
3
+ size 5085899514
model-00007-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee6336882066e46a32f8f509d612c4be3e17f2e2d27b6ddbc085aa4c9e30a283
3
+ size 5345347208
model-00008-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa59988d2e91f52addc0afcb2533ef18ecdf4c6918a285ae045cc3fa64ad1dfc
3
+ size 5127842772
model-00009-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:608180bc36ed5461d47981dd4454e3623ab8c90ee1730db30950908015c4fe6c
3
+ size 5085899606
model-00010-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f57db6bc4608e968fe5375f7ad0ad0ae01ba5aa7dc634cdd93fb7b24aeafd708
3
+ size 5085899538
model-00011-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:876793c52502e61bee5c22d556403d44b378f4415670df3f02f3300a293b8e54
3
+ size 5345347178
model-00012-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a58531dd2bbfc7fba953090e14f65faabe4bd442263c95a975477ddd2d60f658
3
+ size 5127842774
model-00013-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f61a5ade4af0f1a67925c3a33c9cf5d60c333b47f1798d7ce9771caf3c75fa08
3
+ size 5270788266
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|begin_of_text|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|eot|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|finetune_right_pad|>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:172c9eb4beafc72601690da3ccfcede5c2e6806a8d5ec1fca33e22acea8023a4
3
+ size 27948578
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff