JustinLin610 committed on
Commit 528281f · verified · 1 Parent(s): 88bda5e

Update README.md

Files changed (1): README.md (+57 -26)
README.md CHANGED
@@ -5,7 +5,7 @@ license_link: https://huggingface.co/Qwen/Qwen3-14B/blob/main/LICENSE
 pipeline_tag: text-generation
 ---
 
-# Qwen3-32B
+# Qwen3-32B-MLX-bf16
 <a href="https://chat.qwen.ai/" target="_blank" style="margin: 2px;">
     <img alt="Chat" src="https://img.shields.io/badge/%F0%9F%92%9C%EF%B8%8F%20Qwen%20Chat%20-536af5" style="display: inline-block; vertical-align: middle;"/>
 </a>
@@ -46,16 +46,29 @@ KeyError: 'qwen3'
 ```
 
 The following code snippet illustrates how to use the model to generate content based on given inputs.
+
 ```python
 from mlx_lm import load, generate
+
 model, tokenizer = load("Qwen/Qwen3-32B-MLX-bf16")
-prompt = "hello, Introduce yourself, and what can you do ?"
+prompt = "Hello, please introduce yourself and tell me what you can do."
+
 if tokenizer.chat_template is not None:
     messages = [{"role": "user", "content": prompt}]
     prompt = tokenizer.apply_chat_template(
-        messages, add_generation_prompt=True
+        messages,
+        add_generation_prompt=True
     )
+
+response = generate(
+    model,
+    tokenizer,
+    prompt=prompt,
+    verbose=True,
+    max_tokens=1024
+)
+
+print(response)
 ```
 
 ## Switching Between Thinking and Non-Thinking Mode
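Both versions of the snippet above leave sampling at mlx_lm's defaults. The Qwen3 model card recommends temperature 0.6 with top-p 0.95 in thinking mode (0.7 and 0.8 in non-thinking mode), and a sampler can be handed to `generate`. A minimal sketch, assuming the `make_sampler` helper shipped in recent mlx_lm releases:

```python
from mlx_lm import load, generate
from mlx_lm.sample_utils import make_sampler  # assumed API; present in recent mlx_lm releases

model, tokenizer = load("Qwen/Qwen3-32B-MLX-bf16")

messages = [{"role": "user", "content": "Hello, please introduce yourself."}]
prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)

# Settings recommended for thinking mode; use temp=0.7, top_p=0.8 for non-thinking mode.
sampler = make_sampler(temp=0.6, top_p=0.95)

response = generate(model, tokenizer, prompt=prompt, sampler=sampler, max_tokens=1024)
print(response)
```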
@@ -109,6 +122,8 @@ Here is an example of a multi-turn conversation:
 
 ```python
 from mlx_lm import load, generate
+
+
 class QwenChatbot:
     def __init__(self, model_name="Qwen/Qwen3-32B-MLX-bf16"):
         self.model, self.tokenizer = load(model_name)
@@ -123,29 +138,36 @@ class QwenChatbot:
             add_generation_prompt=True
         )
 
-        response = generate(self.model, self.tokenizer, prompt=text, verbose=True, max_tokens=32768)
+        response = generate(
+            self.model,
+            self.tokenizer,
+            prompt=text,
+            verbose=True,
+            max_tokens=32768
+        )
         # Update history
         self.history.append({"role": "user", "content": user_input})
         self.history.append({"role": "assistant", "content": response})
 
         return response
 
+
 # Example Usage
 if __name__ == "__main__":
     chatbot = QwenChatbot()
 
     # First input (without /think or /no_think tags, thinking mode is enabled by default)
-    user_input_1 = "How many r's in strawberries?"
+    user_input_1 = "How many 'r's are in strawberries?"
     print(f"User: {user_input_1}")
     response_1 = chatbot.generate_response(user_input_1)
     print(f"Bot: {response_1}")
     print("----------------------")
 
     # Second input with /no_think
-    user_input_2 = "Then, how many r's in blueberries? /no_think"
+    user_input_2 = "Then, how many 'r's are in blueberries? /no_think"
     print(f"User: {user_input_2}")
     response_2 = chatbot.generate_response(user_input_2)
-    print(f"Bot: {response_2}")
+    print(f"Bot: {response_2}")
     print("----------------------")
 
     # Third input with /think
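In thinking mode, the responses produced by the chatbot above may begin with a `<think>...</think>` block. If only the final answer should be shown to the user (or the reasoning logged separately), splitting the two apart is plain string handling. A small sketch, assuming the thinking content arrives as a single leading `<think>` block, which is how Qwen3 emits it:

```python
import re

def split_thinking(response: str) -> tuple[str, str]:
    # Assumes at most one <think>...</think> block at the start of the
    # response; returns (thought, answer).
    match = re.match(r"\s*<think>(.*?)</think>\s*(.*)", response, re.DOTALL)
    if match:
        return match.group(1).strip(), match.group(2).strip()
    return "", response.strip()

thought, answer = split_thinking("<think>Count each letter.</think>There are 3 r's.")
print(answer)  # There are 3 r's.
```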
@@ -164,52 +186,61 @@ if __name__ == "__main__":
 Qwen3 excels in tool-calling capabilities. We recommend using [Qwen-Agent](https://github.com/QwenLM/Qwen-Agent) to make the best use of the agentic ability of Qwen3. Qwen-Agent encapsulates tool-calling templates and tool-calling parsers internally, greatly reducing coding complexity.
 
 To define the available tools, you can use the MCP configuration file, use the integrated tools of Qwen-Agent, or integrate other tools yourself.
+
 ```python
 from qwen_agent.agents import Assistant
 
 # Define LLM
 llm_cfg = {
-    'model': 'Qwen3-32B-MLX-bf16',
+    "model": "Qwen3-32B-MLX-bf16",
 
     # Use the endpoint provided by Alibaba Model Studio:
-    # 'model_type': 'qwen_dashscope',
-    # 'api_key': os.getenv('DASHSCOPE_API_KEY'),
+    # "model_type": "qwen_dashscope",
+    # "api_key": os.getenv("DASHSCOPE_API_KEY"),
 
     # Use a custom endpoint compatible with OpenAI API:
-    'model_server': 'http://localhost:8000/v1',  # api_base
-    'api_key': 'EMPTY',
+    "model_server": "http://localhost:8000/v1",  # api_base
+    "api_key": "EMPTY",
 
     # Other parameters:
-    # 'generate_cfg': {
-    #     # Add: When the response content is `<think>this is the thought</think>this is the answer;
-    #     # Do not add: When the response has been separated by reasoning_content and content.
-    #     'thought_in_content': True,
-    # },
+    # "generate_cfg": {
+    #     # Add: When the response content is `<think>this is the thought</think>this is the answer;
+    #     # Do not add: When the response has been separated by reasoning_content and content.
+    #     "thought_in_content": True,
+    # },
 }
 
 # Define Tools
 tools = [
-    {'mcpServers': {  # You can specify the MCP configuration file
-            'time': {
-                'command': 'uvx',
-                'args': ['mcp-server-time', '--local-timezone=Asia/Shanghai']
+    {
+        "mcpServers": {  # You can specify the MCP configuration file
+            "time": {
+                "command": "uvx",
+                "args": ["mcp-server-time", "--local-timezone=Asia/Shanghai"],
             },
             "fetch": {
                 "command": "uvx",
-                "args": ["mcp-server-fetch"]
-            }
+                "args": ["mcp-server-fetch"],
+            },
         }
     },
-    'code_interpreter',  # Built-in tools
+    "code_interpreter",  # Built-in tools
 ]
 
 # Define Agent
 bot = Assistant(llm=llm_cfg, function_list=tools)
 
 # Streaming generation
-messages = [{'role': 'user', 'content': 'https://qwenlm.github.io/blog/ Introduce the latest developments of Qwen'}]
+messages = [
+    {
+        "role": "user",
+        "content": "https://qwenlm.github.io/blog/ Introduce the latest developments of Qwen",
+    }
+]
+
 for responses in bot.run(messages=messages):
     pass
+
 print(responses)
 ```
 
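Two practical notes on the Qwen-Agent example above. First, the `model_server` entry assumes an OpenAI-compatible endpoint is already listening at `http://localhost:8000/v1`; mlx_lm ships a server module that can provide one, though its flags vary by version. Second, `bot.run` yields a snapshot of the response messages on each iteration, and the loop above keeps only the last snapshot. A sketch of streaming the assistant's text incrementally instead, assuming each snapshot is a list of message dicts whose last entry is the growing assistant reply:

```python
# Continues the snippet above (reuses `bot` and `messages`).
printed = 0
for responses in bot.run(messages=messages):
    if responses and responses[-1].get("role") == "assistant":
        content = responses[-1].get("content") or ""
        print(content[printed:], end="", flush=True)  # emit only the new text
        printed = len(content)
print()
```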
 