JustinLin610 committed 36b091f (verified) · Parent(s): 20cd58a

Update README.md

Files changed (1): README.md (+57, -26)

README.md CHANGED
@@ -7,7 +7,7 @@ base_model:
 - Qwen/Qwen3-0.6B-Base
 ---
 
-# Qwen3-0.6B
+# Qwen3-0.6B-MLX-bf16
 <a href="https://chat.qwen.ai/" target="_blank" style="margin: 2px;">
     <img alt="Chat" src="https://img.shields.io/badge/%F0%9F%92%9C%EF%B8%8F%20Qwen%20Chat%20-536af5" style="display: inline-block; vertical-align: middle;"/>
 </a>
@@ -45,16 +45,29 @@ KeyError: 'qwen3'
 ```
 
 The following code snippet illustrates how to use the model to generate content from given inputs.
+
 ```python
 from mlx_lm import load, generate
+
 model, tokenizer = load("Qwen/Qwen3-0.6B-MLX-bf16")
-prompt = "hello, Introduce yourself, and what can you do ?"
+prompt = "Hello, please introduce yourself and tell me what you can do."
+
 if tokenizer.chat_template is not None:
     messages = [{"role": "user", "content": prompt}]
     prompt = tokenizer.apply_chat_template(
-        messages, add_generation_prompt=True
+        messages,
+        add_generation_prompt=True
     )
-response = generate(model, tokenizer, prompt=prompt, verbose=True, max_tokens=1024)
+
+response = generate(
+    model,
+    tokenizer,
+    prompt=prompt,
+    verbose=True,
+    max_tokens=1024
+)
+
+print(response)
 ```
 
 ## Switching Between Thinking and Non-Thinking Mode
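
In thinking mode, the generated text carries the model's reasoning wrapped in `<think>...</think>` ahead of the final answer (the same format the `thought_in_content` comment in the Qwen-Agent example below refers to). Here is a minimal sketch for separating the two parts; `split_thinking` is an illustrative helper, not an `mlx_lm` API:

```python
# Illustrative helper (not part of mlx_lm): split a Qwen3 completion into
# the <think>...</think> reasoning block and the final answer.
def split_thinking(text: str) -> tuple[str, str]:
    start_tag, end_tag = "<think>", "</think>"
    if start_tag in text and end_tag in text:
        before, _, rest = text.partition(start_tag)
        thinking, _, answer = rest.partition(end_tag)
        return thinking.strip(), (before + answer).strip()
    return "", text.strip()

thinking, answer = split_thinking(response)  # `response` from the snippet above
print("Thinking:", thinking)
print("Answer:", answer)
```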
@@ -108,6 +121,8 @@ Here is an example of a multi-turn conversation:
 
 ```python
 from mlx_lm import load, generate
+
+
 class QwenChatbot:
     def __init__(self, model_name="Qwen/Qwen3-0.6B-MLX-bf16"):
         self.model, self.tokenizer = load(model_name)
@@ -122,29 +137,36 @@
             add_generation_prompt=True
         )
 
-        response = generate(self.model, self.tokenizer, prompt=text, verbose=True, max_tokens=32768)
+        response = generate(
+            self.model,
+            self.tokenizer,
+            prompt=text,
+            verbose=True,
+            max_tokens=32768
+        )
         # Update history
         self.history.append({"role": "user", "content": user_input})
         self.history.append({"role": "assistant", "content": response})
 
         return response
 
+
 # Example Usage
 if __name__ == "__main__":
     chatbot = QwenChatbot()
 
     # First input (without /think or /no_think tags, thinking mode is enabled by default)
-    user_input_1 = "How many r's in strawberries?"
+    user_input_1 = "How many 'r's are in strawberries?"
     print(f"User: {user_input_1}")
     response_1 = chatbot.generate_response(user_input_1)
     print(f"Bot: {response_1}")
     print("----------------------")
 
     # Second input with /no_think
-    user_input_2 = "Then, how many r's in blueberries? /no_think"
+    user_input_2 = "Then, how many 'r's are in blueberries? /no_think"
     print(f"User: {user_input_2}")
     response_2 = chatbot.generate_response(user_input_2)
     print(f"Bot: {response_2}")
     print("----------------------")
 
     # Third input with /think
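
Besides the `/think` and `/no_think` soft switches demonstrated above, Qwen3's chat template also accepts an `enable_thinking` argument as a hard switch, as documented in the Qwen3 model cards. A brief sketch, assuming the same `mlx_lm` setup as in the quickstart:

```python
from mlx_lm import load, generate

model, tokenizer = load("Qwen/Qwen3-0.6B-MLX-bf16")
messages = [{"role": "user", "content": "Give me a short introduction to large language models."}]

# Hard switch: disable thinking for this turn via the chat template,
# instead of appending /no_think to the user input.
prompt = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    enable_thinking=False
)

response = generate(model, tokenizer, prompt=prompt, verbose=True, max_tokens=1024)
print(response)
```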
@@ -163,52 +185,61 @@
 Qwen3 excels in tool-calling capabilities. We recommend using [Qwen-Agent](https://github.com/QwenLM/Qwen-Agent) to make the best use of the agentic ability of Qwen3. Qwen-Agent encapsulates tool-calling templates and tool-calling parsers internally, greatly reducing coding complexity.
 
 To define the available tools, you can use the MCP configuration file, use the integrated tools of Qwen-Agent, or integrate other tools yourself.
+
 ```python
 from qwen_agent.agents import Assistant
 
 # Define LLM
 llm_cfg = {
-    'model': 'Qwen3-0.6B-MLX-bf16',
+    "model": "Qwen3-0.6B-MLX-bf16",
 
     # Use the endpoint provided by Alibaba Model Studio:
-    # 'model_type': 'qwen_dashscope',
-    # 'api_key': os.getenv('DASHSCOPE_API_KEY'),
+    # "model_type": "qwen_dashscope",
+    # "api_key": os.getenv("DASHSCOPE_API_KEY"),
 
     # Use a custom endpoint compatible with OpenAI API:
-    'model_server': 'http://localhost:8000/v1',  # api_base
-    'api_key': 'EMPTY',
+    "model_server": "http://localhost:8000/v1",  # api_base
+    "api_key": "EMPTY",
 
     # Other parameters:
-    # 'generate_cfg': {
-    #     # Add: When the response content is `<think>this is the thought</think>this is the answer`;
-    #     # Do not add: When the response has been separated by reasoning_content and content.
-    #     'thought_in_content': True,
-    # },
+    # "generate_cfg": {
+    #     # Add: When the response content is `<think>this is the thought</think>this is the answer`;
+    #     # Do not add: When the response has been separated by reasoning_content and content.
+    #     "thought_in_content": True,
+    # },
 }
 
 # Define Tools
 tools = [
-    {'mcpServers': {  # You can specify the MCP configuration file
-        'time': {
-            'command': 'uvx',
-            'args': ['mcp-server-time', '--local-timezone=Asia/Shanghai']
+    {
+        "mcpServers": {  # You can specify the MCP configuration file
+            "time": {
+                "command": "uvx",
+                "args": ["mcp-server-time", "--local-timezone=Asia/Shanghai"],
             },
             "fetch": {
                 "command": "uvx",
-                "args": ["mcp-server-fetch"]
-            }
+                "args": ["mcp-server-fetch"],
+            },
         }
     },
-    'code_interpreter',  # Built-in tools
+    "code_interpreter",  # Built-in tools
 ]
 
 # Define Agent
 bot = Assistant(llm=llm_cfg, function_list=tools)
 
 # Streaming generation
-messages = [{'role': 'user', 'content': 'https://qwenlm.github.io/blog/ Introduce the latest developments of Qwen'}]
+messages = [
+    {
+        "role": "user",
+        "content": "https://qwenlm.github.io/blog/ Introduce the latest developments of Qwen",
+    }
+]
+
 for responses in bot.run(messages=messages):
     pass
+
 print(responses)
 ```
 
 