JustinLin610 committed
Commit 5aa2bda · verified · 1 Parent(s): 3a8b4b8

Update README.md

Files changed (1): README.md +50 -20
README.md CHANGED
@@ -46,14 +46,26 @@ KeyError: 'qwen3'
 The following code snippet illustrates how to use the model to generate content from given inputs.
 ```python
 from mlx_lm import load, generate
+
 model, tokenizer = load("Qwen/Qwen3-4B-MLX-6bit")
 prompt = "hello, Introduce yourself, and what can you do ?"
+
 if tokenizer.chat_template is not None:
     messages = [{"role": "user", "content": prompt}]
     prompt = tokenizer.apply_chat_template(
-        messages, add_generation_prompt=True
+        messages,
+        add_generation_prompt=True
     )
-response = generate(model, tokenizer, prompt=prompt, verbose=True, max_tokens=1024)
+
+response = generate(
+    model,
+    tokenizer,
+    prompt=prompt,
+    verbose=True,
+    max_tokens=1024
+)
+
+print(response)
 ```
 
 ## Switching Between Thinking and Non-Thinking Mode
@@ -107,6 +119,8 @@ Here is an example of a multi-turn conversation:
 
 ```python
 from mlx_lm import load, generate
+
+
 class QwenChatbot:
     def __init__(self, model_name="Qwen/Qwen3-4B-MLX-6bit"):
         self.model, self.tokenizer = load(model_name)
@@ -121,13 +135,20 @@ class QwenChatbot:
             add_generation_prompt=True
         )
 
-        response = generate(self.model, self.tokenizer, prompt=text, verbose=True, max_tokens=32768)
+        response = generate(
+            self.model,
+            self.tokenizer,
+            prompt=text,
+            verbose=True,
+            max_tokens=32768
+        )
         # Update history
         self.history.append({"role": "user", "content": user_input})
         self.history.append({"role": "assistant", "content": response})
 
         return response
 
+
 # Example Usage
 if __name__ == "__main__":
     chatbot = QwenChatbot()
@@ -143,7 +164,7 @@ if __name__ == "__main__":
     user_input_2 = "Then, how many r's in blueberries? /no_think"
     print(f"User: {user_input_2}")
    response_2 = chatbot.generate_response(user_input_2)
-    print(f"Bot: {response_2}")
+    print(f"Bot: {response_2}")
     print("----------------------")
 
     # Third input with /think
@@ -162,35 +183,37 @@ if __name__ == "__main__":
 Qwen3 excels in tool calling capabilities. We recommend using [Qwen-Agent](https://github.com/QwenLM/Qwen-Agent) to make the best use of the agentic ability of Qwen3. Qwen-Agent encapsulates tool-calling templates and tool-calling parsers internally, greatly reducing coding complexity.
 
 To define the available tools, you can use an MCP configuration file, use the integrated tools of Qwen-Agent, or integrate other tools by yourself.
+
 ```python
 from qwen_agent.agents import Assistant
 
 # Define LLM
 llm_cfg = {
-    'model': 'Qwen3-4B-MLX-6bit',
+    "model": "Qwen3-4B-MLX-6bit",
 
     # Use the endpoint provided by Alibaba Model Studio:
-    # 'model_type': 'qwen_dashscope',
-    # 'api_key': os.getenv('DASHSCOPE_API_KEY'),
+    # "model_type": "qwen_dashscope",
+    # "api_key": os.getenv("DASHSCOPE_API_KEY"),
 
     # Use a custom endpoint compatible with the OpenAI API:
-    'model_server': 'http://localhost:8000/v1',  # api_base
-    'api_key': 'EMPTY',
+    "model_server": "http://localhost:8000/v1",  # api_base
+    "api_key": "EMPTY",
 
     # Other parameters:
-    # 'generate_cfg': {
-    #     # Add: when the response content is `<think>this is the thought</think>this is the answer`;
-    #     # Do not add: when the response is already separated into reasoning_content and content.
-    #     'thought_in_content': True,
-    # },
+    # "generate_cfg": {
+    #     # Add: when the response content is `<think>this is the thought</think>this is the answer`;
+    #     # Do not add: when the response is already separated into reasoning_content and content.
+    #     "thought_in_content": True,
+    # },
 }
 
 # Define Tools
 tools = [
-    {'mcpServers': {  # You can specify the MCP configuration file
-        'time': {
-            'command': 'uvx',
-            'args': ['mcp-server-time', '--local-timezone=Asia/Shanghai']
+    {
+        "mcpServers": {  # You can specify the MCP configuration file
+            "time": {
+                "command": "uvx",
+                "args": ["mcp-server-time", "--local-timezone=Asia/Shanghai"]
             },
             "fetch": {
                 "command": "uvx",
@@ -198,16 +221,23 @@ tools = [
             }
         }
     },
-    'code_interpreter',  # Built-in tools
+    "code_interpreter",  # Built-in tools
 ]
 
 # Define Agent
 bot = Assistant(llm=llm_cfg, function_list=tools)
 
 # Streaming generation
-messages = [{'role': 'user', 'content': 'https://qwenlm.github.io/blog/ Introduce the latest developments of Qwen'}]
+messages = [
+    {
+        "role": "user",
+        "content": "https://qwenlm.github.io/blog/ Introduce the latest developments of Qwen"
+    }
+]
+
 for responses in bot.run(messages=messages):
     pass
+
 print(responses)
 ```
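
A note on the configuration above: `model_server` points at `http://localhost:8000/v1`, so an OpenAI-compatible endpoint must already be running there. One way to provide one for this MLX model is the server bundled with `mlx_lm`; the sketch below is not part of this commit, and the exact flags should be verified against `python -m mlx_lm.server --help`.

```python
# Assumed setup, not part of this commit: serve the model behind an
# OpenAI-compatible API so the Qwen-Agent config above can reach it.
# Launch the server first (verify flags with `python -m mlx_lm.server --help`):
#
#   python -m mlx_lm.server --model Qwen/Qwen3-4B-MLX-6bit --port 8000
#
# Then any OpenAI-style client can query it:
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
response = client.chat.completions.create(
    model="Qwen3-4B-MLX-6bit",
    messages=[{"role": "user", "content": "Introduce yourself briefly."}],
)
print(response.choices[0].message.content)
```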
243