Qwen
/

Qwen3-0.6B-MLX-4bit

Text Generation

text-generation-inference

4-bit precision

Model card · Files and versions

JustinLin610 committed 5 days ago

Commit

6b297fc

·

verified ·

1 Parent(s): 8d7d06f

Update README.md

Files changed (1) hide show

README.md +4 -4

README.md CHANGED Viewed

@@ -7,7 +7,7 @@ base_model:
 - Qwen/Qwen3-0.6B-Base
 ---
-# Qwen3-0.6B
 <a href="https://chat.qwen.ai/" target="_blank" style="margin: 2px;">
     <img alt="Chat" src="https://img.shields.io/badge/%F0%9F%92%9C%EF%B8%8F%20Qwen%20Chat%20-536af5" style="display: inline-block; vertical-align: middle;"/>
 </a>
@@ -50,7 +50,7 @@ The following contains a code snippet illustrating how to use the model generate
 from mlx_lm import load, generate
 model, tokenizer = load("Qwen/Qwen3-0.6B-MLX-4bit")
-prompt = "hello, Introduce yourself, and what can you do?"
 if tokenizer.chat_template is not None:
     messages = [{"role": "user", "content": prompt}]
@@ -155,14 +155,14 @@ if __name__ == "__main__":
     chatbot = QwenChatbot()
     # First input (without /think or /no_think tags, thinking mode is enabled by default)
-    user_input_1 = "How many r's in strawberries?"
     print(f"User: {user_input_1}")
     response_1 = chatbot.generate_response(user_input_1)
     print(f"Bot: {response_1}")
     print("----------------------")
     # Second input with /no_think
-    user_input_2 = "Then, how many r's in blueberries? /no_think"
     print(f"User: {user_input_2}")
     response_2 = chatbot.generate_response(user_input_2)
     print(f"Bot: {response_2}")

 - Qwen/Qwen3-0.6B-Base
 ---
+# Qwen3-0.6B-MLX-4bit
 <a href="https://chat.qwen.ai/" target="_blank" style="margin: 2px;">
     <img alt="Chat" src="https://img.shields.io/badge/%F0%9F%92%9C%EF%B8%8F%20Qwen%20Chat%20-536af5" style="display: inline-block; vertical-align: middle;"/>
 </a>
 from mlx_lm import load, generate
 model, tokenizer = load("Qwen/Qwen3-0.6B-MLX-4bit")
+prompt = "Hello, please introduce yourself and tell me what you can do."
 if tokenizer.chat_template is not None:
     messages = [{"role": "user", "content": prompt}]
     chatbot = QwenChatbot()
     # First input (without /think or /no_think tags, thinking mode is enabled by default)
+    user_input_1 = "How many 'r's are in strawberries?"
     print(f"User: {user_input_1}")
     response_1 = chatbot.generate_response(user_input_1)
     print(f"Bot: {response_1}")
     print("----------------------")
     # Second input with /no_think
+    user_input_2 = "Then, how many 'r's are in blueberries? /no_think"
     print(f"User: {user_input_2}")
     response_2 = chatbot.generate_response(user_input_2)
     print(f"Bot: {response_2}")