Qwen
/

Qwen3-4B-MLX-4bit

Text Generation

text-generation-inference

4-bit precision

Model card Files and versions

JustinLin610 committed on 5 days ago

Commit

a437c5a

·

verified ·

1 Parent(s): aa2b6a4

Update README.md

Files changed (1) hide show

README.md +4 -4

README.md CHANGED Viewed

@@ -5,7 +5,7 @@ license_link: https://huggingface.co/Qwen/Qwen3-14B/blob/main/LICENSE
 pipeline_tag: text-generation
 ---
-# Qwen3-4B
 <a href="https://chat.qwen.ai/" target="_blank" style="margin: 2px;">
     <img alt="Chat" src="https://img.shields.io/badge/%F0%9F%92%9C%EF%B8%8F%20Qwen%20Chat%20-536af5" style="display: inline-block; vertical-align: middle;"/>
 </a>
@@ -49,7 +49,7 @@ The following contains a code snippet illustrating how to use the model generate
 from mlx_lm import load, generate
 model, tokenizer = load("Qwen/Qwen3-4B-MLX-4bit")
-prompt = "hello, Introduce yourself, and what can you do ?"
 if tokenizer.chat_template is not None:
     messages = [{"role": "user", "content": prompt}]
@@ -155,14 +155,14 @@ if __name__ == "__main__":
     chatbot = QwenChatbot()
     # First input (without /think or /no_think tags, thinking mode is enabled by default)
-    user_input_1 = "How many r's in strawberries?"
     print(f"User: {user_input_1}")
     response_1 = chatbot.generate_response(user_input_1)
     print(f"Bot: {response_1}")
     print("----------------------")
     # Second input with /no_think
-    user_input_2 = "Then, how many r's in blueberries? /no_think"
     print(f"User: {user_input_2}")
     response_2 = chatbot.generate_response(user_input_2)
     print(f"Bot: {response_2}")

 pipeline_tag: text-generation
 ---
+# Qwen3-4B-MLX-4bit
 <a href="https://chat.qwen.ai/" target="_blank" style="margin: 2px;">
     <img alt="Chat" src="https://img.shields.io/badge/%F0%9F%92%9C%EF%B8%8F%20Qwen%20Chat%20-536af5" style="display: inline-block; vertical-align: middle;"/>
 </a>
 from mlx_lm import load, generate
 model, tokenizer = load("Qwen/Qwen3-4B-MLX-4bit")
+prompt = "hello, Introduce yourself, and what can you do?"
 if tokenizer.chat_template is not None:
     messages = [{"role": "user", "content": prompt}]
     chatbot = QwenChatbot()
     # First input (without /think or /no_think tags, thinking mode is enabled by default)
+    user_input_1 = "How many 'r's are in strawberries?"
     print(f"User: {user_input_1}")
     response_1 = chatbot.generate_response(user_input_1)
     print(f"Bot: {response_1}")
     print("----------------------")
     # Second input with /no_think
+    user_input_2 = "Then, how many 'r's are in blueberries? /no_think"
     print(f"User: {user_input_2}")
     response_2 = chatbot.generate_response(user_input_2)
     print(f"Bot: {response_2}")