---
license: apache-2.0
datasets:
- fka/awesome-chatgpt-prompts
language:
- en
metrics:
- accuracy
base_model:
- Qwen/Qwen3-4B
pipeline_tag: text-generation
library_name: transformers
tags:
- finance
---
## Balancing accuracy and quick responses
The script below runs [Qwen/Qwen3-4B](https://huggingface.co/Qwen/Qwen3-4B/tree/main) locally with thinking mode disabled, trading step-by-step reasoning for quicker replies.
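The chat script loads the weights from the current directory (`model_name = "./"`), so they need to be fetched once beforehand. A minimal sketch using `snapshot_download` from `huggingface_hub` (downloading into the working directory is an assumption; any local path works if `model_name` is updated to match):

```python
# Fetch the Qwen/Qwen3-4B weights so from_pretrained("./") can find them
from huggingface_hub import snapshot_download

snapshot_download(
    repo_id="Qwen/Qwen3-4B",  # repository linked above
    local_dir="./",           # assumption: the chat script runs from this directory
)
```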
```python
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

model_name = "./"  # expects the downloaded Qwen3-4B weights in the current directory

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",  # use float16 or bfloat16 depending on GPU
    device_map="auto",   # automatically maps to GPU/CPU
    trust_remote_code=True,
)
model.eval()

# Inference function
def ask_qwen(prompt: str, max_new_tokens=128):
    messages = [{"role": "user", "content": prompt}]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,  # fast replies, no step-by-step thinking
    )
    inputs = tokenizer([text], return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            # Qwen3's recommended sampling settings for non-thinking mode
            temperature=0.7,
            top_p=0.8,
            top_k=20,
            min_p=0.0,
            do_sample=True,
        )
    # Decode only the newly generated tokens, not the prompt
    generated = outputs[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(generated, skip_special_tokens=True).strip()

# Continuous loop for user prompts
if __name__ == "__main__":
    print("🔁 Qwen3-4B Chat Running... Type 'exit' to quit.")
    while True:
        prompt = input("\nYou: ")
        if prompt.lower().strip() in ["exit", "quit"]:
            print("👋 Exiting Qwen chat.")
            break
        try:
            response = ask_qwen(prompt)
            print(f"Qwen: {response}")
        except Exception as e:
            print(f"⚠️ Error: {e}")
```
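Replies above appear only after `generate()` returns. To cut perceived latency further, tokens can be streamed to the console as they are produced. A minimal sketch using `transformers`' `TextStreamer`, reusing `model` and `tokenizer` from the script above (the `ask_qwen_streaming` name is made up for illustration):

```python
# Stream tokens to stdout as they are generated instead of
# waiting for the full completion.
from transformers import TextStreamer

streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

def ask_qwen_streaming(prompt: str, max_new_tokens=128):
    messages = [{"role": "user", "content": prompt}]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,
    )
    inputs = tokenizer([text], return_tensors="pt").to(model.device)
    with torch.no_grad():
        model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=0.7,
            top_p=0.8,
            top_k=20,
            do_sample=True,
            streamer=streamer,  # prints each decoded chunk as it arrives
        )
```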