ankitkushwaha90 commited on
Commit
2525c6e
·
verified ·
1 Parent(s): 4f6a4be

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +76 -0
README.md ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ datasets:
4
+ - fka/awesome-chatgpt-prompts
5
+ language:
6
+ - en
7
+ metrics:
8
+ - accuracy
9
+ base_model:
10
+ - moonshotai/Kimi-K2-Instruct
11
+ new_version: moonshotai/Kimi-K2-Instruct
12
+ pipeline_tag: summarization
13
+ library_name: adapter-transformers
14
+ tags:
15
+ - finance
16
+ ---
17
+
18
+ ## Accuracy and quick response balance
19
+
20
+ ```cmd
21
+ https://huggingface.co/Qwen/Qwen3-4B/tree/main
22
+ ```
23
+
24
+ ```python
25
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Path to the local checkpoint directory holding the model files.
model_name = "./"

# Load tokenizer and model weights once at startup.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",      # Automatically maps to GPU/CPU
    torch_dtype="auto",     # Use float16 or bfloat16 depending on GPU
    trust_remote_code=True,
)
# Inference mode: disables dropout and other training-only behavior.
model.eval()
39
+
40
# Inference function
def ask_qwen(prompt: str, max_new_tokens=128):
    """Send *prompt* to the loaded model and return the decoded reply.

    The " /no_think" suffix together with ``enable_thinking=False``
    requests a fast answer without step-by-step reasoning output.
    Reads the module-level ``tokenizer`` and ``model``.
    """
    chat = [{"role": "user", "content": prompt + " /no_think"}]
    rendered = tokenizer.apply_chat_template(
        chat,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,  # Fast replies, no step-by-step thinking
    )
    encoded = tokenizer([rendered], return_tensors="pt").to(model.device)
    with torch.no_grad():
        generation = model.generate(
            **encoded,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.7,
            top_p=0.8,
            top_k=20,
            min_p=0.0,
        )
    # Drop the prompt tokens; keep only the newly generated continuation.
    reply_ids = generation[0][encoded["input_ids"].shape[-1]:]
    return tokenizer.decode(reply_ids, skip_special_tokens=True).strip()
62
+
63
# Continuous loop for user prompts
if __name__ == "__main__":
    print("🔁 Qwen3-4B Chat Running... Type 'exit' to quit.")
    while True:
        user_text = input("\nYou: ")
        # Guard clause: any casing of exit/quit ends the session.
        if user_text.lower().strip() in ['exit', 'quit']:
            print("👋 Exiting Qwen chat.")
            break
        try:
            answer = ask_qwen(user_text)
            print(f"Qwen: {answer}")
        except Exception as e:
            # Best-effort REPL: report the error and keep the loop alive.
            print(f"⚠️ Error: {e}")
76
+ ```