guanwenyu1995 committed on
Commit
8212c3b
·
verified ·
1 Parent(s): 9f2756b

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +64 -0
README.md ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ language:
4
+ - zh
5
+ - en
6
+ pipeline_tag: text-generation
7
+ library_name: transformers
8
+ ---
9
+ <div align="center">
10
+ <img src="https://github.com/OpenBMB/MiniCPM/blob/main/assets/minicpm_logo.png?raw=true" width="500" />
11
+ </div>
12
+
13
+ ## Usage
14
+ ### Prebuilt [AutoAWQ](https://github.com/casper-hansen/AutoAWQ.git)
15
+ ```bash
16
+ pip install autoawq
17
+ ```
18
+ ### Inference with AutoAWQ
19
+ ```python
20
+ from awq import AutoAWQForCausalLM
21
+ import torch
22
+ from transformers import AutoTokenizer
23
+
24
+ prompt = "北京有什么好玩的地方?"
25
+ quant_path = "MiniCPM4.1-8B-AutoAWQ"
26
+
27
+ messages = [{"role": "user", "content": prompt}]
28
+
29
+ model = AutoAWQForCausalLM.from_quantized(
30
+ quant_path,
31
+ fuse_layers=False,
32
+ trust_remote_code=True
33
+ )
34
+ tokenizer = AutoTokenizer.from_pretrained(
35
+ quant_path,
36
+ trust_remote_code=True
37
+ )
38
+ device = next(model.model.parameters()).device
39
+
40
+ # To enable thinking mode, use the following line instead:
41
+ # formatted_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt = True, enable_thinking = True)
42
+ # Thinking mode disabled:
43
+ formatted_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt = True, enable_thinking = False)
44
+
45
+ input_ids = tokenizer.encode(formatted_prompt, return_tensors='pt').to(device)
46
+ outputs = model.generate(
47
+ input_ids,
48
+ max_new_tokens=1000,
49
+ do_sample=True
50
+ )
51
+ # If thinking mode is enabled, extract the answer with the following line instead:
52
+ # ans = [i.split("<|im_start|> assistant\n", 1)[1].strip() for i in tokenizer.batch_decode(outputs)]
53
+ # If thinking mode is disabled, strip the empty <think> block as well:
54
+ ans = [i.split("<|im_start|> assistant\n<think>\n\n</think>", 1)[1].strip() for i in tokenizer.batch_decode(outputs)]
55
+ ```
56
+
57
+ <p align="center">
58
+ <a href="https://github.com/OpenBMB/MiniCPM/" target="_blank">GitHub Repo</a> |
59
+ <a href="https://arxiv.org/abs/2506.07900" target="_blank">Technical Report</a> |
60
+ <a href="https://mp.weixin.qq.com/s/KIhH2nCURBXuFXAtYRpuXg?poc_token=HBIsUWijxino8oJ5s6HcjcfXFRi0Xj2LJlxPYD9c">Join Us</a>
61
+ </p>
62
+ <p align="center">
63
+ 👋 Contact us in <a href="https://discord.gg/3cGQn9b3YM" target="_blank">Discord</a> and <a href="https://github.com/OpenBMB/MiniCPM/blob/main/assets/wechat.jpg" target="_blank">WeChat</a>
64
+ </p>