Update app.py
app.py

````diff
@@ -207,7 +207,7 @@ def quantize(model_path, repo_id, quant_method=None):
 
     return final_path
 
-def create_readme(repo_name, base_model_name, lora_model_name, quant_methods):
+def create_readme(repo_name, base_model_name, lora_model_name, quant_methods, username):
     readme_path = os.path.join("output", repo_name, "README.md")
     readme_template = """---
 tags:
@@ -235,13 +235,41 @@ datasets:
 - quant_methods: {quant_methods}
 - created_at: {created_at}
 - created_by: [Steven10429/apply_lora_and_quantize](https://github.com/Steven10429/apply_lora_and_quantize)
+
+## Usage:
+```python
+
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+model_path = "{username}/{repo_name}"
+
+tokenizer = AutoTokenizer.from_pretrained(model_path)
+model = AutoModelForCausalLM.from_pretrained(
+    model_path,
+    device_map="auto",
+    torch_dtype='auto'
+).eval()
+
+# Prompt content: "hi"
+messages = [
+    {{"role": "user", "content": "hi"}}
+]
+
+input_ids = tokenizer.apply_chat_template(conversation=messages, tokenize=True, add_generation_prompt=True, return_tensors='pt')
+output_ids = model.generate(input_ids.to('cuda'))
+response = tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True)
+
+# Model response: "Hello! How can I assist you today?"
+print(response)
+```
 """.format(
         quantization="\n- quantization" if len(quant_methods) > 0 else "",
         base_model_name=base_model_name,
        lora_model_name=lora_model_name,
        repo_name=repo_name,
        quant_methods=quant_methods,
-        created_at=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
+        created_at=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
+        username=username
     )
 
     with open(readme_path, "w") as f:
@@ -284,7 +312,7 @@ def process_model(base_model_name, lora_model_name, repo_name, quant_methods, hf
     for quant_method in quant_methods:
        quantize(output_dir, repo_name, quant_method=quant_method)
 
-    create_readme(repo_name, base_model_name, lora_model_name, quant_methods)
+    create_readme(repo_name, base_model_name, lora_model_name, quant_methods, username)
 
     # Upload the merged model and the quantized models
     api.upload_large_folder(
````
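One subtlety in the template above: the README is assembled with `str.format`, so literal braces inside the embedded usage snippet (the `messages` dict) must be doubled as `{{` and `}}`, otherwise `.format` parses them as placeholders and raises `KeyError`. A minimal demonstration:

```python
# str.format() treats "{" and "}" as placeholder delimiters, so literal
# braces in a template must be written as "{{" and "}}".
template = 'messages = [{{"role": "user", "content": "hi"}}]  # card for {username}'
print(template.format(username="Steven10429"))
# -> messages = [{"role": "user", "content": "hi"}]  # card for Steven10429
```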
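The `api.upload_large_folder(` call at the end of the last hunk pushes the merged and quantized weights, but its arguments are cut off in this view. A plausible shape, hedged because the actual call site is elided, assuming the `output/<repo_name>` layout used by `create_readme` and a target repo under the user's namespace:

```python
# Sketch only; the real arguments are truncated in the hunk above.
# upload_large_folder() is huggingface_hub's API for resumable,
# multi-worker uploads of large local folders.
api.upload_large_folder(
    repo_id=f"{username}/{repo_name}",              # assumed target repo
    folder_path=os.path.join("output", repo_name),  # merged + quantized files
    repo_type="model",                              # upload_large_folder requires an explicit repo_type
)
```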