# Octagon / app.py — Gradio chatbot app (Hugging Face Space by Georg4000, revision f2ceb0a)
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# Pick the best available device for inference.
use_cuda = torch.cuda.is_available()
device = "cuda" if use_cuda else "cpu"

# Load the model: half precision + automatic device placement on GPU,
# full precision on CPU.
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16 if use_cuda else torch.float32,
    device_map="auto" if use_cuda else None,
    trust_remote_code=True,
).eval()

# Load the tokenizer and reuse the EOS token for padding (the model
# ships without a dedicated pad token).
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
# Chat handler wired into the Gradio interface below.
def chatbot(user_input):
    """Generate a chat reply for *user_input* with the TinyLlama model.

    Returns only the newly generated text — the prompt tokens are
    stripped from the decoded output so the reply does not echo the
    user's message back. Blank/whitespace-only input returns a short
    help message instead of invoking the model.
    """
    if not user_input.strip():
        return "Please enter a message."
    inputs = tokenizer(user_input, return_tensors="pt", padding=True).to(device)
    with torch.no_grad():
        output = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],  # explicit mask: pad == eos, so it can't be inferred
            max_new_tokens=50,  # bound generation length without truncating the prompt
            temperature=0.6,
            top_p=0.8,
            do_sample=True,
            # NOTE: early_stopping was removed — it only applies to beam
            # search and triggers a warning when sampling.
            pad_token_id=tokenizer.eos_token_id,
        )
    # Decode only the tokens produced after the prompt.
    prompt_len = inputs["input_ids"].shape[1]
    response = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)
    return response.strip()
# Build and launch the web UI. The original passed ssl=False, which is
# not a keyword accepted by Gradio's launch() and raises TypeError;
# HTTPS is already off by default, so the argument is simply dropped.
iface = gr.Interface(
    fn=chatbot,
    inputs="text",
    outputs="text",
    title="Octagon 2.0 Chatbot",
)
iface.launch(debug=True)