import gradio as gr
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer, pipeline

# Load the 8-bit OpenVINO-quantized Qwen2.5 model and its tokenizer; inference runs on CPU.
model_id = "hsuwill000/Qwen2.5-1.5B-Instruct-openvino-8bit"
model = OVModelForCausalLM.from_pretrained(model_id, device="CPU")
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Wrap the model and tokenizer in a standard transformers text-generation pipeline.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)


def respond(message, history):
    try:
        # Flatten prior (user, bot) turns plus the new message into a single prompt.
        input_text = message
        if history:
            input_text = "\n".join(f"User: {u}\nBot: {b}" for u, b in history) + f"\nUser: {message}"

        response = pipe(
            input_text,
            max_length=512,
            truncation=True,
            num_return_sequences=1,
            do_sample=True,  # sampling must be enabled for temperature/top_p to take effect
            temperature=0.7,
            top_p=0.9,
            return_full_text=False,  # return only the completion, not the echoed prompt
        )
        reply = response[0]['generated_text'].strip()

        history.append((message, reply))
        return history
    except Exception as e:
        # Log the error server-side and show a friendly message in the chat.
        print(f"Error: {e}")
        return history + [(message, "Sorry, something went wrong. Please try again.")]
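
# Note: respond() appends (user, bot) tuples, matching gr.Chatbot's legacy
# tuple format used below; newer Gradio releases also support a "messages"
# format, but this script assumes the tuple style.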

def clear_history():
    # Reset the chat transcript shown in the Chatbot component.
    return []


with gr.Blocks() as demo:
    gr.Markdown("# Qwen2.5-1.5B-Instruct-openvino Chat")
    gr.Markdown("Chat with the Qwen2.5-1.5B-Instruct-openvino model.")

    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Your Message")
    clear_btn = gr.Button("Clear History")

    # Pressing Enter in the textbox runs respond() and updates the transcript;
    # the Clear button empties it, bypassing the queue.
    msg.submit(respond, [msg, chatbot], chatbot)
    clear_btn.click(clear_history, None, chatbot, queue=False)

if __name__ == "__main__":
    demo.launch()
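
# To run locally (assuming this file is saved as app.py -- the filename is
# illustrative, not taken from the source):
#   pip install gradio transformers "optimum[openvino]"
#   python app.py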