# DialoGPT-small / app.py
# (Hugging Face Space by ayyuce — "Update app.py", commit 4b66ba0, verified)
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
# Load a small conversational model on CPU.
# NOTE(review): device_map="cpu" routes through accelerate's dispatch —
# confirm the `accelerate` package is installed, or from_pretrained raises.
MODEL_NAME = "microsoft/DialoGPT-small"  # Hugging Face Hub model id
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="cpu")
def chat(message, history):
    """Generate a bot reply for *message* given the running conversation.

    Args:
        message: The user's latest utterance (from the Textbox).
        history: List of (user_message, bot_message) pairs from previous
            turns — the format gr.Chatbot renders. May be None on first call.

    Returns:
        Tuple of ("", updated_history): the empty string clears the input
        textbox, and the history (new turn appended) feeds the Chatbot.
    """
    if history is None:
        history = []

    # Rebuild the conversation as a plain-text prompt. Each Chatbot entry
    # is a (user, bot) pair, so each pair contributes two dialogue lines.
    prompt = ""
    for user_msg, bot_msg in history:
        prompt += f"User: {user_msg}\nBot: {bot_msg}\n"
    prompt += f"User: {message}\nBot: "

    # DialoGPT was trained on eos-separated turns; append eos to mark the
    # end of the user's input before generating.
    input_ids = tokenizer.encode(prompt + tokenizer.eos_token, return_tensors="pt")

    # max_new_tokens bounds only the generated continuation (keeps CPU
    # latency low), unlike max_length which also counts the prompt.
    output_ids = model.generate(
        input_ids,
        max_new_tokens=50,
        pad_token_id=tokenizer.eos_token_id,  # DialoGPT has no pad token; reuse eos
        do_sample=True,
        top_p=0.95,
        top_k=50,
    )

    # Decode only the newly generated tokens (everything after the prompt).
    response = tokenizer.decode(
        output_ids[0][input_ids.shape[1]:], skip_special_tokens=True
    )

    # Append the turn in the (user, bot) pair format gr.Chatbot expects.
    # The original appended ("User", msg) and ("Bot", resp) as separate
    # entries, which Chatbot would render as literal "User"/"Bot" bubbles.
    history.append((message, response))
    return "", history
# Assemble the Gradio UI: a chat pane, an input box, and per-session state.
with gr.Blocks() as demo:
    gr.Markdown(
        "# CPU LLM Chat Demo\nThis is a simple chat interface using DialoGPT-small."
    )
    chat_display = gr.Chatbot()
    user_box = gr.Textbox(placeholder="Type your message here...", show_label=False)
    conversation = gr.State([])
    # Pressing Enter submits: chat() clears the box and refreshes the pane.
    user_box.submit(chat, [user_box, conversation], [user_box, chat_display])
demo.launch()