import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load a small conversational model; with no device_map or .to() call,
# transformers keeps the weights on CPU by default.
MODEL_NAME = "microsoft/DialoGPT-small"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
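# Assumed dependencies for this sketch (not specified in the original):
# gradio, transformers, and torch. On a Hugging Face Space these would
# typically be listed in requirements.txt.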
def chat(message, history):
    """
    Append the user message to the conversation history,
    generate a response from the model, and return the updated history.
    """
    if history is None:
        history = []
    # Build the conversation prompt from the previous (user, bot) turns.
    prompt = ""
    for user_msg, bot_msg in history:
        prompt += f"User: {user_msg}\nBot: {bot_msg}\n"
    prompt += f"User: {message}\nBot:"
    # Encode the prompt and generate a response (cap new tokens for CPU speed).
    input_ids = tokenizer.encode(prompt + tokenizer.eos_token, return_tensors="pt")
    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            max_new_tokens=50,
            pad_token_id=tokenizer.eos_token_id,
            do_sample=True,
            top_p=0.95,
            top_k=50,
        )
    # Decode only the newly generated tokens.
    response = tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True)
    # Store the turn as a (user, bot) pair, which is the format gr.Chatbot
    # expects, and return an empty string first to clear the input box.
    history.append((message, response.strip()))
    return "", history, history
# Build the Gradio interface.
with gr.Blocks() as demo:
    gr.Markdown("# CPU LLM Chat Demo\nThis is a simple chat interface using DialoGPT-small.")
    chatbot = gr.Chatbot()
    message_input = gr.Textbox(placeholder="Type your message here...", show_label=False)
    state = gr.State([])
    # The history state must be both an input and an output so it persists
    # across turns; chat() returns ("", history, history) to match.
    message_input.submit(chat, [message_input, state], [message_input, chatbot, state])

demo.launch()
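# A minimal sketch of running this locally, assuming the file is saved as
# app.py and the dependencies above are installed:
#   pip install gradio transformers torch
#   python app.py
# Gradio serves the demo at http://127.0.0.1:7860 by default.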