"""Streamlit chat app: talk to HuggingFaceH4/zephyr-7b-beta via the Hugging Face Inference API."""
import streamlit as st
from huggingface_hub import InferenceClient

# Client for the hosted Zephyr-7b-beta chat model on the HF Inference API.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

# Page title.
st.title("Chat with Zephyr-7b-beta")

# Chat history persists across Streamlit reruns as a list of
# (user_message, bot_response) tuples.
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []
# Sidebar for system message and parameters | |
with st.sidebar: | |
st.header("Settings") | |
system_message = st.text_area( | |
"System Message", | |
value="You are a helpful AI assistant.", | |
help="Define the system-level behavior of the chatbot.", | |
) | |
max_tokens = st.slider( | |
"Max Tokens", | |
min_value=1, | |
max_value=500, | |
value=200, | |
help="Maximum number of tokens to generate.", | |
) | |
temperature = st.slider( | |
"Temperature", | |
min_value=0.1, | |
max_value=2.0, | |
value=1.0, | |
help="Controls randomness in the model's responses.", | |
) | |
top_p = st.slider( | |
"Top-p (Nucleus Sampling)", | |
min_value=0.1, | |
max_value=1.0, | |
value=0.9, | |
help="Controls diversity of the model's responses.", | |
) | |
# Re-render the full conversation on each rerun (Streamlit redraws the whole
# page, so prior turns must be replayed from session state).
for user_message, bot_response in st.session_state.chat_history:
    with st.chat_message("user"):
        st.write(user_message)
    with st.chat_message("assistant"):
        st.write(bot_response)
def respond(message, system_message, max_tokens, temperature, top_p):
    """Stream the assistant's reply to *message* from the Zephyr model.

    Builds the full conversation (system prompt, prior turns from
    ``st.session_state.chat_history``, then the new user message) and yields
    response tokens one at a time for incremental rendering.

    Args:
        message: The new user message to answer.
        system_message: System-level instruction for the model.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature (higher = more random).
        top_p: Nucleus-sampling probability mass.

    Yields:
        str: Individual response tokens (deltas), in order.
    """
    messages = [{"role": "system", "content": system_message}]
    # Replay prior turns so the model has conversational context.
    for user_msg, bot_msg in st.session_state.chat_history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    # Add the current user message.
    messages.append({"role": "user", "content": message})
    # Use a distinct loop variable so the `message` parameter is not
    # clobbered mid-stream (the original shadowed it).
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        # Streamed chunks can carry a None delta (e.g. the final chunk);
        # skip those instead of raising a TypeError on concatenation.
        if token:
            # Yield only the new token, NOT the accumulated text:
            # st.write_stream concatenates yielded chunks itself, so
            # yielding the running total would duplicate the output.
            yield token
# Chat input box; returns None until the user submits a message.
user_input = st.chat_input("Type your message here...")
if user_input:
    # Echo the user's message immediately.
    with st.chat_message("user"):
        st.write(user_input)
    # Stream the assistant's reply; st.write_stream renders tokens as they
    # arrive and returns the full concatenated response string.
    with st.chat_message("assistant"):
        bot_response = st.write_stream(
            respond(user_input, system_message, max_tokens, temperature, top_p)
        )
    # Persist the completed turn so later reruns can replay it.
    st.session_state.chat_history.append((user_input, bot_response))