import streamlit as st
from huggingface_hub import InferenceClient
# Initialize the Hugging Face Inference Client
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
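# Note: depending on the deployment, the Inference API may require
# authentication. InferenceClient accepts a `token` argument, e.g.
# (assuming the token is stored in an HF_TOKEN environment variable):
#   client = InferenceClient("HuggingFaceH4/zephyr-7b-beta", token=os.environ["HF_TOKEN"])
# which would also require `import os` at the top of the file.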
# Streamlit app title
st.title("Chat with Zephyr-7b-beta")
# Initialize chat history in session state
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []
# Sidebar for system message and parameters
with st.sidebar:
    st.header("Settings")
    system_message = st.text_area(
        "System Message",
        value="You are a helpful AI assistant.",
        help="Define the system-level behavior of the chatbot.",
    )
    max_tokens = st.slider(
        "Max Tokens",
        min_value=1,
        max_value=500,
        value=200,
        help="Maximum number of tokens to generate.",
    )
    temperature = st.slider(
        "Temperature",
        min_value=0.1,
        max_value=2.0,
        value=1.0,
        help="Controls randomness in the model's responses.",
    )
    top_p = st.slider(
        "Top-p (Nucleus Sampling)",
        min_value=0.1,
        max_value=1.0,
        value=0.9,
        help="Controls diversity of the model's responses.",
    )
# Display chat history
for user_message, bot_response in st.session_state.chat_history:
    with st.chat_message("user"):
        st.write(user_message)
    with st.chat_message("assistant"):
        st.write(bot_response)
# Function to generate chatbot response
def respond(message, system_message, max_tokens, temperature, top_p):
    messages = [{"role": "system", "content": system_message}]
    # Add prior turns from the chat history
    for user_msg, bot_msg in st.session_state.chat_history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    # Add the current user message
    messages.append({"role": "user", "content": message})
    # Stream the response from the model. Yield incremental tokens only:
    # st.write_stream concatenates the yielded chunks and returns the full
    # text, so yielding accumulated text would display duplicated output.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # delta.content can be None on some chunks (e.g. role-only deltas)
        token = chunk.choices[0].delta.content or ""
        yield token
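# For reference, a non-streaming variant of the same call (a sketch using
# huggingface_hub's chat_completion API) would collect the full reply at once:
#   result = client.chat_completion(
#       messages, max_tokens=max_tokens, temperature=temperature, top_p=top_p
#   )
#   reply = result.choices[0].message.content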
# User input
user_input = st.chat_input("Type your message here...")
if user_input:
    # Display the user message
    with st.chat_message("user"):
        st.write(user_input)
    # Generate and display the bot response as it streams
    with st.chat_message("assistant"):
        bot_response = st.write_stream(
            respond(user_input, system_message, max_tokens, temperature, top_p)
        )
    # Update chat history with the completed turn
    st.session_state.chat_history.append((user_input, bot_response))
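# To run locally, use the standard Streamlit launcher (the filename here is
# an assumption; substitute the actual name of this file):
#   streamlit run app.py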