Spaces:
Running
Running
File size: 3,842 Bytes
fa6db27 dce0906 fa6db27 4b2ab9e dce0906 fa6db27 49d0c72 fa6db27 17f1324 fa6db27 17f1324 fa6db27 17f1324 fa6db27 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
import os
from huggingface_hub import InferenceClient
import gradio as gr
# Hugging Face access token, read from the HF_TOKEN environment variable of
# the running process (None when the variable is not set).
hf_token = os.environ.get("HF_TOKEN")

# Models offered in the UI: display name -> Hugging Face Hub repository id.
models = {
    "Llama-3.3-70B-Instruct": "meta-llama/llama-3.3-70B-instruct",
    "QwQ-32B-Preview": "Qwen/QwQ-32B-Preview",
    "Qwen2.5-Coder-32B-Instruct": "qwen/qwen2.5-coder-32B-instruct",
    "Mistral-Nemo-Instruct-2407": "mistralai/Mistral-Nemo-Instruct-2407",
    "microsoft/phi-4": "microsoft/phi-4",
    "Hermes-3-Llama-3.2-3B": "NousResearch/Hermes-3-Llama-3.2-3B",
    "Phi-3-mini-4k-instruct": "microsoft/phi-3-mini-4k-instruct",
}
# Initialize the InferenceClient with a selected model
def get_inference_client(selected_model):
    """Create an InferenceClient for the model picked in the UI.

    ``selected_model`` is a key of the module-level ``models`` mapping; the
    mapped value is handed to the client as its model, together with the
    module-level ``hf_token``. A name that is not in ``models`` raises
    ``KeyError``.
    """
    model_id = models[selected_model]
    return InferenceClient(model=model_id, token=hf_token)
# Function to get a response from the chatbot
def _build_messages(user_input, history, system_prompt):
    """Assemble the chat payload: optional system prompt, prior turns, new user turn."""
    messages = []
    # Send a system message only when the prompt has visible content.
    if system_prompt and system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt})
    # Forward only role/content; any other keys on a history entry are dropped.
    messages.extend(
        {"role": turn["role"], "content": turn["content"]} for turn in history
    )
    messages.append({"role": "user", "content": user_input})
    return messages


def get_response(user_input, history, selected_model, system_prompt, temperature, max_tokens, top_p):
    """Send one user message to the selected model and return the updated history.

    Args:
        user_input: Text of the new user message.
        history: Previous turns as a list of dicts with ``"role"`` and
            ``"content"`` keys. ``None`` is treated as an empty history.
        selected_model: Display name passed to ``get_inference_client``.
        system_prompt: Optional system message; skipped when empty, ``None``
            or whitespace-only.
        temperature: Forwarded to ``chat_completion`` as ``temperature``.
        max_tokens: Forwarded to ``chat_completion`` as ``max_tokens``.
        top_p: Forwarded to ``chat_completion`` as ``top_p``.

    Returns:
        A new list holding the previous turns followed by the user turn and
        the assistant reply. The ``history`` argument itself is never
        mutated. When ``user_input`` is empty or whitespace-only, no request
        is made and an unchanged copy of the history is returned.
    """
    # Work on a copy so the caller's list stays untouched.
    updated_history = list(history or [])

    # Nothing to send: skip the remote call entirely.
    if not user_input or not user_input.strip():
        return updated_history

    client = get_inference_client(selected_model)
    response = client.chat_completion(
        _build_messages(user_input, updated_history, system_prompt),
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )
    bot_response = response.choices[0].message.content

    updated_history.append({"role": "user", "content": user_input})
    updated_history.append({"role": "assistant", "content": bot_response})
    return updated_history
# Gradio interface
# NOTE(review): the source of this section had lost its indentation; the
# nesting below (chat column next to a settings column, event wiring inside
# the Blocks context) is reconstructed -- confirm it matches the intended layout.
with gr.Blocks() as demo:
    with gr.Row():
        # Left column: conversation view with the message box and send button.
        with gr.Column(scale=2):
            # Set the type to 'messages' to avoid the deprecation warning
            chatbot = gr.Chatbot(type="messages")
            with gr.Row():
                user_input = gr.Textbox(show_label=False, placeholder="Enter your message...")
                send_button = gr.Button("Send")
        # Right column: collapsible model and generation settings.
        with gr.Column(scale=1):
            with gr.Accordion("Settings", open=False):
                # Model selection
                selected_model = gr.Dropdown(choices=list(models.keys()), label="Select Model", value="Llama-3.3-70B-Instruct")
                # Chat settings
                system_prompt = gr.Textbox(value="You are a friendly and open-minded chatbot.", label="System Prompt (Optional)", lines=5)
                temperature = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=0.7, label="Temperature")
                max_tokens = gr.Slider(minimum=10, maximum=8192, step=10, value=250, label="Max Tokens")
                top_p = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=0.9, label="Top-p")

    def submit_message(user_input, history, selected_model, system_prompt, temperature, max_tokens, top_p):
        """Handle one send action.

        Passes the current component values to ``get_response`` and returns
        ``("", history)``: an empty string to clear the input box and the
        updated history for the chatbot. Any failure in ``get_response`` is
        re-raised as ``gr.Error`` so the UI shows a readable message.
        """
        try:
            history = get_response(user_input, history, selected_model, system_prompt, temperature, max_tokens, top_p)
        except Exception as err:  # UI boundary: surface the failure to the user
            raise gr.Error(f"Could not get a response from {selected_model}: {err}") from err
        return "", history

    # The button click and the Enter key share the same handler and wiring.
    event_inputs = [user_input, chatbot, selected_model, system_prompt, temperature, max_tokens, top_p]
    event_outputs = [user_input, chatbot]

    # Set up the send button click functionality
    send_button.click(submit_message, event_inputs, event_outputs)
    # Trigger sending message when Enter key is pressed
    user_input.submit(submit_message, event_inputs, event_outputs)

# Launch the Gradio interface only when executed as a script, so importing
# this module does not start a server.
if __name__ == "__main__":
    demo.launch()
|