# INFclaudeChat / app.py
# lahiruchamika27's picture
# Update app.py
# 68c780e verified
# raw
# history blame
# 2.6 kB
import gradio as gr
from huggingface_hub import InferenceClient
import os
# Access token read from the HF_Token environment variable (None when unset).
HF_Token = os.environ.get("HF_Token")

# Module-level inference client targeting the LLaMA 2 7B chat model; the
# token is passed through for accounts that have been granted model access.
client = InferenceClient(
    model="meta-llama/Llama-2-7b-chat-hf",
    token=HF_Token,
)
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat reply, yielding the response text accumulated so far.

    Args:
        message: The current user input message.
        history: Previous conversation turns; element 0 of each turn is the
            user text and element 1 the assistant text. Empty entries are
            skipped.
        system_message: System prompt placed first in the conversation.
        max_tokens: Forwarded to ``client.chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.

    Yields:
        The accumulated response string, once per streamed chunk.
    """
    # Rebuild the conversation in role/content form, system prompt first.
    messages = [{"role": "system", "content": system_message}]
    for turn in history:
        if turn[0]:
            messages.append({"role": "user", "content": turn[0]})
        if turn[1]:
            messages.append({"role": "assistant", "content": turn[1]})
    messages.append({"role": "user", "content": message})

    response = ""
    # The loop variable is deliberately not called ``message`` so it does not
    # shadow the user-input parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        choices = chunk.choices
        # A streamed chunk may carry no choices or a delta without content
        # (content is None); concatenating None would raise TypeError.
        token = choices[0].delta.content if choices else None
        if token:
            response += token
        yield response
# Extra controls for the chat UI, listed in the same order as the extra
# parameters of ``respond``: system_message, max_tokens, temperature, top_p.
_additional_inputs = [
    gr.Textbox(
        value="You are a helpful and friendly AI assistant. Provide informative and accurate responses.",
        label="System message",
    ),
    gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
    gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
    gr.Slider(
        minimum=0.1,
        maximum=1.0,
        value=0.95,
        step=0.05,
        label="Top-p (nucleus sampling)",
    ),
]

# Chat interface wired to the streaming ``respond`` generator.
demo = gr.ChatInterface(
    respond,
    additional_inputs=_additional_inputs,
    title="LLaMA 2 Chatbot",
    description="A conversational AI powered by Meta's LLaMA 2 model",
)

if __name__ == "__main__":
    # share=True asks Gradio to create a public link for the app.
    demo.launch(share=True)