# app.py — "INFclaudeChat" Hugging Face Space (author: lahiruchamika27, commit 7322a9c)
import gradio as gr
from huggingface_hub import InferenceClient
import os
# Read the Hugging Face API token from the environment (set HF_TOKEN in the Space secrets).
HF_Token = os.getenv("HF_TOKEN")
# Inference client for a code-specialized instruct model.
# NOTE(review): this is Qwen2.5-Coder, not StarCoder2/Zephyr as older comments claimed.
client = InferenceClient(
model="Qwen/Qwen2.5-Coder-1.5B-Instruct",  # small instruction-tuned coding model
token=HF_Token
)
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """
    Stream a coding-focused chat completion for the current conversation.

    Args:
        message (str): The current user input message.
        history (list[tuple[str, str]]): Previous (user, assistant) turns.
        system_message (str): System prompt to guide the model's behavior.
        max_tokens (int): Maximum number of tokens to generate.
        temperature (float): Sampling temperature; higher means more random.
        top_p (float): Nucleus-sampling probability mass.

    Yields:
        str: The accumulated response text after each streamed chunk.
    """
    # Rebuild the full conversation as chat-completion messages.
    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    response = ""
    # Stream the response tokens. The loop variable is named `chunk` (not
    # `message`) so it no longer shadows the function parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        # The final streamed chunk can carry a None delta; skip it to avoid
        # a TypeError from concatenating str + None.
        if token:
            response += token
            yield response
# Prompts offered as one-click examples in the UI.
example_prompts = [
    "Write a Python function to find the longest palindromic substring",
    "Create a React component that displays a color picker",
    "How do I implement quicksort in JavaScript?",
    "Explain the difference between Promise.all and Promise.allSettled in JavaScript",
    "Generate a Python script to download and process CSV data from an API",
]
# gr.ChatInterface expects each example as a one-element list: [user_message].
examples = [[p] for p in example_prompts]
# Build the chat UI. `respond` streams accumulated text, so the interface
# renders a live-updating assistant message.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            value="You are an expert coding assistant. Provide detailed, correct, and efficient code solutions with explanations.",
            label="System message"
        ),
        gr.Slider(
            minimum=1,
            maximum=2048,
            value=1024,
            step=1,
            label="Max new tokens"
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.5,
            step=0.1,
            label="Temperature"
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.9,
            step=0.05,
            label="Top-p (nucleus sampling)"
        ),
    ],
    title="Coding Expert Assistant",
    # Fixed: the description previously claimed StarCoder2, but the client
    # is configured with Qwen/Qwen2.5-Coder-1.5B-Instruct.
    description="A specialized coding assistant powered by Qwen2.5-Coder, an instruction-tuned model trained on code",
    examples=examples
)
# Launch the app only when run as a script (not on import).
# share=True additionally requests a public share link from Gradio.
if __name__ == "__main__":
    demo.launch(share=True)