import gradio as gr
from huggingface_hub import InferenceClient
import os
# Get your Hugging Face token from environment variables
HF_TOKEN = os.getenv("HF_TOKEN")

# Initialize the inference client with a code-specialized model
client = InferenceClient(
    model="Qwen/Qwen2.5-Coder-1.5B-Instruct",  # A compact instruct model tuned for code generation
    token=HF_TOKEN,
)
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """
    Generate coding-focused responses using the selected model.

    Args:
        message (str): The current user input message
        history (list): List of previous (user, assistant) conversation turns
        system_message (str): System prompt to guide the model's behavior
        max_tokens (int): Maximum number of tokens to generate
        temperature (float): Controls randomness in generation
        top_p (float): Controls nucleus sampling
    """
    # Format the conversation history into chat messages
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    response = ""
    # Stream the response, yielding the accumulated text after each chunk
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        if token:  # delta.content can be None for some stream chunks
            response += token
        yield response
# Create example inputs - needs to be formatted correctly for ChatInterface
example_prompts = [
    "Write a Python function to find the longest palindromic substring",
    "Create a React component that displays a color picker",
    "How do I implement quicksort in JavaScript?",
    "Explain the difference between Promise.all and Promise.allSettled in JavaScript",
    "Generate a Python script to download and process CSV data from an API",
]

# Format examples properly for ChatInterface (each example is a list of inputs)
examples = [[prompt] for prompt in example_prompts]
# Create the Gradio interface
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            value="You are an expert coding assistant. Provide detailed, correct, and efficient code solutions with explanations.",
            label="System message",
        ),
        gr.Slider(
            minimum=1,
            maximum=2048,
            value=1024,
            step=1,
            label="Max new tokens",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.5,
            step=0.1,
            label="Temperature",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.9,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
    title="Coding Expert Assistant",
    description="A specialized coding assistant powered by Qwen2.5-Coder, a model trained on code repositories",
    examples=examples,
)
if __name__ == "__main__":
    demo.launch(share=True)