import gradio as gr
from huggingface_hub import InferenceClient
import os

# Read the Hugging Face API token from the HF_TOKEN environment variable
# (None when the variable is not set).
HF_Token = os.environ.get("HF_TOKEN")

# Inference client bound to the Qwen2.5-Coder instruct model.
client = InferenceClient(
    token=HF_Token,
    model="Qwen/Qwen2.5-Coder-1.5B-Instruct",
)

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """
    Stream a coding-focused reply to ``message`` using the module-level client.

    Args:
        message (str): The current user input message
        history (list): Previous conversation turns as (user, assistant) pairs;
            an empty/falsy side of a pair is left out of the request
        system_message (str): System prompt sent as the first message
        max_tokens (int): Maximum number of tokens to generate
        temperature (float): Sampling temperature forwarded to the model
        top_p (float): Nucleus-sampling value forwarded to the model

    Yields:
        str: The reply accumulated so far, once per streamed chunk that
        carries a choice.
    """
    # Rebuild the conversation in chat-message form: system prompt first,
    # then the prior turns, then the new user message.
    messages = [{"role": "system", "content": system_message}]
    for turn in history:
        user_text, assistant_text = turn[0], turn[1]
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})
    messages.append({"role": "user", "content": message})

    response = ""
    # Stream the response; the loop variable is deliberately NOT named
    # `message` so it does not shadow the parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # Some chunks carry no choices at all; there is nothing to append.
        if not chunk.choices:
            continue
        # delta.content is optional and may be None on chunks without text;
        # treat that as an empty piece instead of failing on `str += None`.
        token = chunk.choices[0].delta.content
        response += token or ""
        yield response

# Sample prompts passed to the chat interface as examples
example_prompts = [
    "Write a Python function to find the longest palindromic substring",
    "Create a React component that displays a color picker",
    "How do I implement quicksort in JavaScript?",
    "Explain the difference between Promise.all and Promise.allSettled in JavaScript",
    "Generate a Python script to download and process CSV data from an API",
]

# One single-item row per prompt: zip() over a single list yields 1-tuples,
# which are converted to lists so each example is `[prompt]`.
examples = [list(row) for row in zip(example_prompts)]

# Build the chat UI. `respond` handles each message; the additional inputs are
# listed in the same order as respond's parameters after (message, history):
# system_message, max_tokens, temperature, top_p.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            value="You are an expert coding assistant. Provide detailed, correct, and efficient code solutions with explanations.",
            label="System message"
        ),
        gr.Slider(
            minimum=1,
            maximum=2048,
            value=1024,
            step=1,
            label="Max new tokens"
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.5,
            step=0.1,
            label="Temperature"
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.9,
            step=0.05,
            label="Top-p (nucleus sampling)"
        ),
    ],
    title="Coding Expert Assistant",
    # Keep this text in sync with the model configured on the inference client
    # (it previously named StarCoder2, which is not the model in use).
    description="A specialized coding assistant powered by Qwen2.5-Coder-1.5B-Instruct, an instruction-tuned code model",
    examples=examples
)

# Launch only when executed as a script; share=True requests a public share link.
if __name__ == "__main__":
    demo.launch(share=True)