import os

import gradio as gr
from huggingface_hub import InferenceClient

# Hugging Face API token read from the environment; None when HF_TOKEN is unset.
HF_Token = os.getenv("HF_TOKEN")

# Inference client bound to a code-specialized instruct model.
client = InferenceClient(
    model="Qwen/Qwen2.5-Coder-1.5B-Instruct",
    token=HF_Token,
)


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion for the current conversation.

    Builds a message list from the system prompt, the previous turns and the
    new user message, then streams the model's reply.

    Args:
        message (str): The current user input message.
        history (list): Previous conversation turns as
            ``(user_text, assistant_text)`` pairs; empty entries are skipped.
        system_message (str): System prompt to guide the model's behavior.
        max_tokens (int): Maximum number of tokens to generate.
        temperature (float): Controls randomness in generation.
        top_p (float): Controls nucleus sampling.

    Yields:
        str: The reply accumulated so far, re-emitted each time new text
        arrives.
    """
    messages = [{"role": "system", "content": system_message}]
    for user_text, assistant_text in history:
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})
    messages.append({"role": "user", "content": message})

    response = ""
    # The loop variable is deliberately not named `message`, so the function
    # argument is not shadowed while streaming.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # Some stream chunks may carry no choices or no text delta (for
        # example a terminating chunk); concatenating None would raise
        # TypeError, so such chunks are skipped.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        if token:
            response += token
            yield response


# Example prompts offered in the chat UI.
example_prompts = [
    "Write a Python function to find the longest palindromic substring",
    "Create a React component that displays a color picker",
    "How do I implement quicksort in JavaScript?",
    "Explain the difference between Promise.all and Promise.allSettled in JavaScript",
    "Generate a Python script to download and process CSV data from an API",
]

# ChatInterface is given each example as a one-element list holding the prompt.
examples = [[prompt] for prompt in example_prompts]

# Create the Gradio interface
# Chat UI wired to `respond`. The additional inputs are passed to `respond`
# after (message, history), in the order listed here:
# system_message, max_tokens, temperature, top_p.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            value="You are an expert coding assistant. Provide detailed, correct, and efficient code solutions with explanations.",
            label="System message",
        ),
        gr.Slider(
            minimum=1,
            maximum=2048,
            value=1024,
            step=1,
            label="Max new tokens",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.5,
            step=0.1,
            label="Temperature",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.9,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
    title="Coding Expert Assistant",
    # The description names the model the inference client is configured
    # with (Qwen2.5-Coder); it previously advertised StarCoder2.
    description="A specialized coding assistant powered by Qwen2.5-Coder-1.5B-Instruct, a code-specialized language model",
    examples=examples,
)

if __name__ == "__main__":
    # share=True asks Gradio to also expose a public share link.
    demo.launch(share=True)