import json

import gradio as gr
import spaces
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load model and tokenizer
model_name = "HuggingFaceTB/SmolLM3-3B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)


@spaces.GPU
def chat_with_smollm3(message, history, system_prompt="", enable_thinking=True,
                      temperature=0.6, top_p=0.95, max_tokens=32768):
    """Chat with SmolLM3-3B with full feature support."""
    # Prepare messages
    messages = []

    if system_prompt.strip():
        # Append the thinking-mode flag expected by the SmolLM3 chat
        # template when the system prompt does not already carry one
        if enable_thinking and "/no_think" not in system_prompt and "/think" not in system_prompt:
            system_prompt += "/think"
        elif not enable_thinking and "/think" not in system_prompt and "/no_think" not in system_prompt:
            system_prompt += "/no_think"
        messages.append({"role": "system", "content": system_prompt})
    elif not enable_thinking:
        # No system prompt: fall back to a bare /no_think flag
        messages.append({"role": "system", "content": "/no_think"})

    # Add conversation history
    for human_msg, assistant_msg in history:
        messages.append({"role": "user", "content": human_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Add current message
    messages.append({"role": "user", "content": message})

    # Apply chat template; only pass enable_thinking explicitly when no
    # system prompt already carries a /think or /no_think flag
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=enable_thinking if not system_prompt.strip() else None,
    )

    # Tokenize input
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    # Generate response
    with torch.no_grad():
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens
    output_ids = generated_ids[0][len(model_inputs.input_ids[0]):]
    return tokenizer.decode(output_ids, skip_special_tokens=True)


@spaces.GPU
def chat_with_tools(message, history, tools_json="", system_prompt="", enable_thinking=False,
                    temperature=0.6, top_p=0.95, max_tokens=32768):
    """Chat with SmolLM3-3B using tool calling capabilities."""
    # Parse tools if provided
    tools = []
    if tools_json.strip():
        try:
            tools = json.loads(tools_json)
        except json.JSONDecodeError:
            return "Error: Invalid JSON format for tools"

    # Prepare messages
    messages = []

    # Add system prompt if provided
    if system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt})

    # Add conversation history
    for human_msg, assistant_msg in history:
        messages.append({"role": "user", "content": human_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Add current message
    messages.append({"role": "user", "content": message})

    # Apply chat template with tools
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=enable_thinking,
        xml_tools=tools if tools else None,
    )

    # Tokenize input
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    # Generate response
    with torch.no_grad():
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens
    output_ids = generated_ids[0][len(model_inputs.input_ids[0]):]
    return tokenizer.decode(output_ids, skip_special_tokens=True)
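
# --- Illustrative sketch (not part of the original app flow) ---
# chat_with_tools returns the model's raw text, so any tool calls arrive
# embedded in the response as markup. The helper below is a hedged,
# best-effort parser: it ASSUMES SmolLM3 emits calls as
# <tool_call>{...}</tool_call> JSON payloads (the convention suggested by
# the xml_tools argument above); check the model card for the exact format
# before relying on it.
import re


def extract_tool_calls(response_text):
    """Best-effort extraction of <tool_call> JSON payloads from a response."""
    calls = []
    for payload in re.findall(r"<tool_call>(.*?)</tool_call>", response_text, re.DOTALL):
        try:
            calls.append(json.loads(payload))
        except json.JSONDecodeError:
            continue  # skip malformed payloads rather than crashing the UI
    return calls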
# Example tools for demonstration
example_tools = """[
    {
        "name": "get_weather",
        "description": "Get the weather in a city",
        "parameters": {
            "type": "object",
            "properties": {
                "city": {
                    "type": "string",
                    "description": "The city to get the weather for"
                }
            }
        }
    },
    {
        "name": "calculate",
        "description": "Perform basic mathematical calculations",
        "parameters": {
            "type": "object",
            "properties": {
                "expression": {
                    "type": "string",
                    "description": "Mathematical expression to evaluate"
                }
            }
        }
    }
]"""

# Create Gradio interface with dark theme and mobile support
with gr.Blocks(
    title="SmolLM3-3B Chat",
    theme=gr.themes.Base().set(
        background_fill_primary="#1a1a1a",
        background_fill_secondary="#2d2d2d",
        border_color_primary="#404040",
        button_primary_background_fill="#4a9eff",
        button_primary_background_fill_hover="#5aa3ff",
        button_primary_text_color="#ffffff",
        block_background_fill="#2d2d2d",
        block_border_color="#404040",
        input_background_fill="#3a3a3a",
        input_border_color="#404040",
        slider_color="#4a9eff",
    ),
    css="""
    /* Mobile-first responsive design */
    @media (max-width: 768px) {
        .gradio-container { padding: 8px !important; }
        .gr-row { flex-direction: column !important; }
        .gr-column { width: 100% !important; min-width: 0 !important; }
        .gr-tabs { font-size: 14px !important; }
        .gr-button { width: 100% !important; margin: 2px 0 !important; }
        .gr-textbox { font-size: 16px !important; }
        .gr-chatbot { height: 400px !important; }
        .gr-markdown { font-size: 14px !important; }
        .gr-slider { width: 100% !important; }
        .settings-panel { margin-top: 20px !important; }
    }

    /* Settings panel styling */
    .settings-panel {
        background-color: #2d2d2d !important;
        border: 1px solid #404040 !important;
        border-radius: 8px !important;
        padding: 16px !important;
        margin-top: 12px !important;
    }
    .settings-button {
        background-color: #3a3a3a !important;
        border: 1px solid #404040 !important;
        color: #ffffff !important;
        padding: 8px 16px !important;
        border-radius: 6px !important;
        cursor: pointer !important;
        font-size: 14px !important;
        margin-bottom: 8px !important;
    }
    .settings-button:hover { background-color: #4a4a4a !important; }

    /* Dark mode improvements */
    .gr-chatbot { background-color: #2d2d2d !important; }
    .gr-chatbot .message {
        background-color: #3a3a3a !important;
        border: 1px solid #404040 !important;
        border-radius: 8px !important;
        margin: 4px 0 !important;
        padding: 8px !important;
    }
    .gr-chatbot .message.user { background-color: #4a9eff !important; color: white !important; }
    .gr-chatbot .message.bot { background-color: #3a3a3a !important; color: #ffffff !important; }

    /* Better mobile touch targets */
    @media (max-width: 768px) {
        .gr-button { min-height: 44px !important; padding: 12px !important; }
        .gr-slider input { min-height: 44px !important; }
        .gr-checkbox { min-height: 44px !important; }
    }

    /* Improve readability */
    .gr-markdown h1, .gr-markdown h2, .gr-markdown h3 { color: #ffffff !important; }
    .gr-markdown p, .gr-markdown li { color: #e0e0e0 !important; }

    /* Tab styling */
    .gr-tabs .gr-tab {
        background-color: #3a3a3a !important;
        color: #ffffff !important;
        border-color: #404040 !important;
    }
    .gr-tabs .gr-tab.selected { background-color: #4a9eff !important; color: #ffffff !important; }
    """,
) as demo:
    gr.Markdown("# 🤖 SmolLM3-3B Chat Interface")
    gr.Markdown(
        "Chat with SmolLM3-3B, a 3B-parameter model with advanced reasoning, "
        "long-context support, and tool calling capabilities."
    )

    with gr.Tabs():
        with gr.TabItem("💬 Standard Chat"):
            chatbot = gr.Chatbot(height=500, label="Chat with SmolLM3-3B")
            msg = gr.Textbox(label="Your message", placeholder="Type your message here...")
            with gr.Row():
                submit = gr.Button("Send", variant="primary")
                clear = gr.Button("Clear")
                settings_btn = gr.Button("⚙️ Settings", size="sm")

            with gr.Column(visible=False, elem_classes="settings-panel") as settings_panel:
                gr.Markdown("### ⚙️ Advanced Settings")
                system_prompt = gr.Textbox(
                    label="System Prompt",
                    placeholder="Enter system instructions (optional)",
                    lines=3,
                    value="You are an AI assistant trained by HuggingFace. You are helpful, harmless, and honest.",
                )
                enable_thinking = gr.Checkbox(
                    label="Enable Extended Thinking",
                    value=True,
                    info="Enable reasoning traces for better responses",
                )
                temperature = gr.Slider(minimum=0.0, maximum=2.0, value=0.6, step=0.1, label="Temperature")
                top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.95, step=0.05, label="Top-p")
                max_tokens = gr.Slider(minimum=1, maximum=32768, value=32768, step=1, label="Max Tokens")

            # Track panel visibility in session state: reading
            # settings_panel.visible server-side always returns the initial
            # value (False), so the panel could be shown but never hidden
            settings_visible = gr.State(False)

            def respond(message, history, sys_prompt, thinking, temp, top_p_val, max_tok):
                response = chat_with_smollm3(message, history, sys_prompt, thinking, temp, top_p_val, max_tok)
                history.append((message, response))
                return "", history

            def toggle_settings(visible):
                return gr.update(visible=not visible), not visible

            submit.click(
                respond,
                [msg, chatbot, system_prompt, enable_thinking, temperature, top_p, max_tokens],
                [msg, chatbot],
            )
            msg.submit(
                respond,
                [msg, chatbot, system_prompt, enable_thinking, temperature, top_p, max_tokens],
                [msg, chatbot],
            )
            clear.click(lambda: ([], ""), outputs=[chatbot, msg])
            settings_btn.click(toggle_settings, [settings_visible], [settings_panel, settings_visible])

        with gr.TabItem("🛠️ Tool Calling"):
            tool_chatbot = gr.Chatbot(height=500, label="Chat with Tools")
            tool_msg = gr.Textbox(label="Your message", placeholder="Ask me to use tools...")

            with gr.Row():
                tool_submit = gr.Button("Send", variant="primary")
                tool_clear = gr.Button("Clear")
                tool_settings_btn = gr.Button("⚙️ Settings", size="sm")

            with gr.Column(visible=False, elem_classes="settings-panel") as tool_settings_panel:
                gr.Markdown("### 🛠️ Tool Settings")
                tools_json = gr.Textbox(
                    label="Tools JSON",
                    placeholder="Enter tools as JSON array",
                    lines=10,
                    value=example_tools,
                )
                tool_system_prompt = gr.Textbox(
                    label="System Prompt",
                    placeholder="Enter system instructions (optional)",
                    lines=2,
                    value="You are an AI assistant trained by HuggingFace. You are helpful, harmless, and honest.",
                )
                tool_thinking = gr.Checkbox(
                    label="Enable Extended Thinking",
                    value=False,
                    info="Enable reasoning traces for tool usage",
                )
                tool_temperature = gr.Slider(minimum=0.0, maximum=2.0, value=0.6, step=0.1, label="Temperature")
                tool_top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.95, step=0.05, label="Top-p")
                tool_max_tokens = gr.Slider(minimum=1, maximum=32768, value=32768, step=1, label="Max Tokens")

            # Same session-state pattern as the standard chat tab
            tool_settings_visible = gr.State(False)

            def tool_respond(message, history, tools, sys_prompt, thinking, temp, top_p_val, max_tok):
                response = chat_with_tools(message, history, tools, sys_prompt, thinking, temp, top_p_val, max_tok)
                history.append((message, response))
                return "", history

            def toggle_tool_settings(visible):
                return gr.update(visible=not visible), not visible

            tool_submit.click(
                tool_respond,
                [tool_msg, tool_chatbot, tools_json, tool_system_prompt, tool_thinking,
                 tool_temperature, tool_top_p, tool_max_tokens],
                [tool_msg, tool_chatbot],
            )
            tool_msg.submit(
                tool_respond,
                [tool_msg, tool_chatbot, tools_json, tool_system_prompt, tool_thinking,
                 tool_temperature, tool_top_p, tool_max_tokens],
                [tool_msg, tool_chatbot],
            )
            tool_clear.click(lambda: ([], ""), outputs=[tool_chatbot, tool_msg])
            tool_settings_btn.click(
                toggle_tool_settings, [tool_settings_visible], [tool_settings_panel, tool_settings_visible]
            )

    gr.Markdown("""
### 📚 Model Information
- **Model**: HuggingFaceTB/SmolLM3-3B
- **Features**: Advanced reasoning, long context (up to 128k tokens), multilingual support
- **Languages**: English, French, Spanish, German, Italian, Portuguese (+ Arabic, Chinese, Russian)
- **Extended Thinking**: Provides reasoning traces for better responses
- **Tool Calling**: Supports XML-based tool calling for agentic workflows

### 💡 Usage Tips
- Use Extended Thinking for complex reasoning tasks
- Adjust temperature (0.6 recommended) for response creativity
- Try different system prompts for specialized behaviors
- Use tool calling for function-based interactions
""")

if __name__ == "__main__":
    demo.launch()
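
# --- Hedged usage sketch: driving this app programmatically ---
# demo.launch() blocks, so the snippet below is left commented out rather
# than executed. It assumes the app is reachable at the default local URL
# and that Gradio exposed the `respond` handler under the auto-generated
# endpoint name "/respond"; check the app's "Use via API" page for the real
# endpoint names and argument order before relying on this.
#
# from gradio_client import Client
#
# client = Client("http://127.0.0.1:7860")
# cleared_box, updated_history = client.predict(
#     "Hello!",  # message
#     [],        # history
#     "",        # system prompt
#     True,      # enable extended thinking
#     0.6,       # temperature
#     0.95,      # top-p
#     1024,      # max tokens
#     api_name="/respond",
# )
# print(updated_history[-1][1])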