Update app.py
app.py CHANGED
@@ -1,64 +1,422 @@
  import gradio as gr
- from huggingface_hub import InferenceClient

- """
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
- """
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


- def respond(
-     message,
-     history: list[tuple[str, str]],
-     system_message,
-     max_tokens,
-     temperature,
-     top_p,
- ):
-     messages = [{"role": "system", "content": system_message}]

-     for val in history:
-         if val[0]:
-             messages.append({"role": "user", "content": val[0]})
-         if val[1]:
-             messages.append({"role": "assistant", "content": val[1]})

      messages.append({"role": "user", "content": message})

      response = ""

-     for message in client.chat_completion(
-         messages,
-         max_tokens=max_tokens,
-         stream=True,
-         temperature=temperature,
-         top_p=top_p,
-     ):
-         token = message.choices[0].delta.content

-         response += token
-         yield response


- """
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
- """
- demo = gr.ChatInterface(
-     respond,
-     additional_inputs=[
-         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-         gr.Slider(
-             minimum=0.1,
-             maximum=1.0,
-             value=0.95,
-             step=0.05,
-             label="Top-p (nucleus sampling)",
-         ),
-     ],
- )


  if __name__ == "__main__":
-     demo.launch()
  import gradio as gr
+ import os
+ from typing import List, Tuple
+ import json
+ import time

+ # Configure the model and provider
+ MODEL_ID = "openai/gpt-oss-120b"
+ DEFAULT_PROVIDER = "groq"  # Can be changed to fireworks, hyperbolic, etc.

+ # System prompts for different modes
+ SYSTEM_PROMPTS = {
+     "default": "You are a helpful AI assistant.",
+     "creative": "You are a creative and imaginative AI that thinks outside the box.",
+     "technical": "You are a technical expert AI that provides detailed, accurate technical information.",
+     "concise": "You are a concise AI that provides brief, to-the-point responses.",
+     "teacher": "You are a patient teacher who explains concepts clearly with examples.",
+     "coder": "You are an expert programmer who writes clean, efficient, well-commented code.",
+ }

+ # CSS for dark theme and custom styling
+ custom_css = """
+ #chatbot {
+     height: 600px !important;
+     background: #0a0a0a;
+ }
+ #chatbot .message {
+     font-size: 14px;
+     line-height: 1.6;
+ }
+ .dark {
+     background: #0a0a0a;
+ }
+ .user-message {
+     background: rgba(0, 255, 136, 0.1) !important;
+     border-left: 3px solid #00ff88;
+ }
+ .assistant-message {
+     background: rgba(0, 255, 255, 0.05) !important;
+     border-left: 3px solid #00ffff;
+ }
+ .footer {
+     text-align: center;
+     padding: 20px;
+     color: #666;
+ }
+ """
+ def format_message_history(history: List[Tuple[str, str]], system_prompt: str) -> List[dict]:
+     """Format chat history for the model"""
+     messages = []
+
+     if system_prompt:
+         messages.append({"role": "system", "content": system_prompt})
+
+     for user_msg, assistant_msg in history:
+         if user_msg:
+             messages.append({"role": "user", "content": user_msg})
+         if assistant_msg:
+             messages.append({"role": "assistant", "content": assistant_msg})
+
+     return messages
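For reference, this helper turns Gradio's tuple-format history into the chat-completion message list; a quick sketch with illustrative values:

    >>> format_message_history([("Hi", "Hello!")], "Be brief.")
    [{'role': 'system', 'content': 'Be brief.'},
     {'role': 'user', 'content': 'Hi'},
     {'role': 'assistant', 'content': 'Hello!'}]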
+ def stream_response(message: str, history: List[Tuple[str, str]],
+                     system_prompt: str, temperature: float, max_tokens: int,
+                     top_p: float, provider: str):
+     """Generate streaming response from the model"""
+
+     # Format messages for the model
+     messages = format_message_history(history, system_prompt)
      messages.append({"role": "user", "content": message})
+
+     # Simulate streaming for demo (replace with actual API call)
+     # In production, you'd use the actual provider API here
+     demo_response = f"""I'm GPT-OSS-120B running on {provider}!
+
+ I received your message: "{message}"
+
+ With these settings:
+ - Temperature: {temperature}
+ - Max tokens: {max_tokens}
+ - Top-p: {top_p}
+ - System prompt: {system_prompt[:50]}...

+ This is where the actual model response would appear. In production, this would connect to the {provider} API to generate real responses from the 120B parameter model.
+
+ The model would analyze your input and provide a detailed, thoughtful response based on its massive 120 billion parameters of knowledge."""
+
+     # Simulate streaming effect
+     words = demo_response.split()
      response = ""
+     for i in range(0, len(words), 3):
+         chunk = " ".join(words[i:i+3])
+         response += chunk + " "
+         time.sleep(0.05)  # Simulate streaming delay
+         yield response.strip()
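The two comments above mark the seam where a real call belongs. A minimal sketch of what that might look like with huggingface_hub's InferenceClient and provider routing; the HF_TOKEN environment variable and the stream_response_real name are assumptions for illustration, not part of this commit:

    # Hypothetical sketch, not part of this commit: real provider-routed streaming.
    import os
    from huggingface_hub import InferenceClient

    def stream_response_real(message, history, system_prompt,
                             temperature, max_tokens, top_p, provider):
        # Assumes an HF_TOKEN secret is configured for the Space.
        client = InferenceClient(provider=provider, api_key=os.environ["HF_TOKEN"])
        messages = format_message_history(history, system_prompt)
        messages.append({"role": "user", "content": message})

        response = ""
        stream = client.chat.completions.create(
            model=MODEL_ID,
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            stream=True,
        )
        for chunk in stream:
            # Each streamed chunk carries an incremental text delta.
            delta = chunk.choices[0].delta.content or ""
            response += delta
            yield response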
+ def clear_chat():
+     """Clear the chat history"""
+     return None, []

+ def undo_last(history):
+     """Remove the last message from history"""
+     if history:
+         return history[:-1]
+     return history

+ def retry_last(message, history):
+     """Retry the last message"""
+     if history and history[-1][0]:
+         last_message = history[-1][0]
+         return last_message, history[:-1]
+     return message, history

+ def load_example(example):
+     """Load an example prompt"""
+     return example
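These helpers operate on the same tuple-format history; illustrative values:

    >>> history = [("Hi", "Hello!"), ("Tell me a joke", None)]
    >>> undo_last(history)
    [('Hi', 'Hello!')]
    >>> retry_last("", history)
    ('Tell me a joke', [('Hi', 'Hello!')])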
+ # Create the Gradio interface
+ with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="GPT-OSS-120B Chat") as demo:
+
+     # Header
+     gr.Markdown(
+         """
+         # 🧠 GPT-OSS-120B Mega Chat
+         ### 120 Billion Parameters of Pure Intelligence 🚀
+
+         Chat with OpenAI's massive GPT-OSS-120B model - one of the largest open-weight models available!
+         """
+     )
+
+     # Main chat interface
+     with gr.Row():
+         # Chat column
+         with gr.Column(scale=3):
+             chatbot = gr.Chatbot(
+                 label="Chat",
+                 elem_id="chatbot",
+                 bubble_full_width=False,
+                 show_copy_button=True,
+                 height=500,
+                 type="tuples"
+             )
+
+             # Input area
+             with gr.Row():
+                 msg = gr.Textbox(
+                     label="Message",
+                     placeholder="Ask anything... (Shift+Enter for new line, Enter to send)",
+                     lines=3,
+                     max_lines=10,
+                     scale=5,
+                     elem_classes="user-input"
+                 )
+
+                 with gr.Column(scale=1, min_width=80):
+                     send_btn = gr.Button("Send 📤", variant="primary", size="lg")
+                     stop_btn = gr.Button("Stop ⏹️", variant="stop", size="lg", visible=False)
+
+             # Action buttons
+             with gr.Row():
+                 clear_btn = gr.Button("🗑️ Clear", size="sm")
+                 undo_btn = gr.Button("↩️ Undo", size="sm")
+                 retry_btn = gr.Button("🔄 Retry", size="sm")
+
+         # Settings column
+         with gr.Column(scale=1):
+             # Provider selection
+             with gr.Accordion("🔌 Inference Provider", open=True):
+                 provider = gr.Dropdown(
+                     label="Provider",
+                     choices=["groq", "fireworks", "hyperbolic", "together", "anyscale"],
+                     value=DEFAULT_PROVIDER,
+                     info="Choose your inference provider"
+                 )
+
+                 login_btn = gr.Button("🔑 Sign in with HuggingFace", size="sm")
+
+             # Model settings
+             with gr.Accordion("⚙️ Model Settings", open=True):
+                 system_mode = gr.Dropdown(
+                     label="System Mode",
+                     choices=list(SYSTEM_PROMPTS.keys()),
+                     value="default",
+                     info="Preset system prompts"
+                 )
+
+                 system_prompt = gr.Textbox(
+                     label="Custom System Prompt",
+                     value=SYSTEM_PROMPTS["default"],
+                     lines=3,
+                     info="Override with custom instructions"
+                 )
+
+                 temperature = gr.Slider(
+                     label="Temperature",
+                     minimum=0.0,
+                     maximum=2.0,
+                     value=0.7,
+                     step=0.05,
+                     info="Higher = more creative, Lower = more focused"
+                 )
+
+                 max_tokens = gr.Slider(
+                     label="Max Tokens",
+                     minimum=64,
+                     maximum=8192,
+                     value=2048,
+                     step=64,
+                     info="Maximum response length"
+                 )
+
+                 top_p = gr.Slider(
+                     label="Top-p (Nucleus Sampling)",
+                     minimum=0.1,
+                     maximum=1.0,
+                     value=0.95,
+                     step=0.05,
+                     info="Controls response diversity"
+                 )
+
+                 with gr.Row():
+                     seed = gr.Number(
+                         label="Seed",
+                         value=-1,
+                         info="Set for reproducible outputs (-1 for random)"
+                     )
+
+             # Advanced settings
+             with gr.Accordion("🔬 Advanced", open=False):
+                 stream_output = gr.Checkbox(
+                     label="Stream Output",
+                     value=True,
+                     info="Show response as it's generated"
+                 )
+
+                 show_reasoning = gr.Checkbox(
+                     label="Show Reasoning Process",
+                     value=False,
+                     info="Display chain-of-thought if available"
+                 )
+
+                 reasoning_lang = gr.Dropdown(
+                     label="Reasoning Language",
+                     choices=["English", "Spanish", "French", "German", "Chinese", "Japanese"],
+                     value="English",
+                     info="Language for reasoning process"
+                 )
+
+             # Model info
+             with gr.Accordion("📊 Model Info", open=False):
+                 gr.Markdown(
+                     """
+                     **Model**: openai/gpt-oss-120b
+                     - **Parameters**: 120 Billion
+                     - **Architecture**: Transformer + MoE
+                     - **Context**: 128K tokens
+                     - **Training**: Multi-lingual, code, reasoning
+                     - **License**: Open weight
+
+                     **Capabilities**:
+                     - Complex reasoning
+                     - Code generation
+                     - Creative writing
+                     - Technical analysis
+                     - Multi-lingual support
+                     - Function calling
+                     """
+                 )
+
+     # Examples section
+     with gr.Accordion("💡 Example Prompts", open=True):
+         examples = gr.Examples(
+             examples=[
+                 "Explain quantum computing to a 10-year-old",
+                 "Write a Python function to detect palindromes with O(1) space complexity",
+                 "What are the implications of AGI for society?",
+                 "Create a detailed business plan for a sustainable energy startup",
+                 "Translate 'Hello, how are you?' to 10 different languages",
+                 "Debug this code: `def fib(n): return fib(n-1) + fib(n-2)`",
+                 "Write a haiku about machine learning",
+                 "Compare and contrast transformers vs RNNs for NLP tasks",
+             ],
+             inputs=msg,
+             label="Click to load an example"
+         )
+
+     # Stats and info
+     with gr.Row():
+         with gr.Column():
+             token_count = gr.Textbox(
+                 label="Token Count",
+                 value="0 tokens",
+                 interactive=False,
+                 scale=1
+             )
+         with gr.Column():
+             response_time = gr.Textbox(
+                 label="Response Time",
+                 value="0.0s",
+                 interactive=False,
+                 scale=1
+             )
+         with gr.Column():
+             model_status = gr.Textbox(
+                 label="Status",
+                 value="🟢 Ready",
+                 interactive=False,
+                 scale=1
+             )
+
+     # Event handlers
+     def update_system_prompt(mode):
+         return SYSTEM_PROMPTS.get(mode, SYSTEM_PROMPTS["default"])
+
+     def user_submit(message, history):
+         if not message.strip():
+             return "", history
+         return "", history + [(message, None)]
+
+     def bot_respond(history, system_prompt, temperature, max_tokens, top_p, provider):
+         if not history or history[-1][1] is not None:
+             return history
+
+         message = history[-1][0]
+
+         # Generate response (streaming)
+         bot_message = ""
+         for chunk in stream_response(
+             message,
+             history[:-1],
+             system_prompt,
+             temperature,
+             max_tokens,
+             top_p,
+             provider
+         ):
+             bot_message = chunk
+             history[-1] = (message, bot_message)
+             yield history
+
+     # Connect event handlers
+     system_mode.change(
+         update_system_prompt,
+         inputs=[system_mode],
+         outputs=[system_prompt]
+     )
+
+     # Message submission
+     msg.submit(
+         user_submit,
+         [msg, chatbot],
+         [msg, chatbot],
+         queue=False
+     ).then(
+         bot_respond,
+         [chatbot, system_prompt, temperature, max_tokens, top_p, provider],
+         chatbot
+     )
+
+     send_btn.click(
+         user_submit,
+         [msg, chatbot],
+         [msg, chatbot],
+         queue=False
+     ).then(
+         bot_respond,
+         [chatbot, system_prompt, temperature, max_tokens, top_p, provider],
+         chatbot
+     )
+
+     # Action buttons
+     clear_btn.click(
+         lambda: (None, ""),
+         outputs=[chatbot, msg],
+         queue=False
+     )
+
+     undo_btn.click(
+         undo_last,
+         inputs=[chatbot],
+         outputs=[chatbot],
+         queue=False
+     )
+
+     retry_btn.click(
+         retry_last,
+         inputs=[msg, chatbot],
+         outputs=[msg, chatbot],
+         queue=False
+     ).then(
+         bot_respond,
+         [chatbot, system_prompt, temperature, max_tokens, top_p, provider],
+         chatbot
+     )
+
+     # Login button
+     login_btn.click(
+         lambda: gr.Info("Please implement HuggingFace OAuth login"),
+         queue=False
+     )
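For the sign-in placeholder just above, Gradio ships a built-in component that handles Hugging Face OAuth on Spaces. A possible direction, assuming `hf_oauth: true` is set in the Space's README metadata (the greet helper is illustrative, not part of this commit):

    # Hypothetical sketch: built-in OAuth instead of the placeholder button.
    login_btn = gr.LoginButton()

    def greet(profile: gr.OAuthProfile | None) -> str:
        # Gradio injects the OAuth profile automatically for parameters typed this way.
        return "Not signed in" if profile is None else f"Signed in as {profile.username}"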
+
+     # Footer
+     gr.Markdown(
+         """
+         <div class='footer'>
+             <p>Built with 🔥 for the GPT-OSS-120B community | Model: openai/gpt-oss-120b</p>
+             <p>Remember: This is a 120 billion parameter model - expect incredible responses!</p>
+         </div>
+         """,
+         elem_classes="footer"
+     )

+ # Launch configuration
  if __name__ == "__main__":
+     demo.queue(max_size=50, default_concurrency_limit=10)
+     demo.launch(
+         server_name="0.0.0.0",
+         share=False,
+         show_error=True,
+         server_port=7860,
+         favicon_path=None
+     )