Update app.py
Browse files
app.py
CHANGED
@@ -3,25 +3,28 @@ import requests
|
|
3 |
import re
|
4 |
import os
|
5 |
|
|
|
6 |
API_ENDPOINT = os.getenv("API_ENDPOINT")
|
7 |
API_TOKEN = os.getenv("API_TOKEN")
|
8 |
-
MODEL_ID = os.getenv("MODEL_ID", "none")
|
9 |
|
10 |
def get_ai_response(message, history):
|
11 |
"""Fetch AI response from the API using the modern messages format."""
|
12 |
messages = [{"role": "system", "content": "You are a helpful assistant."}]
|
13 |
-
|
14 |
for user_msg, ai_msg in history:
|
15 |
if ai_msg != "⏳ Thinking...":
|
|
|
16 |
clean_ai_msg = re.sub(r'<details>.*?</details>', '', ai_msg, flags=re.DOTALL)
|
17 |
clean_ai_msg = re.sub(r'<[^>]*>', '', clean_ai_msg)
|
18 |
messages.append({"role": "user", "content": user_msg})
|
19 |
messages.append({"role": "assistant", "content": clean_ai_msg})
|
20 |
-
|
|
|
21 |
messages.append({"role": "user", "content": message})
|
22 |
|
23 |
payload = {
|
24 |
-
"model": MODEL_ID,
|
25 |
"messages": messages,
|
26 |
"stream": False,
|
27 |
"max_tokens": 10000,
|
@@ -31,6 +34,7 @@ def get_ai_response(message, history):
|
|
31 |
"Authorization": f"Bearer {API_TOKEN}",
|
32 |
"Content-Type": "application/json"
|
33 |
}
|
|
|
34 |
try:
|
35 |
response = requests.post(API_ENDPOINT, headers=headers, json=payload)
|
36 |
response.raise_for_status()
|
@@ -41,45 +45,51 @@ def get_ai_response(message, history):
|
|
41 |
return f"Error: {str(e)}"
|
42 |
|
43 |
def convert_reasoning_to_collapsible(text):
|
44 |
-
"""Convert reasoning tags
|
45 |
reasoning_pattern = re.compile(r'<reasoning>(.*?)</reasoning>', re.DOTALL)
|
|
|
46 |
def replace_with_collapsible(match):
|
47 |
reasoning_content = match.group(1).strip()
|
48 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
html_response = reasoning_pattern.sub(replace_with_collapsible, text)
|
50 |
html_response = re.sub(r'<sep>.*?</sep>', '', html_response, flags=re.DOTALL)
|
51 |
html_response = html_response.replace('<sep>', '').replace('</sep>', '')
|
52 |
return html_response
|
53 |
|
54 |
def add_user_message(message, history):
|
55 |
-
"""
|
56 |
if history is None:
|
57 |
history = []
|
58 |
history.append((message, "⏳ Thinking..."))
|
59 |
-
# Return both updated state and chatbot messages
|
60 |
return history, history
|
61 |
|
62 |
def generate_response_from_history(history):
|
63 |
-
"""
|
64 |
if not history:
|
65 |
return history, history
|
66 |
-
|
67 |
last_user_message = history[-1][0]
|
68 |
-
# Build API history excluding pending messages
|
69 |
api_history = []
|
|
|
70 |
for user_msg, ai_msg in history:
|
71 |
if ai_msg != "⏳ Thinking...":
|
72 |
clean_ai_msg = re.sub(r'<details>.*?</details>', '', ai_msg, flags=re.DOTALL)
|
73 |
clean_ai_msg = re.sub(r'<[^>]*>', '', clean_ai_msg)
|
74 |
api_history.append({"role": "user", "content": user_msg})
|
75 |
api_history.append({"role": "assistant", "content": clean_ai_msg})
|
76 |
-
|
77 |
api_history.append({"role": "user", "content": last_user_message})
|
78 |
ai_response = get_ai_response(last_user_message, api_history)
|
79 |
history[-1] = (last_user_message, ai_response)
|
80 |
return history, history
|
81 |
|
82 |
-
#
|
83 |
custom_css = """
|
84 |
body { background-color: #1a1a1a; color: #ffffff; font-family: 'Arial', sans-serif; }
|
85 |
#chatbot { height: 80vh; background-color: #2d2d2d; border: 1px solid #404040; border-radius: 8px; }
|
@@ -90,23 +100,25 @@ summary { cursor: pointer; color: #70a9e6; }
|
|
90 |
.reasoning-content { padding: 10px; margin-top: 5px; background-color: #404040; border-radius: 5px; }
|
91 |
"""
|
92 |
|
93 |
-
#
|
94 |
model_display_name = MODEL_ID
|
95 |
|
|
|
96 |
with gr.Blocks(css=custom_css, title=model_display_name) as demo:
|
97 |
with gr.Column():
|
98 |
gr.Markdown("## nvidia-Llama-3_1-Nemotron-Ultra-253B-v1 Demo")
|
99 |
gr.Markdown("This is a demo of nvidia-Llama-3_1-Nemotron-Ultra-253B-v1")
|
100 |
chatbot = gr.Chatbot(elem_id="chatbot", render_markdown=False, bubble_full_width=True)
|
|
|
101 |
with gr.Row():
|
102 |
message = gr.Textbox(placeholder="Type your message...", show_label=False, container=False)
|
103 |
-
# Make the button larger by using size "lg"
|
104 |
submit_btn = gr.Button("Send", size="lg")
|
|
|
105 |
clear_chat_btn = gr.Button("Clear Chat")
|
106 |
-
|
107 |
-
# State management for chat history
|
108 |
chat_state = gr.State([])
|
109 |
|
|
|
110 |
js = """
|
111 |
function() {
|
112 |
const observer = new MutationObserver(function(mutations) {
|
@@ -130,7 +142,7 @@ with gr.Blocks(css=custom_css, title=model_display_name) as demo:
|
|
130 |
}
|
131 |
"""
|
132 |
|
133 |
-
#
|
134 |
submit_btn.click(
|
135 |
add_user_message,
|
136 |
[message, chat_state],
|
@@ -140,12 +152,10 @@ with gr.Blocks(css=custom_css, title=model_display_name) as demo:
|
|
140 |
chat_state,
|
141 |
[chat_state, chatbot]
|
142 |
).then(
|
143 |
-
lambda: "", #
|
144 |
-
None,
|
145 |
-
message
|
146 |
)
|
147 |
|
148 |
-
#
|
149 |
message.submit(
|
150 |
add_user_message,
|
151 |
[message, chat_state],
|
@@ -155,18 +165,17 @@ with gr.Blocks(css=custom_css, title=model_display_name) as demo:
|
|
155 |
chat_state,
|
156 |
[chat_state, chatbot]
|
157 |
).then(
|
158 |
-
lambda: "",
|
159 |
-
None,
|
160 |
-
message
|
161 |
)
|
162 |
|
|
|
163 |
clear_chat_btn.click(
|
164 |
lambda: ([], []),
|
165 |
None,
|
166 |
[chat_state, chatbot]
|
167 |
)
|
168 |
|
169 |
-
# Load
|
170 |
demo.load(
|
171 |
fn=lambda: None,
|
172 |
inputs=None,
|
@@ -174,5 +183,6 @@ with gr.Blocks(css=custom_css, title=model_display_name) as demo:
|
|
174 |
js=js
|
175 |
)
|
176 |
|
|
|
177 |
demo.queue()
|
178 |
demo.launch()
|
|
|
3 |
import re
|
4 |
import os
|
5 |
|
6 |
+
# Load from environment or fallback to default
|
7 |
API_ENDPOINT = os.getenv("API_ENDPOINT")
|
8 |
API_TOKEN = os.getenv("API_TOKEN")
|
9 |
+
MODEL_ID = os.getenv("MODEL_ID", "none")
|
10 |
|
11 |
def get_ai_response(message, history):
|
12 |
"""Fetch AI response from the API using the modern messages format."""
|
13 |
messages = [{"role": "system", "content": "You are a helpful assistant."}]
|
14 |
+
|
15 |
for user_msg, ai_msg in history:
|
16 |
if ai_msg != "⏳ Thinking...":
|
17 |
+
# Clean HTML from AI messages to avoid nesting artifacts
|
18 |
clean_ai_msg = re.sub(r'<details>.*?</details>', '', ai_msg, flags=re.DOTALL)
|
19 |
clean_ai_msg = re.sub(r'<[^>]*>', '', clean_ai_msg)
|
20 |
messages.append({"role": "user", "content": user_msg})
|
21 |
messages.append({"role": "assistant", "content": clean_ai_msg})
|
22 |
+
|
23 |
+
# Add latest user message
|
24 |
messages.append({"role": "user", "content": message})
|
25 |
|
26 |
payload = {
|
27 |
+
"model": MODEL_ID,
|
28 |
"messages": messages,
|
29 |
"stream": False,
|
30 |
"max_tokens": 10000,
|
|
|
34 |
"Authorization": f"Bearer {API_TOKEN}",
|
35 |
"Content-Type": "application/json"
|
36 |
}
|
37 |
+
|
38 |
try:
|
39 |
response = requests.post(API_ENDPOINT, headers=headers, json=payload)
|
40 |
response.raise_for_status()
|
|
|
45 |
return f"Error: {str(e)}"
|
46 |
|
47 |
def convert_reasoning_to_collapsible(text):
    """Render <reasoning>...</reasoning> sections as collapsible HTML.

    Each <reasoning> block becomes a <details>/<summary> element so the chain
    of thought is hidden behind a "See reasoning" toggle; any <sep> markers
    (paired or stray) are stripped from the final text.

    Args:
        text: Raw model output, possibly containing <reasoning> and <sep> tags.

    Returns:
        The text with reasoning sections collapsed and <sep> tags removed.
    """
    def _as_details(match):
        body = match.group(1).strip()
        return (
            '<details>'
            '<summary><strong>See reasoning</strong></summary>'
            f'<div class="reasoning-content">{body}</div>'
            '</details>'
        )

    result = re.sub(r'<reasoning>(.*?)</reasoning>', _as_details, text,
                    flags=re.DOTALL)
    # Drop paired <sep>...</sep> spans, then any unmatched leftovers.
    result = re.sub(r'<sep>.*?</sep>', '', result, flags=re.DOTALL)
    for stray in ('<sep>', '</sep>'):
        result = result.replace(stray, '')
    return result
|
64 |
|
65 |
def add_user_message(message, history):
    """Append the user's message with a pending-AI placeholder.

    Args:
        message: The text the user just submitted.
        history: Current chat history as (user, ai) tuples, or None on the
            first turn.

    Returns:
        The same list twice — one reference feeds gr.State, the other the
        Chatbot component.
    """
    history = [] if history is None else history
    history.append((message, "⏳ Thinking..."))
    return history, history
|
71 |
|
72 |
def generate_response_from_history(history):
    """Replace the trailing '⏳ Thinking...' placeholder with a real reply.

    Args:
        history: Chat history as (user, ai) tuples; the last entry is expected
            to hold the pending placeholder from add_user_message.

    Returns:
        The same list twice — (state, chatbot) — with the last entry updated
        to the model's response.
    """
    if not history:
        return history, history

    last_user_message = history[-1][0]
    # BUG FIX: the previous version pre-converted `history` into an
    # OpenAI-style list of {"role", "content"} dicts and passed that to
    # get_ai_response, whose loop `for user_msg, ai_msg in history` expects
    # (user, ai) TUPLES — a two-key dict unpacks into its keys
    # ("role", "content"), silently corrupting the conversation sent to the
    # API. get_ai_response already skips "⏳ Thinking..." placeholders and
    # appends the new user message itself, so pass the tuple history as-is.
    ai_response = get_ai_response(last_user_message, history)
    history[-1] = (last_user_message, ai_response)
    return history, history
|
91 |
|
92 |
+
# CSS for dark mode + collapsible sections
|
93 |
custom_css = """
|
94 |
body { background-color: #1a1a1a; color: #ffffff; font-family: 'Arial', sans-serif; }
|
95 |
#chatbot { height: 80vh; background-color: #2d2d2d; border: 1px solid #404040; border-radius: 8px; }
|
|
|
100 |
.reasoning-content { padding: 10px; margin-top: 5px; background-color: #404040; border-radius: 5px; }
|
101 |
"""
|
102 |
|
103 |
+
# Set model name for UI title
|
104 |
model_display_name = MODEL_ID
|
105 |
|
106 |
+
# Gradio UI definition
|
107 |
with gr.Blocks(css=custom_css, title=model_display_name) as demo:
|
108 |
with gr.Column():
|
109 |
gr.Markdown("## nvidia-Llama-3_1-Nemotron-Ultra-253B-v1 Demo")
|
110 |
gr.Markdown("This is a demo of nvidia-Llama-3_1-Nemotron-Ultra-253B-v1")
|
111 |
chatbot = gr.Chatbot(elem_id="chatbot", render_markdown=False, bubble_full_width=True)
|
112 |
+
|
113 |
with gr.Row():
|
114 |
message = gr.Textbox(placeholder="Type your message...", show_label=False, container=False)
|
|
|
115 |
submit_btn = gr.Button("Send", size="lg")
|
116 |
+
|
117 |
clear_chat_btn = gr.Button("Clear Chat")
|
118 |
+
|
|
|
119 |
chat_state = gr.State([])
|
120 |
|
121 |
+
# JS to allow rendering HTML in the chat
|
122 |
js = """
|
123 |
function() {
|
124 |
const observer = new MutationObserver(function(mutations) {
|
|
|
142 |
}
|
143 |
"""
|
144 |
|
145 |
+
# Event: Send button clicked
|
146 |
submit_btn.click(
|
147 |
add_user_message,
|
148 |
[message, chat_state],
|
|
|
152 |
chat_state,
|
153 |
[chat_state, chatbot]
|
154 |
).then(
|
155 |
+
lambda: "", None, message # clear textbox
|
|
|
|
|
156 |
)
|
157 |
|
158 |
+
# Event: Pressing Enter key in Textbox
|
159 |
message.submit(
|
160 |
add_user_message,
|
161 |
[message, chat_state],
|
|
|
165 |
chat_state,
|
166 |
[chat_state, chatbot]
|
167 |
).then(
|
168 |
+
lambda: "", None, message
|
|
|
|
|
169 |
)
|
170 |
|
171 |
+
# Clear chat
|
172 |
clear_chat_btn.click(
|
173 |
lambda: ([], []),
|
174 |
None,
|
175 |
[chat_state, chatbot]
|
176 |
)
|
177 |
|
178 |
+
# Load JS on UI load
|
179 |
demo.load(
|
180 |
fn=lambda: None,
|
181 |
inputs=None,
|
|
|
183 |
js=js
|
184 |
)
|
185 |
|
186 |
+
# Launch Gradio interface
|
187 |
demo.queue()
|
188 |
demo.launch()
|