hadadrjt committed
Commit d17e7ef · 1 Parent(s): 756e051

ai: Refactor the code.
app.py CHANGED
@@ -3,8 +3,10 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
-from src.main.gradio import launch_ui
+from src.main.gradio import launch_ui  # Import the function responsible for starting the graphical user interface
 
-# J.A.R.V.I.S.
+# The following condition checks if this script is being run as the main program.
+# If true, it calls the launch_ui function to start the user interface.
+# This ensures that the UI only launches when this file is executed directly, not when imported as a module.
 if __name__ == "__main__":
-    launch_ui()
+    launch_ui()  # Start the graphical user interface for the application
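For illustration only (not part of this commit): the guard added above means importing the module has no side effects, while running it as a script starts the UI. A hypothetical consumer script behaves like this:

# smoke_test.py — hypothetical consumer of app.py
import app  # merely importing app does NOT start the UI

# The Gradio interface only launches when app.py is the entry point:
#   $ python app.py  ->  __name__ == "__main__"  ->  launch_ui() runs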
src/config.py → config.py RENAMED
@@ -3,54 +3,78 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
-import os
-import json
+import os  # Import os module to access environment variables and interact with the operating system
+import json  # Import json module to parse JSON strings into Python objects
 
-# Initial welcome messages
+# Load initial welcome messages for the system from the environment variable "HELLO"
+# If "HELLO" is not set, default to an empty JSON array represented as "[]"
+# This variable typically contains a list of greeting messages or initialization instructions for the AI
 JARVIS_INIT = json.loads(os.getenv("HELLO", "[]"))
 
-# Deep Search
+# Deep Search service configuration variables loaded from environment variables
+# DEEP_SEARCH_PROVIDER_HOST holds the URL or IP address of the deep search service provider
 DEEP_SEARCH_PROVIDER_HOST = os.getenv("DEEP_SEARCH_PROVIDER_HOST")
+# DEEP_SEARCH_PROVIDER_KEY contains the API key or authentication token required to access the deep search provider
 DEEP_SEARCH_PROVIDER_KEY = os.getenv('DEEP_SEARCH_PROVIDER_KEY')
+# DEEP_SEARCH_INSTRUCTIONS may include specific instructions or parameters guiding how deep search queries should be handled
 DEEP_SEARCH_INSTRUCTIONS = os.getenv("DEEP_SEARCH_INSTRUCTIONS")
 
-# Servers and instructions
+# Internal AI server configuration and system instructions
+# INTERNAL_AI_GET_SERVER stores the endpoint URL or IP address for internal AI GET requests
 INTERNAL_AI_GET_SERVER = os.getenv("INTERNAL_AI_GET_SERVER")
+# INTERNAL_AI_INSTRUCTIONS contains system instructions used to guide the AI behavior
 INTERNAL_AI_INSTRUCTIONS = os.getenv("INTERNAL_TRAINING_DATA")
 
-# System instructions mapping
+# System instruction mappings and default instructions loaded from environment variables
+# SYSTEM_PROMPT_MAPPING is a dictionary mapping instruction keys to their corresponding instruction texts, parsed from JSON
 SYSTEM_PROMPT_MAPPING = json.loads(os.getenv("SYSTEM_PROMPT_MAPPING", "{}"))
+# SYSTEM_PROMPT_DEFAULT is the fallback instruction text used when no specific instruction mapping is found
 SYSTEM_PROMPT_DEFAULT = os.getenv("DEFAULT_SYSTEM")
 
-# List of available servers
+# List of available server hosts for connections or operations
+# This list is parsed from a JSON array string and filtered to exclude any empty or invalid entries
 LINUX_SERVER_HOSTS = [h for h in json.loads(os.getenv("LINUX_SERVER_HOST", "[]")) if h]
 
-# List of available keys
+# List of provider keys associated with servers, used for authentication
+# The list is parsed from JSON and filtered to remove empty strings
 LINUX_SERVER_PROVIDER_KEYS = [k for k in json.loads(os.getenv("LINUX_SERVER_PROVIDER_KEY", "[]")) if k]
+# Set to keep track of provider keys that have been marked or flagged during runtime
 LINUX_SERVER_PROVIDER_KEYS_MARKED = set()
+# Dictionary to record the number of attempts made with each provider key
 LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS = {}
 
-# Server errors codes
+# Set of server error codes that the system recognizes as critical or requiring special handling
+# The error codes are read from a comma-separated string, filtered to remove empty entries, converted to integers, and stored in a set
 LINUX_SERVER_ERRORS = set(map(int, filter(None, os.getenv("LINUX_SERVER_ERROR", "").split(","))))
 
-# Human friendly AI setup
+# Human-friendly AI types and response messages loaded from environment variables
+# AI_TYPES maps keys like "AI_TYPE_1" to descriptive names or categories of AI models or behaviors
 AI_TYPES = {f"AI_TYPE_{i}": os.getenv(f"AI_TYPE_{i}") for i in range(1, 10)}
+# RESPONSES maps keys like "RESPONSE_1" to predefined response templates or messages used by the AI system
 RESPONSES = {f"RESPONSE_{i}": os.getenv(f"RESPONSE_{i}") for i in range(1, 11)}
 
-# Model mapping
+# Model-related configurations loaded from environment variables
+# MODEL_MAPPING is a dictionary mapping model keys to their corresponding model names or identifiers, parsed from JSON
 MODEL_MAPPING = json.loads(os.getenv("MODEL_MAPPING", "{}"))
+# MODEL_CONFIG contains detailed configuration settings for each model, such as parameters or options, parsed from JSON
 MODEL_CONFIG = json.loads(os.getenv("MODEL_CONFIG", "{}"))
+# MODEL_CHOICES is a list of available model names extracted from the values of MODEL_MAPPING, useful for selection menus or validation
 MODEL_CHOICES = list(MODEL_MAPPING.values())
 
-# Default model config and key for fallback
+# Default model configuration and key used as fallback if no specific model is selected
+# DEFAULT_CONFIG contains default parameters or settings for the AI model, parsed from JSON
 DEFAULT_CONFIG = json.loads(os.getenv("DEFAULT_CONFIG", "{}"))
+# DEFAULT_MODEL_KEY is set to the first key found in MODEL_MAPPING if available, otherwise None
 DEFAULT_MODEL_KEY = list(MODEL_MAPPING.keys())[0] if MODEL_MAPPING else None
 
-# HTML <head> codes (SEO, etc.)
+# HTML meta tags for SEO and other purposes, loaded as a raw string from environment variables
+# These tags are intended to be inserted into the <head> section of generated HTML pages
 META_TAGS = os.getenv("META_TAGS")
 
-# Allowed file extensions
+# List of allowed file extensions for upload or processing, parsed from a JSON array string
+# This list helps enforce file type restrictions within the system
 ALLOWED_EXTENSIONS = json.loads(os.getenv("ALLOWED_EXTENSIONS", "[]"))
 
-# NOTICES
+# Notices or announcements that may be displayed to users or logged by the system
+# The content is loaded as a raw string from the environment variable "NOTICES"
 NOTICES = os.getenv('NOTICES')
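As an aside, a minimal sketch (with hypothetical values, not taken from this repository's secrets) of how the JSON-typed and comma-separated environment variables above are expected to parse:

import json, os

# Hypothetical example values; the real ones live in the deployment's environment
os.environ["MODEL_MAPPING"] = '{"model_a": "JARVIS Primary", "model_b": "JARVIS Mini"}'
os.environ["LINUX_SERVER_ERROR"] = "401,402,403"

MODEL_MAPPING = json.loads(os.getenv("MODEL_MAPPING", "{}"))
LINUX_SERVER_ERRORS = set(map(int, filter(None, os.getenv("LINUX_SERVER_ERROR", "").split(","))))

print(list(MODEL_MAPPING.values()))  # ['JARVIS Primary', 'JARVIS Mini'] -> becomes MODEL_CHOICES
print(LINUX_SERVER_ERRORS)           # e.g. {401, 402, 403}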
src/cores/client.py CHANGED
@@ -3,40 +3,65 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
-import asyncio
-import httpx
-import json
-import random
-import uuid
+import asyncio  # Import asyncio for asynchronous programming capabilities
+import httpx  # Import httpx to perform asynchronous HTTP requests
+import json  # Import json to handle JSON encoding and decoding
+import random  # Import random to shuffle lists for load balancing
+import uuid  # Import uuid to generate unique session identifiers
 
-from src.config import *
-from src.cores.server import fetch_response_stream_async
-from src.cores.session import ensure_stop_event, get_model_key
-from datetime import datetime
+from config import *  # Import all configuration constants and variables from the config module
+from src.cores.server import fetch_response_stream_async  # Import the async function that fetches streamed AI responses
+from src.cores.session import ensure_stop_event, get_model_key  # Import session helper functions
+from datetime import datetime  # Import datetime to get current date and time information
 
 async def chat_with_model_async(history, user_input, model_display, sess, custom_prompt, deep_search):
     """
-    Core async function to interact with AI model.
-    Prepares message history, system instructions, and optionally integrates deep search results.
-    Tries multiple backend hosts and keys with fallback.
-    Yields streamed responses for UI updates.
+    Asynchronous function that handles interaction with an AI model and streams its responses.
+
+    Parameters:
+    - history: List of tuples containing previous conversation messages (user and assistant)
+    - user_input: The current input string from the user
+    - model_display: The display name of the AI model to use
+    - sess: Session object containing session state, stop event, and cancellation token
+    - custom_prompt: Optional custom system instructions that override the default instructions
+    - deep_search: Boolean flag indicating whether to integrate deep search results into the instructions
+
+    This function prepares the message history and system instructions, optionally enriches the instructions
+    with deep search results if enabled, and attempts to fetch streamed responses from multiple backend
+    providers with fallback. It yields chunks of the response asynchronously for real-time UI updates.
     """
+
+    # Ensure the session has a stop event initialized to control streaming cancellation
     ensure_stop_event(sess)
+
+    # Clear any previous stop event state to allow a new streaming session
     sess.stop_event.clear()
+
+    # Reset the cancellation token to indicate the session is active and not cancelled
     sess.cancel_token["cancelled"] = False
+
+    # Check that provider keys and hosts are configured; if not, yield a predefined error response and exit
     if not LINUX_SERVER_PROVIDER_KEYS or not LINUX_SERVER_HOSTS:
-        yield ("content", RESPONSES["RESPONSE_3"])  # No providers available
+        yield ("content", RESPONSES["RESPONSE_3"])  # Inform the user that no backend providers are available
         return
+
+    # Assign a unique session ID if not already present to track conversation context
     if not hasattr(sess, "session_id") or not sess.session_id:
         sess.session_id = str(uuid.uuid4())
+
+    # Determine the internal model key from the display name, falling back to the default if not found
     model_key = get_model_key(model_display, MODEL_MAPPING, DEFAULT_MODEL_KEY)
+
+    # Retrieve model-specific configuration parameters or use the default configuration
    cfg = MODEL_CONFIG.get(model_key, DEFAULT_CONFIG)
+
+    # Initialize a list to hold the messages that will be sent to the AI model
     msgs = []
 
-    # Get current date
+    # Obtain the current date and time formatted as a readable string for context in the instructions
     current_date = datetime.now().strftime("%A, %B %d, %Y, %I:%M %p %Z")
 
-    # Instructions
+    # Combine the internal AI instructions with the current date to form comprehensive system instructions
     COMBINED_AI_INSTRUCTIONS = (
         INTERNAL_AI_INSTRUCTIONS
         + "\n\n\n"
@@ -44,11 +69,14 @@ async def chat_with_model_async(history, user_input, model_display, sess, custom
         + "\n\n\n"
     )
 
-    # If deep search enabled and using primary model, prepend deep search instructions and results
+    # If deep search is enabled and the primary model is selected, prepend deep search instructions and results
     if deep_search and model_display == MODEL_CHOICES[0]:
+        # Add the deep search instructions as a system message to guide the AI
         msgs.append({"role": "system", "content": DEEP_SEARCH_INSTRUCTIONS})
         try:
+            # Create an asynchronous HTTP client session for the deep search request
             async with httpx.AsyncClient() as client:
+                # Define the payload with parameters for the deep search query
                 payload = {
                     "query": user_input,
                     "topic": "general",
@@ -64,40 +92,70 @@ async def chat_with_model_async(history, user_input, model_display, sess, custom
                     "include_domains": [],
                     "exclude_domains": []
                 }
-                r = await client.post(DEEP_SEARCH_PROVIDER_HOST, headers={"Authorization": f"Bearer {DEEP_SEARCH_PROVIDER_KEY}"}, json=payload)
+                # Send a POST request to the deep search provider with an authorization header and JSON payload
+                r = await client.post(
+                    DEEP_SEARCH_PROVIDER_HOST,
+                    headers={"Authorization": f"Bearer {DEEP_SEARCH_PROVIDER_KEY}"},
+                    json=payload
+                )
+                # Parse the JSON response from the deep search provider
                 sr_json = r.json()
+                # Append the deep search results as a system message in JSON string format
                 msgs.append({"role": "system", "content": json.dumps(sr_json)})
         except Exception:
-            # Fail silently if deep search fails
+            # If any error occurs during deep search, fail silently without interrupting the chat flow
            pass
+        # Append the combined AI instructions after the deep search content to maintain context
         msgs.append({"role": "system", "content": COMBINED_AI_INSTRUCTIONS})
+
+    # If deep search is not enabled but the primary model is selected, use only the combined AI instructions
     elif model_display == MODEL_CHOICES[0]:
-        # For primary model without deep search, use internal instructions
         msgs.append({"role": "system", "content": COMBINED_AI_INSTRUCTIONS})
+
+    # For other models, use custom instructions if provided, otherwise fall back to the system instruction mapping or the default instructions
     else:
-        # For other models, use default instructions
         msgs.append({"role": "system", "content": custom_prompt or SYSTEM_PROMPT_MAPPING.get(model_key, SYSTEM_PROMPT_DEFAULT)})
 
-    # Append conversation history alternating user and assistant messages
+    # Append the conversation history to the message list, alternating user and assistant messages
+    # First add all user messages from the history
     msgs.extend([{"role": "user", "content": u} for u, _ in history])
+    # Then add all assistant messages from the history that are not empty
     msgs.extend([{"role": "assistant", "content": a} for _, a in history if a])
-    # Append current user input
+
+    # Append the current user input as the latest user message
     msgs.append({"role": "user", "content": user_input})
 
-    # Shuffle provider hosts and keys for load balancing and fallback
+    # Build the list of all host and provider key combinations for load balancing and fallback
     candidates = [(h, k) for h in LINUX_SERVER_HOSTS for k in LINUX_SERVER_PROVIDER_KEYS]
+
+    # Randomly shuffle the list of host-key pairs to distribute load evenly and avoid bias
     random.shuffle(candidates)
 
-    # Try each host-key pair until a successful response is received
     for h, k in candidates:
-        stream_gen = fetch_response_stream_async(h, k, model_key, msgs, cfg, sess.session_id, sess.stop_event, sess.cancel_token)
+        # Call the async generator that fetches streamed response chunks from the backend
+        stream_gen = fetch_response_stream_async(
+            h, k, model_key, msgs, cfg, sess.session_id, sess.stop_event, sess.cancel_token
+        )
+
+        # Flag to track whether any response chunks were received from this provider
         got_responses = False
+
+        # Asynchronously iterate over each chunk yielded by the streaming generator
         async for chunk in stream_gen:
+            # If the stop event is set or cancellation is requested, terminate streaming immediately
             if sess.stop_event.is_set() or sess.cancel_token["cancelled"]:
                 return
+
+            # Mark that at least one response chunk has been received
            got_responses = True
+
+            # Yield the current chunk to the caller for incremental UI updates or processing
             yield chunk
+
+        # If any responses were received from this host-key pair, stop trying others and return
         if got_responses:
             return
-    # If no response from any provider, yield fallback message
+
+    # If no responses were received from any provider, yield a fallback message indicating failure
     yield ("content", RESPONSES["RESPONSE_2"])
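A minimal self-contained sketch of the fallback pattern used above (hypothetical hosts and keys; the synchronous `try_provider` stands in for `fetch_response_stream_async`):

import random

hosts = ["https://a.example", "https://b.example"]  # hypothetical
keys = ["key1", "key2"]                             # hypothetical

candidates = [(h, k) for h in hosts for k in keys]  # every host-key pair
random.shuffle(candidates)                          # spread load, avoid bias

def try_provider(host, key):
    # Stand-in for the streaming call: returns chunks, or nothing on failure
    return [] if key == "key1" else ["chunk-1", "chunk-2"]

for h, k in candidates:
    got = False
    for chunk in try_provider(h, k):
        got = True
        print(chunk)  # in the real code, each chunk is yielded to the UI
    if got:
        break         # the first provider that answers wins
else:
    print("fallback message")  # no provider produced output (the RESPONSE_2 role)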
src/cores/server.py CHANGED
@@ -3,57 +3,99 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
-import codecs  # Reasoning
-import httpx
-import json
+import codecs  # Import codecs module for encoding and decoding operations, useful for handling text data
+import httpx  # Import httpx for making asynchronous HTTP requests to external servers or APIs
+import json  # Import json module to parse JSON formatted strings into Python objects and vice versa
 
-from src.cores.session import marked_item
-from src.config import LINUX_SERVER_ERRORS, LINUX_SERVER_PROVIDER_KEYS_MARKED, LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS, RESPONSES
+from src.cores.session import marked_item  # Import the marked_item function to track and mark keys that fail repeatedly, helping to avoid problematic keys
+from config import LINUX_SERVER_ERRORS, LINUX_SERVER_PROVIDER_KEYS_MARKED, LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS, RESPONSES  # Import constants used for error handling, key marking, retry attempts, and predefined responses
 
 async def fetch_response_stream_async(host, key, model, msgs, cfg, sid, stop_event, cancel_token):
     """
-    Async generator that streams AI responses from a backend server.
-    Implements retry logic and marks failing keys to avoid repeated failures.
-    Streams reasoning and content separately for richer UI updates.
+    Asynchronous generator that streams AI-generated responses from a backend server endpoint.
+
+    Parameters:
+    - host: The URL of the backend server to send the request to.
+    - key: Authorization token (API key) used in the request header for authentication.
+    - model: The AI model identifier to be used for generating responses.
+    - msgs: The list of messages forming the conversation or prompt to send to the AI.
+    - cfg: Configuration dictionary containing additional parameters for the request.
+    - sid: Session ID string to associate the request with a particular session.
+    - stop_event: An asynchronous event object that signals when to stop streaming responses.
+    - cancel_token: A dictionary containing a 'cancelled' boolean flag to abort the streaming operation.
+
+    This function attempts to connect to the backend server twice, with different timeout values (5 and 10 seconds).
+    It sends a POST request whose JSON payload includes the model, messages, session ID, stream flag, and configuration.
+    The function streams the response line by line, parsing JSON data chunks as they arrive.
+
+    The streamed data contains two types of text parts:
+    - 'reasoning': Additional reasoning text that can be displayed separately in the UI for a richer user experience.
+    - 'content': The main content text generated by the AI.
+
+    The function yields tuples of the form ('reasoning', text) or ('content', text) to the caller asynchronously.
+
+    If the server returns an error status code listed in LINUX_SERVER_ERRORS, the key is marked as problematic to avoid future use.
+    The function also respects stop_event and cancel_token to allow graceful cancellation of the streaming process.
+
+    If the response signals completion with the specific message defined in RESPONSES["RESPONSE_10"], the function ends the stream.
+
+    The function handles exceptions gracefully, including network errors and JSON parsing issues, retrying or marking keys as needed.
     """
+    # Loop over two timeout values to attempt the request with increasing timeout durations for robustness
     for timeout in [5, 10]:
         try:
+            # Create an asynchronous HTTP client with the specified timeout for the request
             async with httpx.AsyncClient(timeout=timeout) as client:
+                # Open a streaming POST request to the backend server with JSON payload and authorization header
                 async with client.stream(
                     "POST",
                     host,
+                    # Combine the fixed parameters with the additional configuration into the JSON body
                     json={**{"model": model, "messages": msgs, "session_id": sid, "stream": True}, **cfg},
-                    headers={"Authorization": f"Bearer {key}"}
+                    headers={"Authorization": f"Bearer {key}"}  # Use Bearer token authentication
                 ) as response:
+                    # Check whether the response status code indicates a server error that should mark the key
                     if response.status_code in LINUX_SERVER_ERRORS:
+                        # Mark the key as problematic with the tracking function and exit the generator
                         marked_item(key, LINUX_SERVER_PROVIDER_KEYS_MARKED, LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS)
                         return
+
+                    # Iterate asynchronously over each line of the streamed response content
                     async for line in response.aiter_lines():
+                        # If the stop event is set or cancellation is requested, stop streaming and exit
                        if stop_event.is_set() or cancel_token["cancelled"]:
                             return
+                        # Skip empty lines to avoid unnecessary processing
                         if not line:
                             continue
+                        # Process lines that start with the prefix 'data: ', which contain JSON payloads
                        if line.startswith("data: "):
-                            data = line[6:]
+                            data = line[6:]  # Extract the JSON string after 'data: '
+                            # If the data matches the predefined end-of-response message, stop streaming
                             if data.strip() == RESPONSES["RESPONSE_10"]:
                                 return
                             try:
+                                # Attempt to parse the JSON data string into a Python dictionary
                                 j = json.loads(data)
+                                # Check that the parsed object is a dictionary containing the 'choices' key
                                 if isinstance(j, dict) and j.get("choices"):
+                                    # Iterate over each choice in the response to extract text deltas
                                     for ch in j["choices"]:
-                                        delta = ch.get("delta", {})
-                                        # Stream reasoning text separately for UI
+                                        delta = ch.get("delta", {})  # Get the incremental update part
+                                        # If 'reasoning' text is present in the delta, decode unicode escapes and yield it
                                         if "reasoning" in delta and delta["reasoning"]:
                                             decoded = delta["reasoning"].encode('utf-8').decode('unicode_escape')
-                                            yield ("reasoning", decoded)
-                                        # Stream main content text
+                                            yield ("reasoning", decoded)  # Yield reasoning text for UI display
+                                        # If main 'content' text is present in the delta, yield it directly
                                         if "content" in delta and delta["content"]:
-                                            yield ("content", delta["content"])
+                                            yield ("content", delta["content"])  # Yield main content text
                             except Exception:
-                                # Ignore malformed JSON or unexpected data
+                                # Ignore exceptions from malformed JSON or unexpected data formats and continue streaming
                                 continue
         except Exception:
-            # Network or other errors, try next timeout or mark key
+            # Catch network errors, timeouts, or other exceptions and try the next timeout value
             continue
+    # If all attempts fail, mark the key as problematic to avoid future use
     marked_item(key, LINUX_SERVER_PROVIDER_KEYS_MARKED, LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS)
+    # End the generator explicitly when streaming ends or fails after retries
     return
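A minimal sketch of the `data: ` line parsing performed above, run against hypothetical stream lines rather than a live backend (the `[DONE]` sentinel is an assumption; the real end marker comes from RESPONSES["RESPONSE_10"]):

import json

lines = [                                    # hypothetical SSE-style stream
    'data: {"choices": [{"delta": {"reasoning": "thinking..."}}]}',
    "",                                      # keep-alive lines are skipped
    'data: {"choices": [{"delta": {"content": "Hello"}}]}',
    "data: [DONE]",                          # hypothetical end-of-stream marker
]

END = "[DONE]"  # assumption: the real sentinel is configured via the environment

for line in lines:
    if not line:
        continue
    if line.startswith("data: "):
        data = line[6:]
        if data.strip() == END:
            break
        j = json.loads(data)
        for ch in j.get("choices", []):
            delta = ch.get("delta", {})
            if delta.get("reasoning"):
                print(("reasoning", delta["reasoning"]))
            if delta.get("content"):
                print(("content", delta["content"]))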
src/cores/session.py CHANGED
@@ -3,58 +3,91 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
-import asyncio
-import requests
-import uuid
-import threading
+import asyncio  # Import the asyncio library to handle asynchronous operations and events
+import requests  # Import the requests library for HTTP requests and session management
+import uuid  # Import the uuid library to generate unique identifiers
+import threading  # Import threading to run background timers for delayed operations
 
-from src.config import LINUX_SERVER_PROVIDER_KEYS_MARKED, LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS
+from config import LINUX_SERVER_PROVIDER_KEYS_MARKED, LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS  # Import the configuration variables that track marked provider keys and their failure attempts
 
 class SessionWithID(requests.Session):
     """
-    Custom session object that holds a unique session ID and async control flags.
-    Used to track individual user sessions and allow cancellation of ongoing requests.
+    Custom session class extending requests.Session to add unique session identification
+    and asynchronous cancellation control. This allows tracking individual user sessions
+    and managing cancellation of ongoing HTTP requests asynchronously.
     """
     def __init__(self):
-        super().__init__()
-        self.session_id = str(uuid.uuid4())  # Unique ID per session
-        self.stop_event = asyncio.Event()  # Async event to signal stop requests
-        self.cancel_token = {"cancelled": False}  # Flag to indicate cancellation
+        super().__init__()  # Initialize the base requests.Session class
+        self.session_id = str(uuid.uuid4())
+        # Generate and assign a unique string ID that identifies this session instance
+        self.stop_event = asyncio.Event()
+        # Create an asyncio Event used to signal when the session should stop or cancel operations
+        self.cancel_token = {"cancelled": False}
+        # Dictionary flag indicating whether the current session's operations have been cancelled
 
 def create_session():
     """
-    Create and return a new SessionWithID object.
-    Called when a new user session starts or chat is reset.
+    Factory function to create and return a new SessionWithID instance.
+    This should be called whenever a new user session starts or a chat session is reset,
+    ensuring each session has its own unique ID and cancellation controls.
     """
     return SessionWithID()
 
 def ensure_stop_event(sess):
     """
-    Ensure that the session object has stop_event and cancel_token attributes.
-    Useful when restoring or reusing sessions.
+    Utility function to verify that a given session object has the required asynchronous
+    control attributes: stop_event and cancel_token. If they are missing (e.g., when restoring
+    sessions from storage), this function adds them to maintain consistent session behavior.
+
+    Parameters:
+    - sess: The session object to check and update.
     """
     if not hasattr(sess, "stop_event"):
-        sess.stop_event = asyncio.Event()
+        sess.stop_event = asyncio.Event()
+        # Add an asyncio Event to signal stop requests if it is missing
     if not hasattr(sess, "cancel_token"):
-        sess.cancel_token = {"cancelled": False}
+        sess.cancel_token = {"cancelled": False}
+        # Add a cancellation flag dictionary if it is missing
 
 def marked_item(item, marked, attempts):
     """
-    Mark a provider key or host as temporarily problematic after repeated failures.
-    Automatically unmark after 5 minutes to retry.
-    This helps avoid repeatedly using failing providers.
+    Mark a provider key or host as temporarily problematic after repeated failures, to prevent
+    continuously using unreliable providers. This function adds the item to a 'marked' set
+    and increments its failure attempt count. If the failure count reaches 3 or more, a timer
+    is started to automatically unmark the item after 5 minutes (300 seconds), allowing retries.
+
+    Parameters:
+    - item: The provider key or host identifier to mark as problematic.
+    - marked: A set containing currently marked items.
+    - attempts: A dictionary tracking the number of failure attempts per item.
    """
-    marked.add(item)
-    attempts[item] = attempts.get(item, 0) + 1
+    marked.add(item)
+    # Add the item to the set of marked problematic providers
+    attempts[item] = attempts.get(item, 0) + 1
+    # Increment the failure attempt count for this item, initializing it if necessary
     if attempts[item] >= 3:
+        # If the item has failed 3 or more times, schedule its removal from the marked set after 5 minutes
         def remove():
-            marked.discard(item)
-            attempts.pop(item, None)
-        threading.Timer(300, remove).start()
+            marked.discard(item)
+            # Remove the item from the marked set to allow retrying
+            attempts.pop(item, None)
+            # Remove the attempt count entry for this item to reset its failure state
+        threading.Timer(300, remove).start()
+        # Start a background timer that will call remove() after 300 seconds (5 minutes)
 
 def get_model_key(display, MODEL_MAPPING, DEFAULT_MODEL_KEY):
     """
-    Get the internal model key (identifier) from the display name.
-    Returns default model key if not found.
+    Translate a human-readable model display name into its internal model key identifier.
+    Searches the MODEL_MAPPING dictionary for the key whose value matches the display name.
+    Returns the DEFAULT_MODEL_KEY if no matching display name is found.
+
+    Parameters:
+    - display: The display name of the model as a string.
+    - MODEL_MAPPING: Dictionary mapping internal model keys to display names.
+    - DEFAULT_MODEL_KEY: The fallback model key to return if no match is found.
+
+    Returns:
+    - The internal model key string corresponding to the display name.
     """
+    # Iterate through the MODEL_MAPPING items and return the key whose value matches the display name
     return next((k for k, v in MODEL_MAPPING.items() if v == display), DEFAULT_MODEL_KEY)
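A compact sketch of the mark/unmark lifecycle implemented by `marked_item`, using a short timer so the behavior is observable in a test run (the `delay` parameter is added here for illustration; the real code waits a fixed 300 seconds):

import threading, time

marked, attempts = set(), {}

def marked_item(item, marked, attempts, delay=0.1):  # real code uses a fixed 300-second delay
    marked.add(item)
    attempts[item] = attempts.get(item, 0) + 1
    if attempts[item] >= 3:
        def remove():
            marked.discard(item)
            attempts.pop(item, None)
        threading.Timer(delay, remove).start()

for _ in range(3):                  # the third failure schedules the unmark timer
    marked_item("key1", marked, attempts)
print("key1" in marked)             # True: the key is currently excluded
time.sleep(0.2)
print("key1" in marked)             # False: the key is eligible for retry again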
src/main/file_extractors.py CHANGED
@@ -3,216 +3,391 @@
3
  # SPDX-License-Identifier: Apache-2.0
4
  #
5
 
6
- import pdfplumber # PDF
7
- import pytesseract # OCR
8
- import docx # Microsoft Word
9
- import zipfile # Microsoft Word
10
- import io
11
- import pandas as pd # Microsoft Excel
12
- import warnings
13
- import re
14
 
15
- from openpyxl import load_workbook # Microsoft Excel
16
- from pptx import Presentation # Microsoft PowerPoint
17
- from PIL import Image, ImageEnhance, ImageFilter # OCR
18
- from pathlib import Path
19
 
20
  def clean_text(text):
21
- """Clean and normalize extracted outputs."""
22
- # Remove non-printable and special characters except common punctuation
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  text = re.sub(r'[^a-zA-Z0-9\s.,?!():;\'"-]', '', text)
24
- # Remove isolated single letters (likely OCR noise)
25
  text = re.sub(r'\b[a-zA-Z]\b', '', text)
26
- # Normalize whitespace and remove empty lines
27
  lines = [line.strip() for line in text.splitlines() if line.strip()]
 
28
  return "\n".join(lines)
29
 
30
  def format_table(df, max_rows=10):
31
- """Format pandas DataFrame as a readable table string, limited to max rows."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  if df.empty:
33
  return ""
34
- # Drop fully empty rows and columns to reduce NaN clutter
35
  df_clean = df.dropna(axis=0, how='all').dropna(axis=1, how='all')
36
- # Replace NaN with empty string to avoid 'NaN' in output
37
  df_clean = df_clean.fillna('')
38
  if df_clean.empty:
39
  return ""
 
40
  display_df = df_clean.head(max_rows)
 
41
  table_str = display_df.to_string(index=False)
 
42
  if len(df_clean) > max_rows:
43
  table_str += f"\n... ({len(df_clean) - max_rows} more rows)"
44
  return table_str
45
 
46
  def preprocess_image(img):
47
- """Preprocess image for better OCR accuracy."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  try:
49
- img = img.convert("L") # Grayscale
 
 
50
  enhancer = ImageEnhance.Contrast(img)
51
- img = enhancer.enhance(2) # Increase contrast
52
- img = img.filter(ImageFilter.MedianFilter()) # Reduce noise
53
- # Binarize image (threshold)
 
54
  img = img.point(lambda x: 0 if x < 140 else 255, '1')
55
  return img
56
  except Exception:
 
57
  return img
58
 
59
  def ocr_image(img):
60
- """Perform OCR on PIL Image with preprocessing and clean result."""
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  try:
 
62
  img = preprocess_image(img)
 
63
  text = pytesseract.image_to_string(img, lang='eng', config='--psm 6')
 
64
  text = clean_text(text)
65
  return text
66
  except Exception:
 
67
  return ""
68
 
69
  def extract_pdf_content(fp):
70
  """
71
- Extract text content from PDF file.
72
- Includes OCR on embedded images to capture text within images.
73
- Also extracts tables as tab-separated text.
 
 
 
 
 
 
 
 
 
 
 
74
  """
75
  content = ""
76
  try:
77
  with pdfplumber.open(fp) as pdf:
78
  for i, page in enumerate(pdf.pages, 1):
 
79
  text = page.extract_text() or ""
 
80
  content += f"Page {i} Text:\n{clean_text(text)}\n\n"
81
- # OCR on images if any
82
  if page.images:
 
83
  img_obj = page.to_image(resolution=300)
84
  for img in page.images:
 
85
  bbox = (img["x0"], img["top"], img["x1"], img["bottom"])
 
86
  cropped = img_obj.original.crop(bbox)
 
87
  ocr_text = ocr_image(cropped)
88
  if ocr_text:
 
89
  content += f"[OCR Text from image on page {i}]:\n{ocr_text}\n\n"
90
- # Extract tables as TSV
91
  tables = page.extract_tables()
92
  for idx, table in enumerate(tables, 1):
93
  if table:
 
94
  df = pd.DataFrame(table[1:], columns=table[0])
 
95
  content += f"Table {idx} on page {i}:\n{format_table(df)}\n\n"
96
  except Exception as e:
 
97
  content += f"\n[Error reading PDF {fp}: {e}]"
 
98
  return content.strip()
99
 
100
  def extract_docx_content(fp):
101
  """
102
- Extract text from Microsoft Word files.
103
- Also performs OCR on embedded images inside the Microsoft Word archive.
 
 
 
 
 
 
 
 
 
 
 
 
104
  """
105
  content = ""
106
  try:
 
107
  doc = docx.Document(fp)
 
108
  paragraphs = [para.text.strip() for para in doc.paragraphs if para.text.strip()]
109
  if paragraphs:
110
  content += "Paragraphs:\n" + "\n".join(paragraphs) + "\n\n"
111
- # Extract tables
112
  tables = []
113
  for table in doc.tables:
114
  rows = []
115
  for row in table.rows:
 
116
  cells = [cell.text.strip() for cell in row.cells]
117
  rows.append(cells)
118
  if rows:
 
119
  df = pd.DataFrame(rows[1:], columns=rows[0])
120
  tables.append(df)
 
121
  for i, df in enumerate(tables, 1):
122
  content += f"Table {i}:\n{format_table(df)}\n\n"
123
- # OCR on embedded images inside Microsoft Word
124
  with zipfile.ZipFile(fp) as z:
125
  for file in z.namelist():
 
126
  if file.startswith("word/media/"):
127
  data = z.read(file)
128
  try:
 
129
  img = Image.open(io.BytesIO(data))
 
130
  ocr_text = ocr_image(img)
131
  if ocr_text:
 
132
  content += f"[OCR Text from embedded image]:\n{ocr_text}\n\n"
133
  except Exception:
 
134
  pass
135
  except Exception as e:
 
136
  content += f"\n[Error reading Microsoft Word {fp}: {e}]"
 
137
  return content.strip()
138
 
139
  def extract_excel_content(fp):
140
  """
141
- Extract content from Microsoft Excel files.
142
- Converts sheets to readable tables and replaces NaN values.
143
- Does NOT attempt to extract images to avoid errors.
 
 
 
 
 
 
 
 
 
 
 
144
  """
145
  content = ""
146
  try:
 
147
  with warnings.catch_warnings():
148
- warnings.simplefilter("ignore") # Suppress openpyxl warnings
149
- # Explicitly specify the engine to avoid potential issues
150
  sheets = pd.read_excel(fp, sheet_name=None, engine='openpyxl')
 
151
  for sheet_name, df in sheets.items():
152
  content += f"Sheet: {sheet_name}\n"
153
  content += format_table(df) + "\n\n"
154
  except Exception as e:
 
155
  content += f"\n[Error reading Microsoft Excel {fp}: {e}]"
 
156
  return content.strip()
157
 
158
  def extract_pptx_content(fp):
159
  """
160
- Extract text content from Microsoft PowerPoint presentation slides.
161
- Includes text from shapes and tables.
162
- Performs OCR on embedded images.
 
 
 
 
 
 
 
 
 
 
163
  """
164
  content = ""
165
  try:
 
166
  prs = Presentation(fp)
 
167
  for i, slide in enumerate(prs.slides, 1):
168
  slide_texts = []
 
169
  for shape in slide.shapes:
 
170
  if hasattr(shape, "text") and shape.text.strip():
171
  slide_texts.append(shape.text.strip())
 
172
  if shape.shape_type == 13 and hasattr(shape, "image") and shape.image:
173
  try:
 
174
  img = Image.open(io.BytesIO(shape.image.blob))
 
175
  ocr_text = ocr_image(img)
176
  if ocr_text:
 
177
  slide_texts.append(f"[OCR Text from image]:\n{ocr_text}")
178
  except Exception:
 
179
  pass
 
180
  if slide_texts:
181
  content += f"Slide {i} Text:\n" + "\n".join(slide_texts) + "\n\n"
182
  else:
183
  content += f"Slide {i} Text:\nNo text found on this slide.\n\n"
184
- # Extract tables
185
  for shape in slide.shapes:
186
  if shape.has_table:
187
  rows = []
188
  table = shape.table
 
189
  for row in table.rows:
190
  cells = [cell.text.strip() for cell in row.cells]
191
  rows.append(cells)
192
  if rows:
 
193
  df = pd.DataFrame(rows[1:], columns=rows[0])
 
194
  content += f"Table on slide {i}:\n{format_table(df)}\n\n"
195
  except Exception as e:
 
196
  content += f"\n[Error reading Microsoft PowerPoint {fp}: {e}]"
 
197
  return content.strip()
198
 
199
  def extract_file_content(fp):
200
  """
201
- Determine file type by extension and extract text content accordingly.
202
- For unknown types, attempts to read as plain text.
 
 
 
 
 
 
 
 
 
 
 
 
203
  """
 
204
  ext = Path(fp).suffix.lower()
205
  if ext == ".pdf":
 
206
  return extract_pdf_content(fp)
207
  elif ext in [".doc", ".docx"]:
 
208
  return extract_docx_content(fp)
209
  elif ext in [".xlsx", ".xls"]:
 
210
  return extract_excel_content(fp)
211
  elif ext in [".ppt", ".pptx"]:
 
212
  return extract_pptx_content(fp)
213
  else:
214
  try:
 
215
  text = Path(fp).read_text(encoding="utf-8")
 
216
  return clean_text(text)
217
  except Exception as e:
 
218
  return f"\n[Error reading file {fp}: {e}]"
 
3
  # SPDX-License-Identifier: Apache-2.0
4
  #
5
 
6
+ import pdfplumber # Library to extract text and tables from PDF files
7
+ import pytesseract # OCR tool to extract text from images
8
+ import docx # Library to read Microsoft Word (.docx) files
9
+ import zipfile # To handle zipped archives, used here to access embedded images in Word files
10
+ import io # Provides tools for handling byte streams, used to open images from bytes
11
+ import pandas as pd # Data analysis library, used here to handle tables from Excel and other files
12
+ import warnings # Used to suppress warnings during Excel file reading
13
+ import re # Regular expressions for text cleaning
14
 
15
+ from openpyxl import load_workbook # Excel file reading library, used for .xlsx files
16
+ from pptx import Presentation # Library to read Microsoft PowerPoint files
17
+ from PIL import Image, ImageEnhance, ImageFilter # Image processing libraries for OCR preprocessing
18
+ from pathlib import Path # Object-oriented filesystem paths
19
 
20
  def clean_text(text):
21
+ """
22
+ Clean and normalize extracted text to improve readability and remove noise.
23
+
24
+ This function performs several cleaning steps:
25
+ - Removes characters that are not letters, digits, spaces, or common punctuation.
26
+ - Removes isolated single letters which are often OCR errors or noise.
27
+ - Strips whitespace from each line and removes empty lines.
28
+ - Joins cleaned lines back into a single string separated by newlines.
29
+
30
+ Args:
31
+ text (str): Raw extracted text from any source.
32
+
33
+ Returns:
34
+ str: Cleaned and normalized text ready for display or further processing.
35
+ """
36
+ # Remove all characters except letters, digits, spaces, and common punctuation marks
37
  text = re.sub(r'[^a-zA-Z0-9\s.,?!():;\'"-]', '', text)
38
+ # Remove single isolated letters which are likely errors or noise from OCR
39
  text = re.sub(r'\b[a-zA-Z]\b', '', text)
40
+ # Split text into lines, strip whitespace, and remove empty lines
41
  lines = [line.strip() for line in text.splitlines() if line.strip()]
42
+ # Join cleaned lines with newline characters
43
  return "\n".join(lines)
44
 
45
  def format_table(df, max_rows=10):
46
+ """
47
+ Convert a pandas DataFrame into a clean, readable string representation of a table.
48
+
49
+ This function:
50
+ - Removes rows and columns that are completely empty to reduce clutter.
51
+ - Replaces any NaN values with empty strings for cleaner output.
52
+ - Limits the output to a maximum number of rows for brevity.
53
+ - Adds a note if there are more rows than displayed.
54
+
55
+ Args:
56
+ df (pandas.DataFrame): The table data to format.
57
+ max_rows (int): Maximum number of rows to display from the table.
58
+
59
+ Returns:
60
+ str: Formatted string representation of the table or empty string if no data.
61
+ """
62
  if df.empty:
63
  return ""
64
+ # Remove rows and columns where all values are NaN to clean the table
65
  df_clean = df.dropna(axis=0, how='all').dropna(axis=1, how='all')
66
+ # Replace remaining NaN values with empty strings for better readability
67
  df_clean = df_clean.fillna('')
68
  if df_clean.empty:
69
  return ""
70
+ # Select only the first max_rows rows for display
71
  display_df = df_clean.head(max_rows)
72
+ # Convert DataFrame to string without row indices
73
  table_str = display_df.to_string(index=False)
74
+ # Append a message if there are more rows than displayed
75
  if len(df_clean) > max_rows:
76
  table_str += f"\n... ({len(df_clean) - max_rows} more rows)"
77
  return table_str
78
 
79
  def preprocess_image(img):
80
+ """
81
+ Enhance an image to improve OCR accuracy by applying several preprocessing steps.
82
+
83
+ The preprocessing includes:
84
+ - Converting the image to grayscale to simplify colors.
85
+ - Increasing contrast to make text stand out more.
86
+ - Applying a median filter to reduce noise.
87
+ - Binarizing the image by thresholding to black and white.
88
+
89
+ Args:
90
+ img (PIL.Image.Image): The original image to preprocess.
91
+
92
+ Returns:
93
+ PIL.Image.Image: The processed image ready for OCR.
94
+ If an error occurs during processing, returns the original image.
95
+ """
96
  try:
97
+ # Convert image to grayscale mode
98
+ img = img.convert("L")
99
+ # Enhance contrast by a factor of 2 to make text clearer
100
  enhancer = ImageEnhance.Contrast(img)
101
+ img = enhancer.enhance(2)
102
+ # Apply median filter to reduce noise and smooth the image
103
+ img = img.filter(ImageFilter.MedianFilter())
104
+ # Convert image to black and white using a threshold of 140
105
  img = img.point(lambda x: 0 if x < 140 else 255, '1')
106
  return img
107
  except Exception:
108
+ # In case of any error, return the original image without changes
109
  return img
110
 
111
  def ocr_image(img):
112
+ """
113
+ Extract text from an image using OCR after preprocessing to improve results.
114
+
115
+ This function:
116
+ - Preprocesses the image to enhance text visibility.
117
+ - Uses pytesseract with page segmentation mode 6 (assumes a single uniform block of text).
118
+ - Cleans the extracted text using the clean_text function.
119
+
120
+ Args:
121
+ img (PIL.Image.Image): The image from which to extract text.
122
+
123
+ Returns:
124
+ str: The cleaned OCR-extracted text. Returns empty string if OCR fails.
125
+ """
126
  try:
127
+ # Preprocess image to improve OCR quality
128
  img = preprocess_image(img)
129
+ # Perform OCR using pytesseract with English language and specified config
130
  text = pytesseract.image_to_string(img, lang='eng', config='--psm 6')
131
+ # Clean the OCR output to remove noise and normalize text
132
  text = clean_text(text)
133
  return text
134
  except Exception:
135
+ # Return empty string if OCR fails for any reason
136
  return ""
137
 
138
  def extract_pdf_content(fp):
139
  """
140
+ Extract text and tables from a PDF file, including OCR on embedded images.
141
+
142
+ This function:
143
+ - Opens the PDF file and iterates through each page.
144
+ - Extracts and cleans text from each page.
145
+ - Performs OCR on images embedded in pages to extract any text within images.
146
+ - Extracts tables from pages and formats them as readable text.
147
+ - Handles exceptions by appending error messages to the content.
148
+
149
+ Args:
150
+ fp (str or Path): File path to the PDF document.
151
+
152
+ Returns:
153
+ str: Combined extracted text, OCR results, and formatted tables from the PDF.
154
  """
155
  content = ""
156
  try:
157
  with pdfplumber.open(fp) as pdf:
158
  for i, page in enumerate(pdf.pages, 1):
159
+ # Extract text from the current page, defaulting to empty string if None
160
  text = page.extract_text() or ""
161
+ # Clean extracted text and add page header
162
  content += f"Page {i} Text:\n{clean_text(text)}\n\n"
163
+ # If there are images on the page, perform OCR on each
164
  if page.images:
165
+ # Create an image object of the page with 300 dpi resolution for cropping
166
  img_obj = page.to_image(resolution=300)
167
  for img in page.images:
168
+ # Define bounding box coordinates for the image on the page
169
  bbox = (img["x0"], img["top"], img["x1"], img["bottom"])
170
+ # Crop the image from the page image
171
  cropped = img_obj.original.crop(bbox)
172
+ # Perform OCR on the cropped image
173
  ocr_text = ocr_image(cropped)
174
  if ocr_text:
175
+ # Append OCR text with page and image reference
176
  content += f"[OCR Text from image on page {i}]:\n{ocr_text}\n\n"
177
+ # Extract tables from the page
178
  tables = page.extract_tables()
179
  for idx, table in enumerate(tables, 1):
180
  if table:
181
+ # Convert table list to DataFrame using first row as header
182
  df = pd.DataFrame(table[1:], columns=table[0])
183
+ # Format and append the table text
184
  content += f"Table {idx} on page {i}:\n{format_table(df)}\n\n"
185
  except Exception as e:
186
+ # Append error message if PDF reading fails
187
  content += f"\n[Error reading PDF {fp}: {e}]"
188
+ # Return the combined content with whitespace trimmed
189
  return content.strip()
190
 
191
  def extract_docx_content(fp):
192
  """
193
+ Extract text, tables, and OCR text from images embedded in a Microsoft Word (.docx) file.
194
+
195
+ This function:
196
+ - Reads paragraphs and tables from the document.
197
+ - Cleans and formats extracted text and tables.
198
+ - Opens the .docx file as a zip archive to extract embedded images.
199
+ - Performs OCR on embedded images to extract any text they contain.
200
+ - Handles exceptions and appends error messages if reading fails.
201
+
202
+ Args:
203
+ fp (str or Path): File path to the Word document.
204
+
205
+ Returns:
206
+ str: Combined extracted paragraphs, tables, and OCR text from embedded images.
207
  """
208
  content = ""
209
  try:
210
+ # Load the Word document
211
  doc = docx.Document(fp)
212
+ # Extract and clean all non-empty paragraphs
213
  paragraphs = [para.text.strip() for para in doc.paragraphs if para.text.strip()]
214
  if paragraphs:
215
  content += "Paragraphs:\n" + "\n".join(paragraphs) + "\n\n"
216
+ # Extract tables from the document
217
  tables = []
218
  for table in doc.tables:
219
  rows = []
220
  for row in table.rows:
221
+ # Extract and clean text from each cell in the row
222
  cells = [cell.text.strip() for cell in row.cells]
223
  rows.append(cells)
224
  if rows:
225
+ # Convert rows to DataFrame using first row as header
226
  df = pd.DataFrame(rows[1:], columns=rows[0])
227
  tables.append(df)
228
+ # Format and append each extracted table
229
  for i, df in enumerate(tables, 1):
230
  content += f"Table {i}:\n{format_table(df)}\n\n"
231
+ # Open the .docx file as a zip archive to access embedded media files
232
  with zipfile.ZipFile(fp) as z:
233
  for file in z.namelist():
234
+ # Look for images inside the word/media directory
235
  if file.startswith("word/media/"):
236
  data = z.read(file)
237
  try:
238
+ # Open image from bytes
239
  img = Image.open(io.BytesIO(data))
240
+ # Perform OCR on the image
241
  ocr_text = ocr_image(img)
242
  if ocr_text:
243
+ # Append OCR text extracted from embedded image
244
  content += f"[OCR Text from embedded image]:\n{ocr_text}\n\n"
245
  except Exception:
246
+ # Ignore errors in image processing to continue extraction
247
  pass
248
  except Exception as e:
249
+ # Append error message if Word document reading fails
250
  content += f"\n[Error reading Microsoft Word {fp}: {e}]"
251
+ # Return combined content trimmed of extra whitespace
252
  return content.strip()
253
 
254
  def extract_excel_content(fp):
255
  """
256
+ Extract readable table content from Microsoft Excel files (.xlsx, .xls).
257
+
258
+ This function:
259
+ - Reads all sheets in the Excel file.
260
+ - Converts each sheet to a formatted table string.
261
+ - Suppresses warnings during reading to avoid clutter.
262
+ - Does not attempt to extract images to avoid errors.
263
+ - Handles exceptions by appending error messages.
264
+
265
+ Args:
266
+ fp (str or Path): File path to the Excel workbook.
267
+
268
+ Returns:
269
+ str: Combined formatted tables from all sheets in the workbook.
270
  """
271
  content = ""
272
  try:
273
+ # Suppress warnings such as openpyxl deprecation or data type warnings
274
  with warnings.catch_warnings():
275
+ warnings.simplefilter("ignore")
276
+ # Read all sheets into a dictionary of DataFrames using openpyxl engine
277
  sheets = pd.read_excel(fp, sheet_name=None, engine='openpyxl')
278
+ # Iterate over each sheet and format its content
279
  for sheet_name, df in sheets.items():
280
  content += f"Sheet: {sheet_name}\n"
281
  content += format_table(df) + "\n\n"
282
  except Exception as e:
283
+ # Append error message if Excel reading fails
284
  content += f"\n[Error reading Microsoft Excel {fp}: {e}]"
285
+ # Return combined sheet contents trimmed of whitespace
286
  return content.strip()
287
 
288
  def extract_pptx_content(fp):
289
  """
290
+ Extract text, tables, and OCR text from images in Microsoft PowerPoint (.pptx) files.
291
+
292
+ This function:
293
+ - Reads each slide in the presentation.
294
+ - Extracts text from shapes and tables on each slide.
295
+ - Performs OCR on images embedded in shapes.
296
+ - Handles exceptions and appends error messages if reading fails.
297
+
298
+ Args:
299
+ fp (str or Path): File path to the PowerPoint presentation.
300
+
301
+ Returns:
302
+ str: Combined extracted text, tables, and OCR results from all slides.
303
  """
304
  content = ""
305
  try:
306
+ # Load the PowerPoint presentation
307
  prs = Presentation(fp)
308
+ # Iterate through each slide by index starting at 1
309
  for i, slide in enumerate(prs.slides, 1):
310
  slide_texts = []
311
+ # Iterate through all shapes on the slide
312
  for shape in slide.shapes:
313
+ # Extract and clean text from shapes that have text attribute
314
  if hasattr(shape, "text") and shape.text.strip():
315
  slide_texts.append(shape.text.strip())
316
+ # Check if the shape is a picture (shape_type 13) with an image
317
  if shape.shape_type == 13 and hasattr(shape, "image") and shape.image:
318
  try:
319
+ # Open image from the shape's binary blob data
320
  img = Image.open(io.BytesIO(shape.image.blob))
321
+ # Perform OCR on the image
322
  ocr_text = ocr_image(img)
323
  if ocr_text:
324
+ # Append OCR text extracted from the image
325
  slide_texts.append(f"[OCR Text from image]:\n{ocr_text}")
326
  except Exception:
327
+ # Ignore errors in image OCR to continue processing
328
  pass
329
+ # Add slide text or note if no text found
330
  if slide_texts:
331
  content += f"Slide {i} Text:\n" + "\n".join(slide_texts) + "\n\n"
332
  else:
333
  content += f"Slide {i} Text:\nNo text found on this slide.\n\n"
334
+ # Extract tables from any shapes on the slide that contain one
335
  for shape in slide.shapes:
336
  if shape.has_table:
337
  rows = []
338
  table = shape.table
339
+ # Extract text from each cell in the table rows
340
  for row in table.rows:
341
  cells = [cell.text.strip() for cell in row.cells]
342
  rows.append(cells)
343
  if rows:
344
+ # Convert rows to DataFrame using first row as header
345
  df = pd.DataFrame(rows[1:], columns=rows[0])
346
+ # Format and append the table text
347
  content += f"Table on slide {i}:\n{format_table(df)}\n\n"
348
  except Exception as e:
349
+ # Append error message if PowerPoint reading fails
350
  content += f"\n[Error reading Microsoft PowerPoint {fp}: {e}]"
351
+ # Return combined slide content trimmed of whitespace
352
  return content.strip()
353
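
A compact way to see the same shape traversal outside the extractor; a sketch assuming python-pptx and a local `deck.pptx` (placeholder path). `MSO_SHAPE_TYPE.PICTURE` is the named constant behind the literal 13 used above:

```python
from pptx import Presentation
from pptx.enum.shapes import MSO_SHAPE_TYPE

prs = Presentation("deck.pptx")
for i, slide in enumerate(prs.slides, 1):
    for shape in slide.shapes:
        if shape.has_table:
            # First row is treated as the header, matching the extractor above
            rows = [[cell.text.strip() for cell in row.cells] for row in shape.table.rows]
            print(f"Slide {i} table header: {rows[0]}")
        elif shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
            print(f"Slide {i} picture: image type .{shape.image.ext}")
```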
 
354
  def extract_file_content(fp):
355
  """
356
+ Determine the file type based on its extension and extract text content accordingly.
357
+
358
+ This function supports:
359
+ - PDF files with text, tables, and OCR on images.
360
+ - Microsoft Word documents with paragraphs, tables, and OCR on embedded images.
361
+ - Microsoft Excel workbooks with formatted sheet tables.
362
+ - Microsoft PowerPoint presentations with slide text, tables, and OCR on images.
363
+ - Other file types are read as plain UTF-8 text when possible.
364
+
365
+ Args:
366
+ fp (str or Path): File path to the document to extract content from.
367
+
368
+ Returns:
369
+ str: Extracted and cleaned text content from the file, or an error message.
370
  """
371
+ # Get the file extension in lowercase to identify file type
372
  ext = Path(fp).suffix.lower()
373
  if ext == ".pdf":
374
+ # Extract content from PDF files
375
  return extract_pdf_content(fp)
376
  elif ext in [".doc", ".docx"]:
377
+ # Extract content from Word documents
378
  return extract_docx_content(fp)
379
  elif ext in [".xlsx", ".xls"]:
380
+ # Extract content from Excel workbooks
381
  return extract_excel_content(fp)
382
  elif ext in [".ppt", ".pptx"]:
383
+ # Extract content from PowerPoint presentations
384
  return extract_pptx_content(fp)
385
  else:
386
  try:
387
+ # Attempt to read unknown file types as plain UTF-8 text
388
  text = Path(fp).read_text(encoding="utf-8")
389
+ # Clean the extracted text before returning
390
  return clean_text(text)
391
  except Exception as e:
392
+ # Return error message if reading fails
393
  return f"\n[Error reading file {fp}: {e}]"
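
A quick end-to-end check of the dispatcher; a minimal sketch with placeholder file names:

```python
from pathlib import Path

from src.main.file_extractors import extract_file_content

for name in ["report.pdf", "notes.docx", "budget.xlsx", "talk.pptx", "readme.txt"]:
    if Path(name).exists():
        # Print only the first 200 characters of each extraction as a preview
        print(f"--- {name} ---\n{extract_file_content(name)[:200]}\n")
```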
src/main/gradio.py CHANGED
@@ -3,177 +3,296 @@
3
  # SPDX-License-Identifier: Apache-2.0
4
  #
5
 
6
- import gradio as gr
7
- import asyncio
8
 
9
- from pathlib import Path
10
- from src.config import *
11
- from src.cores.session import create_session, ensure_stop_event, get_model_key
12
- from src.main.file_extractors import extract_file_content
13
- from src.cores.client import chat_with_model_async
14
 
15
  async def respond_async(multi, history, model_display, sess, custom_prompt, deep_search):
16
  """
17
- Main async handler for user input submission.
18
- Supports text + file uploads (multi-modal input).
19
- Extracts file content and appends to user input.
20
- Streams AI responses back to UI, updating chat history live.
21
- Allows stopping response generation gracefully.
22
  """
23
- ensure_stop_event(sess)
24
- sess.stop_event.clear()
25
- sess.cancel_token["cancelled"] = False
26
- # Extract text and files from multimodal input
 
27
  msg_input = {"text": multi.get("text", "").strip(), "files": multi.get("files", [])}
28
- # If no input, reset UI state and return
 
29
  if not msg_input["text"] and not msg_input["files"]:
30
  yield history, gr.update(value="", interactive=True, submit_btn=True, stop_btn=False), sess
31
  return
32
- # Initialize input with extracted file contents
 
33
  inp = ""
34
  for f in msg_input["files"]:
35
- # Support dict or direct file path
36
  fp = f.get("data", f.get("name", "")) if isinstance(f, dict) else f
37
- inp += f"{Path(fp).name}\n\n{extract_file_content(fp)}\n\n"
38
- # Append user text input if any
 
 
39
  if msg_input["text"]:
40
  inp += msg_input["text"]
41
- # Append user input to chat history with placeholder response
42
- history.append([inp, RESPONSES["RESPONSE_8"]])
 
 
 
43
  yield history, gr.update(interactive=False, submit_btn=False, stop_btn=True), sess
44
- queue = asyncio.Queue()
45
- # Background async task to fetch streamed AI responses
 
46
  async def background():
47
- reasoning = ""
48
- responses = ""
49
- content_started = False
50
- ignore_reasoning = False
51
  async for typ, chunk in chat_with_model_async(history, inp, model_display, sess, custom_prompt, deep_search):
 
52
  if sess.stop_event.is_set() or sess.cancel_token["cancelled"]:
53
  break
 
54
  if typ == "reasoning":
 
55
  if ignore_reasoning:
56
  continue
57
  reasoning += chunk
58
- await queue.put(("reasoning", f"<think>\n{reasoning}\n\n</think>\n\n"))
 
 
59
  elif typ == "content":
60
  if not content_started:
 
61
  content_started = True
62
  ignore_reasoning = True
63
  responses = chunk
64
- await queue.put(("reasoning", "")) # Clear reasoning on content start
65
- await queue.put(("replace", responses))
66
  else:
 
67
  responses += chunk
68
  await queue.put(("append", responses))
69
- await queue.put(None)
70
- return responses
71
- bg_task = asyncio.create_task(background())
72
- stop_task = asyncio.create_task(sess.stop_event.wait())
73
- pending_tasks = {bg_task, stop_task}
 
 
 
74
  try:
75
  while True:
76
- queue_task = asyncio.create_task(queue.get())
77
  pending_tasks.add(queue_task)
 
 
78
  done, _ = await asyncio.wait({stop_task, queue_task}, return_when=asyncio.FIRST_COMPLETED)
 
79
  for task in done:
80
  pending_tasks.discard(task)
 
81
  if task is stop_task:
82
- # User requested stop, cancel background task and update UI
83
  sess.cancel_token["cancelled"] = True
84
  bg_task.cancel()
85
  try:
86
  await bg_task
87
  except asyncio.CancelledError:
88
  pass
 
89
  history[-1][1] = RESPONSES["RESPONSE_1"]
90
  yield history, gr.update(value="", interactive=True, submit_btn=True, stop_btn=False), sess
91
  return
 
92
  result = task.result()
93
  if result is None:
 
94
  raise StopAsyncIteration
 
95
  action, text = result
96
- # Update last message content in history with streamed text
97
  history[-1][1] = text
 
98
  yield history, gr.update(interactive=False, submit_btn=False, stop_btn=True), sess
 
99
  except StopAsyncIteration:
 
100
  pass
 
101
  finally:
 
102
  for task in pending_tasks:
103
  task.cancel()
104
  await asyncio.gather(*pending_tasks, return_exceptions=True)
 
 
105
  yield history, gr.update(value="", interactive=True, submit_btn=True, stop_btn=False), sess
106
 
107
  def toggle_deep_search(deep_search_value, history, sess, prompt, model):
108
  """
109
- Toggle deep search checkbox. Keeps chat intact for production compatibility.
110
  """
111
  return history, sess, prompt, model, gr.update(value=deep_search_value)
112
 
113
  def change_model(new):
114
  """
115
- Handler to change selected AI model.
116
- Resets chat history and session.
117
- Updates system instructions and deep search checkbox visibility accordingly.
118
- Deep search is only available for default model.
  """
120
- visible = new == MODEL_CHOICES[0]
 
 
121
  default_prompt = SYSTEM_PROMPT_MAPPING.get(get_model_key(new, MODEL_MAPPING, DEFAULT_MODEL_KEY), SYSTEM_PROMPT_DEFAULT)
122
- # On model change, clear chat, create new session, reset deep search, update visibility
 
123
  return [], create_session(), new, default_prompt, False, gr.update(visible=visible)
124
 
125
  def stop_response(history, sess):
126
  """
127
  Handler to stop ongoing AI response generation.
128
- Sets cancellation flags and updates last message to cancellation notice.
  """
130
- ensure_stop_event(sess)
131
- sess.stop_event.set()
132
- sess.cancel_token["cancelled"] = True
 
133
  if history:
 
134
  history[-1][1] = RESPONSES["RESPONSE_1"]
 
135
  return history, None, create_session()
136
 
137
  def launch_ui():
138
  # ============================
139
  # System Setup
140
  # ============================
141
-
142
- # Install Tesseract OCR and dependencies for text extraction from images.
143
  import os
144
  os.system("apt-get update -q -y && \
145
  apt-get install -q -y tesseract-ocr \
146
  tesseract-ocr-eng tesseract-ocr-ind \
147
  libleptonica-dev libtesseract-dev"
148
  )
149
-
 
150
  with gr.Blocks(fill_height=True, fill_width=True, title=AI_TYPES["AI_TYPE_4"], head=META_TAGS) as jarvis:
 
151
  user_history = gr.State([])
152
  user_session = gr.State(create_session())
153
  selected_model = gr.State(MODEL_CHOICES[0] if MODEL_CHOICES else "")
154
  J_A_R_V_I_S = gr.State("")
 
155
  # Chatbot UI
156
- with gr.Column(): chatbot = gr.Chatbot(label=AI_TYPES["AI_TYPE_1"], show_copy_button=True, scale=1, elem_id=AI_TYPES["AI_TYPE_2"], examples=JARVIS_INIT, allow_tags=["think"])
157
- # User's input
158
- msg = gr.MultimodalTextbox(show_label=False, placeholder=RESPONSES["RESPONSE_5"], interactive=True, file_count="single", file_types=[".txt"])
159
- # Sidebar to select AI models and on/off deep search
 
 
 
160
  with gr.Sidebar(open=False):
161
  deep_search = gr.Checkbox(label=AI_TYPES["AI_TYPE_8"], value=False, info=AI_TYPES["AI_TYPE_9"], visible=True)
 
162
  deep_search.change(fn=toggle_deep_search, inputs=[deep_search, user_history, user_session, J_A_R_V_I_S, selected_model], outputs=[chatbot, user_session, J_A_R_V_I_S, selected_model, deep_search])
163
- gr.Markdown() # line spacing
164
  model_radio = gr.Radio(show_label=False, choices=MODEL_CHOICES, value=MODEL_CHOICES[0])
165
- with gr.Sidebar(open=False, position="right"): gr.Markdown(NOTICES)
166
- # Models change
 
 
 
 
167
  model_radio.change(fn=change_model, inputs=[model_radio], outputs=[user_history, user_session, selected_model, J_A_R_V_I_S, deep_search, deep_search])
168
- # Initial welcome messages
169
- def on_example_select(evt: gr.SelectData): return evt.value
170
- chatbot.example_select(fn=on_example_select, inputs=[], outputs=[msg]).then(fn=respond_async, inputs=[msg, user_history, selected_model, user_session, J_A_R_V_I_S, deep_search], outputs=[chatbot, msg, user_session])
171
- # Clear chat
172
- def clear_chat(history, sess, prompt, model): return [], create_session(), prompt, model, []
173
  chatbot.clear(fn=clear_chat, inputs=[user_history, user_session, J_A_R_V_I_S, selected_model], outputs=[chatbot, user_session, J_A_R_V_I_S, selected_model, user_history])
174
- # Submit message
 
175
  msg.submit(fn=respond_async, inputs=[msg, user_history, selected_model, user_session, J_A_R_V_I_S, deep_search], outputs=[chatbot, msg, user_session], api_name=INTERNAL_AI_GET_SERVER)
176
- # Stop message
 
177
  msg.stop(fn=stop_response, inputs=[user_history, user_session], outputs=[chatbot, msg, user_session])
 
178
  # Launch
179
  jarvis.queue(default_concurrency_limit=2).launch(max_file_size="1mb", mcp_server=True)
 
3
  # SPDX-License-Identifier: Apache-2.0
4
  #
5
 
6
+ import gradio as gr # Import Gradio library for building the web UI
7
+ import asyncio # Import asyncio for asynchronous programming
8
 
9
+ from pathlib import Path # Import Path for filesystem path manipulations
10
+ from config import * # Import all configuration constants and variables
11
+ from src.cores.session import create_session, ensure_stop_event, get_model_key # Import session management utilities
12
+ from src.main.file_extractors import extract_file_content # Import function to extract content from uploaded files
13
+ from src.cores.client import chat_with_model_async # Import async chat function with AI model
14
 
15
  async def respond_async(multi, history, model_display, sess, custom_prompt, deep_search):
16
  """
17
+ Asynchronous handler for processing user input submissions.
18
+ Supports multi-modal input including text and file uploads.
19
+ Extracts content from uploaded files and appends it to user text input.
20
+ Streams AI-generated responses back to the UI, updating chat history live.
21
+ Allows graceful stopping of response generation upon user request.
22
+
23
+ Parameters:
24
+ - multi: dict containing user text input and uploaded files
25
+ - history: list of previous chat messages (user and AI)
26
+ - model_display: selected AI model identifier
27
+ - sess: current session object managing state and cancellation
28
+ - custom_prompt: user-defined system instructions
29
+ - deep_search: boolean flag to enable extended search capabilities
30
+
31
+ Yields:
32
+ - Updated chat history and UI state for real-time interaction
33
  """
34
+ ensure_stop_event(sess) # Ensure the session has a stop event initialized
35
+ sess.stop_event.clear() # Clear any previous stop signals
36
+ sess.cancel_token["cancelled"] = False # Reset cancellation flag
37
+
38
+ # Extract text and files from multimodal input dictionary
39
  msg_input = {"text": multi.get("text", "").strip(), "files": multi.get("files", [])}
40
+
41
+ # If no input text or files, reset UI input and return early
42
  if not msg_input["text"] and not msg_input["files"]:
43
  yield history, gr.update(value="", interactive=True, submit_btn=True, stop_btn=False), sess
44
  return
45
+
46
+ # Initialize combined input string with extracted file contents
47
  inp = ""
48
  for f in msg_input["files"]:
49
+ # Support either a dict entry or a direct file path string
50
  fp = f.get("data", f.get("name", "")) if isinstance(f, dict) else f
51
+ # Append extracted file content wrapped in a fenced code block, followed by spacing
52
+ inp += f"```\n{extract_file_content(fp)}\n``` \n\n\n"
53
+
54
+ # Append user text input if present
55
  if msg_input["text"]:
56
  inp += msg_input["text"]
57
+
58
+ # Append user input to chat history with placeholder AI response
59
+ history.append([inp, RESPONSES["RESPONSE_8"]]) # RESPONSE_8 holds the placeholder text
60
+
61
+ # Yield updated history and disable input while AI is responding
62
  yield history, gr.update(interactive=False, submit_btn=False, stop_btn=True), sess
63
+
64
+ queue = asyncio.Queue() # Queue to hold streamed AI response chunks
65
+
66
  async def background():
67
+ """
68
+ Background async task to fetch streamed AI responses from the model.
69
+ Handles reasoning and content chunks separately.
70
+ Supports cancellation via session stop event.
71
+ """
72
+ reasoning = "" # Accumulate reasoning text
73
+ responses = "" # Accumulate content text
74
+ content_started = False # Flag to indicate content streaming started
75
+ ignore_reasoning = False # Flag to ignore reasoning after content starts
76
+
77
+ # Iterate asynchronously over streaming response chunks from the AI model
78
  async for typ, chunk in chat_with_model_async(history, inp, model_display, sess, custom_prompt, deep_search):
79
+ # Break if the user requested a stop or cancellation was flagged
80
  if sess.stop_event.is_set() or sess.cancel_token["cancelled"]:
81
  break
82
+
83
  if typ == "reasoning":
84
+ # Append reasoning chunk unless ignoring reasoning after content start
85
  if ignore_reasoning:
86
  continue
87
  reasoning += chunk
88
+ # Put the accumulated reasoning text into the queue for a UI update
89
+ await queue.put(("reasoning", reasoning))
90
+
91
  elif typ == "content":
92
  if not content_started:
93
+ # On first content chunk, clear reasoning and start content accumulation
94
  content_started = True
95
  ignore_reasoning = True
96
  responses = chunk
97
+ await queue.put(("reasoning", "")) # Clear reasoning display
98
+ await queue.put(("replace", responses)) # Replace placeholder with content start
99
  else:
100
+ # Append subsequent content chunks and update UI
101
  responses += chunk
102
  await queue.put(("append", responses))
103
+
104
+ await queue.put(None) # Signal completion of streaming
105
+ return responses # Return final complete response text
106
+
107
+ bg_task = asyncio.create_task(background()) # Start background streaming task
108
+ stop_task = asyncio.create_task(sess.stop_event.wait()) # Task to wait for stop event
109
+ pending_tasks = {bg_task, stop_task} # Track pending async tasks
110
+
111
  try:
112
  while True:
113
+ queue_task = asyncio.create_task(queue.get()) # Task to get next queued update
114
  pending_tasks.add(queue_task)
115
+
116
+ # Wait for either stop event or new queue item
117
  done, _ = await asyncio.wait({stop_task, queue_task}, return_when=asyncio.FIRST_COMPLETED)
118
+
119
  for task in done:
120
  pending_tasks.discard(task)
121
+
122
  if task is stop_task:
123
+ # User requested stop; cancel the background task and update the UI accordingly
124
  sess.cancel_token["cancelled"] = True
125
  bg_task.cancel()
126
  try:
127
  await bg_task
128
  except asyncio.CancelledError:
129
  pass
130
+ # Update last message with cancellation notice
131
  history[-1][1] = RESPONSES["RESPONSE_1"]
132
  yield history, gr.update(value="", interactive=True, submit_btn=True, stop_btn=False), sess
133
  return
134
+
135
  result = task.result()
136
  if result is None:
137
+ # Streaming finished; stop iteration
138
  raise StopAsyncIteration
139
+
140
  action, text = result
141
+ # Update the last message in history with the accumulated streamed text (each update carries the full text, so the action tag is unused)
142
  history[-1][1] = text
143
+ # Yield updated history and UI state to refresh chat display
144
  yield history, gr.update(interactive=False, submit_btn=False, stop_btn=True), sess
145
+
146
  except StopAsyncIteration:
147
+ # Normal completion of streaming
148
  pass
149
+
150
  finally:
151
+ # Cancel any remaining pending tasks to clean up
152
  for task in pending_tasks:
153
  task.cancel()
154
  await asyncio.gather(*pending_tasks, return_exceptions=True)
155
+
156
+ # After completion, reset UI input to ready state
157
  yield history, gr.update(value="", interactive=True, submit_btn=True, stop_btn=False), sess
158
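
The handler above is a producer/consumer pair: `background()` pushes chunks into an `asyncio.Queue`, a `None` sentinel marks completion, and the outer loop races each `queue.get()` against the stop event. A stripped-down sketch of the same pattern, independent of Gradio:

```python
import asyncio

async def demo():
    queue = asyncio.Queue()
    stop = asyncio.Event()

    async def producer():
        for chunk in ["Hel", "lo, ", "world"]:
            await asyncio.sleep(0.1)  # stand-in for a streamed model chunk
            await queue.put(chunk)
        await queue.put(None)  # sentinel: streaming finished

    asyncio.create_task(producer())
    stop_task = asyncio.create_task(stop.wait())
    text = ""
    while True:
        get_task = asyncio.create_task(queue.get())
        done, _ = await asyncio.wait({stop_task, get_task}, return_when=asyncio.FIRST_COMPLETED)
        if stop_task in done:  # user pressed stop
            get_task.cancel()
            break
        chunk = get_task.result()
        if chunk is None:  # sentinel reached
            break
        text += chunk
        print(text)  # live update, as each yield above refreshes the chat
    stop_task.cancel()

asyncio.run(demo())
```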
 
159
  def toggle_deep_search(deep_search_value, history, sess, prompt, model):
160
  """
161
+ Toggle the deep search checkbox state.
162
+ Leaves the current chat history and session untouched for production compatibility.
163
+
164
+ Parameters:
165
+ - deep_search_value: new checkbox boolean value
166
+ - history: current chat history
167
+ - sess: current session object
168
+ - prompt: current system instructions
169
+ - model: currently selected model
170
+
171
+ Returns:
172
+ - Unchanged history, session, prompt, model
173
+ - Updated deep search checkbox UI state
174
  """
175
  return history, sess, prompt, model, gr.update(value=deep_search_value)
176
 
177
  def change_model(new):
178
  """
179
+ Handler to change the selected AI model.
180
+ Resets chat history and creates a new session.
181
+ Updates system instructions and deep search checkbox visibility.
182
+ Deep search is only enabled for the default model.
183
+
184
+ Parameters:
185
+ - new: newly selected model identifier
186
+
187
+ Returns:
188
+ - Empty chat history list
189
+ - New session object
190
+ - New model identifier
191
+ - Corresponding system instructions string
192
+ - Deep search checkbox reset to False
193
+ - UI update for deep search checkbox visibility
194
  """
195
+ visible = new == MODEL_CHOICES[0] # Deep search visible only for default model
196
+
197
+ # Get system instructions for the new model, falling back to the default instructions
198
  default_prompt = SYSTEM_PROMPT_MAPPING.get(get_model_key(new, MODEL_MAPPING, DEFAULT_MODEL_KEY), SYSTEM_PROMPT_DEFAULT)
199
+
200
+ # Clear chat, create new session, reset deep search, update UI visibility
201
  return [], create_session(), new, default_prompt, False, gr.update(visible=visible)
202
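
`get_model_key` itself is defined in `src/cores/session` and not shown in this diff; a plausible reverse-lookup sketch of what the call above expects, labeled as an assumption:

```python
def get_model_key(display_name, model_mapping, default_key):
    # Hypothetical sketch: map a display label back to its model key,
    # falling back to the default key when the label is unknown
    for key, label in model_mapping.items():
        if label == display_name:
            return key
    return default_key
```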
 
203
  def stop_response(history, sess):
204
  """
205
  Handler to stop ongoing AI response generation.
206
+ Sets cancellation flags and updates the last message to a cancellation notice.
207
+
208
+ Parameters:
209
+ - history: current chat history list
210
+ - sess: current session object
211
+
212
+ Returns:
213
+ - Updated chat history with cancellation message
214
+ - None for input box reset
215
+ - New session object for fresh state
216
  """
217
+ ensure_stop_event(sess) # Ensure stop event exists in session
218
+ sess.stop_event.set() # Signal stop event to cancel ongoing tasks
219
+ sess.cancel_token["cancelled"] = True # Mark cancellation flag
220
+
221
  if history:
222
+ # Replace last AI response with cancellation message
223
  history[-1][1] = RESPONSES["RESPONSE_1"]
224
+
225
  return history, None, create_session()
226
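
The stop handshake rests on two cooperative flags: an `asyncio.Event` that wakes the waiter, and a plain dict the streaming loop polls between chunks. A minimal sketch of that handshake:

```python
import asyncio

async def worker(stop_event, cancel_token):
    for i in range(100):
        # Check both flags between chunks, as the streaming loop above does
        if stop_event.is_set() or cancel_token["cancelled"]:
            return f"stopped at chunk {i}"
        await asyncio.sleep(0.05)
    return "finished"

async def main():
    stop_event = asyncio.Event()
    cancel_token = {"cancelled": False}
    task = asyncio.create_task(worker(stop_event, cancel_token))
    await asyncio.sleep(0.2)
    stop_event.set()                 # what stop_response does on user request
    cancel_token["cancelled"] = True
    print(await task)                # -> stopped at an early chunk

asyncio.run(main())
```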
 
227
  def launch_ui():
228
+ """
229
+ Launch the Gradio UI for the chatbot application.
230
+ Sets up the UI components, event handlers, and starts the server.
231
+ Installs required OCR dependencies for file content extraction.
232
+ """
233
  # ============================
234
  # System Setup
235
  # ============================
236
+
237
+ # Install Tesseract OCR and dependencies for extracting text from images
238
  import os
239
  os.system("apt-get update -q -y && \
240
  apt-get install -q -y tesseract-ocr \
241
  tesseract-ocr-eng tesseract-ocr-ind \
242
  libleptonica-dev libtesseract-dev"
243
  )
244
+
245
+ # Create Gradio Blocks container for full UI layout
246
  with gr.Blocks(fill_height=True, fill_width=True, title=AI_TYPES["AI_TYPE_4"], head=META_TAGS) as jarvis:
247
+ # State variables to hold chat history, session, selected model, and instructions
248
  user_history = gr.State([])
249
  user_session = gr.State(create_session())
250
  selected_model = gr.State(MODEL_CHOICES[0] if MODEL_CHOICES else "")
251
  J_A_R_V_I_S = gr.State("")
252
+
253
  # Chatbot UI
254
+ with gr.Column():
255
+ chatbot = gr.Chatbot(label=AI_TYPES["AI_TYPE_1"], show_copy_button=True, scale=1, elem_id=AI_TYPES["AI_TYPE_2"], examples=JARVIS_INIT, allow_tags=["think", "thinking"])
256
+
257
+ # User input textbox (upload sources disabled via the empty sources list)
258
+ msg = gr.MultimodalTextbox(show_label=False, placeholder=RESPONSES["RESPONSE_5"], interactive=True, file_count=None, file_types=None, sources=[])
259
+
260
+ # Sidebar on left for model selection and deep search toggle
261
  with gr.Sidebar(open=False):
262
  deep_search = gr.Checkbox(label=AI_TYPES["AI_TYPE_8"], value=False, info=AI_TYPES["AI_TYPE_9"], visible=True)
263
+ # When deep search checkbox changes, call toggle_deep_search handler
264
  deep_search.change(fn=toggle_deep_search, inputs=[deep_search, user_history, user_session, J_A_R_V_I_S, selected_model], outputs=[chatbot, user_session, J_A_R_V_I_S, selected_model, deep_search])
265
+ gr.Markdown() # Add spacing line
266
  model_radio = gr.Radio(show_label=False, choices=MODEL_CHOICES, value=MODEL_CHOICES[0])
267
+
268
+ # Sidebar on right for notices and additional information
269
+ with gr.Sidebar(open=False, position="right"):
270
+ gr.Markdown(NOTICES)
271
+
272
+ # When model selection changes, call change_model handler
273
  model_radio.change(fn=change_model, inputs=[model_radio], outputs=[user_history, user_session, selected_model, J_A_R_V_I_S, deep_search, deep_search])
274
+
275
+ # Event handler for selecting example messages in chatbot UI
276
+ def on_example_select(evt: gr.SelectData):
277
+ return evt.value
278
+
279
+ chatbot.example_select(fn=on_example_select, inputs=[], outputs=[msg]).then(
280
+ fn=respond_async,
281
+ inputs=[msg, user_history, selected_model, user_session, J_A_R_V_I_S, deep_search],
282
+ outputs=[chatbot, msg, user_session]
283
+ )
284
+
285
+ # Clear chat button handler resets chat, session, instructions, model, and history
286
+ def clear_chat(history, sess, prompt, model):
287
+ return [], create_session(), prompt, model, []
288
+
289
  chatbot.clear(fn=clear_chat, inputs=[user_history, user_session, J_A_R_V_I_S, selected_model], outputs=[chatbot, user_session, J_A_R_V_I_S, selected_model, user_history])
290
+
291
+ # Submitting a user message triggers respond_async to generate the AI response
292
  msg.submit(fn=respond_async, inputs=[msg, user_history, selected_model, user_session, J_A_R_V_I_S, deep_search], outputs=[chatbot, msg, user_session], api_name=INTERNAL_AI_GET_SERVER)
293
+
294
+ # Stop button triggers stop_response handler to cancel ongoing AI generation
295
  msg.stop(fn=stop_response, inputs=[user_history, user_session], outputs=[chatbot, msg, user_session])
296
+
297
  # Launch
298
  jarvis.queue(default_concurrency_limit=2).launch(max_file_size="1mb", mcp_server=True)
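
For reference, the final line caps concurrency and upload size at launch. A minimal standalone Blocks app using the same settings; the `mcp_server=True` flag is omitted here since it assumes a Gradio build with MCP support:

```python
import gradio as gr

with gr.Blocks(title="demo") as app:
    gr.Markdown("Hello")

# default_concurrency_limit=2: at most two queued events run at once;
# max_file_size="1mb": uploads larger than one megabyte are rejected
app.queue(default_concurrency_limit=2).launch(max_file_size="1mb")
```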