ai: Refactor the code.

Files changed:
- app.py (+5, -3)
- src/config.py → config.py (+39, -15)
- src/cores/client.py (+85, -27)
- src/cores/server.py (+59, -17)
- src/cores/session.py (+60, -27)
- src/main/file_extractors.py (+220, -45)
- src/main/gradio.py (+187, -68)
app.py  (CHANGED)

```diff
@@ -3,8 +3,10 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
-from src.main.gradio import launch_ui
+from src.main.gradio import launch_ui  # Import the function responsible for starting the graphical user interface
 
-#
+# The following condition checks if this script is being run as the main program.
+# If true, it calls the launch_ui function to start the user interface.
+# This ensures that the UI only launches when this file is executed directly, not when imported as a module.
 if __name__ == "__main__":
-    launch_ui()
+    launch_ui()  # Start the graphical user interface for the application
```
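As context for the new comments, the `__main__` guard behaves like this (a self-contained toy; `demo.py` and its `launch_ui` are stand-ins, not the project's real module):

```python
# demo.py -- toy stand-in for app.py
def launch_ui():
    print("UI started")

if __name__ == "__main__":
    # Runs for `python demo.py`, but not for `import demo`,
    # so importing the module never starts the UI as a side effect.
    launch_ui()
```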
src/config.py → config.py  (RENAMED)

```diff
@@ -3,54 +3,78 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
-import os
-import json
+import os  # Import os module to access environment variables and interact with the operating system
+import json  # Import json module to parse JSON strings into Python objects
 
-#
+# Load initial welcome messages for the system from the environment variable "HELLO"
+# If "HELLO" is not set, default to an empty JSON array represented as "[]"
+# This variable typically contains a list of greeting messages or initialization instructions for the AI
 JARVIS_INIT = json.loads(os.getenv("HELLO", "[]"))
 
-# Deep Search
+# Deep Search service configuration variables loaded from environment variables
+# DEEP_SEARCH_PROVIDER_HOST holds the URL or IP address of the deep search service provider
 DEEP_SEARCH_PROVIDER_HOST = os.getenv("DEEP_SEARCH_PROVIDER_HOST")
+# DEEP_SEARCH_PROVIDER_KEY contains the API key or authentication token required to access the deep search provider
 DEEP_SEARCH_PROVIDER_KEY = os.getenv('DEEP_SEARCH_PROVIDER_KEY')
+# DEEP_SEARCH_INSTRUCTIONS may include specific instructions or parameters guiding how deep search queries should be handled
 DEEP_SEARCH_INSTRUCTIONS = os.getenv("DEEP_SEARCH_INSTRUCTIONS")
 
-#
+# Internal AI server configuration and system instructions
+# INTERNAL_AI_GET_SERVER stores the endpoint URL or IP address for internal AI GET requests
 INTERNAL_AI_GET_SERVER = os.getenv("INTERNAL_AI_GET_SERVER")
+# INTERNAL_AI_INSTRUCTIONS contains system instructions used to guide the AI behavior
 INTERNAL_AI_INSTRUCTIONS = os.getenv("INTERNAL_TRAINING_DATA")
 
-# System instructions
+# System instruction mappings and default instructions loaded from environment variables
+# SYSTEM_PROMPT_MAPPING is a dictionary mapping model keys to their corresponding system instruction texts, parsed from JSON
 SYSTEM_PROMPT_MAPPING = json.loads(os.getenv("SYSTEM_PROMPT_MAPPING", "{}"))
+# SYSTEM_PROMPT_DEFAULT is the fallback instruction text used when no specific mapping is found
 SYSTEM_PROMPT_DEFAULT = os.getenv("DEFAULT_SYSTEM")
 
-# List of available
+# List of available server hosts for connections or operations
+# This list is parsed from a JSON array string and filtered to exclude any empty or invalid entries
 LINUX_SERVER_HOSTS = [h for h in json.loads(os.getenv("LINUX_SERVER_HOST", "[]")) if h]
 
-# List of
+# List of provider keys associated with servers, used for authentication
+# The list is parsed from JSON and filtered to remove empty strings
 LINUX_SERVER_PROVIDER_KEYS = [k for k in json.loads(os.getenv("LINUX_SERVER_PROVIDER_KEY", "[]")) if k]
+# Set to keep track of provider keys that have been marked or flagged during runtime
 LINUX_SERVER_PROVIDER_KEYS_MARKED = set()
+# Dictionary to record the number of attempts made with each provider key
 LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS = {}
 
-#
+# Set of server error codes that the system recognizes as critical or requiring special handling
+# The error codes are read from a comma-separated string, filtered to remove empty entries, converted to integers, and stored in a set
 LINUX_SERVER_ERRORS = set(map(int, filter(None, os.getenv("LINUX_SERVER_ERROR", "").split(","))))
 
-# Human
+# Human-friendly AI types and response messages loaded from environment variables
+# AI_TYPES maps keys like "AI_TYPE_1" to descriptive names or categories of AI models or behaviors
 AI_TYPES = {f"AI_TYPE_{i}": os.getenv(f"AI_TYPE_{i}") for i in range(1, 10)}
+# RESPONSES maps keys like "RESPONSE_1" to predefined response templates or messages used by the AI system
 RESPONSES = {f"RESPONSE_{i}": os.getenv(f"RESPONSE_{i}") for i in range(1, 11)}
 
-# Model
+# Model-related configuration loaded from environment variables
+# MODEL_MAPPING is a dictionary mapping model keys to their corresponding model names or identifiers, parsed from JSON
 MODEL_MAPPING = json.loads(os.getenv("MODEL_MAPPING", "{}"))
+# MODEL_CONFIG contains detailed configuration settings for each model, such as parameters or options, parsed from JSON
 MODEL_CONFIG = json.loads(os.getenv("MODEL_CONFIG", "{}"))
+# MODEL_CHOICES is a list of available model names extracted from the values of MODEL_MAPPING, useful for selection menus or validation
 MODEL_CHOICES = list(MODEL_MAPPING.values())
 
-# Default model
+# Default model configuration and key used as fallback if no specific model is selected
+# DEFAULT_CONFIG contains default parameters or settings for the AI model, parsed from JSON
 DEFAULT_CONFIG = json.loads(os.getenv("DEFAULT_CONFIG", "{}"))
+# DEFAULT_MODEL_KEY is set to the first key found in MODEL_MAPPING if available, otherwise None
 DEFAULT_MODEL_KEY = list(MODEL_MAPPING.keys())[0] if MODEL_MAPPING else None
 
-# HTML
+# HTML meta tags for SEO and other purposes, loaded as a raw string from environment variables
+# These tags are intended to be inserted into the <head> section of generated HTML pages
 META_TAGS = os.getenv("META_TAGS")
 
-#
+# List of allowed file extensions for upload or processing, parsed from a JSON array string
+# This list helps enforce file type restrictions within the system
 ALLOWED_EXTENSIONS = json.loads(os.getenv("ALLOWED_EXTENSIONS", "[]"))
 
-#
+# Notices or announcements that may be displayed to users or logged by the system
+# The content is loaded as a raw string from the environment variable "NOTICES"
 NOTICES = os.getenv('NOTICES')
```
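To sanity-check how these JSON-in-environment settings behave, here is a minimal sketch; the variable names come from the diff, but every value below is invented for illustration:

```python
import json
import os

# Invented example values; a real deployment sets these as Space secrets.
os.environ["MODEL_MAPPING"] = json.dumps({"model_1": "Model One"})
os.environ["LINUX_SERVER_HOST"] = json.dumps(["https://a.example", "", "https://b.example"])
os.environ["LINUX_SERVER_ERROR"] = "401,403,503"

MODEL_MAPPING = json.loads(os.getenv("MODEL_MAPPING", "{}"))
LINUX_SERVER_HOSTS = [h for h in json.loads(os.getenv("LINUX_SERVER_HOST", "[]")) if h]
LINUX_SERVER_ERRORS = set(map(int, filter(None, os.getenv("LINUX_SERVER_ERROR", "").split(","))))

print(MODEL_MAPPING)        # {'model_1': 'Model One'}
print(LINUX_SERVER_HOSTS)   # ['https://a.example', 'https://b.example'] -- empty entry filtered out
print(LINUX_SERVER_ERRORS)  # {401, 403, 503}
```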
src/cores/client.py  (CHANGED)

```diff
@@ -3,40 +3,65 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
-import asyncio
-import httpx
-import json
-import random
-import uuid
+import asyncio  # Import asyncio for asynchronous programming capabilities
+import httpx  # Import httpx to perform asynchronous HTTP requests
+import json  # Import json to handle JSON encoding and decoding
+import random  # Import random to shuffle lists for load balancing
+import uuid  # Import uuid to generate unique session identifiers
 
-from src.config import *
-from src.cores.server import fetch_response_stream_async
-from src.cores.session import ensure_stop_event, get_model_key
-from datetime import datetime
+from config import *  # Import all configuration constants and variables from the config module
+from src.cores.server import fetch_response_stream_async  # Import async function to fetch streamed AI responses
+from src.cores.session import ensure_stop_event, get_model_key  # Import session helper functions
+from datetime import datetime  # Import datetime to get current date and time information
 
 async def chat_with_model_async(history, user_input, model_display, sess, custom_prompt, deep_search):
     """
-
-
-
-
+    Asynchronous function to handle interaction with an AI model and stream its responses.
+
+    Parameters:
+    - history: List of tuples containing previous conversation messages (user and assistant)
+    - user_input: The current input string from the user
+    - model_display: The display name of the AI model to use
+    - sess: Session object containing session state, stop event, and cancellation token
+    - custom_prompt: Optional custom system instructions to override the default instructions
+    - deep_search: Boolean flag indicating whether to integrate deep search results into the instructions
+
+    This function prepares the message history and system instructions, optionally enriches the instructions
+    with deep search results if enabled, and attempts to fetch streamed responses from multiple backend
+    providers with fallback. It yields chunks of the response asynchronously for real-time UI updates.
     """
+
+    # Ensure the session has a stop event initialized to control streaming cancellation
     ensure_stop_event(sess)
+
+    # Clear any previous stop event state to allow a new streaming session
     sess.stop_event.clear()
+
+    # Reset the cancellation token to indicate the session is active and not cancelled
     sess.cancel_token["cancelled"] = False
+
+    # Check if provider keys and hosts are configured; if not, yield a predefined error response and exit
     if not LINUX_SERVER_PROVIDER_KEYS or not LINUX_SERVER_HOSTS:
-        yield ("content", RESPONSES["RESPONSE_3"]) #
+        yield ("content", RESPONSES["RESPONSE_3"])  # Inform the user that no backend providers are available
         return
+
+    # Assign a unique session ID if not already present to track conversation context
     if not hasattr(sess, "session_id") or not sess.session_id:
         sess.session_id = str(uuid.uuid4())
+
+    # Determine the internal model key based on the display name, falling back to the default if not found
     model_key = get_model_key(model_display, MODEL_MAPPING, DEFAULT_MODEL_KEY)
+
+    # Retrieve model-specific configuration parameters or use the default configuration
     cfg = MODEL_CONFIG.get(model_key, DEFAULT_CONFIG)
+
+    # Initialize a list to hold the messages that will be sent to the AI model
     msgs = []
 
-    #
+    # Obtain the current date and time formatted as a readable string for context in the instructions
     current_date = datetime.now().strftime("%A, %B %d, %Y, %I:%M %p %Z")
 
-    #
+    # Combine the internal AI instructions with the current date to form comprehensive system instructions
     COMBINED_AI_INSTRUCTIONS = (
         INTERNAL_AI_INSTRUCTIONS
         + "\n\n\n"
@@ -44,11 +69,14 @@ async def chat_with_model_async(history, user_input, model_display, sess, custom_prompt, deep_search):
         + "\n\n\n"
     )
 
-    # If deep search enabled and
+    # If deep search is enabled and the primary model is selected, prepend deep search instructions and results
     if deep_search and model_display == MODEL_CHOICES[0]:
+        # Add deep search instructions as a system message to guide the AI
         msgs.append({"role": "system", "content": DEEP_SEARCH_INSTRUCTIONS})
         try:
+            # Create an asynchronous HTTP client session for making the deep search request
             async with httpx.AsyncClient() as client:
+                # Define the payload with parameters for the deep search query
                 payload = {
                     "query": user_input,
                     "topic": "general",
@@ -64,40 +92,70 @@ async def chat_with_model_async(history, user_input, model_display, sess, custom_prompt, deep_search):
                     "include_domains": [],
                     "exclude_domains": []
                 }
-                r = await client.post(DEEP_SEARCH_PROVIDER_HOST, headers={"Authorization": f"Bearer {DEEP_SEARCH_PROVIDER_KEY}"}, json=payload)
+                # Send a POST request to the deep search provider with authorization header and JSON payload
+                r = await client.post(
+                    DEEP_SEARCH_PROVIDER_HOST,
+                    headers={"Authorization": f"Bearer {DEEP_SEARCH_PROVIDER_KEY}"},
+                    json=payload
+                )
+                # Parse the JSON response from the deep search provider
                 sr_json = r.json()
+                # Append the deep search results as a system message in JSON string format
                 msgs.append({"role": "system", "content": json.dumps(sr_json)})
         except Exception:
-            #
+            # If any error occurs during deep search, fail silently without interrupting the chat flow
             pass
+        # Append the combined AI instructions after the deep search content to maintain context
         msgs.append({"role": "system", "content": COMBINED_AI_INSTRUCTIONS})
+
+    # If deep search is not enabled but the primary model is selected, use only the combined AI instructions
     elif model_display == MODEL_CHOICES[0]:
-        # For primary model without deep search, use internal instructions
         msgs.append({"role": "system", "content": COMBINED_AI_INSTRUCTIONS})
+
+    # For other models, use custom instructions if provided, otherwise fall back to the system instructions mapping or the default instructions
     else:
-        # For other models, use default instructions
         msgs.append({"role": "system", "content": custom_prompt or SYSTEM_PROMPT_MAPPING.get(model_key, SYSTEM_PROMPT_DEFAULT)})
 
-    # Append conversation history alternating user and assistant messages
+    # Append the conversation history to the message list
+    # First add all user messages from history
     msgs.extend([{"role": "user", "content": u} for u, _ in history])
+    # Then add all assistant messages from history that are not empty
     msgs.extend([{"role": "assistant", "content": a} for _, a in history if a])
-
+
+    # Append the current user input as the latest user message
     msgs.append({"role": "user", "content": user_input})
 
-    #
+    # Create a list of all possible combinations of backend hosts and provider keys for load balancing and fallback
     candidates = [(h, k) for h in LINUX_SERVER_HOSTS for k in LINUX_SERVER_PROVIDER_KEYS]
+
+    # Randomly shuffle the list of host-key pairs to distribute load evenly and avoid bias
     random.shuffle(candidates)
 
-    #
+    # Iterate over each host and key pair to attempt fetching a streamed response
     for h, k in candidates:
-        stream_gen = fetch_response_stream_async(h, k, model_key, msgs, cfg, sess.session_id, sess.stop_event, sess.cancel_token)
+        # Call the async generator function to fetch streamed response chunks from the backend
+        stream_gen = fetch_response_stream_async(
+            h, k, model_key, msgs, cfg, sess.session_id, sess.stop_event, sess.cancel_token
+        )
+
+        # Flag to track if any response chunks were received from this provider
         got_responses = False
+
+        # Asynchronously iterate over each chunk yielded by the streaming generator
         async for chunk in stream_gen:
+            # If the stop event is set or cancellation is requested, terminate streaming immediately
            if sess.stop_event.is_set() or sess.cancel_token["cancelled"]:
                return
+
+            # Mark that at least one response chunk has been received
            got_responses = True
+
+            # Yield the current chunk to the caller for incremental UI updates or processing
            yield chunk
+
+        # If any responses were received from this host-key pair, stop trying others and return
        if got_responses:
            return
+
+    # If no responses were received from any provider, yield a fallback message indicating failure
    yield ("content", RESPONSES["RESPONSE_2"])
```
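A minimal sketch of how a caller might consume this generator, assuming the package layout shown in the diff; the model display name and inputs are placeholders:

```python
import asyncio

from src.cores.client import chat_with_model_async
from src.cores.session import create_session

async def main():
    sess = create_session()
    history = []  # no earlier turns in this toy run
    # "Some Model" is a placeholder display name; custom_prompt=None, deep_search=False
    async for kind, text in chat_with_model_async(history, "Hello!", "Some Model", sess, None, False):
        if kind == "reasoning":
            print(f"[reasoning] {text}")
        else:  # kind == "content"
            print(text, end="", flush=True)

asyncio.run(main())
```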
src/cores/server.py  (CHANGED)

```diff
@@ -3,57 +3,99 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
-import codecs
-import httpx
-import json
+import codecs  # Import codecs module for encoding and decoding operations, useful for handling text data
+import httpx  # Import httpx for making asynchronous HTTP requests to external servers or APIs
+import json  # Import json module to parse JSON formatted strings into Python objects and vice versa
 
-from src.cores.session import marked_item
-from src.config import LINUX_SERVER_ERRORS, LINUX_SERVER_PROVIDER_KEYS_MARKED, LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS, RESPONSES
+from src.cores.session import marked_item  # Import marked_item function to track and mark keys that fail repeatedly, helping to avoid using problematic keys
+from config import LINUX_SERVER_ERRORS, LINUX_SERVER_PROVIDER_KEYS_MARKED, LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS, RESPONSES  # Import constants used for error handling, key marking, retry attempts, and predefined responses
 
 async def fetch_response_stream_async(host, key, model, msgs, cfg, sid, stop_event, cancel_token):
     """
-
-
-
+    Asynchronous generator function that streams AI-generated responses from a backend server endpoint.
+
+    Parameters:
+    - host: The URL of the backend server to send the request to.
+    - key: Authorization token (API key) used in the request header for authentication.
+    - model: The AI model identifier to be used for generating responses.
+    - msgs: The list of messages forming the conversation or prompt to send to the AI.
+    - cfg: Configuration dictionary containing additional parameters for the request.
+    - sid: Session ID string to associate the request with a particular session.
+    - stop_event: An asynchronous event object that signals when to stop streaming responses.
+    - cancel_token: A dictionary containing a 'cancelled' boolean flag to abort the streaming operation.
+
+    This function attempts to connect to the backend server twice with different timeout values (5 and 10 seconds).
+    It sends a POST request with a JSON payload that includes the model, messages, session ID, stream flag, and configuration.
+    The function streams the response line by line, parsing JSON data chunks as they arrive.
+
+    The streamed data contains two types of text parts:
+    - 'reasoning': Additional reasoning text that can be displayed separately in the UI for a richer user experience.
+    - 'content': The main content text generated by the AI.
+
+    The function yields tuples of the form ('reasoning', text) or ('content', text) to the caller asynchronously.
+
+    If the server returns an error status code listed in LINUX_SERVER_ERRORS, the key is marked as problematic to avoid future use.
+    The function also respects stop_event and cancel_token to allow graceful cancellation of the streaming process.
+
+    If the response signals completion with the message defined in RESPONSES["RESPONSE_10"], the function ends the stream.
+
+    The function handles exceptions gracefully, including network errors and JSON parsing issues, retrying or marking keys as needed.
     """
+    # Loop over two timeout values to attempt the request with increasing timeout durations for robustness
     for timeout in [5, 10]:
         try:
+            # Create an asynchronous HTTP client with the specified timeout for the request
             async with httpx.AsyncClient(timeout=timeout) as client:
+                # Open a streaming POST request to the backend server with JSON payload and authorization header
                 async with client.stream(
                     "POST",
                     host,
+                    # Combine fixed parameters with additional configuration into the JSON body
                     json={**{"model": model, "messages": msgs, "session_id": sid, "stream": True}, **cfg},
-                    headers={"Authorization": f"Bearer {key}"}
+                    headers={"Authorization": f"Bearer {key}"}  # Use Bearer token authentication
                 ) as response:
+                    # Check if the response status code indicates a server error that should mark the key
                     if response.status_code in LINUX_SERVER_ERRORS:
+                        # Mark the key as problematic with the provided tracking function and exit the generator
                         marked_item(key, LINUX_SERVER_PROVIDER_KEYS_MARKED, LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS)
                         return
+
+                    # Iterate asynchronously over each line of the streamed response content
                     async for line in response.aiter_lines():
+                        # If the stop event is set or cancellation is requested, stop streaming and exit
                         if stop_event.is_set() or cancel_token["cancelled"]:
                             return
+                        # Skip empty lines to avoid unnecessary processing
                         if not line:
                             continue
+                        # Process lines that start with the prefix 'data: ', which contain JSON payloads
                         if line.startswith("data: "):
-                            data = line[6:]
+                            data = line[6:]  # Extract the JSON string after 'data: '
+                            # If the data matches the predefined end-of-response message, stop streaming
                             if data.strip() == RESPONSES["RESPONSE_10"]:
                                 return
                             try:
+                                # Attempt to parse the JSON data string into a Python dictionary
                                 j = json.loads(data)
+                                # Check if the parsed object is a dictionary containing a 'choices' key
                                 if isinstance(j, dict) and j.get("choices"):
+                                    # Iterate over each choice in the response to extract text deltas
                                     for ch in j["choices"]:
-                                        delta = ch.get("delta", {})
-                                        #
+                                        delta = ch.get("delta", {})  # Get the incremental update part
+                                        # If 'reasoning' text is present in the delta, decode unicode escapes and yield it
                                         if "reasoning" in delta and delta["reasoning"]:
                                             decoded = delta["reasoning"].encode('utf-8').decode('unicode_escape')
-                                            yield ("reasoning", decoded)
-                                        #
+                                            yield ("reasoning", decoded)  # Yield reasoning text for UI display
+                                        # If main 'content' text is present in the delta, yield it directly
                                         if "content" in delta and delta["content"]:
-                                            yield ("content", delta["content"])
+                                            yield ("content", delta["content"])  # Yield main content text
                             except Exception:
-                                # Ignore malformed JSON or unexpected data
+                                # Ignore exceptions from malformed JSON or unexpected data formats and continue streaming
                                 continue
         except Exception:
-            #
+            # Catch network errors, timeouts, or other exceptions and try the next timeout or retry
             continue
+    # If all attempts fail, mark the key as problematic to avoid future use
     marked_item(key, LINUX_SERVER_PROVIDER_KEYS_MARKED, LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS)
+    # Return explicitly when streaming ends or fails after retries
     return
```
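To see what the `data:` parsing loop yields for one streamed line, here is an isolated sketch; the sample line is fabricated in the OpenAI-style shape the code expects:

```python
import json

# Fabricated example of one server-sent-events line in the expected shape
line = 'data: {"choices": [{"delta": {"reasoning": "Weighing options...", "content": "Hello"}}]}'

if line.startswith("data: "):
    data = line[6:]  # strip the "data: " prefix, as the server code does
    j = json.loads(data)
    for ch in j.get("choices", []):
        delta = ch.get("delta", {})
        if delta.get("reasoning"):
            print(("reasoning", delta["reasoning"]))  # -> ('reasoning', 'Weighing options...')
        if delta.get("content"):
            print(("content", delta["content"]))      # -> ('content', 'Hello')
```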
src/cores/session.py  (CHANGED)

```diff
@@ -3,58 +3,91 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
-import asyncio
-import requests
-import uuid
-import threading
+import asyncio  # Import the asyncio library to handle asynchronous operations and events
+import requests  # Import the requests library for HTTP requests and session management
+import uuid  # Import the uuid library to generate unique identifiers
+import threading  # Import threading to run background timers for delayed operations
 
-from src.config import LINUX_SERVER_PROVIDER_KEYS_MARKED, LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS
+from config import LINUX_SERVER_PROVIDER_KEYS_MARKED, LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS  # Import configuration variables that track marked provider keys and their failure attempts
 
 class SessionWithID(requests.Session):
     """
-    Custom session
-
+    Custom session class extending requests.Session to add unique session identification
+    and asynchronous cancellation control. This allows tracking individual user sessions
+    and managing cancellation of ongoing HTTP requests asynchronously.
     """
     def __init__(self):
-        super().__init__()
-        self.session_id = str(uuid.uuid4())
-        self.stop_event = asyncio.Event()
-        self.cancel_token = {"cancelled": False}
+        super().__init__()  # Initialize the base requests.Session class
+        self.session_id = str(uuid.uuid4())
+        # Generate and assign a unique string ID for this session instance to identify it uniquely
+        self.stop_event = asyncio.Event()
+        # Create an asyncio Event object used to signal when the session should stop or cancel operations
+        self.cancel_token = {"cancelled": False}
+        # Dictionary flag to indicate if the current session's operations have been cancelled
 
 def create_session():
     """
-
-
+    Factory function to create and return a new SessionWithID instance.
+    This should be called whenever a new user session starts or a chat session is reset,
+    ensuring each session has its own unique ID and cancellation controls.
     """
     return SessionWithID()
 
 def ensure_stop_event(sess):
     """
-
-
+    Utility function to verify that a given session object has the required asynchronous
+    control attributes: stop_event and cancel_token. If they are missing (e.g., when restoring
+    sessions from storage), this function adds them to maintain consistent session behavior.
+
+    Parameters:
+    - sess: The session object to check and update.
     """
     if not hasattr(sess, "stop_event"):
-        sess.stop_event = asyncio.Event()
+        sess.stop_event = asyncio.Event()
+        # Add an asyncio Event to signal stop requests if missing
     if not hasattr(sess, "cancel_token"):
-        sess.cancel_token = {"cancelled": False}
+        sess.cancel_token = {"cancelled": False}
+        # Add a cancellation flag dictionary if missing
 
 def marked_item(item, marked, attempts):
     """
-    Mark a provider key or host as temporarily problematic after repeated failures
-
-
+    Mark a provider key or host as temporarily problematic after repeated failures to prevent
+    using unreliable providers continuously. This function adds the item to a 'marked' set
+    and increments its failure attempt count. If the failure count reaches 3 or more, a timer
+    is started to automatically unmark the item after 5 minutes (300 seconds), allowing retries.
+
+    Parameters:
+    - item: The provider key or host identifier to mark as problematic.
+    - marked: A set containing currently marked items.
+    - attempts: A dictionary tracking the number of failure attempts per item.
     """
-    marked.add(item)
-    attempts[item] = attempts.get(item, 0) + 1
+    marked.add(item)
+    # Add the item to the set of marked problematic providers
+    attempts[item] = attempts.get(item, 0) + 1
+    # Increment the failure attempt count for this item, initializing if necessary
     if attempts[item] >= 3:
+        # If the item has failed 3 or more times, schedule removal from marked after 5 minutes
         def remove():
-            marked.discard(item)
-            attempts.pop(item, None)
-        threading.Timer(300, remove).start()
+            marked.discard(item)
+            # Remove the item from the marked set to allow retrying
+            attempts.pop(item, None)
+            # Remove the attempt count entry for this item to reset its failure state
+        threading.Timer(300, remove).start()
+        # Start a background timer that will call remove() after 300 seconds (5 minutes)
 
 def get_model_key(display, MODEL_MAPPING, DEFAULT_MODEL_KEY):
     """
-
-
+    Translate a human-readable model display name into its internal model key identifier.
+    Searches the MODEL_MAPPING dictionary for the key whose value matches the display name.
+    Returns the DEFAULT_MODEL_KEY if no matching display name is found.
+
+    Parameters:
+    - display: The display name of the model as a string.
+    - MODEL_MAPPING: Dictionary mapping internal model keys to display names.
+    - DEFAULT_MODEL_KEY: The fallback model key to return if no match is found.
+
+    Returns:
+    - The internal model key string corresponding to the display name.
     """
+    # Iterate through the MODEL_MAPPING dictionary items and return the key where the value matches the display name
     return next((k for k, v in MODEL_MAPPING.items() if v == display), DEFAULT_MODEL_KEY)
```
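The bookkeeping in `marked_item` can be exercised without any network; a toy run (the key name is made up):

```python
from src.cores.session import marked_item

marked = set()
attempts = {}

marked_item("key-A", marked, attempts)
marked_item("key-A", marked, attempts)

print("key-A" in marked)  # True: the key is flagged as problematic
print(attempts["key-A"])  # 2: one more failure starts the 5-minute unmark timer
```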
src/main/file_extractors.py
CHANGED
@@ -3,216 +3,391 @@
|
|
3 |
# SPDX-License-Identifier: Apache-2.0
|
4 |
#
|
5 |
|
6 |
-
import pdfplumber
|
7 |
-
import pytesseract
|
8 |
-
import docx
|
9 |
-
import zipfile
|
10 |
-
import io
|
11 |
-
import pandas as pd
|
12 |
-
import warnings
|
13 |
-
import re
|
14 |
|
15 |
-
from openpyxl import load_workbook
|
16 |
-
from pptx import Presentation
|
17 |
-
from PIL import Image, ImageEnhance, ImageFilter
|
18 |
-
from pathlib import Path
|
19 |
|
20 |
def clean_text(text):
|
21 |
-
"""
|
22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
text = re.sub(r'[^a-zA-Z0-9\s.,?!():;\'"-]', '', text)
|
24 |
-
# Remove isolated
|
25 |
text = re.sub(r'\b[a-zA-Z]\b', '', text)
|
26 |
-
#
|
27 |
lines = [line.strip() for line in text.splitlines() if line.strip()]
|
|
|
28 |
return "\n".join(lines)
|
29 |
|
30 |
def format_table(df, max_rows=10):
|
31 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
if df.empty:
|
33 |
return ""
|
34 |
-
#
|
35 |
df_clean = df.dropna(axis=0, how='all').dropna(axis=1, how='all')
|
36 |
-
# Replace NaN with empty
|
37 |
df_clean = df_clean.fillna('')
|
38 |
if df_clean.empty:
|
39 |
return ""
|
|
|
40 |
display_df = df_clean.head(max_rows)
|
|
|
41 |
table_str = display_df.to_string(index=False)
|
|
|
42 |
if len(df_clean) > max_rows:
|
43 |
table_str += f"\n... ({len(df_clean) - max_rows} more rows)"
|
44 |
return table_str
|
45 |
|
46 |
def preprocess_image(img):
|
47 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
try:
|
49 |
-
|
|
|
|
|
50 |
enhancer = ImageEnhance.Contrast(img)
|
51 |
-
img = enhancer.enhance(2)
|
52 |
-
|
53 |
-
|
|
|
54 |
img = img.point(lambda x: 0 if x < 140 else 255, '1')
|
55 |
return img
|
56 |
except Exception:
|
|
|
57 |
return img
|
58 |
|
59 |
def ocr_image(img):
|
60 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
try:
|
|
|
62 |
img = preprocess_image(img)
|
|
|
63 |
text = pytesseract.image_to_string(img, lang='eng', config='--psm 6')
|
|
|
64 |
text = clean_text(text)
|
65 |
return text
|
66 |
except Exception:
|
|
|
67 |
return ""
|
68 |
|
69 |
def extract_pdf_content(fp):
|
70 |
"""
|
71 |
-
Extract text
|
72 |
-
|
73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
"""
|
75 |
content = ""
|
76 |
try:
|
77 |
with pdfplumber.open(fp) as pdf:
|
78 |
for i, page in enumerate(pdf.pages, 1):
|
|
|
79 |
text = page.extract_text() or ""
|
|
|
80 |
content += f"Page {i} Text:\n{clean_text(text)}\n\n"
|
81 |
-
#
|
82 |
if page.images:
|
|
|
83 |
img_obj = page.to_image(resolution=300)
|
84 |
for img in page.images:
|
|
|
85 |
bbox = (img["x0"], img["top"], img["x1"], img["bottom"])
|
|
|
86 |
cropped = img_obj.original.crop(bbox)
|
|
|
87 |
ocr_text = ocr_image(cropped)
|
88 |
if ocr_text:
|
|
|
89 |
content += f"[OCR Text from image on page {i}]:\n{ocr_text}\n\n"
|
90 |
-
# Extract tables
|
91 |
tables = page.extract_tables()
|
92 |
for idx, table in enumerate(tables, 1):
|
93 |
if table:
|
|
|
94 |
df = pd.DataFrame(table[1:], columns=table[0])
|
|
|
95 |
content += f"Table {idx} on page {i}:\n{format_table(df)}\n\n"
|
96 |
except Exception as e:
|
|
|
97 |
content += f"\n[Error reading PDF {fp}: {e}]"
|
|
|
98 |
return content.strip()
|
99 |
|
100 |
def extract_docx_content(fp):
|
101 |
"""
|
102 |
-
Extract text from Microsoft Word
|
103 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
"""
|
105 |
content = ""
|
106 |
try:
|
|
|
107 |
doc = docx.Document(fp)
|
|
|
108 |
paragraphs = [para.text.strip() for para in doc.paragraphs if para.text.strip()]
|
109 |
if paragraphs:
|
110 |
content += "Paragraphs:\n" + "\n".join(paragraphs) + "\n\n"
|
111 |
-
# Extract tables
|
112 |
tables = []
|
113 |
for table in doc.tables:
|
114 |
rows = []
|
115 |
for row in table.rows:
|
|
|
116 |
cells = [cell.text.strip() for cell in row.cells]
|
117 |
rows.append(cells)
|
118 |
if rows:
|
|
|
119 |
df = pd.DataFrame(rows[1:], columns=rows[0])
|
120 |
tables.append(df)
|
|
|
121 |
for i, df in enumerate(tables, 1):
|
122 |
content += f"Table {i}:\n{format_table(df)}\n\n"
|
123 |
-
#
|
124 |
with zipfile.ZipFile(fp) as z:
|
125 |
for file in z.namelist():
|
|
|
126 |
if file.startswith("word/media/"):
|
127 |
data = z.read(file)
|
128 |
try:
|
|
|
129 |
img = Image.open(io.BytesIO(data))
|
|
|
130 |
ocr_text = ocr_image(img)
|
131 |
if ocr_text:
|
|
|
132 |
content += f"[OCR Text from embedded image]:\n{ocr_text}\n\n"
|
133 |
except Exception:
|
|
|
134 |
pass
|
135 |
except Exception as e:
|
|
|
136 |
content += f"\n[Error reading Microsoft Word {fp}: {e}]"
|
|
|
137 |
return content.strip()
|
138 |
|
139 |
def extract_excel_content(fp):
|
140 |
"""
|
141 |
-
Extract content from Microsoft Excel files.
|
142 |
-
|
143 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
144 |
"""
|
145 |
content = ""
|
146 |
try:
|
|
|
147 |
with warnings.catch_warnings():
|
148 |
-
warnings.simplefilter("ignore")
|
149 |
-
#
|
150 |
sheets = pd.read_excel(fp, sheet_name=None, engine='openpyxl')
|
|
|
151 |
for sheet_name, df in sheets.items():
|
152 |
content += f"Sheet: {sheet_name}\n"
|
153 |
content += format_table(df) + "\n\n"
|
154 |
except Exception as e:
|
|
|
155 |
content += f"\n[Error reading Microsoft Excel {fp}: {e}]"
|
|
|
156 |
return content.strip()
|
157 |
|
158 |
def extract_pptx_content(fp):
|
159 |
"""
|
160 |
-
Extract text
|
161 |
-
|
162 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
"""
|
164 |
content = ""
|
165 |
try:
|
|
|
166 |
prs = Presentation(fp)
|
|
|
167 |
for i, slide in enumerate(prs.slides, 1):
|
168 |
slide_texts = []
|
|
|
169 |
for shape in slide.shapes:
|
|
|
170 |
if hasattr(shape, "text") and shape.text.strip():
|
171 |
slide_texts.append(shape.text.strip())
|
|
|
172 |
if shape.shape_type == 13 and hasattr(shape, "image") and shape.image:
|
173 |
try:
|
|
|
174 |
img = Image.open(io.BytesIO(shape.image.blob))
|
|
|
175 |
ocr_text = ocr_image(img)
|
176 |
if ocr_text:
|
|
|
177 |
slide_texts.append(f"[OCR Text from image]:\n{ocr_text}")
|
178 |
except Exception:
|
|
|
179 |
pass
|
|
|
180 |
if slide_texts:
|
181 |
content += f"Slide {i} Text:\n" + "\n".join(slide_texts) + "\n\n"
|
182 |
else:
|
183 |
content += f"Slide {i} Text:\nNo text found on this slide.\n\n"
|
184 |
-
# Extract tables
|
185 |
for shape in slide.shapes:
|
186 |
if shape.has_table:
|
187 |
rows = []
|
188 |
table = shape.table
|
|
|
189 |
for row in table.rows:
|
190 |
cells = [cell.text.strip() for cell in row.cells]
|
191 |
rows.append(cells)
|
192 |
if rows:
|
|
|
193 |
df = pd.DataFrame(rows[1:], columns=rows[0])
|
|
|
194 |
content += f"Table on slide {i}:\n{format_table(df)}\n\n"
|
195 |
except Exception as e:
|
|
|
196 |
content += f"\n[Error reading Microsoft PowerPoint {fp}: {e}]"
|
|
|
197 |
return content.strip()
|
198 |
|
199 |
def extract_file_content(fp):
|
200 |
"""
|
201 |
-
Determine file type
|
202 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
203 |
"""
|
|
|
204 |
ext = Path(fp).suffix.lower()
|
205 |
if ext == ".pdf":
|
|
|
206 |
return extract_pdf_content(fp)
|
207 |
elif ext in [".doc", ".docx"]:
|
|
|
208 |
return extract_docx_content(fp)
|
209 |
elif ext in [".xlsx", ".xls"]:
|
|
|
210 |
return extract_excel_content(fp)
|
211 |
elif ext in [".ppt", ".pptx"]:
|
|
|
212 |
return extract_pptx_content(fp)
|
213 |
else:
|
214 |
try:
|
|
|
215 |
text = Path(fp).read_text(encoding="utf-8")
|
|
|
216 |
return clean_text(text)
|
217 |
except Exception as e:
|
|
|
218 |
return f"\n[Error reading file {fp}: {e}]"
|
|
|
3 |
# SPDX-License-Identifier: Apache-2.0
|
4 |
#
|
5 |
|
6 |
+
import pdfplumber # Library to extract text and tables from PDF files
|
7 |
+
import pytesseract # OCR tool to extract text from images
|
8 |
+
import docx # Library to read Microsoft Word (.docx) files
|
9 |
+
import zipfile # To handle zipped archives, used here to access embedded images in Word files
|
10 |
+
import io # Provides tools for handling byte streams, used to open images from bytes
|
11 |
+
import pandas as pd # Data analysis library, used here to handle tables from Excel and other files
|
12 |
+
import warnings # Used to suppress warnings during Excel file reading
|
13 |
+
import re # Regular expressions for text cleaning
|
14 |
|
15 |
+
from openpyxl import load_workbook # Excel file reading library, used for .xlsx files
|
16 |
+
from pptx import Presentation # Library to read Microsoft PowerPoint files
|
17 |
+
from PIL import Image, ImageEnhance, ImageFilter # Image processing libraries for OCR preprocessing
|
18 |
+
from pathlib import Path # Object-oriented filesystem paths
|
19 |
|
20 |
def clean_text(text):
|
21 |
+
"""
|
22 |
+
Clean and normalize extracted text to improve readability and remove noise.
|
23 |
+
|
24 |
+
This function performs several cleaning steps:
|
25 |
+
- Removes characters that are not letters, digits, spaces, or common punctuation.
|
26 |
+
- Removes isolated single letters which are often OCR errors or noise.
|
27 |
+
- Strips whitespace from each line and removes empty lines.
|
28 |
+
- Joins cleaned lines back into a single string separated by newlines.
|
29 |
+
|
30 |
+
Args:
|
31 |
+
text (str): Raw extracted text from any source.
|
32 |
+
|
33 |
+
Returns:
|
34 |
+
str: Cleaned and normalized text ready for display or further processing.
|
35 |
+
"""
|
36 |
+
# Remove all characters except letters, digits, spaces, and common punctuation marks
|
37 |
text = re.sub(r'[^a-zA-Z0-9\s.,?!():;\'"-]', '', text)
|
38 |
+
# Remove single isolated letters which are likely errors or noise from OCR
|
39 |
text = re.sub(r'\b[a-zA-Z]\b', '', text)
|
40 |
+
# Split text into lines, strip whitespace, and remove empty lines
|
41 |
lines = [line.strip() for line in text.splitlines() if line.strip()]
|
42 |
+
# Join cleaned lines with newline characters
|
43 |
return "\n".join(lines)
|
44 |
|
45 |
def format_table(df, max_rows=10):
|
46 |
+
"""
|
47 |
+
Convert a pandas DataFrame into a clean, readable string representation of a table.
|
48 |
+
|
49 |
+
This function:
|
50 |
+
- Removes rows and columns that are completely empty to reduce clutter.
|
51 |
+
- Replaces any NaN values with empty strings for cleaner output.
|
52 |
+
- Limits the output to a maximum number of rows for brevity.
|
53 |
+
- Adds a note if there are more rows than displayed.
|
54 |
+
|
55 |
+
Args:
|
56 |
+
df (pandas.DataFrame): The table data to format.
|
57 |
+
max_rows (int): Maximum number of rows to display from the table.
|
58 |
+
|
59 |
+
Returns:
|
60 |
+
str: Formatted string representation of the table or empty string if no data.
|
61 |
+
"""
|
62 |
if df.empty:
|
63 |
return ""
|
64 |
+
# Remove rows and columns where all values are NaN to clean the table
|
65 |
df_clean = df.dropna(axis=0, how='all').dropna(axis=1, how='all')
|
66 |
+
# Replace remaining NaN values with empty strings for better readability
|
67 |
df_clean = df_clean.fillna('')
|
68 |
if df_clean.empty:
|
69 |
return ""
|
70 |
+
# Select only the first max_rows rows for display
|
71 |
display_df = df_clean.head(max_rows)
|
72 |
+
# Convert DataFrame to string without row indices
|
73 |
table_str = display_df.to_string(index=False)
|
74 |
+
# Append a message if there are more rows than displayed
|
75 |
if len(df_clean) > max_rows:
|
76 |
table_str += f"\n... ({len(df_clean) - max_rows} more rows)"
|
77 |
return table_str
|
78 |
|
79 |
def preprocess_image(img):
|
80 |
+
"""
|
81 |
+
Enhance an image to improve OCR accuracy by applying several preprocessing steps.
|
82 |
+
|
83 |
+
The preprocessing includes:
|
84 |
+
- Converting the image to grayscale to simplify colors.
|
85 |
+
- Increasing contrast to make text stand out more.
|
86 |
+
- Applying a median filter to reduce noise.
|
87 |
+
- Binarizing the image by thresholding to black and white.
|
88 |
+
|
89 |
+
Args:
|
90 |
+
img (PIL.Image.Image): The original image to preprocess.
|
91 |
+
|
92 |
+
Returns:
|
93 |
+
PIL.Image.Image: The processed image ready for OCR.
|
94 |
+
If an error occurs during processing, returns the original image.
|
95 |
+
"""
|
96 |
try:
|
97 |
+
# Convert image to grayscale mode
|
98 |
+
img = img.convert("L")
|
99 |
+
# Enhance contrast by a factor of 2 to make text clearer
|
100 |
enhancer = ImageEnhance.Contrast(img)
|
101 |
+
img = enhancer.enhance(2)
|
102 |
+
# Apply median filter to reduce noise and smooth the image
|
103 |
+
img = img.filter(ImageFilter.MedianFilter())
|
104 |
+
# Convert image to black and white using a threshold of 140
|
105 |
img = img.point(lambda x: 0 if x < 140 else 255, '1')
|
106 |
return img
|
107 |
except Exception:
|
108 |
+
# In case of any error, return the original image without changes
|
109 |
return img
|
110 |
|
111 |
def ocr_image(img):
|
112 |
+
"""
|
113 |
+
Extract text from an image using OCR after preprocessing to improve results.
|
114 |
+
|
115 |
+
This function:
|
116 |
+
- Preprocesses the image to enhance text visibility.
|
117 |
+
- Uses pytesseract with page segmentation mode 6 (assumes a single uniform block of text).
|
118 |
+
- Cleans the extracted text using the clean_text function.
|
119 |
+
|
120 |
+
Args:
|
121 |
+
img (PIL.Image.Image): The image from which to extract text.
|
122 |
+
|
123 |
+
Returns:
|
124 |
+
str: The cleaned OCR-extracted text. Returns empty string if OCR fails.
|
125 |
+
"""
|
126 |
try:
|
127 |
+
# Preprocess image to improve OCR quality
|
128 |
img = preprocess_image(img)
|
129 |
+
# Perform OCR using pytesseract with English language and specified config
|
130 |
text = pytesseract.image_to_string(img, lang='eng', config='--psm 6')
|
131 |
+
# Clean the OCR output to remove noise and normalize text
|
132 |
text = clean_text(text)
|
133 |
return text
|
134 |
except Exception:
|
135 |
+
# Return empty string if OCR fails for any reason
|
136 |
return ""
|
137 |
|
138 |
def extract_pdf_content(fp):
|
139 |
"""
|
140 |
+
Extract text and tables from a PDF file, including OCR on embedded images.
|
141 |
+
|
142 |
+
This function:
|
143 |
+
- Opens the PDF file and iterates through each page.
|
144 |
+
- Extracts and cleans text from each page.
|
145 |
+
- Performs OCR on images embedded in pages to extract any text within images.
|
146 |
+
- Extracts tables from pages and formats them as readable text.
|
147 |
+
- Handles exceptions by appending error messages to the content.
|
148 |
+
|
149 |
+
Args:
|
150 |
+
fp (str or Path): File path to the PDF document.
|
151 |
+
|
152 |
+
Returns:
|
153 |
+
str: Combined extracted text, OCR results, and formatted tables from the PDF.
|
154 |
"""
|
155 |
content = ""
|
156 |
try:
|
157 |
with pdfplumber.open(fp) as pdf:
|
158 |
for i, page in enumerate(pdf.pages, 1):
|
159 |
+
# Extract text from the current page, defaulting to empty string if None
|
160 |
text = page.extract_text() or ""
|
161 |
+
# Clean extracted text and add page header
|
162 |
content += f"Page {i} Text:\n{clean_text(text)}\n\n"
|
163 |
+
# If there are images on the page, perform OCR on each
|
164 |
if page.images:
|
165 |
+
# Create an image object of the page with 300 dpi resolution for cropping
|
166 |
img_obj = page.to_image(resolution=300)
|
167 |
for img in page.images:
|
168 |
+
# Define bounding box coordinates for the image on the page
|
169 |
bbox = (img["x0"], img["top"], img["x1"], img["bottom"])
|
170 |
+
# Crop the image from the page image
|
171 |
cropped = img_obj.original.crop(bbox)
|
172 |
+
# Perform OCR on the cropped image
|
173 |
ocr_text = ocr_image(cropped)
|
174 |
if ocr_text:
|
175 |
+
# Append OCR text with page and image reference
|
176 |
content += f"[OCR Text from image on page {i}]:\n{ocr_text}\n\n"
|
177 |
+
# Extract tables from the page
|
178 |
tables = page.extract_tables()
|
179 |
for idx, table in enumerate(tables, 1):
|
180 |
if table:
|
181 |
+
# Convert table list to DataFrame using first row as header
|
182 |
df = pd.DataFrame(table[1:], columns=table[0])
|
183 |
+
# Format and append the table text
|
184 |
content += f"Table {idx} on page {i}:\n{format_table(df)}\n\n"
|
185 |
except Exception as e:
|
186 |
+
# Append error message if PDF reading fails
|
187 |
content += f"\n[Error reading PDF {fp}: {e}]"
|
188 |
+
# Return the combined content with whitespace trimmed
|
189 |
return content.strip()
|
190 |
|
191 |
def extract_docx_content(fp):
|
192 |
"""
|
193 |
+
Extract text, tables, and OCR text from images embedded in a Microsoft Word (.docx) file.
|
194 |
+
|
195 |
+
This function:
|
196 |
+
- Reads paragraphs and tables from the document.
|
197 |
+
- Cleans and formats extracted text and tables.
|
198 |
+
- Opens the .docx file as a zip archive to extract embedded images.
|
199 |
+
- Performs OCR on embedded images to extract any text they contain.
|
200 |
+
- Handles exceptions and appends error messages if reading fails.
|
201 |
+
|
202 |
+
Args:
|
203 |
+
fp (str or Path): File path to the Word document.
|
204 |
+
|
205 |
+
Returns:
|
206 |
+
str: Combined extracted paragraphs, tables, and OCR text from embedded images.
|
207 |
"""
|
208 |
content = ""
|
209 |
try:
|
210 |
+
# Load the Word document
|
211 |
doc = docx.Document(fp)
|
212 |
+
# Extract and clean all non-empty paragraphs
|
213 |
paragraphs = [para.text.strip() for para in doc.paragraphs if para.text.strip()]
|
214 |
if paragraphs:
|
215 |
content += "Paragraphs:\n" + "\n".join(paragraphs) + "\n\n"
|
216 |
+
# Extract tables from the document
|
217 |
tables = []
|
218 |
for table in doc.tables:
|
219 |
rows = []
|
220 |
for row in table.rows:
|
221 |
+
# Extract and clean text from each cell in the row
|
222 |
cells = [cell.text.strip() for cell in row.cells]
|
223 |
rows.append(cells)
|
224 |
if rows:
|
225 |
+
# Convert rows to DataFrame using first row as header
|
226 |
df = pd.DataFrame(rows[1:], columns=rows[0])
|
227 |
tables.append(df)
|
228 |
+
# Format and append each extracted table
|
229 |
for i, df in enumerate(tables, 1):
|
230 |
content += f"Table {i}:\n{format_table(df)}\n\n"
|
231 |
+
# Open the .docx file as a zip archive to access embedded media files
|
232 |
with zipfile.ZipFile(fp) as z:
|
233 |
for file in z.namelist():
|
234 |
+
# Look for images inside the word/media directory
|
235 |
if file.startswith("word/media/"):
|
236 |
data = z.read(file)
|
237 |
try:
|
238 |
+
# Open image from bytes
|
239 |
img = Image.open(io.BytesIO(data))
|
240 |
+
# Perform OCR on the image
|
241 |
ocr_text = ocr_image(img)
|
242 |
if ocr_text:
|
243 |
+
# Append OCR text extracted from embedded image
|
244 |
content += f"[OCR Text from embedded image]:\n{ocr_text}\n\n"
|
245 |
except Exception:
|
246 |
+
# Ignore errors in image processing to continue extraction
|
247 |
pass
|
248 |
except Exception as e:
|
249 |
+
# Append error message if Word document reading fails
|
250 |
content += f"\n[Error reading Microsoft Word {fp}: {e}]"
|
251 |
+
# Return combined content trimmed of extra whitespace
|
252 |
return content.strip()
|
253 |
|
254 |
def extract_excel_content(fp):
|
255 |
"""
|
256 |
+
Extract readable table content from Microsoft Excel files (.xlsx, .xls).
|
257 |
+
|
258 |
+
This function:
|
259 |
+
- Reads all sheets in the Excel file.
|
260 |
+
- Converts each sheet to a formatted table string.
|
261 |
+
- Suppresses warnings during reading to avoid clutter.
|
262 |
+
- Does not attempt to extract images to avoid errors.
|
263 |
+
- Handles exceptions by appending error messages.
|
264 |
+
|
265 |
+
Args:
|
266 |
+
fp (str or Path): File path to the Excel workbook.
|
267 |
+
|
268 |
+
Returns:
|
269 |
+
str: Combined formatted tables from all sheets in the workbook.
|
270 |
"""
|
271 |
content = ""
|
272 |
try:
|
273 |
+
# Suppress warnings such as openpyxl deprecation or data type warnings
|
274 |
with warnings.catch_warnings():
|
275 |
+
warnings.simplefilter("ignore")
|
276 |
+
# Read all sheets into a dictionary of DataFrames using openpyxl engine
|
277 |
sheets = pd.read_excel(fp, sheet_name=None, engine='openpyxl')
|
278 |
+
# Iterate over each sheet and format its content
|
279 |
for sheet_name, df in sheets.items():
|
280 |
content += f"Sheet: {sheet_name}\n"
|
281 |
content += format_table(df) + "\n\n"
|
282 |
except Exception as e:
|
283 |
+
# Append error message if Excel reading fails
|
284 |
content += f"\n[Error reading Microsoft Excel {fp}: {e}]"
|
285 |
+
# Return combined sheet contents trimmed of whitespace
|
286 |
return content.strip()
|
287 |
|
288 |
def extract_pptx_content(fp):
|
289 |
"""
|
290 |
+
Extract text, tables, and OCR text from images in Microsoft PowerPoint (.pptx) files.
|
291 |
+
|
292 |
+
This function:
|
293 |
+
- Reads each slide in the presentation.
|
294 |
+
- Extracts text from shapes and tables on each slide.
|
295 |
+
- Performs OCR on images embedded in shapes.
|
296 |
+
- Handles exceptions and appends error messages if reading fails.
|
297 |
+
|
298 |
+
Args:
|
299 |
+
fp (str or Path): File path to the PowerPoint presentation.
|
300 |
+
|
301 |
+
Returns:
|
302 |
+
str: Combined extracted text, tables, and OCR results from all slides.
|
303 |
"""
|
304 |
content = ""
|
305 |
try:
|
306 |
+
# Load the PowerPoint presentation
|
307 |
prs = Presentation(fp)
|
308 |
+
# Iterate through each slide by index starting at 1
|
309 |
for i, slide in enumerate(prs.slides, 1):
|
310 |
slide_texts = []
|
311 |
+
# Iterate through all shapes on the slide
|
312 |
for shape in slide.shapes:
|
313 |
+
# Extract and clean text from shapes that have text attribute
|
314 |
if hasattr(shape, "text") and shape.text.strip():
|
315 |
slide_texts.append(shape.text.strip())
|
316 |
+
# Check if the shape is a picture (shape_type 13) with an image
|
317 |
if shape.shape_type == 13 and hasattr(shape, "image") and shape.image:
|
318 |
try:
|
319 |
+
# Open image from the shape's binary blob data
|
320 |
img = Image.open(io.BytesIO(shape.image.blob))
|
321 |
+
# Perform OCR on the image
|
322 |
ocr_text = ocr_image(img)
|
323 |
if ocr_text:
|
324 |
+
# Append OCR text extracted from the image
|
325 |
slide_texts.append(f"[OCR Text from image]:\n{ocr_text}")
|
326 |
except Exception:
|
327 |
+
# Ignore errors in image OCR to continue processing
|
328 |
pass
|
329 |
+
# Add slide text or note if no text found
|
330 |
if slide_texts:
|
331 |
content += f"Slide {i} Text:\n" + "\n".join(slide_texts) + "\n\n"
|
332 |
else:
|
333 |
content += f"Slide {i} Text:\nNo text found on this slide.\n\n"
|
334 |
+
# Extract tables from shapes that have tables
|
335 |
for shape in slide.shapes:
|
336 |
if shape.has_table:
|
337 |
rows = []
|
338 |
table = shape.table
|
339 |
+
# Extract text from each cell in the table rows
|
340 |
for row in table.rows:
|
341 |
cells = [cell.text.strip() for cell in row.cells]
|
342 |
rows.append(cells)
|
343 |
if rows:
|
344 |
+
# Convert rows to DataFrame using first row as header
|
345 |
df = pd.DataFrame(rows[1:], columns=rows[0])
|
346 |
+
# Format and append the table text
|
347 |
content += f"Table on slide {i}:\n{format_table(df)}\n\n"
|
348 |
except Exception as e:
|
349 |
+
# Append error message if PowerPoint reading fails
|
350 |
content += f"\n[Error reading Microsoft PowerPoint {fp}: {e}]"
|
351 |
+
# Return combined slide content trimmed of whitespace
|
352 |
return content.strip()
|
353 |
|
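
A small readability aside: the literal 13 in the picture check above corresponds to `MSO_SHAPE_TYPE.PICTURE` in python-pptx; an equivalent sketch using the named constant:

from pptx.enum.shapes import MSO_SHAPE_TYPE  # named constants for PowerPoint shape types

def is_picture(shape):
    # Same test as `shape.shape_type == 13`, but self-documenting
    return shape.shape_type == MSO_SHAPE_TYPE.PICTURE
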
354 |
def extract_file_content(fp):
|
355 |
"""
|
356 |
+
Determine the file type based on its extension and extract text content accordingly.
|
357 |
+
|
358 |
+
This function supports:
|
359 |
+
- PDF files with text, tables, and OCR on images.
|
360 |
+
- Microsoft Word documents with paragraphs, tables, and OCR on embedded images.
|
361 |
+
- Microsoft Excel workbooks with formatted sheet tables.
|
362 |
+
- Microsoft PowerPoint presentations with slide text, tables, and OCR on images.
|
363 |
+
- Other file types, which it attempts to read as plain UTF-8 text as a fallback.
|
364 |
+
|
365 |
+
Args:
|
366 |
+
fp (str or Path): File path to the document to extract content from.
|
367 |
+
|
368 |
+
Returns:
|
369 |
+
str: Extracted and cleaned text content from the file, or an error message.
|
370 |
"""
|
371 |
+
# Get the file extension in lowercase to identify file type
|
372 |
ext = Path(fp).suffix.lower()
|
373 |
if ext == ".pdf":
|
374 |
+
# Extract content from PDF files
|
375 |
return extract_pdf_content(fp)
|
376 |
elif ext in [".doc", ".docx"]:
|
377 |
+
# Extract content from Word documents
|
378 |
return extract_docx_content(fp)
|
379 |
elif ext in [".xlsx", ".xls"]:
|
380 |
+
# Extract content from Excel workbooks
|
381 |
return extract_excel_content(fp)
|
382 |
elif ext in [".ppt", ".pptx"]:
|
383 |
+
# Extract content from PowerPoint presentations
|
384 |
return extract_pptx_content(fp)
|
385 |
else:
|
386 |
try:
|
387 |
+
# Attempt to read unknown file types as plain UTF-8 text
|
388 |
text = Path(fp).read_text(encoding="utf-8")
|
389 |
+
# Clean the extracted text before returning
|
390 |
return clean_text(text)
|
391 |
except Exception as e:
|
392 |
+
# Return error message if reading fails
|
393 |
return f"\n[Error reading file {fp}: {e}]"
|
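
A hedged usage sketch of the dispatcher above (file names are illustrative and assumed to exist on disk):

for fp in ["report.docx", "data.xlsx", "slides.pptx", "notes.txt"]:
    text = extract_file_content(fp)
    print(fp, "->", text[:80].replace("\n", " "))  # preview the first 80 characters
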
src/main/gradio.py
CHANGED
@@ -3,177 +3,296 @@
  3 |   # SPDX-License-Identifier: Apache-2.0
  4 |   #
  5 |
  6 | - import gradio as gr
  7 | - import asyncio
  8 |
  9 | - from pathlib import Path
 10 | - from
 11 | - from src.cores.session import create_session, ensure_stop_event, get_model_key
 12 | - from src.main.file_extractors import extract_file_content
 13 | - from src.cores.client import chat_with_model_async
 14 |
 15 |   async def respond_async(multi, history, model_display, sess, custom_prompt, deep_search):
 16 |   """
 17 | -
 18 | - Supports text
 19 | - Extracts
 20 | - Streams AI responses back to UI, updating chat history live.
 21 | - Allows stopping response generation
 22 |   """
 23 | - ensure_stop_event(sess)
 24 | - sess.stop_event.clear()
 25 | - sess.cancel_token["cancelled"] = False
 26 | -
 27 |   msg_input = {"text": multi.get("text", "").strip(), "files": multi.get("files", [])}
 28 | -
 29 |   if not msg_input["text"] and not msg_input["files"]:
 30 |   yield history, gr.update(value="", interactive=True, submit_btn=True, stop_btn=False), sess
 31 |   return
 32 | -
 33 |   inp = ""
 34 |   for f in msg_input["files"]:
 35 | - # Support dict or direct file path
 36 |   fp = f.get("data", f.get("name", "")) if isinstance(f, dict) else f
 37 | -
 38 | -
 39 |   if msg_input["text"]:
 40 |   inp += msg_input["text"]
 41 | -
 42 | - history
 43 |   yield history, gr.update(interactive=False, submit_btn=False, stop_btn=True), sess
 44 | -
 45 | -
 46 |   async def background():
 47 | -
 48 | - responses
 49 | -
 50 | -
 51 |   async for typ, chunk in chat_with_model_async(history, inp, model_display, sess, custom_prompt, deep_search):
 52 |   if sess.stop_event.is_set() or sess.cancel_token["cancelled"]:
 53 |   break
 54 |   if typ == "reasoning":
 55 |   if ignore_reasoning:
 56 |   continue
 57 |   reasoning += chunk
 58 | -
 59 |   elif typ == "content":
 60 |   if not content_started:
 61 |   content_started = True
 62 |   ignore_reasoning = True
 63 |   responses = chunk
 64 | - await queue.put(("reasoning", "")) # Clear reasoning
 65 | - await queue.put(("replace", responses))
 66 |   else:
 67 |   responses += chunk
 68 |   await queue.put(("append", responses))
 69 | -
 70 | -
 71 | -
 72 | -
 73 | -
 74 |   try:
 75 |   while True:
 76 | - queue_task = asyncio.create_task(queue.get())
 77 |   pending_tasks.add(queue_task)
 78 |   done, _ = await asyncio.wait({stop_task, queue_task}, return_when=asyncio.FIRST_COMPLETED)
 79 |   for task in done:
 80 |   pending_tasks.discard(task)
 81 |   if task is stop_task:
 82 | - # User requested stop, cancel background task and update UI
 83 |   sess.cancel_token["cancelled"] = True
 84 |   bg_task.cancel()
 85 |   try:
 86 |   await bg_task
 87 |   except asyncio.CancelledError:
 88 |   pass
 89 |   history[-1][1] = RESPONSES["RESPONSE_1"]
 90 |   yield history, gr.update(value="", interactive=True, submit_btn=True, stop_btn=False), sess
 91 |   return
 92 |   result = task.result()
 93 |   if result is None:
 94 |   raise StopAsyncIteration
 95 |   action, text = result
 96 | - # Update last message content in history with streamed text
 97 |   history[-1][1] = text
 98 |   yield history, gr.update(interactive=False, submit_btn=False, stop_btn=True), sess
 99 |   except StopAsyncIteration:
100 |   pass
101 |   finally:
102 |   for task in pending_tasks:
103 |   task.cancel()
104 |   await asyncio.gather(*pending_tasks, return_exceptions=True)
105 |   yield history, gr.update(value="", interactive=True, submit_btn=True, stop_btn=False), sess
106 |
107 |   def toggle_deep_search(deep_search_value, history, sess, prompt, model):
108 |   """
109 | - Toggle deep search checkbox
110 |   """
111 |   return history, sess, prompt, model, gr.update(value=deep_search_value)
112 |
113 |   def change_model(new):
114 |   """
115 | - Handler to change selected AI model.
116 | - Resets chat history and session.
117 | - Updates system instructions and deep search checkbox visibility
118 | - Deep search is only
119 |   """
120 | - visible = new == MODEL_CHOICES[0]
121 |   default_prompt = SYSTEM_PROMPT_MAPPING.get(get_model_key(new, MODEL_MAPPING, DEFAULT_MODEL_KEY), SYSTEM_PROMPT_DEFAULT)
122 | -
123 |   return [], create_session(), new, default_prompt, False, gr.update(visible=visible)
124 |
125 |   def stop_response(history, sess):
126 |   """
127 |   Handler to stop ongoing AI response generation.
128 | - Sets cancellation flags and updates last message to cancellation notice.
129 |   """
130 | - ensure_stop_event(sess)
131 | - sess.stop_event.set()
132 | - sess.cancel_token["cancelled"] = True
133 |   if history:
134 |   history[-1][1] = RESPONSES["RESPONSE_1"]
135 |   return history, None, create_session()
136 |
137 |   def launch_ui():
138 |   # ============================
139 |   # System Setup
140 |   # ============================
141 | -
142 | - # Install Tesseract OCR and dependencies for text
143 |   import os
144 |   os.system("apt-get update -q -y && \
145 |   apt-get install -q -y tesseract-ocr \
146 |   tesseract-ocr-eng tesseract-ocr-ind \
147 |   libleptonica-dev libtesseract-dev"
148 |   )
149 | -
150 |   with gr.Blocks(fill_height=True, fill_width=True, title=AI_TYPES["AI_TYPE_4"], head=META_TAGS) as jarvis:
151 |   user_history = gr.State([])
152 |   user_session = gr.State(create_session())
153 |   selected_model = gr.State(MODEL_CHOICES[0] if MODEL_CHOICES else "")
154 |   J_A_R_V_I_S = gr.State("")
155 |   # Chatbot UI
156 | - with gr.Column():
157 | -
158 | -
159 | - #
160 |   with gr.Sidebar(open=False):
161 |   deep_search = gr.Checkbox(label=AI_TYPES["AI_TYPE_8"], value=False, info=AI_TYPES["AI_TYPE_9"], visible=True)
162 |   deep_search.change(fn=toggle_deep_search, inputs=[deep_search, user_history, user_session, J_A_R_V_I_S, selected_model], outputs=[chatbot, user_session, J_A_R_V_I_S, selected_model, deep_search])
163 | - gr.Markdown()
164 |   model_radio = gr.Radio(show_label=False, choices=MODEL_CHOICES, value=MODEL_CHOICES[0])
165 | -
166 | - #
167 |   model_radio.change(fn=change_model, inputs=[model_radio], outputs=[user_history, user_session, selected_model, J_A_R_V_I_S, deep_search, deep_search])
168 | -
169 | -
170 | -
171 | -
172 | -
173 |   chatbot.clear(fn=clear_chat, inputs=[user_history, user_session, J_A_R_V_I_S, selected_model], outputs=[chatbot, user_session, J_A_R_V_I_S, selected_model, user_history])
174 | -
175 |   msg.submit(fn=respond_async, inputs=[msg, user_history, selected_model, user_session, J_A_R_V_I_S, deep_search], outputs=[chatbot, msg, user_session], api_name=INTERNAL_AI_GET_SERVER)
176 | -
177 |   msg.stop(fn=stop_response, inputs=[user_history, user_session], outputs=[chatbot, msg, user_session])
178 |   # Launch
179 |   jarvis.queue(default_concurrency_limit=2).launch(max_file_size="1mb", mcp_server=True)

3 |
# SPDX-License-Identifier: Apache-2.0
|
4 |
#
|
5 |
|
6 |
+
import gradio as gr # Import Gradio library for building the web UI
|
7 |
+
import asyncio # Import asyncio for asynchronous programming
|
8 |
|
9 |
+
from pathlib import Path # Import Path for filesystem path manipulations
|
10 |
+
from config import * # Import all configuration constants and variables
|
11 |
+
from src.cores.session import create_session, ensure_stop_event, get_model_key # Import session management utilities
|
12 |
+
from src.main.file_extractors import extract_file_content # Import function to extract content from uploaded files
|
13 |
+
from src.cores.client import chat_with_model_async # Import async chat function with AI model
|
14 |
|
15 |
async def respond_async(multi, history, model_display, sess, custom_prompt, deep_search):
|
16 |
"""
|
17 |
+
Asynchronous handler for processing user input submissions.
|
18 |
+
Supports multi-modal input including text and file uploads.
|
19 |
+
Extracts content from uploaded files and appends it to user text input.
|
20 |
+
Streams AI-generated responses back to the UI, updating chat history live.
|
21 |
+
Allows graceful stopping of response generation upon user request.
|
22 |
+
|
23 |
+
Parameters:
|
24 |
+
- multi: dict containing user text input and uploaded files
|
25 |
+
- history: list of previous chat messages (user and AI)
|
26 |
+
- model_display: selected AI model identifier
|
27 |
+
- sess: current session object managing state and cancellation
|
28 |
+
- custom_prompt: user-defined system instructions
|
29 |
+
- deep_search: boolean flag to enable extended search capabilities
|
30 |
+
|
31 |
+
Yields:
|
32 |
+
- Updated chat history and UI state for real-time interaction
|
33 |
"""
|
34 |
+
ensure_stop_event(sess) # Ensure the session has a stop event initialized
|
35 |
+
sess.stop_event.clear() # Clear any previous stop signals
|
36 |
+
sess.cancel_token["cancelled"] = False # Reset cancellation flag
|
37 |
+
|
38 |
+
# Extract text and files from multimodal input dictionary
|
39 |
msg_input = {"text": multi.get("text", "").strip(), "files": multi.get("files", [])}
|
40 |
+
|
41 |
+
# If no input text or files, reset UI input and return early
|
42 |
if not msg_input["text"] and not msg_input["files"]:
|
43 |
yield history, gr.update(value="", interactive=True, submit_btn=True, stop_btn=False), sess
|
44 |
return
|
45 |
+
|
46 |
+
# Initialize combined input string with extracted file contents
|
47 |
inp = ""
|
48 |
for f in msg_input["files"]:
|
49 |
+
# Support both dict format and plain file path strings
|
50 |
fp = f.get("data", f.get("name", "")) if isinstance(f, dict) else f
|
51 |
+
# Append extracted file content with spacing
|
52 |
+
inp += f"```\n{extract_file_content(fp)}\n``` \n\n\n"
|
53 |
+
|
54 |
+
# Append user text input if present
|
55 |
if msg_input["text"]:
|
56 |
inp += msg_input["text"]
|
57 |
+
|
58 |
+
# Append user input to chat history with placeholder AI response
|
59 |
+
history.append([inp, RESPONSES["RESPONSE_8"]]) # RESPONSE_8 is the placeholder text shown while the AI responds
|
60 |
+
|
61 |
+
# Yield updated history and disable input while AI is responding
|
62 |
yield history, gr.update(interactive=False, submit_btn=False, stop_btn=True), sess
|
63 |
+
|
64 |
+
queue = asyncio.Queue() # Queue to hold streamed AI response chunks
|
65 |
+
|
66 |
async def background():
|
67 |
+
"""
|
68 |
+
Background async task to fetch streamed AI responses from the model.
|
69 |
+
Handles reasoning and content chunks separately.
|
70 |
+
Supports cancellation via session stop event.
|
71 |
+
"""
|
72 |
+
reasoning = "" # Accumulate reasoning text
|
73 |
+
responses = "" # Accumulate content text
|
74 |
+
content_started = False # Flag to indicate content streaming started
|
75 |
+
ignore_reasoning = False # Flag to ignore reasoning after content starts
|
76 |
+
|
77 |
+
# Async iterate over streaming response chunks from AI model
|
78 |
async for typ, chunk in chat_with_model_async(history, inp, model_display, sess, custom_prompt, deep_search):
|
79 |
+
# Break if user requested stop or cancellation flagged
|
80 |
if sess.stop_event.is_set() or sess.cancel_token["cancelled"]:
|
81 |
break
|
82 |
+
|
83 |
if typ == "reasoning":
|
84 |
+
# Append reasoning chunk unless ignoring reasoning after content start
|
85 |
if ignore_reasoning:
|
86 |
continue
|
87 |
reasoning += chunk
|
88 |
+
# Put the accumulated reasoning text into the queue for a UI update
|
89 |
+
await queue.put(("reasoning", reasoning))
|
90 |
+
|
91 |
elif typ == "content":
|
92 |
if not content_started:
|
93 |
+
# On first content chunk, clear reasoning and start content accumulation
|
94 |
content_started = True
|
95 |
ignore_reasoning = True
|
96 |
responses = chunk
|
97 |
+
await queue.put(("reasoning", "")) # Clear reasoning display
|
98 |
+
await queue.put(("replace", responses)) # Replace placeholder with content start
|
99 |
else:
|
100 |
+
# Append subsequent content chunks and update UI
|
101 |
responses += chunk
|
102 |
await queue.put(("append", responses))
|
103 |
+
|
104 |
+
await queue.put(None) # Signal completion of streaming
|
105 |
+
return responses # Return final complete response text
|
106 |
+
|
107 |
+
bg_task = asyncio.create_task(background()) # Start background streaming task
|
108 |
+
stop_task = asyncio.create_task(sess.stop_event.wait()) # Task to wait for stop event
|
109 |
+
pending_tasks = {bg_task, stop_task} # Track pending async tasks
|
110 |
+
|
111 |
try:
|
112 |
while True:
|
113 |
+
queue_task = asyncio.create_task(queue.get()) # Task to get next queued update
|
114 |
pending_tasks.add(queue_task)
|
115 |
+
|
116 |
+
# Wait for either stop event or new queue item
|
117 |
done, _ = await asyncio.wait({stop_task, queue_task}, return_when=asyncio.FIRST_COMPLETED)
|
118 |
+
|
119 |
for task in done:
|
120 |
pending_tasks.discard(task)
|
121 |
+
|
122 |
if task is stop_task:
|
123 |
+
# User requested stop, cancel background task and update UI accordingly
|
124 |
sess.cancel_token["cancelled"] = True
|
125 |
bg_task.cancel()
|
126 |
try:
|
127 |
await bg_task
|
128 |
except asyncio.CancelledError:
|
129 |
pass
|
130 |
+
# Update last message with cancellation notice
|
131 |
history[-1][1] = RESPONSES["RESPONSE_1"]
|
132 |
yield history, gr.update(value="", interactive=True, submit_btn=True, stop_btn=False), sess
|
133 |
return
|
134 |
+
|
135 |
result = task.result()
|
136 |
if result is None:
|
137 |
+
# Streaming finished, stop iteration
|
138 |
raise StopAsyncIteration
|
139 |
+
|
140 |
action, text = result
|
141 |
+
# Update last message content in history with streamed text chunk
|
142 |
history[-1][1] = text
|
143 |
+
# Yield updated history and UI state to refresh chat display
|
144 |
yield history, gr.update(interactive=False, submit_btn=False, stop_btn=True), sess
|
145 |
+
|
146 |
except StopAsyncIteration:
|
147 |
+
# Normal completion of streaming
|
148 |
pass
|
149 |
+
|
150 |
finally:
|
151 |
+
# Cancel any remaining pending tasks to clean up
|
152 |
for task in pending_tasks:
|
153 |
task.cancel()
|
154 |
await asyncio.gather(*pending_tasks, return_exceptions=True)
|
155 |
+
|
156 |
+
# After completion, reset UI input to ready state
|
157 |
yield history, gr.update(value="", interactive=True, submit_btn=True, stop_btn=False), sess
|
158 |
|
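
The streaming loop above follows a standard producer/consumer shape; a minimal self-contained sketch of that pattern (all names here are illustrative, not the application's own):

import asyncio

async def producer(queue, stop):
    for chunk in ["Hel", "lo", "!"]:
        if stop.is_set():
            break               # honor a cancellation request
        await queue.put(chunk)  # stream a partial result
    await queue.put(None)       # sentinel: stream finished

async def main():
    queue, stop = asyncio.Queue(), asyncio.Event()
    task = asyncio.create_task(producer(queue, stop))
    while (chunk := await queue.get()) is not None:
        print(chunk, end="", flush=True)  # render each streamed piece as it arrives
    await task

asyncio.run(main())
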
159 |
def toggle_deep_search(deep_search_value, history, sess, prompt, model):
|
160 |
"""
|
161 |
+
Toggle the deep search checkbox state.
|
162 |
+
Leaves the current chat history and session unchanged.
|
163 |
+
|
164 |
+
Parameters:
|
165 |
+
- deep_search_value: new checkbox boolean value
|
166 |
+
- history: current chat history
|
167 |
+
- sess: current session object
|
168 |
+
- prompt: current system instructions
|
169 |
+
- model: currently selected model
|
170 |
+
|
171 |
+
Returns:
|
172 |
+
- Unchanged history, session, prompt, model
|
173 |
+
- Updated deep search checkbox UI state
|
174 |
"""
|
175 |
return history, sess, prompt, model, gr.update(value=deep_search_value)
|
176 |
|
177 |
def change_model(new):
|
178 |
"""
|
179 |
+
Handler to change the selected AI model.
|
180 |
+
Resets chat history and creates a new session.
|
181 |
+
Updates system instructions and deep search checkbox visibility.
|
182 |
+
Deep search is only enabled for the default model.
|
183 |
+
|
184 |
+
Parameters:
|
185 |
+
- new: newly selected model identifier
|
186 |
+
|
187 |
+
Returns:
|
188 |
+
- Empty chat history list
|
189 |
+
- New session object
|
190 |
+
- New model identifier
|
191 |
+
- Corresponding system instructions string
|
192 |
+
- Deep search checkbox reset to False
|
193 |
+
- UI update for deep search checkbox visibility
|
194 |
"""
|
195 |
+
visible = new == MODEL_CHOICES[0] # Deep search visible only for default model
|
196 |
+
|
197 |
+
# Get system instructions for new model or fallback to default instructions
|
198 |
default_prompt = SYSTEM_PROMPT_MAPPING.get(get_model_key(new, MODEL_MAPPING, DEFAULT_MODEL_KEY), SYSTEM_PROMPT_DEFAULT)
|
199 |
+
|
200 |
+
# Clear chat, create new session, reset deep search, update UI visibility
|
201 |
return [], create_session(), new, default_prompt, False, gr.update(visible=visible)
|
202 |
|
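
For context on the `gr.update(visible=visible)` value returned above: an event handler can patch properties of an output component instead of recreating it. A minimal sketch (component names are illustrative):

import gradio as gr

def toggle(show):
    # Returning gr.update(...) patches the bound output component in place
    return gr.update(visible=show)

with gr.Blocks() as demo:
    box = gr.Checkbox(label="Show details")
    details = gr.Markdown("Some details", visible=False)
    box.change(fn=toggle, inputs=box, outputs=details)
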
203 |
def stop_response(history, sess):
|
204 |
"""
|
205 |
Handler to stop ongoing AI response generation.
|
206 |
+
Sets cancellation flags and updates the last message to a cancellation notice.
|
207 |
+
|
208 |
+
Parameters:
|
209 |
+
- history: current chat history list
|
210 |
+
- sess: current session object
|
211 |
+
|
212 |
+
Returns:
|
213 |
+
- Updated chat history with cancellation message
|
214 |
+
- None for input box reset
|
215 |
+
- New session object for fresh state
|
216 |
"""
|
217 |
+
ensure_stop_event(sess) # Ensure stop event exists in session
|
218 |
+
sess.stop_event.set() # Signal stop event to cancel ongoing tasks
|
219 |
+
sess.cancel_token["cancelled"] = True # Mark cancellation flag
|
220 |
+
|
221 |
if history:
|
222 |
+
# Replace last AI response with cancellation message
|
223 |
history[-1][1] = RESPONSES["RESPONSE_1"]
|
224 |
+
|
225 |
return history, None, create_session()
|
226 |
|
227 |
def launch_ui():
|
228 |
+
"""
|
229 |
+
Launch the Gradio UI for the chatbot application.
|
230 |
+
Sets up the UI components, event handlers, and starts the server.
|
231 |
+
Installs required OCR dependencies for file content extraction.
|
232 |
+
"""
|
233 |
# ============================
|
234 |
# System Setup
|
235 |
# ============================
|
236 |
+
|
237 |
+
# Install Tesseract OCR and dependencies for extracting text from images
|
238 |
import os
|
239 |
os.system("apt-get update -q -y && \
|
240 |
apt-get install -q -y tesseract-ocr \
|
241 |
tesseract-ocr-eng tesseract-ocr-ind \
|
242 |
libleptonica-dev libtesseract-dev"
|
243 |
)
|
244 |
+
|
245 |
+
# Create Gradio Blocks container for full UI layout
|
246 |
with gr.Blocks(fill_height=True, fill_width=True, title=AI_TYPES["AI_TYPE_4"], head=META_TAGS) as jarvis:
|
247 |
+
# State variables to hold chat history, session, selected model, and instructions
|
248 |
user_history = gr.State([])
|
249 |
user_session = gr.State(create_session())
|
250 |
selected_model = gr.State(MODEL_CHOICES[0] if MODEL_CHOICES else "")
|
251 |
J_A_R_V_I_S = gr.State("")
|
252 |
+
|
253 |
# Chatbot UI
|
254 |
+
with gr.Column():
|
255 |
+
chatbot = gr.Chatbot(label=AI_TYPES["AI_TYPE_1"], show_copy_button=True, scale=1, elem_id=AI_TYPES["AI_TYPE_2"], examples=JARVIS_INIT, allow_tags=["think", "thinking"])
|
256 |
+
|
257 |
+
# User input
|
258 |
+
msg = gr.MultimodalTextbox(show_label=False, placeholder=RESPONSES["RESPONSE_5"], interactive=True, file_count=None, file_types=None, sources=[])
|
259 |
+
|
260 |
+
# Sidebar on left for model selection and deep search toggle
|
261 |
with gr.Sidebar(open=False):
|
262 |
deep_search = gr.Checkbox(label=AI_TYPES["AI_TYPE_8"], value=False, info=AI_TYPES["AI_TYPE_9"], visible=True)
|
263 |
+
# When deep search checkbox changes, call toggle_deep_search handler
|
264 |
deep_search.change(fn=toggle_deep_search, inputs=[deep_search, user_history, user_session, J_A_R_V_I_S, selected_model], outputs=[chatbot, user_session, J_A_R_V_I_S, selected_model, deep_search])
|
265 |
+
gr.Markdown() # Empty Markdown block used as a visual spacer
|
266 |
model_radio = gr.Radio(show_label=False, choices=MODEL_CHOICES, value=MODEL_CHOICES[0])
|
267 |
+
|
268 |
+
# Sidebar on right for notices and additional information
|
269 |
+
with gr.Sidebar(open=False, position="right"):
|
270 |
+
gr.Markdown(NOTICES)
|
271 |
+
|
272 |
+
# When model selection changes, call change_model handler
|
273 |
model_radio.change(fn=change_model, inputs=[model_radio], outputs=[user_history, user_session, selected_model, J_A_R_V_I_S, deep_search, deep_search])
|
274 |
+
|
275 |
+
# Event handler for selecting example messages in chatbot UI
|
276 |
+
def on_example_select(evt: gr.SelectData):
|
277 |
+
return evt.value
|
278 |
+
|
279 |
+
chatbot.example_select(fn=on_example_select, inputs=[], outputs=[msg]).then(
|
280 |
+
fn=respond_async,
|
281 |
+
inputs=[msg, user_history, selected_model, user_session, J_A_R_V_I_S, deep_search],
|
282 |
+
outputs=[chatbot, msg, user_session]
|
283 |
+
)
|
284 |
+
|
285 |
+
# Clear chat button handler resets the chat display, session, and history while keeping instructions and model
|
286 |
+
def clear_chat(history, sess, prompt, model):
|
287 |
+
return [], create_session(), prompt, model, []
|
288 |
+
|
289 |
chatbot.clear(fn=clear_chat, inputs=[user_history, user_session, J_A_R_V_I_S, selected_model], outputs=[chatbot, user_session, J_A_R_V_I_S, selected_model, user_history])
|
290 |
+
|
291 |
+
# Submitting a user message triggers respond_async to generate the AI response
|
292 |
msg.submit(fn=respond_async, inputs=[msg, user_history, selected_model, user_session, J_A_R_V_I_S, deep_search], outputs=[chatbot, msg, user_session], api_name=INTERNAL_AI_GET_SERVER)
|
293 |
+
|
294 |
+
# Stop button triggers stop_response handler to cancel ongoing AI generation
|
295 |
msg.stop(fn=stop_response, inputs=[user_history, user_session], outputs=[chatbot, msg, user_session])
|
296 |
+
|
297 |
# Launch
|
298 |
jarvis.queue(default_concurrency_limit=2).launch(max_file_size="1mb", mcp_server=True)
|
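
One last aside on the system-setup step inside launch_ui: `os.system` silently ignores install failures. A minimal alternative sketch using subprocess, assuming the same Debian-based image the apt-get call implies:

import subprocess

# Same Tesseract install as the os.system call above, but with explicit error checking
subprocess.run(
    "apt-get update -q -y && apt-get install -q -y tesseract-ocr "
    "tesseract-ocr-eng tesseract-ocr-ind libleptonica-dev libtesseract-dev",
    shell=True,
    check=True,  # raise CalledProcessError if the install fails
)
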