Abid Ali Awan committed on
Commit
6598f74
·
1 Parent(s): 8159b22

Enhance app.py to implement a Gradio interface for the RegRadar AI Regulatory Compliance Assistant, featuring a chatbot, example queries, and a tool information panel. Refactor UIHandler to streamline chat processing and improve regulatory query handling. Update web_tools.py to modularize crawling and searching functionalities, enhancing code readability and maintainability.

Browse files
Files changed (4) hide show
  1. agents/ui_handler.py +75 -174
  2. app.py +118 -1
  3. tools/memory_tools.py +5 -3
  4. tools/web_tools.py +65 -38
agents/ui_handler.py CHANGED
@@ -6,7 +6,6 @@ import gradio as gr
6
  from gradio import ChatMessage
7
 
8
  from agents.reg_radar import RegRadarAgent
9
- from config.settings import AVATAR_IMAGES
10
  from tools.llm import stream_llm
11
 
12
 
@@ -37,26 +36,37 @@ class UIHandler:
37
  is_regulatory = self.agent.is_regulatory_query(message)
38
 
39
  if not is_regulatory:
40
- # General chat
41
- history.append(
42
- ChatMessage(role="assistant", content="💬 Processing general query...")
43
- )
44
- yield history, "", gr.update(interactive=False), user_id_state
45
 
46
- # Clear processing message and stream response
47
- history.pop()
48
- streaming_content = ""
49
- history.append(ChatMessage(role="assistant", content=""))
50
 
51
- for chunk in stream_llm(message):
52
- streaming_content += chunk
53
- history[-1] = ChatMessage(role="assistant", content=streaming_content)
54
- yield history, "", gr.update(interactive=False), user_id_state
 
 
55
 
56
- # Re-enable input box at the end
57
- yield history, "", gr.update(interactive=True), user_id_state
58
- return
 
 
 
 
 
 
59
 
 
 
 
 
 
 
 
60
  # Show tool detection
61
  tool_key, tool_name = self.agent.determine_intended_tool(message)
62
 
@@ -102,51 +112,13 @@ class UIHandler:
102
 
103
  # Show collapsible raw results
104
  if crawl_results["results"]:
105
- # Format results for display, remove duplicates by URL
106
- seen_urls = set()
107
- results_display = []
108
- count = 0
109
- for result in crawl_results["results"]:
110
- url = result["url"]
111
- if url in seen_urls:
112
- continue
113
- seen_urls.add(url)
114
- title = result["title"][:100] if result["title"] else "No Title"
115
- count += 1
116
- results_display.append(f"""
117
- **{count}. {result["source"]}**
118
- - Title: {title}...
119
- - URL: {url}
120
- """)
121
- if results_display:
122
- collapsible_results = f"""
123
- <details>
124
- <summary><strong>📋 Raw Regulatory Data</strong> - Click to expand</summary>
125
-
126
- {"".join(results_display)}
127
-
128
- </details>
129
- """
130
- else:
131
- collapsible_results = "<details><summary><strong>📋 Raw Regulatory Data</strong> - Click to expand</summary>\nNo unique regulatory updates found.\n</details>"
132
  history.append(ChatMessage(role="assistant", content=collapsible_results))
133
  yield history, "", gr.update(interactive=False), user_id_state
134
 
135
  # Display memory results if available
136
  if memory_results:
137
- top_memories = memory_results[:3]
138
- memory_details = ""
139
- for i, mem in enumerate(top_memories, 1):
140
- memory_text = mem.get("memory", "N/A")
141
- memory_details += f"\n**{i}. Memory:** {memory_text[:300]}...\n"
142
- memory_msg = f"""
143
- <details>
144
- <summary><strong>💾 Related Past Queries</strong> - Click to expand</summary>
145
-
146
- Found {len(memory_results)} similar past queries in memory. Top 3 shown below:
147
- {memory_details}
148
- </details>
149
- """
150
  history.append(ChatMessage(role="assistant", content=memory_msg))
151
  yield history, "", gr.update(interactive=False), user_id_state
152
 
@@ -186,124 +158,53 @@ Found {len(memory_results)} similar past queries in memory. Top 3 shown below:
186
  daemon=True,
187
  ).start()
188
 
189
- def delayed_clear(self, user_id_state):
190
- time.sleep(0.1) # 100ms delay to allow generator cancellation
191
- return [], "", gr.update(interactive=True), user_id_state
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
 
193
- def create_ui(self):
194
- """Create Gradio interface"""
195
- with gr.Blocks(
196
- title="RegRadar - AI Regulatory Compliance Assistant",
197
- theme=gr.themes.Soft(),
198
- css="""
199
- .tool-status {
200
- background-color: #f0f4f8;
201
- padding: 10px;
202
- border-radius: 5px;
203
- margin: 10px 0;
204
- }
205
- """,
206
- ) as demo:
207
- # Header
208
- gr.HTML("""
209
- <center>
210
- <h1>🛡️ RegRadar</h1>
211
- AI-powered regulatory compliance assistant that monitors global regulations
212
- </center>
213
- """)
214
-
215
- # Main chat interface
216
- chatbot = gr.Chatbot(
217
- height=400,
218
- type="messages",
219
- avatar_images=AVATAR_IMAGES,
220
- show_copy_button=True,
221
- )
222
 
223
- with gr.Row(equal_height=True):
224
- msg = gr.Textbox(
225
- placeholder="Ask about regulatory updates, compliance requirements, or any industry regulations...",
226
- show_label=False,
227
- scale=18,
228
- autofocus=True,
229
- )
230
- submit = gr.Button("Send", variant="primary", scale=1, min_width=60)
231
- stop = gr.Button("Stop", variant="stop", scale=1, min_width=60)
232
- clear = gr.Button("Clear", scale=1, min_width=60)
233
-
234
- # Add user_id_state for session
235
- user_id_state = gr.State()
236
-
237
- # Example queries
238
- example_queries = [
239
- "Show me the latest SEC regulations for fintech",
240
- "What are the new data privacy rules in the EU?",
241
- "Any updates on ESG compliance for energy companies?",
242
- "Scan for healthcare regulations in the US",
243
- "What are the global trends in AI regulation?",
244
- ]
245
-
246
- gr.Examples(examples=example_queries, inputs=msg, label="Example Queries")
247
-
248
- # Tool information panel
249
- with gr.Accordion("🛠️ Available Tools", open=False):
250
- gr.Markdown("""
251
- ### RegRadar uses these intelligent tools:
252
-
253
- **🧠 Query Type Detection**
254
- - Automatically detects if your message is a regulatory compliance query or a general question
255
- - Selects the appropriate tools and response style based on your intent
256
-
257
- **📩 Information Extraction**
258
- - Extracts key details (industry, region, keywords) from your command
259
- - Ensures accurate and relevant regulatory analysis
260
-
261
- **🔍 Regulatory Web Crawler**
262
- - Crawls official regulatory websites (SEC, FDA, FTC, etc.)
263
- - Searches for recent updates and compliance changes
264
- - Focuses on last 30 days of content
265
-
266
- **🌐 Regulatory Search Engine**
267
- - Searches across multiple sources for regulatory updates
268
- - Finds industry-specific compliance information
269
- - Aggregates results from various regulatory bodies
270
-
271
- **💾 Memory System**
272
- - Remembers past queries and responses
273
- - Learns from your compliance interests
274
- - Provides context from previous interactions
275
- - Each session creates a new user for personalization
276
-
277
- **🤖 AI Analysis Engine**
278
- - Analyzes and summarizes regulatory findings
279
- - Generates actionable compliance recommendations
280
- - Creates executive summaries and action items
281
- """)
282
-
283
- # Event handlers
284
- submit_event = msg.submit(
285
- self.streaming_chatbot,
286
- [msg, chatbot, user_id_state],
287
- [chatbot, msg, msg, user_id_state],
288
- )
289
- click_event = submit.click(
290
- self.streaming_chatbot,
291
- [msg, chatbot, user_id_state],
292
- [chatbot, msg, msg, user_id_state],
293
- )
294
- stop.click(None, cancels=[submit_event, click_event])
295
- clear.click(
296
- self.delayed_clear,
297
- inputs=[user_id_state],
298
- outputs=[chatbot, msg, msg, user_id_state],
299
- )
300
 
301
- # Footer
302
- gr.HTML("""
303
- <div style="text-align: center; padding: 20px; color: #666; font-size: 0.9rem;">
304
- <p>RegRadar monitors regulatory updates from the SEC, EU Commission, and other leading global authorities.</p>
305
- <p>All analyses are AI-generated. Please verify findings with official regulatory sources.</p>
306
- </div>
307
- """)
308
 
309
- return demo
 
 
 
6
  from gradio import ChatMessage
7
 
8
  from agents.reg_radar import RegRadarAgent
 
9
  from tools.llm import stream_llm
10
 
11
 
 
36
  is_regulatory = self.agent.is_regulatory_query(message)
37
 
38
  if not is_regulatory:
39
+ yield from self._handle_general_chat(message, history, user_id_state)
40
+ return
 
 
 
41
 
42
+ yield from self._handle_regulatory_chat(
43
+ message, history, user_id_state, user_id, start_time
44
+ )
 
45
 
46
def _handle_general_chat(self, message, history, user_id_state):
    """Stream a plain LLM answer for a non-regulatory message.

    This is a generator. Every yielded item is the 4-tuple
    ``(history, "", textbox_update, user_id_state)``: the chat history, an
    empty string for the textbox value, a ``gr.update`` that toggles the
    textbox's ``interactive`` flag, and the unchanged session state.

    Args:
        message: The user's text, passed as-is to ``stream_llm``.
        history: List of chat messages; mutated in place as chunks arrive.
        user_id_state: Session state value, passed through untouched.
    """

    def frame(input_enabled):
        # One UI update; a fresh gr.update is built for every frame.
        return history, "", gr.update(interactive=input_enabled), user_id_state

    # Show a temporary status bubble while the request starts.
    history.append(
        ChatMessage(role="assistant", content="💬 Processing general query...")
    )
    yield frame(False)

    # Swap the status bubble for an empty one that the stream fills in.
    history.pop()
    history.append(ChatMessage(role="assistant", content=""))
    answer_so_far = ""
    for piece in stream_llm(message):
        answer_so_far += piece
        history[-1] = ChatMessage(role="assistant", content=answer_so_far)
        yield frame(False)

    # Final frame: same content, textbox interactive again.
    yield frame(True)
65
+
66
+ def _handle_regulatory_chat(
67
+ self, message, history, user_id_state, user_id, start_time
68
+ ):
69
+ """Handle regulatory chat flow."""
70
  # Show tool detection
71
  tool_key, tool_name = self.agent.determine_intended_tool(message)
72
 
 
112
 
113
  # Show collapsible raw results
114
  if crawl_results["results"]:
115
+ collapsible_results = self._format_crawl_results(crawl_results["results"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  history.append(ChatMessage(role="assistant", content=collapsible_results))
117
  yield history, "", gr.update(interactive=False), user_id_state
118
 
119
  # Display memory results if available
120
  if memory_results:
121
+ memory_msg = self._format_memory_results(memory_results)
 
 
 
 
 
 
 
 
 
 
 
 
122
  history.append(ChatMessage(role="assistant", content=memory_msg))
123
  yield history, "", gr.update(interactive=False), user_id_state
124
 
 
158
  daemon=True,
159
  ).start()
160
 
161
+ def _format_crawl_results(self, results):
162
+ """Format crawl results for display, removing duplicates by URL."""
163
+ seen_urls = set()
164
+ results_display = []
165
+ count = 0
166
+ for result in results:
167
+ url = result["url"]
168
+ if url in seen_urls:
169
+ continue
170
+ seen_urls.add(url)
171
+ title = result["title"][:100] if result["title"] else "No Title"
172
+ count += 1
173
+ results_display.append(f"""
174
+ **{count}. {result["source"]}**
175
+ - Title: {title}...
176
+ - URL: {url}
177
+ """)
178
+ if results_display:
179
+ collapsible_results = f"""
180
+ <details>
181
+ <summary><strong>📋 Raw Regulatory Data</strong> - Click to expand</summary>
182
 
183
+ {"".join(results_display)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
 
185
+ </details>
186
+ """
187
+ else:
188
+ collapsible_results = "<details><summary><strong>📋 Raw Regulatory Data</strong> - Click to expand</summary>\nNo unique regulatory updates found.\n</details>"
189
+ return collapsible_results
190
+
191
+ def _format_memory_results(self, memory_results):
192
+ """Format memory results for display."""
193
+ top_memories = memory_results[:3]
194
+ memory_details = ""
195
+ for i, mem in enumerate(top_memories, 1):
196
+ memory_text = mem.get("memory", "N/A")
197
+ memory_details += f"\n**{i}. Memory:** {memory_text[:300]}...\n"
198
+ memory_msg = f"""
199
+ <details>
200
+ <summary><strong>💾 Related Past Queries</strong> - Click to expand</summary>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
 
202
+ Found {len(memory_results)} similar past queries in memory. Top 3 shown below:
203
+ {memory_details}
204
+ </details>
205
+ """
206
+ return memory_msg
 
 
207
 
208
def delayed_clear(self, user_id_state):
    """Reset the chat UI after a short pause.

    Returns the 4-tuple ``(history, textbox_value, textbox_update,
    user_id_state)``: an empty history, an empty textbox value, a
    ``gr.update`` making the textbox interactive, and the session state
    unchanged.
    """
    # 100 ms pause so a streaming generator being cancelled can stop first.
    time.sleep(0.1)
    empty_history = []
    empty_textbox = ""
    return empty_history, empty_textbox, gr.update(interactive=True), user_id_state
app.py CHANGED
@@ -7,14 +7,131 @@ compliance guidance for various industries and regions.
7
 
8
  import warnings
9
 
 
 
10
  from agents.ui_handler import UIHandler
 
11
 
12
  warnings.filterwarnings("ignore", category=DeprecationWarning)
13
 
14
 
15
  def create_demo():
16
  ui_handler = UIHandler() # New user for each session
17
- return ui_handler.create_ui()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
 
20
  def main():
 
7
 
8
  import warnings
9
 
10
+ import gradio as gr
11
+
12
  from agents.ui_handler import UIHandler
13
+ from config.settings import AVATAR_IMAGES
14
 
15
  warnings.filterwarnings("ignore", category=DeprecationWarning)
16
 
17
 
18
  def create_demo():
19
  ui_handler = UIHandler() # New user for each session
20
+ with gr.Blocks(
21
+ title="RegRadar - AI Regulatory Compliance Assistant",
22
+ theme=gr.themes.Soft(),
23
+ css="""
24
+ .tool-status {
25
+ background-color: #f0f4f8;
26
+ padding: 10px;
27
+ border-radius: 5px;
28
+ margin: 10px 0;
29
+ }
30
+ """,
31
+ ) as demo:
32
+ # Header
33
+ gr.HTML("""
34
+ <center>
35
+ <h1>🛡️ RegRadar</h1>
36
+ AI-powered regulatory compliance assistant that monitors global regulations
37
+ </center>
38
+ """)
39
+
40
+ # Main chat interface
41
+ chatbot = gr.Chatbot(
42
+ height=400,
43
+ type="messages",
44
+ avatar_images=AVATAR_IMAGES,
45
+ show_copy_button=True,
46
+ )
47
+
48
+ with gr.Row(equal_height=True):
49
+ msg = gr.Textbox(
50
+ placeholder="Ask about regulatory updates, compliance requirements, or any industry regulations...",
51
+ show_label=False,
52
+ scale=18,
53
+ autofocus=True,
54
+ )
55
+ submit = gr.Button("Send", variant="primary", scale=1, min_width=60)
56
+ stop = gr.Button("Stop", variant="stop", scale=1, min_width=60)
57
+ clear = gr.Button("Clear", scale=1, min_width=60)
58
+
59
+ # Add user_id_state for session
60
+ user_id_state = gr.State()
61
+
62
+ # Example queries
63
+ example_queries = [
64
+ "Show me the latest SEC regulations for fintech",
65
+ "What are the new data privacy rules in the EU?",
66
+ "Any updates on ESG compliance for energy companies?",
67
+ "Scan for healthcare regulations in the US",
68
+ "What are the global trends in AI regulation?",
69
+ ]
70
+
71
+ gr.Examples(examples=example_queries, inputs=msg, label="Example Queries")
72
+
73
+ # Tool information panel
74
+ with gr.Accordion("🛠️ Available Tools", open=False):
75
+ gr.Markdown("""
76
+ ### RegRadar uses these intelligent tools:
77
+
78
+ **🧠 Query Type Detection**
79
+ - Automatically detects if your message is a regulatory compliance query or a general question
80
+ - Selects the appropriate tools and response style based on your intent
81
+
82
+ **📩 Information Extraction**
83
+ - Extracts key details (industry, region, keywords) from your command
84
+ - Ensures accurate and relevant regulatory analysis
85
+
86
+ **🔍 Regulatory Web Crawler**
87
+ - Crawls official regulatory websites (SEC, FDA, FTC, etc.)
88
+ - Searches for recent updates and compliance changes
89
+ - Focuses on last 30 days of content
90
+
91
+ **🌐 Regulatory Search Engine**
92
+ - Searches across multiple sources for regulatory updates
93
+ - Finds industry-specific compliance information
94
+ - Aggregates results from various regulatory bodies
95
+
96
+ **💾 Memory System**
97
+ - Remembers past queries and responses
98
+ - Learns from your compliance interests
99
+ - Provides context from previous interactions
100
+ - Each session creates a new user for personalization
101
+
102
+ **🤖 AI Analysis Engine**
103
+ - Analyzes and summarizes regulatory findings
104
+ - Generates actionable compliance recommendations
105
+ - Creates executive summaries and action items
106
+ """)
107
+
108
+ # Event handlers
109
+ submit_event = msg.submit(
110
+ ui_handler.streaming_chatbot,
111
+ [msg, chatbot, user_id_state],
112
+ [chatbot, msg, msg, user_id_state],
113
+ )
114
+ click_event = submit.click(
115
+ ui_handler.streaming_chatbot,
116
+ [msg, chatbot, user_id_state],
117
+ [chatbot, msg, msg, user_id_state],
118
+ )
119
+ stop.click(None, cancels=[submit_event, click_event])
120
+ clear.click(
121
+ ui_handler.delayed_clear,
122
+ inputs=[user_id_state],
123
+ outputs=[chatbot, msg, msg, user_id_state],
124
+ )
125
+
126
+ # Footer
127
+ gr.HTML("""
128
+ <div style="text-align: center; padding: 20px; color: #666; font-size: 0.9rem;">
129
+ <p>RegRadar monitors regulatory updates from the SEC, EU Commission, and other leading global authorities.</p>
130
+ <p>All analyses are AI-generated. Please verify findings with official regulatory sources.</p>
131
+ </div>
132
+ """)
133
+
134
+ return demo
135
 
136
 
137
  def main():
tools/memory_tools.py CHANGED
@@ -1,10 +1,13 @@
1
- from typing import List, Dict
 
2
  from mem0 import MemoryClient
 
3
  from config.settings import MEM0_API_KEY
4
 
5
  # Initialize memory client
6
  mem0_client = MemoryClient(api_key=MEM0_API_KEY)
7
 
 
8
  class MemoryTools:
9
  def save_to_memory(self, user_id: str, query: str, response: str):
10
  """Save interaction to memory"""
@@ -26,6 +29,5 @@ class MemoryTools:
26
  try:
27
  memories = mem0_client.search(query=query, user_id=user_id, limit=3)
28
  return memories
29
- except:
30
  return []
31
-
 
1
+ from typing import Dict, List
2
+
3
  from mem0 import MemoryClient
4
+
5
  from config.settings import MEM0_API_KEY
6
 
7
  # Initialize memory client
8
  mem0_client = MemoryClient(api_key=MEM0_API_KEY)
9
 
10
+
11
  class MemoryTools:
12
  def save_to_memory(self, user_id: str, query: str, response: str):
13
  """Save interaction to memory"""
 
29
  try:
30
  memories = mem0_client.search(query=query, user_id=user_id, limit=3)
31
  return memories
32
+ except Exception:
33
  return []
 
tools/web_tools.py CHANGED
@@ -15,46 +15,71 @@ class WebTools:
15
  self.cached_searches = {}
16
 
17
  def generate_cache_key(self, industry: str, region: str, keywords: str) -> str:
18
- """Generate a unique cache key"""
 
 
19
  key = f"{industry}:{region}:{keywords}".lower()
20
  return hashlib.md5(key.encode()).hexdigest()
21
 
22
  def crawl_regulatory_sites(self, industry: str, region: str, keywords: str) -> Dict:
23
- """Crawl regulatory websites for updates"""
24
- # Check cache first
 
25
  cache_key = self.generate_cache_key(industry, region, keywords)
26
  if cache_key in self.cached_searches:
27
  return self.cached_searches[cache_key]
28
 
29
  urls_to_crawl = REGULATORY_SOURCES.get(region, REGULATORY_SOURCES["US"])
30
  all_results = []
31
-
32
  crawl_instructions = (
33
  f"Recent {industry} {region} regulatory updates: {keywords}, 30 days"
34
  )
35
 
36
- # Crawl regulatory sites
37
  for source_name, url in list(urls_to_crawl.items())[:3]:
38
- try:
39
- crawl_response = tavily_client.crawl(
40
- url=url, max_depth=2, limit=5, instructions=crawl_instructions
41
- )
42
- for result in crawl_response.get("results", []):
43
- title = result.get("title")
44
- if not title or title == "No Title...":
45
- title = SOURCE_FULL_NAMES.get(source_name, source_name)
46
- all_results.append(
47
- {
48
- "source": source_name,
49
- "url": result.get("url", url),
50
- "title": title,
51
- "content": result.get("raw_content", "")[:1500],
52
- }
53
- )
54
- except Exception as e:
55
- print(f"Crawl error for {source_name}: {e}")
56
 
57
  # General search
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  try:
59
  search_results = tavily_client.search(
60
  query=f"{industry} {region} regulatory updates compliance {keywords} 2024 2025",
@@ -62,7 +87,7 @@ class WebTools:
62
  include_raw_content=True,
63
  )
64
  for result in search_results.get("results", []):
65
- all_results.append(
66
  {
67
  "source": "Web Search",
68
  "url": result.get("url", ""),
@@ -72,24 +97,26 @@ class WebTools:
72
  )
73
  except Exception as e:
74
  print(f"Search error: {e}")
75
-
76
- results = {"results": all_results, "total_found": len(all_results)}
77
- self.cached_searches[cache_key] = results
78
  return results
79
 
80
  def extract_parameters(self, message: str) -> Dict:
81
- """Extract industry, region, and keywords from the query using LLM (no function calling)."""
82
- prompt = f"""
83
- Extract the following information from the user query below and return ONLY a valid JSON object with keys: industry, region, keywords.
84
- - industry: The industry mentioned or implied (e.g., fintech, healthcare, energy, general).
85
- - region: The region or country explicitly mentioned (e.g., US, EU, UK, Asia, Global).
86
- - keywords: The most important regulatory topics or terms, separated by commas. Do NOT include generic words or verbs.
87
-
88
- User query: {message}
89
-
90
- Example output:
91
- {{"industry": "fintech", "region": "US", "keywords": "SEC regulations"}}
92
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  import json
94
 
95
  response = call_llm(prompt)
 
15
  self.cached_searches = {}
16
 
17
def generate_cache_key(self, industry: str, region: str, keywords: str) -> str:
    """Return the MD5 hex digest that identifies a query.

    The three fields are joined with ``:`` and lower-cased before hashing,
    so queries that differ only in letter case share one key.
    """
    normalized = "{}:{}:{}".format(industry, region, keywords).lower()
    digest = hashlib.md5(normalized.encode())
    return digest.hexdigest()
23
 
24
def crawl_regulatory_sites(self, industry: str, region: str, keywords: str) -> Dict:
    """Collect regulatory updates for a query, using a per-instance cache.

    On a cache hit the stored dict is returned as-is. Otherwise the first
    three sources configured for ``region`` (falling back to the ``"US"``
    sources when the region is not configured) are crawled, a general web
    search is appended, and the combined result is cached and returned.

    Returns:
        ``{"results": [...], "total_found": <number of results>}``.
    """
    cache_key = self.generate_cache_key(industry, region, keywords)
    try:
        return self.cached_searches[cache_key]
    except KeyError:
        pass  # Not cached yet; fall through and build the result.

    sources = REGULATORY_SOURCES.get(region, REGULATORY_SOURCES["US"])
    collected = []
    instructions = (
        f"Recent {industry} {region} regulatory updates: {keywords}, 30 days"
    )

    # Crawl regulatory sites (only the first 3 configured sources).
    first_three = list(sources.items())[:3]
    for name, site_url in first_three:
        collected += self._get_crawl_results(name, site_url, instructions)

    # General search
    collected += self._get_search_results(industry, region, keywords)

    payload = {"results": collected, "total_found": len(collected)}
    self.cached_searches[cache_key] = payload
    return payload
52
+
53
def _get_crawl_results(self, source_name: str, url: str, instructions: str) -> list:
    """Crawl a single regulatory source and return formatted results.

    Args:
        source_name: Key identifying the source; stored on every result and
            used to look up a fallback title in ``SOURCE_FULL_NAMES``.
        url: Start URL passed to the crawler; also used as the result URL
            when a crawled item carries none.
        instructions: Free-text instructions passed to the crawler.

    Returns:
        A list of dicts with ``source``, ``url``, ``title`` and ``content``
        keys (content capped at 1500 characters). Any exception from the
        crawl is printed and whatever was collected so far is returned, so
        a failing source never aborts the caller.
    """
    max_content_chars = 1500
    results = []
    try:
        crawl_response = tavily_client.crawl(
            url=url, max_depth=2, limit=5, instructions=instructions
        )
        for result in crawl_response.get("results", []):
            title = result.get("title")
            if not title or title == "No Title...":
                title = SOURCE_FULL_NAMES.get(source_name, source_name)
            # `or` covers keys that are present but hold None; with a plain
            # .get default, None[:1500] would raise and the except below
            # would silently drop every remaining result for this source.
            raw_content = result.get("raw_content") or ""
            results.append(
                {
                    "source": source_name,
                    "url": result.get("url") or url,
                    "title": title,
                    "content": raw_content[:max_content_chars],
                }
            )
    except Exception as e:
        print(f"Crawl error for {source_name}: {e}")
    return results
77
+
78
+ def _get_search_results(self, industry: str, region: str, keywords: str) -> list:
79
+ """
80
+ Perform a general web search and return formatted results.
81
+ """
82
+ results = []
83
  try:
84
  search_results = tavily_client.search(
85
  query=f"{industry} {region} regulatory updates compliance {keywords} 2024 2025",
 
87
  include_raw_content=True,
88
  )
89
  for result in search_results.get("results", []):
90
+ results.append(
91
  {
92
  "source": "Web Search",
93
  "url": result.get("url", ""),
 
97
  )
98
  except Exception as e:
99
  print(f"Search error: {e}")
 
 
 
100
  return results
101
 
102
  def extract_parameters(self, message: str) -> Dict:
 
 
 
 
 
 
 
 
 
 
 
103
  """
104
+ Extract industry, region, and keywords from the query using LLM (no function calling).
105
+ """
106
+ prompt = (
107
+ """
108
+ Extract the following information from the user query below and return ONLY a valid JSON object with keys: industry, region, keywords.
109
+ - industry: The industry mentioned or implied (e.g., fintech, healthcare, energy, general).
110
+ - region: The region or country explicitly mentioned (e.g., US, EU, UK, Asia, Global).
111
+ - keywords: The most important regulatory topics or terms, separated by commas. Do NOT include generic words or verbs.
112
+
113
+ User query: {message}
114
+
115
+ Example output:
116
+ {{"industry": "fintech", "region": "US", "keywords": "SEC regulations"}}
117
+ """
118
+ ).replace("{message}", message)
119
+
120
  import json
121
 
122
  response = call_llm(prompt)