Abid Ali Awan committed on
Commit
a29f782
·
1 Parent(s): be7d2ce

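Enhance the regulatory sources in settings.py by adding a SOURCE_FULL_NAMES mapping that gives the full name of each source, and by improving formatting for better readability. Update web_tools.py to use the new SOURCE_FULL_NAMES mapping for clearer title handling in regulatory updates: when a crawled result has no title (or the placeholder "No Title..."), the source's full name is used instead, and each result now records its own URL, falling back to the source URL.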

Browse files
Files changed (2) hide show
  1. config/settings.py +39 -19
  2. tools/web_tools.py +10 -4
config/settings.py CHANGED
@@ -11,34 +11,55 @@ OPENAI_BASE_URL = "https://api.keywordsai.co/api/"
11
  # Regulatory websites mapping
12
  REGULATORY_SOURCES = {
13
  "US": {
14
- "SEC": "https://www.sec.gov/news/pressreleases",
15
- "FDA": "https://www.fda.gov/news-events/fda-newsroom/press-announcements",
16
- "FTC": "https://www.ftc.gov/news-events/news/press-releases",
17
  "Federal Register": "https://www.federalregister.gov/documents/current",
18
- "CFTC": "https://www.cftc.gov/PressRoom/PressReleases",
19
- "FDIC": "https://www.fdic.gov/news/press-releases/",
20
- "FINRA": "https://www.finra.org/media-center/newsreleases",
21
- "Federal Reserve Board": "https://www.federalreserve.gov/newsevents/pressreleases.htm",
22
  },
23
  "EU": {
24
- "ESMA": "https://www.esma.europa.eu/press-news/esma-news",
25
- "EBA": "https://www.eba.europa.eu/publications-and-media",
26
- "EIOPA": "https://www.eiopa.europa.eu/media/news_en",
27
- "European Parliament News": "https://www.europarl.europa.eu/news/en/press-room",
28
- "ECB": "https://www.ecb.europa.eu/press/pr/html/index.en.html",
29
  },
30
  "Asia": {
31
- "Japan FSA": "https://www.fsa.go.jp/en/news/",
32
- "Reserve Bank of India (RBI)": "https://www.rbi.org.in/Scripts/BS_PressReleaseDisplay.aspx",
33
  },
34
  "Global": {
35
- "BIS": "https://www.bis.org/press/index.htm",
36
- "IMF": "https://www.imf.org/en/News",
37
- "World Bank": "https://www.worldbank.org/en/news/all",
38
- "OECD": "https://www.oecd.org/newsroom/",
39
  },
40
  }
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
  # UI settings
44
  AVATAR_IMAGES = (
@@ -49,4 +70,3 @@ AVATAR_IMAGES = (
49
  # Default chat parameters
50
  DEFAULT_LLM_TEMPERATURE = 0.3
51
  DEFAULT_LLM_MODEL = "gpt-4.1-mini"
52
-
 
11
  # Regulatory websites mapping
12
  REGULATORY_SOURCES = {
13
  "US": {
14
+ "SEC": "https://www.sec.gov/news/pressreleases",
15
+ "FDA": "https://www.fda.gov/news-events/fda-newsroom/press-announcements",
16
+ "FTC": "https://www.ftc.gov/news-events/news/press-releases",
17
  "Federal Register": "https://www.federalregister.gov/documents/current",
18
+ "CFTC": "https://www.cftc.gov/PressRoom/PressReleases",
19
+ "FDIC": "https://www.fdic.gov/news/press-releases/",
20
+ "FINRA": "https://www.finra.org/media-center/newsreleases",
21
+ "Federal Reserve Board": "https://www.federalreserve.gov/newsevents/pressreleases.htm",
22
  },
23
  "EU": {
24
+ "ESMA": "https://www.esma.europa.eu/press-news/esma-news",
25
+ "EBA": "https://www.eba.europa.eu/publications-and-media",
26
+ "EIOPA": "https://www.eiopa.europa.eu/media/news_en",
27
+ "European Parliament News": "https://www.europarl.europa.eu/news/en/press-room",
28
+ "ECB": "https://www.ecb.europa.eu/press/pr/html/index.en.html",
29
  },
30
  "Asia": {
31
+ "Japan FSA": "https://www.fsa.go.jp/en/news/",
32
+ "Reserve Bank of India (RBI)": "https://www.rbi.org.in/Scripts/BS_PressReleaseDisplay.aspx",
33
  },
34
  "Global": {
35
+ "BIS": "https://www.bis.org/press/index.htm",
36
+ "IMF": "https://www.imf.org/en/News",
37
+ "World Bank": "https://www.worldbank.org/en/news/all",
38
+ "OECD": "https://www.oecd.org/newsroom/",
39
  },
40
  }
41
 
42
+ SOURCE_FULL_NAMES = {
43
+ "SEC": "U.S. Securities and Exchange Commission",
44
+ "FDA": "U.S. Food and Drug Administration",
45
+ "FTC": "Federal Trade Commission",
46
+ "Federal Register": "Federal Register",
47
+ "CFTC": "Commodity Futures Trading Commission",
48
+ "FDIC": "Federal Deposit Insurance Corporation",
49
+ "FINRA": "Financial Industry Regulatory Authority",
50
+ "Federal Reserve Board": "Federal Reserve Board",
51
+ "ESMA": "European Securities and Markets Authority",
52
+ "EBA": "European Banking Authority",
53
+ "EIOPA": "European Insurance and Occupational Pensions Authority",
54
+ "European Parliament News": "European Parliament News",
55
+ "ECB": "European Central Bank",
56
+ "Japan FSA": "Financial Services Agency of Japan",
57
+ "Reserve Bank of India (RBI)": "Reserve Bank of India",
58
+ "BIS": "Bank for International Settlements",
59
+ "IMF": "International Monetary Fund",
60
+ "World Bank": "World Bank",
61
+ "OECD": "Organisation for Economic Co-operation and Development",
62
+ }
63
 
64
  # UI settings
65
  AVATAR_IMAGES = (
 
70
  # Default chat parameters
71
  DEFAULT_LLM_TEMPERATURE = 0.3
72
  DEFAULT_LLM_MODEL = "gpt-4.1-mini"
 
tools/web_tools.py CHANGED
@@ -3,12 +3,13 @@ from typing import Dict
3
 
4
  from tavily import TavilyClient
5
 
6
- from config.settings import REGULATORY_SOURCES, TAVILY_API_KEY
7
  from tools.llm import call_llm
8
 
9
  # Initialize Tavily client
10
  tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
11
 
 
12
  class WebTools:
13
  def __init__(self):
14
  self.cached_searches = {}
@@ -28,7 +29,9 @@ class WebTools:
28
  urls_to_crawl = REGULATORY_SOURCES.get(region, REGULATORY_SOURCES["US"])
29
  all_results = []
30
 
31
- crawl_instructions = f"{industry} regulatory updates, compliance, {keywords}, 30d"
 
 
32
 
33
  # Crawl regulatory sites
34
  for source_name, url in list(urls_to_crawl.items())[:3]:
@@ -37,11 +40,14 @@ class WebTools:
37
  url=url, max_depth=2, limit=5, instructions=crawl_instructions
38
  )
39
  for result in crawl_response.get("results", []):
 
 
 
40
  all_results.append(
41
  {
42
  "source": source_name,
43
- "url": url,
44
- "title": result.get("title", ""),
45
  "content": result.get("raw_content", "")[:1500],
46
  }
47
  )
 
3
 
4
  from tavily import TavilyClient
5
 
6
+ from config.settings import REGULATORY_SOURCES, SOURCE_FULL_NAMES, TAVILY_API_KEY
7
  from tools.llm import call_llm
8
 
9
  # Initialize Tavily client
10
  tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
11
 
12
+
13
  class WebTools:
14
  def __init__(self):
15
  self.cached_searches = {}
 
29
  urls_to_crawl = REGULATORY_SOURCES.get(region, REGULATORY_SOURCES["US"])
30
  all_results = []
31
 
32
+ crawl_instructions = (
33
+ f"{industry} regulatory updates, compliance, {keywords}, 30d"
34
+ )
35
 
36
  # Crawl regulatory sites
37
  for source_name, url in list(urls_to_crawl.items())[:3]:
 
40
  url=url, max_depth=2, limit=5, instructions=crawl_instructions
41
  )
42
  for result in crawl_response.get("results", []):
43
+ title = result.get("title")
44
+ if not title or title == "No Title...":
45
+ title = SOURCE_FULL_NAMES.get(source_name, source_name)
46
  all_results.append(
47
  {
48
  "source": source_name,
49
+ "url": result.get("url", url),
50
+ "title": title,
51
  "content": result.get("raw_content", "")[:1500],
52
  }
53
  )