Spaces:
Sleeping
Sleeping
Abid Ali Awan
committed on
Commit
·
a29f782
1
Parent(s):
be7d2ce
Enhance regulatory sources in settings.py by adding full names for each source and improving formatting for better readability. Update web_tools.py to utilize the new SOURCE_FULL_NAMES for clearer title handling in regulatory updates.
Browse files
- config/settings.py +39 -19
- tools/web_tools.py +10 -4
config/settings.py
CHANGED
@@ -11,34 +11,55 @@ OPENAI_BASE_URL = "https://api.keywordsai.co/api/"
|
|
11 |
# Regulatory websites mapping
|
12 |
REGULATORY_SOURCES = {
|
13 |
"US": {
|
14 |
-
"SEC": "https://www.sec.gov/news/pressreleases",
|
15 |
-
"FDA": "https://www.fda.gov/news-events/fda-newsroom/press-announcements",
|
16 |
-
"FTC": "https://www.ftc.gov/news-events/news/press-releases",
|
17 |
"Federal Register": "https://www.federalregister.gov/documents/current",
|
18 |
-
"CFTC": "https://www.cftc.gov/PressRoom/PressReleases",
|
19 |
-
"FDIC": "https://www.fdic.gov/news/press-releases/",
|
20 |
-
"FINRA": "https://www.finra.org/media-center/newsreleases",
|
21 |
-
"Federal Reserve Board": "https://www.federalreserve.gov/newsevents/pressreleases.htm",
|
22 |
},
|
23 |
"EU": {
|
24 |
-
"ESMA": "https://www.esma.europa.eu/press-news/esma-news",
|
25 |
-
"EBA": "https://www.eba.europa.eu/publications-and-media",
|
26 |
-
"EIOPA": "https://www.eiopa.europa.eu/media/news_en",
|
27 |
-
"European Parliament News": "https://www.europarl.europa.eu/news/en/press-room",
|
28 |
-
"ECB": "https://www.ecb.europa.eu/press/pr/html/index.en.html",
|
29 |
},
|
30 |
"Asia": {
|
31 |
-
"Japan FSA": "https://www.fsa.go.jp/en/news/",
|
32 |
-
"Reserve Bank of India (RBI)": "https://www.rbi.org.in/Scripts/BS_PressReleaseDisplay.aspx",
|
33 |
},
|
34 |
"Global": {
|
35 |
-
"BIS": "https://www.bis.org/press/index.htm",
|
36 |
-
"IMF": "https://www.imf.org/en/News",
|
37 |
-
"World Bank": "https://www.worldbank.org/en/news/all",
|
38 |
-
"OECD": "https://www.oecd.org/newsroom/",
|
39 |
},
|
40 |
}
|
41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
|
43 |
# UI settings
|
44 |
AVATAR_IMAGES = (
|
@@ -49,4 +70,3 @@ AVATAR_IMAGES = (
|
|
49 |
# Default chat parameters
|
50 |
DEFAULT_LLM_TEMPERATURE = 0.3
|
51 |
DEFAULT_LLM_MODEL = "gpt-4.1-mini"
|
52 |
-
|
|
|
11 |
# Regulatory websites mapping
|
12 |
REGULATORY_SOURCES = {
|
13 |
"US": {
|
14 |
+
"SEC": "https://www.sec.gov/news/pressreleases",
|
15 |
+
"FDA": "https://www.fda.gov/news-events/fda-newsroom/press-announcements",
|
16 |
+
"FTC": "https://www.ftc.gov/news-events/news/press-releases",
|
17 |
"Federal Register": "https://www.federalregister.gov/documents/current",
|
18 |
+
"CFTC": "https://www.cftc.gov/PressRoom/PressReleases",
|
19 |
+
"FDIC": "https://www.fdic.gov/news/press-releases/",
|
20 |
+
"FINRA": "https://www.finra.org/media-center/newsreleases",
|
21 |
+
"Federal Reserve Board": "https://www.federalreserve.gov/newsevents/pressreleases.htm",
|
22 |
},
|
23 |
"EU": {
|
24 |
+
"ESMA": "https://www.esma.europa.eu/press-news/esma-news",
|
25 |
+
"EBA": "https://www.eba.europa.eu/publications-and-media",
|
26 |
+
"EIOPA": "https://www.eiopa.europa.eu/media/news_en",
|
27 |
+
"European Parliament News": "https://www.europarl.europa.eu/news/en/press-room",
|
28 |
+
"ECB": "https://www.ecb.europa.eu/press/pr/html/index.en.html",
|
29 |
},
|
30 |
"Asia": {
|
31 |
+
"Japan FSA": "https://www.fsa.go.jp/en/news/",
|
32 |
+
"Reserve Bank of India (RBI)": "https://www.rbi.org.in/Scripts/BS_PressReleaseDisplay.aspx",
|
33 |
},
|
34 |
"Global": {
|
35 |
+
"BIS": "https://www.bis.org/press/index.htm",
|
36 |
+
"IMF": "https://www.imf.org/en/News",
|
37 |
+
"World Bank": "https://www.worldbank.org/en/news/all",
|
38 |
+
"OECD": "https://www.oecd.org/newsroom/",
|
39 |
},
|
40 |
}
|
41 |
|
42 |
+
SOURCE_FULL_NAMES = {
|
43 |
+
"SEC": "U.S. Securities and Exchange Commission",
|
44 |
+
"FDA": "U.S. Food and Drug Administration",
|
45 |
+
"FTC": "Federal Trade Commission",
|
46 |
+
"Federal Register": "Federal Register",
|
47 |
+
"CFTC": "Commodity Futures Trading Commission",
|
48 |
+
"FDIC": "Federal Deposit Insurance Corporation",
|
49 |
+
"FINRA": "Financial Industry Regulatory Authority",
|
50 |
+
"Federal Reserve Board": "Federal Reserve Board",
|
51 |
+
"ESMA": "European Securities and Markets Authority",
|
52 |
+
"EBA": "European Banking Authority",
|
53 |
+
"EIOPA": "European Insurance and Occupational Pensions Authority",
|
54 |
+
"European Parliament News": "European Parliament News",
|
55 |
+
"ECB": "European Central Bank",
|
56 |
+
"Japan FSA": "Financial Services Agency of Japan",
|
57 |
+
"Reserve Bank of India (RBI)": "Reserve Bank of India",
|
58 |
+
"BIS": "Bank for International Settlements",
|
59 |
+
"IMF": "International Monetary Fund",
|
60 |
+
"World Bank": "World Bank",
|
61 |
+
"OECD": "Organisation for Economic Co-operation and Development",
|
62 |
+
}
|
63 |
|
64 |
# UI settings
|
65 |
AVATAR_IMAGES = (
|
|
|
70 |
# Default chat parameters
|
71 |
DEFAULT_LLM_TEMPERATURE = 0.3
|
72 |
DEFAULT_LLM_MODEL = "gpt-4.1-mini"
|
|
tools/web_tools.py
CHANGED
@@ -3,12 +3,13 @@ from typing import Dict
|
|
3 |
|
4 |
from tavily import TavilyClient
|
5 |
|
6 |
-
from config.settings import REGULATORY_SOURCES, TAVILY_API_KEY
|
7 |
from tools.llm import call_llm
|
8 |
|
9 |
# Initialize Tavily client
|
10 |
tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
|
11 |
|
|
|
12 |
class WebTools:
|
13 |
def __init__(self):
|
14 |
self.cached_searches = {}
|
@@ -28,7 +29,9 @@ class WebTools:
|
|
28 |
urls_to_crawl = REGULATORY_SOURCES.get(region, REGULATORY_SOURCES["US"])
|
29 |
all_results = []
|
30 |
|
31 |
-
crawl_instructions =
|
|
|
|
|
32 |
|
33 |
# Crawl regulatory sites
|
34 |
for source_name, url in list(urls_to_crawl.items())[:3]:
|
@@ -37,11 +40,14 @@ class WebTools:
|
|
37 |
url=url, max_depth=2, limit=5, instructions=crawl_instructions
|
38 |
)
|
39 |
for result in crawl_response.get("results", []):
|
|
|
|
|
|
|
40 |
all_results.append(
|
41 |
{
|
42 |
"source": source_name,
|
43 |
-
"url": url,
|
44 |
-
"title":
|
45 |
"content": result.get("raw_content", "")[:1500],
|
46 |
}
|
47 |
)
|
|
|
3 |
|
4 |
from tavily import TavilyClient
|
5 |
|
6 |
+
from config.settings import REGULATORY_SOURCES, SOURCE_FULL_NAMES, TAVILY_API_KEY
|
7 |
from tools.llm import call_llm
|
8 |
|
9 |
# Initialize Tavily client
|
10 |
tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
|
11 |
|
12 |
+
|
13 |
class WebTools:
|
14 |
def __init__(self):
|
15 |
self.cached_searches = {}
|
|
|
29 |
urls_to_crawl = REGULATORY_SOURCES.get(region, REGULATORY_SOURCES["US"])
|
30 |
all_results = []
|
31 |
|
32 |
+
crawl_instructions = (
|
33 |
+
f"{industry} regulatory updates, compliance, {keywords}, 30d"
|
34 |
+
)
|
35 |
|
36 |
# Crawl regulatory sites
|
37 |
for source_name, url in list(urls_to_crawl.items())[:3]:
|
|
|
40 |
url=url, max_depth=2, limit=5, instructions=crawl_instructions
|
41 |
)
|
42 |
for result in crawl_response.get("results", []):
|
43 |
+
title = result.get("title")
|
44 |
+
if not title or title == "No Title...":
|
45 |
+
title = SOURCE_FULL_NAMES.get(source_name, source_name)
|
46 |
all_results.append(
|
47 |
{
|
48 |
"source": source_name,
|
49 |
+
"url": result.get("url", url),
|
50 |
+
"title": title,
|
51 |
"content": result.get("raw_content", "")[:1500],
|
52 |
}
|
53 |
)
|