Spaces:
Sleeping
Sleeping
Update config.py
Browse files
config.py
CHANGED
@@ -1,85 +1,85 @@
|
|
1 |
-
"""Configuration settings for the News Summarization application."""
|
2 |
-
|
3 |
-
import os
|
4 |
-
from dotenv import load_dotenv
|
5 |
-
|
6 |
-
# Load environment variables
|
7 |
-
load_dotenv()
|
8 |
-
|
9 |
-
# API Settings
|
10 |
-
API_HOST =
|
11 |
-
API_PORT =
|
12 |
-
API_BASE_URL =
|
13 |
-
|
14 |
-
# News Scraping Settings
|
15 |
-
ARTICLES_PER_SOURCE = int(os.getenv("ARTICLES_PER_SOURCE", "10"))
|
16 |
-
USER_AGENT = os.getenv("USER_AGENT", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
|
17 |
-
|
18 |
-
# RSS Feed Settings
|
19 |
-
RSS_FEEDS = {
|
20 |
-
"BBC": "http://feeds.bbci.co.uk/news/business/rss.xml",
|
21 |
-
"CNN": "http://rss.cnn.com/rss/money_news_international.rss",
|
22 |
-
"FoxBusiness": "http://feeds.foxnews.com/foxbusiness/latest"
|
23 |
-
}
|
24 |
-
|
25 |
-
# Model Settings
|
26 |
-
SENTIMENT_MODEL = "yiyanghkust/finbert-tone" # More advanced financial sentiment model
|
27 |
-
SENTIMENT_FINE_GRAINED_MODEL = "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"
|
28 |
-
SUMMARIZATION_MODEL = "t5-base"
|
29 |
-
|
30 |
-
# Additional Fine-Grained Sentiment Models
|
31 |
-
FINE_GRAINED_MODELS = {
|
32 |
-
"financial": "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis",
|
33 |
-
"emotion": "j-hartmann/emotion-english-distilroberta-base",
|
34 |
-
"aspect": "yangheng/deberta-v3-base-absa-v1.1",
|
35 |
-
"esg": "yiyanghkust/finbert-esg",
|
36 |
-
"news_tone": "ProsusAI/finbert"
|
37 |
-
}
|
38 |
-
|
39 |
-
# Fine-Grained Sentiment Categories
|
40 |
-
SENTIMENT_CATEGORIES = {
|
41 |
-
"financial": ["positive", "negative", "neutral"],
|
42 |
-
"emotion": ["joy", "sadness", "anger", "fear", "surprise", "disgust", "neutral"],
|
43 |
-
"aspect": ["positive", "negative", "neutral"],
|
44 |
-
"esg": ["environmental", "social", "governance", "neutral"],
|
45 |
-
"news_tone": ["positive", "negative", "neutral"]
|
46 |
-
}
|
47 |
-
|
48 |
-
# Cache Settings
|
49 |
-
CACHE_DIR = os.getenv("CACHE_DIR", ".cache")
|
50 |
-
CACHE_EXPIRY = int(os.getenv("CACHE_EXPIRY", "3600")) # 1 hour
|
51 |
-
CACHE_DURATION = int(os.getenv("CACHE_DURATION", "300")) # 5 minutes in seconds
|
52 |
-
|
53 |
-
# Audio Settings
|
54 |
-
AUDIO_OUTPUT_DIR = os.getenv("AUDIO_OUTPUT_DIR", "audio_output")
|
55 |
-
DEFAULT_LANG = os.getenv("DEFAULT_LANG", "hi") # Hindi
|
56 |
-
|
57 |
-
# News Sources
|
58 |
-
NEWS_SOURCES = {
|
59 |
-
# Major News Aggregators
|
60 |
-
"google": "https://www.google.com/search?q={}&tbm=nws",
|
61 |
-
"bing": "https://www.bing.com/news/search?q={}",
|
62 |
-
"yahoo": "https://news.search.yahoo.com/search?p={}",
|
63 |
-
|
64 |
-
# Financial News
|
65 |
-
"reuters": "https://www.reuters.com/search/news?blob={}",
|
66 |
-
"marketwatch": "https://www.marketwatch.com/search?q={}&ts=0&tab=All%20News",
|
67 |
-
"investing": "https://www.investing.com/search/?q={}&tab=news",
|
68 |
-
|
69 |
-
# Tech News
|
70 |
-
"techcrunch": "https://techcrunch.com/search/{}",
|
71 |
-
"zdnet": "https://www.zdnet.com/search/?q={}",
|
72 |
-
}
|
73 |
-
|
74 |
-
# Article limits
|
75 |
-
MIN_ARTICLES = 20
|
76 |
-
MAX_ARTICLES_PER_SOURCE = 10 # Adjusted for more sources
|
77 |
-
MAX_ARTICLES = 50 # Increased to accommodate more sources
|
78 |
-
|
79 |
-
# Browser Headers
|
80 |
-
HEADERS = {
|
81 |
-
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
82 |
-
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
|
83 |
-
"Accept-Language": "en-US,en;q=0.5",
|
84 |
-
"Connection": "keep-alive"
|
85 |
-
}
|
|
|
1 |
+
"""Configuration settings for the News Summarization application."""
|
2 |
+
|
3 |
+
import os
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
|
6 |
+
# Load environment variables
|
7 |
+
load_dotenv()
|
8 |
+
|
9 |
+
# API Settings - Modified for direct processing
|
10 |
+
API_HOST = "localhost" # Host name for the API; the previous value was 0.0.0.0
|
11 |
+
API_PORT = 8501 # 8501 is Streamlit's default server port
|
12 |
+
API_BASE_URL = f"http://{API_HOST}:{API_PORT}"  # Built from API_HOST/API_PORT so the three settings cannot drift apart
|
13 |
+
|
14 |
+
# News Scraping Settings
|
15 |
+
# Per-source article count, read from the ARTICLES_PER_SOURCE environment
# variable (default 10) and converted to int.
ARTICLES_PER_SOURCE = int(os.environ.get("ARTICLES_PER_SOURCE", "10"))
|
16 |
+
# User-Agent string; the USER_AGENT environment variable overrides the
# built-in Chrome-on-Windows default.
USER_AGENT = os.environ.get(
    "USER_AGENT",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/91.0.4472.124 Safari/537.36",
)
|
17 |
+
|
18 |
+
# RSS Feed Settings
|
19 |
+
RSS_FEEDS = {
|
20 |
+
"BBC": "http://feeds.bbci.co.uk/news/business/rss.xml",
|
21 |
+
"CNN": "http://rss.cnn.com/rss/money_news_international.rss",
|
22 |
+
"FoxBusiness": "http://feeds.foxnews.com/foxbusiness/latest"
|
23 |
+
}
|
24 |
+
|
25 |
+
# Model Settings
|
26 |
+
SENTIMENT_MODEL = "yiyanghkust/finbert-tone" # FinBERT tone model id, chosen for financial sentiment
|
27 |
+
SENTIMENT_FINE_GRAINED_MODEL = "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis" # Same model id as FINE_GRAINED_MODELS["financial"]
|
28 |
+
SUMMARIZATION_MODEL = "t5-base" # Model id for summarization (presumably a Hugging Face hub id; confirm against the loader)
|
29 |
+
|
30 |
+
# Additional Fine-Grained Sentiment Models
|
31 |
+
FINE_GRAINED_MODELS = {
|
32 |
+
"financial": "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis",
|
33 |
+
"emotion": "j-hartmann/emotion-english-distilroberta-base",
|
34 |
+
"aspect": "yangheng/deberta-v3-base-absa-v1.1",
|
35 |
+
"esg": "yiyanghkust/finbert-esg",
|
36 |
+
"news_tone": "ProsusAI/finbert"
|
37 |
+
}
|
38 |
+
|
39 |
+
# Fine-Grained Sentiment Categories
|
40 |
+
SENTIMENT_CATEGORIES = {
|
41 |
+
"financial": ["positive", "negative", "neutral"],
|
42 |
+
"emotion": ["joy", "sadness", "anger", "fear", "surprise", "disgust", "neutral"],
|
43 |
+
"aspect": ["positive", "negative", "neutral"],
|
44 |
+
"esg": ["environmental", "social", "governance", "neutral"],
|
45 |
+
"news_tone": ["positive", "negative", "neutral"]
|
46 |
+
}
|
47 |
+
|
48 |
+
# Cache Settings
|
49 |
+
# Cache directory path; the CACHE_DIR environment variable overrides ".cache".
CACHE_DIR = os.environ.get("CACHE_DIR", ".cache")
|
50 |
+
# Cache expiry as an int; the default of 3600 corresponds to one hour in seconds.
CACHE_EXPIRY = int(os.environ.get("CACHE_EXPIRY", "3600"))
|
51 |
+
# Cache duration in seconds as an int; the default of 300 is five minutes.
CACHE_DURATION = int(os.environ.get("CACHE_DURATION", "300"))
|
52 |
+
|
53 |
+
# Audio Settings
|
54 |
+
# Audio output directory; the AUDIO_OUTPUT_DIR environment variable
# overrides the "audio_output" default.
AUDIO_OUTPUT_DIR = os.environ.get("AUDIO_OUTPUT_DIR", "audio_output")
|
55 |
+
# Default language code; "hi" (Hindi) unless DEFAULT_LANG is set in the environment.
DEFAULT_LANG = os.environ.get("DEFAULT_LANG", "hi")
|
56 |
+
|
57 |
+
# News Sources
|
58 |
+
NEWS_SOURCES = {
|
59 |
+
# Major News Aggregators
|
60 |
+
"google": "https://www.google.com/search?q={}&tbm=nws",
|
61 |
+
"bing": "https://www.bing.com/news/search?q={}",
|
62 |
+
"yahoo": "https://news.search.yahoo.com/search?p={}",
|
63 |
+
|
64 |
+
# Financial News
|
65 |
+
"reuters": "https://www.reuters.com/search/news?blob={}",
|
66 |
+
"marketwatch": "https://www.marketwatch.com/search?q={}&ts=0&tab=All%20News",
|
67 |
+
"investing": "https://www.investing.com/search/?q={}&tab=news",
|
68 |
+
|
69 |
+
# Tech News
|
70 |
+
"techcrunch": "https://techcrunch.com/search/{}",
|
71 |
+
"zdnet": "https://www.zdnet.com/search/?q={}",
|
72 |
+
}
|
73 |
+
|
74 |
+
# Article limits
|
75 |
+
MIN_ARTICLES = 20  # Lower article limit; how it is enforced is not visible in this file
|
76 |
+
MAX_ARTICLES_PER_SOURCE = 10 # Per-source cap. NOTE(review): overlaps with ARTICLES_PER_SOURCE (env-configurable, default 10); confirm which one callers use
|
77 |
+
MAX_ARTICLES = 50 # Overall upper article limit; increased to accommodate more sources
|
78 |
+
|
79 |
+
# Browser Headers
|
80 |
+
# HTTP request headers. The User-Agent entry reuses USER_AGENT rather than
# repeating the literal, so an environment override of USER_AGENT is honored
# here as well; with no override the value is unchanged.
HEADERS = {
    "User-Agent": USER_AGENT,
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    "Connection": "keep-alive",
}
|