proKBD committed on
Commit
20d02ac
·
verified ·
1 Parent(s): bf4ee4b

Update config.py

Browse files
Files changed (1) hide show
  1. config.py +85 -85
config.py CHANGED
@@ -1,85 +1,85 @@
1
- """Configuration settings for the News Summarization application."""
2
-
3
- import os
4
- from dotenv import load_dotenv
5
-
6
- # Load environment variables
7
- load_dotenv()
8
-
9
- # API Settings
10
- API_HOST = os.getenv("API_HOST", "0.0.0.0")
11
- API_PORT = int(os.getenv("API_PORT", "8005"))
12
- API_BASE_URL = os.getenv("API_BASE_URL", f"http://{API_HOST}:{API_PORT}")
13
-
14
- # News Scraping Settings
15
- ARTICLES_PER_SOURCE = int(os.getenv("ARTICLES_PER_SOURCE", "10"))
16
- USER_AGENT = os.getenv("USER_AGENT", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
17
-
18
- # RSS Feed Settings
19
- RSS_FEEDS = {
20
- "BBC": "http://feeds.bbci.co.uk/news/business/rss.xml",
21
- "CNN": "http://rss.cnn.com/rss/money_news_international.rss",
22
- "FoxBusiness": "http://feeds.foxnews.com/foxbusiness/latest"
23
- }
24
-
25
- # Model Settings
26
- SENTIMENT_MODEL = "yiyanghkust/finbert-tone" # More advanced financial sentiment model
27
- SENTIMENT_FINE_GRAINED_MODEL = "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"
28
- SUMMARIZATION_MODEL = "t5-base"
29
-
30
- # Additional Fine-Grained Sentiment Models
31
- FINE_GRAINED_MODELS = {
32
- "financial": "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis",
33
- "emotion": "j-hartmann/emotion-english-distilroberta-base",
34
- "aspect": "yangheng/deberta-v3-base-absa-v1.1",
35
- "esg": "yiyanghkust/finbert-esg",
36
- "news_tone": "ProsusAI/finbert"
37
- }
38
-
39
- # Fine-Grained Sentiment Categories
40
- SENTIMENT_CATEGORIES = {
41
- "financial": ["positive", "negative", "neutral"],
42
- "emotion": ["joy", "sadness", "anger", "fear", "surprise", "disgust", "neutral"],
43
- "aspect": ["positive", "negative", "neutral"],
44
- "esg": ["environmental", "social", "governance", "neutral"],
45
- "news_tone": ["positive", "negative", "neutral"]
46
- }
47
-
48
- # Cache Settings
49
- CACHE_DIR = os.getenv("CACHE_DIR", ".cache")
50
- CACHE_EXPIRY = int(os.getenv("CACHE_EXPIRY", "3600")) # 1 hour
51
- CACHE_DURATION = int(os.getenv("CACHE_DURATION", "300")) # 5 minutes in seconds
52
-
53
- # Audio Settings
54
- AUDIO_OUTPUT_DIR = os.getenv("AUDIO_OUTPUT_DIR", "audio_output")
55
- DEFAULT_LANG = os.getenv("DEFAULT_LANG", "hi") # Hindi
56
-
57
- # News Sources
58
- NEWS_SOURCES = {
59
- # Major News Aggregators
60
- "google": "https://www.google.com/search?q={}&tbm=nws",
61
- "bing": "https://www.bing.com/news/search?q={}",
62
- "yahoo": "https://news.search.yahoo.com/search?p={}",
63
-
64
- # Financial News
65
- "reuters": "https://www.reuters.com/search/news?blob={}",
66
- "marketwatch": "https://www.marketwatch.com/search?q={}&ts=0&tab=All%20News",
67
- "investing": "https://www.investing.com/search/?q={}&tab=news",
68
-
69
- # Tech News
70
- "techcrunch": "https://techcrunch.com/search/{}",
71
- "zdnet": "https://www.zdnet.com/search/?q={}",
72
- }
73
-
74
- # Article limits
75
- MIN_ARTICLES = 20
76
- MAX_ARTICLES_PER_SOURCE = 10 # Adjusted for more sources
77
- MAX_ARTICLES = 50 # Increased to accommodate more sources
78
-
79
- # Browser Headers
80
- HEADERS = {
81
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
82
- "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
83
- "Accept-Language": "en-US,en;q=0.5",
84
- "Connection": "keep-alive"
85
- }
 
1
+ """Configuration settings for the News Summarization application."""
2
+
3
+ import os
4
+ from dotenv import load_dotenv
5
+
6
+ # Load environment variables
7
+ load_dotenv()
8
+
9
+ # API Settings - Modified for direct processing
10
+ API_HOST = "localhost" # Changed from 0.0.0.0
11
+ API_PORT = 8501 # Changed to Streamlit's default port
12
+ API_BASE_URL = "http://localhost:8501" # Direct URL for local processing
13
+
14
+ # News Scraping Settings
15
+ ARTICLES_PER_SOURCE = int(os.getenv("ARTICLES_PER_SOURCE", "10"))
16
+ USER_AGENT = os.getenv("USER_AGENT", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
17
+
18
+ # RSS Feed Settings
19
+ RSS_FEEDS = {
20
+ "BBC": "http://feeds.bbci.co.uk/news/business/rss.xml",
21
+ "CNN": "http://rss.cnn.com/rss/money_news_international.rss",
22
+ "FoxBusiness": "http://feeds.foxnews.com/foxbusiness/latest"
23
+ }
24
+
25
+ # Model Settings
26
+ SENTIMENT_MODEL = "yiyanghkust/finbert-tone" # More advanced financial sentiment model
27
+ SENTIMENT_FINE_GRAINED_MODEL = "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"
28
+ SUMMARIZATION_MODEL = "t5-base"
29
+
30
+ # Additional Fine-Grained Sentiment Models
31
+ FINE_GRAINED_MODELS = {
32
+ "financial": "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis",
33
+ "emotion": "j-hartmann/emotion-english-distilroberta-base",
34
+ "aspect": "yangheng/deberta-v3-base-absa-v1.1",
35
+ "esg": "yiyanghkust/finbert-esg",
36
+ "news_tone": "ProsusAI/finbert"
37
+ }
38
+
39
+ # Fine-Grained Sentiment Categories
40
+ SENTIMENT_CATEGORIES = {
41
+ "financial": ["positive", "negative", "neutral"],
42
+ "emotion": ["joy", "sadness", "anger", "fear", "surprise", "disgust", "neutral"],
43
+ "aspect": ["positive", "negative", "neutral"],
44
+ "esg": ["environmental", "social", "governance", "neutral"],
45
+ "news_tone": ["positive", "negative", "neutral"]
46
+ }
47
+
48
+ # Cache Settings
49
+ CACHE_DIR = os.getenv("CACHE_DIR", ".cache")
50
+ CACHE_EXPIRY = int(os.getenv("CACHE_EXPIRY", "3600")) # 1 hour
51
+ CACHE_DURATION = int(os.getenv("CACHE_DURATION", "300")) # 5 minutes in seconds
52
+
53
+ # Audio Settings
54
+ AUDIO_OUTPUT_DIR = os.getenv("AUDIO_OUTPUT_DIR", "audio_output")
55
+ DEFAULT_LANG = os.getenv("DEFAULT_LANG", "hi") # Hindi
56
+
57
+ # News Sources
58
+ NEWS_SOURCES = {
59
+ # Major News Aggregators
60
+ "google": "https://www.google.com/search?q={}&tbm=nws",
61
+ "bing": "https://www.bing.com/news/search?q={}",
62
+ "yahoo": "https://news.search.yahoo.com/search?p={}",
63
+
64
+ # Financial News
65
+ "reuters": "https://www.reuters.com/search/news?blob={}",
66
+ "marketwatch": "https://www.marketwatch.com/search?q={}&ts=0&tab=All%20News",
67
+ "investing": "https://www.investing.com/search/?q={}&tab=news",
68
+
69
+ # Tech News
70
+ "techcrunch": "https://techcrunch.com/search/{}",
71
+ "zdnet": "https://www.zdnet.com/search/?q={}",
72
+ }
73
+
74
+ # Article limits
75
+ MIN_ARTICLES = 20
76
+ MAX_ARTICLES_PER_SOURCE = 10 # Adjusted for more sources
77
+ MAX_ARTICLES = 50 # Increased to accommodate more sources
78
+
79
+ # Browser Headers
80
+ HEADERS = {
81
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
82
+ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
83
+ "Accept-Language": "en-US,en;q=0.5",
84
+ "Connection": "keep-alive"
85
+ }