Awal4203 committed on
Commit
af427ba
·
verified ·
1 Parent(s): 177e4ad

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -188
app.py DELETED
@@ -1,188 +0,0 @@
1
-
2
- import time
3
- import sqlite3
4
- import feedparser
5
- from newspaper import Article, Config
6
- import nltk
7
- from nltk.sentiment.vader import SentimentIntensityAnalyzer
8
- import gradio as gr
9
-
10
# Make sure the VADER lexicon used by the sentiment analyzer is available.
nltk.download('vader_lexicon')

#############################################
# Database Setup (SQLite)
#############################################

# Open (creating it on first run) the SQLite file that stores the articles.
# check_same_thread=False lets Gradio's worker threads share the connection.
conn = sqlite3.connect('crypto_news.db', check_same_thread=False)
cursor = conn.cursor()

# One row per article; the UNIQUE constraint on url rejects duplicates.
# The connection context manager commits once the statement has succeeded.
with conn:
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS articles (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            source TEXT,
            title TEXT,
            url TEXT UNIQUE,
            content TEXT,
            sentiment REAL,
            published TEXT,
            fetched_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
    ''')

#############################################
# Sentiment Analysis Setup
#############################################

# Single shared VADER analyzer instance, reused by analyze_sentiment().
sid = SentimentIntensityAnalyzer()
43
-
44
def analyze_sentiment(text):
    """Score *text* with the module-level VADER analyzer.

    Returns only the 'compound' entry of the polarity scores.
    """
    return sid.polarity_scores(text)['compound']
48
-
49
def store_article(source, title, url, content, sentiment, published):
    """Insert one article row, reporting (not raising) on a duplicate URL.

    Args:
        source: Name of the news outlet the article came from.
        title: Article headline.
        url: Article link; the table enforces uniqueness on this column.
        content: Full article text.
        sentiment: Sentiment score stored alongside the article.
        published: Publication date string as supplied by the feed.

    Returns:
        None. The outcome is reported on stdout.
    """
    try:
        # `with conn` commits on success and rolls back if the insert fails.
        # conn.execute() runs on its own cursor, so this write cannot disturb
        # a result set another thread is reading from the shared module-level
        # cursor (the connection is opened with check_same_thread=False).
        with conn:
            conn.execute(
                '''
                INSERT INTO articles (source, title, url, content, sentiment, published)
                VALUES (?, ?, ?, ?, ?, ?)
                ''',
                (source, title, url, content, sentiment, published),
            )
    except sqlite3.IntegrityError:
        print(f"Article already exists in DB: {title}")
    else:
        print(f"Stored article: {title}")
60
-
61
- #############################################
62
- # Article Fetching with Newspaper3k
63
- #############################################
64
-
65
def fetch_article(url):
    """
    Fetch *url* with Newspaper3k and parse it.

    Returns the parsed Article on success. Any exception is reported on
    stdout and turned into a None return value.
    """
    try:
        settings = Config()
        # Present a browser-like user agent to avoid 403 errors.
        settings.browser_user_agent = (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/90.0.4430.93 Safari/537.36'
        )

        page = Article(url, config=settings)
        page.download()
        page.parse()
    except Exception as err:
        print(f"Error fetching article from {url}: {err}")
        return None
    return page
87
-
88
- #############################################
89
- # RSS Feed Processing
90
- #############################################
91
-
92
def fetch_feed(feed_url, source_name):
    """
    Parse an RSS feed and process each entry that is not stored yet:
    download the full article, analyze its sentiment, and store it.

    Args:
        feed_url: URL of the RSS feed to parse.
        source_name: Outlet name recorded with every stored article.

    Returns:
        None. Progress and problems are reported on stdout.
    """
    feed = feedparser.parse(feed_url)
    if feed.bozo:
        print(f"Error parsing feed from {source_name}: {feed.bozo_exception}")
        # feedparser raises the bozo flag for any malformed feed but still
        # returns the entries it managed to recover, so only give up when
        # nothing usable came back.
        if not feed.entries:
            return

    for entry in feed.entries:
        url = entry.get('link', None)
        if not url:
            continue
        title = entry.get('title', 'No Title')
        published = entry.get('published', '')

        # Skip the download and sentiment analysis for URLs already stored;
        # otherwise every refresh would re-fetch every article in the feed.
        already_stored = conn.execute(
            'SELECT 1 FROM articles WHERE url = ?', (url,)
        ).fetchone()
        if already_stored:
            print(f"Article already exists in DB: {title}")
            continue

        article = fetch_article(url)
        if article and article.text:
            content = article.text
            sentiment = analyze_sentiment(content)
            store_article(source_name, title, url, content, sentiment, published)
        else:
            print(f"Skipping article (no content): {title}")
116
-
117
def update_feeds():
    """
    Fetch articles from each of the built-in crypto news RSS feeds, in order.
    """
    # (source name, feed URL) pairs, processed top to bottom.
    sources = (
        ('CoinDesk', 'https://www.coindesk.com/arc/outboundfeeds/rss/?outputType=xml'),
        ('Cointelegraph', 'https://cointelegraph.com/rss'),
        ('CryptoSlate', 'https://cryptoslate.com/feed/'),
        ('NewsBTC', 'https://www.newsbtc.com/feed/'),
        ('Decrypt', 'https://decrypt.co/feed'),
    )
    for name, feed_url in sources:
        print(f"Fetching feed from {name}...")
        fetch_feed(feed_url, name)
131
-
132
- #############################################
133
- # Retrieve and Format Latest News for Display
134
- #############################################
135
-
136
def get_latest_news():
    """
    Retrieve the latest 10 articles from the database and format them into
    an HTML string.

    All feed-supplied text (title, source, published date, URL) is
    HTML-escaped before being embedded, because it originates from external
    RSS feeds and must not be able to inject markup into the page.

    Returns:
        str: HTML fragment with a heading and one <div> per article, or a
        "No articles found" message when the table is empty.
    """
    import html  # function-scope import keeps this block self-contained

    def esc(value):
        """Escape a value for use in HTML text or a quoted attribute."""
        return html.escape(str(value))

    # conn.execute() runs on its own cursor, so concurrent requests do not
    # overwrite each other's result set on the shared module-level cursor.
    articles = conn.execute("""
        SELECT source, title, url, sentiment, published
        FROM articles
        ORDER BY fetched_at DESC
        LIMIT 10
    """).fetchall()

    parts = ["<h2>Latest Crypto News</h2>"]
    if not articles:
        parts.append("<p>No articles found. Try fetching news first.</p>")

    for source, title, url, sentiment, published in articles:
        parts.append(
            "<div style='margin-bottom:20px; border-bottom:1px solid #ccc; padding-bottom:10px;'>"
        )
        parts.append(f"<h3>{esc(title)}</h3>")
        parts.append(
            f"<p><strong>Source:</strong> {esc(source)} | "
            f"<strong>Published:</strong> {esc(published)} | "
            f"<strong>Sentiment:</strong> {sentiment}</p>"
        )
        # Only emit a link for plain web URLs; other schemes (for example
        # javascript:) are not safe to place in an href.
        if str(url).lower().startswith(('http://', 'https://')):
            parts.append(
                f"<p><a href='{esc(url)}' target='_blank'>Read full article</a></p>"
            )
        parts.append("</div>")
    return "".join(parts)
162
-
163
- #############################################
164
- # Gradio Callback Function
165
- #############################################
166
-
167
def fetch_and_display_news():
    """
    Gradio callback: refresh the database from all RSS feeds, then return
    the HTML listing of the most recent articles.
    """
    update_feeds()
    latest_html = get_latest_news()
    return latest_html
174
-
175
- #############################################
176
- # Gradio Interface
177
- #############################################
178
-
179
# Input-less interface: the callback's HTML result is rendered as the output.
iface = gr.Interface(
    title="Crypto News Dashboard",
    description="Click the button to fetch the latest cryptocurrency news from free news sources.",
    fn=fetch_and_display_news,
    inputs=[],  # No inputs: clicking the button triggers fetching.
    outputs="html",
)

# Launch the web UI only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()