Spaces:
Sleeping
Sleeping
8shanrahan2
committed on
Commit
·
42b5a1a
0
Parent(s):
Initial commit π
Browse files- agent.py +19 -0
- app.py +24 -0
- readme.MD +9 -0
- renderer.py +6 -0
- requirements.txt +6 -0
- rss_reader.py +21 -0
- templates/summary.html +0 -0
agent.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import pipeline
|
2 |
+
|
3 |
+
# Both pipelines are built once at import time so every analyze_article()
# call reuses the already-loaded models.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Pin the zero-shot checkpoint explicitly instead of relying on the library's
# implicit per-task default, which triggers a "no model was supplied" warning
# and may change between transformers releases.
# NOTE(review): facebook/bart-large-mnli is believed to be the current default
# for this task, so behavior should be unchanged — confirm against the
# installed transformers version.
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
|
5 |
+
|
6 |
+
def analyze_article(text, title, link):
    """Summarize an article and flag whether it is useful to a data analytics team.

    Args:
        text: Article body. Only its first 1024 characters are summarized.
        title: Article title; passed through to the result unchanged.
        link: Article URL; passed through to the result unchanged.

    Returns:
        A dict with the keys ``"title"``, ``"summary"``, ``"link"`` and
        ``"useful"`` (bool). When ``text`` is empty, whitespace-only or
        ``None``, the summary is ``""`` and ``useful`` is ``False``.
    """
    useful_label = "Useful for data analytics team"

    # Nothing to summarize (e.g. the article body could not be extracted).
    # Skip the models entirely: forcing a 50-token minimum summary out of an
    # empty input would only produce made-up text.
    if not text or not text.strip():
        return {
            "title": title,
            "summary": "",
            "link": link,
            "useful": False,
        }

    # Character-level truncation keeps the input bounded before tokenization.
    summary = summarizer(
        text[:1024], max_length=200, min_length=50, do_sample=False
    )[0]["summary_text"]

    relevance = classifier(
        summary,
        candidate_labels=[useful_label, "Not useful"],
        multi_label=False,
    )
    # The first entry of "labels" is treated as the winning label
    # (the pipeline returns labels ordered by likelihood).
    is_useful = relevance["labels"][0] == useful_label

    return {
        "title": title,
        "summary": summary,
        "link": link,
        "useful": is_useful,
    }
|
app.py
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from rss_reader import fetch_articles
|
3 |
+
from agent import analyze_article
|
4 |
+
from renderer import render_html
|
5 |
+
|
6 |
+
def process_rss(rss_input):
    """Build an HTML digest from newline-separated RSS feed URLs.

    Args:
        rss_input: Text containing one feed URL per line. Blank lines and
            surrounding whitespace are ignored.

    Returns:
        The value produced by ``render_html`` for the analyzed articles.
    """
    # Collect the non-empty, trimmed lines as feed URLs.
    feed_urls = []
    for line in rss_input.strip().splitlines():
        cleaned = line.strip()
        if cleaned:
            feed_urls.append(cleaned)

    # Analyze every fetched article in feed order.
    digest_entries = []
    for item in fetch_articles(feed_urls):
        digest_entries.append(
            analyze_article(item['text'], item['title'], item['link'])
        )

    return render_html(digest_entries)
|
11 |
+
|
12 |
+
# Feeds pre-filled in the input box, one URL per line.
default_rss = """https://rss.nytimes.com/services/xml/rss/nyt/Technology.xml
https://www.reutersagency.com/feed/?best-sectors=technology"""

# Gradio UI: a multi-line textbox of feed URLs in, a rendered HTML digest out.
iface = gr.Interface(
    fn=process_rss,
    inputs=gr.Textbox(label="Enter RSS URLs (one per line)", value=default_rss, lines=4),
    # NOTE(review): the leading "π" looks like a mis-encoded emoji; the
    # original glyph cannot be recovered from here — restore it from the
    # upstream file.
    outputs=gr.HTML(label="π GenAI RSS Digest"),
    title="GenAI RSS Summarizer",
    # Repaired the mis-encoded apostrophe in the user-facing description.
    description="Summarizes articles and detects what’s useful to analytics teams",
)

if __name__ == "__main__":
    iface.launch()
|
readme.MD
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# 🤖 GenAI RSS Summarizer
|
2 |
+
|
3 |
+
Summarizes RSS feeds using Hugging Face models and renders as a digest with relevance checks for data teams.
|
4 |
+
|
5 |
+
## To Run Locally
|
6 |
+
|
7 |
+
```bash
|
8 |
+
pip install -r requirements.txt
|
9 |
+
python app.py
```
|
renderer.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from jinja2 import Template
|
2 |
+
|
3 |
+
def render_html(summaries):
    """Render the article summaries into the HTML digest.

    Args:
        summaries: Iterable of per-article records; exposed to the template
            under the name ``summaries``.

    Returns:
        The rendered template as a string.

    Raises:
        OSError: If ``templates/summary.html`` cannot be opened.
    """
    # Local import keeps this block self-contained without touching the
    # module's import section.
    from pathlib import Path

    # Resolve the template next to this module so rendering does not depend
    # on the process's current working directory.
    template_path = Path(__file__).resolve().parent / "templates" / "summary.html"

    with open(template_path, encoding="utf-8") as f:
        # Titles, summaries and links originate from external feeds, so
        # escape them on output to prevent HTML/script injection.
        template = Template(f.read(), autoescape=True)

    return template.render(summaries=summaries)
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio
|
2 |
+
transformers
|
3 |
+
feedparser
|
4 |
+
newspaper3k
|
5 |
+
jinja2
|
6 |
+
torch
|
rss_reader.py
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import feedparser
|
2 |
+
from newspaper import Article
|
3 |
+
|
4 |
+
def fetch_articles(rss_urls, limit=3):
    """Download and parse the first ``limit`` articles of each RSS feed.

    Args:
        rss_urls: Iterable of feed URLs.
        limit: Maximum number of entries taken from each feed.

    Returns:
        A list of dicts with the keys ``"title"``, ``"link"``, ``"text"`` and
        ``"published"``. Entries whose article cannot be downloaded or parsed
        are skipped; a missing title or publication date becomes ``""``.
    """
    # Local import keeps this block self-contained without touching the
    # module's import section.
    import logging

    logger = logging.getLogger(__name__)
    articles = []

    for url in rss_urls:
        feed = feedparser.parse(url)
        for entry in feed.entries[:limit]:
            try:
                article = Article(entry.link)
                article.download()
                article.parse()
            except Exception:
                # Best-effort: one bad entry must not abort the whole digest,
                # but record why it was dropped. Unlike a bare ``except:``,
                # this lets KeyboardInterrupt/SystemExit propagate.
                logger.warning("Skipping entry from feed %s", url, exc_info=True)
                continue

            articles.append({
                # Optional feed fields: fall back to "" rather than dropping
                # an article that was fetched successfully.
                "title": entry.get("title", ""),
                "link": entry.link,
                "text": article.text,
                "published": entry.get("published", ""),
            })

    return articles
|
templates/summary.html
ADDED
File without changes
|