Spaces:
Running
Running
Upload 2 files
Browse files- app.py +83 -0
- requirements.txt +3 -0
app.py
ADDED
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Install necessary libraries
|
2 |
+
# pip install praw gradio pandas  (pandas is listed in requirements.txt but is not imported by this script)
|
3 |
+
|
4 |
+
import praw
|
5 |
+
import gradio as gr
|
6 |
+
import os
|
7 |
+
|
8 |
+
# Step 1: Set up Reddit API authentication using environment variables
|
9 |
+
# Module-level Reddit client shared by the functions below. The credentials
# are read from the REDDIT_CLIENT_ID / REDDIT_CLIENT_SECRET environment
# variables when this module is imported; the user agent is a fixed string.
reddit = praw.Reddit(
    user_agent="my-reddit-app/1.0",
    client_secret=os.environ.get("REDDIT_CLIENT_SECRET"),
    client_id=os.environ.get("REDDIT_CLIENT_ID"),
)
|
14 |
+
|
15 |
+
|
16 |
+
# Step 2: Function to fetch posts from subreddits
|
17 |
+
def get_posts(subreddits, keywords, time_filter="day"):
    """Search each listed subreddit for posts matching any of the keywords.

    Args:
        subreddits: Comma-separated subreddit names. Surrounding whitespace
            is stripped and blank entries are ignored.
        keywords: Comma-separated search terms. Surrounding whitespace is
            stripped, blank entries are ignored, and the remaining terms are
            joined with " OR " into a single search query.
        time_filter: Forwarded unchanged to the subreddit search call.

    Returns:
        A list of dicts with the keys "title", "url", "created" and
        "selftext", one per search result, in the order the subreddits were
        listed. Returns an empty list without contacting Reddit when there
        is no usable subreddit name or no usable keyword.
    """
    names = [name.strip() for name in subreddits.split(",")]
    names = [name for name in names if name]
    terms = [term.strip() for term in keywords.split(",")]
    terms = [term for term in terms if term]

    # Nothing to search: avoid requesting an empty subreddit name or sending
    # an empty query (e.g. blank textbox or a trailing comma in the input).
    if not names or not terms:
        return []

    # The query is identical for every subreddit, so build it once.
    query = " OR ".join(terms)

    posts = []
    for name in names:
        results = reddit.subreddit(name).search(query, time_filter=time_filter)
        posts.extend(
            {
                "title": post.title,
                "url": post.url,
                "created": post.created_utc,
                "selftext": post.selftext,
            }
            for post in results
        )
    return posts
|
29 |
+
|
30 |
+
|
31 |
+
# Step 3: Function to filter posts based on keywords
|
32 |
+
def filter_posts(posts, keywords):
    """Keep the posts whose title or body contains at least one keyword.

    Matching is a case-insensitive substring test.

    Args:
        posts: Iterable of dicts with "title" and "selftext" entries.
        keywords: Comma-separated keywords. Surrounding whitespace is
            stripped and blank entries are ignored.

    Returns:
        A new list with the matching posts in their original order. When no
        usable keyword is given, every post is returned (no filtering).
    """
    needles = [keyword.strip().lower() for keyword in keywords.split(",")]
    needles = [needle for needle in needles if needle]

    # Without keywords there is nothing to filter on; return a copy so the
    # caller always gets a fresh list.
    if not needles:
        return list(posts)

    filtered_posts = []
    for post in posts:
        # The newline separator stops a keyword from matching across the
        # boundary between the end of the title and the start of the body.
        haystack = f"{post['title']}\n{post['selftext'] or ''}".lower()
        if any(needle in haystack for needle in needles):
            filtered_posts.append(post)
    return filtered_posts
|
38 |
+
|
39 |
+
|
40 |
+
# Step 4: Function to categorize posts into topics
|
41 |
+
def categorize_posts(posts):
    """Group posts into the fixed topics "AI", "Programming" and "Tech".

    A post belongs to a topic when its title contains one of that topic's
    keywords, compared case-insensitively. A keyword only matches when it is
    not directly preceded or followed by another ASCII letter, so "AI"
    matches "AI tools" and "python" matches "python3", but "AI" no longer
    matches inside words such as "said" or "rain".

    Args:
        posts: Iterable of dicts with a "title" entry.

    Returns:
        A dict mapping every topic name to the list of matching posts in
        input order. A post may appear under several topics; a post that
        matches no topic appears under none.
    """
    import re  # local import keeps this block self-contained

    categories = {
        "AI": ["AI", "machine learning", "deep learning"],
        "Programming": ["Python", "JavaScript", "coding"],
        "Tech": ["gadgets", "technology", "innovation"],
    }

    # One compiled alternation per topic, built once instead of re-scanning
    # every keyword for every post.
    patterns = {
        category: re.compile(
            r"(?<![A-Za-z])(?:"
            + "|".join(re.escape(keyword) for keyword in keywords)
            + r")(?![A-Za-z])",
            re.IGNORECASE,
        )
        for category, keywords in categories.items()
    }

    categorized_posts = {category: [] for category in categories}
    for post in posts:
        for category, pattern in patterns.items():
            if pattern.search(post["title"]):
                categorized_posts[category].append(post)
    return categorized_posts
|
54 |
+
|
55 |
+
|
56 |
+
# Step 5: Main function to process Reddit data
|
57 |
+
def process_reddit(subreddits, keywords, time_filter):
    """Fetch, filter and categorize Reddit posts for display.

    Runs get_posts, filter_posts and categorize_posts in sequence, then
    reduces every post to its title and URL.

    Args:
        subreddits: Comma-separated subreddit names, passed to get_posts.
        keywords: Comma-separated keywords, passed to get_posts and
            filter_posts.
        time_filter: Time window, passed to get_posts.

    Returns:
        A dict mapping each category name to a list of
        {"Title": ..., "URL": ...} dicts.
    """
    fetched = get_posts(subreddits, keywords, time_filter)
    matching = filter_posts(fetched, keywords)
    by_category = categorize_posts(matching)

    return {
        category: [
            {"Title": entry["title"], "URL": entry["url"]} for entry in entries
        ]
        for category, entries in by_category.items()
    }
|
66 |
+
|
67 |
+
|
68 |
+
# Step 6: Create Gradio interface
|
69 |
+
# Gradio UI: two free-text inputs and a time-window dropdown feed
# process_reddit, whose result is rendered as JSON.
interface = gr.Interface(
    title="Reddit Post Filter",
    description="Search specific subreddits for posts containing certain keywords and categorize them into topics.",
    fn=process_reddit,
    inputs=[
        gr.Textbox(
            placeholder="e.g., technology, python",
            label="Subreddits (comma-separated)",
        ),
        gr.Textbox(
            placeholder="e.g., AI, Python, gadgets",
            label="Keywords (comma-separated)",
        ),
        gr.Dropdown(
            label="Time Filter",
            value="day",
            choices=["hour", "day", "week", "month", "year", "all"],
        ),
    ],
    outputs=gr.JSON(label="Categorized Posts"),
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
praw
|
2 |
+
gradio
|
3 |
+
pandas
|