Spaces:

sahilmohurale03
/

reddit_scraper

Running

App Files Files Community

sahilmohurale03 committed on Jan 8

Commit

50e4ef7

verified ·

1 Parent(s): b7f9b7e

Upload 2 files

Browse files

Files changed (2) hide show

app.py +83 -0
requirements.txt +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,83 @@

+# Install necessary libraries
+# pip install praw gradio pandas
+import praw
+import gradio as gr
+import os
+# Step 1: Set up Reddit API authentication using environment variables
+reddit = praw.Reddit(
+    client_id=os.getenv("REDDIT_CLIENT_ID"),  # Fetch from environment variables
+    client_secret=os.getenv("REDDIT_CLIENT_SECRET"),  # Fetch from environment variables
+    user_agent="my-reddit-app/1.0"  # User-agent can stay as is
+)
+# Step 2: Function to fetch posts from subreddits
+def get_posts(subreddits, keywords, time_filter="day"):
+    posts = []
+    for subreddit in subreddits.split(","):
+        subreddit_obj = reddit.subreddit(subreddit.strip())
+        for post in subreddit_obj.search(" OR ".join(keywords.split(",")), time_filter=time_filter):
+            posts.append({
+                "title": post.title,
+                "url": post.url,
+                "created": post.created_utc,
+                "selftext": post.selftext
+            })
+    return posts
+# Step 3: Function to filter posts based on keywords
+def filter_posts(posts, keywords):
+    filtered_posts = []
+    for post in posts:
+        if any(keyword.lower() in (post['title'] + post['selftext']).lower() for keyword in keywords.split(",")):
+            filtered_posts.append(post)
+    return filtered_posts
+# Step 4: Function to categorize posts into topics
+def categorize_posts(posts):
+    categories = {
+        "AI": ["AI", "machine learning", "deep learning"],
+        "Programming": ["Python", "JavaScript", "coding"],
+        "Tech": ["gadgets", "technology", "innovation"]
+    }
+    categorized_posts = {category: [] for category in categories}
+    for post in posts:
+        for category, keywords in categories.items():
+            if any(keyword.lower() in post['title'].lower() for keyword in keywords):
+                categorized_posts[category].append(post)
+    return categorized_posts
+# Step 5: Main function to process Reddit data
+def process_reddit(subreddits, keywords, time_filter):
+    posts = get_posts(subreddits, keywords, time_filter)
+    filtered = filter_posts(posts, keywords)
+    categorized = categorize_posts(filtered)
+    result = {}
+    for category, posts in categorized.items():
+        result[category] = [{"Title": post["title"], "URL": post["url"]} for post in posts]
+    return result
+# Step 6: Create Gradio interface
+interface = gr.Interface(
+    fn=process_reddit,
+    inputs=[
+        gr.Textbox(label="Subreddits (comma-separated)", placeholder="e.g., technology, python"),
+        gr.Textbox(label="Keywords (comma-separated)", placeholder="e.g., AI, Python, gadgets"),
+        gr.Dropdown(choices=["hour", "day", "week", "month", "year", "all"], value="day", label="Time Filter")
+    ],
+    outputs=gr.JSON(label="Categorized Posts"),
+    title="Reddit Post Filter",
+    description="Search specific subreddits for posts containing certain keywords and categorize them into topics."
+)
+# Launch the Gradio interface
+if __name__ == "__main__":
+    interface.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+praw
+gradio
+pandas