# File size: 3,093 Bytes
# 50e4ef7
# Install necessary libraries
# pip install praw gradio pandas

import os
import re

import gradio as gr
import praw

# Step 1: Create the Reddit API client; credentials come from the environment
reddit = praw.Reddit(
    user_agent="my-reddit-app/1.0",  # static user-agent string
    client_id=os.environ.get("REDDIT_CLIENT_ID"),  # None when the variable is unset
    client_secret=os.environ.get("REDDIT_CLIENT_SECRET"),  # None when the variable is unset
)


# Step 2: Function to fetch posts from subreddits
def get_posts(subreddits, keywords, time_filter="day"):
    """Search each listed subreddit for posts matching any of the keywords.

    Args:
        subreddits: Comma-separated subreddit names, e.g. ``"technology, python"``.
            Surrounding whitespace is ignored and empty entries (such as the
            one left behind by a trailing comma) are skipped.
        keywords: Comma-separated search terms. Each term is stripped, empty
            terms are dropped, and the rest are joined with ``OR`` into one
            search query.
        time_filter: Time window handed to the subreddit search
            (defaults to ``"day"``).

    Returns:
        A list of dicts with the keys ``"title"``, ``"url"``, ``"created"``
        and ``"selftext"``, one per search hit, grouped in the order the
        subreddits were listed. Empty when no usable subreddit name is given.
    """
    names = [name.strip() for name in subreddits.split(",")]
    names = [name for name in names if name]
    if not names:
        # Nothing to search; never ask the API for a subreddit named "".
        return []

    # Drop blank terms so the query never ends in (or contains) a dangling "OR".
    terms = [term.strip() for term in keywords.split(",")]
    query = " OR ".join(term for term in terms if term)

    posts = []
    for name in names:
        for post in reddit.subreddit(name).search(query, time_filter=time_filter):
            posts.append({
                "title": post.title,
                "url": post.url,
                "created": post.created_utc,
                "selftext": post.selftext,
            })
    return posts


# Step 3: Function to filter posts based on keywords
def filter_posts(posts, keywords):
    """Keep only the posts whose title or body mentions one of the keywords.

    Matching is a case-insensitive substring test against the post's title
    and selftext.

    Args:
        posts: Iterable of post dicts providing ``"title"`` and ``"selftext"``.
        keywords: Comma-separated keywords. Whitespace around each keyword is
            ignored and empty entries (e.g. from a trailing comma) are dropped.

    Returns:
        A new list with the matching posts in their original order. When no
        usable keyword is supplied, every post is returned unfiltered.
    """
    # Normalise once up front instead of once per post.
    needles = [keyword.strip().lower() for keyword in keywords.split(",")]
    needles = [needle for needle in needles if needle]
    if not needles:
        # No keywords means "no filter", not "match nothing".
        return list(posts)

    filtered_posts = []
    for post in posts:
        # The newline separator keeps a keyword from matching across the
        # title/body boundary (title "foo" + body "bar" must not match "oba").
        haystack = "\n".join((post["title"], post["selftext"])).lower()
        if any(needle in haystack for needle in needles):
            filtered_posts.append(post)
    return filtered_posts


# Step 4: Function to categorize posts into topics
def categorize_posts(posts):
    """Group posts into the fixed topics "AI", "Programming" and "Tech".

    A post belongs to a topic when its title contains one of that topic's
    keywords as a whole word or phrase, ignoring case. A post may land in
    several topics; a post matching none of them is left out of the result.

    Args:
        posts: Iterable of post dicts providing a ``"title"`` key.

    Returns:
        A dict mapping every topic name to the list of matching posts, in
        input order. Topics without matches map to an empty list.
    """
    categories = {
        "AI": ["AI", "machine learning", "deep learning"],
        "Programming": ["Python", "JavaScript", "coding"],
        "Tech": ["gadgets", "technology", "innovation"],
    }
    # Whole-word matching: a plain substring test would file every title
    # containing "ai" ("said", "again", "email", ...) under "AI".
    patterns = {
        category: re.compile(
            r"\b(?:" + "|".join(re.escape(keyword) for keyword in keywords) + r")\b",
            re.IGNORECASE,
        )
        for category, keywords in categories.items()
    }
    categorized_posts = {category: [] for category in categories}

    for post in posts:
        for category, pattern in patterns.items():
            if pattern.search(post["title"]):
                categorized_posts[category].append(post)
    return categorized_posts


# Step 5: Main function to process Reddit data
def process_reddit(subreddits, keywords, time_filter):
    """Fetch, filter and categorize Reddit posts, then shape them for display.

    Args:
        subreddits: Comma-separated subreddit names, forwarded to ``get_posts``.
        keywords: Comma-separated keywords, used for both the search and the
            follow-up filter.
        time_filter: Time window forwarded to ``get_posts``.

    Returns:
        A dict mapping each category name to a list of
        ``{"Title": ..., "URL": ...}`` dicts, one per post in that category.
    """
    fetched = get_posts(subreddits, keywords, time_filter)
    matching = filter_posts(fetched, keywords)
    by_category = categorize_posts(matching)

    # Reduce every post to the two fields shown in the UI.
    return {
        category: [{"Title": entry["title"], "URL": entry["url"]} for entry in entries]
        for category, entries in by_category.items()
    }


# Step 6: Build the Gradio UI around process_reddit
interface = gr.Interface(
    title="Reddit Post Filter",
    description="Search specific subreddits for posts containing certain keywords and categorize them into topics.",
    fn=process_reddit,
    inputs=[
        # Listed in the same order as process_reddit's parameters:
        # subreddits, keywords, time_filter.
        gr.Textbox(placeholder="e.g., technology, python", label="Subreddits (comma-separated)"),
        gr.Textbox(placeholder="e.g., AI, Python, gadgets", label="Keywords (comma-separated)"),
        gr.Dropdown(label="Time Filter", value="day", choices=["hour", "day", "week", "month", "year", "all"]),
    ],
    outputs=gr.JSON(label="Categorized Posts"),
)

# Start the web UI only when this file is executed as a script
if __name__ == "__main__":
    interface.launch()