antoineross commited on
Commit
fd03f1c
·
1 Parent(s): f3b82e9

first commit

Browse files
Files changed (11) hide show
  1. .env.example +3 -0
  2. .gitignore +7 -0
  3. Dockerfile +29 -0
  4. OAI_CONFIG_LIST +6 -0
  5. README.md +46 -5
  6. app.py +364 -0
  7. articles.txt +1 -0
  8. chainlit.md +3 -0
  9. newsAPI.py +59 -0
  10. redditAPI.py +59 -0
  11. requirements.txt +5 -0
.env.example ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ OPENAI_API_KEY=
2
+ NEWSAPI_API_KEY=
3
+ REDDIT_ACCESS_TOKEN=
.gitignore ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ .env
2
+ .cache
3
+ __pycache__
4
+ poetry.lock
5
+ *.yaml
6
+ *.toml
7
+ *.lock
Dockerfile ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11
2
+
3
+ # Create a non-root user with home directory
4
+ RUN useradd -m -u 1000 user
5
+
6
+ # Set user and environment variables
7
+ USER user
8
+ ENV HOME=/home/user \
9
+ PATH=/home/user/.local/bin:$PATH
10
+
11
+ # Set the working directory in the container
12
+ WORKDIR $HOME/app
13
+
14
+ # Copy the requirements.txt file to the container
15
+ COPY requirements.txt $HOME/app/
16
+
17
+ # Install Python dependencies from requirements.txt
18
+ RUN pip install --user -r $HOME/app/requirements.txt
19
+
20
+ # Copy the application files, including app.py
21
+ COPY --chown=user:user . $HOME/app/
22
+
23
+ # Ensure user has write permission to the app directory
24
+ USER root
25
+ RUN chown -R user:user $HOME/app
26
+ USER user
27
+
28
+ # Specify the command to run your application
29
+ CMD ["chainlit", "run", "app.py", "--port", "7860"]
OAI_CONFIG_LIST ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "model": "gpt-4-1106-preview",
4
+ "api_key": "OPENAI_API_KEY"
5
+ }
6
+ ]
README.md CHANGED
@@ -1,11 +1,52 @@
1
  ---
2
- title: Autogen Article Generator
3
- emoji: 🐢
4
- colorFrom: red
5
- colorTo: indigo
6
  sdk: docker
7
  pinned: false
8
  license: apache-2.0
9
  ---
10
 
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Soap Opera
3
+ emoji: 🔥
4
+ colorFrom: indigo
5
+ colorTo: green
6
  sdk: docker
7
  pinned: false
8
  license: apache-2.0
9
  ---
10
 
11
+ # Installation and Setup
12
+ You will need Python, Conda, Docker (Optional for code-execution), Git, and a text editor installed.
13
+
14
+ First install python=3.11 and other 3rd party dependencies. If you have conda installed, you can run the following commands:
15
+
16
+ ```shell
17
+ conda create --name demo python=3.11 -y
18
+ conda activate demo
19
+
20
+ pip install -r requirements.txt
21
+ ```
22
+
23
+ If you do not have conda installed but have virtualenv installed, you can run the following commands:
24
+ ```shell
25
+ pip install virtualenv
26
+ virtualenv demo -p python3
27
+
28
+ # on windows
29
+ demo\Scripts\activate
30
+ # on mac/linux
31
+ source demo/bin/activate
32
+
33
+ pip install -r requirements.txt
34
+ ```
35
+
36
+ # Configure the environment variables
37
+
38
+ # Usage
39
+ Run the following command to start the chat interface.
40
+
41
+ ```shell
42
+ chainlit run app.py
43
+ ```
44
+
45
+ # File Structure
46
+
47
+ This is an example of using the chainlit chat interface with multi-agent conversation between agents to complete a tasks.
48
+
49
+ The tool was developed to grab SAP data online and then process it to easily digestible human language.
50
+
51
+ `app.py` - Starts the chat interface.
52
+
app.py ADDED
@@ -0,0 +1,364 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This is the source code for the webscraper agents that use ChainLit
3
+ Features:
4
+ - Uses top N google search based on a keyword, create a JSON file, uploads it to Google Cloud Object Storage or Locally
5
+ - Capable of asking Reddit for questions
6
+ - Continuous messaging
7
+ - Multithreading
8
+ Written by: Antoine Ross - October 2023.
9
+ """
10
+
11
+ import os
12
+ from typing import Dict, Optional, Union
13
+ from dotenv import load_dotenv, find_dotenv
14
+
15
+ import chainlit as cl
16
+ from chainlit.client.base import ConversationDict
17
+ from chainlit.types import AskFileResponse
18
+ from langchain.document_loaders import PyPDFLoader, TextLoader
19
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
20
+ from langchain.chains import ConversationalRetrievalChain
21
+
22
+ import autogen
23
+ from autogen import Agent, AssistantAgent, UserProxyAgent, config_list_from_json
24
+ from redditAPI import grab_articles
25
+
26
+ load_dotenv(find_dotenv())
27
+
28
+ CONTEXT = """- Task: ArticleGPT specializes in creating SEO-optimized articles specifically tailored for [Medium.com](http://medium.com/). The target audience are people who are looking to get started with learning AI & Machine Learning and LLM’s for their use-case.
29
+
30
+
31
+ Output Specifications:
32
+
33
+ • Output Style and Format: Craft articles that fit [Medium.com](http://medium.com/)'s style, being straightforward and concise. Ensure grammatical accuracy, coherence, and stylistic refinement. Use hooks and effective whitespace management to maintain reader's attention. Humor and Sarcasm: Including a directive for humor and sarcasm to utilize rhetorical devices, which could make the text feel more human-like.
34
+
35
+ • Tone: The tone is conversational and likable, similar to Morgan Freeman's speech style.
36
+
37
+ • Titles and Subheadings: Create titles and subheadings that are Impactful, concise and effectively capturing the content's essence.
38
+
39
+ • Titles: 5-9 words, with numbers for higher click-through rates. Prefer negative or neutral tones.
40
+
41
+ • Headlines: Structure in two parts, main and sub-headline.
42
+
43
+ • Subheadings: Spark curiosity with questions, action words, and numbers; emphasize benefits.
44
+
45
+ • Content balancing simplicity, engagement, and SEO optimization for Medium.
46
+
47
+
48
+
49
+ Sample output:
50
+
51
+ Title
52
+
53
+ Subheading 1
54
+
55
+ paragraph 1: Explain concisely the core of the article. How it can be useful for their use-case. (2-3 sentences)
56
+
57
+ Subheading 2
58
+
59
+ paragraph 2: Tell the readers how doing/having three things can dramatically improve results. (1-2 sentences)
60
+
61
+ [3 bullet points or 3 numbered list to support paragraph 2]
62
+
63
+ paragraph 3: summarize the bullet points and how it can be useful for the reader. (1-2 sentences)
64
+
65
+ Subheading 3
66
+
67
+ paragraph 4: Concluding Anecdote or Opinion: Requesting a final 'personal' touch is intended to leave the reader with a sense of individual perspective, something that machine-generated text often lacks. (2-3 sentences)
68
+ """
69
+
70
+ # Agents
71
+ USER_PROXY_NAME = "User Proxy"
72
+ PROOF_READER = "Proofreader"
73
+ WRITER = "Writer"
74
+ EMOTIONAL_STRATEGIST = "Emotional Impact Strategist"
75
+ NARRATIVE_DESIGNER = "Narrative Designer"
76
+ STYLIST = "Style Specialist"
77
+ ARTICLES = None
78
+
79
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=8192, chunk_overlap=100)
80
+
81
+ def load_articles(file_path):
82
+ try:
83
+ with open(file_path, 'r') as file:
84
+ article = file.read()
85
+ return article
86
+ except FileNotFoundError:
87
+ print("File not found")
88
+ return None
89
+
90
+ # Function to process the file
91
+ def process_file(file: AskFileResponse):
92
+ import tempfile
93
+
94
+ if file.type == "text/plain":
95
+ Loader = TextLoader
96
+ elif file.type == "application/pdf":
97
+ Loader = PyPDFLoader
98
+
99
+ with tempfile.NamedTemporaryFile(mode="wb", delete=False) as tempfile:
100
+ if file.type == "text/plain":
101
+ tempfile.write(file.content)
102
+ elif file.type == "application/pdf":
103
+ with open(tempfile.name, "wb") as f:
104
+ f.write(file.content)
105
+
106
+ loader = Loader(tempfile.name)
107
+ documents = loader.load()
108
+ docs = text_splitter.split_documents(documents)
109
+ for i, doc in enumerate(docs):
110
+ doc.metadata["source"] = f"source_{i}"
111
+ cl.user_session.set("docs", docs)
112
+ return docs
113
+
114
+ async def ask_helper(func, **kwargs):
115
+ res = await func(**kwargs).send()
116
+ while not res:
117
+ res = await func(**kwargs).send()
118
+ return res
119
+
120
+ class ChainlitAssistantAgent(AssistantAgent):
121
+ """
122
+ Wrapper for AutoGens Assistant Agent
123
+ """
124
+ def send(
125
+ self,
126
+ message: Union[Dict, str],
127
+ recipient: Agent,
128
+ request_reply: Optional[bool] = None,
129
+ silent: Optional[bool] = False,
130
+ ) -> bool:
131
+ cl.run_sync(
132
+ cl.Message(
133
+ content=f'*Sending message to "{recipient.name}":*\n\n{message}',
134
+ author=self.name,
135
+ ).send()
136
+ )
137
+ super(ChainlitAssistantAgent, self).send(
138
+ message=message,
139
+ recipient=recipient,
140
+ request_reply=request_reply,
141
+ silent=silent,
142
+ )
143
+ class ChainlitUserProxyAgent(UserProxyAgent):
144
+ """
145
+ Wrapper for AutoGens UserProxy Agent. Simplifies the UI by adding CL Actions.
146
+ """
147
+ def get_human_input(self, prompt: str) -> str:
148
+ if prompt.startswith(
149
+ "Provide feedback to chat_manager. Press enter to skip and use auto-reply"
150
+ ):
151
+ res = cl.run_sync(
152
+ ask_helper(
153
+ cl.AskActionMessage,
154
+ content="Continue or provide feedback?",
155
+ actions=[
156
+ cl.Action( name="continue", value="continue", label="✅ Continue" ),
157
+ cl.Action( name="feedback",value="feedback", label="💬 Provide feedback"),
158
+ cl.Action( name="exit",value="exit", label="🔚 Exit Conversation" )
159
+ ],
160
+ )
161
+ )
162
+ if res.get("value") == "continue":
163
+ return ""
164
+ if res.get("value") == "exit":
165
+ return "exit"
166
+
167
+ reply = cl.run_sync(ask_helper(cl.AskUserMessage, content=prompt, timeout=60))
168
+
169
+ return reply["content"].strip()
170
+
171
+ def send(
172
+ self,
173
+ message: Union[Dict, str],
174
+ recipient: Agent,
175
+ request_reply: Optional[bool] = None,
176
+ silent: Optional[bool] = False,
177
+ ):
178
+ cl.run_sync(
179
+ cl.Message(
180
+ content=f'*Sending message to "{recipient.name}"*:\n\n{message}',
181
+ author=self.name,
182
+ ).send()
183
+ )
184
+ super(ChainlitUserProxyAgent, self).send(
185
+ message=message,
186
+ recipient=recipient,
187
+ request_reply=request_reply,
188
+ silent=silent,
189
+ )
190
+
191
+ config_list = autogen.config_list_from_dotenv(
192
+ dotenv_file_path='.env',
193
+ model_api_key_map={
194
+ "gpt-3.5-turbo": "OPENAI_API_KEY",
195
+ },
196
+ filter_dict={
197
+ "model": {
198
+ "gpt-3.5-turbo",
199
+ }
200
+ }
201
+ )
202
+
203
+ @cl.action_callback("confirm_action")
204
+ async def on_action(action: cl.Action):
205
+ if action.value == "everything":
206
+ content = "everything"
207
+ elif action.value == "top-headlines":
208
+ content = "top_headlines"
209
+ else:
210
+ await cl.ErrorMessage(content="Invalid action").send()
211
+ return
212
+
213
+ prev_msg = cl.user_session.get("url_actions") # type: cl.Message
214
+ if prev_msg:
215
+ await prev_msg.remove_actions()
216
+ cl.user_session.set("url_actions", None)
217
+
218
+ await cl.Message(content=content).send()
219
+
220
+ @cl.on_chat_start
221
+ async def on_chat_start():
222
+ OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
223
+
224
+
225
+ try:
226
+ # app_user = cl.user_session.get("user")
227
+ # await cl.Message(f"Hello {app_user.username}").send()
228
+ # config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST")
229
+ llm_config = {"config_list": config_list, "api_key": OPENAI_API_KEY, "seed": 42, "request_timeout": 60, "retry_wait_time": 60}
230
+ proof_reader = ChainlitAssistantAgent(
231
+ name="Proof_Reader", llm_config=llm_config,
232
+ system_message="""Proofreader. Focuses on grammatical accuracy and stylistic refinement, ensuring that articles meet Medium.com's standards.
233
+ Enhances clarity and coherence while maintaining a conversational, likable tone akin to Morgan Freeman's speech style.
234
+ Assures the use of effective hooks and whitespace management to keep the reader's attention.
235
+ Ensures articles are straightforward, concise, and free of filler words, with minimal use of humor and sarcasm."""
236
+ )
237
+ writer = ChainlitAssistantAgent(
238
+ name="Writer", llm_config=llm_config,
239
+ system_message="""Writer. Develops SEO-optimized, engaging content tailored for Medium.com's audience interested in AI & Machine Learning.
240
+ Writes with a conversational and likable tone, ensuring simplicity and engagement.
241
+ Crafts impactful, concise titles and subheadings, with titles of 5-9 words incorporating numbers, and negative or neutral tones.
242
+ Structures content with effective subheadings and bullet points to facilitate reader understanding and engagement."""
243
+ )
244
+ narrative_designer = ChainlitAssistantAgent(
245
+ name="Narrative_Designer", llm_config=llm_config,
246
+ system_message="""Narrative Designer. Structures the article to maintain engagement and curiosity, using questions, action words, and numbers in subheadings.
247
+ Collaborates with the Writer and Emotional Impact Strategist to ensure the narrative is clear, concise, and resonates with the target audience.
248
+ Advises on the narrative flow to maintain reader interest and optimize for SEO."""
249
+ )
250
+ stylist = ChainlitAssistantAgent(
251
+ name="Style_Specialist", llm_config=llm_config,
252
+ system_message="""Style Specialist. Refines tone and style to be conversational and likable, aligning with the Morgan Freeman style.
253
+ Ensures the use of effective rhetoric, including minimal humor and sarcasm, to enhance readability and engagement.
254
+ Collaborates with the Writer and Proofreader to ensure stylistic consistency throughout the article."""
255
+ )
256
+ emotional_impact_strategist = ChainlitAssistantAgent(
257
+ name="Emotional_Strategist", llm_config=llm_config,
258
+ system_message="""Develops strategies for titles and subheadings that are impactful, concise, and evoke curiosity.
259
+ Advises on incorporating emotional cues that resonate with the audience's interests in AI and Machine Learning.
260
+ Collaborates with the Narrative Designer and Style Specialist to ensure a unified approach in content framing.
261
+ """
262
+ )
263
+ user_proxy = ChainlitUserProxyAgent(
264
+ name="User_Proxy",
265
+ human_input_mode="ALWAYS",
266
+ llm_config=llm_config,
267
+ # max_consecutive_auto_reply=3,
268
+ # is_termination_msg=lambda x: x.get("content", "").rstrip().endswith("TERMINATE"),
269
+ code_execution_config=False,
270
+ system_message="""User Proxy. Provides feedback on the article's effectiveness in engaging readers interested in AI and Machine Learning.
271
+ Ensures the article meets overall objectives and resonates with the intended audience.
272
+ Relays audience and management preferences to the team for necessary adjustments."""
273
+ )
274
+
275
+ cl.user_session.set(USER_PROXY_NAME, user_proxy)
276
+ cl.user_session.set(PROOF_READER, proof_reader)
277
+ cl.user_session.set(WRITER, writer)
278
+ cl.user_session.set(STYLIST, stylist)
279
+ cl.user_session.set(NARRATIVE_DESIGNER, narrative_designer)
280
+ cl.user_session.set(EMOTIONAL_STRATEGIST, emotional_impact_strategist)
281
+
282
+ doc = cl.Action( name="doc", value="doc", label="Document" )
283
+ no_doc = cl.Action( name="no_doc", value="no_doc", label="NoDocument" )
284
+ idea = cl.Action( name="Idea", value="Idea", label="Idea" )
285
+ no_idea = cl.Action( name="NoIdea", value="NoIdea", label="NoIdea" )
286
+ idea_actions = [idea, no_idea]
287
+ doc_actions = [doc, no_doc]
288
+
289
+ IDEA_option = cl.AskActionMessage(
290
+ content="Hi, let’s generate some Article ideas. Would you like to generate ideas from Reddit, or continue?",
291
+ actions=idea_actions,
292
+ )
293
+ await IDEA_option.send()
294
+
295
+ IDEA_option = IDEA_option.content.split()[-1]
296
+ if IDEA_option == "Idea":
297
+ print("Using document...")
298
+ TOPIC = None
299
+ while TOPIC is None:
300
+ TOPIC = await cl.AskUserMessage(content="What topic would you like to make an Article about? [Only send one keyword.]", timeout=180).send()
301
+
302
+ print("Topic: ", TOPIC['content'])
303
+ msg = cl.Message(
304
+ content=f"Processing data from Reddit...", disable_human_feedback=True
305
+ )
306
+ await msg.send()
307
+
308
+ articles = grab_articles(TOPIC['content'])
309
+ msg = cl.Message(
310
+ content=f"Content from Reddit loaded: \n{articles}", disable_human_feedback=True
311
+ )
312
+ await msg.send()
313
+ elif IDEA_option == "NoIdea":
314
+ article_path = "articles.txt"
315
+ articles = load_articles(article_path)
316
+ print("Articles grabbed.")
317
+
318
+ msg = cl.Message(content=f"Processing `{articles}`...", disable_human_feedback=True, author="User_Proxy")
319
+ await msg.send()
320
+
321
+ cl.user_session.set(ARTICLES, articles)
322
+ print("Articles set...")
323
+
324
+ msg = cl.Message(content=f"""This is the Article Generation Team, please give a topic to create an Article about.""",
325
+ disable_human_feedback=True,
326
+ author="User_Proxy")
327
+ await msg.send()
328
+
329
+ except Exception as e:
330
+ print("Error: ", e)
331
+ pass
332
+
333
+ @cl.on_message
334
+ async def run_conversation(message: cl.Message):
335
+ #try:
336
+ MESSAGE = message.content
337
+ print("Task: ", MESSAGE)
338
+ proof_reader = cl.user_session.get(PROOF_READER)
339
+ user_proxy = cl.user_session.get(USER_PROXY_NAME)
340
+ writer = cl.user_session.get(WRITER)
341
+ stylist = cl.user_session.get(STYLIST)
342
+ narrative_designer = cl.user_session.get(NARRATIVE_DESIGNER)
343
+ emotional_impact_strategist = cl.user_session.get(EMOTIONAL_STRATEGIST)
344
+ articles = cl.user_session.get(ARTICLES)
345
+
346
+ groupchat = autogen.GroupChat(agents=[user_proxy, proof_reader, writer,stylist, narrative_designer,emotional_impact_strategist ], messages=[], max_round=50)
347
+ manager = autogen.GroupChatManager(groupchat=groupchat)
348
+
349
+ print("Initiated GC messages... \nGC messages length: ", len(groupchat.messages))
350
+
351
+ if len(groupchat.messages) == 0:
352
+ message = f"""Use this content as background for the articles you will make: {articles}.
353
+ First create 10 ideas, then 5, then 3, then 1.
354
+ Finalize the ideas with the planner and make sure to follow the criteria of choosing based on: "What will be the most dramatic, emotional and entertaining idea".
355
+ Do not express gratitude in responses.
356
+ \nThe topic of the article will be about: """ + MESSAGE + """The final output should look like: \n""" + CONTEXT
357
+ await cl.Message(content=f"""Starting agents on task of creating a Article...""").send()
358
+ await cl.make_async(user_proxy.initiate_chat)( manager, message=message, )
359
+ else:
360
+ await cl.make_async(user_proxy.send)( manager, message=MESSAGE, )
361
+
362
+ # except Exception as e:
363
+ # print("Error: ", e)
364
+ # pass
articles.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ [Topic to help the Article generation]
chainlit.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Autogen Article Generator
2
+
3
+ This is an example of using the chainlit chat interface with multi-agent conversation between agents to make sophisticated articles.
newsAPI.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import json
3
+ from dotenv import load_dotenv
4
+ import os
5
+
6
+ load_dotenv()
7
+
8
+ API_KEY = 'NEWSAPI_API_KEY'
9
+
10
+ # Functions ------------------------------------------------------------------------------------------------------------->
11
+ def extract_article_info_from_list(article_list):
12
+ try:
13
+ # Initialize a string to store the extracted information
14
+ extracted_info = ""
15
+
16
+ # Extract and store the desired information for each article
17
+ for article in article_list:
18
+ description = article.get("description", "N/A")
19
+ title = article.get("title", "N/A")
20
+ author = article.get("author", "N/A")
21
+ content = article.get("content", "N/A")
22
+
23
+ # Append the information to the string
24
+ extracted_info += f"Title: {title}\nAuthor: {author}\nDescription: {description}\nContent: {content}\n\n"
25
+
26
+ return extracted_info
27
+
28
+ except Exception as e:
29
+ # Handle any exceptions
30
+ return f"An error occurred: {str(e)}"
31
+
32
+ def grab_articles(url, **kwargs):
33
+ response = requests.get(url, 5)
34
+
35
+ if response.status_code == 200:
36
+ extracted_article = extract_article_info_from_list(response.json()['articles'])
37
+ print(extracted_article)
38
+ return extracted_article
39
+ else:
40
+ raise Exception(f"Error: {response.status_code}, {response.text}")
41
+
42
+ # URL Configuration Here ------------------------------------------------------------------------------------------------------------->
43
+
44
+ BASE_URL_EVERYTHING = 'https://newsapi.org/v2/everything?'
45
+ BASE_URL_TOP_HEADLINES = 'https://newsapi.org/v2/top-headlines?'
46
+ TOPIC = 'bitcoin'
47
+ FROM = '2023-01-08' # Starting date
48
+ TO = '2023-11-08' # Ending date
49
+ SORTBY = 'popularity' # Relevancy, Popularity, PublishedAt
50
+ DOMAINS = 'techcrunch.com, thenextweb.com' # Domains you want to use.
51
+ COUNTRY = 'gb&' # Options: US, AU, JP, PH etc.
52
+ SOURCES = 'bbc-news'
53
+ CATEGORY = 'business' # Options: business, entertainment, general health, science, sports, technology
54
+
55
+ # Everything URL list
56
+ urls_everything = f"{BASE_URL_EVERYTHING}q={TOPIC}&domains={DOMAINS}&sortBy={SORTBY}&apiKey={API_KEY}"
57
+
58
+ # Top Headlines URL list
59
+ urls_top_headlines = f"{BASE_URL_TOP_HEADLINES}topic={TOPIC}&country={COUNTRY}&category={CATEGORY}&sources={SOURCES}&apiKey={API_KEY}"
redditAPI.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from dotenv import load_dotenv
3
+ import os
4
+
5
+ load_dotenv()
6
+
7
+ access_token = os.getenv('REDDIT_ACCESS_TOKEN')
8
+
9
+ headers = {
10
+ 'Authorization': f'bearer {access_token}',
11
+ 'User-Agent': 'MyAPI/0.0.1'
12
+ }
13
+
14
+ # Subreddits of interest.
15
+ subreddits = [
16
+ 'artificialintelligence', 'machinelearning', 'indiehacking',
17
+ ]
18
+
19
+ # Function to fetch top 3 comments from a post
20
+ def fetch_top_comments(post_id):
21
+ url = f'https://oauth.reddit.com/comments/{post_id}'
22
+ response = requests.get(url, headers=headers)
23
+ if response.status_code == 200:
24
+ post_data = response.json()
25
+ comments = post_data[1]['data']['children']
26
+ top_comments = [comment['data']['body'] for comment in comments[:1]] # Get top 3 comments
27
+ return top_comments
28
+ else:
29
+ return []
30
+
31
+ def grab_articles(topic):
32
+ data = []
33
+ print("Topic: ", topic)
34
+ for subreddit in subreddits:
35
+ # Search for the topic within each subreddit. Increase limit to get more posts.
36
+ search_url = f'https://oauth.reddit.com/r/{subreddit}/search?q={topic}&restrict_sr=on&sort=hot&limit=1'
37
+ res = requests.get(search_url, headers=headers)
38
+
39
+ if res.status_code == 200:
40
+ posts_data = res.json()['data']['children']
41
+
42
+ for post in posts_data:
43
+ post_data = post['data']
44
+ post_id = post_data['id']
45
+ title = post_data['title']
46
+ selftext = post_data['selftext']
47
+ top_comments = fetch_top_comments(post_id)
48
+ data.append({
49
+ 'subreddit': subreddit,
50
+ 'title': title,
51
+ 'post_content': selftext,
52
+ 'top_comments': top_comments
53
+ })
54
+ else:
55
+ print(f"Failed to fetch data from {subreddit}")
56
+
57
+ # Example output
58
+ print(data)
59
+ return data
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ chainlit==0.7.501
2
+ pyautogen==0.1.13
3
+ python-dotenv==1.0.0
4
+ requests==2.31.0
5
+ langchain==0.0.322