perf: update prompt and clean the code
Browse files
- app.py +0 -12
- chainlit.md +9 -1
- prompts.py +13 -19
- scrape_data.py +3 -2
- utils.py +10 -4
app.py
CHANGED
@@ -52,29 +52,17 @@ retriever = ParentDocumentRetriever(
|
|
52 |
|
53 |
@cl.on_chat_start
|
54 |
async def on_chat_start():
|
55 |
-
|
56 |
cl.user_session.set("retriever", retriever)
|
57 |
|
58 |
-
msg = cl.Message(
|
59 |
-
content=f"Vous pouvez poser vos questions sur les articles de SIKAFINANCE",
|
60 |
-
)
|
61 |
-
await msg.send()
|
62 |
-
|
63 |
|
64 |
@cl.on_message
|
65 |
async def on_message(message: cl.Message):
|
66 |
-
|
67 |
-
# retriever = cl.user_session.get("retriever")
|
68 |
-
|
69 |
chain = prompt | model
|
70 |
-
|
71 |
msg = cl.Message(content="")
|
72 |
|
73 |
async with cl.Step(type="run", name="QA Assistant"):
|
74 |
-
|
75 |
question = message.content
|
76 |
context = format_docs(retriever.get_relevant_documents(question))
|
77 |
-
|
78 |
async for chunk in chain.astream(
|
79 |
input={"context": context, "question": question},
|
80 |
config=RunnableConfig(
|
|
|
52 |
|
53 |
@cl.on_chat_start
|
54 |
async def on_chat_start():
|
|
|
55 |
cl.user_session.set("retriever", retriever)
|
56 |
|
|
|
|
|
|
|
|
|
|
|
57 |
|
58 |
@cl.on_message
|
59 |
async def on_message(message: cl.Message):
|
|
|
|
|
|
|
60 |
chain = prompt | model
|
|
|
61 |
msg = cl.Message(content="")
|
62 |
|
63 |
async with cl.Step(type="run", name="QA Assistant"):
|
|
|
64 |
question = message.content
|
65 |
context = format_docs(retriever.get_relevant_documents(question))
|
|
|
66 |
async for chunk in chain.astream(
|
67 |
input={"context": context, "question": question},
|
68 |
config=RunnableConfig(
|
chainlit.md
CHANGED
@@ -2,4 +2,12 @@
|
|
2 |
|
3 |
FinChat est un chatbot conçu par [data354](https://data354.com/) pour répondre aux questions sur l'actualité économique et financière.
|
4 |
|
5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
FinChat est un chatbot conçu par [data354](https://data354.com/) pour répondre aux questions sur l'actualité économique et financière.
|
4 |
|
5 |
+
## Débuter une conversation
|
6 |
+
|
7 |
+
Exemples de questions pour débuter une conversation :
|
8 |
+
|
9 |
+
>**Donne-moi un résumé de l'actualité économique**
|
10 |
+
>**Quels sont les points les plus critiques de l'actualité économique ?**
|
11 |
+
|
12 |
+
Voilà, c'est fait ! Vous pouvez maintenant poser vos questions 💻😊.
|
13 |
+
|
prompts.py
CHANGED
@@ -1,28 +1,22 @@
|
|
1 |
from langchain.prompts import ChatPromptTemplate
|
2 |
|
3 |
template = """
|
4 |
-
|
5 |
-
|
6 |
-
-
|
7 |
-
|
8 |
-
-
|
9 |
-
|
10 |
-
-
|
11 |
-
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
- Pour des question en relative à la date veuillez considerer qu'aujourd'hui est le Jeudi 11/04/2024. Par exemple pour repondre à une question sur l'actualité du jour, vous devez effectuer une comparaison entre les date de publications des articles et celle d'aujourdui pour filtrer sur les articles puis retourner les informations pertinantes.
|
17 |
-
|
18 |
-
<contexte>
|
19 |
-
``{context}``
|
20 |
-
</contexte>
|
21 |
|
22 |
<question>
|
23 |
-
{question}
|
24 |
</question>
|
25 |
-
|
26 |
"""
|
27 |
|
28 |
prompt = ChatPromptTemplate.from_template(template)
|
|
|
1 |
from langchain.prompts import ChatPromptTemplate
|
2 |
|
3 |
template = """
|
4 |
+
You are an economic and financial research assistant, specially designed to answer questions related to economics and finance. Your role is to analyze the potential short- and long-term economic and financial impacts of the events present in the articles provided in context. Discuss how it could affect different stakeholders, including businesses, consumers and investors. Outline possible market reactions and other economic implications.
|
5 |
+
Your role is to analyze the economic and financial news articles provided to you in context, and to respond appropriately to users' specific questions. When answering questions :
|
6 |
+
- For general questions such as "What's the news of the day?", summarize the key information contained in the articles provided to you in context.
|
7 |
+
- For specific questions, such as "What's the trend in the stock market today?", look for information specific to the question in the articles.
|
8 |
+
-Don't hesitate to use your knowledge and common sense to answer questions.
|
9 |
+
- Cite references clearly, including article titles, publication dates and any other relevant details.
|
10 |
+
- If the question goes beyond the scope of the documents provided, or if you cannot find relevant information, politely indicate that the answer cannot be determined on the basis of the available sources.
|
11 |
+
- Answer users in the language of their question. If the question is in French, your answer must be in French. If the question is in English, your answer must be in English.
|
12 |
+
|
13 |
+
<context>
|
14 |
+
{context}
|
15 |
+
</context>
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
<question>
|
18 |
+
{question}
|
19 |
</question>
|
|
|
20 |
"""
|
21 |
|
22 |
prompt = ChatPromptTemplate.from_template(template)
|
scrape_data.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
import os
|
2 |
from datetime import date, timedelta
|
3 |
|
4 |
import bs4
|
@@ -110,7 +109,9 @@ def process_docs(
|
|
110 |
# print("Successfully loaded to document")
|
111 |
|
112 |
# This text splitter is used to create the child documents
|
113 |
-
child_splitter = RecursiveCharacterTextSplitter(
|
|
|
|
|
114 |
|
115 |
# The vectorstore to use to index the child chunks
|
116 |
vectorstore = Chroma(
|
|
|
|
|
1 |
from datetime import date, timedelta
|
2 |
|
3 |
import bs4
|
|
|
109 |
# print("Successfully loaded to document")
|
110 |
|
111 |
# This text splitter is used to create the child documents
|
112 |
+
child_splitter = RecursiveCharacterTextSplitter(
|
113 |
+
chunk_size=chunk_size, chunk_overlap=chunk_overlap, separators=["\n"]
|
114 |
+
)
|
115 |
|
116 |
# The vectorstore to use to index the child chunks
|
117 |
vectorstore = Chroma(
|
utils.py
CHANGED
@@ -1,3 +1,5 @@
|
|
|
|
|
|
1 |
import chainlit as cl
|
2 |
import tiktoken
|
3 |
from langchain.callbacks.base import BaseCallbackHandler
|
@@ -13,7 +15,10 @@ def format_docs(documents, max_context_size=100000, separator="\n\n"):
|
|
13 |
source = doc.metadata["link"]
|
14 |
title = doc.metadata["title"]
|
15 |
context += (
|
16 |
-
f"Article: {title}\n"
|
|
|
|
|
|
|
17 |
)
|
18 |
return context
|
19 |
|
@@ -46,6 +51,7 @@ class PostMessageHandler(BaseCallbackHandler):
|
|
46 |
self.msg.content += f"\nSources: {', '.join(source_names)}"
|
47 |
|
48 |
def clean_text(text):
|
49 |
-
|
50 |
-
|
51 |
-
|
|
|
|
1 |
+
import re
|
2 |
+
|
3 |
import chainlit as cl
|
4 |
import tiktoken
|
5 |
from langchain.callbacks.base import BaseCallbackHandler
|
|
|
15 |
source = doc.metadata["link"]
|
16 |
title = doc.metadata["title"]
|
17 |
context += (
|
18 |
+
f"Article: {title}\n"
|
19 |
+
+ doc.page_content
|
20 |
+
+ f"\nSource: {source}"
|
21 |
+
+ separator
|
22 |
)
|
23 |
return context
|
24 |
|
|
|
51 |
self.msg.content += f"\nSources: {', '.join(source_names)}"
|
52 |
|
53 |
def clean_text(text):
|
54 |
+
text = re.sub("[Tt]weet", "", text) # type: ignore
|
55 |
+
text = re.sub(r"\ +", " ", text)
|
56 |
+
text = re.sub(r"\n+", "\n", text)
|
57 |
+
return text.strip()
|