hiddenVariable committed on
Commit
486fa79
·
verified ·
1 Parent(s): d0971fa

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. .gitignore +2 -0
  2. README.md +5 -9
  3. __pycache__/rag.cpython-311.pyc +0 -0
  4. front_end.py +24 -0
  5. rag.py +80 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ .venv
2
+ .env
README.md CHANGED
@@ -1,12 +1,8 @@
1
  ---
2
- title: Voc Bot
3
- emoji: 😻
4
- colorFrom: red
5
- colorTo: gray
6
  sdk: gradio
7
- sdk_version: 4.44.1
8
- app_file: app.py
9
- pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: voc_bot
3
+ app_file: front_end.py
 
 
4
  sdk: gradio
5
+ sdk_version: 4.42.0
 
 
6
  ---
7
+ # voc_bot
8
+ RAG bot on data scraped by data scraping crews
__pycache__/rag.cpython-311.pyc ADDED
Binary file (4.82 kB). View file
 
front_end.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from rag import mongo_rag_tool
3
+ from gradio.themes.base import Base
4
+
5
+ # Create an instance of GradIO
6
+
7
+
8
+ with gr.Blocks(theme=Base(), title="Market Research and VOC bot") as demo:
9
+ gr.Markdown(
10
+ """
11
+ # VOC App using mined data
12
+ """)
13
+ textbox = gr.Textbox(label="Enter your Question:")
14
+ with gr.Row():
15
+ button = gr.Button("Submit", variant="primary")
16
+ with gr.Column():
17
+ output1 = gr.Textbox(lines=1, max_lines=10, label="Answer:")
18
+ output2 = gr.Textbox(lines=1, max_lines=10, label="Sources:")
19
+
20
+ # Call query_data function upon clicking the Submit button
21
+
22
+ button.click(mongo_rag_tool, textbox, outputs=[output1, output2])
23
+
24
+ demo.launch(share=True)
rag.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from langchain_openai import OpenAIEmbeddings
3
+ from langchain_mongodb.vectorstores import MongoDBAtlasVectorSearch
4
+ from langchain_core.prompts import PromptTemplate
5
+ from langchain.chains import RetrievalQA
6
+ from langchain_openai import ChatOpenAI
7
+ import logging
8
+ from dotenv import load_dotenv
9
+
10
+ load_dotenv()
11
+
12
+
13
+ INDEX_NAME = "vector_index"
14
+ DATABASE_NAME = "scraped_data_db"
15
+
16
+ def mongo_rag_tool(query: str) -> str:
17
+ """
18
+ This function is used to retrieve documents from a MongoDB database and then use the RAG model to answer the query.
19
+ The documents that are most semantically close to the query are returned.
20
+ args:
21
+ query: str: The query that you want to use to retrieve documents
22
+ collection_name: str: The name of the collection in the MongoDB database
23
+ output_filename: str: The name of the output file where the results will be saved
24
+ returns:
25
+ str: The answer to the query
26
+ """
27
+ try:
28
+ collection_name = os.getenv("MONGODB_COLLECTION_NAME")
29
+ # Connect to the MongoDB database
30
+ openai_api_key = os.getenv("OPENAI_API_KEY")
31
+ embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key, disallowed_special=(), model="text-embedding-3-small")
32
+
33
+ uri = os.getenv("MONGO_CONNECTION_STRING")
34
+ logging.info("Creating the mongo vector search object")
35
+ vector_search = MongoDBAtlasVectorSearch.from_connection_string(
36
+ uri,
37
+ DATABASE_NAME + "." + collection_name,
38
+ embeddings,
39
+ index_name=INDEX_NAME,
40
+ )
41
+
42
+ logging.info("Retrieving the documents and answering the query")
43
+ # Retrieve the documents that are most semantically close to the query, exclude ones that are less similar than the threshold
44
+ post_filter = [{"$project": {"_id": 0,"text": 1,"source": 1,"score":1,"embedding":1}}]
45
+ qa_retriever = vector_search.as_retriever(
46
+ search_type="mmr",
47
+ search_kwargs={"k": 10, 'fetch_k':100, "post_filter_pipeline": post_filter},
48
+ )
49
+
50
+ prompt_template = """Use the following pieces of context to answer the question at the end.
51
+ If you don't know the answer, just say that you don't know, don't try to make up an answer.
52
+ If you know the answer give a comprehensive, detailed and insightful answer.
53
+ {context}
54
+ Question: {question}
55
+ """
56
+ PROMPT = PromptTemplate(
57
+ template=prompt_template, input_variables=["context", "question"]
58
+ )
59
+ qa = RetrievalQA.from_chain_type(
60
+ llm=ChatOpenAI(api_key=openai_api_key, model="gpt-4o", temperature=0.2),
61
+ chain_type="stuff",
62
+ retriever=qa_retriever,
63
+ return_source_documents=True,
64
+ chain_type_kwargs={"prompt": PROMPT},
65
+ )
66
+
67
+ docs = qa.invoke({"query": query})
68
+
69
+ if docs:
70
+ logging.info("Saving the retrieved documents")
71
+ sources = docs["source_documents"]
72
+
73
+ source_list = [{"content":result.page_content, "source":result.metadata["source"]} for result in sources]
74
+ formatted_sources = "\n".join([f"Content: {source['content']}\nSource: {source['source']}\n" for source in source_list])
75
+ return docs["result"], formatted_sources
76
+ except Exception as e:
77
+ logging.error(f"An error occurred: {str(e)}")
78
+ return f"An error occurred: {str(e)}", "An error occurred: {str(e)}"
79
+
80
+ #mongo_rag_tool("What do people think about caterpillar vision link fleet management app")