Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- .gitignore +2 -0
- README.md +5 -9
- __pycache__/rag.cpython-311.pyc +0 -0
- front_end.py +24 -0
- rag.py +80 -0
.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
.venv
|
2 |
+
.env
|
README.md
CHANGED
@@ -1,12 +1,8 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
|
4 |
-
colorFrom: red
|
5 |
-
colorTo: gray
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 4.
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
---
|
11 |
-
|
12 |
-
|
|
|
1 |
---
|
2 |
+
title: voc_bot
|
3 |
+
app_file: front_end.py
|
|
|
|
|
4 |
sdk: gradio
|
5 |
+
sdk_version: 4.42.0
|
|
|
|
|
6 |
---
|
7 |
+
# voc_bot
|
8 |
+
RAG bot on data scraped by data scraping crews
|
__pycache__/rag.cpython-311.pyc
ADDED
Binary file (4.82 kB). View file
|
|
front_end.py
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
from gradio.themes.base import Base

from rag import mongo_rag_tool

# Gradio front end for the VOC RAG bot: a single question box, a submit
# button, and two read-only boxes showing the answer and its sources.
with gr.Blocks(theme=Base(), title="Market Research and VOC bot") as demo:
    gr.Markdown(
        """
        # VOC App using mined data
        """)

    question_box = gr.Textbox(label="Enter your Question:")

    with gr.Row():
        submit_btn = gr.Button("Submit", variant="primary")

    with gr.Column():
        answer_box = gr.Textbox(lines=1, max_lines=10, label="Answer:")
        sources_box = gr.Textbox(lines=1, max_lines=10, label="Sources:")

    # mongo_rag_tool returns (answer, sources); wire each element of the
    # tuple to its own output textbox.
    submit_btn.click(mongo_rag_tool, question_box, outputs=[answer_box, sources_box])

demo.launch(share=True)
|
rag.py
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from langchain_openai import OpenAIEmbeddings
|
3 |
+
from langchain_mongodb.vectorstores import MongoDBAtlasVectorSearch
|
4 |
+
from langchain_core.prompts import PromptTemplate
|
5 |
+
from langchain.chains import RetrievalQA
|
6 |
+
from langchain_openai import ChatOpenAI
|
7 |
+
import logging
|
8 |
+
from dotenv import load_dotenv
|
9 |
+
|
10 |
+
load_dotenv()
|
11 |
+
|
12 |
+
|
13 |
+
# Name of the Atlas vector search index used by MongoDBAtlasVectorSearch.
INDEX_NAME = "vector_index"
# MongoDB database holding the scraped-data collections.
DATABASE_NAME = "scraped_data_db"
|
15 |
+
|
16 |
+
def mongo_rag_tool(query: str) -> tuple[str, str]:
    """
    Retrieve documents from a MongoDB Atlas vector store and answer the query with RAG.

    The documents most semantically close to the query are retrieved with an MMR
    search, passed as context to a GPT-4o RetrievalQA chain, and the model's
    answer is returned together with a formatted listing of the source documents.

    args:
        query: str: The question to answer using the retrieved documents.
    returns:
        tuple[str, str]: (answer, formatted sources). On failure both elements
        contain the error message so both Gradio output boxes stay populated.
    """
    try:
        # Connection details and credentials come from the environment
        # (.env loaded at module import via load_dotenv()).
        collection_name = os.getenv("MONGODB_COLLECTION_NAME")
        openai_api_key = os.getenv("OPENAI_API_KEY")
        embeddings = OpenAIEmbeddings(
            openai_api_key=openai_api_key,
            disallowed_special=(),
            model="text-embedding-3-small",
        )

        uri = os.getenv("MONGO_CONNECTION_STRING")
        logging.info("Creating the mongo vector search object")
        vector_search = MongoDBAtlasVectorSearch.from_connection_string(
            uri,
            DATABASE_NAME + "." + collection_name,
            embeddings,
            index_name=INDEX_NAME,
        )

        logging.info("Retrieving the documents and answering the query")
        # Project only the fields the formatter below actually reads.
        post_filter = [{"$project": {"_id": 0, "text": 1, "source": 1, "score": 1, "embedding": 1}}]
        qa_retriever = vector_search.as_retriever(
            search_type="mmr",
            search_kwargs={"k": 10, "fetch_k": 100, "post_filter_pipeline": post_filter},
        )

        prompt_template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
If you know the answer give a comprehensive, detailed and insightful answer.
{context}
Question: {question}
"""
        PROMPT = PromptTemplate(
            template=prompt_template, input_variables=["context", "question"]
        )
        qa = RetrievalQA.from_chain_type(
            llm=ChatOpenAI(api_key=openai_api_key, model="gpt-4o", temperature=0.2),
            chain_type="stuff",
            retriever=qa_retriever,
            return_source_documents=True,
            chain_type_kwargs={"prompt": PROMPT},
        )

        docs = qa.invoke({"query": query})

        if docs:
            logging.info("Saving the retrieved documents")
            sources = docs["source_documents"]

            source_list = [
                {"content": result.page_content, "source": result.metadata["source"]}
                for result in sources
            ]
            formatted_sources = "\n".join(
                [f"Content: {source['content']}\nSource: {source['source']}\n" for source in source_list]
            )
            return docs["result"], formatted_sources

        # FIX: previously this path fell through and implicitly returned None,
        # which the two Gradio output boxes cannot unpack; return explicit text.
        return "No answer was generated for this query.", ""
    except Exception as e:
        logging.error(f"An error occurred: {str(e)}")
        # FIX: the second element was missing its f-prefix, so the UI displayed
        # the literal text "{str(e)}" instead of the actual error message.
        return f"An error occurred: {str(e)}", f"An error occurred: {str(e)}"

#mongo_rag_tool("What do people think about caterpillar vision link fleet management app")
|