Spaces:

Petermoyano
/

langchain-docs-chatbot

Sleeping

Petermoyano committed on Sep 9, 2023

Commit

3d3f248

1 Parent(s): dc78e2e

implement RetrievalQA chain

Files changed (3) hide show

backend/core.py ADDED Viewed

+import os
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.chat_models import ChatOpenAI
+from langchain.chains import RetrievalQA
+from langchain.vectorstores import Pinecone
+import pinecone
+from consts import INDEX_NAME
+# Initialize the Pinecone client when this module is imported; both environment variables must be set (os.environ[...] raises KeyError otherwise).
+pinecone.init(api_key=os.environ["PINECONE_API_KEY"],
+              environment=os.environ["PINECONE_ENVIRONMENT"])
+def run_llm(query: str) -> any:
+    embeddings = OpenAIEmbeddings()
+    # instance of vector db
+    docsearch = Pinecone.from_existing_index(
+        index_name=INDEX_NAME, embedding=embeddings)
+    chat = ChatOpenAI(verbose=True, temperature=0)
+    # The RetrievalQA chain needs a retriever, which we can create by using the .as_retriever() method
+    qa = RetrievalQA.from_chain_type(
+        llm=chat, chain_type="stuff", retriever=docsearch.as_retriever(), return_source_documents=True)
+    return qa({"query": query})
+if __name__ == '__main__':  # only runs when this file is executed directly as a script
+    print(run_llm("What are the core modules of LangChain?"))

consts.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ INDEX_NAME = "langchain-docs-index"

ingestion.py CHANGED Viewed

@@ -9,6 +9,7 @@ from langchain.vectorstores import Pinecone
 import os
 import pinecone
 # initialize pinecone client
 pinecone.init(api_key=os.environ["PINECONE_API_KEY"],
@@ -37,7 +38,10 @@ def ingest_docs() -> None:
     documents = text_splitter.split_documents(documents=raw_documents)
     print(f"Split {len(documents)} documents into chunks")
     # Simple dictionary manipulation to change the source path of the documents, to a valid url.
     for doc in documents:
         old_path = doc.metadata["source"]
         new_url = old_path.replace("langchain-docs", "https:/")
@@ -49,7 +53,7 @@ def ingest_docs() -> None:
     # Take the chunks, embed them into vectors and store them in the Pinecone vector database.
     Pinecone.from_documents(documents,
-                            embeddings, index_name="langchain-docs-index")
     print("*********Added documents to Pinecone*********")

 import os
 import pinecone
+from consts import INDEX_NAME
 # initialize pinecone client
 pinecone.init(api_key=os.environ["PINECONE_API_KEY"],
     documents = text_splitter.split_documents(documents=raw_documents)
     print(f"Split {len(documents)} documents into chunks")
     # Simple dictionary manipulation to change the source path of the documents, to a valid url.
+    # This will enable us later to access what vectors (pages of langchain in this case) the RetrievalQA
+    # chain sent to the LLM as "relevant" context.
     for doc in documents:
         old_path = doc.metadata["source"]
         new_url = old_path.replace("langchain-docs", "https:/")
     # Take the chunks, embed them into vectors and store them in the Pinecone vector database.
     Pinecone.from_documents(documents,
+                            embeddings, index_name=INDEX_NAME)
     print("*********Added documents to Pinecone*********")