Petermoyano committed on
Commit
3d3f248
·
1 Parent(s): dc78e2e

implement RetrievalQA chain

Browse files
Files changed (3) hide show
  1. backend/core.py +32 -0
  2. consts.py +1 -0
  3. ingestion.py +5 -1
backend/core.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from langchain.embeddings.openai import OpenAIEmbeddings
3
+ from langchain.chat_models import ChatOpenAI
4
+ from langchain.chains import RetrievalQA
5
+ from langchain.vectorstores import Pinecone
6
+ import pinecone
7
+
8
+ from consts import INDEX_NAME
9
+
10
+ # initialize pinecone client
11
+ pinecone.init(api_key=os.environ["PINECONE_API_KEY"],
12
+ environment=os.environ["PINECONE_ENVIRONMENT"])
13
+
14
+
15
+ def run_llm(query: str) -> any:
16
+ embeddings = OpenAIEmbeddings()
17
+
18
+ # instance of vector db
19
+ docsearch = Pinecone.from_existing_index(
20
+ index_name=INDEX_NAME, embedding=embeddings)
21
+
22
+ chat = ChatOpenAI(verbose=True, temperature=0)
23
+
24
+ # The RetrievalQA chain needs a retriever, which we can create by using the .as_retriever() method
25
+ qa = RetrievalQA.from_chain_type(
26
+ llm=chat, chain_type="stuff", retriever=docsearch.as_retriever(), return_source_documents=True)
27
+
28
+ return qa({"query": query})
29
+
30
+
31
+ if __name__ == '__main__':
32
+ print(run_llm("What are the core modules of LangChain?"))
consts.py ADDED
@@ -0,0 +1 @@
 
 
1
+ INDEX_NAME = "langchain-docs-index"
ingestion.py CHANGED
@@ -9,6 +9,7 @@ from langchain.vectorstores import Pinecone
9
 
10
  import os
11
  import pinecone
 
12
 
13
  # initialize pinecone client
14
  pinecone.init(api_key=os.environ["PINECONE_API_KEY"],
@@ -37,7 +38,10 @@ def ingest_docs() -> None:
37
  documents = text_splitter.split_documents(documents=raw_documents)
38
 
39
  print(f"Split {len(documents)} documents into chunks")
 
40
  # Simple dictionary manipulation to change the source path of the documents, to a valid url.
 
 
41
  for doc in documents:
42
  old_path = doc.metadata["source"]
43
  new_url = old_path.replace("langchain-docs", "https:/")
@@ -49,7 +53,7 @@ def ingest_docs() -> None:
49
 
50
  # Take the chunks, embed them into vectors and store them in the Pinecone vector database.
51
  Pinecone.from_documents(documents,
52
- embeddings, index_name="langchain-docs-index")
53
  print("*********Added documents to Pinecone*********")
54
 
55
 
 
9
 
10
  import os
11
  import pinecone
12
+ from consts import INDEX_NAME
13
 
14
  # initialize pinecone client
15
  pinecone.init(api_key=os.environ["PINECONE_API_KEY"],
 
38
  documents = text_splitter.split_documents(documents=raw_documents)
39
 
40
  print(f"Split {len(documents)} documents into chunks")
41
+
42
  # Simple dictionary manipulation to change the source path of the documents, to a valid url.
43
+ # This will enable us later to access what vectors (pages of langchain in this case) the RetrievalQA
44
+ # chain sent to the LLM as a "relevant" context.
45
  for doc in documents:
46
  old_path = doc.metadata["source"]
47
  new_url = old_path.replace("langchain-docs", "https:/")
 
53
 
54
  # Take the chunks, embed them into vectors and store them in the Pinecone vector database.
55
  Pinecone.from_documents(documents,
56
+ embeddings, index_name=INDEX_NAME)
57
  print("*********Added documents to Pinecone*********")
58
 
59