Petermoyano committed on
Commit
2e85929
·
1 Parent(s): f701cfe

Pipfile and docsLoader

Browse files
Files changed (5) hide show
  1. .gitignore +2 -1
  2. .vscode/settings.json +26 -0
  3. Pipfile +24 -0
  4. Pipfile.lock +0 -0
  5. ingestion.py +17 -0
.gitignore CHANGED
@@ -1 +1,2 @@
1
- .env
 
 
1
+ .env
2
+ /langchain-docs
.vscode/settings.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "workbench.colorCustomizations": {
3
+ "activityBar.activeBackground": "#fa1b49",
4
+ "activityBar.background": "#fa1b49",
5
+ "activityBar.foreground": "#e7e7e7",
6
+ "activityBar.inactiveForeground": "#e7e7e799",
7
+ "activityBarBadge.background": "#155e02",
8
+ "activityBarBadge.foreground": "#e7e7e7",
9
+ "commandCenter.border": "#e7e7e799",
10
+ "sash.hoverBorder": "#fa1b49",
11
+ "statusBar.background": "#dd0531",
12
+ "statusBar.foreground": "#e7e7e7",
13
+ "statusBarItem.hoverBackground": "#fa1b49",
14
+ "statusBarItem.remoteBackground": "#dd0531",
15
+ "statusBarItem.remoteForeground": "#e7e7e7",
16
+ "titleBar.activeBackground": "#dd0531",
17
+ "titleBar.activeForeground": "#e7e7e7",
18
+ "titleBar.inactiveBackground": "#dd053199",
19
+ "titleBar.inactiveForeground": "#e7e7e799"
20
+ },
21
+ "peacock.remoteColor": "#dd0531",
22
+ "[python]": {
23
+ "editor.defaultFormatter": "ms-python.autopep8"
24
+ },
25
+ "python.formatting.provider": "none"
26
+ }
Pipfile ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [[source]]
2
+ url = "https://pypi.org/simple"
3
+ verify_ssl = true
4
+ name = "pypi"
5
+
6
+ [packages]
7
+ langchain = "*"
8
+ beautifulsoup4 = "*"
9
+ tiktoken = "*"
10
+ openai = "*"
11
+ pinecone-client = "*"
12
+ unstructured = "*"
13
+ nltk = "*"
14
+ fastapi = "*"
15
+ jinja2 = "*"
16
+ uvicorn = "*"
17
+ streamlit = "*"
18
+ streamlit-chat = "*"
19
+ tqdm = "*"
20
+
21
+ [dev-packages]
22
+
23
+ [requires]
24
+ python_version = "3.10"
Pipfile.lock ADDED
The diff for this file is too large to render. See raw diff
 
ingestion.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This file is responsible for the ingestion of the data (langchain documentation).
3
+ It embeds the data into vectors, and stores them in the Pinecone vectorstore.
4
+ """
5
+ import os
6
+ from langchain.document_loaders import ReadTheDocsLoader
7
+
8
+
9
+ def ingest_docs() -> None:
10
+ # ReadTheDocsLoader is the class in charge of taking the local dump produced by a
11
+ # docs-fetching process and loading it as documents; it does not store them in the vectorstore.
12
+ loader = ReadTheDocsLoader("langchain-docs-chatbot/langchain-docs")
13
+ raw_documents = loader.load()
14
+
15
+
16
+ if __name__ == '__main__':
17
+ print('Hello world!')