Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,7 +8,6 @@ from llama_index.llms.openai import OpenAI
|
|
| 8 |
from llama_index.embeddings.openai import OpenAIEmbedding
|
| 9 |
from llama_index.vector_stores.faiss import FaissVectorStore
|
| 10 |
from llama_index.core.ingestion import IngestionPipeline
|
| 11 |
-
from langchain_community.document_loaders.csv_loader import CSVLoader
|
| 12 |
from langchain_community.vectorstores import FAISS as LangChainFAISS
|
| 13 |
from langchain.chains import create_retrieval_chain
|
| 14 |
from langchain.chains.combine_documents import create_stuff_documents_chain
|
|
@@ -42,9 +41,9 @@ if uploaded_file:
|
|
| 42 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".csv", mode="w", encoding="utf-8") as temp_file:
|
| 43 |
temp_file_path = temp_file.name
|
| 44 |
data.to_csv(temp_file.name, index=False, encoding="utf-8")
|
| 45 |
-
temp_file.flush()
|
| 46 |
|
| 47 |
-
#
|
| 48 |
st.write("Temporary file path:", temp_file_path)
|
| 49 |
with open(temp_file_path, "r") as f:
|
| 50 |
st.write("Temporary file content:")
|
|
@@ -53,19 +52,21 @@ if uploaded_file:
|
|
| 53 |
# Tabs for LangChain and LlamaIndex
|
| 54 |
tab1, tab2 = st.tabs(["LangChain", "LlamaIndex"])
|
| 55 |
|
| 56 |
-
# LangChain Tab
|
| 57 |
with tab1:
|
| 58 |
st.subheader("LangChain Query")
|
| 59 |
try:
|
| 60 |
-
#
|
| 61 |
-
st.write("
|
| 62 |
-
|
| 63 |
-
|
|
|
|
|
|
|
| 64 |
|
| 65 |
# Debugging: Preview loaded documents
|
| 66 |
-
st.write("Successfully
|
| 67 |
-
if
|
| 68 |
-
st.text(
|
| 69 |
|
| 70 |
# Create FAISS VectorStore
|
| 71 |
langchain_index = faiss.IndexFlatL2(EMBED_DIMENSION)
|
|
@@ -73,7 +74,7 @@ if uploaded_file:
|
|
| 73 |
embedding_function=OpenAIEmbeddings(),
|
| 74 |
index=langchain_index,
|
| 75 |
)
|
| 76 |
-
langchain_vector_store.add_documents(
|
| 77 |
|
| 78 |
# LangChain Retrieval Chain
|
| 79 |
retriever = langchain_vector_store.as_retriever()
|
|
|
|
| 8 |
from llama_index.embeddings.openai import OpenAIEmbedding
|
| 9 |
from llama_index.vector_stores.faiss import FaissVectorStore
|
| 10 |
from llama_index.core.ingestion import IngestionPipeline
|
|
|
|
| 11 |
from langchain_community.vectorstores import FAISS as LangChainFAISS
|
| 12 |
from langchain.chains import create_retrieval_chain
|
| 13 |
from langchain.chains.combine_documents import create_stuff_documents_chain
|
|
|
|
| 41 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".csv", mode="w", encoding="utf-8") as temp_file:
|
| 42 |
temp_file_path = temp_file.name
|
| 43 |
data.to_csv(temp_file.name, index=False, encoding="utf-8")
|
| 44 |
+
temp_file.flush()
|
| 45 |
|
| 46 |
+
# Verify the temporary file
|
| 47 |
st.write("Temporary file path:", temp_file_path)
|
| 48 |
with open(temp_file_path, "r") as f:
|
| 49 |
st.write("Temporary file content:")
|
|
|
|
| 52 |
# Tabs for LangChain and LlamaIndex
|
| 53 |
tab1, tab2 = st.tabs(["LangChain", "LlamaIndex"])
|
| 54 |
|
| 55 |
+
# LangChain Tab with Custom Loader
|
| 56 |
with tab1:
|
| 57 |
st.subheader("LangChain Query")
|
| 58 |
try:
|
| 59 |
+
# Custom preprocessing: Split CSV rows into smaller chunks
|
| 60 |
+
st.write("Processing CSV with a custom loader...")
|
| 61 |
+
documents = []
|
| 62 |
+
for _, row in data.iterrows():
|
| 63 |
+
content = "\n".join([f"{col}: {row[col]}" for col in data.columns])
|
| 64 |
+
documents.append({"page_content": content})
|
| 65 |
|
| 66 |
# Debugging: Preview loaded documents
|
| 67 |
+
st.write("Successfully processed documents:")
|
| 68 |
+
if documents:
|
| 69 |
+
st.text(documents[0]["page_content"])
|
| 70 |
|
| 71 |
# Create FAISS VectorStore
|
| 72 |
langchain_index = faiss.IndexFlatL2(EMBED_DIMENSION)
|
|
|
|
| 74 |
embedding_function=OpenAIEmbeddings(),
|
| 75 |
index=langchain_index,
|
| 76 |
)
|
| 77 |
+
langchain_vector_store.add_documents(documents)
|
| 78 |
|
| 79 |
# LangChain Retrieval Chain
|
| 80 |
retriever = langchain_vector_store.as_retriever()
|