Spaces:

Arxived
/

chat-w-csv

Sleeping

App Files Community

DrishtiSharma committed on Jan 28

Commit

573b41b

verified ·

1 Parent(s): 08f6ce3

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -12

app.py CHANGED Viewed

@@ -8,7 +8,6 @@ from llama_index.llms.openai import OpenAI
 from llama_index.embeddings.openai import OpenAIEmbedding
 from llama_index.vector_stores.faiss import FaissVectorStore
 from llama_index.core.ingestion import IngestionPipeline
-from langchain_community.document_loaders.csv_loader import CSVLoader
 from langchain_community.vectorstores import FAISS as LangChainFAISS
 from langchain.chains import create_retrieval_chain
 from langchain.chains.combine_documents import create_stuff_documents_chain
@@ -42,9 +41,9 @@ if uploaded_file:
         with tempfile.NamedTemporaryFile(delete=False, suffix=".csv", mode="w", encoding="utf-8") as temp_file:
             temp_file_path = temp_file.name
             data.to_csv(temp_file.name, index=False, encoding="utf-8")
-            temp_file.flush()  # Ensure all data is written to the file
-        # Debugging: Verify the temporary file
         st.write("Temporary file path:", temp_file_path)
         with open(temp_file_path, "r") as f:
             st.write("Temporary file content:")
@@ -53,19 +52,21 @@ if uploaded_file:
         # Tabs for LangChain and LlamaIndex
         tab1, tab2 = st.tabs(["LangChain", "LlamaIndex"])
-        # LangChain Tab
         with tab1:
             st.subheader("LangChain Query")
             try:
-                # Use CSVLoader with the temporary file path
-                st.write("Loading file with LangChain CSVLoader...")
-                loader = CSVLoader(file_path=temp_file_path)
-                docs = loader.load_and_split()
                 # Debugging: Preview loaded documents
-                st.write("Successfully loaded documents:")
-                if docs:
-                    st.text(docs[0].page_content)
                 # Create FAISS VectorStore
                 langchain_index = faiss.IndexFlatL2(EMBED_DIMENSION)
@@ -73,7 +74,7 @@ if uploaded_file:
                     embedding_function=OpenAIEmbeddings(),
                     index=langchain_index,
                 )
-                langchain_vector_store.add_documents(docs)
                 # LangChain Retrieval Chain
                 retriever = langchain_vector_store.as_retriever()

 from llama_index.embeddings.openai import OpenAIEmbedding
 from llama_index.vector_stores.faiss import FaissVectorStore
 from llama_index.core.ingestion import IngestionPipeline
 from langchain_community.vectorstores import FAISS as LangChainFAISS
 from langchain.chains import create_retrieval_chain
 from langchain.chains.combine_documents import create_stuff_documents_chain
         with tempfile.NamedTemporaryFile(delete=False, suffix=".csv", mode="w", encoding="utf-8") as temp_file:
             temp_file_path = temp_file.name
             data.to_csv(temp_file.name, index=False, encoding="utf-8")
+            temp_file.flush()
+        # Verify the temporary file
         st.write("Temporary file path:", temp_file_path)
         with open(temp_file_path, "r") as f:
             st.write("Temporary file content:")
         # Tabs for LangChain and LlamaIndex
         tab1, tab2 = st.tabs(["LangChain", "LlamaIndex"])
+        # LangChain Tab with Custom Loader
         with tab1:
             st.subheader("LangChain Query")
             try:
+                # Custom preprocessing: build one text document per CSV row ("column: value" lines)
+                st.write("Processing CSV with a custom loader...")
+                documents = []
+                for _, row in data.iterrows():
+                    content = "\n".join([f"{col}: {row[col]}" for col in data.columns])
+                    documents.append({"page_content": content})
                 # Debugging: Preview loaded documents
+                st.write("Successfully processed documents:")
+                if documents:
+                    st.text(documents[0]["page_content"])
                 # Create FAISS VectorStore
                 langchain_index = faiss.IndexFlatL2(EMBED_DIMENSION)
                     embedding_function=OpenAIEmbeddings(),
                     index=langchain_index,
                 )
+                langchain_vector_store.add_documents(documents)
                 # LangChain Retrieval Chain
                 retriever = langchain_vector_store.as_retriever()