Spaces:

Arxived
/

chat-w-csv

Sleeping

App Files Community

DrishtiSharma committed on Jan 28

Commit

a756b7d

verified ·

1 Parent(s): 14448f0

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -35

app.py CHANGED Viewed

@@ -18,6 +18,7 @@ from langchain_openai import OpenAIEmbeddings, ChatOpenAI
 from langchain_core.documents import Document
 import faiss
 import tempfile
 # Load environment variables
 os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
@@ -28,8 +29,8 @@ if not os.getenv("OPENAI_API_KEY"):
 # ✅ Ensure OpenAI Embeddings match FAISS dimensions
 embedding_function = OpenAIEmbeddings()
-test_vector = embedding_function.embed_query("test")  # Sample embedding
-faiss_dimension = len(test_vector)  # ✅ Dynamically detect correct dimension
 # ✅ Update global settings for LlamaIndex
 Settings.llm = OpenAI(model="gpt-4o")
@@ -53,31 +54,27 @@ if uploaded_file:
             data.to_csv(temp_file.name, index=False, encoding="utf-8")
             temp_file.flush()
-        # Debugging: Verify the temporary file (Display partial content)
-        st.write("Temporary file path:", temp_file_path)
-        with open(temp_file_path, "r") as f:
-            content = f.read()
-        st.write("Partial file content (first 500 characters):")
-        st.text(content[:500])
         # Tabs for LangChain and LlamaIndex
-        tab1, tab2 = st.tabs(["LangChain", "LlamaIndex"])
         # ✅ LangChain Processing
         with tab1:
             st.subheader("LangChain Query")
             try:
-                # ✅ Convert CSV rows into LangChain Document objects
                 st.write("Processing CSV with a custom loader...")
                 documents = []
                 for _, row in data.iterrows():
                     content = "\n".join([f"{col}: {row[col]}" for col in data.columns])
-                    doc = Document(page_content=content)
-                    documents.append(doc)
-                # ✅ Create FAISS VectorStore with Correct Dimensions
                 st.write(f"✅ Initializing FAISS with dimension: {faiss_dimension}")
                 langchain_index = faiss.IndexFlatL2(faiss_dimension)
@@ -98,27 +95,24 @@ if uploaded_file:
                 except Exception as e:
                     st.error(f"Error adding documents to FAISS: {e}")
-                # ✅ Create LangChain Query Execution Pipeline
-                retriever = langchain_vector_store.as_retriever()
-                system_prompt = (
-                    "You are an assistant for question-answering tasks. "
-                    "Use the following pieces of retrieved context to answer "
-                    "the question. If you don't know the answer, say that you "
-                    "don't know. Use three sentences maximum and keep the "
-                    "answer concise.\n\n{context}"
-                )
-                prompt = ChatPromptTemplate.from_messages(
-                    [("system", system_prompt), ("human", "{input}")]
-                )
-                question_answer_chain = create_stuff_documents_chain(ChatOpenAI(model="gpt-4o"), prompt)
-                langchain_rag_chain = create_retrieval_chain(retriever, question_answer_chain)
                 # ✅ Query Processing
                 query = st.text_input("Ask a question about your data (LangChain):")
                 if query:
                     try:
-                        st.write("Processing your question...")
                         answer = langchain_rag_chain.invoke({"input": query})
                         st.write(f"**Answer:** {answer['answer']}")
                     except Exception as e:
@@ -130,8 +124,3 @@ if uploaded_file:
                 error_message = traceback.format_exc()
                 st.error(f"Error processing with LangChain: {e}")
                 st.text(error_message)
-    except Exception as e:
-        error_message = traceback.format_exc()
-        st.error(f"Error reading uploaded file: {e}")
-        st.text(error_message)

 from langchain_core.documents import Document
 import faiss
 import tempfile
+from langchain.text_splitter import RecursiveCharacterTextSplitter
 # Load environment variables
 os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
 # ✅ Ensure OpenAI Embeddings match FAISS dimensions
 embedding_function = OpenAIEmbeddings()
+test_vector = embedding_function.embed_query("test")
+faiss_dimension = len(test_vector)
 # ✅ Update global settings for LlamaIndex
 Settings.llm = OpenAI(model="gpt-4o")
             data.to_csv(temp_file.name, index=False, encoding="utf-8")
             temp_file.flush()
         # Tabs for LangChain and LlamaIndex
+        tab1, tab2 = st.tabs(["Chat w CSV using LangChain", "Chat w CSV using LlamaIndex"])
         # ✅ LangChain Processing
         with tab1:
             st.subheader("LangChain Query")
             try:
+                # ✅ Convert CSV rows into LangChain Document objects with chunking
                 st.write("Processing CSV with a custom loader...")
+                text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=90)
                 documents = []
                 for _, row in data.iterrows():
                     content = "\n".join([f"{col}: {row[col]}" for col in data.columns])
+                    chunks = text_splitter.split_text(content)
+                    for chunk in chunks:
+                        doc = Document(page_content=chunk)
+                        documents.append(doc)
+                # ✅ Create FAISS VectorStore
                 st.write(f"✅ Initializing FAISS with dimension: {faiss_dimension}")
                 langchain_index = faiss.IndexFlatL2(faiss_dimension)
                 except Exception as e:
                     st.error(f"Error adding documents to FAISS: {e}")
+                # ✅ Limit number of retrieved documents
+                retriever = langchain_vector_store.as_retriever(search_kwargs={"k": 5})
                 # ✅ Query Processing
                 query = st.text_input("Ask a question about your data (LangChain):")
                 if query:
                     try:
+                        retrieved_context = "\n\n".join([doc.page_content for doc in retriever.get_relevant_documents(query)])
+                        retrieved_context = retrieved_context[:3000]
+                        system_prompt = (
+                            "You are an assistant for question-answering tasks. "
+                            "Use the following pieces of retrieved context to answer "
+                            "the question. Keep the answer concise.\n\n"
+                            f"{retrieved_context}"
+                        )
                         answer = langchain_rag_chain.invoke({"input": query})
                         st.write(f"**Answer:** {answer['answer']}")
                     except Exception as e:
                 error_message = traceback.format_exc()
                 st.error(f"Error processing with LangChain: {e}")
                 st.text(error_message)