Spaces:

Arxived
/

chat-w-csv

Sleeping

App Files Community

DrishtiSharma committed on Jan 28

Commit

8d99061

verified ·

1 Parent(s): 253df02

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -15

app.py CHANGED Viewed

@@ -20,17 +20,20 @@ import faiss
 import tempfile
 # Load environment variables
 os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
-# Check OpenAI API Key
 if not os.getenv("OPENAI_API_KEY"):
     st.error("⚠️ OpenAI API Key is missing! Please check your .env file or environment variables.")
-# Global settings for LlamaIndex
-EMBED_DIMENSION = 512
 Settings.llm = OpenAI(model="gpt-4o")
-Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small", dimensions=EMBED_DIMENSION)
 # Streamlit app
 st.title("Chat with CSV Files - LangChain vs LlamaIndex")
@@ -70,25 +73,49 @@ if uploaded_file:
                 documents = []
                 for _, row in data.iterrows():
                     content = "\n".join([f"{col}: {row[col]}" for col in data.columns])
-                    doc = Document(page_content=content)
                     documents.append(doc)
-                # ✅ Create FAISS VectorStore
-                langchain_index = faiss.IndexFlatL2(EMBED_DIMENSION)
                 docstore = InMemoryDocstore()
                 index_to_docstore_id = {}
                 langchain_vector_store = LangChainFAISS(
-                    embedding_function=OpenAIEmbeddings(),
                     index=langchain_index,
                     docstore=docstore,
                     index_to_docstore_id=index_to_docstore_id,
                 )
-                # ✅ Add properly formatted documents to FAISS
-                langchain_vector_store.add_documents(documents)
-                st.write("Documents successfully added to FAISS VectorStore.")
                 # ✅ Query Processing
                 query = st.text_input("Ask a question about your data (LangChain):")
@@ -101,14 +128,14 @@ if uploaded_file:
                     except Exception as e:
                         error_message = traceback.format_exc()
                         st.error(f"Error processing query: {e}")
-                        st.text(error_message)
             except Exception as e:
                 error_message = traceback.format_exc()
                 st.error(f"Error processing with LangChain: {e}")
-                st.text(error_message)
     except Exception as e:
         error_message = traceback.format_exc()
         st.error(f"Error reading uploaded file: {e}")
-        st.text(error_message)

 import tempfile
 # Load environment variables
 os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
+# ✅ Check OpenAI API Key
 if not os.getenv("OPENAI_API_KEY"):
     st.error("⚠️ OpenAI API Key is missing! Please check your .env file or environment variables.")
+# ✅ Ensure OpenAI Embeddings match FAISS dimensions
+embedding_function = OpenAIEmbeddings()
+test_vector = embedding_function.embed_query("test")  # Sample embedding
+faiss_dimension = len(test_vector)  # ✅ Dynamically detect correct dimension
+# ✅ Update global settings for LlamaIndex
 Settings.llm = OpenAI(model="gpt-4o")
+Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small", dimensions=faiss_dimension)
 # Streamlit app
 st.title("Chat with CSV Files - LangChain vs LlamaIndex")
                 documents = []
                 for _, row in data.iterrows():
                     content = "\n".join([f"{col}: {row[col]}" for col in data.columns])
+                    doc = Document(page_content=content)
                     documents.append(doc)
+                # ✅ Debugging: Display a sample processed document
+                if documents:
+                    st.write("Sample processed document (LangChain):")
+                    st.text(documents[0].page_content)
+                # ✅ Create FAISS VectorStore with Correct Dimensions
+                st.write(f"✅ Initializing FAISS with dimension: {faiss_dimension}")
+                langchain_index = faiss.IndexFlatL2(faiss_dimension)
                 docstore = InMemoryDocstore()
                 index_to_docstore_id = {}
                 langchain_vector_store = LangChainFAISS(
+                    embedding_function=embedding_function,
                     index=langchain_index,
                     docstore=docstore,
                     index_to_docstore_id=index_to_docstore_id,
                 )
+                # ✅ Ensure documents are added correctly
+                try:
+                    langchain_vector_store.add_documents(documents)
+                    st.write("✅ Documents successfully added to FAISS VectorStore.")
+                except Exception as e:
+                    st.error(f"Error adding documents to FAISS: {e}")
+                # ✅ Create LangChain Query Execution Pipeline
+                retriever = langchain_vector_store.as_retriever()
+                system_prompt = (
+                    "You are an assistant for question-answering tasks. "
+                    "Use the following pieces of retrieved context to answer "
+                    "the question. If you don't know the answer, say that you "
+                    "don't know. Use three sentences maximum and keep the "
+                    "answer concise.\n\n{context}"
+                )
+                prompt = ChatPromptTemplate.from_messages(
+                    [("system", system_prompt), ("human", "{input}")]
+                )
+                question_answer_chain = create_stuff_documents_chain(ChatOpenAI(model="gpt-4o"), prompt)
+                langchain_rag_chain = create_retrieval_chain(retriever, question_answer_chain)
                 # ✅ Query Processing
                 query = st.text_input("Ask a question about your data (LangChain):")
                     except Exception as e:
                         error_message = traceback.format_exc()
                         st.error(f"Error processing query: {e}")
+                        st.text(error_message)
             except Exception as e:
                 error_message = traceback.format_exc()
                 st.error(f"Error processing with LangChain: {e}")
+                st.text(error_message)
     except Exception as e:
         error_message = traceback.format_exc()
         st.error(f"Error reading uploaded file: {e}")
+        st.text(error_message)