# Adaptive-RAG / main.py
# anindya-hf-2002's picture
# Upload 21 files
# b6d19d9 verified
from pinecone import Pinecone
from langchain_openai import ChatOpenAI
from src.vectorstore.pinecone_db import ingest_data, get_retriever, load_documents, process_chunks, save_to_parquet
from src.agents.workflow import run_adaptive_rag
from langgraph.pregel import GraphRecursionError
import tempfile
import os
from pathlib import Path
def initialize_pinecone(api_key):
    """Build a Pinecone client from an API key.

    Args:
        api_key: Value forwarded to the ``Pinecone`` constructor as ``api_key``.

    Returns:
        The constructed client, or ``None`` when construction raised. In the
        failure case the error text is printed to stdout.
    """
    try:
        client = Pinecone(api_key=api_key)
    except Exception as err:
        # Report the problem and signal failure to the caller with None.
        print(f"Error initializing Pinecone: {err!s}")
        client = None
    return client
def initialize_llm(api_key):
    """Build the OpenAI chat model used to answer questions.

    Args:
        api_key: Value forwarded to ``ChatOpenAI`` as ``api_key``.

    Returns:
        A ``ChatOpenAI`` instance configured for ``gpt-3.5-turbo``, or ``None``
        when construction raised. In the failure case the error text is
        printed to stdout.
    """
    llm = None
    try:
        llm = ChatOpenAI(api_key=api_key, model="gpt-3.5-turbo")
    except Exception as err:
        # Leave llm as None so the caller can detect the failure.
        print(f"Error initializing OpenAI: {err!s}")
    return llm
def process_documents(file_paths, pc):
    """Process documents, ingest them into Pinecone, and return a retriever.

    The documents are loaded into a combined markdown file, split into
    chunks, written to a parquet file, and ingested through ``ingest_data``.
    The intermediate files live in a fresh temporary directory that is always
    removed before returning, whether or not processing succeeded.

    Args:
        file_paths: Collection of document paths. An empty or ``None`` value
            short-circuits with a message and no processing.
        pc: Pinecone client handed to ``ingest_data`` and ``get_retriever``.

    Returns:
        The retriever produced by ``get_retriever(pc)``, or ``None`` when no
        documents were supplied or any processing step raised (the error is
        printed to stdout).
    """
    import shutil  # local import: only needed for temp-directory cleanup

    if not file_paths:
        print("No documents provided.")
        return None

    print("Processing documents...")
    temp_dir = tempfile.mkdtemp()
    markdown_path = Path(temp_dir) / "combined.md"
    parquet_path = Path(temp_dir) / "documents.parquet"

    try:
        # The helpers' return values replace the planned paths (presumably the
        # locations actually written -- confirm against their definitions).
        markdown_path = load_documents(file_paths, output_path=markdown_path)
        chunks = process_chunks(markdown_path, chunk_size=256, threshold=0.6)
        parquet_path = save_to_parquet(chunks, parquet_path)

        # NOTE(review): the client is passed under both `pc` and
        # `pinecone_client`; verify against ingest_data's signature.
        ingest_data(
            pc=pc,
            parquet_path=parquet_path,
            text_column="text",
            pinecone_client=pc,
        )

        retriever = get_retriever(pc)
        print("Documents processed successfully!")
        return retriever
    except Exception as e:
        print(f"Error processing documents: {str(e)}")
        return None
    finally:
        # Remove each artifact independently so a file that was never created
        # does not abort the rest of the cleanup and leak the directory.
        for artifact in (markdown_path, parquet_path):
            try:
                os.remove(artifact)
            except (OSError, TypeError, ValueError):
                # Missing file, or a helper returned something that is not a
                # usable path; cleanup stays best-effort.
                pass
        # rmtree also clears any stray files that a plain rmdir would choke on.
        shutil.rmtree(temp_dir, ignore_errors=True)
def _read_line(prompt=""):
    """Return one whitespace-stripped line from stdin, or ``None`` on EOF."""
    try:
        return input(prompt).strip()
    except EOFError:
        return None


def main():
    """Run the interactive command-line session.

    Prompts for the Pinecone and OpenAI API keys, initializes both clients,
    collects document paths until a blank line, processes the documents, and
    then answers questions in a loop until the user types ``exit``.

    The function returns early (without raising) when a client cannot be
    initialized, when document processing yields no retriever, or when stdin
    reaches end-of-file at a prompt.
    """
    # Get API keys
    pinecone_api_key = _read_line("Enter your Pinecone API key: ")
    if pinecone_api_key is None:
        return
    openai_api_key = _read_line("Enter your OpenAI API key: ")
    if openai_api_key is None:
        return

    # Initialize clients
    pc = initialize_pinecone(pinecone_api_key)
    if not pc:
        return

    llm = initialize_llm(openai_api_key)
    if not llm:
        return

    # Get document paths
    print("\nEnter the paths to your documents (one per line).")
    print("Press Enter twice when done:")

    file_paths = []
    while True:
        path = _read_line()
        if not path:
            # A blank line -- or EOF -- ends the list of paths.
            break
        if os.path.exists(path):
            file_paths.append(path)
        else:
            print(f"Warning: File {path} does not exist")

    # Process documents
    retriever = process_documents(file_paths, pc)
    if not retriever:
        return

    # Chat loop
    print("\nChat with your documents! Type 'exit' to quit.")
    while True:
        question = _read_line("\nYou: ")
        # EOF is treated like an explicit 'exit' so piped input ends cleanly.
        if question is None or question.lower() == 'exit':
            print("Goodbye!")
            break
        if not question:
            # Nothing to ask; prompt again instead of querying the pipeline.
            continue

        try:
            response = run_adaptive_rag(
                retriever=retriever,
                question=question,
                llm=llm,
                top_k=5,
                enable_websearch=False
            )
            print("\nAssistant:", response)
        except GraphRecursionError:
            # The workflow hit its recursion limit without settling on an answer.
            print("\nAssistant: I cannot find a sufficient answer to your question in the provided documents. Please try rephrasing your question or ask something else about the content of the documents.")
        except Exception as e:
            print(f"\nError: {str(e)}")
# Start the interactive session only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()