import os
import logging

import openai
from dotenv import load_dotenv
from flask import Flask, request, jsonify
from llama_index.core import Settings, StorageContext, VectorStoreIndex
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.vector_stores.pinecone import PineconeVectorStore
from pinecone import Pinecone

load_dotenv()

app = Flask(__name__)
app.debug = True
logging.basicConfig(level=logging.DEBUG)

openai.api_key = os.getenv('OPENAI_API_KEY')

pc = Pinecone(api_key=os.getenv('PINECONE_API_KEY'))
PINECONE_INDEX_NAME = os.getenv('PINECONE_INDEX')
pinecone_index = pc.Index(PINECONE_INDEX_NAME)

# Log index stats right after initializing the Pinecone index so a
# misconfigured or empty index is visible at startup
stats = pinecone_index.describe_index_stats()
app.logger.debug(f"Pinecone index stats: {stats}")

# Set up LlamaIndex global settings
Settings.llm = OpenAI(
    model=os.getenv('OPENAI_MODEL', 'gpt-3.5-turbo'),
    temperature=0
)
# Default OpenAI embeddings; these must match the embedding model used
# when the index was originally populated
Settings.embed_model = OpenAIEmbedding()

vector_store = PineconeVectorStore(
    pinecone_index=pinecone_index,
    namespace="ai"
)

# Create a storage context backed by the vector store
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Initialize LlamaIndex over the existing Pinecone vector store
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    storage_context=storage_context
)


@app.route('/run/predict', methods=['POST'])
def predict():
    try:
        # silent=True returns None for a missing or invalid JSON body
        # instead of raising, so the 400 check below is reachable
        data = request.get_json(silent=True)
        app.logger.debug(f"Received data: {data}")  # e.g. {'query': 'What is LangChain?'}

        if not data:
            app.logger.error("No data provided in the request.")
            return jsonify({'error': 'No data provided.'}), 400

        user_query = data.get('query')
        if not user_query:
            app.logger.error("No query provided in the request.")
            return jsonify({'error': 'No query provided.'}), 400

        # Log Pinecone query details
        app.logger.debug(f"Querying Pinecone index: {PINECONE_INDEX_NAME}")
        app.logger.debug(f"Query: {user_query}")

        # Perform the query using LlamaIndex
        query_engine = index.as_query_engine(similarity_top_k=5)
        app.logger.debug(f"Query engine: {query_engine}")

        response = query_engine.query(user_query)
        app.logger.debug(f"Raw response object: {response}")
        app.logger.debug(f"Response type: {type(response)}")

        if hasattr(response, 'source_nodes'):
            app.logger.debug(f"Number of source nodes: {len(response.source_nodes)}")
            for i, node in enumerate(response.source_nodes):
                # Log the first 100 characters of each retrieved chunk
                app.logger.debug(f"Source node {i}: {node.node.text[:100]}...")
        else:
            app.logger.warning("No source nodes found in the response")

        response_text = response.response if hasattr(response, 'response') else str(response)
        app.logger.debug(f"Response text: {response_text}")

        return jsonify({'response': response_text})
    except Exception as e:
        app.logger.error(f"Error processing request: {e}", exc_info=True)
        return jsonify({'error': 'An error occurred while processing the request'}), 500


@app.route('/empty-datastore', methods=['DELETE'])
def empty_datastore():
    try:
        # Delete all vectors in the "ai" namespace that the vector store
        # above writes to; deleting from the default namespace "" would
        # leave those vectors in place
        delete_response = pinecone_index.delete(delete_all=True, namespace="ai")
        app.logger.debug(f"Delete response: {delete_response}")

        # Verify the index is empty
        stats = pinecone_index.describe_index_stats()
        app.logger.debug(f"Index stats after deletion: {stats}")

        if stats['total_vector_count'] == 0:
            app.logger.info("Datastore emptied successfully.")
            return jsonify({'message': 'Datastore emptied successfully'}), 200
        else:
            app.logger.warning("Datastore not fully emptied.")
            return jsonify({'message': 'Datastore not fully emptied'}), 500
    except Exception as e:
        app.logger.error(f"Error emptying datastore: {e}", exc_info=True)
        return jsonify({'error': f'An error occurred while emptying the datastore: {str(e)}'}), 500


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 7860)))
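
# Example requests (a sketch; assumes the server is running locally on the
# default port 7860 configured above):
#
#   curl -X POST http://localhost:7860/run/predict \
#        -H "Content-Type: application/json" \
#        -d '{"query": "What is LangChain?"}'
#
#   curl -X DELETE http://localhost:7860/empty-datastore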