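"""Flask service exposing a retrieval-augmented query API backed by
LlamaIndex and a Pinecone vector store.

Routes:
    POST   /run/predict      -- answer a user query against the index
    DELETE /empty-datastore  -- remove all vectors from the app's namespace
"""
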
import logging
import os

import openai
from dotenv import load_dotenv
from flask import Flask, jsonify, request
from llama_index.core import Settings, VectorStoreIndex
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.vector_stores.pinecone import PineconeVectorStore
from pinecone import Pinecone

load_dotenv()

app = Flask(__name__)
app.debug = True  # Flask debug mode; disable outside local development

logging.basicConfig(level=logging.DEBUG)

openai.api_key = os.getenv('OPENAI_API_KEY')

pc = Pinecone(
    api_key=os.getenv('PINECONE_API_KEY')
)

PINECONE_INDEX_NAME = os.getenv('PINECONE_INDEX')

pinecone_index = pc.Index(PINECONE_INDEX_NAME)

# Log index stats right after connecting to confirm the index is reachable
# and already holds vectors
stats = pinecone_index.describe_index_stats()
app.logger.debug(f"Pinecone index stats: {stats}")

# Set up LlamaIndex global settings
Settings.llm = OpenAI(
    model=os.getenv('OPENAI_MODEL', 'gpt-3.5-turbo'),
    temperature=0
)
# Pin the embedding model explicitly; this assumes the vectors in Pinecone
# were built with the default OpenAI embedding model
Settings.embed_model = OpenAIEmbedding()

# Single namespace used both for queries and by the empty-datastore route
PINECONE_NAMESPACE = "ai"

vector_store = PineconeVectorStore(
    pinecone_index=pinecone_index,
    namespace=PINECONE_NAMESPACE
)

# Initialize LlamaIndex from the existing Pinecone vector store;
# from_vector_store builds its own storage context, so creating a
# separate StorageContext here is unnecessary
index = VectorStoreIndex.from_vector_store(vector_store=vector_store)

@app.route('/run/predict', methods=['POST'])
def predict():
    try:
        # get_json(silent=True) returns None for a missing or malformed JSON
        # body, letting the check below answer with a clean 400
        data = request.get_json(silent=True)
        app.logger.debug(f"Received data: {data}")  # e.g. {'query': 'What is LangChain?'}

        if not data:
            app.logger.error("No data provided in the request.")
            return jsonify({'error': 'No data provided.'}), 400

        user_query = data.get('query')

        if not user_query:
            app.logger.error("No query provided in the request.")
            return jsonify({'error': 'No query provided.'}), 400

        # Log Pinecone query details
        app.logger.debug(f"Querying Pinecone index: {PINECONE_INDEX_NAME}")
        app.logger.debug(f"Query: {user_query}")

        # Perform the query using LlamaIndex
        query_engine = index.as_query_engine(similarity_top_k=5)
        app.logger.debug(f"Query engine: {query_engine}")
        
        response = query_engine.query(user_query)
        app.logger.debug(f"Raw response object: {response}")
        app.logger.debug(f"Response type: {type(response)}")
        
        if hasattr(response, 'source_nodes'):
            app.logger.debug(f"Number of source nodes: {len(response.source_nodes)}")
            for i, node in enumerate(response.source_nodes):
                app.logger.debug(f"Source node {i}: {node.node.text[:100]}...")  # Log first 100 chars of each source node
        else:
            app.logger.warning("No source nodes found in the response")

        if hasattr(response, 'response'):
            response_text = response.response
        else:
            response_text = str(response)
        
        app.logger.debug(f"Response text: {response_text}")

        return jsonify({'response': response_text})

    except Exception as e:
        app.logger.error(f"Error processing request: {e}", exc_info=True)
        return jsonify({"error": "An error occurred while processing the request"}), 500

@app.route('/empty-datastore', methods=['DELETE'])
def empty_datastore():
    try:
        # Delete all vectors from the namespace this app writes to; deleting
        # from the default namespace ("") would leave the "ai" namespace,
        # which the vector store above actually uses, untouched
        delete_response = pinecone_index.delete(delete_all=True, namespace=PINECONE_NAMESPACE)
        app.logger.debug(f"Delete response: {delete_response}")
        
        # Verify the index is empty (assumes all of this app's vectors live
        # in the single namespace deleted above)
        stats = pinecone_index.describe_index_stats()
        app.logger.debug(f"Index stats after deletion: {stats}")
        
        if stats['total_vector_count'] == 0:
            app.logger.info("Datastore emptied successfully.")
            return jsonify({'message': 'Datastore emptied successfully'}), 200
        else:
            app.logger.warning("Datastore not fully emptied.")
            return jsonify({'message': 'Datastore not fully emptied'}), 500
    
    except Exception as e:
        app.logger.error(f"Error emptying datastore: {e}")
        return jsonify({'error': f'An error occurred while emptying the datastore: {str(e)}'}), 500
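
# Example request against /empty-datastore (a sketch; assumes the same
# locally running server):
#
#   curl -X DELETE http://localhost:7860/empty-datastore
#
# Returns {"message": "Datastore emptied successfully"} when the index
# reports zero vectors afterwards.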

if __name__ == '__main__':
    # os is already imported at module level; default to port 7860
    app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 7860)))
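
# To run locally (a sketch; "app.py" is a hypothetical filename for this
# module), provide a .env file with OPENAI_API_KEY, PINECONE_API_KEY, and
# PINECONE_INDEX, then:
#
#   python app.py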