Added NLP (#2)

nlp_service.py  CHANGED  (+204 -24)
@@ -6,6 +6,9 @@ from flask import Blueprint, request, jsonify
 from collections import defaultdict
 import logging
 import os # To handle potential model loading issues
+import requests # Add requests for API calls
+import json # For handling JSON data
+import os # Already imported, needed for API key
 
 # --- Setup ---
 logging.basicConfig(level=logging.INFO)
@@ -560,17 +563,210 @@ def analyze_expense_text(text):
         }
 
     else: # intent == "unknown"
-        logging.info(f"
-
-
-
-
-
+        logging.info(f"Local NLP intent unknown for: {text}. Attempting Gemini API call.")
+
+        # --- Call Gemini API ---
+        gemini_result = call_gemini_api(text, GEMINI_API_KEY)
+
+        if gemini_result and isinstance(gemini_result, dict) and gemini_result.get("action") in ["add_expense", "query_expense", "info"]:
+            # If Gemini returned a structured result we can use (or an info message), return it
+            logging.info(f"Using result from Gemini API. Action: {gemini_result.get('action')}")
+            response_data = gemini_result
+            # TODO: Potentially re-validate or re-process gemini_result here if needed
+            # For example, if action is add_expense, ensure data types are correct, parse date string etc.
+            # If action is query_expense, parse date strings etc.
+            if response_data.get("action") == "add_expense" and "details" in response_data:
+                # Basic post-processing/validation for added expense
+                details = response_data["details"]
+                try:
+                    if "date" in details and isinstance(details["date"], str):
+                        details["date"] = datetime.datetime.fromisoformat(details["date"].split("T")[0]).date()
+                    if "amount" in details:
+                        details["amount"] = float(details["amount"])
+                    # Add expense to memory if Gemini successfully added it
+                    # Note: This assumes Gemini provides all necessary fields correctly
+                    if all(k in details for k in ["amount", "currency", "category", "date"]):
+                        global next_expense_id
+                        new_expense = {
+                            "id": next_expense_id,
+                            "amount": details["amount"],
+                            "currency": details.get("currency", "₹"),
+                            "category": details.get("category", "Uncategorized"),
+                            "merchant": details.get("merchant"),
+                            "date": details["date"],
+                            "original_message": text
+                        }
+                        expenses.append(new_expense)
+                        next_expense_id += 1
+                        logging.info(f"Added expense (from Gemini): {new_expense}")
+                        # Update message for consistency
+                        response_data["message"] = f"✅ Expense added (via Gemini): {new_expense['currency']}{new_expense['amount']:.2f} for {new_expense['category']} on {new_expense['date'].strftime('%b %d, %Y')}."
+                        # Make details serializable for JSON response
+                        response_data["details"]["date"] = response_data["details"]["date"].isoformat()
+                    else:
+                        logging.warning("Gemini add_expense result missing required fields.")
+                        response_data = {"action": "unknown", "status": "failed", "message": "Gemini suggested adding an expense, but details were incomplete."}
+
+                except (ValueError, TypeError) as e:
+                    logging.warning(f"Error processing Gemini add_expense details: {e}")
+                    response_data = {"action": "unknown", "status": "failed", "message": "Could not process expense details suggested by Gemini."}
+
+            elif response_data.get("action") == "query_expense" and "criteria" in response_data:
+                # Basic post-processing for query
+                criteria = response_data["criteria"]
+                try:
+                    if "start_date" in criteria and isinstance(criteria["start_date"], str):
+                        criteria["start_date"] = datetime.datetime.fromisoformat(criteria["start_date"].split("T")[0]).date()
+                    if "end_date" in criteria and isinstance(criteria["end_date"], str):
+                        criteria["end_date"] = datetime.datetime.fromisoformat(criteria["end_date"].split("T")[0]).date()
+                    # Execute the query based on Gemini's criteria
+                    results = filter_expenses(criteria)
+                    # Use Gemini's message or generate a new one
+                    if not response_data.get("message"):
+                        response_data["message"] = format_expense_list(results, "Query results (via Gemini):")
+                    response_data["results_count"] = len(results)
+                    # Make criteria serializable
+                    response_data["criteria"] = {k: v.isoformat() if isinstance(v, datetime.date) else v for k, v in criteria.items() if v is not None}
+
+                except (ValueError, TypeError) as e:
+                    logging.warning(f"Error processing Gemini query_expense criteria: {e}")
+                    response_data = {"action": "unknown", "status": "failed", "message": "Could not process query criteria suggested by Gemini."}
+
+        else:
+            # Fallback to original unknown message if Gemini fails or returns unusable data
+            logging.info("Gemini API did not provide a usable structured result. Falling back to default unknown message.")
+            response_data = {
+                "action": "unknown",
+                "status": "failed",
+                "message": "Sorry, I couldn't quite understand that. Please try phrasing your expense or query differently. \nExamples:\n- 'Spent ₹50 on coffee yesterday at Starbucks'\n- 'Show my food expenses last week'\n- 'What was my total spending last month?'"
+            }
+            # Optionally include Gemini's raw suggestion if available and not structured
+            if gemini_result and isinstance(gemini_result, dict) and "message" in gemini_result:
+                response_data["message"] += f"\n\nGemini suggestion: {gemini_result['message']}"
 
-    logging.info(f"Analysis complete. Action: {response_data.get('action')}, Status: {response_data.get('status')}")
+    logging.info(f"Analysis complete. Action: {response_data.get('action')}, Status: {response_data.get('status')}")
     return response_data
 
 
+# Placeholder for Gemini API Key - Load from environment variable
+GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
+
+# Placeholder function for Gemini API call
+def call_gemini_api(text, api_key):
+    """
+    Placeholder function to call the Gemini API.
+    Replace with actual implementation.
+    Should ideally return a dictionary similar to analyze_expense_text's output
+    or None if the call fails or response is unusable.
+    """
+    if not api_key:
+        logging.warning("GEMINI_API_KEY not set. Skipping Gemini API call.")
+        return None
+
+    # --- Replace with actual Gemini API endpoint and request structure ---
+    # Example using Google AI Generative Language API (adjust model and endpoint as needed)
+    # Ensure you have the google-generativeai library installed (`pip install google-generativeai`)
+    # and the API key is correctly set as an environment variable.
+    api_endpoint = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent?key={api_key}"
+    headers = {
+        "Content-Type": "application/json"
+    }
+    # Construct the payload based on Gemini API requirements
+    # This prompt asks Gemini to act like the existing NLP service
+    prompt = f"""Analyze the following text for expense tracking. Determine the intent ('add_expense' or 'query_expense') and extract relevant details.
+
+Text: "{text}"
+
+Desired JSON output format:
+{{
+  "action": "add_expense" | "query_expense",
+  "status": "success" | "failed",
+  "message": "Confirmation or result summary",
+  "details": {{ // Only for add_expense
+    "amount": <float>,
+    "currency": "<string>",
+    "category": "<string>",
+    "merchant": "<string>",
+    "date": "YYYY-MM-DD"
+  }},
+  "criteria": {{ // Only for query_expense
+    "category": "<string>",
+    "merchant": "<string>",
+    "start_date": "YYYY-MM-DD",
+    "end_date": "YYYY-MM-DD"
+  }}
+}}
+
+If the intent is unclear or details are missing, set action to "unknown" and status to "failed".
+Provide only the JSON output.
+"""
+
+    payload = json.dumps({
+        "contents": [{
+            "parts":[{ "text": prompt }]
+        }]
+        # Add generationConfig if needed (e.g., temperature, max output tokens)
+        # "generationConfig": {
+        #     "temperature": 0.7,
+        #     "maxOutputTokens": 256
+        # }
+    })
+    # --- End of placeholder section ---
+
+    try:
+        response = requests.post(api_endpoint, headers=headers, data=payload, timeout=20) # Increased timeout
+        response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
+
+        gemini_response_raw = response.json()
+        logging.debug(f"Raw Gemini API response: {gemini_response_raw}")
+
+        # --- Process gemini_response ---
+        # Extract the text content which should contain the JSON
+        if 'candidates' in gemini_response_raw and len(gemini_response_raw['candidates']) > 0:
+            content = gemini_response_raw['candidates'][0].get('content', {}).get('parts', [{}])[0].get('text')
+            if content:
+                logging.info(f"Gemini suggested JSON: {content}")
+                # Clean potential markdown/code block formatting
+                content_cleaned = content.strip().strip('```json').strip('```').strip()
+                try:
+                    # Attempt to parse the JSON string from Gemini
+                    parsed_result = json.loads(content_cleaned)
+                    # Basic validation of the parsed structure
+                    if isinstance(parsed_result, dict) and "action" in parsed_result:
+                        logging.info("Successfully parsed structured data from Gemini.")
+                        # Add further validation/sanitization if needed
+                        return parsed_result
+                    else:
+                        logging.warning("Gemini response parsed but lacks expected structure.")
+                        return {"action": "info", "status": "success", "message": f"Gemini suggestion: {content_cleaned}"}
+                except json.JSONDecodeError as json_err:
+                    logging.warning(f"Failed to decode JSON from Gemini response: {json_err}. Raw content: {content_cleaned}")
+                    # Return the raw text as a message if JSON parsing fails
+                    return {"action": "info", "status": "success", "message": f"Gemini suggestion: {content_cleaned}"}
+            else:
+                logging.warning("No text content found in Gemini response candidates.")
+                return None
+        else:
+            logging.warning("No candidates found in Gemini API response.")
+            return None
+
+    except requests.exceptions.Timeout:
+        logging.error("Gemini API call timed out.")
+        return None
+    except requests.exceptions.RequestException as e:
+        logging.error(f"Gemini API call failed: {e}")
+        # Log response body if available and indicates an API error
+        if e.response is not None:
+            try:
+                logging.error(f"Gemini API error response: {e.response.json()}")
+            except json.JSONDecodeError:
+                logging.error(f"Gemini API error response (non-JSON): {e.response.text}")
+        return None
+    except Exception as e:
+        logging.error(f"An unexpected error occurred during Gemini API call or processing: {e}")
+        return None
+
+
 # --- Flask Blueprint Setup (Optional: Keep if direct API access is needed) ---
 nlp_bp = Blueprint('nlp_service', __name__)
 
@@ -592,20 +788,4 @@ def process_nlp_expense_route():
     if result.get("message") == "NLP model not available":
         status_code = 500
 
-    return jsonify(result), status_code
-
-
-# --- Example Usage / Testing Setup ---
-if __name__ == '__main__':
-    from flask import Flask
-
-    app = Flask(__name__)
-    app.register_blueprint(nlp_bp) # Register the blueprint
-
-    # Dummy data removed
-
-    print("Starting Flask server for testing NLP service...")
-    # print("Registered expenses:", expenses) # Can be long
-    if nlp is None:
-        print("WARNING: spaCy model failed to load. /process_nlp endpoint will return errors.")
-    app.run(debug=True, host='0.0.0.0', port=5001)
+    return jsonify(result), status_code
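For reference, the JSON envelope the prompt in call_gemini_api asks the model to return would parse into something like the Python dict below for an add_expense utterance. The field values are illustrative only and are not taken from the commit.

# Illustrative only: roughly what json.loads(content_cleaned) should yield when the model
# follows the schema embedded in the prompt (all values below are made up).
example_parsed_result = {
    "action": "add_expense",
    "status": "success",
    "message": "Added a coffee expense.",
    "details": {
        "amount": 50.0,
        "currency": "₹",
        "category": "Food",
        "merchant": "Starbucks",
        "date": "2024-05-01"  # ISO string; analyze_expense_text converts it to a date object
    }
}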
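The comments in call_gemini_api mention the google-generativeai client library, although the commit itself posts to the REST endpoint with requests. A minimal sketch of the SDK route, assuming the same gemini-pro model and a GEMINI_API_KEY environment variable, could look like this (not part of the commit):

# Hedged sketch: the same request via the google-generativeai SDK instead of raw requests.
import os
import google.generativeai as genai

genai.configure(api_key=os.environ["GEMINI_API_KEY"])
model = genai.GenerativeModel("gemini-pro")  # same model the REST URL targets

# In the service this would be the same prompt string assembled inside call_gemini_api.
prompt = 'Analyze the following text for expense tracking ... Text: "Spent ₹50 on coffee yesterday"'
response = model.generate_content(prompt)
print(response.text)  # replaces the manual candidates/parts extraction; still needs json.loads()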
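One way to exercise the new fallback path locally is to set GEMINI_API_KEY before importing the module (the key is read at module import time) and then call analyze_expense_text directly. This is a sketch rather than part of the commit; it assumes the file is importable as nlp_service and that the spaCy model loads so the local intent check runs first.

# Hedged sketch: exercising the Gemini fallback from a Python shell.
import os
os.environ["GEMINI_API_KEY"] = "your-key-here"  # hypothetical placeholder; must be set before the import

import nlp_service  # assumes nlp_service.py is on the import path

# A phrase the local rules are unlikely to classify, so it falls through to the Gemini call.
result = nlp_service.analyze_expense_text("I think I blew about fifty bucks on snacks at the fair")
print(result.get("action"), "-", result.get("message"))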
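If the blueprint is registered in a running Flask app, the same path can also be driven over HTTP. Neither the route path nor the request body shape is visible in this diff, so the /process_nlp path (taken from the removed startup warning), the port, and the JSON payload below are all assumptions.

# Hedged sketch: hitting the blueprint route over HTTP (path, port, and payload shape assumed).
import requests

resp = requests.post(
    "http://localhost:5001/process_nlp",               # port taken from the removed __main__ block
    json={"text": "Show my food expenses last week"},  # assumed request shape
    timeout=10,
)
print(resp.status_code, resp.json())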