Added NLP (#2)

nlp_service.py  CHANGED  (+204 -24)
@@ -6,6 +6,9 @@ from flask import Blueprint, request, jsonify
 from collections import defaultdict
 import logging
 import os # To handle potential model loading issues
+import requests # Add requests for API calls
+import json # For handling JSON data
+import os # Already imported, needed for API key
 
 # --- Setup ---
 logging.basicConfig(level=logging.INFO)
@@ -560,17 +563,210 @@ def analyze_expense_text(text):
         }
 
     else: # intent == "unknown"
-        logging.info(f"
-
-
-
-
-
+        logging.info(f"Local NLP intent unknown for: {text}. Attempting Gemini API call.")
+
+        # --- Call Gemini API ---
+        gemini_result = call_gemini_api(text, GEMINI_API_KEY)
+
+        if gemini_result and isinstance(gemini_result, dict) and gemini_result.get("action") in ["add_expense", "query_expense", "info"]:
+            # If Gemini returned a structured result we can use (or an info message), return it
+            logging.info(f"Using result from Gemini API. Action: {gemini_result.get('action')}")
+            response_data = gemini_result
+            # TODO: Potentially re-validate or re-process gemini_result here if needed
+            # For example, if action is add_expense, ensure data types are correct, parse date string etc.
+            # If action is query_expense, parse date strings etc.
+            if response_data.get("action") == "add_expense" and "details" in response_data:
+                # Basic post-processing/validation for added expense
+                details = response_data["details"]
+                try:
+                    if "date" in details and isinstance(details["date"], str):
+                        details["date"] = datetime.datetime.fromisoformat(details["date"].split("T")[0]).date()
+                    if "amount" in details:
+                        details["amount"] = float(details["amount"])
+                    # Add expense to memory if Gemini successfully added it
+                    # Note: This assumes Gemini provides all necessary fields correctly
+                    if all(k in details for k in ["amount", "currency", "category", "date"]):
+                        global next_expense_id
+                        new_expense = {
+                            "id": next_expense_id,
+                            "amount": details["amount"],
+                            "currency": details.get("currency", "₹"),
+                            "category": details.get("category", "Uncategorized"),
+                            "merchant": details.get("merchant"),
+                            "date": details["date"],
+                            "original_message": text
+                        }
+                        expenses.append(new_expense)
+                        next_expense_id += 1
+                        logging.info(f"Added expense (from Gemini): {new_expense}")
+                        # Update message for consistency
+                        response_data["message"] = f"✅ Expense added (via Gemini): {new_expense['currency']}{new_expense['amount']:.2f} for {new_expense['category']} on {new_expense['date'].strftime('%b %d, %Y')}."
+                        # Make details serializable for JSON response
+                        response_data["details"]["date"] = response_data["details"]["date"].isoformat()
+                    else:
+                        logging.warning("Gemini add_expense result missing required fields.")
+                        response_data = {"action": "unknown", "status": "failed", "message": "Gemini suggested adding an expense, but details were incomplete."}
+
+                except (ValueError, TypeError) as e:
+                    logging.warning(f"Error processing Gemini add_expense details: {e}")
+                    response_data = {"action": "unknown", "status": "failed", "message": "Could not process expense details suggested by Gemini."}
+
+            elif response_data.get("action") == "query_expense" and "criteria" in response_data:
+                # Basic post-processing for query
+                criteria = response_data["criteria"]
+                try:
+                    if "start_date" in criteria and isinstance(criteria["start_date"], str):
+                        criteria["start_date"] = datetime.datetime.fromisoformat(criteria["start_date"].split("T")[0]).date()
+                    if "end_date" in criteria and isinstance(criteria["end_date"], str):
+                        criteria["end_date"] = datetime.datetime.fromisoformat(criteria["end_date"].split("T")[0]).date()
+                    # Execute the query based on Gemini's criteria
+                    results = filter_expenses(criteria)
+                    # Use Gemini's message or generate a new one
+                    if not response_data.get("message"):
+                        response_data["message"] = format_expense_list(results, "Query results (via Gemini):")
+                    response_data["results_count"] = len(results)
+                    # Make criteria serializable
+                    response_data["criteria"] = {k: v.isoformat() if isinstance(v, datetime.date) else v for k, v in criteria.items() if v is not None}
+
+                except (ValueError, TypeError) as e:
+                    logging.warning(f"Error processing Gemini query_expense criteria: {e}")
+                    response_data = {"action": "unknown", "status": "failed", "message": "Could not process query criteria suggested by Gemini."}
+
+        else:
+            # Fallback to original unknown message if Gemini fails or returns unusable data
+            logging.info("Gemini API did not provide a usable structured result. Falling back to default unknown message.")
+            response_data = {
+                "action": "unknown",
+                "status": "failed",
+                "message": "Sorry, I couldn't quite understand that. Please try phrasing your expense or query differently. \nExamples:\n- 'Spent ₹50 on coffee yesterday at Starbucks'\n- 'Show my food expenses last week'\n- 'What was my total spending last month?'"
+            }
+            # Optionally include Gemini's raw suggestion if available and not structured
+            if gemini_result and isinstance(gemini_result, dict) and "message" in gemini_result:
+                response_data["message"] += f"\n\nGemini suggestion: {gemini_result['message']}"
 
-    logging.info(f"Analysis complete. Action: {response_data.get('action')}, Status: {response_data.get('status')}")
+    logging.info(f"Analysis complete. Action: {response_data.get('action')}, Status: {response_data.get('status')}")
     return response_data
 
 
+# Placeholder for Gemini API Key - Load from environment variable
+GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
+
+# Placeholder function for Gemini API call
+def call_gemini_api(text, api_key):
+    """
+    Placeholder function to call the Gemini API.
+    Replace with actual implementation.
+    Should ideally return a dictionary similar to analyze_expense_text's output
+    or None if the call fails or response is unusable.
+    """
+    if not api_key:
+        logging.warning("GEMINI_API_KEY not set. Skipping Gemini API call.")
+        return None
+
+    # --- Replace with actual Gemini API endpoint and request structure ---
+    # Example using Google AI Generative Language API (adjust model and endpoint as needed)
+    # Ensure you have the google-generativeai library installed (`pip install google-generativeai`)
+    # and the API key is correctly set as an environment variable.
+    api_endpoint = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent?key={api_key}"
+    headers = {
+        "Content-Type": "application/json"
+    }
+    # Construct the payload based on Gemini API requirements
+    # This prompt asks Gemini to act like the existing NLP service
+    prompt = f"""Analyze the following text for expense tracking. Determine the intent ('add_expense' or 'query_expense') and extract relevant details.
+
+Text: "{text}"
+
+Desired JSON output format:
+{{
+  "action": "add_expense" | "query_expense",
+  "status": "success" | "failed",
+  "message": "Confirmation or result summary",
+  "details": {{ // Only for add_expense
+    "amount": <float>,
+    "currency": "<string>",
+    "category": "<string>",
+    "merchant": "<string>",
+    "date": "YYYY-MM-DD"
+  }},
+  "criteria": {{ // Only for query_expense
+    "category": "<string>",
+    "merchant": "<string>",
+    "start_date": "YYYY-MM-DD",
+    "end_date": "YYYY-MM-DD"
+  }}
+}}
+
+If the intent is unclear or details are missing, set action to "unknown" and status to "failed".
+Provide only the JSON output.
+"""
+
+    payload = json.dumps({
+        "contents": [{
+            "parts":[{ "text": prompt }]
+        }]
+        # Add generationConfig if needed (e.g., temperature, max output tokens)
+        # "generationConfig": {
+        #     "temperature": 0.7,
+        #     "maxOutputTokens": 256
+        # }
+    })
+    # --- End of placeholder section ---
+
+    try:
+        response = requests.post(api_endpoint, headers=headers, data=payload, timeout=20) # Increased timeout
+        response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
+
+        gemini_response_raw = response.json()
+        logging.debug(f"Raw Gemini API response: {gemini_response_raw}")
+
+        # --- Process gemini_response ---
+        # Extract the text content which should contain the JSON
+        if 'candidates' in gemini_response_raw and len(gemini_response_raw['candidates']) > 0:
+            content = gemini_response_raw['candidates'][0].get('content', {}).get('parts', [{}])[0].get('text')
+            if content:
+                logging.info(f"Gemini suggested JSON: {content}")
+                # Clean potential markdown/code block formatting
+                content_cleaned = content.strip().strip('```json').strip('```').strip()
+                try:
+                    # Attempt to parse the JSON string from Gemini
+                    parsed_result = json.loads(content_cleaned)
+                    # Basic validation of the parsed structure
+                    if isinstance(parsed_result, dict) and "action" in parsed_result:
+                        logging.info("Successfully parsed structured data from Gemini.")
+                        # Add further validation/sanitization if needed
+                        return parsed_result
+                    else:
+                        logging.warning("Gemini response parsed but lacks expected structure.")
+                        return {"action": "info", "status": "success", "message": f"Gemini suggestion: {content_cleaned}"}
+                except json.JSONDecodeError as json_err:
+                    logging.warning(f"Failed to decode JSON from Gemini response: {json_err}. Raw content: {content_cleaned}")
+                    # Return the raw text as a message if JSON parsing fails
+                    return {"action": "info", "status": "success", "message": f"Gemini suggestion: {content_cleaned}"}
+            else:
+                logging.warning("No text content found in Gemini response candidates.")
+                return None
+        else:
+            logging.warning("No candidates found in Gemini API response.")
+            return None
+
+    except requests.exceptions.Timeout:
+        logging.error("Gemini API call timed out.")
+        return None
+    except requests.exceptions.RequestException as e:
+        logging.error(f"Gemini API call failed: {e}")
+        # Log response body if available and indicates an API error
+        if e.response is not None:
+            try:
+                logging.error(f"Gemini API error response: {e.response.json()}")
+            except json.JSONDecodeError:
+                logging.error(f"Gemini API error response (non-JSON): {e.response.text}")
+        return None
+    except Exception as e:
+        logging.error(f"An unexpected error occurred during Gemini API call or processing: {e}")
+        return None
+
+
 # --- Flask Blueprint Setup (Optional: Keep if direct API access is needed) ---
 nlp_bp = Blueprint('nlp_service', __name__)
 
@@ -592,20 +788,4 @@ def process_nlp_expense_route():
     if result.get("message") == "NLP model not available":
         status_code = 500
 
-    return jsonify(result), status_code
-
-
-# --- Example Usage / Testing Setup ---
-if __name__ == '__main__':
-    from flask import Flask
-
-    app = Flask(__name__)
-    app.register_blueprint(nlp_bp) # Register the blueprint
-
-    # Dummy data removed
-
-    print("Starting Flask server for testing NLP service...")
-    # print("Registered expenses:", expenses) # Can be long
-    if nlp is None:
-        print("WARNING: spaCy model failed to load. /process_nlp endpoint will return errors.")
-    app.run(debug=True, host='0.0.0.0', port=5001)
+    return jsonify(result), status_code
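For reference, the JSON envelope the prompt in call_gemini_api asks the model to return would parse into something like the Python dict below for an add_expense utterance. The field values are illustrative only and are not taken from the commit.

# Illustrative only: roughly what json.loads(content_cleaned) should yield when the model
# follows the schema embedded in the prompt (all values below are made up).
example_parsed_result = {
    "action": "add_expense",
    "status": "success",
    "message": "Added a coffee expense.",
    "details": {
        "amount": 50.0,
        "currency": "₹",
        "category": "Food",
        "merchant": "Starbucks",
        "date": "2024-05-01"  # ISO string; analyze_expense_text converts it to a date object
    }
}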
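The comments in call_gemini_api mention the google-generativeai client library, although the commit itself posts to the REST endpoint with requests. A minimal sketch of the SDK route, assuming the same gemini-pro model and a GEMINI_API_KEY environment variable, could look like this (not part of the commit):

# Hedged sketch: the same request via the google-generativeai SDK instead of raw requests.
import os
import google.generativeai as genai

genai.configure(api_key=os.environ["GEMINI_API_KEY"])
model = genai.GenerativeModel("gemini-pro")  # same model the REST URL targets

# In the service this would be the same prompt string assembled inside call_gemini_api.
prompt = 'Analyze the following text for expense tracking ... Text: "Spent ₹50 on coffee yesterday"'
response = model.generate_content(prompt)
print(response.text)  # replaces the manual candidates/parts extraction; still needs json.loads()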
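One way to exercise the new fallback path locally is to set GEMINI_API_KEY before importing the module (the key is read at module import time) and then call analyze_expense_text directly. This is a sketch rather than part of the commit; it assumes the file is importable as nlp_service and that the spaCy model loads so the local intent check runs first.

# Hedged sketch: exercising the Gemini fallback from a Python shell.
import os
os.environ["GEMINI_API_KEY"] = "your-key-here"  # hypothetical placeholder; must be set before the import

import nlp_service  # assumes nlp_service.py is on the import path

# A phrase the local rules are unlikely to classify, so it falls through to the Gemini call.
result = nlp_service.analyze_expense_text("I think I blew about fifty bucks on snacks at the fair")
print(result.get("action"), "-", result.get("message"))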
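If the blueprint is registered in a running Flask app, the same path can also be driven over HTTP. Neither the route path nor the request body shape is visible in this diff, so the /process_nlp path (taken from the removed startup warning), the port, and the JSON payload below are all assumptions.

# Hedged sketch: hitting the blueprint route over HTTP (path, port, and payload shape assumed).
import requests

resp = requests.post(
    "http://localhost:5001/process_nlp",               # port taken from the removed __main__ block
    json={"text": "Show my food expenses last week"},  # assumed request shape
    timeout=10,
)
print(resp.status_code, resp.json())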