MonilM committed on
Commit
6c095fc
·
1 Parent(s): 4c9f681

Added NLP#2

Browse files
Files changed (1) hide show
  1. nlp_service.py +204 -24
nlp_service.py CHANGED
@@ -6,6 +6,9 @@ from flask import Blueprint, request, jsonify
6
  from collections import defaultdict
7
  import logging
8
  import os # To handle potential model loading issues
 
 
 
9
 
10
  # --- Setup ---
11
  logging.basicConfig(level=logging.INFO)
@@ -560,17 +563,210 @@ def analyze_expense_text(text):
560
  }
561
 
562
  else: # intent == "unknown"
563
- logging.info(f"Could not determine intent for: {text}")
564
- response_data = {
565
- "action": "unknown",
566
- "status": "failed",
567
- "message": "Sorry, I couldn't quite understand that. Please try phrasing your expense or query differently. \nExamples:\n- 'Spent ₹50 on coffee yesterday at Starbucks'\n- 'Show my food expenses last week'\n- 'What was my total spending last month?'"
568
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
569
 
570
- logging.info(f"Analysis complete. Action: {response_data.get('action')}, Status: {response_data.get('status')}")
571
  return response_data
572
 
573
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
574
  # --- Flask Blueprint Setup (Optional: Keep if direct API access is needed) ---
575
  nlp_bp = Blueprint('nlp_service', __name__)
576
 
@@ -592,20 +788,4 @@ def process_nlp_expense_route():
592
  if result.get("message") == "NLP model not available":
593
  status_code = 500
594
 
595
- return jsonify(result), status_code
596
-
597
-
598
- # --- Example Usage / Testing Setup ---
599
- if __name__ == '__main__':
600
- from flask import Flask
601
-
602
- app = Flask(__name__)
603
- app.register_blueprint(nlp_bp) # Register the blueprint
604
-
605
- # Dummy data removed
606
-
607
- print("Starting Flask server for testing NLP service...")
608
- # print("Registered expenses:", expenses) # Can be long
609
- if nlp is None:
610
- print("WARNING: spaCy model failed to load. /process_nlp endpoint will return errors.")
611
- app.run(debug=True, host='0.0.0.0', port=5001)
 
6
  from collections import defaultdict
7
  import logging
8
  import os # To handle potential model loading issues
9
+ import requests # Add requests for API calls
10
+ import json # For handling JSON data
11
+ import os # Already imported, needed for API key
12
 
13
  # --- Setup ---
14
  logging.basicConfig(level=logging.INFO)
 
563
  }
564
 
565
  else: # intent == "unknown"
566
+ logging.info(f"Local NLP intent unknown for: {text}. Attempting Gemini API call.")
567
+
568
+ # --- Call Gemini API ---
569
+ gemini_result = call_gemini_api(text, GEMINI_API_KEY)
570
+
571
+ if gemini_result and isinstance(gemini_result, dict) and gemini_result.get("action") in ["add_expense", "query_expense", "info"]:
572
+ # If Gemini returned a structured result we can use (or an info message), return it
573
+ logging.info(f"Using result from Gemini API. Action: {gemini_result.get('action')}")
574
+ response_data = gemini_result
575
+ # TODO: Potentially re-validate or re-process gemini_result here if needed
576
+ # For example, if action is add_expense, ensure data types are correct, parse date string etc.
577
+ # If action is query_expense, parse date strings etc.
578
+ if response_data.get("action") == "add_expense" and "details" in response_data:
579
+ # Basic post-processing/validation for added expense
580
+ details = response_data["details"]
581
+ try:
582
+ if "date" in details and isinstance(details["date"], str):
583
+ details["date"] = datetime.datetime.fromisoformat(details["date"].split("T")[0]).date()
584
+ if "amount" in details:
585
+ details["amount"] = float(details["amount"])
586
+ # Add expense to memory if Gemini successfully added it
587
+ # Note: This assumes Gemini provides all necessary fields correctly
588
+ if all(k in details for k in ["amount", "currency", "category", "date"]):
589
+ global next_expense_id
590
+ new_expense = {
591
+ "id": next_expense_id,
592
+ "amount": details["amount"],
593
+ "currency": details.get("currency", "₹"),
594
+ "category": details.get("category", "Uncategorized"),
595
+ "merchant": details.get("merchant"),
596
+ "date": details["date"],
597
+ "original_message": text
598
+ }
599
+ expenses.append(new_expense)
600
+ next_expense_id += 1
601
+ logging.info(f"Added expense (from Gemini): {new_expense}")
602
+ # Update message for consistency
603
+ response_data["message"] = f"✅ Expense added (via Gemini): {new_expense['currency']}{new_expense['amount']:.2f} for {new_expense['category']} on {new_expense['date'].strftime('%b %d, %Y')}."
604
+ # Make details serializable for JSON response
605
+ response_data["details"]["date"] = response_data["details"]["date"].isoformat()
606
+ else:
607
+ logging.warning("Gemini add_expense result missing required fields.")
608
+ response_data = {"action": "unknown", "status": "failed", "message": "Gemini suggested adding an expense, but details were incomplete."}
609
+
610
+ except (ValueError, TypeError) as e:
611
+ logging.warning(f"Error processing Gemini add_expense details: {e}")
612
+ response_data = {"action": "unknown", "status": "failed", "message": "Could not process expense details suggested by Gemini."}
613
+
614
+ elif response_data.get("action") == "query_expense" and "criteria" in response_data:
615
+ # Basic post-processing for query
616
+ criteria = response_data["criteria"]
617
+ try:
618
+ if "start_date" in criteria and isinstance(criteria["start_date"], str):
619
+ criteria["start_date"] = datetime.datetime.fromisoformat(criteria["start_date"].split("T")[0]).date()
620
+ if "end_date" in criteria and isinstance(criteria["end_date"], str):
621
+ criteria["end_date"] = datetime.datetime.fromisoformat(criteria["end_date"].split("T")[0]).date()
622
+ # Execute the query based on Gemini's criteria
623
+ results = filter_expenses(criteria)
624
+ # Use Gemini's message or generate a new one
625
+ if not response_data.get("message"):
626
+ response_data["message"] = format_expense_list(results, "Query results (via Gemini):")
627
+ response_data["results_count"] = len(results)
628
+ # Make criteria serializable
629
+ response_data["criteria"] = {k: v.isoformat() if isinstance(v, datetime.date) else v for k, v in criteria.items() if v is not None}
630
+
631
+ except (ValueError, TypeError) as e:
632
+ logging.warning(f"Error processing Gemini query_expense criteria: {e}")
633
+ response_data = {"action": "unknown", "status": "failed", "message": "Could not process query criteria suggested by Gemini."}
634
+
635
+ else:
636
+ # Fallback to original unknown message if Gemini fails or returns unusable data
637
+ logging.info("Gemini API did not provide a usable structured result. Falling back to default unknown message.")
638
+ response_data = {
639
+ "action": "unknown",
640
+ "status": "failed",
641
+ "message": "Sorry, I couldn't quite understand that. Please try phrasing your expense or query differently. \nExamples:\n- 'Spent ₹50 on coffee yesterday at Starbucks'\n- 'Show my food expenses last week'\n- 'What was my total spending last month?'"
642
+ }
643
+ # Optionally include Gemini's raw suggestion if available and not structured
644
+ if gemini_result and isinstance(gemini_result, dict) and "message" in gemini_result:
645
+ response_data["message"] += f"\n\nGemini suggestion: {gemini_result['message']}"
646
 
647
+ logging.info(f"Analysis complete. Action: {response_data.get('action')}, Status: {response_data.get('status')}")
648
  return response_data
649
 
650
 
651
# Gemini API key, read once at import time from the environment. When it is
# unset, call_gemini_api() logs a warning and returns None without any request.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")

# REST endpoint of the Generative Language API model used as the NLP fallback.
# The key is deliberately NOT part of the URL (see call_gemini_api).
_GEMINI_ENDPOINT = (
    "https://generativelanguage.googleapis.com/v1beta/models/"
    "gemini-pro:generateContent"
)


def _strip_code_fence(content):
    """Return *content* without a surrounding Markdown code fence.

    Only a literal leading "```" (optionally followed by the language tag
    "json") and a literal trailing "```" are removed. ``str.strip("```json")``
    must not be used for this: its argument is a *set of characters*, so it
    would also eat leading/trailing 'j', 's', 'o', 'n' from ordinary text.
    """
    cleaned = content.strip()
    if cleaned.startswith("```"):
        cleaned = cleaned[3:]
        # Drop an optional language tag directly after the opening fence.
        if cleaned[:4].lower() == "json":
            cleaned = cleaned[4:]
    if cleaned.endswith("```"):
        cleaned = cleaned[:-3]
    return cleaned.strip()


def _extract_candidate_text(gemini_response_raw):
    """Return the text of the first candidate's first part, or None if absent."""
    candidates = None
    if isinstance(gemini_response_raw, dict):
        candidates = gemini_response_raw.get("candidates")
    if not candidates:
        logging.warning("No candidates found in Gemini API response.")
        return None

    # Tolerate a missing/null "content" and a missing/empty "parts" list
    # instead of letting an IndexError/AttributeError escape.
    first_content = candidates[0].get("content") or {}
    parts = first_content.get("parts") or [{}]
    content = parts[0].get("text")
    if not content:
        logging.warning("No text content found in Gemini response candidates.")
        return None
    return content


def call_gemini_api(text, api_key):
    """Ask the Gemini API to classify *text* as an expense action.

    Args:
        text: The user's raw message.
        api_key: Gemini API key; when falsy no request is made.

    Returns:
        - the parsed dict when Gemini answers with a JSON object containing
          an "action" key;
        - ``{"action": "info", "status": "success", "message": ...}`` carrying
          Gemini's raw reply when it is not such an object;
        - ``None`` when the key is missing, the request fails or times out,
          or the response has no usable text.

    The function never raises: every failure is logged and reported as None.
    """
    if not api_key:
        logging.warning("GEMINI_API_KEY not set. Skipping Gemini API call.")
        return None

    # Send the key in a header rather than as a "?key=" query parameter:
    # requests' exception messages include the URL, so a key in the URL
    # would end up in the error logs written below.
    headers = {
        "Content-Type": "application/json",
        "x-goog-api-key": api_key,
    }

    # This prompt asks Gemini to act like the existing local NLP service.
    prompt = f"""Analyze the following text for expense tracking. Determine the intent ('add_expense' or 'query_expense') and extract relevant details.

Text: "{text}"

Desired JSON output format:
{{
  "action": "add_expense" | "query_expense",
  "status": "success" | "failed",
  "message": "Confirmation or result summary",
  "details": {{ // Only for add_expense
    "amount": <float>,
    "currency": "<string>",
    "category": "<string>",
    "merchant": "<string>",
    "date": "YYYY-MM-DD"
  }},
  "criteria": {{ // Only for query_expense
    "category": "<string>",
    "merchant": "<string>",
    "start_date": "YYYY-MM-DD",
    "end_date": "YYYY-MM-DD"
  }}
}}

If the intent is unclear or details are missing, set action to "unknown" and status to "failed".
Provide only the JSON output.
"""

    payload = json.dumps({
        "contents": [{
            "parts": [{"text": prompt}],
        }],
    })

    try:
        response = requests.post(_GEMINI_ENDPOINT, headers=headers, data=payload, timeout=20)
        response.raise_for_status()  # Raise HTTPError for 4xx/5xx responses

        gemini_response_raw = response.json()
        logging.debug(f"Raw Gemini API response: {gemini_response_raw}")

        content = _extract_candidate_text(gemini_response_raw)
        if content is None:
            return None

        logging.info(f"Gemini suggested JSON: {content}")
        content_cleaned = _strip_code_fence(content)
        try:
            parsed_result = json.loads(content_cleaned)
        except json.JSONDecodeError as json_err:
            logging.warning(f"Failed to decode JSON from Gemini response: {json_err}. Raw content: {content_cleaned}")
            # Return the raw text as a message if JSON parsing fails
            return {"action": "info", "status": "success", "message": f"Gemini suggestion: {content_cleaned}"}

        if isinstance(parsed_result, dict) and "action" in parsed_result:
            logging.info("Successfully parsed structured data from Gemini.")
            return parsed_result

        logging.warning("Gemini response parsed but lacks expected structure.")
        return {"action": "info", "status": "success", "message": f"Gemini suggestion: {content_cleaned}"}

    except requests.exceptions.Timeout:
        logging.error("Gemini API call timed out.")
        return None
    except requests.exceptions.RequestException as e:
        logging.error(f"Gemini API call failed: {e}")
        # Log the response body if available; it usually explains API errors.
        error_response = getattr(e, "response", None)
        if error_response is not None:
            try:
                logging.error(f"Gemini API error response: {error_response.json()}")
            except ValueError:
                # ValueError covers every JSON decoder backend requests may use.
                logging.error(f"Gemini API error response (non-JSON): {error_response.text}")
        return None
    except Exception as e:
        # Top-level boundary: callers rely on None instead of an exception.
        logging.error(f"An unexpected error occurred during Gemini API call or processing: {e}")
        return None
768
+
769
+
770
  # --- Flask Blueprint Setup (Optional: Keep if direct API access is needed) ---
771
  nlp_bp = Blueprint('nlp_service', __name__)
772
 
 
788
  if result.get("message") == "NLP model not available":
789
  status_code = 500
790
 
791
+ return jsonify(result), status_code