# filepath: c:\Users\Dell\Monil\Apps\code\Projects\space-songporter\OCR\nlp_service.py
"""Intent, entity and category analysis for short free-text finance messages."""
import json
import traceback

from model_setup import zero_shot, ner  # Assuming model_setup.py exists and is correct
from utils import parse_entities  # Assuming utils.py exists and is correct
from config import CATEGORY_KEYWORDS, QUERY_KEYWORDS  # Import categories and query keywords from config

# Intents whose category is dictated by the intent itself; keyword matching
# is skipped for these categories.
_INTENT_CATEGORIES = {"income": "Income", "investment": "Investment"}


def _resolve_category(intent, text_lower, item):
    """Pick the category for a non-query message.

    Income/investment intents map straight to their category. Otherwise the
    first CATEGORY_KEYWORDS entry (in dict order) with a keyword found in the
    lower-cased text or item wins; "Misc" is returned when nothing matches.
    """
    intent_category = _INTENT_CATEGORIES.get(intent)
    if intent_category is not None:
        return intent_category

    item_lower = item.lower() if item else ""
    for cat, keywords in CATEGORY_KEYWORDS.items():
        # Income/Investment are decided by intent only, never by keywords.
        if cat in ("Income", "Investment"):
            continue
        if any(
            kw in text_lower or (bool(item_lower) and kw in item_lower)
            for kw in keywords
        ):
            return cat  # Stop after first match
    return "Misc"


def analyze_text(text: str) -> dict:
    """
    Analyzes the input text for intent, entities, and category.

    Args:
        text: The input text string.

    Returns:
        A dictionary whose "status" key is one of:

        * "failed" - empty/blank input or intent classification error
          ("message", and "error" when an exception was caught).
        * "fallback_required" - the text was classified as a query or
          contains one of QUERY_KEYWORDS ("message", "original_text",
          "classified_intent").
        * "success" - "type" (intent), "category", "amount", "currency"
          and "item". The last three are None when entity extraction
          raised.
    """
    # Blank input carries nothing to classify; reject it before calling the model.
    if not text or (isinstance(text, str) and not text.strip()):
        return {
            "status": "failed",
            "message": "Input text cannot be empty."
        }

    print(f"NLP Service: Processing text: {text}")

    # Step 1: Intent classification
    try:
        candidate_labels = ["expense", "investment", "query", "limit-setting", "income", "other"]
        intent_result = zero_shot(text, candidate_labels=candidate_labels)
        # The first label/score pair is taken as the predicted intent.
        intent = intent_result["labels"][0]
        score = intent_result["scores"][0]
        print(f"NLP Service: Intent classification: {intent} (Score: {score:.2f})")
    except Exception as e:
        print(f"NLP Service: Error during intent classification: {e}")
        return {
            "status": "failed",
            "message": "Intent classification failed",
            "error": str(e)
        }

    # Step 2: Check for Query Keywords or Query Intent
    text_lower = text.lower()
    is_query_keyword_present = any(kw in text_lower for kw in QUERY_KEYWORDS)

    if intent == "query" or is_query_keyword_present:
        if is_query_keyword_present and intent != "query":
            print(f"NLP Service: Query keyword detected, overriding initial intent '{intent}'. Fallback triggered.")
        else:
            print(
                f"NLP Service: Intent classified as '{intent}' or query keyword found. "
                "Fallback route triggered."
            )
        # Return fallback status
        return {
            "status": "fallback_required",
            "message": "Intent requires further processing (query detected).",
            "original_text": text,
            "classified_intent": "query"  # Standardize to query if fallback is triggered
        }

    # Step 3: Entity extraction (for non-fallback intents)
    try:
        entities = ner(text)
        print(f"NLP Service: NER entities: {entities}")
        # The original text is handed to parse_entities as full_text
        # alongside the NER output.
        amount, currency, item = parse_entities(entities, full_text=text)
        print(f"NLP Service: Parsed entities: Amount={amount}, Currency={currency}, Item={item}")
    except Exception as e:
        # Best effort: a failed extraction still yields a response, just
        # without amount/currency/item.
        print(f"NLP Service: Error during entity extraction: {e}")
        amount, currency, item = None, None, None  # Default to None on error
        traceback.print_exc()

    # Step 4: Category matching using config.py
    category = _resolve_category(intent, text_lower, item)
    print(f"NLP Service: Assigned category: {category}")

    # Final successful response structure
    return {
        "status": "success",
        "type": intent,
        "category": category,
        "amount": amount,
        "currency": currency,
        "item": item
    }


# Example usage (for testing nlp_service.py directly)
if __name__ == '__main__':
    test_cases = [
        "spent 5 eur on coffee",
        "how much did I spend last month",
        "salary credited 50000",
        "invested 1000 in stocks",
        "paid 20 usd for lunch",
        "got groceries for 50 dollars",
        "what was my total spending on food?",
        "received 200 GBP deposit"
    ]
    for case in test_cases:
        print(f"\n--- Testing: '{case}' ---")
        result = analyze_text(case)
        print(json.dumps(result, indent=2))