import re

# --- NLP Configuration ---

# Currency symbols recognised in free text. Expand as needed.
CURRENCY_SYMBOLS = ["₹", "$", "€", "£"]

# Fallback regex to find monetary values even if spaCy misses the MONEY entity.
# Matching is case-insensitive. Exactly one alternative matches per hit, so the
# capture groups are populated as follows (unused groups are None):
#   groups 1, 2 -> currency BEFORE the amount    ("$12.50", "Rs 500")
#   groups 3, 4 -> currency AFTER the amount     ("100$", "100rs", "1,250 rupees")
#   group  5    -> standalone number with no currency nearby ("250")
#
# An amount must start with a digit (`\d[\d,]*`), so a stray comma between two
# words ("a,b") is never reported as an amount.
#
# In the suffix alternative the word boundary is required only after the
# word-form currencies: a symbol such as "$" is a non-word character, so
# demanding `\b` after it would reject "100$" when followed by a space or the
# end of the text. No boundary is required *before* a word-form suffix, so the
# unspaced form "100rs" is accepted; the trailing `\b` still rejects partial
# words such as "100rsvp".
FALLBACK_AMOUNT_REGEX = re.compile(
    # 1) currency symbol/code, optional space, amount
    r"([\$€£₹]|\b(?:rs|usd|eur|gbp))\s?(\d[\d,]*(?:\.\d{1,2})?)\b"
    # 2) amount, optional space, currency symbol/word
    r"|\b(\d[\d,]*(?:\.\d{1,2})?)\s?"
    r"([\$€£₹]|(?:rupees|rs|dollars|euros|pounds|usd|eur|gbp)\b)"
    # 3) bare amount
    r"|\b(\d[\d,]*(?:\.\d{1,2})?)\b",
    re.IGNORECASE,
)

# Consolidated category keywords: category name -> lowercase trigger words.
# "Misc" can be the default if no keywords match.
CATEGORY_KEYWORDS = {
    "Coffee": [
        "coffee", "latte", "cappuccino", "starbucks", "cafe", "café",
        "espresso", "mocha", "ccd",
    ],
    "Food": [
        "food", "meal", "lunch", "dinner", "snack", "restaurant", "dining",
        "sandwich", "burger", "pizza",
    ],
    "Groceries": [
        "groceries", "supermarket", "vegetables", "milk", "market", "zepto",
        "blinkit", "bigbasket",
    ],
    "Entertainment": [
        "movie", "cinema", "concert", "game", "netflix", "spotify", "tickets",
        "fun",
    ],
    # Combined Travel/Transport
    "Transport": [
        "travel", "taxi", "flight", "train", "bus", "uber", "ola", "fuel",
        "gas", "lyft", "cab", "ticket", "metro", "auto", "rickshaw", "commute",
    ],
    "Shopping": [
        "shop", "shopping", "clothes", "electronics", "mall", "amazon",
        "flipkart", "purchase", "order", "store",
    ],
    "Utilities": [
        "utility", "utilities", "bill", "electricity", "water", "internet",
        "phone", "recharge",
    ],
    "Rent": ["rent", "lease"],
    "Income": ["salary", "received", "credited", "deposit", "income"],
    "Investment": [
        "invest", "stock", "shares", "mutual fund", "sip", "investment",
    ],
}

# Keywords for intent detection (less critical if using zero-shot
# classification, but usable as helpers).
QUERY_KEYWORDS = [
    "how much", "show me", "list", "what are", "total", "summary", "spending",
    "history", "report", "biggest", "view",
]

# Verbs often associated with spending.
ADD_EXPENSE_VERBS = [
    "spent", "bought", "paid", "cost", "charged", "expensed", "got", "had",
]