Ninad077 committed on
Commit
d4deb6e
·
verified ·
1 Parent(s): 8ac32ba

Upload 2 files

Browse files
Files changed (2) hide show
  1. bot.py +188 -0
  2. patterns.py +24 -0
bot.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import subprocess
3
+ import os
4
+ import re
5
+ import time
6
+ import base64
7
+ from patterns import count_patterns, list_patterns, pdf_request_patterns, greetings,farewell # Import patterns
8
+
9
# Base GCS prefix that every invoice lookup is rooted at.
BASE_GCS_PATH = "gs://fynd-assets-private/documents/daytrader/"

# Local directory that fetched PDFs are copied into. The default is the
# original hard-coded developer path; set FYNDER_DOWNLOAD_PATH to run the
# app on a machine where that directory does not exist.
LOCAL_DOWNLOAD_PATH = os.environ.get(
    "FYNDER_DOWNLOAD_PATH",
    "/Users/ninadmandavkar/Desktop/daytrader/08-2024/",
)
12
+
13
def fetch_pdf_from_gcs(pdf_name):
    """Copy every PDF under BASE_GCS_PATH whose name contains *pdf_name*.

    Runs ``gsutil -m cp -r`` with a recursive wildcard and copies the
    matches into LOCAL_DOWNLOAD_PATH.

    Args:
        pdf_name: Fragment of the object name to search for.

    Returns:
        The ``subprocess.CompletedProcess`` of the gsutil call, with
        stdout and stderr captured as text. Callers check ``returncode``.
    """
    source_glob = f"{BASE_GCS_PATH}**/*{pdf_name}*.pdf"
    command = ["gsutil", "-m", "cp", "-r", source_glob, LOCAL_DOWNLOAD_PATH]
    return subprocess.run(command, capture_output=True, text=True)
21
+
22
def count_invoices_in_month(month_year):
    """Count and list the invoice PDFs whose object name contains *month_year*.

    Runs ``gsutil ls`` with a recursive wildcard under
    ``BASE_GCS_PATH + "PDFs/"``.

    Args:
        month_year: Text to look for in the object names; the caller
            passes the month/year string taken from the user's prompt.

    Returns:
        A ``(count, names)`` tuple. ``names`` holds the object base names
        without their extension. ``(0, [])`` is returned when gsutil
        reports that the wildcard matched nothing, and ``(None, [])`` when
        gsutil failed for any other reason.
    """
    gcs_search_path = f"{BASE_GCS_PATH}PDFs/**/*{month_year}*.pdf"
    result = subprocess.run(
        ["gsutil", "ls", gcs_search_path],
        capture_output=True,
        text=True
    )
    if result.returncode != 0:
        # gsutil exits non-zero when a wildcard matches no objects. That is
        # an empty month, not a failure, so report a zero count instead of
        # the generic error the caller shows for None.
        if "matched no objects" in (result.stderr or ""):
            return 0, []
        return None, []

    files = [
        os.path.splitext(os.path.basename(line.strip()))[0]  # drop ".pdf"
        for line in result.stdout.splitlines()
        if line.strip()
    ]
    return len(files), files
34
+
35
def extract_pdf_id(text):
    """Return the first invoice id found in *text*, or None.

    An id is two capital letters, a single capital letter, an uppercase
    alphanumeric run and ``FY`` plus two digits, joined by hyphens.
    Matching is case-sensitive.
    """
    found = re.search(r'([A-Z]{2}-[A-Z]-[A-Z0-9]+-FY\d{2})', text)
    if found is None:
        return None
    return found.group(0)
38
+
39
def month_to_str(month):
    """Translate a month name or number into a zero-padded month string.

    Args:
        month: English month name in any letter case, or a month number
            from 1 to 12 given as a string.

    Returns:
        The two-digit month ("01" to "12"), or None when *month* is not
        recognised.
    """
    month_map = {
        "january": "01", "february": "02", "march": "03", "april": "04",
        "may": "05", "june": "06", "july": "07", "august": "08",
        "september": "09", "october": "10", "november": "11", "december": "12"
    }
    token = month.strip()
    if token.isdecimal() and 1 <= int(token) <= 12:
        # Return only the month, exactly like the name branch does. The
        # caller appends the year itself, so adding one here produced
        # strings such as "08-2024-2024".
        return f"{int(token):02d}"
    return month_map.get(token.lower())
48
+
49
def typing_effect(text):
    """Reveal *text* one character at a time in a gradient-coloured span.

    A single ``st.empty()`` placeholder is re-rendered once per character
    with a short pause between frames, then rendered a final time with the
    complete text.

    Args:
        text: The message to display. It is inserted into HTML as-is.
    """
    def render(visible):
        # Build the markup around the visible prefix directly. Replacing
        # `text` inside the finished markup also rewrote the template when
        # the message happened to occur in it (e.g. "span", "text", "0").
        return f"""
        <span style="background-image: linear-gradient(to right, #800000, #ff0000);
        -webkit-background-clip: text;
        -webkit-text-fill-color: transparent;">
        {visible}
        </span>
        """

    typed_text_placeholder = st.empty()
    for end in range(len(text) + 1):
        typed_text_placeholder.markdown(render(text[:end]), unsafe_allow_html=True)
        time.sleep(0.018)
    typed_text_placeholder.markdown(render(text), unsafe_allow_html=True)
62
+
63
def _contains_phrase(text, phrases):
    """Return True when any of *phrases* occurs in *text* as whole words."""
    return any(
        re.search(rf"\b{re.escape(phrase)}\b", text) for phrase in phrases
    )


def _normalize_month_year(month_year):
    """Turn "<month> <year>" into "MM-YYYY"; return other input unchanged."""
    parts = month_year.split()
    if len(parts) == 2:
        month_str = month_to_str(parts[0])
        if month_str:
            # Keep only the month digits, tolerating a month_to_str result
            # that already carries a "-YYYY" suffix, so the year is never
            # appended twice.
            return f"{month_str.split('-')[0]}-{parts[1]}"
    return month_year


def chatbot_response(user_input):
    """Decide how to answer one user prompt.

    Checks are tried in this order: greeting, farewell, explicit PDF
    request, invoice count, invoice list, then a bare invoice id anywhere
    in the prompt.

    Args:
        user_input: The raw prompt typed by the user.

    Returns:
        One of:
        * ``str`` - a message to display;
        * ``list[str]`` - markdown-bold invoice names, one per entry;
        * ``(message, pdf_id)`` - the caller should fetch that PDF;
        * ``(message, pdf_id, True)`` - the prompt asked when the invoice
          was created.
    """
    lower_input = user_input.lower()

    # Match whole words only: a substring test treats "this" as "hi" and
    # "they" as "hey", which hijacked unrelated prompts.
    if _contains_phrase(lower_input, greetings):
        return "Hello. Fynder here. How can I help you today?"

    if _contains_phrase(lower_input, farewell):
        return "Goodbye! Let me know if you need help again."

    # Check for PDF requests using imported patterns. The search runs on
    # the original text, case-insensitively: the patterns' id class is
    # uppercase-only, so it could never match the lowercased prompt, and
    # the id must keep the case the user typed.
    for pattern in pdf_request_patterns:
        pdf_request_match = re.search(pattern, user_input, re.IGNORECASE)
        if pdf_request_match:
            pdf_id = pdf_request_match.group(1)
            return f"Looking for PDF for invoice '{pdf_id}'. Please wait...", pdf_id

    # Check for count requests using imported patterns
    for pattern in count_patterns:
        month_year_match = re.search(pattern, lower_input)
        if month_year_match:
            month_year = _normalize_month_year(month_year_match.group(1))
            count, _ = count_invoices_in_month(month_year)
            if count is not None:
                return f"There are {count} invoices generated in {month_year}."
            return "I encountered an error while fetching the invoice count. Please try again later."

    # Check for list requests using imported patterns
    for pattern in list_patterns:
        list_match = re.search(pattern, lower_input)
        if list_match:
            # The month/year is always the last capturing group; one list
            # pattern captures "present"/"generated" ahead of it.
            month_year = _normalize_month_year(
                list_match.group(list_match.lastindex)
            )
            _, files = count_invoices_in_month(month_year)
            if files:
                return [f"**{file}**" for file in files]  # Use markdown bold for emphasis
            return "No invoices found for that month."

    pdf_id = extract_pdf_id(user_input)
    if pdf_id:
        if "when" in lower_input and "created" in lower_input:
            return f"Checking creation date for '{pdf_id}'. Please wait...", pdf_id, True
        return f"Looking for '{pdf_id}' in GCS fynd prod 393805 bucket. Please wait...", pdf_id

    return "I didn't quite understand that ;("
118
+
119
def download_pdf(file_path):
    """Return the contents of the file at *file_path* as Base64 text."""
    with open(file_path, "rb") as handle:
        encoded = base64.b64encode(handle.read())
    # b64encode yields bytes; callers embed the result in a string.
    return encoded.decode('utf-8')
124
+
125
def main():
    """Render the Fynder page and answer the prompt the user submitted.

    Draws the title and the prompt box, passes the prompt to
    ``chatbot_response`` and displays the result:

    * a tuple triggers a GCS fetch and one download link per local PDF
      whose file name contains the requested id;
    * a list is typed out entry by entry;
    * anything else is typed out as a single message.
    """
    st.markdown(
        """
        <h1 style="
        background-image: linear-gradient(to right, #800000, #ff0000);
        -webkit-background-clip: text;
        -webkit-text-fill-color: transparent;
        font-size: 3rem; /* Adjust size as needed */
        text-align: center; /* Center the title */
        ">
        Fynder
        </h1>
        """,
        unsafe_allow_html=True
    )

    user_input = st.text_input("", value="", help="Type your prompt here")
    if not user_input:
        return

    response = chatbot_response(user_input)

    if isinstance(response, list):  # When the response is a list of files
        for pdf_file in response:
            typing_effect(pdf_file)  # Apply typing effect to each file
        return

    if not isinstance(response, tuple):
        typing_effect(response)
        return

    # chatbot_response returns (message, pdf_id), or (message, pdf_id, True)
    # for "when ... created" prompts. Unpacking exactly two names raised
    # ValueError on the longer form, so take only the first two items.
    # NOTE(review): no creation-date lookup exists in this file; such
    # prompts currently fall through to the normal PDF fetch.
    bot_response, pdf_name = response[:2]
    typing_effect(bot_response)

    result = fetch_pdf_from_gcs(pdf_name)
    if result.returncode != 0:
        typing_effect("There was an error fetching the PDF. Please check the name and try again.")
        st.write(result.stderr)
        return

    downloaded_files = [
        f for f in os.listdir(LOCAL_DOWNLOAD_PATH)
        if pdf_name in f and f.endswith('.pdf')
    ]
    if not downloaded_files:
        typing_effect("No files matching that name were found.")
        return

    for pdf_file in downloaded_files:
        file_path = os.path.join(LOCAL_DOWNLOAD_PATH, pdf_file)  # Update this path as per your structure
        pdf_data = download_pdf(file_path)

        # Create a custom download button with gradient styling. The PDF
        # is embedded in the link itself as a Base64 data URI.
        download_button_html = f"""
        <a href="data:application/pdf;base64,{pdf_data}" download="{pdf_file}"
        style="
        display: inline-block;
        padding: 10px 20px;
        color: white;
        text-decoration: none;
        border-radius: 20px;
        background-image: linear-gradient(to right, #800000, #ff0000);
        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
        ">
        Download {pdf_file}
        </a>
        """
        st.markdown(download_button_html, unsafe_allow_html=True)


if __name__ == "__main__":
    main()
patterns.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# List of regex patterns for counting invoices. Each one captures the
# requested month as "<word> <4-digit year>" or "<1-2 digits>-<4-digit year>".
count_patterns = [
    lead_in + r' (\w+ \d{4}|\d{1,2}-\d{4})'
    for lead_in in (
        r'count of invoices generated in',
        r'invoice count for',
        r'how many invoices in',
    )
]
6
+
7
# List of regex patterns for listing invoices. Group 1 is always the
# requested month ("<word> <4-digit year>" or "<1-2 digits>-<4-digit year>").
list_patterns = [
    # "present"/"generated" is a non-capturing alternative so that the
    # month stays in group 1, like in every other pattern of this list.
    r'show me the list of invoices (?:present|generated) in (\w+ \d{4}|\d{1,2}-\d{4})',
    r'list me the invoices in (\w+ \d{4}|\d{1,2}-\d{4})',
    r'can you please show me the invoices in (\w+ \d{4}|\d{1,2}-\d{4})',
    r'i need to check all the invoices in (\w+ \d{4}|\d{1,2}-\d{4})',
]
14
+
15
# Regex patterns for explicit PDF requests. Group 1 captures the invoice
# id as a run of capital letters, digits and hyphens.
pdf_request_patterns = [
    lead_in + r' ([A-Z0-9\-]+)'
    for lead_in in (
        r'give me the pdf for',
        r'can you please find me the pdf for',
        r'fetch me the pdf for',
        r'please get the pdf for',
    )
]
21
+
22
# Phrases that mark a prompt as a greeting.
greetings = [
    "hello",
    "hi",
    "hey",
    "greetings",
]

# Phrases that mark a prompt as a farewell.
farewell = [
    "bye",
    "goodbye",
    "see you",
]