yashgori20 committed on
Commit
6243567
·
verified ·
1 Parent(s): c676ddb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +295 -374
app.py CHANGED
@@ -1,374 +1,295 @@
1
- import os
2
- import streamlit as st
3
- import pickle
4
- import faiss
5
- import pandas as pd
6
- from sentence_transformers import SentenceTransformer
7
- from groq import Groq
8
-
9
# The Groq API key is read from the GROQ_API_KEY environment variable so the
# secret never lives in source control. Any key that was committed to the
# repository must be treated as leaked and revoked.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=GROQ_API_KEY)

# Sentence-embedding model used to encode queries before FAISS searches.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Path of the "assets" folder below the current working directory.
assets_folder = os.path.join(os.getcwd(), 'assets')
18
-
19
def load_resources():
    """Load the FAISS indexes and pickled chunk lists used for retrieval.

    The four files are looked up by bare file name, i.e. relative to the
    current working directory. If any of them is missing, an error naming
    the missing files is shown and the Streamlit script run is stopped.

    Returns:
        tuple: ``(industry_index, industry_chunks, circular_index,
        circular_chunks)``.
    """
    industry_index_path = 'industry_index.faiss'
    industry_chunks_path = 'industry_chunks.pkl'
    circular_index_path = 'circular_index.faiss'
    circular_chunks_path = 'circular_chunks.pkl'

    required_paths = [
        industry_index_path,
        industry_chunks_path,
        circular_index_path,
        circular_chunks_path,
    ]
    missing = [path for path in required_paths if not os.path.exists(path)]
    if missing:
        # Name the files and the real lookup location so the message is actionable.
        st.error(
            "FAISS indexes and chunk files not found: "
            + ", ".join(missing)
            + ". Please ensure they are present in the working directory."
        )
        st.stop()

    industry_index = faiss.read_index(industry_index_path)
    # NOTE: pickle can execute arbitrary code while loading; only ship chunk
    # files that were produced by a trusted build step.
    with open(industry_chunks_path, 'rb') as f:
        industry_chunks = pickle.load(f)
    circular_index = faiss.read_index(circular_index_path)
    with open(circular_chunks_path, 'rb') as f:
        circular_chunks = pickle.load(f)
    return industry_index, industry_chunks, circular_index, circular_chunks


# Loaded once per script run; the page functions below read these globals.
industry_index, industry_chunks, circular_index, circular_chunks = load_resources()
43
-
44
def retrieve_relevant_chunks(query, index, chunks, top_k=5):
    """Return the chunks whose embeddings are nearest to *query*.

    Args:
        query: Text to search for.
        index: FAISS index to search.
        chunks: Sequence of chunks, positionally aligned with *index*.
        top_k: Maximum number of chunks to return.

    Returns:
        list: Up to *top_k* entries of *chunks*, nearest first.
    """
    query_embedding = model.encode([query], convert_to_numpy=True)
    # FAISS works on float32 vectors; cast explicitly, as
    # retrieve_relevant_financial_statements does.
    _distances, indices = index.search(query_embedding.astype('float32'), top_k)
    # FAISS pads the result with -1 when fewer than top_k neighbours exist.
    # Without this filter chunks[-1] would silently return the last chunk.
    return [chunks[i] for i in indices[0] if 0 <= i < len(chunks)]
50
-
51
def circular_compliance():
    """Render the Circular Compliance assistant page.

    Reads a free-text scenario, retrieves the most relevant circular chunks
    for it, asks the Groq chat model for a structured compliance analysis
    and writes the answer to the page. A failed API call is reported with
    the same messages ``run_model1_chat`` shows instead of crashing the page.
    """
    st.header("Circular Compliance Assistant")
    user_query = st.text_area("Enter your scenario or question:", key='circular_input')
    if not st.button("Check Compliance", key='circular_button'):
        return
    if not user_query:
        # An empty submission would otherwise be ignored without any feedback.
        st.warning("Please enter a scenario or question first.")
        return

    relevant_chunks = retrieve_relevant_chunks(user_query, circular_index, circular_chunks)
    context = "\n".join(relevant_chunks)
    # The prompt text is kept flush-left so no source indentation leaks into it.
    prompt = f"""
You are an expert RBI compliance analyst. Based on the provided RBI Master Circular on Management of Advances:

{context}

Please analyze the following scenario for compliance:
{user_query}

Provide a detailed compliance analysis with the following structure:

1. Compliance Status:
- Clear statement whether the scenario is compliant or non-compliant
- Level of certainty in the assessment

2. Relevant Circular Details:
- Specific section(s) and paragraph references
- Direct quotes from applicable sections where relevant

3. Detailed Analysis:
- Breakdown of key compliance requirements
- Calculation/numerical analysis if applicable
- Specific points of compliance/non-compliance

4. Additional Considerations:
- Related requirements or obligations
- Monitoring/reporting requirements if applicable

5. Recommendation:
- Clear guidance on what needs to be done for compliance
- Specific steps to address any non-compliance

Please provide definitive guidance based solely on the circular content, avoiding ambiguity or speculation.

Response:
"""
    try:
        chat_completion = client.chat.completions.create(
            messages=[
                {'role': 'user', 'content': prompt}
            ],
            model="gemma2-9b-it",
            stream=False,
            temperature=0.0
        )
        response = chat_completion.choices[0].message.content.strip()
    except Exception as e:
        # UI boundary: surface the failure to the user rather than a traceback.
        st.error(f"An error occurred: {e}")
        st.error("Please check your API key and model availability.")
        return
    st.write(response)
104
-
105
def industry_classification():
    """Render the Industry Classification assistant page.

    Reads keywords, retrieves the most relevant industry chunks for them,
    asks the Groq chat model to suggest classification codes and writes the
    answer to the page. A failed API call is reported with the same messages
    ``run_model1_chat`` shows instead of crashing the page.
    """
    st.header("Industry Classification Assistant")
    user_keywords = st.text_input("Enter keywords related to the industry:", key='industry_input')
    if not st.button("Get Industry Classification", key='industry_button'):
        return
    if not user_keywords:
        # An empty submission would otherwise be ignored without any feedback.
        st.warning("Please enter at least one keyword first.")
        return

    relevant_chunks = retrieve_relevant_chunks(user_keywords, industry_index, industry_chunks)
    context = "\n".join(relevant_chunks)
    # The prompt text is kept flush-left so no source indentation leaks into it.
    prompt = f"""
You are an assistant helping to classify industries based on keywords. Based on the following information:

{context}

User's Keywords:
{user_keywords}

Suggest the most appropriate industry classification codes. Ask any necessary follow-up questions to clarify if needed.

Answer:
"""
    try:
        chat_completion = client.chat.completions.create(
            messages=[
                {'role': 'user', 'content': prompt}
            ],
            model="gemma2-9b-it",
            stream=False,
            temperature=0.0
        )
        response = chat_completion.choices[0].message.content.strip()
    except Exception as e:
        # UI boundary: surface the failure to the user rather than a traceback.
        st.error(f"An error occurred: {e}")
        st.error("Please check your API key and model availability.")
        return
    st.write(response)
135
-
136
def _mpbf_section():
    """Collect the MPBF inputs and, once requested, show the computed figures."""
    st.header("MPBF Calculation")
    tca = st.number_input("Total Current Assets (TCA):", min_value=0.0, value=0.0)
    ocl = st.number_input("Other Current Liabilities (OCL):", min_value=0.0, value=0.0)
    nwc_actual = st.number_input("Actual/Projected Net Working Capital (NWC):", min_value=0.0, value=0.0)
    if not st.button("Calculate MPBF"):
        return

    gap = tca - ocl
    nwc_floor = 0.25 * tca
    gap_less_floor = gap - nwc_floor
    gap_less_actual = gap - nwc_actual
    # The permissible finance is the lower of the two candidate figures.
    permissible = min(gap_less_floor, gap_less_actual)

    report = (
        f"Working Capital Gap (WCG): {gap:.2f}",
        f"Minimum Stipulated NWC (25% of TCA): {nwc_floor:.2f}",
        f"Item 6 (WCG - Minimum Stipulated NWC): {gap_less_floor:.2f}",
        f"Item 7 (WCG - Actual NWC): {gap_less_actual:.2f}",
        f"Maximum Permissible Bank Finance (MPBF): {permissible:.2f}",
    )
    for line in report:
        st.success(line)


def _dp_section():
    """Collect the Drawing Power inputs and, once requested, show the result."""
    st.header("DP Calculation")
    # Fixed margins applied to each asset/liability class.
    margin_inventory = 0.25
    margin_receivables = 0.40
    margin_creditors = 0.40

    st.subheader("Inventory Details")
    raw = st.number_input("Raw Material:", min_value=0.0, value=0.0)
    spares = st.number_input("Other Consumable Spares:", min_value=0.0, value=0.0)
    in_process = st.number_input("Stock-in-process:", min_value=0.0, value=0.0)
    finished = st.number_input("Finished Goods:", min_value=0.0, value=0.0)

    st.subheader("Receivables")
    domestic = st.number_input("Domestic Receivables:", min_value=0.0, value=0.0)
    export = st.number_input("Export Receivables:", min_value=0.0, value=0.0)

    st.subheader("Creditors")
    owed = st.number_input("Creditors:", min_value=0.0, value=0.0)

    if not st.button("Calculate DP"):
        return

    inventory_after_margin = (raw + spares + in_process + finished) * (1 - margin_inventory)
    receivables_after_margin = (domestic + export) * (1 - margin_receivables)
    creditors_after_margin = owed * (1 - margin_creditors)
    assets_side = inventory_after_margin + receivables_after_margin
    drawing_power = assets_side - creditors_after_margin

    report = (
        f"Total Inventory (After Margin): {inventory_after_margin:.2f}",
        f"Total Receivables (After Margin): {receivables_after_margin:.2f}",
        f"Total (A): {assets_side:.2f}",
        f"Creditors (After Margin): {creditors_after_margin:.2f}",
        f"Drawing Power (DP): {drawing_power:.2f}",
    )
    for line in report:
        st.success(line)


def calculations():
    """Render the calculation page and dispatch to the chosen method."""
    st.subheader("Calculation Methodology")
    choice = st.selectbox(
        "Choose Calculation Method",
        ("Maximum Permissible Bank Finance (MPBF)", "Drawing Power (DP)"),
    )
    if choice == "Maximum Permissible Bank Finance (MPBF)":
        _mpbf_section()
    elif choice == "Drawing Power (DP)":
        _dp_section()
195
-
196
def run_model1_chat():
    """Render a simple chat page backed by the Groq chat model.

    The transcript is kept in ``st.session_state['chat_history']`` as
    ``(speaker, message)`` tuples and is re-rendered in full on every run.
    Only the newest user message is sent to the model.
    """
    st.header("Model 1 Chat Interface")

    state = st.session_state
    if 'chat_history' not in state:
        state['chat_history'] = []

    user_input = st.text_input("You:", key="model1_input")
    send_clicked = st.button("Send", key='model1_send')

    if send_clicked and user_input:
        state.chat_history.append(("User", user_input))
        try:
            completion = client.chat.completions.create(
                messages=[
                    {'role': 'user', 'content': user_input}
                ],
                model="gemma2-9b-it",
                stream=False,
                temperature=0.0
            )
            reply = completion.choices[0].message.content.strip()
            state.chat_history.append(("Model", reply))
        except Exception as e:
            st.error(f"An error occurred: {e}")
            st.error("Please check your API key and model availability.")

    # Replay the whole transcript, oldest entry first.
    for speaker, message in state.chat_history:
        line = f"**You:** {message}" if speaker == "User" else f"**Model 1:** {message}"
        st.markdown(line)
231
-
232
-
233
def retrieve_relevant_financial_statements(query, index, statements, model, top_k=10, max_tokens=1500):
    """Return the statements nearest to *query*, within a word budget.

    Args:
        query: Text to search for.
        index: Object with a FAISS-style ``search(vectors, k)`` method.
        statements: Sequence of dicts, each with a ``'statement'`` string,
            positionally aligned with *index*.
        model: Encoder with an ``encode(texts, convert_to_numpy=True)`` method.
        top_k: Maximum number of neighbours to request.
        max_tokens: Budget, counted in whitespace-separated words, for the
            combined ``'statement'`` texts of the returned entries.

    Returns:
        list: Matching entries of *statements*, nearest first. Collection
        stops at the first entry that would exceed *max_tokens*.
    """
    query_embedding = model.encode([query], convert_to_numpy=True)
    _distances, indices = index.search(query_embedding.astype('float32'), top_k)
    retrieved_statements = []
    total_tokens = 0
    for idx in indices[0]:
        # FAISS pads with -1 when fewer than top_k neighbours exist; a negative
        # index would otherwise wrap around and return the wrong statement.
        if not 0 <= idx < len(statements):
            continue
        record = statements[idx]
        token_count = len(record['statement'].split())
        if total_tokens + token_count > max_tokens:
            break
        retrieved_statements.append(record)
        total_tokens += token_count
    return retrieved_statements
246
-
247
-
248
def _normalize_fiscal_year(year_text):
    """Turn ``"1992"`` into ``"1992-93"``; return ``"1992-93"`` unchanged."""
    if len(year_text) == 4:
        # Wrap with % 100 so that 1999 becomes "1999-00" rather than "1999-100".
        return f"{year_text}-{(int(year_text) + 1) % 100:02d}"
    return year_text


def model2_financial_data():
    """Render the Financial Data assistant page (Model 2).

    Loads the pickled financial statements, extracts a metric (optional), a
    state and a year from the user's question with regular expressions, and
    shows either the single requested metric or a table of every metric
    recorded for that state and year.

    Each statement is read as a dict with the keys ``'state'``, ``'year'``,
    ``'metric_type'`` and ``'value'``.
    """
    import re  # local import: the file's import block does not provide it

    st.header("Financial Data Assistant (Model 2)")

    financial_index_path = 'financial_index.faiss'
    financial_statements_path = 'financial_statements.pkl'

    if not os.path.exists(financial_index_path):
        st.error("Financial FAISS index not found.")
        st.stop()
    # NOTE(review): the index is loaded but the lookup below never queries it.
    financial_index = faiss.read_index(financial_index_path)

    if not os.path.exists(financial_statements_path):
        st.error("Financial statements data not found.")
        st.stop()
    # NOTE: pickle can execute arbitrary code while loading; only ship data
    # files that were produced by a trusted build step.
    with open(financial_statements_path, 'rb') as f:
        financial_statements = pickle.load(f)

    user_query = st.text_area("Ask a question about Indian state-wise financial details (1980-2015):", key='model2_input')

    if not st.button("Get Answer", key='model2_button') or not user_query:
        return

    # --- Metric (optional) ---
    metrics_list = [
        'aggregate expenditure', 'capital expenditure', 'gross fiscal deficits',
        'nominal gsdp series', 'own tax revenues', 'revenue deficits',
        'revenue expenditure', 'social sector expenditure'
    ]
    metrics_pattern = '|'.join(metrics_list)
    metric_match = re.search(rf'\b({metrics_pattern})\b', user_query, re.IGNORECASE)
    query_metric = metric_match.group(1).strip().title() if metric_match else None

    # --- State ---
    # Escape the names so regex metacharacters in the data cannot break the
    # pattern, and try longer names first so a longer name wins over a
    # shorter one that is its prefix.
    states_list = sorted({s['state'] for s in financial_statements}, key=len, reverse=True)
    states_pattern = '|'.join(re.escape(name) for name in states_list)
    state_match = re.search(rf'\b({states_pattern})\b', user_query, re.IGNORECASE)
    query_state = state_match.group(1).strip() if state_match else None

    # --- Year ("1992" or "1992-93") ---
    year_match = re.search(r'(\d{4}(?:-\d{2})?)', user_query)
    query_year = _normalize_fiscal_year(year_match.group(1)) if year_match else None

    if not (query_state and query_year):
        st.write("Could not understand the query. Please specify the state and year.")
        return

    data = {}
    for s in financial_statements:
        if s['state'].lower() != query_state.lower() or s['year'] != query_year:
            continue
        if not query_metric:
            data[s['metric_type']] = s['value']
        elif s['metric_type'].lower() == query_metric.lower():
            data[s['metric_type']] = s['value']
            break  # the specific metric was found; nothing else is needed

    if not data:
        st.write("Data not found for the specified state, year, or metric.")
    elif query_metric:
        # data holds exactly the one matching entry, keyed with the data's own
        # casing; looking it up by the title-cased query text could miss it.
        value = next(iter(data.values()))
        if value is not None:
            st.write(f"The {query_metric} of {query_state} in {query_year} is {value}")
        else:
            st.write(f"{query_metric} data not found for {query_state} in {query_year}.")
    else:
        st.write(f"Financial data for **{query_state}** in **{query_year}**:")
        df = pd.DataFrame(list(data.items()), columns=['Metric', 'Value'])
        st.table(df)
352
-
353
def main():
    """Configure the page and run the functionality picked by the user."""
    st.set_page_config(page_title="Finance Assistant", page_icon="💸", layout="wide")
    st.title("💸 Finance Assistant")

    # Radio label -> page function; dict order is the order shown in the UI.
    pages = {
        "Calculation Methodology": calculations,
        "Circular Compliance": circular_compliance,
        "Industry Classification": industry_classification,
        "Model 1": run_model1_chat,
        "Model 2": model2_financial_data,
    }
    option = st.radio("Choose a Functionality", tuple(pages))

    render = pages.get(option)
    if render is not None:
        render()


if __name__ == "__main__":
    main()
 
1
+ import os
2
+ import streamlit as st
3
+ import pickle
4
+ import faiss
5
+ import pandas as pd
6
+ from sentence_transformers import SentenceTransformer
7
+ from groq import Groq
8
+
9
# The Groq API key is read from the GROQ_API_KEY environment variable so the
# secret never lives in source control. Any key that was committed to the
# repository must be treated as leaked and revoked.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=GROQ_API_KEY)

# Sentence-embedding model used to encode queries before FAISS searches.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Path of the "assets" folder below the current working directory.
assets_folder = os.path.join(os.getcwd(), 'assets')
13
+
14
def load_resources():
    """Load the FAISS indexes and pickled chunk lists used for retrieval.

    The four files are looked up by bare file name, i.e. relative to the
    current working directory. If any of them is missing, an error naming
    the missing files is shown and the Streamlit script run is stopped.

    Returns:
        tuple: ``(industry_index, industry_chunks, circular_index,
        circular_chunks)``.
    """
    industry_index_path = 'industry_index.faiss'
    industry_chunks_path = 'industry_chunks.pkl'
    circular_index_path = 'circular_index.faiss'
    circular_chunks_path = 'circular_chunks.pkl'

    required_paths = [
        industry_index_path,
        industry_chunks_path,
        circular_index_path,
        circular_chunks_path,
    ]
    missing = [path for path in required_paths if not os.path.exists(path)]
    if missing:
        # Name the files and the real lookup location so the message is actionable.
        st.error(
            "FAISS indexes and chunk files not found: "
            + ", ".join(missing)
            + ". Please ensure they are present in the working directory."
        )
        st.stop()

    industry_index = faiss.read_index(industry_index_path)
    # NOTE: pickle can execute arbitrary code while loading; only ship chunk
    # files that were produced by a trusted build step.
    with open(industry_chunks_path, 'rb') as f:
        industry_chunks = pickle.load(f)
    circular_index = faiss.read_index(circular_index_path)
    with open(circular_chunks_path, 'rb') as f:
        circular_chunks = pickle.load(f)
    return industry_index, industry_chunks, circular_index, circular_chunks


# Loaded once per script run; the page functions below read these globals.
industry_index, industry_chunks, circular_index, circular_chunks = load_resources()
30
+
31
def retrieve_relevant_chunks(query, index, chunks, top_k=5):
    """Return the chunks whose embeddings are nearest to *query*.

    Args:
        query: Text to search for.
        index: FAISS index to search.
        chunks: Sequence of chunks, positionally aligned with *index*.
        top_k: Maximum number of chunks to return.

    Returns:
        list: Up to *top_k* entries of *chunks*, nearest first.
    """
    query_embedding = model.encode([query], convert_to_numpy=True)
    # FAISS works on float32 vectors; cast explicitly, as
    # retrieve_relevant_financial_statements does.
    _distances, indices = index.search(query_embedding.astype('float32'), top_k)
    # FAISS pads the result with -1 when fewer than top_k neighbours exist.
    # Without this filter chunks[-1] would silently return the last chunk.
    return [chunks[i] for i in indices[0] if 0 <= i < len(chunks)]
36
+
37
def circular_compliance():
    """Render the Circular Compliance assistant page.

    Reads a free-text scenario, retrieves the most relevant circular chunks
    for it, asks the Groq chat model for a structured compliance analysis
    and writes the answer to the page. A failed API call is reported with
    the same messages ``run_model1_chat`` shows instead of crashing the page.
    """
    st.header("Circular Compliance Assistant")
    user_query = st.text_area("Enter your scenario or question:", key='circular_input')
    if not st.button("Check Compliance", key='circular_button'):
        return
    if not user_query:
        # An empty submission would otherwise be ignored without any feedback.
        st.warning("Please enter a scenario or question first.")
        return

    relevant_chunks = retrieve_relevant_chunks(user_query, circular_index, circular_chunks)
    context = "\n".join(relevant_chunks)
    # The prompt text is kept flush-left so no source indentation leaks into it.
    prompt = f"""
You are an expert RBI compliance analyst. Based on the provided RBI Master Circular on Management of Advances:
{context}
Please analyze the following scenario for compliance:
{user_query}
Provide a detailed compliance analysis with the following structure:
1. Compliance Status:
- Clear statement whether the scenario is compliant or non-compliant
- Level of certainty in the assessment
2. Relevant Circular Details:
- Specific section(s) and paragraph references
- Direct quotes from applicable sections where relevant
3. Detailed Analysis:
- Breakdown of key compliance requirements
- Calculation/numerical analysis if applicable
- Specific points of compliance/non-compliance
4. Additional Considerations:
- Related requirements or obligations
- Monitoring/reporting requirements if applicable
5. Recommendation:
- Clear guidance on what needs to be done for compliance
- Specific steps to address any non-compliance
Please provide definitive guidance based solely on the circular content, avoiding ambiguity or speculation.
Response:
"""
    try:
        chat_completion = client.chat.completions.create(
            messages=[
                {'role': 'user', 'content': prompt}
            ],
            model="gemma2-9b-it",
            stream=False,
            temperature=0.0
        )
        response = chat_completion.choices[0].message.content.strip()
    except Exception as e:
        # UI boundary: surface the failure to the user rather than a traceback.
        st.error(f"An error occurred: {e}")
        st.error("Please check your API key and model availability.")
        return
    st.write(response)
79
+
80
def industry_classification():
    """Render the Industry Classification assistant page.

    Reads keywords, retrieves the most relevant industry chunks for them,
    asks the Groq chat model to suggest classification codes and writes the
    answer to the page. A failed API call is reported with the same messages
    ``run_model1_chat`` shows instead of crashing the page.
    """
    st.header("Industry Classification Assistant")
    user_keywords = st.text_input("Enter keywords related to the industry:", key='industry_input')
    if not st.button("Get Industry Classification", key='industry_button'):
        return
    if not user_keywords:
        # An empty submission would otherwise be ignored without any feedback.
        st.warning("Please enter at least one keyword first.")
        return

    relevant_chunks = retrieve_relevant_chunks(user_keywords, industry_index, industry_chunks)
    context = "\n".join(relevant_chunks)
    # The prompt text is kept flush-left so no source indentation leaks into it.
    prompt = f"""
You are an assistant helping to classify industries based on keywords. Based on the following information:
{context}
User's Keywords:
{user_keywords}
Suggest the most appropriate industry classification codes. Ask any necessary follow-up questions to clarify if needed.
Answer:
"""
    try:
        chat_completion = client.chat.completions.create(
            messages=[
                {'role': 'user', 'content': prompt}
            ],
            model="gemma2-9b-it",
            stream=False,
            temperature=0.0
        )
        response = chat_completion.choices[0].message.content.strip()
    except Exception as e:
        # UI boundary: surface the failure to the user rather than a traceback.
        st.error(f"An error occurred: {e}")
        st.error("Please check your API key and model availability.")
        return
    st.write(response)
105
+
106
def _mpbf_section():
    """Collect the MPBF inputs and, once requested, show the computed figures."""
    st.header("MPBF Calculation")
    tca = st.number_input("Total Current Assets (TCA):", min_value=0.0, value=0.0)
    ocl = st.number_input("Other Current Liabilities (OCL):", min_value=0.0, value=0.0)
    nwc_actual = st.number_input("Actual/Projected Net Working Capital (NWC):", min_value=0.0, value=0.0)
    if not st.button("Calculate MPBF"):
        return

    gap = tca - ocl
    nwc_floor = 0.25 * tca
    gap_less_floor = gap - nwc_floor
    gap_less_actual = gap - nwc_actual
    # The permissible finance is the lower of the two candidate figures.
    permissible = min(gap_less_floor, gap_less_actual)

    report = (
        f"Working Capital Gap (WCG): {gap:.2f}",
        f"Minimum Stipulated NWC (25% of TCA): {nwc_floor:.2f}",
        f"Item 6 (WCG - Minimum Stipulated NWC): {gap_less_floor:.2f}",
        f"Item 7 (WCG - Actual NWC): {gap_less_actual:.2f}",
        f"Maximum Permissible Bank Finance (MPBF): {permissible:.2f}",
    )
    for line in report:
        st.success(line)


def _dp_section():
    """Collect the Drawing Power inputs and, once requested, show the result."""
    st.header("DP Calculation")
    # Fixed margins applied to each asset/liability class.
    margin_inventory = 0.25
    margin_receivables = 0.40
    margin_creditors = 0.40

    st.subheader("Inventory Details")
    raw = st.number_input("Raw Material:", min_value=0.0, value=0.0)
    spares = st.number_input("Other Consumable Spares:", min_value=0.0, value=0.0)
    in_process = st.number_input("Stock-in-process:", min_value=0.0, value=0.0)
    finished = st.number_input("Finished Goods:", min_value=0.0, value=0.0)

    st.subheader("Receivables")
    domestic = st.number_input("Domestic Receivables:", min_value=0.0, value=0.0)
    export = st.number_input("Export Receivables:", min_value=0.0, value=0.0)

    st.subheader("Creditors")
    owed = st.number_input("Creditors:", min_value=0.0, value=0.0)

    if not st.button("Calculate DP"):
        return

    inventory_after_margin = (raw + spares + in_process + finished) * (1 - margin_inventory)
    receivables_after_margin = (domestic + export) * (1 - margin_receivables)
    creditors_after_margin = owed * (1 - margin_creditors)
    assets_side = inventory_after_margin + receivables_after_margin
    drawing_power = assets_side - creditors_after_margin

    report = (
        f"Total Inventory (After Margin): {inventory_after_margin:.2f}",
        f"Total Receivables (After Margin): {receivables_after_margin:.2f}",
        f"Total (A): {assets_side:.2f}",
        f"Creditors (After Margin): {creditors_after_margin:.2f}",
        f"Drawing Power (DP): {drawing_power:.2f}",
    )
    for line in report:
        st.success(line)


def calculations():
    """Render the calculation page and dispatch to the chosen method."""
    st.subheader("Calculation Methodology")
    choice = st.selectbox(
        "Choose Calculation Method",
        ("Maximum Permissible Bank Finance (MPBF)", "Drawing Power (DP)"),
    )
    if choice == "Maximum Permissible Bank Finance (MPBF)":
        _mpbf_section()
    elif choice == "Drawing Power (DP)":
        _dp_section()
155
+
156
def run_model1_chat():
    """Render a simple chat page backed by the Groq chat model.

    The transcript is kept in ``st.session_state['chat_history']`` as
    ``(speaker, message)`` tuples and is re-rendered in full on every run.
    Only the newest user message is sent to the model.
    """
    st.header("Model 1 Chat Interface")

    state = st.session_state
    if 'chat_history' not in state:
        state['chat_history'] = []

    user_input = st.text_input("You:", key="model1_input")
    send_clicked = st.button("Send", key='model1_send')

    if send_clicked and user_input:
        state.chat_history.append(("User", user_input))
        try:
            completion = client.chat.completions.create(
                messages=[
                    {'role': 'user', 'content': user_input}
                ],
                model="gemma2-9b-it",
                stream=False,
                temperature=0.0
            )
            reply = completion.choices[0].message.content.strip()
            state.chat_history.append(("Model", reply))
        except Exception as e:
            st.error(f"An error occurred: {e}")
            st.error("Please check your API key and model availability.")

    # Replay the whole transcript, oldest entry first.
    for speaker, message in state.chat_history:
        line = f"**You:** {message}" if speaker == "User" else f"**Model 1:** {message}"
        st.markdown(line)
183
+
184
+
185
def retrieve_relevant_financial_statements(query, index, statements, model, top_k=10, max_tokens=1500):
    """Return the statements nearest to *query*, within a word budget.

    Args:
        query: Text to search for.
        index: Object with a FAISS-style ``search(vectors, k)`` method.
        statements: Sequence of dicts, each with a ``'statement'`` string,
            positionally aligned with *index*.
        model: Encoder with an ``encode(texts, convert_to_numpy=True)`` method.
        top_k: Maximum number of neighbours to request.
        max_tokens: Budget, counted in whitespace-separated words, for the
            combined ``'statement'`` texts of the returned entries.

    Returns:
        list: Matching entries of *statements*, nearest first. Collection
        stops at the first entry that would exceed *max_tokens*.
    """
    query_embedding = model.encode([query], convert_to_numpy=True)
    _distances, indices = index.search(query_embedding.astype('float32'), top_k)
    retrieved_statements = []
    total_tokens = 0
    for idx in indices[0]:
        # FAISS pads with -1 when fewer than top_k neighbours exist; a negative
        # index would otherwise wrap around and return the wrong statement.
        if not 0 <= idx < len(statements):
            continue
        record = statements[idx]
        token_count = len(record['statement'].split())
        if total_tokens + token_count > max_tokens:
            break
        retrieved_statements.append(record)
        total_tokens += token_count
    return retrieved_statements
198
+
199
+
200
def _normalize_fiscal_year(year_text):
    """Turn ``"1992"`` into ``"1992-93"``; return ``"1992-93"`` unchanged."""
    if len(year_text) == 4:
        # Wrap with % 100 so that 1999 becomes "1999-00" rather than "1999-100".
        return f"{year_text}-{(int(year_text) + 1) % 100:02d}"
    return year_text


def model2_financial_data():
    """Render the Financial Data assistant page (Model 2).

    Loads the pickled financial statements, extracts a metric (optional), a
    state and a year from the user's question with regular expressions, and
    shows either the single requested metric or a table of every metric
    recorded for that state and year.

    Each statement is read as a dict with the keys ``'state'``, ``'year'``,
    ``'metric_type'`` and ``'value'``.
    """
    import re  # local import: the file's import block does not provide it

    st.header("Financial Data Assistant (Model 2)")

    financial_index_path = 'financial_index.faiss'
    financial_statements_path = 'financial_statements.pkl'

    if not os.path.exists(financial_index_path):
        st.error("Financial FAISS index not found.")
        st.stop()
    # NOTE(review): the index is loaded but the lookup below never queries it.
    financial_index = faiss.read_index(financial_index_path)

    if not os.path.exists(financial_statements_path):
        st.error("Financial statements data not found.")
        st.stop()
    # NOTE: pickle can execute arbitrary code while loading; only ship data
    # files that were produced by a trusted build step.
    with open(financial_statements_path, 'rb') as f:
        financial_statements = pickle.load(f)

    user_query = st.text_area("Ask a question about Indian state-wise financial details (1980-2015):", key='model2_input')

    if not st.button("Get Answer", key='model2_button') or not user_query:
        return

    # --- Metric (optional) ---
    metrics_list = [
        'aggregate expenditure', 'capital expenditure', 'gross fiscal deficits',
        'nominal gsdp series', 'own tax revenues', 'revenue deficits',
        'revenue expenditure', 'social sector expenditure'
    ]
    metrics_pattern = '|'.join(metrics_list)
    metric_match = re.search(rf'\b({metrics_pattern})\b', user_query, re.IGNORECASE)
    query_metric = metric_match.group(1).strip().title() if metric_match else None

    # --- State ---
    # Escape the names so regex metacharacters in the data cannot break the
    # pattern, and try longer names first so a longer name wins over a
    # shorter one that is its prefix.
    states_list = sorted({s['state'] for s in financial_statements}, key=len, reverse=True)
    states_pattern = '|'.join(re.escape(name) for name in states_list)
    state_match = re.search(rf'\b({states_pattern})\b', user_query, re.IGNORECASE)
    query_state = state_match.group(1).strip() if state_match else None

    # --- Year ("1992" or "1992-93") ---
    year_match = re.search(r'(\d{4}(?:-\d{2})?)', user_query)
    query_year = _normalize_fiscal_year(year_match.group(1)) if year_match else None

    if not (query_state and query_year):
        st.write("Could not understand the query. Please specify the state and year.")
        return

    data = {}
    for s in financial_statements:
        if s['state'].lower() != query_state.lower() or s['year'] != query_year:
            continue
        if not query_metric:
            data[s['metric_type']] = s['value']
        elif s['metric_type'].lower() == query_metric.lower():
            data[s['metric_type']] = s['value']
            break  # the specific metric was found; nothing else is needed

    if not data:
        st.write("Data not found for the specified state, year, or metric.")
    elif query_metric:
        # data holds exactly the one matching entry, keyed with the data's own
        # casing; looking it up by the title-cased query text could miss it.
        value = next(iter(data.values()))
        if value is not None:
            st.write(f"The {query_metric} of {query_state} in {query_year} is {value}")
        else:
            st.write(f"{query_metric} data not found for {query_state} in {query_year}.")
    else:
        st.write(f"Financial data for **{query_state}** in **{query_year}**:")
        df = pd.DataFrame(list(data.items()), columns=['Metric', 'Value'])
        st.table(df)
276
+
277
def main():
    """Configure the page and run the functionality picked by the user."""
    st.set_page_config(page_title="Finance Assistant", page_icon="💸", layout="wide")
    st.title("💸 Finance Assistant")

    # Radio label -> page function; dict order is the order shown in the UI.
    pages = {
        "Calculation Methodology": calculations,
        "Circular Compliance": circular_compliance,
        "Industry Classification": industry_classification,
        "Model 1": run_model1_chat,
        "Model 2": model2_financial_data,
    }
    option = st.radio("Choose a Functionality", tuple(pages))

    render = pages.get(option)
    if render is not None:
        render()


if __name__ == "__main__":
    main()