georgeek committed on
Commit
6c48c9b
·
1 Parent(s): 323710a
bill copy.py ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env -S poetry run python
2
+
3
+ import os
4
+ import json
5
+ import pdfplumber
6
+ import streamlit as st
7
+ from openai import OpenAI
8
+
9
+ client = OpenAI()
10
+
11
def load_user_data(user_id):
    """Return the stored JSON document for *user_id*, or ``{}`` when none exists.

    The document is read from ``data/user_data/user_data_<user_id>.json``,
    resolved relative to the current working directory.
    """
    # NOTE(review): user_id is interpolated into the file name unchanged --
    # confirm whether callers validate it before it reaches this function.
    storage_path = os.path.join("data", "user_data", f"user_data_{user_id}.json")
    if os.path.exists(storage_path):
        with open(storage_path, "r") as handle:
            return json.load(handle)
    return {}
17
+
18
# Ordered matching rules: (substrings that must all occur in the line,
# key written to the result, whether the value is converted to float).
# Only the first matching rule is applied to a line, so order matters.
# NOTE(review): some values are kept as strings and some as floats; that mix
# is preserved here so newly parsed bills match the shape of stored ones.
_COST_RULES = (
    (("Sold precedent",), "Sold precedent", False),
    (("din sold precedent",), "Total platit din sold precedent", False),
    (("TVA", "%"), "TVA", False),
    (("Abonamente",), "Abonamente si extraopiuni", False),
    (("Total factura curenta fara TVA",), "Total factura curenta fara TVA", True),
    (("Servicii utilizate",), "Servicii utilizate", False),
    (("Rate terminal",), "Rate terminal", True),
    (("Total factura curenta",), "Total factura curenta", True),
)


def _extract_costs(lines):
    """Return the cost entries found in the text lines of one page."""
    found = {}
    for line in lines:
        rule = next(
            (r for r in _COST_RULES if all(marker in line for marker in r[0])),
            None,
        )
        if rule is None:
            continue
        tokens = line.split()
        # The amount is read from the second-to-last token (presumably the
        # last one is a unit such as the currency -- TODO confirm). A line
        # with fewer than two tokens has no such position, so skip it.
        if len(tokens) < 2:
            continue
        _, key, as_float = rule
        value = tokens[-2].replace(',', '.')
        if as_float:
            try:
                value = float(value)
            except ValueError:
                # The marker matched but the token is not a number: ignore
                # the line instead of aborting the whole document.
                continue
        found[key] = value
    return found


def parse_pdf_to_json(pdf_path):
    """Extract cost figures from the text of a PDF bill.

    Args:
        pdf_path: Anything accepted by ``pdfplumber.open`` (a path or an
            open binary file object).

    Returns:
        A dict mapping cost labels to the amount found for each. When a
        label occurs more than once, the last occurrence wins. Pages with
        no extractable text are ignored.
    """
    costuri = {}
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            text = page.extract_text()
            if not text:
                continue
            costuri.update(_extract_costs(text.split('\n')))
    return costuri
92
+
93
def check_related_keys(question, user_id):
    """Return the bill keys of *user_id* that are mentioned in *question*.

    A key counts as mentioned when its lower-cased text is a substring of
    the lower-cased question.

    Args:
        question: The user's question text.
        user_id: Identifier passed to ``load_user_data``.

    Returns:
        A list of matching keys without duplicates, in the order in which
        they first appear across the user's stored bills.
    """
    user_data = load_user_data(user_id)
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # result is the same on every run; a set of strings iterates in an
    # order that can change from one process to the next.
    bill_keys = dict.fromkeys(
        key for bill in user_data.get("bills", []) for key in bill
    )
    question_lower = question.lower()
    return [key for key in bill_keys if key.lower() in question_lower]
99
+
100
def process_query(query, user_id, max_input_length=550):
    """Build the prompt that embeds the user's bills around *query*.

    The prompt and its length are also written to the Streamlit page.

    Args:
        query: The user's question.
        user_id: Identifier whose stored bills are embedded in the prompt.
        max_input_length: Maximum allowed prompt length in characters.

    Returns:
        The prompt string, or ``None`` (after showing a warning) when the
        prompt is longer than *max_input_length*.
    """
    user_data = load_user_data(user_id)
    bill_info = user_data.get("bills", [])
    related_keys = check_related_keys(query, user_id)

    # Restrict the answer to the bill keys named in the question when there
    # are any; otherwise restrict it to the bill in general.
    if related_keys:
        focus = f"de: {', '.join(related_keys)}"
    else:
        focus = "de factura"

    context = (
        f"Citeste informatiile despre costrurile in lei facturate din dictionar: {bill_info} "
        f"si raspunde la intrebarea: '{query}' dar numai cu info legate {focus}"
    )

    st.write(f"Context:\n{context}")
    st.write(f"Context size: {len(context)} characters")

    if len(context) > max_input_length:
        st.warning("Prea multe caractere în context, solicitarea nu va fi trimisă.")
        return None

    return context
126
+
127
def main():
    """Render the Streamlit page: select a user, store a PDF bill, and chat.

    State kept in ``st.session_state``: ``user_id``, ``messages`` (the chat
    history sent to the model), ``context_prompt_added`` (whether the bill
    context has already been injected into the conversation) and
    ``last_saved_upload`` (identity of the upload already written to disk).
    """
    st.title("Telecom Bill Chat with LLM Agent")

    if "user_id" not in st.session_state:
        st.session_state.user_id = None

    user_id = st.sidebar.text_input("Introdu numărul de telefon:")
    if user_id and user_id != st.session_state.user_id:
        if load_user_data(user_id):
            st.success("Utilizator găsit!")
        else:
            st.warning("Nu am găsit date pentru acest ID. Încărcați o factură PDF la nevoie.")
        # The ID is accepted either way so a first bill can be uploaded for it.
        st.session_state.user_id = user_id
        # Start a new conversation: the history may contain the previous
        # user's bill data, and the context flag would otherwise stay set so
        # the new user's bills would never be injected.
        st.session_state["messages"] = [
            {"role": "assistant", "content": "Cu ce te pot ajuta?"}
        ]
        st.session_state.context_prompt_added = False

    uploaded_file = st.file_uploader("Încarcă factura PDF", type="pdf")
    if uploaded_file and st.session_state.user_id:
        # Streamlit re-runs this script on every interaction while the
        # uploader still holds the file; remember what was already stored so
        # the same bill is not appended again on each rerun.
        upload_key = (st.session_state.user_id, uploaded_file.name, uploaded_file.size)
        if st.session_state.get("last_saved_upload") != upload_key:
            bill_data = parse_pdf_to_json(uploaded_file)
            existing_data = load_user_data(st.session_state.user_id)
            existing_data.setdefault("bills", []).append(bill_data)
            file_path = os.path.join("data", "user_data", f"user_data_{st.session_state['user_id']}.json")
            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            with open(file_path, "w") as file:
                json.dump(existing_data, file)
            st.session_state["last_saved_upload"] = upload_key
        st.success("Factura a fost încărcată și salvată cu succes!")

    if st.session_state.user_id:
        data = load_user_data(st.session_state.user_id)
        st.write(f"Phone Number: {st.session_state.user_id}")
        st.write("Facturi existente:")
        for bill in data.get("bills", []):
            st.write(bill)
    else:
        st.info("Introduceți un ID și/sau încărcați o factură PDF pentru a continua.")

    # Initialize the conversation state on the first run of a session.
    if "messages" not in st.session_state:
        st.session_state["messages"] = [
            {"role": "assistant", "content": "Cu ce te pot ajuta?"}
        ]
    if "context_prompt_added" not in st.session_state:
        st.session_state.context_prompt_added = False

    st.write("---")
    st.subheader("Chat")

    for msg in st.session_state["messages"]:
        st.chat_message(msg["role"]).write(msg["content"])

    if prompt := st.chat_input("Introduceți întrebarea aici:"):
        if not st.session_state.user_id:
            st.error("Trebuie să introduceți un număr de telefon valid sau să încărcați date.")
            return

        # The first question is wrapped in the bill context; later questions
        # are sent as typed because the context is already in the history.
        if not st.session_state.context_prompt_added:
            final_prompt = process_query(prompt, st.session_state["user_id"])
            if final_prompt is None:
                st.stop()
            st.session_state["messages"].append({"role": "user", "content": final_prompt})
            st.session_state.context_prompt_added = True
        else:
            st.session_state["messages"].append({"role": "user", "content": prompt})

        # Display the latest user message in the chat.
        st.chat_message("user").write(st.session_state["messages"][-1]["content"])

        # Send the whole conversation to the model.
        completion = client.chat.completions.create(
            model="gpt-4",
            messages=st.session_state["messages"]
        )
        # The message content can be None; treat that as an empty reply.
        response_text = (completion.choices[0].message.content or "").strip()

        st.session_state["messages"].append({"role": "assistant", "content": response_text})
        st.chat_message("assistant").write(response_text)

        # usage may be absent or None, so check the value, not the attribute.
        usage = getattr(completion, "usage", None)
        if usage is not None:
            st.write("Prompt tokens:", usage.prompt_tokens)
            st.write("Completion tokens:", usage.completion_tokens)
            st.write("Total tokens:", usage.total_tokens)


if __name__ == "__main__":
    main()
bill.py CHANGED
@@ -2,7 +2,6 @@
2
 
3
  import os
4
  import json
5
- import pdfplumber
6
  import streamlit as st
7
  from openai import OpenAI
8
 
@@ -15,80 +14,35 @@ def load_user_data(user_id):
15
  with open(file_path, "r") as file:
16
  return json.load(file)
17
 
18
- def parse_pdf_to_json(pdf_path):
19
- user_id = {}
20
- serie_factura = {}
21
- data_factura = {}
22
- costuri = {}
23
- with pdfplumber.open(pdf_path, ) as pdf:
24
- for page in pdf.pages:
25
- text = page.extract_text()
26
- if text:
27
- lines = text.split('\n')
28
-
29
- # Process each line and look for specific categories
30
- for line in lines:
31
- # Check for 'Data emiterii facturii'
32
- if 'Data facturii' in line:
33
- date = line.split()[-1]
34
- data_factura['Data factura'] = date
35
-
36
- # Check for 'Serie factură'
37
- if 'rul facturii:' in line:
38
- serie = line.split()[-1]
39
- serie_factura['Serie numar'] = serie
40
-
41
- # Check for 'Cont client'
42
- if 'Cont client' in line:
43
- cont = line.split()[-1]
44
- user_id['Cont client'] = cont
45
-
46
- # Check for 'Valoare facturată fără TVA'
47
- if 'Sold precedent' in line:
48
- value = line.split()[-2].replace(',', '.') # Extract and convert to float
49
- costuri['Sold precedent'] = value
50
-
51
- # Check for 'Total bază de impozitare TVA'
52
- elif 'din sold precedent' in line:
53
- value = line.split()[-2].replace(',', '.') # Extract and convert to float
54
- costuri['Total platit din sold precedent'] = value
55
-
56
- # Check for 'TVA'
57
- elif 'TVA' in line and '%' in line:
58
- value = line.split()[-2].replace(',', '.') # Extract and convert to float
59
- costuri['TVA'] = value
60
-
61
- # Check for 'Dobânzi penalizatoare'
62
- elif 'Abonamente' in line:
63
- value = line.split()[-2].replace(',', '.') # Extract and convert to float
64
- costuri['Abonamente si extraopiuni'] = value
65
-
66
- # Check for 'TOTAL DE PLATĂ FACTURĂ CURENTĂ'
67
- elif 'Total factura curenta fara TVA' in line:
68
- value = float(line.split()[-2].replace(',', '.')) # Extract and convert to float
69
- costuri['Total factura curenta fara TVA'] = value
70
-
71
- # Check for 'Sold Cont Contract'
72
- elif 'Servicii utilizate' in line:
73
- value = line.split()[-2].replace(',', '.') # Extract and convert to float
74
- costuri['Servicii utilizate'] = value
75
-
76
- # Check for 'Compensatii'
77
- elif 'Rate terminal' in line:
78
- value = float(line.split()[-2].replace(',', '.')) # Extract and convert to float
79
- costuri['Rate terminal'] = value
80
-
81
- # Check for 'TVA 19,00%'
82
- elif 'TVA 19,00%' in line:
83
- value = float(line.split()[-2].replace(',', '.')) # Extract and convert to float
84
- costuri['TVA'] = value
85
-
86
- # Check for 'Compensatii'
87
- elif 'Total factura curenta' in line:
88
- value = float(line.split()[-2].replace(',', '.')) # Extract and convert to float
89
- costuri['Total factura curenta'] = value
90
-
91
- return costuri
92
 
93
  def check_related_keys(question, user_id):
94
  user_data = load_user_data(user_id)
@@ -141,13 +95,14 @@ def main():
141
  st.warning("Nu am găsit date pentru acest ID. Încărcați o factură PDF la nevoie.")
142
  st.session_state.user_id = user_id
143
 
144
- uploaded_file = st.file_uploader("Încarcă factura PDF", type="pdf")
145
  if uploaded_file and st.session_state.user_id:
146
- bill_data = parse_pdf_to_json(uploaded_file)
 
147
  existing_data = load_user_data(st.session_state.user_id)
148
  if "bills" not in existing_data:
149
  existing_data["bills"] = []
150
- existing_data["bills"].append(bill_data)
151
  file_path = os.path.join("data", "user_data", f"user_data_{st.session_state['user_id']}.json")
152
  os.makedirs(os.path.dirname(file_path), exist_ok=True)
153
  with open(file_path, "w") as file:
@@ -161,7 +116,7 @@ def main():
161
  for bill in data.get("bills", []):
162
  st.write(bill)
163
  else:
164
- st.info("Introduceți un ID și/sau încărcați o factură PDF pentru a continua.")
165
 
166
  # Initialize conversation in the session state
167
  # "context_prompt_added" indicates whether we've added the specialized "bill info" context yet.
 
2
 
3
  import os
4
  import json
 
5
  import streamlit as st
6
  from openai import OpenAI
7
 
 
14
  with open(file_path, "r") as file:
15
  return json.load(file)
16
 
17
def parseBill(data):
    """Reduce a raw bill document to the fields stored for each bill.

    Args:
        data: The decoded bill JSON (a dict). Missing keys yield ``None``;
            a missing or ``null`` ``taxItem``, ``subscribers`` or
            ``billSummaryItem`` is treated as an empty list.

    Returns:
        A dict with ``billDate``, ``billNo``, ``amountDue`` and
        ``extraCharge`` copied from *data*, ``totalBillCosts`` (one
        ``{"categorie", "amount"}`` entry per tax item) and
        ``subscriberCosts`` (one entry per subscriber holding its
        ``logicalResource`` and its ``billSummaryItems`` as
        ``{"categorie", "amount", "name"}`` dicts).
    """
    # ``or []`` also covers an explicit JSON null, which .get(key, [])
    # would return as None and then fail to iterate.
    tax_items = data.get("taxItem") or []
    subscribers = data.get("subscribers") or []

    total_bill_costs = [
        {"categorie": item.get("cat"), "amount": item.get("amt")}
        for item in tax_items
    ]
    subscriber_costs = [
        {
            "logicalResource": sub.get("logicalResource"),
            "billSummaryItems": [
                {"categorie": bsi.get("cat"), "amount": bsi.get("amt"), "name": bsi.get("name")}
                for bsi in (sub.get("billSummaryItem") or [])
            ],
        }
        for sub in subscribers
    ]

    return {
        "billDate": data.get("billDate"),
        "billNo": data.get("billNo"),
        "amountDue": data.get("amountDue"),
        "extraCharge": data.get("extraCharge"),
        "totalBillCosts": total_bill_costs,
        "subscriberCosts": subscriber_costs,
    }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
  def check_related_keys(question, user_id):
48
  user_data = load_user_data(user_id)
 
95
  st.warning("Nu am găsit date pentru acest ID. Încărcați o factură PDF la nevoie.")
96
  st.session_state.user_id = user_id
97
 
98
+ uploaded_file = st.file_uploader("Încarcă factura JSON", type="json")
99
  if uploaded_file and st.session_state.user_id:
100
+ bill_data = json.load(uploaded_file)
101
+ parsed_bill = parseBill(bill_data)
102
  existing_data = load_user_data(st.session_state.user_id)
103
  if "bills" not in existing_data:
104
  existing_data["bills"] = []
105
+ existing_data["bills"].append(parsed_bill)
106
  file_path = os.path.join("data", "user_data", f"user_data_{st.session_state['user_id']}.json")
107
  os.makedirs(os.path.dirname(file_path), exist_ok=True)
108
  with open(file_path, "w") as file:
 
116
  for bill in data.get("bills", []):
117
  st.write(bill)
118
  else:
119
+ st.info("Introduceți un ID și/sau încărcați o factură JSON pentru a continua.")
120
 
121
  # Initialize conversation in the session state
122
  # "context_prompt_added" indicates whether we've added the specialized "bill info" context yet.
billPdf.py ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env -S poetry run python
2
+
3
+ import os
4
+ import json
5
+ #import pdfplumber
6
+ import streamlit as st
7
+ from openai import OpenAI
8
+
9
+ client = OpenAI()
10
+
11
def load_user_data(user_id):
    """Return the stored JSON document for *user_id*, or ``{}`` when none exists.

    The document is read from ``data/user_data/user_data_<user_id>.json``,
    resolved relative to the current working directory.
    """
    # NOTE(review): user_id is interpolated into the file name unchanged --
    # confirm whether callers validate it before it reaches this function.
    storage_path = os.path.join("data", "user_data", f"user_data_{user_id}.json")
    if os.path.exists(storage_path):
        with open(storage_path, "r") as handle:
            return json.load(handle)
    return {}
17
+
18
# Ordered matching rules: (substrings that must all occur in the line,
# key written to the result, whether the value is converted to float).
# Only the first matching rule is applied to a line, so order matters.
# NOTE(review): some values are kept as strings and some as floats; that mix
# is preserved here so newly parsed bills match the shape of stored ones.
_COST_RULES = (
    (("Sold precedent",), "Sold precedent", False),
    (("din sold precedent",), "Total platit din sold precedent", False),
    (("TVA", "%"), "TVA", False),
    (("Abonamente",), "Abonamente si extraopiuni", False),
    (("Total factura curenta fara TVA",), "Total factura curenta fara TVA", True),
    (("Servicii utilizate",), "Servicii utilizate", False),
    (("Rate terminal",), "Rate terminal", True),
    (("Total factura curenta",), "Total factura curenta", True),
)


def _extract_costs(lines):
    """Return the cost entries found in the text lines of one page."""
    found = {}
    for line in lines:
        rule = next(
            (r for r in _COST_RULES if all(marker in line for marker in r[0])),
            None,
        )
        if rule is None:
            continue
        tokens = line.split()
        # The amount is read from the second-to-last token (presumably the
        # last one is a unit such as the currency -- TODO confirm). A line
        # with fewer than two tokens has no such position, so skip it.
        if len(tokens) < 2:
            continue
        _, key, as_float = rule
        value = tokens[-2].replace(',', '.')
        if as_float:
            try:
                value = float(value)
            except ValueError:
                # The marker matched but the token is not a number: ignore
                # the line instead of aborting the whole document.
                continue
        found[key] = value
    return found


def parse_pdf_to_json(pdf_path):
    """Extract cost figures from the text of a PDF bill.

    Args:
        pdf_path: Anything accepted by ``pdfplumber.open`` (a path or an
            open binary file object).

    Returns:
        A dict mapping cost labels to the amount found for each. When a
        label occurs more than once, the last occurrence wins. Pages with
        no extractable text are ignored.

    Raises:
        ImportError: If ``pdfplumber`` is not installed.
    """
    # The module-level import of pdfplumber is commented out in this file,
    # so the name is brought into scope here, where it is actually needed.
    import pdfplumber

    costuri = {}
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            text = page.extract_text()
            if not text:
                continue
            costuri.update(_extract_costs(text.split('\n')))
    return costuri
92
+
93
def check_related_keys(question, user_id):
    """Return the bill keys of *user_id* that are mentioned in *question*.

    A key counts as mentioned when its lower-cased text is a substring of
    the lower-cased question.

    Args:
        question: The user's question text.
        user_id: Identifier passed to ``load_user_data``.

    Returns:
        A list of matching keys without duplicates, in the order in which
        they first appear across the user's stored bills.
    """
    user_data = load_user_data(user_id)
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # result is the same on every run; a set of strings iterates in an
    # order that can change from one process to the next.
    bill_keys = dict.fromkeys(
        key for bill in user_data.get("bills", []) for key in bill
    )
    question_lower = question.lower()
    return [key for key in bill_keys if key.lower() in question_lower]
99
+
100
def process_query(query, user_id, max_input_length=550):
    """Build the prompt that embeds the user's bills around *query*.

    The prompt and its length are also written to the Streamlit page.

    Args:
        query: The user's question.
        user_id: Identifier whose stored bills are embedded in the prompt.
        max_input_length: Maximum allowed prompt length in characters.

    Returns:
        The prompt string, or ``None`` (after showing a warning) when the
        prompt is longer than *max_input_length*.
    """
    user_data = load_user_data(user_id)
    bill_info = user_data.get("bills", [])
    related_keys = check_related_keys(query, user_id)

    # Restrict the answer to the bill keys named in the question when there
    # are any; otherwise restrict it to the bill in general.
    if related_keys:
        focus = f"de: {', '.join(related_keys)}"
    else:
        focus = "de factura"

    context = (
        f"Citeste informatiile despre costrurile in lei facturate din dictionar: {bill_info} "
        f"si raspunde la intrebarea: '{query}' dar numai cu info legate {focus}"
    )

    st.write(f"Context:\n{context}")
    st.write(f"Context size: {len(context)} characters")

    if len(context) > max_input_length:
        st.warning("Prea multe caractere în context, solicitarea nu va fi trimisă.")
        return None

    return context
126
+
127
def main():
    """Render the Streamlit page: select a user, store a PDF bill, and chat.

    State kept in ``st.session_state``: ``user_id``, ``messages`` (the chat
    history sent to the model), ``context_prompt_added`` (whether the bill
    context has already been injected into the conversation) and
    ``last_saved_upload`` (identity of the upload already written to disk).
    """
    st.title("Telecom Bill Chat with LLM Agent")

    if "user_id" not in st.session_state:
        st.session_state.user_id = None

    user_id = st.sidebar.text_input("Introdu numărul de telefon:")
    if user_id and user_id != st.session_state.user_id:
        if load_user_data(user_id):
            st.success("Utilizator găsit!")
        else:
            st.warning("Nu am găsit date pentru acest ID. Încărcați o factură PDF la nevoie.")
        # The ID is accepted either way so a first bill can be uploaded for it.
        st.session_state.user_id = user_id
        # Start a new conversation: the history may contain the previous
        # user's bill data, and the context flag would otherwise stay set so
        # the new user's bills would never be injected.
        st.session_state["messages"] = [
            {"role": "assistant", "content": "Cu ce te pot ajuta?"}
        ]
        st.session_state.context_prompt_added = False

    uploaded_file = st.file_uploader("Încarcă factura PDF", type="pdf")
    if uploaded_file and st.session_state.user_id:
        # Streamlit re-runs this script on every interaction while the
        # uploader still holds the file; remember what was already stored so
        # the same bill is not appended again on each rerun.
        upload_key = (st.session_state.user_id, uploaded_file.name, uploaded_file.size)
        if st.session_state.get("last_saved_upload") != upload_key:
            bill_data = parse_pdf_to_json(uploaded_file)
            existing_data = load_user_data(st.session_state.user_id)
            existing_data.setdefault("bills", []).append(bill_data)
            file_path = os.path.join("data", "user_data", f"user_data_{st.session_state['user_id']}.json")
            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            with open(file_path, "w") as file:
                json.dump(existing_data, file)
            st.session_state["last_saved_upload"] = upload_key
        st.success("Factura a fost încărcată și salvată cu succes!")

    if st.session_state.user_id:
        data = load_user_data(st.session_state.user_id)
        st.write(f"Phone Number: {st.session_state.user_id}")
        st.write("Facturi existente:")
        for bill in data.get("bills", []):
            st.write(bill)
    else:
        st.info("Introduceți un ID și/sau încărcați o factură PDF pentru a continua.")

    # Initialize the conversation state on the first run of a session.
    if "messages" not in st.session_state:
        st.session_state["messages"] = [
            {"role": "assistant", "content": "Cu ce te pot ajuta?"}
        ]
    if "context_prompt_added" not in st.session_state:
        st.session_state.context_prompt_added = False

    st.write("---")
    st.subheader("Chat")

    for msg in st.session_state["messages"]:
        st.chat_message(msg["role"]).write(msg["content"])

    if prompt := st.chat_input("Introduceți întrebarea aici:"):
        if not st.session_state.user_id:
            st.error("Trebuie să introduceți un număr de telefon valid sau să încărcați date.")
            return

        # The first question is wrapped in the bill context; later questions
        # are sent as typed because the context is already in the history.
        if not st.session_state.context_prompt_added:
            final_prompt = process_query(prompt, st.session_state["user_id"])
            if final_prompt is None:
                st.stop()
            st.session_state["messages"].append({"role": "user", "content": final_prompt})
            st.session_state.context_prompt_added = True
        else:
            st.session_state["messages"].append({"role": "user", "content": prompt})

        # Display the latest user message in the chat.
        st.chat_message("user").write(st.session_state["messages"][-1]["content"])

        # Send the whole conversation to the model.
        completion = client.chat.completions.create(
            model="gpt-4",
            messages=st.session_state["messages"]
        )
        # The message content can be None; treat that as an empty reply.
        response_text = (completion.choices[0].message.content or "").strip()

        st.session_state["messages"].append({"role": "assistant", "content": response_text})
        st.chat_message("assistant").write(response_text)

        # usage may be absent or None, so check the value, not the attribute.
        usage = getattr(completion, "usage", None)
        if usage is not None:
            st.write("Prompt tokens:", usage.prompt_tokens)
            st.write("Completion tokens:", usage.completion_tokens)
            st.write("Total tokens:", usage.total_tokens)


if __name__ == "__main__":
    main()
data/Fact1.png DELETED
Binary file (381 kB)
 
data/Factura-664159569.pdf DELETED
Binary file (114 kB)
 
data/Factura-Vodafone_03.05.pdf DELETED
Binary file (136 kB)
 
data/VDF85920235.pdf DELETED
Binary file (93.8 kB)
 
data/bill_VDF682568817.json ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "billDate": "2024-12-17",
3
+ "billNo": "VDF682568817",
4
+ "amountDue": 286.96,
5
+ "costFromExpiredDiscounts": 3.04,
6
+ "extraCharge": 99.31,
7
+ "taxItem": [
8
+ {
9
+ "cat": "Total factura curenta",
10
+ "amt": 201.38
11
+ },
12
+ {
13
+ "cat": "TVA 19%",
14
+ "amt": 32.15
15
+ },
16
+ {
17
+ "cat": "Total rate terminal",
18
+ "amt": 85.58
19
+ },
20
+ {
21
+ "cat": "Abonamente si extraoptiuni",
22
+ "amt": 158.54
23
+ },
24
+ {
25
+ "cat": "Reduceri",
26
+ "amt": -3.04
27
+ },
28
+ {
29
+ "cat": "Servicii utilizate",
30
+ "amt": 13.73
31
+ },
32
+ {
33
+ "cat": "Sold precedent",
34
+ "amt": 184.63
35
+ },
36
+ {
37
+ "cat": "Total platit din sold precedent",
38
+ "amt": 184.63
39
+ },
40
+ {
41
+ "cat": "Total de plata",
42
+ "amt": 286.96
43
+ }
44
+ ],
45
+ "subscribers": [
46
+ {
47
+ "billSummaryItem": [
48
+ {
49
+ "cat": "Servicii Internationale",
50
+ "amt": 4.18,
51
+ "name": "Taxa 10 SMS"
52
+ },
53
+ {
54
+ "cat": "Reduceri",
55
+ "amt": -2.94,
56
+ "name": "Reducere Secure Net"
57
+ },
58
+ {
59
+ "cat": "Reduceri",
60
+ "amt": -0.1,
61
+ "name": "Reducere Secure Net"
62
+ },
63
+ {
64
+ "cat": "Abonamente si extraoptiuni",
65
+ "amt": -2.04,
66
+ "name": "Red 18"
67
+ },
68
+ {
69
+ "cat": "Abonamente si extraoptiuni",
70
+ "amt": 2.24,
71
+ "name": "Red 19"
72
+ },
73
+ {
74
+ "cat": "Abonamente si extraoptiuni",
75
+ "amt": 66.93,
76
+ "name": "Red 19"
77
+ },
78
+ {
79
+ "cat": "Abonamente si extraoptiuni",
80
+ "amt": 0.1,
81
+ "name": "Secure Net"
82
+ },
83
+ {
84
+ "cat": "Abonamente si extraoptiuni",
85
+ "amt": 2.94,
86
+ "name": "Secure Net"
87
+ },
88
+ {
89
+ "cat": "Abonamente si extraoptiuni",
90
+ "amt": -4.18,
91
+ "name": "Reducere promotionala 12 luni"
92
+ },
93
+ {
94
+ "cat": "Abonamente si extraoptiuni",
95
+ "amt": -0.15,
96
+ "name": "Reducere promotionala 12 luni"
97
+ },
98
+ {
99
+ "cat": "Rate terminal",
100
+ "amt": 25.87,
101
+ "name": "Promotie Vodafone Smartphone cu plata in rate 1/36"
102
+ },
103
+ {
104
+ "cat": "Rate terminal",
105
+ "amt": 59.71,
106
+ "name": "Promotie Vodafone Smartphone cu plata in rate 1/36"
107
+ },
108
+ {
109
+ "cat": "Total sume datorate rate terminal",
110
+ "amt": 85.58
111
+ }
112
+ ],
113
+ "logicalResource": "722339918",
114
+ "netAmount": 66.98,
115
+ "productRef": {
116
+ "name": "Red 19",
117
+ "@referredType": "Voce mobila"
118
+ },
119
+ "subscriberExtraCost": 89.76,
120
+ "firstBill": false,
121
+ "planChangeProrata": true,
122
+ "firstBillForBan": false,
123
+ "prorateCalc": {
124
+ "billDate": "2024-12-17T00:00:00Z",
125
+ "oldPlan": {
126
+ "name": "Red 18",
127
+ "value": -2.04,
128
+ "prorateAmount": -2.04
129
+ },
130
+ "currentPlan": {
131
+ "name": "Red 19",
132
+ "value": 66.93,
133
+ "prorateAmount": 2.24,
134
+ "valuePerDay": 2.1590323
135
+ },
136
+ "prorateDays": 1,
137
+ "daysBcClosed": 30.0,
138
+ "daysBcCurrent": 31.0,
139
+ "subChangeDate": "2024-12-16T00:00"
140
+ }
141
+ },
142
+ {
143
+ "billSummaryItem": [
144
+ {
145
+ "cat": "Abonamente si extraoptiuni",
146
+ "amt": 43.04,
147
+ "name": "Red 15"
148
+ }
149
+ ],
150
+ "logicalResource": "738912539",
151
+ "netAmount": 43.04,
152
+ "productRef": {
153
+ "name": "RED 15",
154
+ "@referredType": "Voce mobila"
155
+ },
156
+ "firstBill": false,
157
+ "planChangeProrata": false,
158
+ "firstBillForBan": false
159
+ },
160
+ {
161
+ "billSummaryItem": [
162
+ {
163
+ "cat": "Servicii Nationale",
164
+ "amt": 5.37,
165
+ "name": "Apel Nr. Sp. Rel. cu Clientii"
166
+ },
167
+ {
168
+ "cat": "Servicii Internationale",
169
+ "amt": 4.18,
170
+ "name": "Taxa 30 min grupa 1 tari"
171
+ },
172
+ {
173
+ "cat": "Abonamente si extraoptiuni",
174
+ "amt": 53.84,
175
+ "name": "Red 12"
176
+ },
177
+ {
178
+ "cat": "Abonamente si extraoptiuni",
179
+ "amt": -4.18,
180
+ "name": "Reducere promo"
181
+ }
182
+ ],
183
+ "logicalResource": "727890354",
184
+ "netAmount": 59.21,
185
+ "productRef": {
186
+ "name": "Red 12",
187
+ "@referredType": "Voce mobila"
188
+ },
189
+ "subscriberExtraCost": 9.549999,
190
+ "firstBill": false,
191
+ "planChangeProrata": false,
192
+ "firstBillForBan": false
193
+ }
194
+ ]
195
+ }
data/bill_VDF682981042.json ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "billDate": "2024-12-20",
3
+ "billNo": "VDF682981042",
4
+ "amountDue": 10.15,
5
+ "costFromExpiredDiscounts": -0.019996643,
6
+ "extraCharge": 10.15,
7
+ "taxItem": [
8
+ {
9
+ "cat": "Total factura curenta",
10
+ "amt": 10.15
11
+ },
12
+ {
13
+ "cat": "Total sume neincluse in baza de impozitare TVA",
14
+ "amt": 10.15
15
+ },
16
+ {
17
+ "cat": "Abonamente si extraoptiuni",
18
+ "amt": 107.69
19
+ },
20
+ {
21
+ "cat": "Reduceri",
22
+ "amt": -107.69
23
+ },
24
+ {
25
+ "cat": "Sold precedent",
26
+ "amt": 10.15
27
+ },
28
+ {
29
+ "cat": "Total platit din sold precedent",
30
+ "amt": 10.15
31
+ },
32
+ {
33
+ "cat": "Total de plata",
34
+ "amt": 10.15
35
+ }
36
+ ],
37
+ "subscribers": [
38
+ {
39
+ "billSummaryItem": [
40
+ {
41
+ "cat": "Total Sume incasate in numele tertilor",
42
+ "amt": 10.15
43
+ },
44
+ {
45
+ "cat": "Sume incasate in numele tertilor",
46
+ "amt": 10.15,
47
+ "name": "Donatii SMS (fara TVA)"
48
+ },
49
+ {
50
+ "cat": "Reduceri",
51
+ "amt": -1.07,
52
+ "name": "Reducere Vodafone Secure Net"
53
+ },
54
+ {
55
+ "cat": "Reduceri",
56
+ "amt": -106.62,
57
+ "name": "Reducere abonament lunar"
58
+ },
59
+ {
60
+ "cat": "Abonamente si extraoptiuni",
61
+ "amt": 106.62,
62
+ "name": "RED INFINITY 25 DEMO"
63
+ },
64
+ {
65
+ "cat": "Abonamente si extraoptiuni",
66
+ "amt": 1.07,
67
+ "name": "Vodafone Secure Net"
68
+ }
69
+ ],
70
+ "logicalResource": "721714413",
71
+ "netAmount": 0.0,
72
+ "productRef": {
73
+ "name": "RED INFINITY 25 DEMO",
74
+ "@referredType": "Voce mobila"
75
+ },
76
+ "subscriberExtraCost": 10.15,
77
+ "firstBill": false,
78
+ "planChangeProrata": false,
79
+ "firstBillForBan": false
80
+ }
81
+ ]
82
+ }
data/bill_VDF683003873.json ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "billDate": "2024-12-20",
3
+ "billNo": "VDF683003873",
4
+ "amountDue": 239.87,
5
+ "extraCharge": 10.84,
6
+ "taxItem": [
7
+ {
8
+ "cat": "Total factura curenta",
9
+ "amt": 239.87
10
+ },
11
+ {
12
+ "cat": "TVA 19%",
13
+ "amt": 38.3
14
+ },
15
+ {
16
+ "cat": "Abonamente si extraoptiuni",
17
+ "amt": 190.73
18
+ },
19
+ {
20
+ "cat": "Servicii utilizate",
21
+ "amt": 10.84
22
+ },
23
+ {
24
+ "cat": "Sold precedent",
25
+ "amt": 229.46
26
+ },
27
+ {
28
+ "cat": "Total platit din sold precedent",
29
+ "amt": 229.46
30
+ },
31
+ {
32
+ "cat": "Total de plata",
33
+ "amt": 239.87
34
+ }
35
+ ],
36
+ "subscribers": [
37
+ {
38
+ "billSummaryItem": [
39
+ {
40
+ "cat": "Servicii Nationale",
41
+ "amt": 5.37,
42
+ "name": "Apel Nr. Sp. Rel. cu Clientii"
43
+ },
44
+ {
45
+ "cat": "Servicii Nationale",
46
+ "amt": 5.47,
47
+ "name": "Servicii in parteneriat (SMS)"
48
+ },
49
+ {
50
+ "cat": "Abonamente si extraoptiuni",
51
+ "amt": 90.29,
52
+ "name": "Red 35"
53
+ },
54
+ {
55
+ "cat": "Abonamente si extraoptiuni",
56
+ "amt": 2.09,
57
+ "name": "Secure Net"
58
+ }
59
+ ],
60
+ "logicalResource": "727723568",
61
+ "netAmount": 103.22,
62
+ "productRef": {
63
+ "name": "RED 35",
64
+ "@referredType": "Voce mobila"
65
+ },
66
+ "subscriberExtraCost": 10.84,
67
+ "firstBill": false,
68
+ "planChangeProrata": false,
69
+ "firstBillForBan": false
70
+ },
71
+ {
72
+ "billSummaryItem": [
73
+ {
74
+ "cat": "Abonamente si extraoptiuni",
75
+ "amt": 37.61,
76
+ "name": "Smart Net 10"
77
+ }
78
+ ],
79
+ "logicalResource": "733934342",
80
+ "netAmount": 37.61,
81
+ "productRef": {
82
+ "name": "Smart Net 10",
83
+ "@referredType": "Date mobile nelimitat"
84
+ },
85
+ "firstBill": false,
86
+ "planChangeProrata": false,
87
+ "firstBillForBan": false
88
+ },
89
+ {
90
+ "billSummaryItem": [
91
+ {
92
+ "cat": "Abonamente si extraoptiuni",
93
+ "amt": 58.65,
94
+ "name": "Red 13"
95
+ },
96
+ {
97
+ "cat": "Abonamente si extraoptiuni",
98
+ "amt": 2.09,
99
+ "name": "Secure Net"
100
+ }
101
+ ],
102
+ "logicalResource": "724899108",
103
+ "netAmount": 60.74,
104
+ "productRef": {
105
+ "name": "RED 13",
106
+ "@referredType": "Voce mobila"
107
+ },
108
+ "firstBill": false,
109
+ "planChangeProrata": false,
110
+ "firstBillForBan": false
111
+ }
112
+ ]
113
+ }
data/bill_VDF685952701.json ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "billDate": "2025-01-06",
3
+ "billNo": "VDF685952701",
4
+ "amountDue": 5550.46,
5
+ "costFromExpiredDiscounts": -1.6899998,
6
+ "extraCharge": 5445.53,
7
+ "taxItem": [
8
+ {
9
+ "cat": "Total factura curenta",
10
+ "amt": 104.93
11
+ },
12
+ {
13
+ "cat": "TVA 19%",
14
+ "amt": 16.75
15
+ },
16
+ {
17
+ "cat": "Total rate terminal",
18
+ "amt": 5445.53
19
+ },
20
+ {
21
+ "cat": "Abonamente si extraoptiuni",
22
+ "amt": 91.17
23
+ },
24
+ {
25
+ "cat": "Reduceri",
26
+ "amt": -2.99
27
+ },
28
+ {
29
+ "cat": "Rest plata",
30
+ "amt": -5073.74
31
+ },
32
+ {
33
+ "cat": "Sold precedent",
34
+ "amt": -5073.74
35
+ },
36
+ {
37
+ "cat": "Total de plata",
38
+ "amt": 476.72
39
+ }
40
+ ],
41
+ "subscribers": [
42
+ {
43
+ "billSummaryItem": [
44
+ {
45
+ "cat": "Abonamente si extraoptiuni",
46
+ "amt": 34.08,
47
+ "name": "Red 11"
48
+ },
49
+ {
50
+ "cat": "Abonamente si extraoptiuni",
51
+ "amt": -8.52,
52
+ "name": "Reducere promo"
53
+ },
54
+ {
55
+ "cat": "Abonamente si extraoptiuni",
56
+ "amt": 2.99,
57
+ "name": "Secure Net"
58
+ },
59
+ {
60
+ "cat": "Abonamente si extraoptiuni",
61
+ "amt": -4.26,
62
+ "name": "Reducere promotionala 12 luni"
63
+ },
64
+ {
65
+ "cat": "Rate terminal",
66
+ "amt": 70.99,
67
+ "name": "Promotie Vodafone Smartphone cu plata in rate 12/36"
68
+ },
69
+ {
70
+ "cat": "Total sume datorate rate terminal",
71
+ "amt": 70.99
72
+ }
73
+ ],
74
+ "logicalResource": "724015962",
75
+ "netAmount": 24.29,
76
+ "productRef": {
77
+ "name": "RED 11",
78
+ "@referredType": "Voce mobila"
79
+ },
80
+ "subscriberExtraCost": 70.99,
81
+ "firstBill": false,
82
+ "planChangeProrata": false,
83
+ "firstBillForBan": false
84
+ },
85
+ {
86
+ "billSummaryItem": [
87
+ {
88
+ "cat": "Abonamente si extraoptiuni",
89
+ "amt": 29.82,
90
+ "name": "Abonament Vodafone WiFi Instant Nelimitat+"
91
+ }
92
+ ],
93
+ "logicalResource": "373920691",
94
+ "netAmount": 29.82,
95
+ "productRef": {
96
+ "name": "Wi-Fi Instant Nelimitat+",
97
+ "@referredType": "Date mobile nelimitat"
98
+ },
99
+ "firstBill": false,
100
+ "planChangeProrata": false,
101
+ "firstBillForBan": false
102
+ },
103
+ {
104
+ "billSummaryItem": [
105
+ {
106
+ "cat": "Reduceri",
107
+ "amt": -2.99,
108
+ "name": "Reducere Secure Net"
109
+ },
110
+ {
111
+ "cat": "Abonamente si extraoptiuni",
112
+ "amt": 38.33,
113
+ "name": "RED Start"
114
+ },
115
+ {
116
+ "cat": "Abonamente si extraoptiuni",
117
+ "amt": 2.99,
118
+ "name": "Secure Net"
119
+ },
120
+ {
121
+ "cat": "Abonamente si extraoptiuni",
122
+ "amt": -4.26,
123
+ "name": "Reducere promotionala 12 luni"
124
+ },
125
+ {
126
+ "cat": "Rate terminal",
127
+ "amt": 5374.54,
128
+ "name": "Valoare rate ramase pana la expirare contract"
129
+ },
130
+ {
131
+ "cat": "Total sume datorate rate terminal",
132
+ "amt": 5374.54
133
+ }
134
+ ],
135
+ "logicalResource": "724077190",
136
+ "netAmount": 34.07,
137
+ "productRef": {
138
+ "name": "RED Start",
139
+ "@referredType": "Voce mobila"
140
+ },
141
+ "subscriberExtraCost": 5374.54,
142
+ "firstBill": false,
143
+ "planChangeProrata": false,
144
+ "firstBillForBan": false
145
+ }
146
+ ]
147
+ }
src/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # This file is intentionally left blank.
src/app.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from chat.context import ChatContext
3
+ from chat.bill_comparison import compare_bills
4
+ from chat.user_info import UserInfo
5
+ from chat.conversation import Conversation
6
+
7
def main():
    """Render the Streamlit page that answers questions about telecom bills.

    Streamlit re-executes this script on every widget interaction, so the
    ``UserInfo`` and ``ChatContext`` objects are kept in ``st.session_state``;
    rebuilding them on each run would discard the conversation history.
    """
    st.title("Telecom Bill Chat Assistant")

    # Create the per-session state once and reuse it on later reruns.
    if "user_info" not in st.session_state:
        st.session_state["user_info"] = UserInfo()
    if "chat_context" not in st.session_state:
        st.session_state["chat_context"] = ChatContext()
    user_info = st.session_state["user_info"]
    chat_context = st.session_state["chat_context"]

    conversation = Conversation(user_info, chat_context)

    user_id = st.text_input("Enter your user ID:")
    if user_id:
        user_info.load_user_data(user_id)
        st.session_state['user_id'] = user_id

    user_query = st.text_input("Ask about your bills:")
    if st.button("Submit") and user_query:
        # handle_query() already stores both the question and the answer in
        # the chat context, so they must not be added a second time here.
        response = conversation.handle_query(user_query)
        st.write(response)


if __name__ == "__main__":
    main()
src/chat/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # This file is intentionally left blank.
src/chat/bill_comparison.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def compare_bills(bills):
    """Compare every bill with the one that follows it.

    Args:
        bills: Sequence of mappings, each exposing an ``'amount'`` entry,
            ordered from oldest to newest.

    Returns:
        The string ``"Not enough bills to compare."`` when fewer than two
        bills are given; otherwise a dict with one entry per consecutive pair
        holding the previous amount, the current amount and their difference
        (current minus previous).
    """
    if len(bills) < 2:
        return "Not enough bills to compare."

    report = {}
    # Walk consecutive (older, newer) pairs; positions are 1-based in labels.
    for position, (older, newer) in enumerate(zip(bills, bills[1:]), start=1):
        label = f'Comparison between Bill {position} and Bill {position + 1}'
        report[label] = {
            'Previous Bill Amount': older['amount'],
            'Current Bill Amount': newer['amount'],
            'Difference': newer['amount'] - older['amount'],
        }
    return report
src/chat/bill_parsing ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pdfplumber
2
+
3
def extrage_costuri_din_factura(pdf_path):
    """Extract the main cost figures from a PDF invoice.

    Every text line of every page is scanned for a known keyword; the last
    whitespace-separated token of a matching line is read as the amount.
    When a keyword occurs on several lines, the last parsable one wins.

    Args:
        pdf_path: Path of the PDF invoice to read.

    Returns:
        A dict with any of the keys ``'abonament'``, ``'consum_suplimentar'``,
        ``'taxe'`` and ``'total'`` mapped to float amounts. Keys whose keyword
        was never found with a numeric value are absent.
    """
    # (keyword searched in the line, key stored in the result). The order
    # matters: only the first matching keyword of a line is used.
    categorii = (
        ('Abonament', 'abonament'),
        ('Consum suplimentar', 'consum_suplimentar'),
        ('Taxe', 'taxe'),
        ('Total', 'total'),
    )

    costuri = {}
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            text = page.extract_text()
            if not text:
                # Pages without extractable text (e.g. scanned images).
                continue
            for line in text.split('\n'):
                for keyword, key in categorii:
                    if keyword not in line:
                        continue
                    token = line.split()[-1]
                    if ',' in token:
                        # Decimal comma: '.' can then only be a thousands
                        # separator ("1.234,56" -> "1234.56").
                        token = token.replace('.', '').replace(',', '.')
                    try:
                        costuri[key] = float(token)
                    except ValueError:
                        # Heading or label line that merely mentions the
                        # keyword; there is no amount to record.
                        pass
                    break
    return costuri
19
+
20
if __name__ == "__main__":
    # Manual smoke run only: guarded so that loading this module elsewhere
    # does not try to open a PDF at a machine-specific location.

    # Path to the PDF invoice
    pdf_path = 'C:\\Users\\ZZ029K826\\Documents\\GitHub\\llm-bill-chat-app\\FACTURA FX-24107160858.PDF'

    # Extract the costs from the invoice
    costuri_factura = extrage_costuri_din_factura(pdf_path)
    print(costuri_factura)
src/chat/context.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class ChatContext:
    """In-memory holder for a chat transcript and the current user's info."""

    def __init__(self) -> None:
        self.messages = []   # list of {"role": ..., "content": ...} dicts
        self.user_info = {}  # whatever was last passed to set_user_info()

    def add_message(self, role, content) -> None:
        """Append one message, tagged with its author role, to the transcript."""
        entry = {"role": role, "content": content}
        self.messages.append(entry)

    def get_context(self):
        """Return the transcript list itself (not a copy)."""
        return self.messages

    def set_user_info(self, user_info) -> None:
        """Replace the stored user info with *user_info*."""
        self.user_info = user_info

    def get_user_info(self):
        """Return the stored user info."""
        return self.user_info

    def clear_context(self) -> None:
        """Start a new, empty transcript.

        The attribute is rebound to a fresh list, so a list previously
        obtained from get_context() keeps its old contents.
        """
        self.messages = []
src/chat/conversation.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.chat.context import ChatContext
2
+
3
class Conversation:
    """Route a user's question to a bill-related answer and log the exchange.

    Args:
        user_info: Object exposing ``get_bills()``, returning the user's bills
            ordered from oldest to newest.
        chat_context: Object exposing ``add_message(role, content)``; every
            question and answer is recorded through it.
    """

    def __init__(self, user_info, chat_context: "ChatContext"):
        self.user_info = user_info
        self.chat_context = chat_context

    def handle_query(self, query):
        """Record *query*, produce the answer, record it, and return it."""
        self.chat_context.add_message("User", query)
        response = self.generate_response(query)
        self.chat_context.add_message("Assistant", response)
        return response

    def generate_response(self, query):
        """Return bill information when the query mentions "factura".

        The keyword match ignores letter case; any other query gets the fixed
        refusal message.
        """
        if "factura" in query.lower():
            return self.get_bill_info()
        return "Imi pare rau, te pot ajuta doar cu informatii despre factura ta."

    def get_bill_info(self):
        """Compare the user's most recent bills (at most the last four).

        Returns a short notice instead when fewer than two bills are stored.
        """
        # The former shortcut read a non-existent ``chat_context.context``
        # attribute and called compare_bills() without its argument, so it
        # could only raise. Both of its outcomes were a comparison anyway.
        bills = self.user_info.get_bills()
        if len(bills) < 2:
            return "I need at least 2 bills to compare."
        return self.compare_bills(bills[-4:])

    def compare_bills(self, last_bills):
        """Placeholder comparison: echo the bills that would be compared."""
        return f"Comparing the last four bills: {last_bills}"
src/chat/llm.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import torch
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer
4
+
5
available_models = [
    "Qwen/Qwen1.5-7B-Chat",  # Example: This is our Qwen model
]


def initialize_chat_model(model_name):
    """Load the tokenizer and model for *model_name* into ``st.session_state``.

    Nothing is loaded when a model is already cached in the session under the
    same name. After loading, the session holds ``chat_tokenizer``,
    ``chat_model``, ``device`` and ``model_name``.

    Args:
        model_name: Hugging Face model identifier, e.g. an entry of
            ``available_models``.
    """
    already_loaded = (
        "chat_model" in st.session_state
        and st.session_state.model_name == model_name
    )
    if already_loaded:
        return

    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # device_map="auto" already places the weights on the available hardware.
    # Moving the model again with model.to() is redundant and can fail once
    # the weights have been dispatched across devices, so it is not done.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype="auto",
        device_map="auto",
    )

    # Save in session state; inputs must be sent to the device the model
    # actually ended up on.
    st.session_state.chat_tokenizer = tokenizer
    st.session_state.chat_model = model
    st.session_state.device = model.device
    st.session_state.model_name = model_name
31
+
32
def generate_response(
    user_input: str,
    model_name: str,
    temperature: float = 0.7,
    top_k: int = 50,
    top_p: float = 0.9,
    repetition_penalty: float = 1.2,
    max_new_tokens: int = 512,
    system_prompt: str = "You are a helpful assistant.",
) -> str:
    """Generate a single chat reply to *user_input* with the named model.

    Args:
        user_input: The user's message.
        model_name: Model identifier passed to ``initialize_chat_model``.
        temperature: Sampling temperature.
        top_k: Top-k sampling cut-off.
        top_p: Nucleus sampling cut-off.
        repetition_penalty: Penalty applied to repeated tokens.
        max_new_tokens: Upper bound on the number of generated tokens.
        system_prompt: Content of the system message placed before the
            user message.

    Returns:
        The newly generated text only (the prompt is stripped), decoded with
        special tokens removed.
    """
    # Make sure model is initialized
    initialize_chat_model(model_name)

    tokenizer = st.session_state.chat_tokenizer
    model = st.session_state.chat_model
    device = st.session_state.device

    # Construct chat messages for Qwen
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_input},
    ]

    # Use the tokenizer's chat template to build the prompt text
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Tokenize and move to the chosen device
    model_inputs = tokenizer([text], return_tensors="pt").to(device)
    prompt_length = model_inputs.input_ids.shape[1]

    with torch.no_grad():
        # Pass the whole encoding so the attention mask reaches generate()
        # together with the input ids.
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            do_sample=True,
        )

    # Drop the prompt tokens so only the newly generated text is decoded
    new_token_ids = generated_ids[:, prompt_length:]
    return tokenizer.batch_decode(new_token_ids, skip_special_tokens=True)[0]
src/chat/test_llm.ipynb ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "c:\\Users\\ZZ029K826\\Documents\\GitHub\\llm-bill-chat-app\\.venv\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
13
+ " from .autonotebook import tqdm as notebook_tqdm\n"
14
+ ]
15
+ }
16
+ ],
17
+ "source": [
18
+ "from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline\n"
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": 2,
24
+ "metadata": {},
25
+ "outputs": [],
26
+ "source": [
27
+ "model_name = \"ai-forever/mGPT-1.3B-romanian\"\n",
28
+ "tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
29
+ "model = AutoModelForCausalLM.from_pretrained(model_name)\n"
30
+ ]
31
+ },
32
+ {
33
+ "cell_type": "code",
34
+ "execution_count": 3,
35
+ "metadata": {},
36
+ "outputs": [
37
+ {
38
+ "name": "stderr",
39
+ "output_type": "stream",
40
+ "text": [
41
+ "Device set to use cpu\n"
42
+ ]
43
+ }
44
+ ],
45
+ "source": [
46
+ "chat_model = pipeline(\n",
47
+ " \"text-generation\",\n",
48
+ " model=model,\n",
49
+ " tokenizer=tokenizer,\n",
50
+ " device=-1, # Use CPU\n",
51
+ " max_length=300,\n",
52
+ " max_new_tokens=100,\n",
53
+ " truncation=True\n",
54
+ ")\n"
55
+ ]
56
+ },
57
+ {
58
+ "cell_type": "code",
59
+ "execution_count": 6,
60
+ "metadata": {},
61
+ "outputs": [
62
+ {
63
+ "name": "stderr",
64
+ "output_type": "stream",
65
+ "text": [
66
+ "c:\\Users\\ZZ029K826\\Documents\\GitHub\\llm-bill-chat-app\\.venv\\Lib\\site-packages\\transformers\\generation\\configuration_utils.py:628: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.7` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.\n",
67
+ " warnings.warn(\n",
68
+ "c:\\Users\\ZZ029K826\\Documents\\GitHub\\llm-bill-chat-app\\.venv\\Lib\\site-packages\\transformers\\generation\\configuration_utils.py:633: UserWarning: `do_sample` is set to `False`. However, `top_p` is set to `0.9` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `top_p`.\n",
69
+ " warnings.warn(\n",
70
+ "Both `max_new_tokens` (=100) and `max_length`(=300) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)\n"
71
+ ]
72
+ },
73
+ {
74
+ "name": "stdout",
75
+ "output_type": "stream",
76
+ "text": [
77
+ "Cum te simți astăzi?\n",
78
+ "\n",
79
+ "- Nu mă mai gândesc la asta. - Ba da, sunt bine!\n",
80
+ "\n",
81
+ "Nu e nimic de făcut în legatura cu tata... dar nu vreau ca el sa se raneasca pe mine acolo unde este locul meu preferat: un apartament mare si frumoasa casa din New York City care are o proprietate privată pentru copiii lui Michael Jackson şi familia ei.\n",
82
+ "\n",
83
+ "\n"
84
+ ]
85
+ }
86
+ ],
87
+ "source": [
88
+ "def generate_response(user_input, temperature=0.7, top_k=50, top_p=0.9, repetition_penalty=1.2):\n",
89
+ " response = chat_model(\n",
90
+ " user_input,\n",
91
+ " max_new_tokens=100,\n",
92
+ " temperature=temperature,\n",
93
+ " top_k=top_k,\n",
94
+ " top_p=top_p,\n",
95
+ " repetition_penalty=repetition_penalty\n",
96
+ " )\n",
97
+ " return response[0]['generated_text']\n",
98
+ "\n",
99
+ "# Test the function with adjusted parameters\n",
100
+ "user_input = \"Cum te simți astăzi?\"\n",
101
+ "response = generate_response(user_input)\n",
102
+ "print(response)"
103
+ ]
104
+ }
105
+ ],
106
+ "metadata": {
107
+ "kernelspec": {
108
+ "display_name": ".venv",
109
+ "language": "python",
110
+ "name": "python3"
111
+ },
112
+ "language_info": {
113
+ "codemirror_mode": {
114
+ "name": "ipython",
115
+ "version": 3
116
+ },
117
+ "file_extension": ".py",
118
+ "mimetype": "text/x-python",
119
+ "name": "python",
120
+ "nbconvert_exporter": "python",
121
+ "pygments_lexer": "ipython3",
122
+ "version": "3.11.5"
123
+ }
124
+ },
125
+ "nbformat": 4,
126
+ "nbformat_minor": 2
127
+ }
src/chat/user_info.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+
4
class UserInfo:
    """Load and persist one user's data (currently the list of bills).

    Each user is stored as ``user_data_<user_id>.json`` inside ``DATA_DIR``.
    Loading and saving use the same location, so bills written by
    ``save_bill_data`` are found again by ``load_user_data``.
    """

    # Storage directory, relative to the working directory. Built with
    # os.path.join so it is valid on both Windows and POSIX systems.
    DATA_DIR = os.path.join("data", "user_data")

    def __init__(self):
        self.user_data = {}

    def _user_file(self, user_id):
        """Return the path of the JSON file that stores *user_id*'s data."""
        return os.path.join(self.DATA_DIR, f"user_data_{user_id}.json")

    def _write_user_data(self, user_id):
        """Write ``self.user_data`` to the user's file, creating the folder."""
        os.makedirs(self.DATA_DIR, exist_ok=True)
        with open(self._user_file(user_id), 'w', encoding='utf-8') as file:
            json.dump(self.user_data, file)

    def load_user_data(self, user_id):
        """Load the stored data of *user_id* into ``self.user_data``.

        When no file exists yet, an empty record is created and written to
        disk. The ``'bills'`` key is guaranteed to exist afterwards.
        """
        file_path = self._user_file(user_id)
        if os.path.exists(file_path):
            with open(file_path, 'r', encoding='utf-8') as file:
                self.user_data = json.load(file)
        else:
            self.user_data = {"user_id": user_id, "bills": []}
            self._write_user_data(user_id)

        # Ensure 'bills' key is always initialized
        self.user_data.setdefault("bills", [])

    def save_bill_data(self, user_id, bill_data):
        """Append *bill_data* to the user's bills and persist the record."""
        self.user_data.setdefault("bills", []).append(bill_data)
        self._write_user_data(user_id)

    def get_bills(self):
        """Return the list of bills, or an empty list when none are loaded."""
        return self.user_data.get("bills", [])
src/utils/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # This file is intentionally left blank.