Spaces:
Sleeping
Sleeping
billjson
Browse files- bill copy.py +216 -0
- bill.py +34 -79
- billPdf.py +216 -0
- data/Fact1.png +0 -0
- data/Factura-664159569.pdf +0 -0
- data/Factura-Vodafone_03.05.pdf +0 -0
- data/VDF85920235.pdf +0 -0
- data/bill_VDF682568817.json +195 -0
- data/bill_VDF682981042.json +82 -0
- data/bill_VDF683003873.json +113 -0
- data/bill_VDF685952701.json +147 -0
- src/__init__.py +1 -0
- src/app.py +30 -0
- src/chat/__init__.py +1 -0
- src/chat/bill_comparison.py +17 -0
- src/chat/bill_parsing +25 -0
- src/chat/context.py +19 -0
- src/chat/conversation.py +30 -0
- src/chat/llm.py +85 -0
- src/chat/test_llm.ipynb +127 -0
- src/chat/user_info.py +43 -0
- src/utils/__init__.py +1 -0
bill copy.py
ADDED
@@ -0,0 +1,216 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env -S poetry run python
|
2 |
+
|
3 |
+
import os
|
4 |
+
import json
|
5 |
+
import pdfplumber
|
6 |
+
import streamlit as st
|
7 |
+
from openai import OpenAI
|
8 |
+
|
9 |
+
client = OpenAI()
|
10 |
+
|
11 |
+
def load_user_data(user_id):
    """Load the stored data of one user from ``data/user_data``.

    Args:
        user_id: Identifier interpolated into the file name
            ``user_data_<user_id>.json``.

    Returns:
        The decoded JSON content of the user's file, or an empty dict when
        no such file exists.

    Raises:
        json.JSONDecodeError: If the file exists but does not hold valid JSON.
    """
    file_path = os.path.join("data", "user_data", f"user_data_{user_id}.json")
    # EAFP: open directly instead of checking for existence first, so the
    # file cannot vanish between the check and the open.
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            return json.load(file)
    except (FileNotFoundError, NotADirectoryError):
        return {}
|
17 |
+
|
18 |
+
# Ordered (required substrings, output key) rules. The first rule whose
# substrings all occur in a line (case-sensitive) decides which cost the line
# describes, so more specific labels must come before their prefixes.
_COST_RULES = (
    (("Sold precedent",), "Sold precedent"),
    (("din sold precedent",), "Total platit din sold precedent"),
    (("TVA", "%"), "TVA"),
    # NOTE(review): the key is spelled "extraopiuni" (missing "t"); kept
    # unchanged so bills saved earlier keep using the same key.
    (("Abonamente",), "Abonamente si extraopiuni"),
    (("Total factura curenta fara TVA",), "Total factura curenta fara TVA"),
    (("Servicii utilizate",), "Servicii utilizate"),
    (("Rate terminal",), "Rate terminal"),
    (("Total factura curenta",), "Total factura curenta"),
)


def _cost_key_for_line(line):
    """Return the output key of the first rule matching *line*, or None."""
    for needles, key in _COST_RULES:
        if all(needle in line for needle in needles):
            return key
    return None


def _amount_from_line(line):
    """Parse the second-to-last whitespace token of *line* as a float.

    Returns None when the line has fewer than two tokens or when the token is
    not a number after replacing the decimal comma with a dot.
    """
    tokens = line.split()
    if len(tokens) < 2:
        return None
    # Presumably the last token is the currency and the one before it the
    # amount -- TODO confirm against a real bill layout.
    # NOTE(review): amounts with a thousands separator (e.g. "1.234,56") do
    # not parse and are skipped.
    try:
        return float(tokens[-2].replace(',', '.'))
    except ValueError:
        return None


def parse_pdf_to_json(pdf_path):
    """Extract the cost lines of a bill PDF into a flat dict.

    Every text line of every page is matched against ``_COST_RULES``; for a
    matching line the amount is stored under the rule's key. When a key
    occurs more than once, the last occurrence wins.

    Args:
        pdf_path: Whatever ``pdfplumber.open`` accepts; ``main`` passes the
            Streamlit upload object.

    Returns:
        Dict mapping cost labels to float amounts. All values are floats
        (previously four keys held un-converted strings), and lines without a
        parsable amount are skipped instead of raising.
    """
    costuri = {}
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            text = page.extract_text()
            if not text:
                continue
            for line in text.split('\n'):
                key = _cost_key_for_line(line)
                if key is None:
                    continue
                amount = _amount_from_line(line)
                if amount is not None:
                    costuri[key] = amount
    return costuri
|
92 |
+
|
93 |
+
def check_related_keys(question, user_id):
    """Return the bill field names that are mentioned in *question*.

    Args:
        question: Free-text question typed by the user.
        user_id: Identifier used to load the user's stored bills.

    Returns:
        Sorted list of keys, collected from all stored bills, whose
        lower-cased name occurs as a substring of the lower-cased question.
        The list is sorted so the result is the same on every run; the
        previous order came from set iteration.
    """
    user_data = load_user_data(user_id)
    bill_keys = set()
    for bill in user_data.get("bills", []):
        bill_keys.update(bill.keys())
    # Lower-case the question once instead of once per key.
    question_lower = question.lower()
    return sorted(key for key in bill_keys if key.lower() in question_lower)
|
99 |
+
|
100 |
+
def process_query(query, user_id, max_input_length=550):
    """Build the first-turn prompt that embeds the user's stored bills.

    The prompt and its size are also written to the Streamlit page.

    Args:
        query: The user's question.
        user_id: Identifier used to load the user's stored bills.
        max_input_length: Maximum allowed prompt length in characters.
            Defaults to the previously hard-coded 550.

    Returns:
        The prompt string, or None (after showing a warning) when the prompt
        is longer than ``max_input_length``.
    """
    user_data = load_user_data(user_id)
    bill_info = user_data.get("bills", [])
    related_keys = check_related_keys(query, user_id)

    # Restrict the answer to the fields the question names; without any
    # match, restrict it to the bill in general. Testing the list itself
    # avoids the old "N/A" sentinel string.
    if related_keys:
        scope = "de: " + ", ".join(related_keys)
    else:
        scope = "de factura"

    context = (
        f"Citeste informatiile despre costrurile in lei facturate din dictionar: {bill_info} "
        f"si raspunde la intrebarea: '{query}' dar numai cu info legate {scope}"
    )

    st.write(f"Context:\n{context}")
    st.write(f"Context size: {len(context)} characters")

    if len(context) > max_input_length:
        st.warning("Prea multe caractere în context, solicitarea nu va fi trimisă.")
        return None

    return context
|
126 |
+
|
127 |
+
def main():
    """Run the Streamlit page: select a user, ingest a bill PDF, chat about it.

    Streamlit executes this function from the top on every interaction, so
    all state that must survive a rerun lives in ``st.session_state``.
    """
    st.title("Telecom Bill Chat with LLM Agent")

    if "user_id" not in st.session_state:
        st.session_state.user_id = None

    user_id = st.sidebar.text_input("Introdu numărul de telefon:")
    if user_id and user_id != st.session_state.user_id:
        if load_user_data(user_id):
            st.success("Utilizator găsit!")
        else:
            st.warning("Nu am găsit date pentru acest ID. Încărcați o factură PDF la nevoie.")
        # The ID is accepted either way so a first bill can be uploaded for it.
        st.session_state.user_id = user_id

    uploaded_file = st.file_uploader("Încarcă factura PDF", type="pdf")
    if uploaded_file and st.session_state.user_id:
        # The uploader keeps holding the file across reruns. Remember which
        # upload was stored so it is not appended again on every interaction.
        upload_key = (st.session_state.user_id, uploaded_file.name, uploaded_file.size)
        if st.session_state.get("saved_upload_key") != upload_key:
            bill_data = parse_pdf_to_json(uploaded_file)
            existing_data = load_user_data(st.session_state.user_id)
            existing_data.setdefault("bills", []).append(bill_data)
            file_path = os.path.join("data", "user_data", f"user_data_{st.session_state['user_id']}.json")
            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            with open(file_path, "w", encoding="utf-8") as file:
                json.dump(existing_data, file)
            st.session_state["saved_upload_key"] = upload_key
        st.success("Factura a fost încărcată și salvată cu succes!")

    if st.session_state.user_id:
        data = load_user_data(st.session_state.user_id)
        st.write(f"Phone Number: {st.session_state.user_id}")
        st.write("Facturi existente:")
        for bill in data.get("bills", []):
            st.write(bill)
    else:
        st.info("Introduceți un ID și/sau încărcați o factură PDF pentru a continua.")

    # Initialize the conversation. "context_prompt_added" records whether the
    # prompt carrying the bill data has already been injected.
    if "messages" not in st.session_state:
        st.session_state["messages"] = [
            {"role": "assistant", "content": "Cu ce te pot ajuta?"}
        ]
    if "context_prompt_added" not in st.session_state:
        st.session_state.context_prompt_added = False

    st.write("---")
    st.subheader("Chat")

    for msg in st.session_state["messages"]:
        st.chat_message(msg["role"]).write(msg["content"])

    if prompt := st.chat_input("Introduceți întrebarea aici:"):
        if not st.session_state.user_id:
            st.error("Trebuie să introduceți un număr de telefon valid sau să încărcați date.")
            return

        # The first question is wrapped in the bill context exactly once;
        # later questions are sent as typed.
        if not st.session_state.context_prompt_added:
            final_prompt = process_query(prompt, st.session_state["user_id"])
            if final_prompt is None:
                st.stop()
            st.session_state["messages"].append({"role": "user", "content": final_prompt})
            st.session_state.context_prompt_added = True
        else:
            st.session_state["messages"].append({"role": "user", "content": prompt})

        # Display the latest user message in the chat.
        st.chat_message("user").write(st.session_state["messages"][-1]["content"])

        # Send the entire conversation to the model.
        completion = client.chat.completions.create(
            model="gpt-4",
            messages=st.session_state["messages"],
        )
        # The message content is optional in the API response; treat a
        # missing content as an empty reply instead of crashing on .strip().
        response_text = (completion.choices[0].message.content or "").strip()

        st.session_state["messages"].append({"role": "assistant", "content": response_text})
        st.chat_message("assistant").write(response_text)

        # "usage" can be present but None, so test the value, not the attribute.
        usage = getattr(completion, "usage", None)
        if usage is not None:
            st.write("Prompt tokens:", usage.prompt_tokens)
            st.write("Completion tokens:", usage.completion_tokens)
            st.write("Total tokens:", usage.total_tokens)
|
214 |
+
|
215 |
+
if __name__ == "__main__":
|
216 |
+
main()
|
bill.py
CHANGED
@@ -2,7 +2,6 @@
|
|
2 |
|
3 |
import os
|
4 |
import json
|
5 |
-
import pdfplumber
|
6 |
import streamlit as st
|
7 |
from openai import OpenAI
|
8 |
|
@@ -15,80 +14,35 @@ def load_user_data(user_id):
|
|
15 |
with open(file_path, "r") as file:
|
16 |
return json.load(file)
|
17 |
|
18 |
-
def
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
if 'Sold precedent' in line:
|
48 |
-
value = line.split()[-2].replace(',', '.') # Extract and convert to float
|
49 |
-
costuri['Sold precedent'] = value
|
50 |
-
|
51 |
-
# Check for 'Total bază de impozitare TVA'
|
52 |
-
elif 'din sold precedent' in line:
|
53 |
-
value = line.split()[-2].replace(',', '.') # Extract and convert to float
|
54 |
-
costuri['Total platit din sold precedent'] = value
|
55 |
-
|
56 |
-
# Check for 'TVA'
|
57 |
-
elif 'TVA' in line and '%' in line:
|
58 |
-
value = line.split()[-2].replace(',', '.') # Extract and convert to float
|
59 |
-
costuri['TVA'] = value
|
60 |
-
|
61 |
-
# Check for 'Dobânzi penalizatoare'
|
62 |
-
elif 'Abonamente' in line:
|
63 |
-
value = line.split()[-2].replace(',', '.') # Extract and convert to float
|
64 |
-
costuri['Abonamente si extraopiuni'] = value
|
65 |
-
|
66 |
-
# Check for 'TOTAL DE PLATĂ FACTURĂ CURENTĂ'
|
67 |
-
elif 'Total factura curenta fara TVA' in line:
|
68 |
-
value = float(line.split()[-2].replace(',', '.')) # Extract and convert to float
|
69 |
-
costuri['Total factura curenta fara TVA'] = value
|
70 |
-
|
71 |
-
# Check for 'Sold Cont Contract'
|
72 |
-
elif 'Servicii utilizate' in line:
|
73 |
-
value = line.split()[-2].replace(',', '.') # Extract and convert to float
|
74 |
-
costuri['Servicii utilizate'] = value
|
75 |
-
|
76 |
-
# Check for 'Compensatii'
|
77 |
-
elif 'Rate terminal' in line:
|
78 |
-
value = float(line.split()[-2].replace(',', '.')) # Extract and convert to float
|
79 |
-
costuri['Rate terminal'] = value
|
80 |
-
|
81 |
-
# Check for 'TVA 19,00%'
|
82 |
-
elif 'TVA 19,00%' in line:
|
83 |
-
value = float(line.split()[-2].replace(',', '.')) # Extract and convert to float
|
84 |
-
costuri['TVA'] = value
|
85 |
-
|
86 |
-
# Check for 'Compensatii'
|
87 |
-
elif 'Total factura curenta' in line:
|
88 |
-
value = float(line.split()[-2].replace(',', '.')) # Extract and convert to float
|
89 |
-
costuri['Total factura curenta'] = value
|
90 |
-
|
91 |
-
return costuri
|
92 |
|
93 |
def check_related_keys(question, user_id):
|
94 |
user_data = load_user_data(user_id)
|
@@ -141,13 +95,14 @@ def main():
|
|
141 |
st.warning("Nu am găsit date pentru acest ID. Încărcați o factură PDF la nevoie.")
|
142 |
st.session_state.user_id = user_id
|
143 |
|
144 |
-
uploaded_file = st.file_uploader("Încarcă factura
|
145 |
if uploaded_file and st.session_state.user_id:
|
146 |
-
bill_data =
|
|
|
147 |
existing_data = load_user_data(st.session_state.user_id)
|
148 |
if "bills" not in existing_data:
|
149 |
existing_data["bills"] = []
|
150 |
-
existing_data["bills"].append(
|
151 |
file_path = os.path.join("data", "user_data", f"user_data_{st.session_state['user_id']}.json")
|
152 |
os.makedirs(os.path.dirname(file_path), exist_ok=True)
|
153 |
with open(file_path, "w") as file:
|
@@ -161,7 +116,7 @@ def main():
|
|
161 |
for bill in data.get("bills", []):
|
162 |
st.write(bill)
|
163 |
else:
|
164 |
-
st.info("Introduceți un ID și/sau încărcați o factură
|
165 |
|
166 |
# Initialize conversation in the session state
|
167 |
# "context_prompt_added" indicates whether we've added the specialized "bill info" context yet.
|
|
|
2 |
|
3 |
import os
|
4 |
import json
|
|
|
5 |
import streamlit as st
|
6 |
from openai import OpenAI
|
7 |
|
|
|
14 |
with open(file_path, "r") as file:
|
15 |
return json.load(file)
|
16 |
|
17 |
+
def parseBill(data):
    """Reduce a decoded bill JSON document to the fields the chat uses.

    Args:
        data: Dict decoded from a bill JSON file. Keys read: ``billDate``,
            ``billNo``, ``amountDue``, ``extraCharge``, ``taxItem`` and
            ``subscribers``; any missing key yields None (or an empty list).

    Returns:
        Dict with ``billDate``, ``billNo``, ``amountDue`` and ``extraCharge``
        copied over, ``totalBillCosts`` (one ``{"categorie", "amount"}`` dict
        per tax item) and ``subscriberCosts`` (per subscriber its
        ``logicalResource`` and a ``billSummaryItems`` list of
        ``{"categorie", "amount", "name"}`` dicts).
    """
    # "or []" also covers keys that are present with a JSON null, which
    # .get(key, []) would hand back as None and crash the iteration.
    total_bill_costs = [
        {"categorie": item.get("cat"), "amount": item.get("amt")}
        for item in data.get("taxItem") or []
    ]
    subscriber_costs = [
        {
            "logicalResource": subscriber.get("logicalResource"),
            "billSummaryItems": [
                {"categorie": entry.get("cat"), "amount": entry.get("amt"), "name": entry.get("name")}
                for entry in subscriber.get("billSummaryItem") or []
            ],
        }
        for subscriber in data.get("subscribers") or []
    ]

    return {
        "billDate": data.get("billDate"),
        "billNo": data.get("billNo"),
        "amountDue": data.get("amountDue"),
        "extraCharge": data.get("extraCharge"),
        "totalBillCosts": total_bill_costs,
        "subscriberCosts": subscriber_costs,
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
def check_related_keys(question, user_id):
|
48 |
user_data = load_user_data(user_id)
|
|
|
95 |
st.warning("Nu am găsit date pentru acest ID. Încărcați o factură PDF la nevoie.")
|
96 |
st.session_state.user_id = user_id
|
97 |
|
98 |
+
uploaded_file = st.file_uploader("Încarcă factura JSON", type="json")
|
99 |
if uploaded_file and st.session_state.user_id:
|
100 |
+
bill_data = json.load(uploaded_file)
|
101 |
+
parsed_bill = parseBill(bill_data)
|
102 |
existing_data = load_user_data(st.session_state.user_id)
|
103 |
if "bills" not in existing_data:
|
104 |
existing_data["bills"] = []
|
105 |
+
existing_data["bills"].append(parsed_bill)
|
106 |
file_path = os.path.join("data", "user_data", f"user_data_{st.session_state['user_id']}.json")
|
107 |
os.makedirs(os.path.dirname(file_path), exist_ok=True)
|
108 |
with open(file_path, "w") as file:
|
|
|
116 |
for bill in data.get("bills", []):
|
117 |
st.write(bill)
|
118 |
else:
|
119 |
+
st.info("Introduceți un ID și/sau încărcați o factură JSON pentru a continua.")
|
120 |
|
121 |
# Initialize conversation in the session state
|
122 |
# "context_prompt_added" indicates whether we've added the specialized "bill info" context yet.
|
billPdf.py
ADDED
@@ -0,0 +1,216 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env -S poetry run python
|
2 |
+
|
3 |
+
import os
|
4 |
+
import json
|
5 |
+
#import pdfplumber
|
6 |
+
import streamlit as st
|
7 |
+
from openai import OpenAI
|
8 |
+
|
9 |
+
client = OpenAI()
|
10 |
+
|
11 |
+
def load_user_data(user_id):
    """Load the stored data of one user from ``data/user_data``.

    Args:
        user_id: Identifier interpolated into the file name
            ``user_data_<user_id>.json``.

    Returns:
        The decoded JSON content of the user's file, or an empty dict when
        no such file exists.

    Raises:
        json.JSONDecodeError: If the file exists but does not hold valid JSON.
    """
    file_path = os.path.join("data", "user_data", f"user_data_{user_id}.json")
    # EAFP: open directly instead of checking for existence first, so the
    # file cannot vanish between the check and the open.
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            return json.load(file)
    except (FileNotFoundError, NotADirectoryError):
        return {}
|
17 |
+
|
18 |
+
# Ordered (required substrings, output key) rules. The first rule whose
# substrings all occur in a line (case-sensitive) decides which cost the line
# describes, so more specific labels must come before their prefixes.
_COST_RULES = (
    (("Sold precedent",), "Sold precedent"),
    (("din sold precedent",), "Total platit din sold precedent"),
    (("TVA", "%"), "TVA"),
    # NOTE(review): the key is spelled "extraopiuni" (missing "t"); kept
    # unchanged so bills saved earlier keep using the same key.
    (("Abonamente",), "Abonamente si extraopiuni"),
    (("Total factura curenta fara TVA",), "Total factura curenta fara TVA"),
    (("Servicii utilizate",), "Servicii utilizate"),
    (("Rate terminal",), "Rate terminal"),
    (("Total factura curenta",), "Total factura curenta"),
)


def _cost_key_for_line(line):
    """Return the output key of the first rule matching *line*, or None."""
    for needles, key in _COST_RULES:
        if all(needle in line for needle in needles):
            return key
    return None


def _amount_from_line(line):
    """Parse the second-to-last whitespace token of *line* as a float.

    Returns None when the line has fewer than two tokens or when the token is
    not a number after replacing the decimal comma with a dot.
    """
    tokens = line.split()
    if len(tokens) < 2:
        return None
    # Presumably the last token is the currency and the one before it the
    # amount -- TODO confirm against a real bill layout.
    # NOTE(review): amounts with a thousands separator (e.g. "1.234,56") do
    # not parse and are skipped.
    try:
        return float(tokens[-2].replace(',', '.'))
    except ValueError:
        return None


def parse_pdf_to_json(pdf_path):
    """Extract the cost lines of a bill PDF into a flat dict.

    Every text line of every page is matched against ``_COST_RULES``; for a
    matching line the amount is stored under the rule's key. When a key
    occurs more than once, the last occurrence wins.

    Args:
        pdf_path: Whatever ``pdfplumber.open`` accepts; ``main`` passes the
            Streamlit upload object.

    Returns:
        Dict mapping cost labels to float amounts. All values are floats
        (previously four keys held un-converted strings), and lines without a
        parsable amount are skipped instead of raising.
    """
    # The file-level "import pdfplumber" is commented out in this file, so
    # the module is imported here to keep the function callable.
    import pdfplumber

    costuri = {}
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            text = page.extract_text()
            if not text:
                continue
            for line in text.split('\n'):
                key = _cost_key_for_line(line)
                if key is None:
                    continue
                amount = _amount_from_line(line)
                if amount is not None:
                    costuri[key] = amount
    return costuri
|
92 |
+
|
93 |
+
def check_related_keys(question, user_id):
    """Return the bill field names that are mentioned in *question*.

    Args:
        question: Free-text question typed by the user.
        user_id: Identifier used to load the user's stored bills.

    Returns:
        Sorted list of keys, collected from all stored bills, whose
        lower-cased name occurs as a substring of the lower-cased question.
        The list is sorted so the result is the same on every run; the
        previous order came from set iteration.
    """
    user_data = load_user_data(user_id)
    bill_keys = set()
    for bill in user_data.get("bills", []):
        bill_keys.update(bill.keys())
    # Lower-case the question once instead of once per key.
    question_lower = question.lower()
    return sorted(key for key in bill_keys if key.lower() in question_lower)
|
99 |
+
|
100 |
+
def process_query(query, user_id, max_input_length=550):
    """Build the first-turn prompt that embeds the user's stored bills.

    The prompt and its size are also written to the Streamlit page.

    Args:
        query: The user's question.
        user_id: Identifier used to load the user's stored bills.
        max_input_length: Maximum allowed prompt length in characters.
            Defaults to the previously hard-coded 550.

    Returns:
        The prompt string, or None (after showing a warning) when the prompt
        is longer than ``max_input_length``.
    """
    user_data = load_user_data(user_id)
    bill_info = user_data.get("bills", [])
    related_keys = check_related_keys(query, user_id)

    # Restrict the answer to the fields the question names; without any
    # match, restrict it to the bill in general. Testing the list itself
    # avoids the old "N/A" sentinel string.
    if related_keys:
        scope = "de: " + ", ".join(related_keys)
    else:
        scope = "de factura"

    context = (
        f"Citeste informatiile despre costrurile in lei facturate din dictionar: {bill_info} "
        f"si raspunde la intrebarea: '{query}' dar numai cu info legate {scope}"
    )

    st.write(f"Context:\n{context}")
    st.write(f"Context size: {len(context)} characters")

    if len(context) > max_input_length:
        st.warning("Prea multe caractere în context, solicitarea nu va fi trimisă.")
        return None

    return context
|
126 |
+
|
127 |
+
def main():
    """Run the Streamlit page: select a user, ingest a bill PDF, chat about it.

    Streamlit executes this function from the top on every interaction, so
    all state that must survive a rerun lives in ``st.session_state``.
    """
    st.title("Telecom Bill Chat with LLM Agent")

    if "user_id" not in st.session_state:
        st.session_state.user_id = None

    user_id = st.sidebar.text_input("Introdu numărul de telefon:")
    if user_id and user_id != st.session_state.user_id:
        if load_user_data(user_id):
            st.success("Utilizator găsit!")
        else:
            st.warning("Nu am găsit date pentru acest ID. Încărcați o factură PDF la nevoie.")
        # The ID is accepted either way so a first bill can be uploaded for it.
        st.session_state.user_id = user_id

    uploaded_file = st.file_uploader("Încarcă factura PDF", type="pdf")
    if uploaded_file and st.session_state.user_id:
        # The uploader keeps holding the file across reruns. Remember which
        # upload was stored so it is not appended again on every interaction.
        upload_key = (st.session_state.user_id, uploaded_file.name, uploaded_file.size)
        if st.session_state.get("saved_upload_key") != upload_key:
            bill_data = parse_pdf_to_json(uploaded_file)
            existing_data = load_user_data(st.session_state.user_id)
            existing_data.setdefault("bills", []).append(bill_data)
            file_path = os.path.join("data", "user_data", f"user_data_{st.session_state['user_id']}.json")
            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            with open(file_path, "w", encoding="utf-8") as file:
                json.dump(existing_data, file)
            st.session_state["saved_upload_key"] = upload_key
        st.success("Factura a fost încărcată și salvată cu succes!")

    if st.session_state.user_id:
        data = load_user_data(st.session_state.user_id)
        st.write(f"Phone Number: {st.session_state.user_id}")
        st.write("Facturi existente:")
        for bill in data.get("bills", []):
            st.write(bill)
    else:
        st.info("Introduceți un ID și/sau încărcați o factură PDF pentru a continua.")

    # Initialize the conversation. "context_prompt_added" records whether the
    # prompt carrying the bill data has already been injected.
    if "messages" not in st.session_state:
        st.session_state["messages"] = [
            {"role": "assistant", "content": "Cu ce te pot ajuta?"}
        ]
    if "context_prompt_added" not in st.session_state:
        st.session_state.context_prompt_added = False

    st.write("---")
    st.subheader("Chat")

    for msg in st.session_state["messages"]:
        st.chat_message(msg["role"]).write(msg["content"])

    if prompt := st.chat_input("Introduceți întrebarea aici:"):
        if not st.session_state.user_id:
            st.error("Trebuie să introduceți un număr de telefon valid sau să încărcați date.")
            return

        # The first question is wrapped in the bill context exactly once;
        # later questions are sent as typed.
        if not st.session_state.context_prompt_added:
            final_prompt = process_query(prompt, st.session_state["user_id"])
            if final_prompt is None:
                st.stop()
            st.session_state["messages"].append({"role": "user", "content": final_prompt})
            st.session_state.context_prompt_added = True
        else:
            st.session_state["messages"].append({"role": "user", "content": prompt})

        # Display the latest user message in the chat.
        st.chat_message("user").write(st.session_state["messages"][-1]["content"])

        # Send the entire conversation to the model.
        completion = client.chat.completions.create(
            model="gpt-4",
            messages=st.session_state["messages"],
        )
        # The message content is optional in the API response; treat a
        # missing content as an empty reply instead of crashing on .strip().
        response_text = (completion.choices[0].message.content or "").strip()

        st.session_state["messages"].append({"role": "assistant", "content": response_text})
        st.chat_message("assistant").write(response_text)

        # "usage" can be present but None, so test the value, not the attribute.
        usage = getattr(completion, "usage", None)
        if usage is not None:
            st.write("Prompt tokens:", usage.prompt_tokens)
            st.write("Completion tokens:", usage.completion_tokens)
            st.write("Total tokens:", usage.total_tokens)
|
214 |
+
|
215 |
+
if __name__ == "__main__":
|
216 |
+
main()
|
data/Fact1.png
DELETED
Binary file (381 kB)
|
|
data/Factura-664159569.pdf
DELETED
Binary file (114 kB)
|
|
data/Factura-Vodafone_03.05.pdf
DELETED
Binary file (136 kB)
|
|
data/VDF85920235.pdf
DELETED
Binary file (93.8 kB)
|
|
data/bill_VDF682568817.json
ADDED
@@ -0,0 +1,195 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"billDate": "2024-12-17",
|
3 |
+
"billNo": "VDF682568817",
|
4 |
+
"amountDue": 286.96,
|
5 |
+
"costFromExpiredDiscounts": 3.04,
|
6 |
+
"extraCharge": 99.31,
|
7 |
+
"taxItem": [
|
8 |
+
{
|
9 |
+
"cat": "Total factura curenta",
|
10 |
+
"amt": 201.38
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"cat": "TVA 19%",
|
14 |
+
"amt": 32.15
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"cat": "Total rate terminal",
|
18 |
+
"amt": 85.58
|
19 |
+
},
|
20 |
+
{
|
21 |
+
"cat": "Abonamente si extraoptiuni",
|
22 |
+
"amt": 158.54
|
23 |
+
},
|
24 |
+
{
|
25 |
+
"cat": "Reduceri",
|
26 |
+
"amt": -3.04
|
27 |
+
},
|
28 |
+
{
|
29 |
+
"cat": "Servicii utilizate",
|
30 |
+
"amt": 13.73
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"cat": "Sold precedent",
|
34 |
+
"amt": 184.63
|
35 |
+
},
|
36 |
+
{
|
37 |
+
"cat": "Total platit din sold precedent",
|
38 |
+
"amt": 184.63
|
39 |
+
},
|
40 |
+
{
|
41 |
+
"cat": "Total de plata",
|
42 |
+
"amt": 286.96
|
43 |
+
}
|
44 |
+
],
|
45 |
+
"subscribers": [
|
46 |
+
{
|
47 |
+
"billSummaryItem": [
|
48 |
+
{
|
49 |
+
"cat": "Servicii Internationale",
|
50 |
+
"amt": 4.18,
|
51 |
+
"name": "Taxa 10 SMS"
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"cat": "Reduceri",
|
55 |
+
"amt": -2.94,
|
56 |
+
"name": "Reducere Secure Net"
|
57 |
+
},
|
58 |
+
{
|
59 |
+
"cat": "Reduceri",
|
60 |
+
"amt": -0.1,
|
61 |
+
"name": "Reducere Secure Net"
|
62 |
+
},
|
63 |
+
{
|
64 |
+
"cat": "Abonamente si extraoptiuni",
|
65 |
+
"amt": -2.04,
|
66 |
+
"name": "Red 18"
|
67 |
+
},
|
68 |
+
{
|
69 |
+
"cat": "Abonamente si extraoptiuni",
|
70 |
+
"amt": 2.24,
|
71 |
+
"name": "Red 19"
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"cat": "Abonamente si extraoptiuni",
|
75 |
+
"amt": 66.93,
|
76 |
+
"name": "Red 19"
|
77 |
+
},
|
78 |
+
{
|
79 |
+
"cat": "Abonamente si extraoptiuni",
|
80 |
+
"amt": 0.1,
|
81 |
+
"name": "Secure Net"
|
82 |
+
},
|
83 |
+
{
|
84 |
+
"cat": "Abonamente si extraoptiuni",
|
85 |
+
"amt": 2.94,
|
86 |
+
"name": "Secure Net"
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"cat": "Abonamente si extraoptiuni",
|
90 |
+
"amt": -4.18,
|
91 |
+
"name": "Reducere promotionala 12 luni"
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"cat": "Abonamente si extraoptiuni",
|
95 |
+
"amt": -0.15,
|
96 |
+
"name": "Reducere promotionala 12 luni"
|
97 |
+
},
|
98 |
+
{
|
99 |
+
"cat": "Rate terminal",
|
100 |
+
"amt": 25.87,
|
101 |
+
"name": "Promotie Vodafone Smartphone cu plata in rate 1/36"
|
102 |
+
},
|
103 |
+
{
|
104 |
+
"cat": "Rate terminal",
|
105 |
+
"amt": 59.71,
|
106 |
+
"name": "Promotie Vodafone Smartphone cu plata in rate 1/36"
|
107 |
+
},
|
108 |
+
{
|
109 |
+
"cat": "Total sume datorate rate terminal",
|
110 |
+
"amt": 85.58
|
111 |
+
}
|
112 |
+
],
|
113 |
+
"logicalResource": "722339918",
|
114 |
+
"netAmount": 66.98,
|
115 |
+
"productRef": {
|
116 |
+
"name": "Red 19",
|
117 |
+
"@referredType": "Voce mobila"
|
118 |
+
},
|
119 |
+
"subscriberExtraCost": 89.76,
|
120 |
+
"firstBill": false,
|
121 |
+
"planChangeProrata": true,
|
122 |
+
"firstBillForBan": false,
|
123 |
+
"prorateCalc": {
|
124 |
+
"billDate": "2024-12-17T00:00:00Z",
|
125 |
+
"oldPlan": {
|
126 |
+
"name": "Red 18",
|
127 |
+
"value": -2.04,
|
128 |
+
"prorateAmount": -2.04
|
129 |
+
},
|
130 |
+
"currentPlan": {
|
131 |
+
"name": "Red 19",
|
132 |
+
"value": 66.93,
|
133 |
+
"prorateAmount": 2.24,
|
134 |
+
"valuePerDay": 2.1590323
|
135 |
+
},
|
136 |
+
"prorateDays": 1,
|
137 |
+
"daysBcClosed": 30.0,
|
138 |
+
"daysBcCurrent": 31.0,
|
139 |
+
"subChangeDate": "2024-12-16T00:00"
|
140 |
+
}
|
141 |
+
},
|
142 |
+
{
|
143 |
+
"billSummaryItem": [
|
144 |
+
{
|
145 |
+
"cat": "Abonamente si extraoptiuni",
|
146 |
+
"amt": 43.04,
|
147 |
+
"name": "Red 15"
|
148 |
+
}
|
149 |
+
],
|
150 |
+
"logicalResource": "738912539",
|
151 |
+
"netAmount": 43.04,
|
152 |
+
"productRef": {
|
153 |
+
"name": "RED 15",
|
154 |
+
"@referredType": "Voce mobila"
|
155 |
+
},
|
156 |
+
"firstBill": false,
|
157 |
+
"planChangeProrata": false,
|
158 |
+
"firstBillForBan": false
|
159 |
+
},
|
160 |
+
{
|
161 |
+
"billSummaryItem": [
|
162 |
+
{
|
163 |
+
"cat": "Servicii Nationale",
|
164 |
+
"amt": 5.37,
|
165 |
+
"name": "Apel Nr. Sp. Rel. cu Clientii"
|
166 |
+
},
|
167 |
+
{
|
168 |
+
"cat": "Servicii Internationale",
|
169 |
+
"amt": 4.18,
|
170 |
+
"name": "Taxa 30 min grupa 1 tari"
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"cat": "Abonamente si extraoptiuni",
|
174 |
+
"amt": 53.84,
|
175 |
+
"name": "Red 12"
|
176 |
+
},
|
177 |
+
{
|
178 |
+
"cat": "Abonamente si extraoptiuni",
|
179 |
+
"amt": -4.18,
|
180 |
+
"name": "Reducere promo"
|
181 |
+
}
|
182 |
+
],
|
183 |
+
"logicalResource": "727890354",
|
184 |
+
"netAmount": 59.21,
|
185 |
+
"productRef": {
|
186 |
+
"name": "Red 12",
|
187 |
+
"@referredType": "Voce mobila"
|
188 |
+
},
|
189 |
+
"subscriberExtraCost": 9.549999,
|
190 |
+
"firstBill": false,
|
191 |
+
"planChangeProrata": false,
|
192 |
+
"firstBillForBan": false
|
193 |
+
}
|
194 |
+
]
|
195 |
+
}
|
data/bill_VDF682981042.json
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"billDate": "2024-12-20",
|
3 |
+
"billNo": "VDF682981042",
|
4 |
+
"amountDue": 10.15,
|
5 |
+
"costFromExpiredDiscounts": -0.019996643,
|
6 |
+
"extraCharge": 10.15,
|
7 |
+
"taxItem": [
|
8 |
+
{
|
9 |
+
"cat": "Total factura curenta",
|
10 |
+
"amt": 10.15
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"cat": "Total sume neincluse in baza de impozitare TVA",
|
14 |
+
"amt": 10.15
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"cat": "Abonamente si extraoptiuni",
|
18 |
+
"amt": 107.69
|
19 |
+
},
|
20 |
+
{
|
21 |
+
"cat": "Reduceri",
|
22 |
+
"amt": -107.69
|
23 |
+
},
|
24 |
+
{
|
25 |
+
"cat": "Sold precedent",
|
26 |
+
"amt": 10.15
|
27 |
+
},
|
28 |
+
{
|
29 |
+
"cat": "Total platit din sold precedent",
|
30 |
+
"amt": 10.15
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"cat": "Total de plata",
|
34 |
+
"amt": 10.15
|
35 |
+
}
|
36 |
+
],
|
37 |
+
"subscribers": [
|
38 |
+
{
|
39 |
+
"billSummaryItem": [
|
40 |
+
{
|
41 |
+
"cat": "Total Sume incasate in numele tertilor",
|
42 |
+
"amt": 10.15
|
43 |
+
},
|
44 |
+
{
|
45 |
+
"cat": "Sume incasate in numele tertilor",
|
46 |
+
"amt": 10.15,
|
47 |
+
"name": "Donatii SMS (fara TVA)"
|
48 |
+
},
|
49 |
+
{
|
50 |
+
"cat": "Reduceri",
|
51 |
+
"amt": -1.07,
|
52 |
+
"name": "Reducere Vodafone Secure Net"
|
53 |
+
},
|
54 |
+
{
|
55 |
+
"cat": "Reduceri",
|
56 |
+
"amt": -106.62,
|
57 |
+
"name": "Reducere abonament lunar"
|
58 |
+
},
|
59 |
+
{
|
60 |
+
"cat": "Abonamente si extraoptiuni",
|
61 |
+
"amt": 106.62,
|
62 |
+
"name": "RED INFINITY 25 DEMO"
|
63 |
+
},
|
64 |
+
{
|
65 |
+
"cat": "Abonamente si extraoptiuni",
|
66 |
+
"amt": 1.07,
|
67 |
+
"name": "Vodafone Secure Net"
|
68 |
+
}
|
69 |
+
],
|
70 |
+
"logicalResource": "721714413",
|
71 |
+
"netAmount": 0.0,
|
72 |
+
"productRef": {
|
73 |
+
"name": "RED INFINITY 25 DEMO",
|
74 |
+
"@referredType": "Voce mobila"
|
75 |
+
},
|
76 |
+
"subscriberExtraCost": 10.15,
|
77 |
+
"firstBill": false,
|
78 |
+
"planChangeProrata": false,
|
79 |
+
"firstBillForBan": false
|
80 |
+
}
|
81 |
+
]
|
82 |
+
}
|
data/bill_VDF683003873.json
ADDED
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"billDate": "2024-12-20",
|
3 |
+
"billNo": "VDF683003873",
|
4 |
+
"amountDue": 239.87,
|
5 |
+
"extraCharge": 10.84,
|
6 |
+
"taxItem": [
|
7 |
+
{
|
8 |
+
"cat": "Total factura curenta",
|
9 |
+
"amt": 239.87
|
10 |
+
},
|
11 |
+
{
|
12 |
+
"cat": "TVA 19%",
|
13 |
+
"amt": 38.3
|
14 |
+
},
|
15 |
+
{
|
16 |
+
"cat": "Abonamente si extraoptiuni",
|
17 |
+
"amt": 190.73
|
18 |
+
},
|
19 |
+
{
|
20 |
+
"cat": "Servicii utilizate",
|
21 |
+
"amt": 10.84
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"cat": "Sold precedent",
|
25 |
+
"amt": 229.46
|
26 |
+
},
|
27 |
+
{
|
28 |
+
"cat": "Total platit din sold precedent",
|
29 |
+
"amt": 229.46
|
30 |
+
},
|
31 |
+
{
|
32 |
+
"cat": "Total de plata",
|
33 |
+
"amt": 239.87
|
34 |
+
}
|
35 |
+
],
|
36 |
+
"subscribers": [
|
37 |
+
{
|
38 |
+
"billSummaryItem": [
|
39 |
+
{
|
40 |
+
"cat": "Servicii Nationale",
|
41 |
+
"amt": 5.37,
|
42 |
+
"name": "Apel Nr. Sp. Rel. cu Clientii"
|
43 |
+
},
|
44 |
+
{
|
45 |
+
"cat": "Servicii Nationale",
|
46 |
+
"amt": 5.47,
|
47 |
+
"name": "Servicii in parteneriat (SMS)"
|
48 |
+
},
|
49 |
+
{
|
50 |
+
"cat": "Abonamente si extraoptiuni",
|
51 |
+
"amt": 90.29,
|
52 |
+
"name": "Red 35"
|
53 |
+
},
|
54 |
+
{
|
55 |
+
"cat": "Abonamente si extraoptiuni",
|
56 |
+
"amt": 2.09,
|
57 |
+
"name": "Secure Net"
|
58 |
+
}
|
59 |
+
],
|
60 |
+
"logicalResource": "727723568",
|
61 |
+
"netAmount": 103.22,
|
62 |
+
"productRef": {
|
63 |
+
"name": "RED 35",
|
64 |
+
"@referredType": "Voce mobila"
|
65 |
+
},
|
66 |
+
"subscriberExtraCost": 10.84,
|
67 |
+
"firstBill": false,
|
68 |
+
"planChangeProrata": false,
|
69 |
+
"firstBillForBan": false
|
70 |
+
},
|
71 |
+
{
|
72 |
+
"billSummaryItem": [
|
73 |
+
{
|
74 |
+
"cat": "Abonamente si extraoptiuni",
|
75 |
+
"amt": 37.61,
|
76 |
+
"name": "Smart Net 10"
|
77 |
+
}
|
78 |
+
],
|
79 |
+
"logicalResource": "733934342",
|
80 |
+
"netAmount": 37.61,
|
81 |
+
"productRef": {
|
82 |
+
"name": "Smart Net 10",
|
83 |
+
"@referredType": "Date mobile nelimitat"
|
84 |
+
},
|
85 |
+
"firstBill": false,
|
86 |
+
"planChangeProrata": false,
|
87 |
+
"firstBillForBan": false
|
88 |
+
},
|
89 |
+
{
|
90 |
+
"billSummaryItem": [
|
91 |
+
{
|
92 |
+
"cat": "Abonamente si extraoptiuni",
|
93 |
+
"amt": 58.65,
|
94 |
+
"name": "Red 13"
|
95 |
+
},
|
96 |
+
{
|
97 |
+
"cat": "Abonamente si extraoptiuni",
|
98 |
+
"amt": 2.09,
|
99 |
+
"name": "Secure Net"
|
100 |
+
}
|
101 |
+
],
|
102 |
+
"logicalResource": "724899108",
|
103 |
+
"netAmount": 60.74,
|
104 |
+
"productRef": {
|
105 |
+
"name": "RED 13",
|
106 |
+
"@referredType": "Voce mobila"
|
107 |
+
},
|
108 |
+
"firstBill": false,
|
109 |
+
"planChangeProrata": false,
|
110 |
+
"firstBillForBan": false
|
111 |
+
}
|
112 |
+
]
|
113 |
+
}
|
data/bill_VDF685952701.json
ADDED
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"billDate": "2025-01-06",
|
3 |
+
"billNo": "VDF685952701",
|
4 |
+
"amountDue": 5550.46,
|
5 |
+
"costFromExpiredDiscounts": -1.6899998,
|
6 |
+
"extraCharge": 5445.53,
|
7 |
+
"taxItem": [
|
8 |
+
{
|
9 |
+
"cat": "Total factura curenta",
|
10 |
+
"amt": 104.93
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"cat": "TVA 19%",
|
14 |
+
"amt": 16.75
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"cat": "Total rate terminal",
|
18 |
+
"amt": 5445.53
|
19 |
+
},
|
20 |
+
{
|
21 |
+
"cat": "Abonamente si extraoptiuni",
|
22 |
+
"amt": 91.17
|
23 |
+
},
|
24 |
+
{
|
25 |
+
"cat": "Reduceri",
|
26 |
+
"amt": -2.99
|
27 |
+
},
|
28 |
+
{
|
29 |
+
"cat": "Rest plata",
|
30 |
+
"amt": -5073.74
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"cat": "Sold precedent",
|
34 |
+
"amt": -5073.74
|
35 |
+
},
|
36 |
+
{
|
37 |
+
"cat": "Total de plata",
|
38 |
+
"amt": 476.72
|
39 |
+
}
|
40 |
+
],
|
41 |
+
"subscribers": [
|
42 |
+
{
|
43 |
+
"billSummaryItem": [
|
44 |
+
{
|
45 |
+
"cat": "Abonamente si extraoptiuni",
|
46 |
+
"amt": 34.08,
|
47 |
+
"name": "Red 11"
|
48 |
+
},
|
49 |
+
{
|
50 |
+
"cat": "Abonamente si extraoptiuni",
|
51 |
+
"amt": -8.52,
|
52 |
+
"name": "Reducere promo"
|
53 |
+
},
|
54 |
+
{
|
55 |
+
"cat": "Abonamente si extraoptiuni",
|
56 |
+
"amt": 2.99,
|
57 |
+
"name": "Secure Net"
|
58 |
+
},
|
59 |
+
{
|
60 |
+
"cat": "Abonamente si extraoptiuni",
|
61 |
+
"amt": -4.26,
|
62 |
+
"name": "Reducere promotionala 12 luni"
|
63 |
+
},
|
64 |
+
{
|
65 |
+
"cat": "Rate terminal",
|
66 |
+
"amt": 70.99,
|
67 |
+
"name": "Promotie Vodafone Smartphone cu plata in rate 12/36"
|
68 |
+
},
|
69 |
+
{
|
70 |
+
"cat": "Total sume datorate rate terminal",
|
71 |
+
"amt": 70.99
|
72 |
+
}
|
73 |
+
],
|
74 |
+
"logicalResource": "724015962",
|
75 |
+
"netAmount": 24.29,
|
76 |
+
"productRef": {
|
77 |
+
"name": "RED 11",
|
78 |
+
"@referredType": "Voce mobila"
|
79 |
+
},
|
80 |
+
"subscriberExtraCost": 70.99,
|
81 |
+
"firstBill": false,
|
82 |
+
"planChangeProrata": false,
|
83 |
+
"firstBillForBan": false
|
84 |
+
},
|
85 |
+
{
|
86 |
+
"billSummaryItem": [
|
87 |
+
{
|
88 |
+
"cat": "Abonamente si extraoptiuni",
|
89 |
+
"amt": 29.82,
|
90 |
+
"name": "Abonament Vodafone WiFi Instant Nelimitat+"
|
91 |
+
}
|
92 |
+
],
|
93 |
+
"logicalResource": "373920691",
|
94 |
+
"netAmount": 29.82,
|
95 |
+
"productRef": {
|
96 |
+
"name": "Wi-Fi Instant Nelimitat+",
|
97 |
+
"@referredType": "Date mobile nelimitat"
|
98 |
+
},
|
99 |
+
"firstBill": false,
|
100 |
+
"planChangeProrata": false,
|
101 |
+
"firstBillForBan": false
|
102 |
+
},
|
103 |
+
{
|
104 |
+
"billSummaryItem": [
|
105 |
+
{
|
106 |
+
"cat": "Reduceri",
|
107 |
+
"amt": -2.99,
|
108 |
+
"name": "Reducere Secure Net"
|
109 |
+
},
|
110 |
+
{
|
111 |
+
"cat": "Abonamente si extraoptiuni",
|
112 |
+
"amt": 38.33,
|
113 |
+
"name": "RED Start"
|
114 |
+
},
|
115 |
+
{
|
116 |
+
"cat": "Abonamente si extraoptiuni",
|
117 |
+
"amt": 2.99,
|
118 |
+
"name": "Secure Net"
|
119 |
+
},
|
120 |
+
{
|
121 |
+
"cat": "Abonamente si extraoptiuni",
|
122 |
+
"amt": -4.26,
|
123 |
+
"name": "Reducere promotionala 12 luni"
|
124 |
+
},
|
125 |
+
{
|
126 |
+
"cat": "Rate terminal",
|
127 |
+
"amt": 5374.54,
|
128 |
+
"name": "Valoare rate ramase pana la expirare contract"
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"cat": "Total sume datorate rate terminal",
|
132 |
+
"amt": 5374.54
|
133 |
+
}
|
134 |
+
],
|
135 |
+
"logicalResource": "724077190",
|
136 |
+
"netAmount": 34.07,
|
137 |
+
"productRef": {
|
138 |
+
"name": "RED Start",
|
139 |
+
"@referredType": "Voce mobila"
|
140 |
+
},
|
141 |
+
"subscriberExtraCost": 5374.54,
|
142 |
+
"firstBill": false,
|
143 |
+
"planChangeProrata": false,
|
144 |
+
"firstBillForBan": false
|
145 |
+
}
|
146 |
+
]
|
147 |
+
}
|
src/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# This file is intentionally left blank.
|
src/app.py
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from chat.context import ChatContext
|
3 |
+
from chat.bill_comparison import compare_bills
|
4 |
+
from chat.user_info import UserInfo
|
5 |
+
from chat.conversation import Conversation
|
6 |
+
|
7 |
+
def main():
    """Render the Streamlit page of the telecom bill chat assistant.

    Streamlit re-runs this function on every widget interaction, so the
    chat history is kept in ``st.session_state`` instead of being rebuilt
    (and therefore emptied) on each run.
    """
    st.title("Telecom Bill Chat Assistant")

    user_info = UserInfo()
    # Reuse the context created on the first run of this session.
    if "chat_context" not in st.session_state:
        st.session_state["chat_context"] = ChatContext()
    chat_context = st.session_state["chat_context"]
    st.write(f"ChatContext: {chat_context}")  # Debug statement
    conversation = Conversation(user_info, chat_context)
    st.write(f"Conversation: {conversation}")  # Debug statement

    user_id = st.text_input("Enter your user ID:")
    if user_id:
        user_info.load_user_data(user_id)
        st.session_state['user_id'] = user_id

    user_query = st.text_input("Ask about your bills:")
    # The button is evaluated first so it is always rendered.
    if st.button("Submit") and user_query:
        # handle_query() records both the user message and the reply in
        # chat_context, so they must not be added a second time here.
        response = conversation.handle_query(user_query)
        st.write(response)
|
28 |
+
|
29 |
+
if __name__ == "__main__":
|
30 |
+
main()
|
src/chat/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# This file is intentionally left blank.
|
src/chat/bill_comparison.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
def compare_bills(bills):
    """Compare the ``'amount'`` of each bill with the bill before it.

    Args:
        bills: Sequence of mappings that each provide an ``'amount'`` entry,
            ordered from oldest to newest.

    Returns:
        The string ``"Not enough bills to compare."`` when fewer than two
        bills are given; otherwise a dict with one entry per consecutive
        pair, holding both amounts and their difference (current minus
        previous).
    """
    if len(bills) < 2:
        return "Not enough bills to compare."

    report = {}
    # Walk the consecutive (older, newer) pairs, numbering bills from 1.
    for position, (older, newer) in enumerate(zip(bills, bills[1:]), start=1):
        label = f'Comparison between Bill {position} and Bill {position + 1}'
        report[label] = {
            'Previous Bill Amount': older['amount'],
            'Current Bill Amount': newer['amount'],
            'Difference': newer['amount'] - older['amount'],
        }
    return report
|
src/chat/bill_parsing
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pdfplumber
|
2 |
+
|
3 |
+
def _parse_amount(token):
    """Convert an amount token such as ``12,50`` or ``1.234,56`` to float.

    Returns ``None`` when *token* is not a number.
    """
    cleaned = token.strip()
    if ',' in cleaned and '.' in cleaned:
        # Both separators present: the right-most one is the decimal mark,
        # the other one only groups thousands.
        if cleaned.rfind(',') > cleaned.rfind('.'):
            cleaned = cleaned.replace('.', '').replace(',', '.')
        else:
            cleaned = cleaned.replace(',', '')
    else:
        cleaned = cleaned.replace(',', '.')
    try:
        return float(cleaned)
    except ValueError:
        return None


def extrage_costuri_din_factura(pdf_path):
    """Extract cost figures from the text of a PDF invoice.

    Every text line of every page is checked against the labels below, in
    this order; the first label found in the line decides the key, and the
    last whitespace-separated token of the line is taken as the amount.
    Later matches overwrite earlier ones for the same key.

    Args:
        pdf_path: Path of the PDF file, passed to ``pdfplumber.open``.

    Returns:
        dict with any of the keys ``'abonament'``, ``'consum_suplimentar'``,
        ``'taxe'`` and ``'total'`` mapped to floats. Lines whose last token
        is not a number are skipped instead of raising ``ValueError``.
    """
    labels = (
        ('Abonament', 'abonament'),
        ('Consum suplimentar', 'consum_suplimentar'),
        ('Taxe', 'taxe'),
        ('Total', 'total'),
    )
    costuri = {}
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            # Guard against pages that yield no text at all.
            text = page.extract_text() or ''
            for line in text.split('\n'):
                for marker, key in labels:
                    if marker not in line:
                        continue
                    amount = _parse_amount(line.split()[-1])
                    if amount is not None:
                        costuri[key] = amount
                    # Only the first matching label applies to a line.
                    break
    return costuri
|
19 |
+
|
20 |
+
# Run the sample extraction only when this file is executed as a script, so
# that importing the module does not try to open a machine-specific file.
if __name__ == "__main__":
    # Path to the PDF invoice
    pdf_path = 'C:\\Users\\ZZ029K826\\Documents\\GitHub\\llm-bill-chat-app\\FACTURA FX-24107160858.PDF'

    # Extract the costs from the invoice
    costuri_factura = extrage_costuri_din_factura(pdf_path)
    print(costuri_factura)
|
src/chat/context.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
class ChatContext:
    """In-memory holder for a chat transcript and the current user's info."""

    def __init__(self):
        # User info starts as an empty dict until set_user_info() is called.
        self.user_info = dict()
        # Transcript entries in insertion order, each a dict with the keys
        # "role" and "content".
        self.messages = list()

    def add_message(self, role, content):
        """Append one transcript entry built from *role* and *content*."""
        entry = dict(role=role, content=content)
        self.messages.append(entry)

    def get_context(self):
        """Return the transcript list itself (not a copy)."""
        return self.messages

    def set_user_info(self, user_info):
        """Replace the stored user info with *user_info*."""
        self.user_info = user_info

    def get_user_info(self):
        """Return the stored user info."""
        return self.user_info

    def clear_context(self):
        """Drop the transcript by rebinding ``messages`` to a new empty list."""
        self.messages = list()
|
src/chat/conversation.py
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.chat.context import ChatContext
|
2 |
+
|
3 |
+
class Conversation:
    """Answer bill-related queries and log the exchange in a chat context.

    Attributes are the two collaborators passed to the constructor:
    ``user_info`` must provide ``get_bills()`` and ``chat_context`` must
    provide ``add_message(role, content)``.
    """

    def __init__(self, user_info, chat_context: "ChatContext"):
        self.user_info = user_info
        self.chat_context = chat_context

    def handle_query(self, query):
        """Answer *query* and record both sides of the exchange.

        The query is stored under the role ``"User"`` and the reply under
        ``"Assistant"``; the reply is returned.
        """
        self.chat_context.add_message("User", query)
        response = self.generate_response(query)
        self.chat_context.add_message("Assistant", response)
        return response

    def generate_response(self, query):
        """Return bill information for bill-related queries, else a refusal.

        A query counts as bill-related when it contains the stem "factur"
        in any letter case, which covers "factura", "Factura", "facturi"
        and similar forms.
        """
        if "factur" in query.casefold():
            return self.get_bill_info()
        return "Imi pare rau, te pot ajuta doar cu informatii despre factura ta."

    def get_bill_info(self):
        """Compare the user's most recent bills.

        Returns a fixed message when fewer than two bills are stored;
        otherwise the result of ``compare_bills`` for the last four bills.
        """
        bills = self.user_info.get_bills()
        if len(bills) < 2:
            return "I need at least 2 bills to compare."
        return self.compare_bills(bills[-4:])

    def compare_bills(self, last_bills):
        """Return a textual summary of *last_bills*."""
        # Placeholder for bill comparison logic
        return f"Comparing the last four bills: {last_bills}"
|
src/chat/llm.py
ADDED
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import torch
|
3 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
4 |
+
|
5 |
+
available_models = [
|
6 |
+
"Qwen/Qwen1.5-7B-Chat", # Example: This is our Qwen model
|
7 |
+
]
|
8 |
+
|
9 |
+
def initialize_chat_model(model_name):
    """Load *model_name* and its tokenizer into ``st.session_state`` once.

    Nothing is loaded when a model is already cached under the same name.
    On a (re)load these session keys are written: ``chat_tokenizer``,
    ``chat_model``, ``device`` and ``model_name``.
    """
    already_loaded = (
        "chat_model" in st.session_state
        and st.session_state.model_name == model_name
    )
    if already_loaded:
        return

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # device_map="auto" already decides where the weights are placed, so the
    # model must not be moved again with .to() after loading.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype="auto",
        device_map="auto",
    )

    # Inputs have to be sent to the device the model reports for itself.
    device = str(model.device)

    # Save in session state
    st.session_state.chat_tokenizer = tokenizer
    st.session_state.chat_model = model
    st.session_state.device = device
    st.session_state.model_name = model_name
|
31 |
+
|
32 |
+
def generate_response(
    user_input: str,
    model_name: str,
    temperature: float = 0.7,
    top_k: int = 50,
    top_p: float = 0.9,
    repetition_penalty: float = 1.2
) -> str:
    """Generate a sampled chat reply to *user_input* with the cached model.

    Args:
        user_input: Text of the user turn.
        model_name: Model identifier handed to ``initialize_chat_model``.
        temperature: Sampling temperature passed to ``model.generate``.
        top_k: Top-k cutoff passed to ``model.generate``.
        top_p: Nucleus cutoff passed to ``model.generate``.
        repetition_penalty: Repetition penalty passed to ``model.generate``.

    Returns:
        The decoded newly generated text, without the prompt and without
        special tokens.
    """
    # Make sure model is initialized
    initialize_chat_model(model_name)

    tokenizer = st.session_state.chat_tokenizer
    model = st.session_state.chat_model
    device = st.session_state.device

    # Construct chat messages for the model's chat template
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": user_input}
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    # Tokenize and move to the chosen device
    model_inputs = tokenizer([text], return_tensors="pt").to(device)

    with torch.no_grad():
        # Pass the whole encoding so the attention mask reaches generate()
        # together with the input ids.
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=512,  # Adjust as needed
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            do_sample=True
        )

    # Strip the prompt tokens from each row to keep only the new text
    new_token_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]

    return tokenizer.batch_decode(new_token_ids, skip_special_tokens=True)[0]
|
src/chat/test_llm.ipynb
ADDED
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [
|
8 |
+
{
|
9 |
+
"name": "stderr",
|
10 |
+
"output_type": "stream",
|
11 |
+
"text": [
|
12 |
+
"c:\\Users\\ZZ029K826\\Documents\\GitHub\\llm-bill-chat-app\\.venv\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
13 |
+
" from .autonotebook import tqdm as notebook_tqdm\n"
|
14 |
+
]
|
15 |
+
}
|
16 |
+
],
|
17 |
+
"source": [
|
18 |
+
"from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline\n"
|
19 |
+
]
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"cell_type": "code",
|
23 |
+
"execution_count": 2,
|
24 |
+
"metadata": {},
|
25 |
+
"outputs": [],
|
26 |
+
"source": [
|
27 |
+
"model_name = \"ai-forever/mGPT-1.3B-romanian\"\n",
|
28 |
+
"tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
|
29 |
+
"model = AutoModelForCausalLM.from_pretrained(model_name)\n"
|
30 |
+
]
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"cell_type": "code",
|
34 |
+
"execution_count": 3,
|
35 |
+
"metadata": {},
|
36 |
+
"outputs": [
|
37 |
+
{
|
38 |
+
"name": "stderr",
|
39 |
+
"output_type": "stream",
|
40 |
+
"text": [
|
41 |
+
"Device set to use cpu\n"
|
42 |
+
]
|
43 |
+
}
|
44 |
+
],
|
45 |
+
"source": [
|
46 |
+
"chat_model = pipeline(\n",
|
47 |
+
" \"text-generation\",\n",
|
48 |
+
" model=model,\n",
|
49 |
+
" tokenizer=tokenizer,\n",
|
50 |
+
" device=-1, # Use CPU\n",
|
51 |
+
" max_length=300,\n",
|
52 |
+
" max_new_tokens=100,\n",
|
53 |
+
" truncation=True\n",
|
54 |
+
")\n"
|
55 |
+
]
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"cell_type": "code",
|
59 |
+
"execution_count": 6,
|
60 |
+
"metadata": {},
|
61 |
+
"outputs": [
|
62 |
+
{
|
63 |
+
"name": "stderr",
|
64 |
+
"output_type": "stream",
|
65 |
+
"text": [
|
66 |
+
"c:\\Users\\ZZ029K826\\Documents\\GitHub\\llm-bill-chat-app\\.venv\\Lib\\site-packages\\transformers\\generation\\configuration_utils.py:628: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.7` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.\n",
|
67 |
+
" warnings.warn(\n",
|
68 |
+
"c:\\Users\\ZZ029K826\\Documents\\GitHub\\llm-bill-chat-app\\.venv\\Lib\\site-packages\\transformers\\generation\\configuration_utils.py:633: UserWarning: `do_sample` is set to `False`. However, `top_p` is set to `0.9` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `top_p`.\n",
|
69 |
+
" warnings.warn(\n",
|
70 |
+
"Both `max_new_tokens` (=100) and `max_length`(=300) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)\n"
|
71 |
+
]
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"name": "stdout",
|
75 |
+
"output_type": "stream",
|
76 |
+
"text": [
|
77 |
+
"Cum te simți astăzi?\n",
|
78 |
+
"\n",
|
79 |
+
"- Nu mă mai gândesc la asta. - Ba da, sunt bine!\n",
|
80 |
+
"\n",
|
81 |
+
"Nu e nimic de făcut în legatura cu tata... dar nu vreau ca el sa se raneasca pe mine acolo unde este locul meu preferat: un apartament mare si frumoasa casa din New York City care are o proprietate privată pentru copiii lui Michael Jackson şi familia ei.\n",
|
82 |
+
"\n",
|
83 |
+
"\n"
|
84 |
+
]
|
85 |
+
}
|
86 |
+
],
|
87 |
+
"source": [
|
88 |
+
"def generate_response(user_input, temperature=0.7, top_k=50, top_p=0.9, repetition_penalty=1.2):\n",
|
89 |
+
" response = chat_model(\n",
|
90 |
+
" user_input,\n",
|
91 |
+
" max_new_tokens=100,\n",
|
92 |
+
" temperature=temperature,\n",
|
93 |
+
" top_k=top_k,\n",
|
94 |
+
" top_p=top_p,\n",
|
95 |
+
" repetition_penalty=repetition_penalty\n",
|
96 |
+
" )\n",
|
97 |
+
" return response[0]['generated_text']\n",
|
98 |
+
"\n",
|
99 |
+
"# Test the function with adjusted parameters\n",
|
100 |
+
"user_input = \"Cum te simți astăzi?\"\n",
|
101 |
+
"response = generate_response(user_input)\n",
|
102 |
+
"print(response)"
|
103 |
+
]
|
104 |
+
}
|
105 |
+
],
|
106 |
+
"metadata": {
|
107 |
+
"kernelspec": {
|
108 |
+
"display_name": ".venv",
|
109 |
+
"language": "python",
|
110 |
+
"name": "python3"
|
111 |
+
},
|
112 |
+
"language_info": {
|
113 |
+
"codemirror_mode": {
|
114 |
+
"name": "ipython",
|
115 |
+
"version": 3
|
116 |
+
},
|
117 |
+
"file_extension": ".py",
|
118 |
+
"mimetype": "text/x-python",
|
119 |
+
"name": "python",
|
120 |
+
"nbconvert_exporter": "python",
|
121 |
+
"pygments_lexer": "ipython3",
|
122 |
+
"version": "3.11.5"
|
123 |
+
}
|
124 |
+
},
|
125 |
+
"nbformat": 4,
|
126 |
+
"nbformat_minor": 2
|
127 |
+
}
|
src/chat/user_info.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import os
|
3 |
+
|
4 |
+
class UserInfo:
    """Load and persist one user's data as JSON under ``data/user_data``."""

    def __init__(self):
        self.user_data = {}

    @staticmethod
    def _storage_path(user_id):
        """Return the path of the JSON file that stores *user_id*'s data."""
        # NOTE(review): user_id is used verbatim in the file name; validate
        # it upstream if it can come from untrusted input.
        return os.path.join("data", "user_data", f"user_data_{user_id}.json")

    def _write_user_data(self, user_id):
        """Write ``self.user_data`` to the storage file, creating its folder."""
        file_path = self._storage_path(user_id)
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        with open(file_path, 'w', encoding='utf-8') as file:
            json.dump(self.user_data, file)

    def load_user_data(self, user_id):
        """Load the stored data of *user_id* into ``self.user_data``.

        The file written by ``save_bill_data`` is read first; a file named
        ``user_data_<id>.json`` in the working directory is used as a
        fallback. When neither exists, a record with an empty bill list is
        created and written to the storage file. ``self.user_data`` always
        has a ``"bills"`` key afterwards.
        """
        legacy_path = f"user_data_{user_id}.json"
        for candidate in (self._storage_path(user_id), legacy_path):
            if os.path.exists(candidate):
                with open(candidate, 'r', encoding='utf-8') as file:
                    self.user_data = json.load(file)
                break
        else:
            # No stored data yet: start a new record and persist it.
            self.user_data = {"user_id": user_id, "bills": []}
            self._write_user_data(user_id)

        # Ensure 'bills' key is always initialized
        if "bills" not in self.user_data:
            self.user_data["bills"] = []

    def save_bill_data(self, user_id, bill_data):
        """Append *bill_data* to the bill list and persist the user data."""
        if "bills" not in self.user_data:
            self.user_data["bills"] = []
        self.user_data["bills"].append(bill_data)
        self._write_user_data(user_id)

    def get_bills(self):
        """Return the list of stored bills, or an empty list if none is set."""
        return self.user_data.get("bills", [])
|
src/utils/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# This file is intentionally left blank.
|