joedac-netvigie
committed on
Commit
·
d224a2e
1
Parent(s):
4d86b16
optimize query
Browse files
- app.py +1 -1
- services/database_service.py +24 -32
app.py
CHANGED
@@ -2,7 +2,7 @@ from ui.gradio_ui import GradioUI
|
|
2 |
|
3 |
|
4 |
def main():
|
5 |
-
print("π Launching...")
|
6 |
|
7 |
try:
|
8 |
ui = GradioUI()
|
|
|
2 |
|
3 |
|
4 |
def main():
|
5 |
+
print("π Launching... \n\n")
|
6 |
|
7 |
try:
|
8 |
ui = GradioUI()
|
services/database_service.py
CHANGED
@@ -16,7 +16,7 @@ class DatabaseService:
|
|
16 |
if parquet_file and Path(parquet_file).exists():
|
17 |
self.parquet_path = Path(parquet_file)
|
18 |
self._setup_duckdb()
|
19 |
-
print(f"
|
20 |
else:
|
21 |
if self._download_from_huggingface():
|
22 |
parquet_file = self._find_parquet_file()
|
@@ -65,7 +65,7 @@ class DatabaseService:
|
|
65 |
self.conn = duckdb.connect()
|
66 |
result = self.conn.execute(f"SELECT COUNT(*) FROM '{self.parquet_path}'").fetchone()
|
67 |
total = result[0] if result else 0
|
68 |
-
print(f"
|
69 |
except Exception as e:
|
70 |
print(f"β Error DuckDB: {e}")
|
71 |
raise Exception(f"DuckDB can not be configured: {e}")
|
@@ -185,23 +185,19 @@ class DatabaseService:
|
|
185 |
|
186 |
sql = f"""
|
187 |
SELECT DISTINCT
|
188 |
-
product_name, -- 0
|
189 |
-
brands, -- 1
|
190 |
nutriscore_grade, -- 2
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
serving_size, -- 13
|
202 |
-
quantity, -- 14
|
203 |
-
labels_tags, -- 15
|
204 |
-
({score_calc}) AS score -- 16
|
205 |
FROM '{self.parquet_path}'
|
206 |
WHERE {where_clause}
|
207 |
ORDER BY score DESC, product_name ASC
|
@@ -219,20 +215,16 @@ class DatabaseService:
|
|
219 |
'product_name': clean_name[:100],
|
220 |
'brands': str(row[1])[:50] if row[1] else 'N/A',
|
221 |
'nutriscore_grade': str(row[2]).lower() if row[2] else '',
|
222 |
-
'
|
223 |
-
'
|
224 |
-
'
|
225 |
-
'
|
226 |
-
'
|
227 |
-
'
|
228 |
-
'
|
229 |
-
'
|
230 |
-
'
|
231 |
-
'
|
232 |
-
'serving_size': str(row[13])[:20] if row[13] else 'N/A',
|
233 |
-
'quantity': str(row[14])[:30] if row[14] else 'N/A',
|
234 |
-
'labels': self._clean_tags(row[15]) if row[15] else [],
|
235 |
-
'score': float(row[16]) if row[16] else 0.0,
|
236 |
'mistral_analysis': analysis.get('explanation', '')
|
237 |
})
|
238 |
|
|
|
16 |
if parquet_file and Path(parquet_file).exists():
|
17 |
self.parquet_path = Path(parquet_file)
|
18 |
self._setup_duckdb()
|
19 |
+
print(f"ποΈ Dataset : {self.parquet_path} \n\n")
|
20 |
else:
|
21 |
if self._download_from_huggingface():
|
22 |
parquet_file = self._find_parquet_file()
|
|
|
65 |
self.conn = duckdb.connect()
|
66 |
result = self.conn.execute(f"SELECT COUNT(*) FROM '{self.parquet_path}'").fetchone()
|
67 |
total = result[0] if result else 0
|
68 |
+
print(f"🦆 DuckDB: {total:,} products \n\n")
|
69 |
except Exception as e:
|
70 |
print(f"β Error DuckDB: {e}")
|
71 |
raise Exception(f"DuckDB can not be configured: {e}")
|
|
|
185 |
|
186 |
sql = f"""
|
187 |
SELECT DISTINCT
|
188 |
+
product_name, -- 0
|
189 |
+
brands, -- 1
|
190 |
nutriscore_grade, -- 2
|
191 |
+
nova_group, -- 3
|
192 |
+
categories, -- 4
|
193 |
+
ingredients_n, -- 5
|
194 |
+
additives_n, -- 6
|
195 |
+
allergens_tags, -- 7
|
196 |
+
ingredients_text, -- 8
|
197 |
+
nutriments, -- 9
|
198 |
+
serving_size, -- 10
|
199 |
+
quantity, -- 11
|
200 |
+
({score_calc}) AS score -- 12
|
|
|
|
|
|
|
|
|
201 |
FROM '{self.parquet_path}'
|
202 |
WHERE {where_clause}
|
203 |
ORDER BY score DESC, product_name ASC
|
|
|
215 |
'product_name': clean_name[:100],
|
216 |
'brands': str(row[1])[:50] if row[1] else 'N/A',
|
217 |
'nutriscore_grade': str(row[2]).lower() if row[2] else '',
|
218 |
+
'nova_group': row[3] if row[3] else None,
|
219 |
+
'categories': str(row[4])[:100] if row[4] else 'N/A',
|
220 |
+
'ingredients_count': row[5] if row[5] else 0,
|
221 |
+
'additives_count': row[6] if row[6] else 0,
|
222 |
+
'allergens': self._clean_tags(row[7]),
|
223 |
+
'ingredients_text': str(row[8])[:500] if row[8] else 'N/A',
|
224 |
+
'nutriments': (row[9]) if row[9] else 'N/A',
|
225 |
+
'serving_size': str(row[10])[:20] if row[10] else 'N/A',
|
226 |
+
'quantity': str(row[11])[:30] if row[11] else 'N/A',
|
227 |
+
'score': float(row[12]) if row[12] else 0.0,
|
|
|
|
|
|
|
|
|
228 |
'mistral_analysis': analysis.get('explanation', '')
|
229 |
})
|
230 |
|