joedac-netvigie committed on
Commit
d224a2e
·
1 Parent(s): 4d86b16

optimize query

Browse files
Files changed (2) hide show
  1. app.py +1 -1
  2. services/database_service.py +24 -32
app.py CHANGED
@@ -2,7 +2,7 @@ from ui.gradio_ui import GradioUI
2
 
3
 
4
  def main():
5
- print("🚀 Launching...")
6
 
7
  try:
8
  ui = GradioUI()
 
2
 
3
 
4
  def main():
5
+ print("🚀 Launching... \n\n")
6
 
7
  try:
8
  ui = GradioUI()
services/database_service.py CHANGED
@@ -16,7 +16,7 @@ class DatabaseService:
16
  if parquet_file and Path(parquet_file).exists():
17
  self.parquet_path = Path(parquet_file)
18
  self._setup_duckdb()
19
- print(f"✅ Dataset : {self.parquet_path}")
20
  else:
21
  if self._download_from_huggingface():
22
  parquet_file = self._find_parquet_file()
@@ -65,7 +65,7 @@ class DatabaseService:
65
  self.conn = duckdb.connect()
66
  result = self.conn.execute(f"SELECT COUNT(*) FROM '{self.parquet_path}'").fetchone()
67
  total = result[0] if result else 0
68
- print(f"✅ DuckDB: {total:,} products")
69
  except Exception as e:
70
  print(f"❌ Error DuckDB: {e}")
71
  raise Exception(f"DuckDB can not be configured: {e}")
@@ -185,23 +185,19 @@ class DatabaseService:
185
 
186
  sql = f"""
187
  SELECT DISTINCT
188
- product_name, -- 0
189
- brands, -- 1
190
  nutriscore_grade, -- 2
191
- ecoscore_grade, -- 3
192
- nova_group, -- 4
193
- categories, -- 5
194
- ingredients_n, -- 6
195
- additives_n, -- 7
196
- allergens_tags, -- 8
197
- ingredients_text, -- 9
198
- ingredients_tags, -- 10
199
- nutriments, -- 11
200
- origins, -- 12
201
- serving_size, -- 13
202
- quantity, -- 14
203
- labels_tags, -- 15
204
- ({score_calc}) AS score -- 16
205
  FROM '{self.parquet_path}'
206
  WHERE {where_clause}
207
  ORDER BY score DESC, product_name ASC
@@ -219,20 +215,16 @@ class DatabaseService:
219
  'product_name': clean_name[:100],
220
  'brands': str(row[1])[:50] if row[1] else 'N/A',
221
  'nutriscore_grade': str(row[2]).lower() if row[2] else '',
222
- 'ecoscore_grade': str(row[3]).lower() if row[3] else '',
223
- 'nova_group': row[4] if row[4] else None,
224
- 'categories': str(row[5])[:100] if row[5] else 'N/A',
225
- 'ingredients_count': row[6] if row[6] else 0,
226
- 'additives_count': row[7] if row[7] else 0,
227
- 'allergens': self._clean_tags(row[8]),
228
- 'ingredients_text': str(row[9])[:500] if row[9] else 'N/A',
229
- 'ingredients_tags': self._clean_tags(row[10]) if row[10] else [],
230
- 'nutriments': (row[11]) if row[11] else 'N/A',
231
- 'origins': str(row[12])[:50] if row[12] else 'N/A',
232
- 'serving_size': str(row[13])[:20] if row[13] else 'N/A',
233
- 'quantity': str(row[14])[:30] if row[14] else 'N/A',
234
- 'labels': self._clean_tags(row[15]) if row[15] else [],
235
- 'score': float(row[16]) if row[16] else 0.0,
236
  'mistral_analysis': analysis.get('explanation', '')
237
  })
238
 
 
16
  if parquet_file and Path(parquet_file).exists():
17
  self.parquet_path = Path(parquet_file)
18
  self._setup_duckdb()
19
+ print(f"🗜️ Dataset : {self.parquet_path} \n\n")
20
  else:
21
  if self._download_from_huggingface():
22
  parquet_file = self._find_parquet_file()
 
65
  self.conn = duckdb.connect()
66
  result = self.conn.execute(f"SELECT COUNT(*) FROM '{self.parquet_path}'").fetchone()
67
  total = result[0] if result else 0
68
+ print(f"🦆 DuckDB: {total:,} products \n\n")
69
  except Exception as e:
70
  print(f"❌ Error DuckDB: {e}")
71
  raise Exception(f"DuckDB can not be configured: {e}")
 
185
 
186
  sql = f"""
187
  SELECT DISTINCT
188
+ product_name, -- 0
189
+ brands, -- 1
190
  nutriscore_grade, -- 2
191
+ nova_group, -- 3
192
+ categories, -- 4
193
+ ingredients_n, -- 5
194
+ additives_n, -- 6
195
+ allergens_tags, -- 7
196
+ ingredients_text, -- 8
197
+ nutriments, -- 9
198
+ serving_size, -- 10
199
+ quantity, -- 11
200
+ ({score_calc}) AS score -- 12
 
 
 
 
201
  FROM '{self.parquet_path}'
202
  WHERE {where_clause}
203
  ORDER BY score DESC, product_name ASC
 
215
  'product_name': clean_name[:100],
216
  'brands': str(row[1])[:50] if row[1] else 'N/A',
217
  'nutriscore_grade': str(row[2]).lower() if row[2] else '',
218
+ 'nova_group': row[3] if row[3] else None,
219
+ 'categories': str(row[4])[:100] if row[4] else 'N/A',
220
+ 'ingredients_count': row[5] if row[5] else 0,
221
+ 'additives_count': row[6] if row[6] else 0,
222
+ 'allergens': self._clean_tags(row[7]),
223
+ 'ingredients_text': str(row[8])[:500] if row[8] else 'N/A',
224
+ 'nutriments': (row[9]) if row[9] else 'N/A',
225
+ 'serving_size': str(row[10])[:20] if row[10] else 'N/A',
226
+ 'quantity': str(row[11])[:30] if row[11] else 'N/A',
227
+ 'score': float(row[12]) if row[12] else 0.0,
 
 
 
 
228
  'mistral_analysis': analysis.get('explanation', '')
229
  })
230