joedac-netvigie committed on
Commit
5370228
·
0 Parent(s):

Initial commit

Browse files
.gitignore ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Variables d'environnement
2
+ .env
3
+ .env.local
4
+ .env.production
5
+ .env.staging
6
+
7
+ # Environnements virtuels Python
8
+ .venv/
9
+ venv/
10
+ env/
11
+ ENV/
12
+
13
+ # IDE et éditeurs
14
+ .idea/
15
+ .vscode/
16
+ *.swp
17
+ *.swo
18
+ *~
19
+
20
+ # Python
21
+ __pycache__/
22
+ *.py[cod]
23
+ *$py.class
24
+ *.so
25
+ .Python
26
+ build/
27
+ develop-eggs/
28
+ dist/
29
+ downloads/
30
+ eggs/
31
+ .eggs/
32
+ lib/
33
+ lib64/
34
+ parts/
35
+ sdist/
36
+ var/
37
+ wheels/
38
+ *.egg-info/
39
+ .installed.cfg
40
+ *.egg
41
+ MANIFEST
42
+
43
+ # Jupyter Notebook
44
+ .ipynb_checkpoints
45
+
46
+ # Cache Python
47
+ .pytest_cache/
48
+ .coverage
49
+ htmlcov/
50
+
51
+ # Gradio
52
+ .gradio/
53
+ gradio_cached_examples/
54
+
55
+ # Data et modèles (souvent volumineux)
56
+ data/*.parquet
57
+ data/*.csv
58
+ data/*.json
59
+ data/*.pkl
60
+ data/*.db
61
+ *.h5
62
+ *.hdf5
63
+
64
+ # Logs
65
+ *.log
66
+ logs/
67
+
68
+ # Fichiers temporaires
69
+ *.tmp
70
+ *.temp
71
+ .DS_Store
72
+ Thumbs.db
73
+
74
+ # Documentation générée
75
+ docs/_build/
76
+
77
+ # Fichiers de sauvegarde
78
+ *.bak
79
+ *.backup
80
+
81
+ # Certificats et clés
82
+ *.pem
83
+ *.key
84
+ *.crt
app.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ui.gradio_ui import GradioUI
2
+
3
+
4
def main():
    """Build the Gradio interface and launch it with the MCP server enabled."""
    print("🚀 Launching...")

    try:
        interface = GradioUI().create_interface()
        interface.launch(mcp_server=True)
    except Exception as e:
        # Top-level boundary: report the failure as a short message.
        print(f"❌ Error: {e}")


if __name__ == "__main__":
    main()
config/settings.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
"""Application settings read from the process environment."""
import os

from dotenv import load_dotenv

# Pull variables from a local .env file (if any) before the settings are read.
load_dotenv()

# None when MISTRAL_API_KEY is not set.
MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
MISTRAL_URL = "https://api.mistral.ai/v1/chat/completions"

DATA_DIR = os.getenv("DATA_DIR", "./data")
# Default lives inside DATA_DIR so that overriding DATA_DIR alone also moves
# the parquet file; with the default DATA_DIR this is still ./data/food.parquet.
PARQUET_FILE = os.getenv("PARQUET_FILE", os.path.join(DATA_DIR, "food.parquet"))
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ gradio~=5.31.0
2
+ mcp
3
+ pandas~=2.2.3
4
+ requests~=2.32.3
5
+ datasets~=3.6.0
6
+ numpy
7
+ huggingface_hub
8
+ duckdb~=1.3.0
9
+ python-dotenv~=1.1.0
services/database_service.py ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ import duckdb
3
+ from datasets import load_dataset
4
+ import os
5
+ from config.settings import PARQUET_FILE
6
+
7
+
8
class DatabaseService:
    """Search the OpenFoodFacts product dataset stored as a parquet file.

    On construction the service looks for the parquet file, downloads the
    dataset from Hugging Face when none is found, and opens a DuckDB
    connection that queries the file directly.
    """

    # Escape character declared in every LIKE ... ESCAPE clause built below.
    _LIKE_ESCAPE = "\\"
    # Rows whose relevance score is below this value are filtered out.
    _MIN_SCORE = 30
    # Regexes extracting the first 'text' value from a stringified list of
    # entries, one per single/double quote combination.
    _NAME_TEXT_PATTERNS = (
        r"'text':\s*'([^']*)'",
        r'"text":\s*"([^"]*)"',
        r"'text':\s*\"([^\"]*)\"",
        r'"text":\s*\'([^\']*)\'',
    )
    # Everything ast.literal_eval may raise on malformed input.
    _LITERAL_EVAL_ERRORS = (ValueError, SyntaxError, TypeError, MemoryError, RecursionError)

    def __init__(self):
        """Locate (or download) the dataset and open the DuckDB connection.

        Raises:
            Exception: if the dataset cannot be downloaded, cannot be found
                after the download, or DuckDB cannot read it.
        """
        self.parquet_path = None
        self.conn = None

        os.makedirs('./data', exist_ok=True)

        parquet_file = self._find_parquet_file()
        if parquet_file:
            self.parquet_path = Path(parquet_file)
            self._setup_duckdb()
            print(f"✅ Dataset : {self.parquet_path}")
            return

        if not self._download_from_huggingface():
            print("❌ Can't download dataset")
            raise Exception("Dataset download failed")

        parquet_file = self._find_parquet_file()
        if not parquet_file:
            print("❌ Can't load dataset")
            raise Exception("No dataset")

        self.parquet_path = Path(parquet_file)
        self._setup_duckdb()
        print(f"✅ Dataset dowloaded : {self.parquet_path}")

    def _download_from_huggingface(self):
        """Download the dataset from Hugging Face into ./data/food.parquet.

        Returns:
            bool: True when the file was written, False on any failure.
        """
        try:
            print("🔄 Downloading dataset from Hugging Face...")
            dataset = load_dataset("openfoodfacts/product-database", split="train")
            dataset.to_parquet("./data/food.parquet")
            print("✅ Dataset downloaded & saved in ./data/food.parquet")
            return True
        except Exception as e:
            print(f"❌ Erreur lors du téléchargement depuis Hugging Face: {e}")
            return False

    def _find_parquet_file(self):
        """Return the first existing candidate parquet path, or None."""
        candidates = [PARQUET_FILE, "./data/food.parquet", "./food.parquet", "../data/food.parquet"]
        return next((path for path in candidates if Path(path).exists()), None)

    def _setup_duckdb(self):
        """Open the DuckDB connection and check the parquet file is readable.

        Raises:
            Exception: if connecting or counting the rows fails.
        """
        try:
            self.conn = duckdb.connect()
            result = self.conn.execute(f"SELECT COUNT(*) FROM '{self.parquet_path}'").fetchone()
            total = result[0] if result else 0
            print(f"✅ DuckDB: {total:,} products")
        except Exception as e:
            print(f"❌ Error DuckDB: {e}")
            raise Exception(f"DuckDB can not be configured: {e}") from e

    def escape_sql_string(self, text):
        """Double single quotes and percent signs in ``text``.

        Kept for callers that still build SQL by hand. Note that doubling
        ``%`` does not neutralise it inside a LIKE pattern; ``search_products``
        no longer relies on this method and binds parameters instead.
        """
        if not text:
            return ""
        return text.replace("'", "''").replace("%", "%%")

    def _escape_like(self, text):
        """Escape LIKE wildcards (and the escape character itself) in ``text``."""
        esc = self._LIKE_ESCAPE
        return (
            text.replace(esc, esc + esc)
            .replace("%", esc + "%")
            .replace("_", esc + "_")
        )

    @staticmethod
    def _strip_tag_prefixes(tags):
        """Return at most three tags as strings without 'en:' / 'fr:' markers."""
        return [str(tag).replace('en:', '').replace('fr:', '') for tag in tags[:3]]

    def _clean_tags(self, tags_raw):
        """Normalise a tags value into a short list of readable strings.

        Accepts a list, a stringified list, any other string, or any other
        value; empty values give an empty list.
        """
        if not tags_raw:
            return []

        if isinstance(tags_raw, list):
            return self._strip_tag_prefixes(tags_raw)

        if isinstance(tags_raw, str):
            if tags_raw.startswith('['):
                import ast
                try:
                    parsed = ast.literal_eval(tags_raw)
                except self._LITERAL_EVAL_ERRORS:
                    parsed = None
                if isinstance(parsed, list):
                    return self._strip_tag_prefixes(parsed)
            return [tags_raw]

        return [str(tags_raw)]

    def clean_product_name(self, raw_name):
        """Extract a readable product name from a raw dataset value.

        ``raw_name`` may be a plain name, a list of entries holding a 'text'
        key, or the string form of such a list. Returns 'N/A' for empty input.
        """
        if not raw_name or raw_name == 'N/A':
            return 'N/A'

        import ast
        import re

        entries = raw_name if isinstance(raw_name, (list, tuple)) else None
        raw_name = str(raw_name)
        looks_structured = raw_name.startswith('[') and 'text' in raw_name

        # Prefer a real parse: it keeps apostrophes and quotes inside names.
        if entries is None and looks_structured:
            try:
                parsed = ast.literal_eval(raw_name)
            except self._LITERAL_EVAL_ERRORS:
                parsed = None
            if isinstance(parsed, (list, tuple)):
                entries = parsed

        for entry in entries or ():
            if isinstance(entry, dict):
                text = entry.get('text')
                if isinstance(text, str) and text.strip():
                    return text

        # The string may be truncated or otherwise unparsable: fall back to regexes.
        if looks_structured:
            for pattern in self._NAME_TEXT_PATTERNS:
                match = re.search(pattern, raw_name)
                if match:
                    return match.group(1)

        clean = raw_name.replace('[', '').replace(']', '').replace('{', '').replace('}', '')

        # Last-resort heuristic, only for structured strings: a plain name that
        # merely contains the letters "text" must not be cut at that point.
        if looks_structured and 'text' in clean:
            text_part = clean.split('text')[-1]
            text_part = text_part.replace('"', '').replace("'", '').replace(':', '').replace(',', '')
            text_part = text_part.strip()
            if text_part and len(text_part) > 3:
                return text_part

        return clean[:100]

    def _build_search_query(self, product, brand, limit):
        """Build the parameterised search statement.

        Args:
            product: product search term (may be empty).
            brand: brand search term (may be empty).
            limit: maximum number of rows; coerced to an int of at least 1.

        Returns:
            A ``(sql, params)`` tuple where ``params`` maps every named
            parameter used in ``sql`` to its value, or None when both search
            terms are empty.
        """
        if not product and not brand:
            return None

        esc = self._LIKE_ESCAPE
        name_col = "LOWER(CAST(product_name AS VARCHAR))"
        brand_col = "LOWER(CAST(brands AS VARCHAR))"

        def like(column, param):
            return f"{column} LIKE LOWER(${param}) ESCAPE '{esc}'"

        # Only parameters actually referenced by the statement are registered.
        params = {}
        if product:
            literal = self._escape_like(product)
            params["product_contains"] = f"%{literal}%"
            params["product_prefix"] = f"{literal}%"
        if brand:
            params["brand_contains"] = f"%{self._escape_like(brand)}%"
            params["brand_exact"] = brand

        if product and brand:
            match_sql = f"({like(name_col, 'product_contains')} AND {like(brand_col, 'brand_contains')})"
            scores = [
                f"CASE WHEN {match_sql} THEN 100 ELSE 0 END",
                f"CASE WHEN {like(name_col, 'product_prefix')} THEN 20 ELSE 0 END",
                f"CASE WHEN {brand_col} = LOWER($brand_exact) THEN 30 ELSE 0 END",
            ]
        elif product:
            match_sql = like(name_col, 'product_contains')
            scores = [
                f"CASE WHEN {like(name_col, 'product_prefix')} THEN 80 ELSE 50 END",
                "CASE WHEN brands IS NOT NULL AND LENGTH(CAST(brands AS VARCHAR)) > 3 THEN 10 ELSE 0 END",
            ]
        else:
            match_sql = like(brand_col, 'brand_contains')
            scores = [
                f"CASE WHEN {brand_col} = LOWER($brand_exact) THEN 90 ELSE 60 END",
            ]

        france_sql = "LOWER(CAST(countries_tags AS VARCHAR)) LIKE '%france%'"
        score_sql = " + ".join(scores)
        row_limit = max(1, int(limit))

        sql = f"""
            SELECT DISTINCT
                product_name,           -- 0
                brands,                 -- 1
                nutriscore_grade,       -- 2
                ecoscore_grade,         -- 3
                nova_group,             -- 4
                categories,             -- 5
                ingredients_n,          -- 6
                additives_n,            -- 7
                allergens_tags,         -- 8
                ingredients_text,       -- 9
                ingredients_tags,       -- 10
                nutriments,             -- 11
                origins,                -- 12
                serving_size,           -- 13
                quantity,               -- 14
                labels_tags,            -- 15
                ({score_sql}) AS score  -- 16
            FROM '{self.parquet_path}'
            WHERE ({match_sql} AND {france_sql}) AND (({score_sql}) >= {self._MIN_SCORE})
            ORDER BY score DESC, product_name ASC
            LIMIT {row_limit}
        """
        return sql, params

    def _row_to_product(self, row, explanation):
        """Convert one result row (column order of the SELECT) into a dict."""
        raw_name = str(row[0]) if row[0] else 'N/A'
        return {
            'product_name': self.clean_product_name(raw_name)[:100],
            'brands': str(row[1])[:50] if row[1] else 'N/A',
            'nutriscore_grade': str(row[2]).lower() if row[2] else '',
            'ecoscore_grade': str(row[3]).lower() if row[3] else '',
            'nova_group': row[4] if row[4] else None,
            'categories': str(row[5])[:100] if row[5] else 'N/A',
            'ingredients_count': row[6] if row[6] else 0,
            'additives_count': row[7] if row[7] else 0,
            'allergens': self._clean_tags(row[8]),
            'ingredients_text': str(row[9])[:500] if row[9] else 'N/A',
            'ingredients_tags': self._clean_tags(row[10]) if row[10] else [],
            'nutriments': row[11] if row[11] else 'N/A',
            'origins': str(row[12])[:50] if row[12] else 'N/A',
            'serving_size': str(row[13])[:20] if row[13] else 'N/A',
            'quantity': str(row[14])[:30] if row[14] else 'N/A',
            'labels': self._clean_tags(row[15]) if row[15] else [],
            'score': float(row[16]) if row[16] else 0.0,
            'mistral_analysis': explanation,
        }

    def search_products(self, analysis, limit=1):
        """Search products in the dataset.

        Args:
            analysis: dict with optional 'product', 'brand' and 'explanation'
                keys. When neither 'product' nor 'brand' is usable, a
                'keywords' list is joined and used as the product term.
                Anything that is not a dict is treated as empty.
            limit: maximum number of products to return.

        Returns:
            list[dict]: matching products, best score first. Empty when the
            service is not initialised, nothing is searched for, nothing
            matches, or the query fails (the error is printed).
        """
        if not self.conn or not self.parquet_path:
            print("❌ DAtabase not initialized")
            return []

        try:
            if not isinstance(analysis, dict):
                analysis = {}

            # `or ""` also covers keys present with a None value.
            product = str(analysis.get("product") or "").strip()
            brand = str(analysis.get("brand") or "").strip()

            if not product and not brand:
                keywords = analysis.get("keywords") or []
                if isinstance(keywords, str):
                    keywords = [keywords]
                product = " ".join(str(word) for word in keywords).strip()

            query = self._build_search_query(product, brand, limit)
            if query is None:
                # Nothing to search for: do not return arbitrary rows.
                return []

            sql, params = query
            rows = self.conn.execute(sql, params).fetchall()
            explanation = analysis.get('explanation', '')
            return [self._row_to_product(row, explanation) for row in rows]

        except Exception as e:
            print(f"❌ Search error: {e}")
            return []
services/mistral_service.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import json
3
+ from config.settings import MISTRAL_API_KEY, MISTRAL_URL
4
+
5
+
6
class MistralService:
    """Ask the Mistral chat API to split a food search query into product and brand."""

    def __init__(self):
        self.api_key = MISTRAL_API_KEY
        self.api_url = MISTRAL_URL

    @staticmethod
    def _fallback(query, reason):
        """Return the analysis used when Mistral cannot be consulted.

        The whole query is treated as the product name and ``reason`` is
        stored as the explanation.
        """
        return {"product": query, "brand": "", "quality_filter": "none", "explanation": reason}

    @staticmethod
    def _parse_analysis(content):
        """Extract the JSON object embedded in the model's reply.

        Everything outside the outermost braces (markdown fences, stray
        prose) is ignored.

        Raises:
            ValueError: if ``content`` is not text, contains no ``{...}``
                span, or that span is not valid JSON.
        """
        if not isinstance(content, str):
            raise ValueError("Mistral reply has no text content")

        start_idx = content.find('{')
        end_idx = content.rfind('}')
        if start_idx == -1 or end_idx < start_idx:
            raise ValueError("Mistral reply contains no JSON object")

        return json.loads(content[start_idx:end_idx + 1])

    @staticmethod
    def _build_prompt(query):
        """Return the instruction prompt sent to the model for ``query``."""
        return f"""Analyse cette requête de recherche alimentaire, tu es capable de différencier la marque et le produit, et réponds en JSON :

Requête: "{query}"

RÈGLES IMPORTANTES :
- Si c'est un nom de produit avec parfum/goût (ex: "danette chocolat", "yaourt fraise", "pizza 4 fromages"), alors TOUT est le PRODUIT
- Séparer produit/marque SEULEMENT si ton analyse en conclue que c'est une marque.
- Les parfums/goûts/saveurs ne sont PAS des marques : chocolat, vanille, fraise, pistache, caramel, etc.

Réponds uniquement avec ce format JSON :
{{
"product": "nom du produit recherché",
"brand": "marque si explicitement une VRAIE marque connue ou vide",
"explanation": "explication courte"
}}

Exemples :
- "danette pistache" → {{"product": "danette pistache", "brand": "", "quality_filter": "none", "explanation": "Recherche produit Danette parfum pistache"}}
- "yaourt fraise" → {{"product": "yaourt fraise", "brand": "", "quality_filter": "none", "explanation": "Recherche yaourt parfum fraise"}}
- "pizza 4 fromages" → {{"product": "pizza 4 fromages", "brand": "", "quality_filter": "none", "explanation": "Recherche pizza 4 fromages"}}
- "yaourt danone fraise" → {{"product": "yaourt fraise", "brand": "danone", "quality_filter": "none", "explanation": "Yaourt fraise de la marque Danone"}}
- "pizza picard" → {{"product": "pizza", "brand": "picard", "quality_filter": "none", "explanation": "Pizzas de la marque Picard"}}
- "nutella" → {{"product": "nutella", "brand": "", "quality_filter": "none", "explanation": "Recherche produit Nutella"}}
- "coca cola" → {{"product": "coca cola", "brand": "", "quality_filter": "none", "explanation": "Recherche produit Coca Cola"}}
"""

    def analyze_query(self, query):
        """Analyse the user query to tell the brand apart from the product.

        Args:
            query: the raw search text typed by the user.

        Returns:
            dict: the JSON object produced by the model (expected keys:
            'product', 'brand', 'explanation'). When no API key is configured,
            the API answers with an error, the request fails, or the reply
            cannot be parsed, a fallback dict treating the whole query as the
            product is returned instead, so callers always receive a dict.
        """
        if not self.api_key:
            return self._fallback(query, "Pas de clé Mistral")

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }
        payload = {
            "model": "mistral-small",
            "messages": [{"role": "user", "content": self._build_prompt(query)}],
            "temperature": 0.1,
            "max_tokens": 200
        }

        try:
            response = requests.post(self.api_url, headers=headers, json=payload, timeout=10)
            if response.status_code != 200:
                print(f"❌ Mistral API error: {response.status_code}")
                return self._fallback(query, "Analyse Mistral indisponible")

            content = response.json()["choices"][0]["message"]["content"]
            analysis = self._parse_analysis(content)
        except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as e:
            # Network failure, unexpected payload shape, or invalid JSON.
            print(f"❌ Mistral error: {e}")
            return self._fallback(query, "Analyse Mistral indisponible")

        print(f"🤖 Mistral understand : {analysis.get('explanation', '')}")
        return analysis
services/search_service.py ADDED
@@ -0,0 +1,377 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import re
3
+ from typing import List, Dict, Any, Optional
4
+ from services.mistral_service import MistralService
5
+ from services.database_service import DatabaseService
6
+
7
+
8
class FoodSearchService:
    """Search food products and render the matches as markdown."""

    # Bounds and default applied to the requested number of results.
    _MIN_RESULTS = 1
    _MAX_RESULTS = 20
    _DEFAULT_RESULTS = 5

    # Nutrients shown in the summary line, in display order, with their label.
    # Alternative spellings share a label so each label is shown only once.
    _NUTRIENT_LABELS = (
        ('energy-kcal', 'Énergie'), ('energy_kcal', 'Énergie'),
        ('sugars', 'Sucres'),
        ('fat', 'Matières grasses'),
        ('salt', 'Sel'), ('sodium', 'Sodium'),
        ('proteins', 'Protéines'),
        ('saturated-fat', 'Sat. grasses'), ('saturated_fat', 'Sat. grasses'),
        ('carbohydrates', 'Glucides'),
    )
    _MAX_NUTRIENTS_SHOWN = 4

    def __init__(self):
        self.mistral_service = MistralService()
        self.database_service = DatabaseService()

    def search_products(self, query: str, max_results: int = 5) -> str:
        """
        Search food products, using AI to interpret the query.

        The query is first analysed by the Mistral service to separate the
        product from the brand, then looked up through the database service.

        Args:
            query (str): Natural-language search text.
                Examples:
                - "pepito"
                - "pizza picard"
                - "yaourt sans lactose"
                - "biscuits lu bio"

            max_results (int): Maximum number of results (1-20). Values
                outside that range are clamped; non-numeric values fall back
                to 5. Default: 5

        Returns:
            str: Markdown describing each product found (name, brand, scores,
            composition, ingredients, allergens, nutrition values, quantity,
            categories), or a short error message starting with "❌" when the
            query is empty, the search fails, or nothing matches.

        Examples:
            >>> service = FoodSearchService()
            >>> results = service.search_products("nutella", 3)
            >>> print(results)
            **📊 3 produits trouvés :**

            **1. Nutella**
            - **Marque**: Ferrero
            - **Scores**: Nutri: 🔴E | NOVA: 🔴4
            - **🧾 Ingrédients**: Sucre, huile de palme, noisettes...
        """
        if not query or not query.strip():
            return "❌ Enter your research"

        try:
            limit = int(max_results)
        except (TypeError, ValueError):
            limit = self._DEFAULT_RESULTS
        limit = max(self._MIN_RESULTS, min(self._MAX_RESULTS, limit))

        try:
            analysis = self.mistral_service.analyze_query(query)
        except Exception as e:
            print(f"Mistral not ready: {e}")
            analysis = None

        if not isinstance(analysis, dict):
            # Without an analysis, search for the raw query as a product name.
            analysis = {'product': query.strip(), 'brand': '', 'keywords': [query], 'category': ''}

        # Look the products up in the database
        try:
            products = self.database_service.search_products(analysis, limit=limit)
        except Exception as e:
            return f"❌ Search error: {str(e)}"

        if not products:
            return f"❌ No results for '{query}'"

        return self._format_products_results(products)

    def _format_products_results(self, products: List[Dict[str, Any]]) -> str:
        """Format all products as one markdown document."""
        sections = [f"**📊 {len(products)} produits trouvés :**\n\n"]

        for i, product in enumerate(products, 1):
            try:
                sections.append(self._format_single_product(i, product))
            except Exception as e:
                # One malformed product must not hide the others.
                print(f"Error formating product {i}: {e}")
                sections.append(f"**{i}. Erreur lors du traitement du produit**\n\n")

        return "".join(sections)

    def _format_single_product(self, index: int, product: Dict[str, Any]) -> str:
        """Format a single product as a markdown section."""
        product_name = product.get('product_name', 'Produit inconnu')
        brand = product.get('brands', 'N/A')

        parts = [
            f"**{index}. {product_name}**\n",
            f"- **Marque**: {brand}\n",
            self._format_nutrition_scores(product),
            self._format_composition(product),
            self._format_ingredients(product),
            self._format_allergens(product),
            self._format_nutrition_values(product),
            self._format_additional_info(product),
            self._format_categories(product),
            "\n",
        ]
        return "".join(parts)

    def _format_nutrition_scores(self, product: Dict[str, Any]) -> str:
        """Format the Nutri-Score and NOVA group line ('' when neither is known)."""
        scores_line = []

        nutri_grade = product.get('nutriscore_grade')
        if nutri_grade and nutri_grade != 'unknown':
            nutri_emoji = self._get_grade_emoji(nutri_grade, 'nutri')
            scores_line.append(f"Nutri: {nutri_emoji}{nutri_grade.upper()}")

        nova_group = product.get('nova_group')
        if nova_group:
            nova_emoji = self._get_grade_emoji(nova_group, 'nova')
            scores_line.append(f"NOVA: {nova_emoji}{nova_group}")

        return f"- **Scores**: {' | '.join(scores_line)}\n" if scores_line else ""

    def _format_composition(self, product: Dict[str, Any]) -> str:
        """Format the ingredient/additive counts line ('' when both are zero)."""
        composition_parts = []
        ingredients_count = product.get('ingredients_count', 0)
        additives_count = product.get('additives_count', 0)

        if ingredients_count and ingredients_count > 0:
            composition_parts.append(f"{ingredients_count} ingrédients")
        if additives_count and additives_count > 0:
            composition_parts.append(f"⚠️ {additives_count} additifs")

        return f"- **Composition**: {', '.join(composition_parts)}\n" if composition_parts else ""

    def _format_ingredients(self, product: Dict[str, Any]) -> str:
        """Format the ingredients line, truncated to 300 characters."""
        ingredients_text = product.get('ingredients_text')
        if not ingredients_text or ingredients_text == 'N/A':
            return ""

        ingredients_clean = self._extract_ingredients_text(ingredients_text)
        if not ingredients_clean:
            return ""

        # Keep the display short
        if len(ingredients_clean) > 300:
            ingredients_clean = ingredients_clean[:300] + "..."
        return f"- **🧾 Ingrédients**: {ingredients_clean}\n"

    def _format_allergens(self, product: Dict[str, Any]) -> str:
        """Format the allergens line (at most four entries)."""
        allergens = product.get('allergens', [])
        if not allergens:
            return ""

        names = [self._clean_allergen_name(a) for a in allergens[:4]]
        names = [name for name in names if name]
        return f"- **⚠️ Allergènes**: {', '.join(names)}\n" if names else ""

    def _format_nutrition_values(self, product: Dict[str, Any]) -> str:
        """Format up to four key nutrition values, most important first."""
        nutrients = self._parse_nutrients(product.get('nutriments', {}))
        shown = []
        seen_labels = set()

        for key, label in self._NUTRIENT_LABELS:
            if key not in nutrients or label in seen_labels:
                continue
            formatted_value = self._format_nutrition_value(nutrients[key])
            if formatted_value == "N/A":
                continue
            seen_labels.add(label)
            shown.append(f"{label}: {formatted_value}")
            if len(shown) >= self._MAX_NUTRIENTS_SHOWN:
                break

        return f"- **🍽️ Nutrition** (100g): {' | '.join(shown)}\n" if shown else ""

    def _format_additional_info(self, product: Dict[str, Any]) -> str:
        """Format the quantity / serving size line."""
        additional_info = []
        serving_size = product.get('serving_size')
        quantity = product.get('quantity')

        if quantity and quantity != 'N/A':
            additional_info.append(f"📦 {quantity}")
        if serving_size and serving_size != 'N/A':
            additional_info.append(f"🥄 Portion: {serving_size}")

        return f"- **ℹ️ Infos**: {' | '.join(additional_info)}\n" if additional_info else ""

    def _format_categories(self, product: Dict[str, Any]) -> str:
        """Format the categories line, truncated to 60 characters."""
        categories = product.get('categories')
        if not categories or categories == 'N/A':
            return ""

        cats_clean = categories.replace(',', ' → ').replace('Snacks → ', '')
        if len(cats_clean) > 60:
            cats_clean = cats_clean[:60] + "..."
        return f"- **📂 Catégorie**: {cats_clean}\n"

    def _get_grade_emoji(self, grade, grade_type='nutri') -> str:
        """Return the coloured emoji for a Nutri-Score letter or NOVA group.

        NOVA groups are accepted as int, float or numeric string. Unknown
        grades give an empty string.
        """
        if not grade or grade == 'unknown':
            return ''

        if grade_type == 'nutri':
            return {'a': '🟢', 'b': '🟡', 'c': '🟠', 'd': '🔴', 'e': '🔴'}.get(str(grade).lower(), '')
        if grade_type == 'nova':
            try:
                group = int(float(grade))
            except (TypeError, ValueError, OverflowError):
                return ''
            return {1: '🟢', 2: '🟡', 3: '🟠', 4: '🔴'}.get(group, '')
        return ''

    def _clean_allergen_name(self, allergen: str) -> str:
        """Drop the language prefix from an allergen tag and title-case it."""
        if not allergen:
            return allergen
        cleaned = allergen.replace('en:', '').replace('fr:', '')
        return cleaned.replace('-', ' ').title()

    def _extract_ingredients_text(self, ingredients_data) -> str:
        """Extract a readable ingredients list.

        ``ingredients_data`` is either plain text, or the string form of a
        list of entries holding a 'text' key; in the latter case the longest
        'text' value is used. Returns '' when nothing usable is found.
        """
        if not ingredients_data or ingredients_data == 'N/A':
            return ""

        try:
            data_str = str(ingredients_data)

            # Plain text (not a stringified structure) is already the list.
            if not data_str.lstrip().startswith(('[', '{')):
                return self._clean_ingredients_text(data_str)

            patterns = [
                r"'text':\s*'([^']{20,})'",
                r'"text":\s*"([^"]{20,})"',
                r"'text':\s*\"([^\"]{20,})\"",
                r'"text":\s*\'([^\']{20,})\'',
            ]

            all_matches = []
            for pattern in patterns:
                all_matches.extend(re.findall(pattern, data_str, re.DOTALL))

            if all_matches:
                return self._clean_ingredients_text(max(all_matches, key=len))

            # Fallback for truncated values whose closing quote is missing
            simple_matches = re.findall(r"text[^a-zA-Z]*([a-zA-Z][^}]{30,})", data_str)
            if simple_matches:
                return self._clean_ingredients_text(simple_matches[0])

        except Exception as e:
            print(f"Error ingredients extraction: {e}")

        return ""

    def _clean_ingredients_text(self, text: str) -> str:
        """Strip markup and escapes from an ingredients text and tidy spacing."""
        if not text:
            return ""

        text = re.sub(r'<span class="allergen">(.*?)</span>', r'\1', text)

        # Removing every backslash also turns \" and \' into plain quotes.
        text = text.replace('\\', '')
        text = text.replace('&quot;', '"')

        text = re.sub(r'\s*%\s*', '% ', text)
        text = re.sub(r'\s*\(\s*', ' (', text)
        text = re.sub(r'\s*\)\s*', ') ', text)
        text = re.sub(r'\s+', ' ', text)

        return text.strip()

    @staticmethod
    def _index_nutrients(items) -> Dict[str, Any]:
        """Map nutrient name -> nutrient dict, skipping malformed entries."""
        return {
            item['name']: item
            for item in items
            if isinstance(item, dict) and isinstance(item.get('name'), str)
        }

    def _parse_nutrients(self, nutrients_data) -> Dict[str, Any]:
        """Parse the nutrients value into a dict keyed by nutrient name.

        Accepts a list of dicts carrying a 'name' key, or such a list encoded
        as a Python-literal or JSON string. Anything else gives {}.
        """
        if not nutrients_data or nutrients_data == 'N/A':
            return {}

        # Already a list
        if isinstance(nutrients_data, list):
            return self._index_nutrients(nutrients_data)

        if not isinstance(nutrients_data, str):
            return {}

        if nutrients_data.startswith('[') and 'name' in nutrients_data:
            import ast
            try:
                parsed = ast.literal_eval(nutrients_data)
            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
                parsed = None
            if isinstance(parsed, list):
                return self._index_nutrients(parsed)

        try:
            parsed = json.loads(nutrients_data)
        except ValueError:
            return {}
        return self._index_nutrients(parsed) if isinstance(parsed, list) else {}

    def _format_nutrition_value(self, nutrient_data) -> str:
        """Format one nutrient as 'value unit', or 'N/A' when unusable.

        The '100g' value is preferred; 'value' is used when '100g' is missing
        or None. A missing, None or zero value gives 'N/A'.
        """
        if not nutrient_data or not isinstance(nutrient_data, dict):
            return "N/A"

        value = nutrient_data.get('100g')
        if value is None:
            value = nutrient_data.get('value')
        if value is None or value == 0:
            return "N/A"

        # The unit key may be present with a None value.
        unit = str(nutrient_data.get('unit') or '')
        unit_lower = unit.lower()

        try:
            if 'kcal' in unit_lower:
                return f"{int(float(value))} kcal"
            if 'kj' in unit_lower:
                return f"{int(float(value))} kJ"
            if unit == 'g':
                return f"{float(value):.1f}g"
        except (TypeError, ValueError, OverflowError):
            return "N/A"

        return f"{value} {unit}" if unit else f"{value}"
ui/gradio_ui.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from services.search_service import FoodSearchService
3
+
4
+
5
class GradioUI:
    """Gradio front end for the food product search."""

    # Matches the slider's initial value; used when the count is not numeric.
    _DEFAULT_RESULTS = 5

    def __init__(self):
        self.food_service = FoodSearchService()

    def search_products(self, query: str, max_results: int) -> str:
        """
        Search food products by name and/or brand and describe the matches.

        Args:
            query: Free-text search, e.g. "pepito", "biscuits lu", "pizza picard".
            max_results: Maximum number of products to return (1 to 20).

        Returns:
            A markdown report of the matching products, or an error message.
        """
        # The UI/API layer may deliver the slider value as a float.
        try:
            limit = int(max_results)
        except (TypeError, ValueError):
            limit = self._DEFAULT_RESULTS
        return self.food_service.search_products(query, limit)

    def create_interface(self):
        """Build and return the Gradio Blocks application (not yet launched)."""
        with gr.Blocks(title="🤖 Healthify me", theme=gr.themes.Soft()) as demo:
            # Header
            gr.Markdown("""
            # 🤖 Healthify me
            """)

            with gr.Row():
                with gr.Column(scale=3):
                    search_input = gr.Textbox(
                        label="🔍 Search for product",
                        placeholder="Ex: pepito, biscuits lu, pizza picard, yaourt danone..."
                    )

                with gr.Column(scale=1):
                    max_results = gr.Slider(
                        minimum=1,
                        maximum=20,
                        value=5,
                        step=1,
                        label="📊 Number of results",
                        info="How many results ?"
                    )

            search_btn = gr.Button("🤖 Search", variant="primary", size="lg")

            search_output = gr.Markdown()

            # Both the button and pressing Enter in the textbox run the search.
            search_btn.click(
                self.search_products,
                inputs=[search_input, max_results],
                outputs=[search_output]
            )

            search_input.submit(
                self.search_products,
                inputs=[search_input, max_results],
                outputs=[search_output]
            )

        return demo
60
+