joedac-netvigie
committed on
Commit
·
5370228
0
Parent(s):
Initial commit
Browse files- .gitignore +84 -0
- app.py +18 -0
- config/settings.py +9 -0
- requirements.txt +9 -0
- services/database_service.py +239 -0
- services/mistral_service.py +87 -0
- services/search_service.py +377 -0
- ui/gradio_ui.py +60 -0
.gitignore
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Variables d'environnement
|
| 2 |
+
.env
|
| 3 |
+
.env.local
|
| 4 |
+
.env.production
|
| 5 |
+
.env.staging
|
| 6 |
+
|
| 7 |
+
# Environnements virtuels Python
|
| 8 |
+
.venv/
|
| 9 |
+
venv/
|
| 10 |
+
env/
|
| 11 |
+
ENV/
|
| 12 |
+
|
| 13 |
+
# IDE et éditeurs
|
| 14 |
+
.idea/
|
| 15 |
+
.vscode/
|
| 16 |
+
*.swp
|
| 17 |
+
*.swo
|
| 18 |
+
*~
|
| 19 |
+
|
| 20 |
+
# Python
|
| 21 |
+
__pycache__/
|
| 22 |
+
*.py[cod]
|
| 23 |
+
*$py.class
|
| 24 |
+
*.so
|
| 25 |
+
.Python
|
| 26 |
+
build/
|
| 27 |
+
develop-eggs/
|
| 28 |
+
dist/
|
| 29 |
+
downloads/
|
| 30 |
+
eggs/
|
| 31 |
+
.eggs/
|
| 32 |
+
lib/
|
| 33 |
+
lib64/
|
| 34 |
+
parts/
|
| 35 |
+
sdist/
|
| 36 |
+
var/
|
| 37 |
+
wheels/
|
| 38 |
+
*.egg-info/
|
| 39 |
+
.installed.cfg
|
| 40 |
+
*.egg
|
| 41 |
+
MANIFEST
|
| 42 |
+
|
| 43 |
+
# Jupyter Notebook
|
| 44 |
+
.ipynb_checkpoints
|
| 45 |
+
|
| 46 |
+
# Cache Python
|
| 47 |
+
.pytest_cache/
|
| 48 |
+
.coverage
|
| 49 |
+
htmlcov/
|
| 50 |
+
|
| 51 |
+
# Gradio
|
| 52 |
+
.gradio/
|
| 53 |
+
gradio_cached_examples/
|
| 54 |
+
|
| 55 |
+
# Data et modèles (souvent volumineux)
|
| 56 |
+
data/*.parquet
|
| 57 |
+
data/*.csv
|
| 58 |
+
data/*.json
|
| 59 |
+
data/*.pkl
|
| 60 |
+
data/*.db
|
| 61 |
+
*.h5
|
| 62 |
+
*.hdf5
|
| 63 |
+
|
| 64 |
+
# Logs
|
| 65 |
+
*.log
|
| 66 |
+
logs/
|
| 67 |
+
|
| 68 |
+
# Fichiers temporaires
|
| 69 |
+
*.tmp
|
| 70 |
+
*.temp
|
| 71 |
+
.DS_Store
|
| 72 |
+
Thumbs.db
|
| 73 |
+
|
| 74 |
+
# Documentation générée
|
| 75 |
+
docs/_build/
|
| 76 |
+
|
| 77 |
+
# Fichiers de sauvegarde
|
| 78 |
+
*.bak
|
| 79 |
+
*.backup
|
| 80 |
+
|
| 81 |
+
# Certificats et clés
|
| 82 |
+
*.pem
|
| 83 |
+
*.key
|
| 84 |
+
*.crt
|
app.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from ui.gradio_ui import GradioUI
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def main():
    """Build the Gradio interface and launch it (with MCP server mode enabled)."""
    print("🚀 Launching...")

    try:
        ui = GradioUI()
        demo = ui.create_interface()

        # mcp_server=True also exposes the app's tools over the Model Context Protocol.
        demo.launch(mcp_server=True)

    except Exception as e:
        print(f"❌ Error: {e}")
        # Bug fix: previously the process exited 0 even on failure, which hides
        # launch errors from callers/CI. Exit non-zero instead.
        raise SystemExit(1) from e


if __name__ == "__main__":
    main()
|
config/settings.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment
# (no-op if the file does not exist).
load_dotenv()

# Mistral AI credentials and endpoint. The key may be None when not configured;
# MistralService degrades gracefully in that case.
MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
MISTRAL_URL = "https://api.mistral.ai/v1/chat/completions"

# Local data locations, overridable via environment variables.
DATA_DIR = os.getenv("DATA_DIR", "./data")
PARQUET_FILE = os.getenv("PARQUET_FILE", "./data/food.parquet")
|
requirements.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio~=5.31.0
|
| 2 |
+
mcp
|
| 3 |
+
pandas~=2.2.3
|
| 4 |
+
requests~=2.32.3
|
| 5 |
+
datasets~=3.6.0
|
| 6 |
+
numpy
|
| 7 |
+
huggingface_hub
|
| 8 |
+
duckdb~=1.3.0
|
| 9 |
+
python-dotenv~=1.1.0
|
services/database_service.py
ADDED
|
@@ -0,0 +1,239 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
import duckdb
|
| 3 |
+
from datasets import load_dataset
|
| 4 |
+
import os
|
| 5 |
+
from config.settings import PARQUET_FILE
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class DatabaseService:
    """Access layer over the OpenFoodFacts parquet dataset, queried with DuckDB.

    On construction, it locates a local parquet file (downloading it from
    Hugging Face if necessary) and opens an in-memory DuckDB connection.

    Raises:
        Exception: if no dataset can be found or downloaded.
    """

    def __init__(self):
        self.parquet_path = None  # resolved Path to the parquet file
        self.conn = None          # in-memory DuckDB connection

        os.makedirs('./data', exist_ok=True)

        parquet_file = self._find_parquet_file()
        if parquet_file and Path(parquet_file).exists():
            self.parquet_path = Path(parquet_file)
            self._setup_duckdb()
            print(f"✅ Dataset : {self.parquet_path}")
        else:
            # No local copy: fetch the dataset from Hugging Face, then retry.
            if self._download_from_huggingface():
                parquet_file = self._find_parquet_file()
                if parquet_file:
                    self.parquet_path = Path(parquet_file)
                    self._setup_duckdb()
                    print(f"✅ Dataset downloaded : {self.parquet_path}")
                else:
                    print("❌ Can't load dataset")
                    raise Exception("No dataset")
            else:
                print("❌ Can't download dataset")
                raise Exception("Dataset download failed")

    def _download_from_huggingface(self):
        """Download the OpenFoodFacts dataset and save it as ./data/food.parquet.

        Returns:
            bool: True on success, False on any failure.
        """
        try:
            print("🔄 Downloading dataset from Hugging Face...")

            dataset_name = "openfoodfacts/product-database"

            dataset = load_dataset(dataset_name, split="train")

            dataset.to_parquet("./data/food.parquet")

            print("✅ Dataset downloaded & saved in ./data/food.parquet")
            return True

        except Exception as e:
            print(f"❌ Erreur lors du téléchargement depuis Hugging Face: {e}")
            # Bug fix: the original fell through and returned None implicitly;
            # callers test this result, so make the failure explicit.
            return False

    def _find_parquet_file(self):
        """Return the first existing candidate path for the parquet file, or None."""
        paths = [PARQUET_FILE, "./data/food.parquet", "./food.parquet", "../data/food.parquet"]
        for path in paths:
            if Path(path).exists():
                return path
        return None

    def _setup_duckdb(self):
        """Open an in-memory DuckDB connection and sanity-check the parquet file.

        Raises:
            Exception: if the connection or the COUNT(*) probe fails.
        """
        try:
            self.conn = duckdb.connect()
            result = self.conn.execute(f"SELECT COUNT(*) FROM '{self.parquet_path}'").fetchone()
            total = result[0] if result else 0
            print(f"✅ DuckDB: {total:,} products")
        except Exception as e:
            print(f"❌ Error DuckDB: {e}")
            raise Exception(f"DuckDB can not be configured: {e}")

    def escape_sql_string(self, text):
        """Escape a string for interpolation into a single-quoted SQL literal.

        Doubles single quotes and percent signs. NOTE(security): this keeps
        quotes safe but string-built SQL remains fragile; parameterized
        queries would be the robust fix.
        """
        if not text:
            return ""
        text = text.replace("'", "''")
        text = text.replace("%", "%%")
        return text

    def _clean_tags(self, tags_raw):
        """Normalize a tags value (stringified list, list, or scalar) to a short list.

        Strips 'en:'/'fr:' language prefixes and keeps at most 3 entries.
        """
        if not tags_raw:
            return []

        if isinstance(tags_raw, str):
            if tags_raw.startswith('['):
                try:
                    import ast
                    tags_list = ast.literal_eval(tags_raw)
                    if isinstance(tags_list, list):
                        return [str(tag).replace('en:', '').replace('fr:', '') for tag in tags_list[:3]]
                # Bug fix: was a bare `except:` which also swallowed
                # KeyboardInterrupt/SystemExit; only parse errors are expected here.
                except (ValueError, SyntaxError):
                    pass
            return [tags_raw]

        if isinstance(tags_raw, list):
            return [str(tag).replace('en:', '').replace('fr:', '') for tag in tags_raw[:3]]

        return [str(tags_raw)]

    def clean_product_name(self, raw_name):
        """Extract a human-readable name from a possibly serialized product_name field.

        The dataset sometimes stores the name as a stringified list of
        {'lang': ..., 'text': ...} dicts; pull out the 'text' value when possible.
        """
        if not raw_name or raw_name == 'N/A':
            return 'N/A'

        if raw_name.startswith('[') and 'text' in raw_name:
            try:
                import re
                match = re.search(r"'text':\s*'([^']*)'", raw_name)
                if match:
                    return match.group(1)
                match = re.search(r'"text":\s*"([^"]*)"', raw_name)
                if match:
                    return match.group(1)
            except Exception as e:
                print(f"⚠️ Error cleaning: {e}")

        # Fallback: strip structural characters and salvage what we can.
        clean = raw_name.replace('[', '').replace(']', '').replace('{', '').replace('}', '')

        if 'text' in clean:
            parts = clean.split('text')
            if len(parts) > 1:
                text_part = parts[-1]
                text_part = text_part.replace('"', '').replace("'", '').replace(':', '').replace(',', '')
                text_part = text_part.strip()
                if text_part and len(text_part) > 3:
                    return text_part

        return clean[:100]

    def search_products(self, analysis, limit=1):
        """Search products matching a Mistral query analysis.

        Args:
            analysis: dict with 'product', 'brand' and optional 'explanation' keys.
            limit: maximum number of rows to return.

        Returns:
            list[dict]: product dicts (empty on error or no match).
        """
        if not self.conn or not self.parquet_path:
            print("❌ Database not initialized")
            return []

        try:
            product = analysis.get("product", "").strip()
            brand = analysis.get("brand", "").strip()

            # NOTE(security): the SQL below is string-built from user input.
            # escape_sql_string keeps quotes safe, but DuckDB parameterized
            # queries would be the safer long-term approach.
            safe_product = self.escape_sql_string(product)
            safe_brand = self.escape_sql_string(brand)
            safe_query = self.escape_sql_string(product)

            conditions = []
            scores = []

            if product and brand:
                product_condition = f"LOWER(CAST(product_name AS VARCHAR)) LIKE LOWER('%{safe_product}%')"
                brand_condition = f"LOWER(CAST(brands AS VARCHAR)) LIKE LOWER('%{safe_brand}%')"
                conditions.append(f"({product_condition} AND {brand_condition})")
                scores.append(f"CASE WHEN {product_condition} AND {brand_condition} THEN 100 ELSE 0 END")
                scores.append(
                    f"CASE WHEN LOWER(CAST(product_name AS VARCHAR)) LIKE LOWER('{safe_product}%') THEN 20 ELSE 0 END")
                scores.append(f"CASE WHEN LOWER(CAST(brands AS VARCHAR)) = LOWER('{safe_brand}') THEN 30 ELSE 0 END")

            elif product and not brand:
                product_condition = f"LOWER(CAST(product_name AS VARCHAR)) LIKE LOWER('%{safe_product}%')"
                conditions.append(product_condition)
                scores.append(
                    f"CASE WHEN LOWER(CAST(product_name AS VARCHAR)) LIKE LOWER('{safe_product}%') THEN 80 ELSE 50 END")
                scores.append("CASE WHEN brands IS NOT NULL AND LENGTH(CAST(brands AS VARCHAR)) > 3 THEN 10 ELSE 0 END")

            elif brand and not product:
                brand_condition = f"LOWER(CAST(brands AS VARCHAR)) LIKE LOWER('%{safe_brand}%')"
                conditions.append(brand_condition)
                scores.append(f"CASE WHEN LOWER(CAST(brands AS VARCHAR)) = LOWER('{safe_brand}') THEN 90 ELSE 60 END")

            else:
                conditions.append(
                    f"(LOWER(CAST(product_name AS VARCHAR)) LIKE LOWER('%{safe_query}%') OR LOWER(CAST(brands AS VARCHAR)) LIKE LOWER('%{safe_query}%'))")
                scores.append(
                    f"CASE WHEN LOWER(CAST(product_name AS VARCHAR)) LIKE LOWER('%{safe_query}%') THEN 40 ELSE 20 END")

            # The dataset is worldwide; restrict results to products sold in France.
            conditions.append("LOWER(CAST(countries_tags AS VARCHAR)) LIKE '%france%'")

            where_clause = " AND ".join(conditions)
            score_calc = " + ".join(scores) if scores else "1"

            # Drop weak matches entirely instead of ranking them last.
            min_score_threshold = 30
            where_clause = f"({where_clause}) AND (({score_calc}) >= {min_score_threshold})"

            sql = f"""
                SELECT DISTINCT
                    product_name,         -- 0
                    brands,               -- 1
                    nutriscore_grade,     -- 2
                    ecoscore_grade,       -- 3
                    nova_group,           -- 4
                    categories,           -- 5
                    ingredients_n,        -- 6
                    additives_n,          -- 7
                    allergens_tags,       -- 8
                    ingredients_text,     -- 9
                    ingredients_tags,     -- 10
                    nutriments,           -- 11
                    origins,              -- 12
                    serving_size,         -- 13
                    quantity,             -- 14
                    labels_tags,          -- 15
                    ({score_calc}) AS score  -- 16
                FROM '{self.parquet_path}'
                WHERE {where_clause}
                ORDER BY score DESC, product_name ASC
                LIMIT {limit}
            """

            results = self.conn.execute(sql).fetchall()

            products = []
            for row in results:
                raw_name = str(row[0]) if row[0] else 'N/A'
                clean_name = self.clean_product_name(raw_name)

                products.append({
                    'product_name': clean_name[:100],
                    'brands': str(row[1])[:50] if row[1] else 'N/A',
                    'nutriscore_grade': str(row[2]).lower() if row[2] else '',
                    'ecoscore_grade': str(row[3]).lower() if row[3] else '',
                    'nova_group': row[4] if row[4] else None,
                    'categories': str(row[5])[:100] if row[5] else 'N/A',
                    'ingredients_count': row[6] if row[6] else 0,
                    'additives_count': row[7] if row[7] else 0,
                    'allergens': self._clean_tags(row[8]),
                    'ingredients_text': str(row[9])[:500] if row[9] else 'N/A',
                    'ingredients_tags': self._clean_tags(row[10]) if row[10] else [],
                    'nutriments': (row[11]) if row[11] else 'N/A',
                    'origins': str(row[12])[:50] if row[12] else 'N/A',
                    'serving_size': str(row[13])[:20] if row[13] else 'N/A',
                    'quantity': str(row[14])[:30] if row[14] else 'N/A',
                    'labels': self._clean_tags(row[15]) if row[15] else [],
                    'score': float(row[16]) if row[16] else 0.0,
                    'mistral_analysis': analysis.get('explanation', '')
                })

            return products

        except Exception as e:
            print(f"❌ Search error: {e}")
            return []
|
services/mistral_service.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
import json
|
| 3 |
+
from config.settings import MISTRAL_API_KEY, MISTRAL_URL
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class MistralService:
    """Thin client around the Mistral chat-completions API used to split a food
    search query into a product part and a brand part."""

    def __init__(self):
        self.api_key = MISTRAL_API_KEY
        self.api_url = MISTRAL_URL

    def analyze_query(self, query):
        """Analyze a user query to distinguish brand from product.

        Args:
            query: raw user search text.

        Returns:
            dict: with 'product', 'brand', 'quality_filter' and 'explanation'
            keys. A best-effort fallback dict is returned when no API key is
            configured or the API call fails — never None.
        """

        if not self.api_key:
            return {"product": query, "brand": "", "quality_filter": "none", "explanation": "Pas de clé Mistral"}

        prompt = f"""Analyse cette requête de recherche alimentaire, tu es capable de différencier la marque et le produit, et réponds en JSON :

Requête: "{query}"

RÈGLES IMPORTANTES :
- Si c'est un nom de produit avec parfum/goût (ex: "danette chocolat", "yaourt fraise", "pizza 4 fromages"), alors TOUT est le PRODUIT
- Séparer produit/marque SEULEMENT si ton analyse en conclue que c'est une marque.
- Les parfums/goûts/saveurs ne sont PAS des marques : chocolat, vanille, fraise, pistache, caramel, etc.

Réponds uniquement avec ce format JSON :
{{
    "product": "nom du produit recherché",
    "brand": "marque si explicitement une VRAIE marque connue ou vide",
    "explanation": "explication courte"
}}

Exemples :
- "danette pistache" → {{"product": "danette pistache", "brand": "", "quality_filter": "none", "explanation": "Recherche produit Danette parfum pistache"}}
- "yaourt fraise" → {{"product": "yaourt fraise", "brand": "", "quality_filter": "none", "explanation": "Recherche yaourt parfum fraise"}}
- "pizza 4 fromages" → {{"product": "pizza 4 fromages", "brand": "", "quality_filter": "none", "explanation": "Recherche pizza 4 fromages"}}
- "yaourt danone fraise" → {{"product": "yaourt fraise", "brand": "danone", "quality_filter": "none", "explanation": "Yaourt fraise de la marque Danone"}}
- "pizza picard" → {{"product": "pizza", "brand": "picard", "quality_filter": "none", "explanation": "Pizzas de la marque Picard"}}
- "nutella" → {{"product": "nutella", "brand": "", "quality_filter": "none", "explanation": "Recherche produit Nutella"}}
- "coca cola" → {{"product": "coca cola", "brand": "", "quality_filter": "none", "explanation": "Recherche produit Coca Cola"}}
"""

        try:
            headers = {
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json"
            }

            # Low temperature: we want a deterministic, parseable JSON answer.
            data = {
                "model": "mistral-small",
                "messages": [{"role": "user", "content": prompt}],
                "temperature": 0.1,
                "max_tokens": 200
            }

            response = requests.post(self.api_url, headers=headers, json=data, timeout=10)

            if response.status_code == 200:
                result = response.json()
                content = result["choices"][0]["message"]["content"]

                # Strip a possible markdown code fence around the JSON.
                content = content.strip()
                if content.startswith("```json"):
                    content = content[7:]
                if content.startswith("```"):
                    content = content[3:]
                if content.endswith("```"):
                    content = content[:-3]

                # Keep only the outermost {...} object in case of extra prose.
                start_idx = content.find('{')
                end_idx = content.rfind('}')
                if start_idx != -1 and end_idx != -1:
                    content = content[start_idx:end_idx + 1]

                analysis = json.loads(content)
                print(f"🤖 Mistral understand : {analysis.get('explanation', '')}")
                return analysis

            else:
                print(f"❌ Mistral API error: {response.status_code}")

        except Exception as e:
            print(f"❌ Mistral error: {e}")

        # Bug fix: the original returned None on API failure, which later broke
        # callers doing analysis.get(...). Fall back to a query-only analysis.
        return {"product": query, "brand": "", "quality_filter": "none", "explanation": ""}
|
services/search_service.py
ADDED
|
@@ -0,0 +1,377 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import re
|
| 3 |
+
from typing import List, Dict, Any, Optional
|
| 4 |
+
from services.mistral_service import MistralService
|
| 5 |
+
from services.database_service import DatabaseService
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class FoodSearchService:
|
| 9 |
+
"""Service pour la recherche et le traitement des données alimentaires"""
|
| 10 |
+
|
| 11 |
+
    def __init__(self):
        # Query understanding (Mistral API) + product lookup (DuckDB over the
        # OpenFoodFacts parquet file). DatabaseService may download the dataset
        # on first construction.
        self.mistral_service = MistralService()
        self.database_service = DatabaseService()
|
| 14 |
+
|
| 15 |
+
def search_products(self, query: str, max_results: int = 5) -> str:
|
| 16 |
+
"""
|
| 17 |
+
Recherche de produits alimentaires avec analyse intelligente par IA.
|
| 18 |
+
|
| 19 |
+
Cette fonction utilise Mistral AI pour comprendre les requêtes en langage naturel
|
| 20 |
+
et recherche dans la base OpenFoodFacts pour retourner des informations détaillées
|
| 21 |
+
sur les produits alimentaires.
|
| 22 |
+
|
| 23 |
+
Args:
|
| 24 |
+
query (str): Requête de recherche en langage naturel.
|
| 25 |
+
Exemples :
|
| 26 |
+
- "pepito" : recherche tous les produits Pepito
|
| 27 |
+
- "pizza picard" : pizzas de la marque Picard
|
| 28 |
+
- "yaourt sans lactose" : yaourts adaptés
|
| 29 |
+
- "biscuits lu bio" : biscuits LU avec label bio
|
| 30 |
+
|
| 31 |
+
max_results (int): Nombre maximum de résultats à retourner (1-20).
|
| 32 |
+
Défaut: 5
|
| 33 |
+
|
| 34 |
+
Returns:
|
| 35 |
+
str: Résultats formatés en markdown contenant pour chaque produit :
|
| 36 |
+
- Nom du produit et marque
|
| 37 |
+
- Scores nutritionnels (Nutri-Score, NOVA)
|
| 38 |
+
- Composition (nombre d'ingrédients et additifs)
|
| 39 |
+
- Liste complète des ingrédients
|
| 40 |
+
- Allergènes présents
|
| 41 |
+
- Valeurs nutritionnelles (énergie, matières grasses, sucres, sel, protéines)
|
| 42 |
+
- Informations produit (quantité, portion)
|
| 43 |
+
- Catégories du produit
|
| 44 |
+
|
| 45 |
+
Raises:
|
| 46 |
+
Exception: En cas d'erreur de recherche ou de connexion à la base de données
|
| 47 |
+
|
| 48 |
+
Examples:
|
| 49 |
+
>>> service = FoodSearchService()
|
| 50 |
+
>>> results = service.search_products("nutella", 3)
|
| 51 |
+
>>> print(results)
|
| 52 |
+
**📊 3 produits trouvés :**
|
| 53 |
+
|
| 54 |
+
**1. Nutella**
|
| 55 |
+
- **Marque**: Ferrero
|
| 56 |
+
- **Scores**: Nutri: 🔴E | NOVA: 🔴4
|
| 57 |
+
- **🧾 Ingrédients**: Sucre, huile de palme, noisettes...
|
| 58 |
+
"""
|
| 59 |
+
if not query.strip():
|
| 60 |
+
return "❌ Enter your research"
|
| 61 |
+
|
| 62 |
+
try:
|
| 63 |
+
analysis = self.mistral_service.analyze_query(query)
|
| 64 |
+
except Exception as e:
|
| 65 |
+
print(f"Mistral not ready: {e}")
|
| 66 |
+
analysis = {'keywords': [query], 'brand': '', 'category': ''}
|
| 67 |
+
|
| 68 |
+
# Rechercher dans la base
|
| 69 |
+
try:
|
| 70 |
+
products = self.database_service.search_products(analysis, limit=max_results)
|
| 71 |
+
except Exception as e:
|
| 72 |
+
return f"❌ Search error: {str(e)}"
|
| 73 |
+
|
| 74 |
+
if not products:
|
| 75 |
+
return f"❌ No results for '{query}'"
|
| 76 |
+
|
| 77 |
+
return self._format_products_results(products)
|
| 78 |
+
|
| 79 |
+
def _format_products_results(self, products: List[Dict[str, Any]]) -> str:
|
| 80 |
+
"""Format results in markdown"""
|
| 81 |
+
result = f"**📊 {len(products)} produits trouvés :**\n\n"
|
| 82 |
+
|
| 83 |
+
for i, product in enumerate(products, 1):
|
| 84 |
+
try:
|
| 85 |
+
result += self._format_single_product(i, product)
|
| 86 |
+
except Exception as e:
|
| 87 |
+
print(f"Error formating product {i}: {e}")
|
| 88 |
+
result += f"**{i}. Erreur lors du traitement du produit**\n\n"
|
| 89 |
+
continue
|
| 90 |
+
|
| 91 |
+
return result
|
| 92 |
+
|
| 93 |
+
def _format_single_product(self, index: int, product: Dict[str, Any]) -> str:
|
| 94 |
+
"""Formate single produdt"""
|
| 95 |
+
product_name = product.get('product_name', 'Produit inconnu')
|
| 96 |
+
brand = product.get('brands', 'N/A')
|
| 97 |
+
|
| 98 |
+
result = f"**{index}. {product_name}**\n"
|
| 99 |
+
result += f"- **Marque**: {brand}\n"
|
| 100 |
+
|
| 101 |
+
# Scores nutritionnels
|
| 102 |
+
result += self._format_nutrition_scores(product)
|
| 103 |
+
|
| 104 |
+
# Composition
|
| 105 |
+
result += self._format_composition(product)
|
| 106 |
+
|
| 107 |
+
# Ingrédients
|
| 108 |
+
result += self._format_ingredients(product)
|
| 109 |
+
|
| 110 |
+
# Allergènes
|
| 111 |
+
result += self._format_allergens(product)
|
| 112 |
+
|
| 113 |
+
# Valeurs nutritionnelles
|
| 114 |
+
result += self._format_nutrition_values(product)
|
| 115 |
+
|
| 116 |
+
# Informations additionnelles
|
| 117 |
+
result += self._format_additional_info(product)
|
| 118 |
+
|
| 119 |
+
# Catégories
|
| 120 |
+
result += self._format_categories(product)
|
| 121 |
+
|
| 122 |
+
result += "\n"
|
| 123 |
+
return result
|
| 124 |
+
|
| 125 |
+
def _format_nutrition_scores(self, product: Dict[str, Any]) -> str:
|
| 126 |
+
"""Format nutrition scores"""
|
| 127 |
+
scores_line = []
|
| 128 |
+
|
| 129 |
+
# Nutri-Score
|
| 130 |
+
nutri_grade = product.get('nutriscore_grade')
|
| 131 |
+
if nutri_grade and nutri_grade != 'unknown':
|
| 132 |
+
nutri_emoji = self._get_grade_emoji(nutri_grade, 'nutri')
|
| 133 |
+
scores_line.append(f"Nutri: {nutri_emoji}{nutri_grade.upper()}")
|
| 134 |
+
|
| 135 |
+
# Groupe NOVA
|
| 136 |
+
nova_group = product.get('nova_group')
|
| 137 |
+
if nova_group:
|
| 138 |
+
nova_emoji = self._get_grade_emoji(nova_group, 'nova')
|
| 139 |
+
scores_line.append(f"NOVA: {nova_emoji}{nova_group}")
|
| 140 |
+
|
| 141 |
+
return f"- **Scores**: {' | '.join(scores_line)}\n" if scores_line else ""
|
| 142 |
+
|
| 143 |
+
def _format_composition(self, product: Dict[str, Any]) -> str:
|
| 144 |
+
"""Format composition (ingrédients/additifs)"""
|
| 145 |
+
composition_parts = []
|
| 146 |
+
ingredients_count = product.get('ingredients_count', 0)
|
| 147 |
+
additives_count = product.get('additives_count', 0)
|
| 148 |
+
|
| 149 |
+
if ingredients_count and ingredients_count > 0:
|
| 150 |
+
composition_parts.append(f"{ingredients_count} ingrédients")
|
| 151 |
+
if additives_count and additives_count > 0:
|
| 152 |
+
composition_parts.append(f"⚠️ {additives_count} additifs")
|
| 153 |
+
|
| 154 |
+
return f"- **Composition**: {', '.join(composition_parts)}\n" if composition_parts else ""
|
| 155 |
+
|
| 156 |
+
def _format_ingredients(self, product: Dict[str, Any]) -> str:
|
| 157 |
+
"""Format ingredients"""
|
| 158 |
+
ingredients_text = product.get('ingredients_text')
|
| 159 |
+
if ingredients_text and ingredients_text != 'N/A':
|
| 160 |
+
ingredients_clean = self._extract_ingredients_text(ingredients_text)
|
| 161 |
+
|
| 162 |
+
if ingredients_clean:
|
| 163 |
+
# Limiter la longueur pour l'affichage
|
| 164 |
+
if len(ingredients_clean) > 300:
|
| 165 |
+
ingredients_display = ingredients_clean[:300] + "..."
|
| 166 |
+
else:
|
| 167 |
+
ingredients_display = ingredients_clean
|
| 168 |
+
return f"- **🧾 Ingrédients**: {ingredients_display}\n"
|
| 169 |
+
|
| 170 |
+
return ""
|
| 171 |
+
|
| 172 |
+
def _format_allergens(self, product: Dict[str, Any]) -> str:
|
| 173 |
+
"""Format allergens"""
|
| 174 |
+
allergens = product.get('allergens', [])
|
| 175 |
+
if allergens and len(allergens) > 0:
|
| 176 |
+
allergens_clean = [self._clean_allergen_name(a) for a in allergens[:4]]
|
| 177 |
+
allergens_clean = [a for a in allergens_clean if a]
|
| 178 |
+
if allergens_clean:
|
| 179 |
+
return f"- **⚠️ Allergènes**: {', '.join(allergens_clean)}\n"
|
| 180 |
+
|
| 181 |
+
return ""
|
| 182 |
+
|
| 183 |
+
def _format_nutrition_values(self, product: Dict[str, Any]) -> str:
|
| 184 |
+
"""Format nutrition values"""
|
| 185 |
+
nutrients = self._parse_nutrients(product.get('nutriments', {}))
|
| 186 |
+
nutrition_display = []
|
| 187 |
+
|
| 188 |
+
# Ordre d'importance pour l'affichage
|
| 189 |
+
key_nutrients = [
|
| 190 |
+
'energy-kcal', 'energy_kcal',
|
| 191 |
+
'sugars',
|
| 192 |
+
'fat',
|
| 193 |
+
'salt', 'sodium',
|
| 194 |
+
'proteins',
|
| 195 |
+
'saturated-fat', 'saturated_fat',
|
| 196 |
+
'carbohydrates'
|
| 197 |
+
]
|
| 198 |
+
|
| 199 |
+
for nutrient_name in key_nutrients:
|
| 200 |
+
if nutrient_name in nutrients:
|
| 201 |
+
formatted_value = self._format_nutrition_value(nutrients[nutrient_name])
|
| 202 |
+
if formatted_value != "N/A":
|
| 203 |
+
display_name = {
|
| 204 |
+
'energy-kcal': 'Énergie', 'energy_kcal': 'Énergie',
|
| 205 |
+
'fat': 'Matières grasses',
|
| 206 |
+
'saturated-fat': 'Sat. grasses', 'saturated_fat': 'Sat. grasses',
|
| 207 |
+
'sugars': 'Sucres',
|
| 208 |
+
'salt': 'Sel', 'sodium': 'Sel',
|
| 209 |
+
'proteins': 'Protéines',
|
| 210 |
+
'carbohydrates': 'Glucides'
|
| 211 |
+
}.get(nutrient_name, nutrient_name.title())
|
| 212 |
+
nutrition_display.append(f"{display_name}: {formatted_value}")
|
| 213 |
+
|
| 214 |
+
if len(nutrition_display) >= 4:
|
| 215 |
+
break
|
| 216 |
+
|
| 217 |
+
return f"- **🍽️ Nutrition** (100g): {' | '.join(nutrition_display)}\n" if nutrition_display else ""
|
| 218 |
+
|
| 219 |
+
def _format_additional_info(self, product: Dict[str, Any]) -> str:
|
| 220 |
+
"""Format additional info"""
|
| 221 |
+
additional_info = []
|
| 222 |
+
serving_size = product.get('serving_size')
|
| 223 |
+
quantity = product.get('quantity')
|
| 224 |
+
|
| 225 |
+
if quantity and quantity != 'N/A':
|
| 226 |
+
additional_info.append(f"📦 {quantity}")
|
| 227 |
+
if serving_size and serving_size != 'N/A':
|
| 228 |
+
additional_info.append(f"🥄 Portion: {serving_size}")
|
| 229 |
+
|
| 230 |
+
return f"- **ℹ️ Infos**: {' | '.join(additional_info)}\n" if additional_info else ""
|
| 231 |
+
|
| 232 |
+
def _format_categories(self, product: Dict[str, Any]) -> str:
|
| 233 |
+
"""Formate categories"""
|
| 234 |
+
categories = product.get('categories')
|
| 235 |
+
if categories and categories != 'N/A':
|
| 236 |
+
cats_clean = categories.replace(',', ' → ').replace('Snacks → ', '')
|
| 237 |
+
if len(cats_clean) > 60:
|
| 238 |
+
cats_clean = cats_clean[:60] + "..."
|
| 239 |
+
return f"- **📂 Catégorie**: {cats_clean}\n"
|
| 240 |
+
|
| 241 |
+
return ""
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
def _get_grade_emoji(self, grade, grade_type='nutri') -> str:
|
| 245 |
+
"""Return grad emoji"""
|
| 246 |
+
if not grade or grade == 'unknown':
|
| 247 |
+
return ''
|
| 248 |
+
|
| 249 |
+
if grade_type == 'nutri':
|
| 250 |
+
return {'a': '🟢', 'b': '🟡', 'c': '🟠', 'd': '🔴', 'e': '🔴'}.get(str(grade).lower(), '')
|
| 251 |
+
elif grade_type == 'nova':
|
| 252 |
+
return {1: '🟢', 2: '🟡', 3: '🟠', 4: '🔴'}.get(grade, '')
|
| 253 |
+
return ''
|
| 254 |
+
|
| 255 |
+
def _clean_allergen_name(self, allergen: str) -> str:
|
| 256 |
+
"""Clean allergen name"""
|
| 257 |
+
if not allergen:
|
| 258 |
+
return allergen
|
| 259 |
+
cleaned = allergen.replace('en:', '').replace('fr:', '')
|
| 260 |
+
cleaned = cleaned.replace('-', ' ').title()
|
| 261 |
+
return cleaned
|
| 262 |
+
|
| 263 |
+
def _extract_ingredients_text(self, ingredients_data) -> str:
    """Pull a readable ingredients string out of raw ingredient data.

    The data is stringified, then scanned for quoted 'text' values of
    at least 20 characters (any quote combination); the longest match
    wins. A looser fallback pattern is tried when none match. The
    extracted text is passed through _clean_ingredients_text.
    Returns "" when nothing usable is found or on any error.
    """
    if not ingredients_data or ingredients_data == 'N/A':
        return ""

    try:
        blob = str(ingredients_data)

        # 'text' values in every quote style the serialized data may use.
        text_patterns = (
            r"'text':\s*'([^']{20,})'",
            r'"text":\s*"([^"]{20,})"',
            r"'text':\s*\"([^\"]{20,})\"",
            r'"text":\s*\'([^\']{20,})\'',
        )
        candidates = [
            match
            for pattern in text_patterns
            for match in re.findall(pattern, blob, re.DOTALL)
        ]
        if candidates:
            # Longest candidate is assumed to be the full ingredient list.
            return self._clean_ingredients_text(max(candidates, key=len))

        # Fallback: anything alphabetic following a bare 'text' marker.
        loose = re.findall(r"text[^a-zA-Z]*([a-zA-Z][^}]{30,})", blob)
        if loose:
            return self._clean_ingredients_text(loose[0])

    except Exception as e:
        print(f"Error ingredients extraction: {e}")

    return ""
|
| 297 |
+
|
| 298 |
+
def _clean_ingredients_text(self, text: str) -> str:
|
| 299 |
+
"""Clean ingredients text"""
|
| 300 |
+
if not text:
|
| 301 |
+
return ""
|
| 302 |
+
|
| 303 |
+
text = re.sub(r'<span class="allergen">(.*?)</span>', r'\1', text)
|
| 304 |
+
|
| 305 |
+
text = text.replace('\\', '').replace('\\"', '"').replace("\\'", "'")
|
| 306 |
+
text = text.replace('"', '"')
|
| 307 |
+
|
| 308 |
+
text = re.sub(r'\s*%\s*', '% ', text)
|
| 309 |
+
text = re.sub(r'\s*\(\s*', ' (', text)
|
| 310 |
+
text = re.sub(r'\s*\)\s*', ') ', text)
|
| 311 |
+
text = re.sub(r'\s+', ' ', text)
|
| 312 |
+
|
| 313 |
+
return text.strip()
|
| 314 |
+
|
| 315 |
+
def _parse_nutrients(self, nutrients_data) -> Dict[str, Any]:
|
| 316 |
+
"""Parse nutrients"""
|
| 317 |
+
if not nutrients_data or nutrients_data == 'N/A':
|
| 318 |
+
return {}
|
| 319 |
+
|
| 320 |
+
try:
|
| 321 |
+
# Si c'est déjà une liste
|
| 322 |
+
if isinstance(nutrients_data, list):
|
| 323 |
+
result = {}
|
| 324 |
+
for nutrient in nutrients_data:
|
| 325 |
+
if isinstance(nutrient, dict) and 'name' in nutrient:
|
| 326 |
+
result[nutrient['name']] = nutrient
|
| 327 |
+
return result
|
| 328 |
+
|
| 329 |
+
# Si c'est une chaîne
|
| 330 |
+
if isinstance(nutrients_data, str):
|
| 331 |
+
if nutrients_data.startswith('[') and 'name' in nutrients_data:
|
| 332 |
+
try:
|
| 333 |
+
import ast
|
| 334 |
+
nutrients_list = ast.literal_eval(nutrients_data)
|
| 335 |
+
if isinstance(nutrients_list, list):
|
| 336 |
+
result = {}
|
| 337 |
+
for nutrient in nutrients_list:
|
| 338 |
+
if isinstance(nutrient, dict) and 'name' in nutrient:
|
| 339 |
+
result[nutrient['name']] = nutrient
|
| 340 |
+
return result
|
| 341 |
+
except:
|
| 342 |
+
pass
|
| 343 |
+
|
| 344 |
+
try:
|
| 345 |
+
nutrients_list = json.loads(nutrients_data)
|
| 346 |
+
result = {}
|
| 347 |
+
for nutrient in nutrients_list:
|
| 348 |
+
if isinstance(nutrient, dict) and 'name' in nutrient:
|
| 349 |
+
result[nutrient['name']] = nutrient
|
| 350 |
+
return result
|
| 351 |
+
except:
|
| 352 |
+
pass
|
| 353 |
+
|
| 354 |
+
except:
|
| 355 |
+
pass
|
| 356 |
+
|
| 357 |
+
return {}
|
| 358 |
+
|
| 359 |
+
def _format_nutrition_value(self, nutrient_data) -> str:
|
| 360 |
+
"""Format nutrition value"""
|
| 361 |
+
if not nutrient_data:
|
| 362 |
+
return "N/A"
|
| 363 |
+
|
| 364 |
+
value = nutrient_data.get('100g', nutrient_data.get('value', 0))
|
| 365 |
+
unit = nutrient_data.get('unit', '')
|
| 366 |
+
|
| 367 |
+
if value is None or value == 0:
|
| 368 |
+
return "N/A"
|
| 369 |
+
|
| 370 |
+
if 'kcal' in unit.lower():
|
| 371 |
+
return f"{int(value)} kcal"
|
| 372 |
+
elif 'kj' in unit.lower():
|
| 373 |
+
return f"{int(value)} kJ"
|
| 374 |
+
elif unit == 'g':
|
| 375 |
+
return f"{value:.1f}g"
|
| 376 |
+
else:
|
| 377 |
+
return f"{value} {unit}" if unit else f"{value}"
|
ui/gradio_ui.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from services.search_service import FoodSearchService
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class GradioUI:
    """Gradio front-end for the food product search service.

    Wraps a FoodSearchService and exposes a single-page Blocks UI:
    a query textbox, a result-count slider, and a markdown result area.
    """

    def __init__(self):
        # Backend service that performs the actual product search.
        self.food_service = FoodSearchService()

    def search_products(self, query: str, max_results: int) -> str:
        """Run a product search and return its markdown-formatted result.

        :param query: free-text product query (e.g. "biscuits lu").
        :param max_results: maximum number of products to display.
        :return: markdown string produced by the search service.
        """
        return self.food_service.search_products(query, max_results)

    def create_interface(self):
        """Build and return the Gradio Blocks interface (not launched here)."""
        with gr.Blocks(title="🤖 Healthify me", theme=gr.themes.Soft()) as demo:
            # Header
            gr.Markdown("""
            # 🤖 Healthify me
            """)

            with gr.Row():
                with gr.Column(scale=3):
                    search_input = gr.Textbox(
                        label="🔍 Search for product",
                        placeholder="Ex: pepito, biscuits lu, pizza picard, yaourt danone..."
                    )

                with gr.Column(scale=1):
                    max_results = gr.Slider(
                        minimum=1,
                        maximum=20,
                        value=5,
                        step=1,
                        label="📊 Number of results",
                        info="How many results ?"
                    )

            search_btn = gr.Button("🤖 Search", variant="primary", size="lg")

            search_output = gr.Markdown()

            # Button click and pressing Enter in the textbox trigger the
            # same search; share the wiring so the two can never drift apart.
            event_kwargs = dict(
                fn=self.search_products,
                inputs=[search_input, max_results],
                outputs=[search_output],
            )
            search_btn.click(**event_kwargs)
            search_input.submit(**event_kwargs)

        return demo
|
| 60 |
+
|