kerols77 commited on
Commit
ce8e104
·
verified ·
1 Parent(s): 8364273

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +627 -627
app.py CHANGED
@@ -1,627 +1,627 @@
1
- from flask import Flask, request, jsonify
2
- import pandas as pd
3
- from transformers import pipeline
4
- import os
5
- import re
6
- import json
7
- import requests
8
- import random
9
- from difflib import get_close_matches
10
- from textblob import TextBlob
11
- from nltk.tokenize import word_tokenize, sent_tokenize
12
- import nltk
13
- import ast
14
- from urllib.parse import quote
15
-
16
def force_download_nltk():
    """Make sure the NLTK data / transformers cache directories exist and punkt is installed."""
    nltk_data_dir = os.environ.get("NLTK_DATA", "/app/nltk_data")
    transformers_cache_dir = os.environ.get("TRANSFORMERS_CACHE", "/app/transformers_cache")
    for cache_dir in (nltk_data_dir, transformers_cache_dir):
        os.makedirs(cache_dir, exist_ok=True)
    os.environ["NLTK_DATA"] = nltk_data_dir
    os.environ["TRANSFORMERS_CACHE"] = transformers_cache_dir
    for package in ("punkt",):
        try:
            nltk.data.find(f"tokenizers/{package}")
        except LookupError:
            print(f"Downloading NLTK package: {package} to {nltk_data_dir}")
            nltk.download(package, download_dir=nltk_data_dir)


force_download_nltk()
31
-
32
# Nutrition vocabulary that the spell-corrector must never "fix".
domain_words = {
    "carb", "carbs", "carbo", "carbohydrate", "carbohydrates",
    "fat", "fats", "protein", "proteins", "fiber", "cholesterol",
    "calcium", "iron", "magnesium", "potassium", "sodium", "vitamin", "vitamin c",
    "calories", "calorie",
}
38
-
39
def smart_correct_spelling(text, domain_set):
    """Spell-correct *text* token by token, leaving domain vocabulary untouched.

    Only purely alphabetic tokens outside *domain_set* are run through
    TextBlob's corrector; everything else passes through unchanged.
    """
    corrected_tokens = [
        str(TextBlob(token).correct())
        if token.isalpha() and token.lower() not in domain_set
        else token
        for token in word_tokenize(text)
    ]
    return " ".join(corrected_tokens)
49
-
50
# Hugging Face pipelines shared by the whole service (loaded once at import time;
# first run downloads the model weights into TRANSFORMERS_CACHE).
qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
53
-
54
def summarize_input(text):
    """Condense *text* with the BART summarizer and return the summary string."""
    result = summarizer(text, max_length=130, min_length=30, do_sample=False)
    return result[0]['summary_text']
57
-
58
# --- Dataset / lookup-file loading ------------------------------------------
# Forward-slash paths work on both Windows and the Linux containers this app
# deploys to; the original raw backslash paths (r"Datasets\...") only resolved
# on Windows and made every request 500 on Linux.
df = pd.read_csv("Datasets/Final used Datasets/food_dataset_with_nutriition.csv")
print(f"Starting with {len(df)} recipes in dataset")
nutrition_columns = ["calories", "Total fats", "Carbohydrate", "Fiber", "Protein",
                     "Cholesterol", "Calcium", "Iron", "Magnesium", "Potassium", "Sodium", "Vitamin C"]
for col in nutrition_columns:
    # Coerce malformed cells to NaN instead of raising at read time.
    df[col] = pd.to_numeric(df[col], errors='coerce')

disease_df = pd.read_csv("Datasets/Final used Datasets/disease_food_nutrition_mapping.csv")
disease_df["Disease"] = disease_df["Disease"].str.lower()

# Load the misspelling map, seeding a minimal default file when absent.
try:
    with open("docs/common_misspellings.json", "r") as file:
        common_misspellings = json.load(file)
except FileNotFoundError:
    common_misspellings = {"suger": "sugar", "milc": "milk"}
    with open("docs/common_misspellings.json", "w") as file:
        json.dump(common_misspellings, file, indent=2)

# Load the base ingredient list, seeding a minimal default file when absent.
try:
    with open("docs/common_ingredients.json", "r") as file:
        common_ingredients = json.load(file)
except FileNotFoundError:
    common_ingredients = ["sugar", "salt", "flour", "milk", "eggs", "butter", "oil", "water"]
    with open("docs/common_ingredients.json", "w") as file:
        json.dump(common_ingredients, file, indent=2)
83
-
84
def create_ingredient_dictionary(dataframe, common_ingredients_list):
    """Build a deduplicated ingredient vocabulary from the recipe dataframe.

    Seeds the vocabulary with the known common ingredients plus every
    canonical spelling from the misspelling map, then mines the
    'ingredients' column, stripping measurements, prep words and
    parenthesised notes.  Sorted longest-first so substring matching
    elsewhere prefers the most specific name.
    """
    measure_pattern = r'\d+[\s/]*(oz|ounce|cup|tbsp|tsp|tablespoon|teaspoon|pound|lb|g|ml|l|pinch|dash)\b\.?'
    prep_pattern = r'\b(fresh|freshly|chopped|minced|diced|sliced|grated|ground|powdered|crushed|toasted|roasted)\b'
    vocabulary = list(common_ingredients_list)
    vocabulary.extend(set(common_misspellings.values()))
    for raw_entry in dataframe['ingredients']:
        for part in re.split(r',|\sand\s|\sor\s|;', str(raw_entry)):
            fragment = re.sub(measure_pattern, '', part)
            fragment = re.sub(prep_pattern, '', fragment)
            fragment = re.sub(r'\(.*?\)', '', fragment)
            fragment = fragment.strip()
            for candidate in re.split(r'\sand\s|\sor\s', fragment):
                candidate = candidate.strip().lower()
                if candidate and len(candidate) > 2:
                    vocabulary.append(candidate)
    unique_ingredients = list(set(vocabulary))
    unique_ingredients.sort(key=len, reverse=True)
    return unique_ingredients


food_dictionary = create_ingredient_dictionary(df, common_ingredients)
108
-
109
def identify_food_ingredient(text, ingredient_dict, misspellings_dict):
    """Map free text to a known ingredient name, or None when nothing matches.

    Match order: exact misspelling/dictionary hit on the cleaned text,
    per-word hits, tight fuzzy match (cutoff 0.8), substring containment,
    then a loose fuzzy match (cutoff 0.6).
    """
    cleaned = re.sub(
        r'\d+[\s/]*(oz|ounce|cup|tbsp|tsp|tablespoon|teaspoon|pound|lb|g|ml|l|pinch|dash)\b\.?',
        '', text)
    cleaned = re.sub(
        r'\b(fresh|freshly|chopped|minced|diced|sliced|grated|ground|powdered|crushed|toasted|roasted)\b',
        '', cleaned)
    cleaned = re.sub(r'\(.*?\)', '', cleaned)
    cleaned = cleaned.strip().lower()
    if cleaned in misspellings_dict:
        return misspellings_dict[cleaned]
    if cleaned in ingredient_dict:
        return cleaned
    for word in cleaned.split():
        if word in ingredient_dict:
            return word
        if word in misspellings_dict:
            return misspellings_dict[word]
    tight = get_close_matches(cleaned, ingredient_dict, n=3, cutoff=0.8)
    if tight:
        return tight[0]
    for known in ingredient_dict:
        if known in cleaned:
            return known
    loose = get_close_matches(cleaned, ingredient_dict, n=3, cutoff=0.6)
    if loose:
        return loose[0]
    return None
138
-
139
def correct_food_ingredient(ingredient, ingredient_dict, misspellings_dict):
    """Normalize an ingredient name, always returning a usable string.

    Same cleaning as identify_food_ingredient, but when nothing in the
    dictionary matches it falls back to the cleaned input itself rather
    than None.
    """
    cleaned = re.sub(
        r'\d+[\s/]*(oz|ounce|cup|tbsp|tsp|tablespoon|teaspoon|pound|lb|g|ml|l|pinch|dash)\b\.?',
        '', ingredient)
    cleaned = re.sub(
        r'\b(fresh|freshly|chopped|minced|diced|sliced|grated|ground|powdered|crushed|toasted|roasted)\b',
        '', cleaned)
    cleaned = re.sub(r'\(.*?\)', '', cleaned)
    cleaned = cleaned.strip().lower()
    if cleaned in misspellings_dict:
        return misspellings_dict[cleaned]
    if cleaned in ingredient_dict:
        return cleaned
    # Fuzzy matching: try a strict cutoff first, then a permissive one.
    for cutoff in (0.8, 0.6):
        matches = get_close_matches(cleaned, ingredient_dict, n=3, cutoff=cutoff)
        if matches:
            return matches[0]
    for known in ingredient_dict:
        if cleaned in known or known in cleaned:
            return known
    return cleaned
162
-
163
def add_misspelling(misspelled, correct):
    """Persist a misspelled -> correct pair into docs/common_misspellings.json.

    Returns True on success, False on any failure (matching the original
    contract).  Now robust to a missing docs/ directory or mapping file
    (starts from an empty map), and uses forward-slash paths so it works
    on non-Windows hosts too.
    """
    path = "docs/common_misspellings.json"
    try:
        os.makedirs("docs", exist_ok=True)
        try:
            with open(path, "r") as file:
                misspellings = json.load(file)
        except FileNotFoundError:
            # First pair ever recorded — previously this made the whole call fail.
            misspellings = {}
        misspellings[misspelled.lower()] = correct.lower()
        with open(path, "w") as file:
            json.dump(misspellings, file, indent=2, sort_keys=True)
        return True
    except Exception:
        return False
173
-
174
def extract_unwanted_ingredients(input_text):
    """Ask the QA model which ingredients to exclude and normalize its answer.

    The raw answer is split on commas / "and" / "or", each piece is mapped
    through the ingredient dictionary; if nothing normalizes, the raw model
    answer is returned as a single-element list.
    """
    question = "What ingredients should be excluded?"
    result = qa_pipeline(question=question, context=input_text)
    raw_answer = result['answer']
    candidates = []
    for part in raw_answer.split(','):
        for subpart in part.split(' and '):
            for item in subpart.split(' or '):
                stripped = item.strip()
                if stripped:
                    candidates.append(stripped)
    valid_ingredients = []
    for candidate in candidates:
        normalized = identify_food_ingredient(candidate, food_dictionary, common_misspellings)
        if normalized:
            valid_ingredients.append(normalized)
    return valid_ingredients if valid_ingredients else [raw_answer]
191
-
192
def classify_clause(clause):
    """Zero-shot classify a clause as 'include' or 'exclude' (returns lowercase label)."""
    result = classifier(
        clause,
        ["include", "exclude"],
        hypothesis_template="This clause means the ingredient should be {}.",
    )
    return result["labels"][0].lower()
196
-
197
def extract_ingredients_from_clause(clause, ingredient_dict, misspellings_dict):
    """Return the normalized dictionary ingredients mentioned inside *clause*."""
    clause_lower = clause.lower()
    found = set()
    for ingredient in ingredient_dict:
        if ingredient.lower() in clause_lower:
            normalized = identify_food_ingredient(ingredient, ingredient_dict, misspellings_dict)
            if normalized:
                found.add(normalized)
    return list(found)
205
-
206
def classify_ingredients_in_query(query, ingredient_dict, misspellings_dict):
    """Split *query* into clauses and sort mentioned ingredients into include/exclude.

    Phrases like "low fat" / "high protein" are stripped first so nutrition
    wording is not mistaken for ingredient mentions.  Returns two deduplicated
    lists: (ingredients to include, ingredients to exclude).
    """
    nutrition_terms = ['calories', 'calorie', 'fat', 'fats', 'carb', 'carbs', 'protein',
                       'fiber', 'cholesterol', 'calcium', 'iron', 'magnesium',
                       'potassium', 'sodium', 'vitamin']
    modified_query = query
    for term in nutrition_terms:
        modified_query = re.compile(r'(low|high)\s+' + term, re.IGNORECASE).sub('', modified_query)
    include_ingredients, exclude_ingredients = [], []
    for clause in re.split(r'\bbut\b|,', modified_query, flags=re.IGNORECASE):
        clause = clause.strip()
        if not clause:
            continue
        intent = classify_clause(clause)
        mentioned = extract_ingredients_from_clause(clause, ingredient_dict, misspellings_dict)
        if intent == "include":
            include_ingredients.extend(mentioned)
        elif intent == "exclude":
            exclude_ingredients.extend(mentioned)
    return list(set(include_ingredients)), list(set(exclude_ingredients))
229
-
230
def extract_nutrition_from_clause(clause, nutrition_dict, misspellings_dict):
    """Return nutrition terms (lowercased) appearing as whole words in *clause*.

    Terms are scanned longest-first so multiword names like "vitamin c"
    are found.  *misspellings_dict* is accepted for signature parity with
    the other extractors but is not consulted here.
    """
    clause_lower = clause.lower()
    hits = set()
    for term in sorted(nutrition_dict, key=len, reverse=True):
        if re.search(r'\b' + re.escape(term.lower()) + r'\b', clause_lower):
            hits.add(term.lower())
    return list(hits)
239
-
240
def classify_nutrition_in_query(query, nutrition_dict, misspellings_dict):
    """Extract (modifier, nutrient[, threshold]) conditions from *query*.

    Returns two lists of tuples: conditions the user wants satisfied
    (include) and conditions to avoid (exclude).  Modifier is "low" or
    "high"; threshold is the first number found in the clause, omitted
    from the tuple when the clause has no number.
    """
    include_nutrition = []
    exclude_nutrition = []
    clauses = re.split(r'\band\b|,|but', query, flags=re.IGNORECASE)
    # Allergy/sensitivity wording flips the default intent to "exclude".
    overall_intent = "exclude" if re.search(r'sensitivity|allergy|exclude', query, flags=re.IGNORECASE) else "include"
    for clause in clauses:
        clause = clause.strip()
        if not clause:
            continue
        # An explicit "i want" inside a clause overrides the overall intent.
        intent = "include" if "i want" in clause.lower() else overall_intent
        numbers = re.findall(r'\d+(?:\.\d+)?', clause)
        threshold = float(numbers[0]) if numbers else None
        if re.search(r'\b(high|over|above|more than|exceeding)\b', clause, flags=re.IGNORECASE):
            modifier = "high"
        elif re.search(r'\b(low|under|less than|below)\b', clause, flags=re.IGNORECASE):
            modifier = "low"
        else:
            # No explicit direction: excluding implies "high" is the problem,
            # including implies the user wants it "low".
            modifier = "high" if intent == "exclude" else "low"
        terms_found = extract_nutrition_from_clause(clause, nutrition_dict, misspellings_dict)
        for term in terms_found:
            # Normalize to the dataset column name (e.g. "carbs" -> "Carbohydrate").
            norm_term = nutrition_terms_dictionary.get(term, term)
            condition = (modifier, norm_term, threshold) if threshold is not None else (modifier, norm_term)
            if intent == "include":
                include_nutrition.append(condition)
            elif intent == "exclude":
                exclude_nutrition.append(condition)
    return list(set(include_nutrition)), list(set(exclude_nutrition))
267
-
268
# Maps user-facing nutrition vocabulary to the dataset's exact column names.
nutrition_terms_dictionary = {
    "calorie": "calories",
    "calories": "calories",
    "fat": "Total fats",
    "fats": "Total fats",
    "total fat": "Total fats",
    "total fats": "Total fats",
    "carb": "Carbohydrate",
    "carbs": "Carbohydrate",
    "carbo": "Carbohydrate",
    "carbohydrate": "Carbohydrate",
    "carbohydrates": "Carbohydrate",
    "fiber": "Fiber",
    "protein": "Protein",
    "proteins": "Protein",
    "cholesterol": "Cholesterol",
    "calcium": "Calcium",
    "iron": "Iron",
    "magnesium": "Magnesium",
    "potassium": "Potassium",
    "sodium": "Sodium",
    "vitamin c": "Vitamin C"
}

# Default per-recipe cutoffs used when a clause names a nutrient without a
# number; filter_by_nutrition_condition falls back to these for 2-tuples.
fixed_thresholds = {
    "calories": 700,
    "Total fats": 60,
    "Carbohydrate": 120,
    "Fiber": 10,
    "Protein": 30,
    "Cholesterol": 100,
    "Calcium": 300,
    "Iron": 5,
    "Magnesium": 100,
    "Potassium": 300,
    "Sodium": 400,
    "Vitamin C": 50
}
306
-
307
def filter_by_nutrition_condition(df, condition):
    """Filter *df* by one (modifier, column[, threshold]) nutrition condition.

    "low" keeps rows strictly below the threshold; "high" keeps rows at or
    above it.  A 2-tuple condition takes its threshold from fixed_thresholds.
    Unknown shapes, unmapped/missing columns, or absent thresholds leave the
    dataframe untouched.
    """
    if isinstance(condition, tuple):
        if len(condition) == 3:
            direction, nutrition_term, threshold = condition
        elif len(condition) == 2:
            direction, nutrition_term = condition
            threshold = fixed_thresholds.get(nutrition_term)
        else:
            return df
        column = nutrition_term
        # Guard against terms that never mapped to a real dataset column —
        # previously df[column] raised KeyError and 500'd the whole request.
        if column is None or threshold is None or column not in df.columns:
            return df
        if direction == "low":
            return df[df[column] < threshold]
        elif direction == "high":
            return df[df[column] >= threshold]
    return df
324
-
325
def score_recipe_ingredients(recipe_ingredients, include_list):
    """Count how many ingredients from *include_list* occur in the recipe text (case-insensitive)."""
    haystack = recipe_ingredients.lower()
    matches = 0
    for wanted in include_list:
        if wanted.lower() in haystack:
            matches += 1
    return matches
332
-
333
def filter_and_rank_recipes(df, include_list, exclude_list, include_nutrition, exclude_nutrition):
    """Filter recipes by ingredient and nutrition constraints, ranked by matches.

    Pipeline: require >= 2 requested ingredients, drop recipes containing any
    excluded ingredient (substring match), apply include-nutrition conditions
    directly, and remove rows that satisfy any exclude-nutrition condition.
    If nothing survives, fall back to >= 1 matching ingredient (still honoring
    exclusions), and finally to a small random sample, so the caller always
    gets rows.  Result is sorted by 'ingredient_match_count' descending.
    """
    filtered_df = df.copy()
    print(f"Starting with {len(filtered_df)} recipes for filtering")
    if include_list:
        filtered_df['ingredient_match_count'] = filtered_df['ingredients'].apply(
            lambda x: score_recipe_ingredients(str(x), include_list)
        )
        filtered_df = filtered_df[filtered_df['ingredient_match_count'] >= 2]
        print(f"After requiring at least 2 included ingredients: {len(filtered_df)} recipes remain")
    for ingredient in exclude_list:
        before_count = len(filtered_df)
        filtered_df = filtered_df[
            ~filtered_df['ingredients']
            .str.lower()
            .fillna('')
            .str.contains(re.escape(ingredient.lower()))
        ]
        print(f"After excluding '{ingredient}': {len(filtered_df)} recipes remain (removed {before_count - len(filtered_df)})")
    for i, cond in enumerate(include_nutrition):
        before_count = len(filtered_df)
        filtered_df = filter_by_nutrition_condition(filtered_df, cond)
        after_count = len(filtered_df)
        print(f"After applying nutrition condition {i+1} (include) '{cond}': {after_count} recipes remain (removed {before_count - after_count})")
    for i, cond in enumerate(exclude_nutrition):
        before_count = len(filtered_df)
        # Rows that WOULD satisfy the condition (computed on the full df) are removed.
        temp_df = filter_by_nutrition_condition(df.copy(), cond)
        filtered_df = filtered_df[~filtered_df.index.isin(temp_df.index)]
        after_count = len(filtered_df)
        print(f"After applying nutrition condition {i+1} (exclude) '{cond}': {after_count} recipes remain (removed {before_count - after_count})")
    if filtered_df.empty:
        print("\nNo recipes match all criteria. Implementing fallback approach...")
        fallback_df = df.copy()
        if include_list:
            fallback_df['ingredient_match_count'] = fallback_df['ingredients'].apply(
                lambda x: score_recipe_ingredients(str(x), include_list)
            )
            fallback_df = fallback_df[fallback_df['ingredient_match_count'] >= 1]
        else:
            fallback_df['ingredient_match_count'] = 1
        for ingredient in exclude_list:
            fallback_df = fallback_df[
                ~fallback_df['ingredients']
                .str.lower()
                .fillna('')
                .str.contains(re.escape(ingredient.lower()))
            ]
        if fallback_df.empty:
            # Last resort: random sample so the API never returns nothing.
            fallback_df = df.sample(min(5, len(df)))
            fallback_df['ingredient_match_count'] = 0
            print("No matches found. Showing random recipes as a fallback")
        filtered_df = fallback_df
    if 'ingredient_match_count' not in filtered_df.columns:
        filtered_df['ingredient_match_count'] = 0
    filtered_df = filtered_df.sort_values('ingredient_match_count', ascending=False)
    return filtered_df
388
-
389
def get_disease_recommendations(user_text, disease_mapping_df):
    """Look up disease-specific food/nutrition guidance mentioned in *user_text*.

    Returns a dict with Disease, Best_Foods, Worst_Foods, Best_Nutrition and
    Worst_Nutrition (lists) for the first disease whose name appears in the
    text, or None when no known disease is mentioned.
    """
    user_text_lower = user_text.lower()
    matches = disease_mapping_df[disease_mapping_df['Disease'].apply(lambda d: d in user_text_lower)]
    if matches.empty:
        return None
    disease_info = matches.iloc[0]

    def safe_parse_list(x):
        # Cells are stored as stringified Python lists; fall back to a plain
        # comma-split when literal_eval cannot parse the cell.
        if isinstance(x, str):
            try:
                return ast.literal_eval(x)
            except (ValueError, SyntaxError):  # was a bare `except:` — keep it narrow
                return [item.strip() for item in x.split(',') if item.strip()]
        return x

    return {
        "Disease": disease_info['Disease'],
        "Best_Foods": safe_parse_list(disease_info.get("Best_Foods", "[]")),
        "Worst_Foods": safe_parse_list(disease_info.get("Worst_Foods", "[]")),
        "Best_Nutrition": safe_parse_list(disease_info.get("Best_Nutrition", "[]")),
        "Worst_Nutrition": safe_parse_list(disease_info.get("Worst_Nutrition", "[]")),
    }
414
-
415
def get_recipe_output(recipe_row):
    """Build (and print) the API-facing summary dict for one recipe row."""
    recipe_name = recipe_row['title']
    ner_info = recipe_row.get('NER', '')
    try:
        ner_str = ", ".join(json.loads(ner_info))
    except Exception:
        # NER column was not valid JSON — pass it through verbatim.
        ner_str = ner_info
    nutrition_details = {col: float(recipe_row[col]) for col in nutrition_columns}
    result = {
        "Meal name": recipe_name,
        "NER": ner_str,
        "Nutrition details": nutrition_details
    }
    print(f"Meal name: {recipe_name}")
    print(f"NER: {ner_str}")
    print(f"Nutrition details: {nutrition_details}")
    return result
433
-
434
def process_long_query(query):
    """Spell-correct *query* (summarizing first when very long) and extract constraints.

    Returns (corrected_text, include_ingredients, exclude_ingredients,
    include_nutrition, exclude_nutrition), each list deduplicated, gathered
    sentence by sentence.
    """
    if len(query.split()) > 500:
        print("Long input detected. Summarizing...")
        query = summarize_input(query)
    print(f"Processed Query: \"{query}\"")
    corrected = smart_correct_spelling(query, domain_words)
    include_items, exclude_items = [], []
    include_nutrition, exclude_nutrition = [], []
    for sentence in sent_tokenize(corrected):
        inc, exc = classify_ingredients_in_query(sentence, food_dictionary, common_misspellings)
        include_items.extend(inc)
        exclude_items.extend(exc)
        inc_nut, exc_nut = classify_nutrition_in_query(sentence, list(nutrition_terms_dictionary.keys()), common_misspellings)
        include_nutrition.extend(inc_nut)
        exclude_nutrition.extend(exc_nut)
    return (corrected, list(set(include_items)), list(set(exclude_items)),
            list(set(include_nutrition)), list(set(exclude_nutrition)))
454
-
455
def send_to_api(meal_data, parent_id):
    """POST one meal to the external restaurant-menu API.

    Builds an image URL from the URL-encoded meal name (placeholder image as
    fallback), posts the payload, and returns the parsed JSON response.
    Never raises: any failure is returned as {"error": ...}.
    """
    try:
        api_endpoint = "http://54.242.19.19:3000/api/ResturantMenu/add"
        meal_id = random.randint(1000, 9999)
        meal_name = meal_data.get("Meal name", "No meal name available")
        ner_info = meal_data.get("NER", "")
        images_public = "https://kero.beshoy.me/recipe_images/"
        # Previously gated by a hard-coded `image_path = True` flag and a
        # try/except around pure string concatenation — both dead code.
        image_url = images_public + quote(meal_name, safe="") + ".jpg"
        print(f"Successfully uploaded image to the server for {meal_name}: {image_url}")
        if not image_url:
            image_url = "https://picsum.photos/200"
        payload = {
            "id": str(meal_id),
            "name": meal_name,
            "description": ner_info,
            "photo": image_url,
            "parentId": parent_id
        }
        print(f"\nSending payload to API: {payload}")
        # Timeout keeps a dead API host from hanging the request thread forever.
        response = requests.post(api_endpoint, json=payload, timeout=10)
        print(f"API Response for meal {meal_name}: {response.status_code}")
        try:
            return response.json()
        except Exception:
            return {"error": response.text}
    except Exception as e:
        print(f"Error sending meal to API: {e}")
        return {"error": str(e)}
490
-
491
app = Flask(__name__)


@app.route('/process', methods=['POST'])
def process():
    """POST /process — recommend meals for a free-text description.

    Accepts JSON or form data with `description` and `parentId`.  Combines
    disease-based guidance (when a known disease is mentioned) with the
    constraints parsed from the user query, filters the recipe dataset,
    pushes up to 6 meals to the external menu API, and returns both the
    recommendations and the per-meal API responses.
    """
    try:
        input_text = ""
        parent_id = ""
        if request.is_json:
            data = request.json
            input_text = data.get("description", "")
            parent_id = data.get("parentId", "")
            if not input_text:
                return jsonify({"error": "Missing description in request"}), 400
            if not parent_id:
                return jsonify({"error": "Missing parentId in request"}), 400
        else:
            # Fallback path: classic form-encoded body.
            input_text_json = request.form
            input_text = input_text_json.get("description", "")
            parent_id = input_text_json.get("parentId", "")
            if not input_text:
                return jsonify({"error": "Missing description in request"}), 400
            if not parent_id:
                return jsonify({"error": "Missing parentId in request"}), 400
            print("WARNING: Using raw data format. Please consider using JSON format.")

        raw_input_text = input_text
        # Spell-correct / summarize, then pull ingredient + nutrition constraints.
        processed_input, user_include, user_exclude, user_include_nutrition, user_exclude_nutrition = process_long_query(raw_input_text)

        include_list, exclude_list = [], []
        include_nutrition, exclude_nutrition = [], []

        # Disease guidance (if any) seeds the constraint lists.
        disease_recs = get_disease_recommendations(processed_input, disease_df)
        if disease_recs:
            print("\nDisease-related Recommendations Detected:")
            print(f"Disease: {disease_recs['Disease']}")
            print(f"Best Foods: {disease_recs['Best_Foods']}")
            print(f"Worst Foods: {disease_recs['Worst_Foods']}")
            print(f"Best Nutrition: {disease_recs['Best_Nutrition']}")
            print(f"Worst Nutrition: {disease_recs['Worst_Nutrition']}")

            include_list.extend(disease_recs["Best_Foods"])
            exclude_list.extend(disease_recs["Worst_Foods"])

            def parse_nutrition_condition(nutrition_phrase):
                # "low carbs" -> ("low", "Carbohydrate"); anything else -> None.
                parts = nutrition_phrase.strip().split()
                if len(parts) == 2:
                    direction = parts[0].lower()
                    nutrient = parts[1].lower()
                    mapped_nutrient = nutrition_terms_dictionary.get(nutrient, nutrient)
                    return (direction, mapped_nutrient)
                return None

            for bn in disease_recs["Best_Nutrition"]:
                cond = parse_nutrition_condition(bn)
                if cond:
                    include_nutrition.append(cond)
            for wn in disease_recs["Worst_Nutrition"]:
                cond = parse_nutrition_condition(wn)
                if cond:
                    exclude_nutrition.append(cond)

        # Merge user-query constraints with disease constraints and dedupe.
        include_list.extend(user_include)
        exclude_list.extend(user_exclude)
        include_nutrition.extend(user_include_nutrition)
        exclude_nutrition.extend(user_exclude_nutrition)

        include_list = list(set(include_list))
        exclude_list = list(set(exclude_list))
        include_nutrition = list(set(include_nutrition))
        exclude_nutrition = list(set(exclude_nutrition))

        print("\nFinal Lists After Combining Disease + User Query:")
        print(f"Ingredients to include: {include_list}")
        print(f"Ingredients to exclude: {exclude_list}")
        print(f"Nutrition conditions to include: {include_nutrition}")
        print(f"Nutrition conditions to exclude: {exclude_nutrition}")

        # Normalize every ingredient name against the dataset vocabulary.
        corrected_include = [correct_food_ingredient(ingredient, food_dictionary, common_misspellings) for ingredient in include_list]
        corrected_exclude = [correct_food_ingredient(ingredient, food_dictionary, common_misspellings) for ingredient in exclude_list]

        include_list = list(set(corrected_include))
        exclude_list = list(set(corrected_exclude))
        filtered_df = filter_and_rank_recipes(
            df,
            include_list,
            exclude_list,
            include_nutrition,
            exclude_nutrition
        )

        final_output = {}
        api_responses = []

        if not filtered_df.empty:
            # Shuffle so repeated identical queries vary their suggestions.
            filtered_df = filtered_df.sample(frac=1)
            meal_count = min(6, len(filtered_df))

            for i in range(meal_count):
                if i == 0:
                    print("\nRecommended Meal:")
                    meal_data = get_recipe_output(filtered_df.iloc[i])
                    final_output["Recommended Meal"] = meal_data
                else:
                    print(f"\nOption {i}:")
                    meal_data = get_recipe_output(filtered_df.iloc[i])
                    final_output[f"Option {i}"] = meal_data

                api_response = send_to_api(meal_data, parent_id)
                api_responses.append(api_response)
        else:
            error_message = f"No recipes found that match your criteria.\nIngredients to include: {', '.join(include_list)}\nIngredients to exclude: {', '.join(exclude_list)}\nNutrition Include: {', '.join(str(cond) for cond in include_nutrition)}\nNutrition Exclude: {', '.join(str(cond) for cond in exclude_nutrition)}."
            print(error_message)
            final_output["Message"] = error_message
            return jsonify({"error": error_message}), 404

        return jsonify({
            "original_response": final_output,
            "api_responses": api_responses,
            "message": f"Successfully processed {len(api_responses)} meals"
        })

    except Exception as e:
        print(f"Error processing request: {str(e)}")
        return jsonify({"error": f"Internal server error: {str(e)}"}), 500
623
-
624
-
625
if __name__ == '__main__':
    # Hosting platforms inject PORT; default to 7860 for local runs.
    app.run(host="0.0.0.0", port=int(os.environ.get("PORT", 7860)), debug=False)
 
1
+ from flask import Flask, request, jsonify
2
+ import pandas as pd
3
+ from transformers import pipeline
4
+ import os
5
+ import re
6
+ import json
7
+ import requests
8
+ import random
9
+ from difflib import get_close_matches
10
+ from textblob import TextBlob
11
+ from nltk.tokenize import word_tokenize, sent_tokenize
12
+ import nltk
13
+ import ast
14
+ from urllib.parse import quote
15
+
16
+ def force_download_nltk():
17
+ nltk_data_dir = os.environ.get("NLTK_DATA", "/app/nltk_data")
18
+ transformers_cache_dir = os.environ.get("TRANSFORMERS_CACHE", "/app/transformers_cache")
19
+ os.makedirs(nltk_data_dir, exist_ok=True)
20
+ os.makedirs(transformers_cache_dir, exist_ok=True)
21
+ os.environ["NLTK_DATA"] = nltk_data_dir
22
+ os.environ["TRANSFORMERS_CACHE"] = transformers_cache_dir
23
+ needed_packages = ["punkt"]
24
+ for package in needed_packages:
25
+ try:
26
+ nltk.data.find(f"tokenizers/{package}")
27
+ except LookupError:
28
+ print(f"Downloading NLTK package: {package} to {nltk_data_dir}")
29
+ nltk.download(package, download_dir=nltk_data_dir)
30
+ force_download_nltk()
31
+
32
+ domain_words = {
33
+ "carb", "carbs", "carbo", "carbohydrate", "carbohydrates",
34
+ "fat", "fats", "protein", "proteins", "fiber", "cholesterol",
35
+ "calcium", "iron", "magnesium", "potassium", "sodium", "vitamin", "vitamin c",
36
+ "calories", "calorie"
37
+ }
38
+
39
+ def smart_correct_spelling(text, domain_set):
40
+ tokens = word_tokenize(text)
41
+ corrected_tokens = []
42
+ for token in tokens:
43
+ if token.isalpha() and token.lower() not in domain_set:
44
+ corrected_word = str(TextBlob(token).correct())
45
+ corrected_tokens.append(corrected_word)
46
+ else:
47
+ corrected_tokens.append(token)
48
+ return " ".join(corrected_tokens)
49
+
50
+ qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
51
+ classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
52
+ summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
53
+
54
+ def summarize_input(text):
55
+ summary = summarizer(text, max_length=130, min_length=30, do_sample=False)
56
+ return summary[0]['summary_text']
57
+
58
+ df = pd.read_csv("Datasets/Final used Datasets/food_dataset_with_nutriition.csv")
59
+ print(f"Starting with {len(df)} recipes in dataset")
60
+ nutrition_columns = ["calories", "Total fats", "Carbohydrate", "Fiber", "Protein",
61
+ "Cholesterol", "Calcium", "Iron", "Magnesium", "Potassium", "Sodium", "Vitamin C"]
62
+ for col in nutrition_columns:
63
+ df[col] = pd.to_numeric(df[col], errors='coerce')
64
+
65
+ disease_df = pd.read_csv("Datasets/Final used Datasets/disease_food_nutrition_mapping.csv")
66
+ disease_df["Disease"] = disease_df["Disease"].str.lower()
67
+
68
+ try:
69
+ with open("docs/common_misspellings.json", "r") as file:
70
+ common_misspellings = json.load(file)
71
+ except FileNotFoundError:
72
+ common_misspellings = {"suger": "sugar", "milc": "milk"}
73
+ with open("docs/common_misspellings.json", "w") as file:
74
+ json.dump(common_misspellings, file, indent=2)
75
+
76
+ try:
77
+ with open("docs/common_ingredients.json", "r") as file:
78
+ common_ingredients = json.load(file)
79
+ except FileNotFoundError:
80
+ common_ingredients = ["sugar", "salt", "flour", "milk", "eggs", "butter", "oil", "water"]
81
+ with open("docs/common_ingredients.json", "w") as file:
82
+ json.dump(common_ingredients, file, indent=2)
83
+
84
+ def create_ingredient_dictionary(dataframe, common_ingredients_list):
85
+ all_ingredients = []
86
+ all_ingredients.extend(common_ingredients_list)
87
+ all_ingredients.extend(set(common_misspellings.values()))
88
+ for ingredients_list in dataframe['ingredients']:
89
+ parts = re.split(r',|\sand\s|\sor\s|;', str(ingredients_list))
90
+ for part in parts:
91
+ clean_part = re.sub(
92
+ r'\d+[\s/]*(oz|ounce|cup|tbsp|tsp|tablespoon|teaspoon|pound|lb|g|ml|l|pinch|dash)\b\.?',
93
+ '', part)
94
+ clean_part = re.sub(
95
+ r'\b(fresh|freshly|chopped|minced|diced|sliced|grated|ground|powdered|crushed|toasted|roasted)\b',
96
+ '', clean_part)
97
+ clean_part = re.sub(r'\(.*?\)', '', clean_part)
98
+ clean_part = clean_part.strip()
99
+ subparts = re.split(r'\sand\s|\sor\s', clean_part)
100
+ for subpart in subparts:
101
+ cleaned_subpart = subpart.strip().lower()
102
+ if cleaned_subpart and len(cleaned_subpart) > 2:
103
+ all_ingredients.append(cleaned_subpart)
104
+ unique_ingredients = list(set(all_ingredients))
105
+ unique_ingredients.sort(key=len, reverse=True)
106
+ return unique_ingredients
107
+ food_dictionary = create_ingredient_dictionary(df, common_ingredients)
108
+
109
+ def identify_food_ingredient(text, ingredient_dict, misspellings_dict):
110
+ cleaned = re.sub(
111
+ r'\d+[\s/]*(oz|ounce|cup|tbsp|tsp|tablespoon|teaspoon|pound|lb|g|ml|l|pinch|dash)\b\.?',
112
+ '', text)
113
+ cleaned = re.sub(
114
+ r'\b(fresh|freshly|chopped|minced|diced|sliced|grated|ground|powdered|crushed|toasted|roasted)\b',
115
+ '', cleaned)
116
+ cleaned = re.sub(r'\(.*?\)', '', cleaned)
117
+ cleaned = cleaned.strip().lower()
118
+ if cleaned in misspellings_dict:
119
+ return misspellings_dict[cleaned]
120
+ if cleaned in ingredient_dict:
121
+ return cleaned
122
+ words = cleaned.split()
123
+ for word in words:
124
+ if word in ingredient_dict:
125
+ return word
126
+ if word in misspellings_dict:
127
+ return misspellings_dict[word]
128
+ close_matches = get_close_matches(cleaned, ingredient_dict, n=3, cutoff=0.8)
129
+ if close_matches:
130
+ return close_matches[0]
131
+ for dict_ingredient in ingredient_dict:
132
+ if dict_ingredient in cleaned:
133
+ return dict_ingredient
134
+ close_matches = get_close_matches(cleaned, ingredient_dict, n=3, cutoff=0.6)
135
+ if close_matches:
136
+ return close_matches[0]
137
+ return None
138
+
139
def correct_food_ingredient(ingredient, ingredient_dict, misspellings_dict):
    """Normalise an ingredient name to its closest dictionary form.

    Same cleaning and lookup cascade as identify_food_ingredient, but this
    variant never gives up: when nothing in the dictionary matches, the
    cleaned input itself is returned.
    """
    name = ingredient
    for pattern in (
        r'\d+[\s/]*(oz|ounce|cup|tbsp|tsp|tablespoon|teaspoon|pound|lb|g|ml|l|pinch|dash)\b\.?',
        r'\b(fresh|freshly|chopped|minced|diced|sliced|grated|ground|powdered|crushed|toasted|roasted)\b',
        r'\(.*?\)',
    ):
        name = re.sub(pattern, '', name)
    name = name.strip().lower()

    if name in misspellings_dict:
        return misspellings_dict[name]
    if name in ingredient_dict:
        return name

    # Fuzzy matching: strict cutoff first, then a looser one.
    for cutoff in (0.8, 0.6):
        candidates = get_close_matches(name, ingredient_dict, n=3, cutoff=cutoff)
        if candidates:
            return candidates[0]

    # Substring overlap in either direction as a last dictionary-based resort.
    for entry in ingredient_dict:
        if name in entry or entry in name:
            return entry
    return name
163
def add_misspelling(misspelled, correct):
    """Persist a misspelling -> canonical-name pair in the JSON store.

    Reads docs/common_misspellings.json, adds the lower-cased pair and
    rewrites the file sorted and indented.

    Returns:
        True on success; False when the store is missing/unreadable,
        contains invalid JSON, or the arguments are not strings.
    """
    try:
        with open("docs/common_misspellings.json", "r") as file:
            misspellings = json.load(file)
        misspellings[misspelled.lower()] = correct.lower()
        with open("docs/common_misspellings.json", "w") as file:
            json.dump(misspellings, file, indent=2, sort_keys=True)
        return True
    # Catch only expected failure modes instead of a blanket Exception, so
    # genuine programming errors are not silently converted into False.
    except (OSError, json.JSONDecodeError, AttributeError, TypeError):
        return False
174
def extract_unwanted_ingredients(input_text):
    """Ask the QA model which ingredients to exclude and validate the answer.

    The model's raw answer is split on commas / "and" / "or", each fragment
    is normalised against the ingredient dictionary, and the validated names
    are returned.  When nothing validates, the raw answer is returned as a
    single-element list so callers always get something usable.
    """
    answer = qa_pipeline(
        question="What ingredients should be excluded?",
        context=input_text,
    )['answer']

    # Break the answer on connective words and commas into candidate names.
    candidates = [
        fragment.strip()
        for comma_part in answer.split(',')
        for and_part in comma_part.split(' and ')
        for fragment in and_part.split(' or ')
        if fragment.strip()
    ]

    validated = []
    for candidate in candidates:
        match = identify_food_ingredient(candidate, food_dictionary, common_misspellings)
        if match:
            validated.append(match)
    return validated if validated else [answer]
192
def classify_clause(clause):
    """Zero-shot classify a clause as an 'include' or 'exclude' instruction."""
    outcome = classifier(
        clause,
        ["include", "exclude"],
        hypothesis_template="This clause means the ingredient should be {}.",
    )
    # The first label is the highest-scoring one.
    return outcome["labels"][0].lower()
197
def extract_ingredients_from_clause(clause, ingredient_dict, misspellings_dict):
    """Collect dictionary ingredients mentioned (case-insensitively) in a clause.

    Each hit is normalised through identify_food_ingredient; the result is a
    deduplicated list.
    """
    clause_lower = clause.lower()
    matches = set()
    for entry in ingredient_dict:
        if entry.lower() not in clause_lower:
            continue
        canonical = identify_food_ingredient(entry, ingredient_dict, misspellings_dict)
        if canonical:
            matches.add(canonical)
    return list(matches)
206
def classify_ingredients_in_query(query, ingredient_dict, misspellings_dict):
    """Split a query into clauses and bucket mentioned ingredients.

    Returns (include_ingredients, exclude_ingredients), both deduplicated,
    as decided by the zero-shot clause classifier.
    """
    nutrition_terms = ['calories', 'calorie', 'fat', 'fats', 'carb', 'carbs', 'protein',
                       'fiber', 'cholesterol', 'calcium', 'iron', 'magnesium',
                       'potassium', 'sodium', 'vitamin']
    # Strip "low/high <nutrient>" phrases so nutrition wording is not
    # mistaken for ingredient mentions.
    stripped_query = query
    for term in nutrition_terms:
        stripped_query = re.sub(r'(low|high)\s+' + term, '', stripped_query,
                                flags=re.IGNORECASE)

    wanted, unwanted = set(), set()
    for raw_clause in re.split(r'\bbut\b|,', stripped_query, flags=re.IGNORECASE):
        clause = raw_clause.strip()
        if not clause:
            continue
        verdict = classify_clause(clause)
        mentioned = extract_ingredients_from_clause(clause, ingredient_dict, misspellings_dict)
        if verdict == "include":
            wanted.update(mentioned)
        elif verdict == "exclude":
            unwanted.update(mentioned)
    return list(wanted), list(unwanted)
230
def extract_nutrition_from_clause(clause, nutrition_dict, misspellings_dict):
    """Return nutrition terms that appear as whole words in the clause.

    Terms are checked longest-first and returned lower-cased, deduplicated.
    misspellings_dict is accepted for signature parity with the ingredient
    helpers but is not consulted here.
    """
    text = clause.lower()
    hits = {
        term.lower()
        for term in sorted(nutrition_dict, key=len, reverse=True)
        if re.search(r'\b' + re.escape(term.lower()) + r'\b', text)
    }
    return list(hits)
240
def classify_nutrition_in_query(query, nutrition_dict, misspellings_dict):
    """Extract (modifier, nutrient[, threshold]) conditions from a query.

    Each clause yields conditions such as ("low", "Total fats") or
    ("high", "calories", 700.0).  A query mentioning sensitivity/allergy/
    exclude defaults clauses to "exclude" intent; an explicit "i want"
    forces "include" for that clause.

    Returns:
        (include_conditions, exclude_conditions) as deduplicated lists.
    """
    include_nutrition = []
    exclude_nutrition = []
    # BUGFIX: "but" needs word boundaries — the old pattern split inside
    # words such as "butter" (cf. classify_ingredients_in_query's r'\bbut\b').
    clauses = re.split(r'\band\b|,|\bbut\b', query, flags=re.IGNORECASE)
    overall_intent = "exclude" if re.search(r'sensitivity|allergy|exclude', query,
                                            flags=re.IGNORECASE) else "include"
    for clause in clauses:
        clause = clause.strip()
        if not clause:
            continue
        intent = "include" if "i want" in clause.lower() else overall_intent
        # An explicit number in the clause becomes the threshold value.
        numbers = re.findall(r'\d+(?:\.\d+)?', clause)
        threshold = float(numbers[0]) if numbers else None
        if re.search(r'\b(high|over|above|more than|exceeding)\b', clause, flags=re.IGNORECASE):
            modifier = "high"
        elif re.search(r'\b(low|under|less than|below)\b', clause, flags=re.IGNORECASE):
            modifier = "low"
        else:
            # No explicit modifier: excluding implies "high", including "low".
            modifier = "high" if intent == "exclude" else "low"
        terms_found = extract_nutrition_from_clause(clause, nutrition_dict, misspellings_dict)
        for term in terms_found:
            norm_term = nutrition_terms_dictionary.get(term, term)
            condition = (modifier, norm_term, threshold) if threshold is not None else (modifier, norm_term)
            if intent == "include":
                include_nutrition.append(condition)
            elif intent == "exclude":
                exclude_nutrition.append(condition)
    return list(set(include_nutrition)), list(set(exclude_nutrition))
268
# Maps user-facing nutrition vocabulary (singular/plural forms and
# abbreviations) to the dataset's canonical nutrition column names.
nutrition_terms_dictionary = {
    "calorie": "calories",
    "calories": "calories",
    "fat": "Total fats",
    "fats": "Total fats",
    "total fat": "Total fats",
    "total fats": "Total fats",
    "carb": "Carbohydrate",
    "carbs": "Carbohydrate",
    "carbo": "Carbohydrate",
    "carbohydrate": "Carbohydrate",
    "carbohydrates": "Carbohydrate",
    "fiber": "Fiber",
    "protein": "Protein",
    "proteins": "Protein",
    "cholesterol": "Cholesterol",
    "calcium": "Calcium",
    "iron": "Iron",
    "magnesium": "Magnesium",
    "potassium": "Potassium",
    "sodium": "Sodium",
    "vitamin c": "Vitamin C"
}
292
# Default per-recipe cutoffs used by filter_by_nutrition_condition when a
# query names a nutrient without an explicit numeric threshold.  Units are
# presumably those of the dataset's nutrition columns — TODO confirm.
fixed_thresholds = {
    "calories": 700,
    "Total fats": 60,
    "Carbohydrate": 120,
    "Fiber": 10,
    "Protein": 30,
    "Cholesterol": 100,
    "Calcium": 300,
    "Iron": 5,
    "Magnesium": 100,
    "Potassium": 300,
    "Sodium": 400,
    "Vitamin C": 50
}
307
def filter_by_nutrition_condition(df, condition):
    """Filter recipes by one nutrition condition.

    condition is ("low"|"high", column) or ("low"|"high", column, threshold);
    the 2-tuple form looks the threshold up in fixed_thresholds.  "low"
    keeps rows strictly below the threshold, "high" keeps rows at or above
    it.  Unknown condition shapes, missing thresholds and — new guard —
    columns absent from df all return df unchanged instead of raising.
    """
    if isinstance(condition, tuple):
        if len(condition) == 3:
            direction, nutrition_term, threshold = condition
        elif len(condition) == 2:
            direction, nutrition_term = condition
            threshold = fixed_thresholds.get(nutrition_term)
        else:
            return df
        column = nutrition_term
        # Guard against conditions naming a column the dataset lacks; the
        # original raised KeyError here, killing the whole request.
        if column is None or threshold is None or column not in df.columns:
            return df
        if direction == "low":
            return df[df[column] < threshold]
        elif direction == "high":
            return df[df[column] >= threshold]
    return df
325
def score_recipe_ingredients(recipe_ingredients, include_list):
    """Count how many desired ingredients occur in the recipe's ingredient text."""
    haystack = recipe_ingredients.lower()
    return sum(wanted.lower() in haystack for wanted in include_list)
333
def filter_and_rank_recipes(df, include_list, exclude_list, include_nutrition, exclude_nutrition):
    """Filter the recipe table by ingredient and nutrition constraints, then
    rank by how many desired ingredients each recipe matches.

    Stages: (1) require >= 2 included ingredients, (2) drop recipes
    containing any excluded ingredient, (3) apply include-nutrition
    conditions, (4) remove recipes matching exclude-nutrition conditions.
    If nothing survives, falls back to >= 1 included ingredient (still
    honouring exclusions), and finally to random recipes, so the caller
    always receives candidates.  Progress is print-logged at each stage.
    """
    filtered_df = df.copy()
    print(f"Starting with {len(filtered_df)} recipes for filtering")
    # Stage 1: score every recipe and keep those matching >= 2 wanted items.
    if include_list:
        filtered_df['ingredient_match_count'] = filtered_df['ingredients'].apply(
            lambda x: score_recipe_ingredients(str(x), include_list)
        )
        filtered_df = filtered_df[filtered_df['ingredient_match_count'] >= 2]
        print(f"After requiring at least 2 included ingredients: {len(filtered_df)} recipes remain")
    # Stage 2: drop any recipe whose ingredient text mentions an exclusion.
    for ingredient in exclude_list:
        before_count = len(filtered_df)
        filtered_df = filtered_df[
            ~filtered_df['ingredients']
            .str.lower()
            .fillna('')
            .str.contains(re.escape(ingredient.lower()))
        ]
        print(f"After excluding '{ingredient}': {len(filtered_df)} recipes remain (removed {before_count - len(filtered_df)})")
    # Stage 3: keep only recipes satisfying each include-nutrition condition.
    for i, cond in enumerate(include_nutrition):
        before_count = len(filtered_df)
        filtered_df = filter_by_nutrition_condition(filtered_df, cond)
        after_count = len(filtered_df)
        print(f"After applying nutrition condition {i+1} (include) '{cond}': {after_count} recipes remain (removed {before_count - after_count})")
    # Stage 4: remove recipes that satisfy an exclude-nutrition condition
    # (computed against the full df, then anti-joined on the index).
    for i, cond in enumerate(exclude_nutrition):
        before_count = len(filtered_df)
        temp_df = filter_by_nutrition_condition(df.copy(), cond)
        filtered_df = filtered_df[~filtered_df.index.isin(temp_df.index)]
        after_count = len(filtered_df)
        print(f"After applying nutrition condition {i+1} (exclude) '{cond}': {after_count} recipes remain (removed {before_count - after_count})")
    # Fallback: loosen the ingredient requirement (nutrition conditions are
    # intentionally dropped here), then random recipes as a last resort.
    if filtered_df.empty:
        print("\nNo recipes match all criteria. Implementing fallback approach...")
        fallback_df = df.copy()
        if include_list:
            fallback_df['ingredient_match_count'] = fallback_df['ingredients'].apply(
                lambda x: score_recipe_ingredients(str(x), include_list)
            )
            fallback_df = fallback_df[fallback_df['ingredient_match_count'] >= 1]
        else:
            fallback_df['ingredient_match_count'] = 1
        for ingredient in exclude_list:
            fallback_df = fallback_df[
                ~fallback_df['ingredients']
                .str.lower()
                .fillna('')
                .str.contains(re.escape(ingredient.lower()))
            ]
        if fallback_df.empty:
            fallback_df = df.sample(min(5, len(df)))
            fallback_df['ingredient_match_count'] = 0
            print("No matches found. Showing random recipes as a fallback")
        filtered_df = fallback_df
    # Rank: best-matching recipes first.
    if 'ingredient_match_count' not in filtered_df.columns:
        filtered_df['ingredient_match_count'] = 0
    filtered_df = filtered_df.sort_values('ingredient_match_count', ascending=False)
    return filtered_df
389
def get_disease_recommendations(user_text, disease_mapping_df):
    """Look up dietary guidance for the first disease mentioned in user_text.

    disease_mapping_df needs a (lower-case) 'Disease' column plus
    Best_Foods / Worst_Foods / Best_Nutrition / Worst_Nutrition columns
    whose cells are Python-list literals or comma-separated strings.

    Returns:
        A dict with the disease name and the four parsed lists, or None
        when no known disease appears in the text.
    """
    user_text_lower = user_text.lower()
    matches = disease_mapping_df[disease_mapping_df['Disease'].apply(lambda d: d in user_text_lower)]
    if matches.empty:
        return None
    disease_info = matches.iloc[0]

    def safe_parse_list(value):
        # Cells may hold "['a', 'b']" literals or plain "a, b" strings.
        # BUGFIX: the bare `except:` here also swallowed KeyboardInterrupt
        # and SystemExit; literal_eval signals bad input via these two.
        if isinstance(value, str):
            try:
                return ast.literal_eval(value)
            except (ValueError, SyntaxError):
                return [item.strip() for item in value.split(',') if item.strip()]
        return value

    return {
        "Disease": disease_info['Disease'],
        "Best_Foods": safe_parse_list(disease_info.get("Best_Foods", "[]")),
        "Worst_Foods": safe_parse_list(disease_info.get("Worst_Foods", "[]")),
        "Best_Nutrition": safe_parse_list(disease_info.get("Best_Nutrition", "[]")),
        "Worst_Nutrition": safe_parse_list(disease_info.get("Worst_Nutrition", "[]")),
    }
415
def get_recipe_output(recipe_row):
    """Format one recipe row into the output dict (also printed to the log).

    The NER column is expected to hold a JSON list of entity strings; when
    it does not parse, the raw value is passed through unchanged.
    """
    title = recipe_row['title']
    raw_ner = recipe_row.get('NER', '')
    try:
        ner_str = ", ".join(json.loads(raw_ner))
    except Exception:
        ner_str = raw_ner
    nutrition_details = {col: float(recipe_row[col]) for col in nutrition_columns}
    result = {
        "Meal name": title,
        "NER": ner_str,
        "Nutrition details": nutrition_details,
    }
    for label, value in result.items():
        print(f"{label}: {value}")
    return result
434
def process_long_query(query):
    """Normalise a user query and aggregate per-sentence dietary signals.

    Long inputs (> 500 words) are summarised first, then spell-corrected.
    Each sentence contributes ingredient and nutrition include/exclude
    results, which are merged and deduplicated.

    Returns:
        (corrected_query, include_ingredients, exclude_ingredients,
         include_nutrition_conditions, exclude_nutrition_conditions)
    """
    if len(query.split()) > 500:
        print("Long input detected. Summarizing...")
        query = summarize_input(query)
    print(f"Processed Query: \"{query}\"")
    corrected = smart_correct_spelling(query, domain_words)

    include_ing, exclude_ing = set(), set()
    include_nut, exclude_nut = set(), set()
    for sentence in sent_tokenize(corrected):
        inc, exc = classify_ingredients_in_query(sentence, food_dictionary, common_misspellings)
        include_ing.update(inc)
        exclude_ing.update(exc)
        inc_n, exc_n = classify_nutrition_in_query(sentence, list(nutrition_terms_dictionary.keys()), common_misspellings)
        include_nut.update(inc_n)
        exclude_nut.update(exc_n)
    return corrected, list(include_ing), list(exclude_ing), list(include_nut), list(exclude_nut)
455
def send_to_api(meal_data, parent_id):
    """POST one meal to the restaurant-menu API.

    Builds a payload with a random 4-digit id, the meal name/NER text and a
    derived public image URL (with a placeholder fallback).  Never raises:
    any failure is print-logged and returned as {"error": ...}.
    """
    try:
        api_endpoint = "http://54.242.19.19:3000/api/ResturantMenu/add"
        meal_id = random.randint(1000, 9999)
        meal_name = meal_data.get("Meal name", "No meal name available")
        ner_info = meal_data.get("NER", "")
        images_public = "https://kero.beshoy.me/recipe_images/"
        image_url = ""
        # Derive the public image URL from the percent-encoded meal name.
        # (The original gated this on an always-True `image_path` flag,
        # removed here as dead code.)
        try:
            image_url = images_public + quote(meal_name, safe="") + ".jpg"
            print(f"Successfully uploaded image to the server for {meal_name}: {image_url}")
        except Exception as cl_err:
            print(f"Error uploading to the server: {cl_err}")
        if not image_url:
            image_url = "https://picsum.photos/200"
        payload = {
            "id": str(meal_id),
            "name": meal_name,
            "description": ner_info,
            "photo": image_url,
            "parentId": parent_id
        }
        print(f"\nSending payload to API: {payload}")
        # Timeout keeps a dead API from hanging the whole request handler;
        # a Timeout is caught by the outer except and reported as an error.
        response = requests.post(api_endpoint, json=payload, timeout=15)
        print(f"API Response for meal {meal_name}: {response.status_code}")

        try:
            return response.json()
        except Exception:
            return {"error": response.text}
    except Exception as e:
        print(f"Error sending meal to API: {e}")
        return {"error": str(e)}
491
app = Flask(__name__)

@app.route('/process', methods=['POST'])
def process():
    """Main endpoint: turn a dietary description into recommended meals.

    Accepts JSON or form data with 'description' (free-text dietary query)
    and 'parentId' (menu the meals are attached to).  Combines disease-based
    guidance with the parsed user query, filters/ranks the recipe table,
    and pushes up to 6 meals to the external menu API.

    Returns 400 on missing fields, 404 when no recipes match, 500 on any
    unexpected failure.
    """
    try:

        input_text = ""
        parent_id = ""

        if request.is_json:

            data = request.json
            input_text = data.get("description", "")
            parent_id = data.get("parentId", "")

            if not input_text:
                return jsonify({"error": "Missing description in request"}), 400
            if not parent_id:
                return jsonify({"error": "Missing parentId in request"}), 400

        else:

            # Fallback: accept form-encoded bodies as well.
            input_text_json = request.form
            input_text = input_text_json.get("description", "")
            parent_id = input_text_json.get("parentId", "")

            if not input_text:
                return jsonify({"error": "Missing description in request"}), 400
            if not parent_id:
                return jsonify({"error": "Missing parentId in request"}), 400

            print("WARNING: Using raw data format. Please consider using JSON format.")

        raw_input_text = input_text
        processed_input, user_include, user_exclude, user_include_nutrition, user_exclude_nutrition = process_long_query(raw_input_text)

        include_list, exclude_list = [], []
        include_nutrition, exclude_nutrition = [], []

        # Disease guidance (looked up in disease_df, loaded elsewhere in the
        # module) seeds the include/exclude lists before user preferences.
        disease_recs = get_disease_recommendations(processed_input, disease_df)

        if disease_recs:
            print("\nDisease-related Recommendations Detected:")
            print(f"Disease: {disease_recs['Disease']}")
            print(f"Best Foods: {disease_recs['Best_Foods']}")
            print(f"Worst Foods: {disease_recs['Worst_Foods']}")
            print(f"Best Nutrition: {disease_recs['Best_Nutrition']}")
            print(f"Worst Nutrition: {disease_recs['Worst_Nutrition']}")

            include_list.extend(disease_recs["Best_Foods"])
            exclude_list.extend(disease_recs["Worst_Foods"])

            def parse_nutrition_condition(nutrition_phrase):
                # Turn a two-word phrase like "low carbs" into a
                # ("low", "Carbohydrate") condition via the term map.
                parts = nutrition_phrase.strip().split()
                if len(parts) == 2:
                    direction = parts[0].lower()
                    nutrient = parts[1].lower()
                    mapped_nutrient = nutrition_terms_dictionary.get(nutrient, nutrient)
                    return (direction, mapped_nutrient)
                return None

            for bn in disease_recs["Best_Nutrition"]:
                cond = parse_nutrition_condition(bn)
                if cond:
                    include_nutrition.append(cond)
            for wn in disease_recs["Worst_Nutrition"]:
                cond = parse_nutrition_condition(wn)
                if cond:
                    exclude_nutrition.append(cond)

        # Merge the user's own query signals with disease guidance.
        include_list.extend(user_include)
        exclude_list.extend(user_exclude)
        include_nutrition.extend(user_include_nutrition)
        exclude_nutrition.extend(user_exclude_nutrition)

        include_list = list(set(include_list))
        exclude_list = list(set(exclude_list))
        include_nutrition = list(set(include_nutrition))
        exclude_nutrition = list(set(exclude_nutrition))

        print("\nFinal Lists After Combining Disease + User Query:")
        print(f"Ingredients to include: {include_list}")
        print(f"Ingredients to exclude: {exclude_list}")
        print(f"Nutrition conditions to include: {include_nutrition}")
        print(f"Nutrition conditions to exclude: {exclude_nutrition}")

        # Normalise every ingredient name against the dictionary.
        corrected_include = [correct_food_ingredient(ingredient, food_dictionary, common_misspellings) for ingredient in include_list]
        corrected_exclude = [correct_food_ingredient(ingredient, food_dictionary, common_misspellings) for ingredient in exclude_list]

        include_list = list(set(corrected_include))
        exclude_list = list(set(corrected_exclude))
        filtered_df = filter_and_rank_recipes(
            df,
            include_list,
            exclude_list,
            include_nutrition,
            exclude_nutrition
        )

        final_output = {}
        api_responses = []

        if not filtered_df.empty:
            # Shuffle so repeated identical queries vary their suggestions.
            filtered_df = filtered_df.sample(frac=1)
            meal_count = min(6, len(filtered_df))

            for i in range(meal_count):
                if i == 0:
                    print("\nRecommended Meal:")
                    meal_data = get_recipe_output(filtered_df.iloc[i])
                    final_output["Recommended Meal"] = meal_data
                else:
                    print(f"\nOption {i}:")
                    meal_data = get_recipe_output(filtered_df.iloc[i])
                    final_output[f"Option {i}"] = meal_data

                api_response = send_to_api(meal_data, parent_id)
                api_responses.append(api_response)
        else:
            error_message = f"No recipes found that match your criteria.\nIngredients to include: {', '.join(include_list)}\nIngredients to exclude: {', '.join(exclude_list)}\nNutrition Include: {', '.join(str(cond) for cond in include_nutrition)}\nNutrition Exclude: {', '.join(str(cond) for cond in exclude_nutrition)}."
            print(error_message)
            final_output["Message"] = error_message
            return jsonify({"error": error_message}), 404

        return jsonify({
            "original_response": final_output,
            "api_responses": api_responses,
            "message": f"Successfully processed {len(api_responses)} meals"
        })

    except Exception as e:
        print(f"Error processing request: {str(e)}")
        return jsonify({"error": f"Internal server error: {str(e)}"}), 500
625
if __name__ == '__main__':
    # The hosting platform supplies PORT; fall back to 7860 locally.
    listen_port = int(os.environ.get("PORT", 7860))
    app.run(host="0.0.0.0", port=listen_port, debug=False)