prithivMLmods
/

Food-101-93M

@@ -16,6 +16,10 @@ tags:
 - biology
 ---
 ```py
 Classification Report:
                          precision    recall  f1-score   support
@@ -125,4 +129,91 @@ grilled_cheese_sandwich     0.8523    0.8773    0.8647       750
                accuracy                         0.8973     75750
               macro avg     0.8987    0.8973    0.8977     75750
            weighted avg     0.8987    0.8973    0.8977     75750
-```

 - biology
 ---
+# **Food-101-93M**
+> **Food-101-93M** is a fine-tuned image classification model built on top of **google/siglip2-base-patch16-224** using the **SiglipForImageClassification** architecture. It is trained to classify food images into one of the 101 dish categories defined by the [Food-101 dataset](https://data.vision.ee.ethz.ch/cvl/datasets_extra/food-101/).
 ```py
 Classification Report:
                          precision    recall  f1-score   support
                accuracy                         0.8973     75750
               macro avg     0.8987    0.8973    0.8977     75750
            weighted avg     0.8987    0.8973    0.8977     75750
+```
+The model categorizes images into 101 food classes such as `sushi`, `hamburger`, `waffles`, `pad_thai`, and more.
+---
+# **Run with Transformers 🤗**
+```python
+!pip install -q transformers torch pillow gradio
+```
+```python
+import gradio as gr
+from transformers import AutoImageProcessor, SiglipForImageClassification
+from PIL import Image
+import torch
+# Load the classification model and its image processor, both resolved from model_name
+model_name = "prithivMLmods/Food-101-93M"
+model = SiglipForImageClassification.from_pretrained(model_name)
+processor = AutoImageProcessor.from_pretrained(model_name)
+# Food-101 class names keyed by stringified class index ("0" through "100"); classify_food looks them up via str(index)
+labels = {
+    "0": "apple_pie", "1": "baby_back_ribs", "2": "baklava", "3": "beef_carpaccio", "4": "beef_tartare",
+    "5": "beet_salad", "6": "beignets", "7": "bibimbap", "8": "bread_pudding", "9": "breakfast_burrito",
+    "10": "bruschetta", "11": "caesar_salad", "12": "cannoli", "13": "caprese_salad", "14": "carrot_cake",
+    "15": "ceviche", "16": "cheesecake", "17": "cheese_plate", "18": "chicken_curry", "19": "chicken_quesadilla",
+    "20": "chicken_wings", "21": "chocolate_cake", "22": "chocolate_mousse", "23": "churros", "24": "clam_chowder",
+    "25": "club_sandwich", "26": "crab_cakes", "27": "creme_brulee", "28": "croque_madame", "29": "cup_cakes",
+    "30": "deviled_eggs", "31": "donuts", "32": "dumplings", "33": "edamame", "34": "eggs_benedict",
+    "35": "escargots", "36": "falafel", "37": "filet_mignon", "38": "fish_and_chips", "39": "foie_gras",
+    "40": "french_fries", "41": "french_onion_soup", "42": "french_toast", "43": "fried_calamari", "44": "fried_rice",
+    "45": "frozen_yogurt", "46": "garlic_bread", "47": "gnocchi", "48": "greek_salad", "49": "grilled_cheese_sandwich",
+    "50": "grilled_salmon", "51": "guacamole", "52": "gyoza", "53": "hamburger", "54": "hot_and_sour_soup",
+    "55": "hot_dog", "56": "huevos_rancheros", "57": "hummus", "58": "ice_cream", "59": "lasagna",
+    "60": "lobster_bisque", "61": "lobster_roll_sandwich", "62": "macaroni_and_cheese", "63": "macarons", "64": "miso_soup",
+    "65": "mussels", "66": "nachos", "67": "omelette", "68": "onion_rings", "69": "oysters",
+    "70": "pad_thai", "71": "paella", "72": "pancakes", "73": "panna_cotta", "74": "peking_duck",
+    "75": "pho", "76": "pizza", "77": "pork_chop", "78": "poutine", "79": "prime_rib",
+    "80": "pulled_pork_sandwich", "81": "ramen", "82": "ravioli", "83": "red_velvet_cake", "84": "risotto",
+    "85": "samosa", "86": "sashimi", "87": "scallops", "88": "seaweed_salad", "89": "shrimp_and_grits",
+    "90": "spaghetti_bolognese", "91": "spaghetti_carbonara", "92": "spring_rolls", "93": "steak", "94": "strawberry_shortcake",
+    "95": "sushi", "96": "tacos", "97": "takoyaki", "98": "tiramisu", "99": "tuna_tartare", "100": "waffles"
+}
+def classify_food(image):
+    """Predicts the type of food in the image."""
+    image = Image.fromarray(image).convert("RGB")
+    inputs = processor(images=image, return_tensors="pt")
+    with torch.no_grad():
+        outputs = model(**inputs)
+        logits = outputs.logits
+        probs = torch.nn.functional.softmax(logits, dim=1).squeeze().tolist()
+    predictions = {labels[str(i)]: round(probs[i], 3) for i in range(len(probs))}
+    # Sort by descending probability
+    predictions = dict(sorted(predictions.items(), key=lambda item: item[1], reverse=True)[:5])
+    return predictions
+# Gradio Interface: wires classify_food to an image input and a top-5 label output
+iface = gr.Interface(
+    fn=classify_food,
+    inputs=gr.Image(type="numpy"),  # classify_food converts this array with Image.fromarray
+    outputs=gr.Label(num_top_classes=5, label="Top 5 Prediction Scores"),
+    title="Food-101-93M 🍽️",
+    description="Upload an image of food to classify it into one of 101 dish categories based on the Food-101 dataset."
+)
+# Launch the app only when this file is executed as a script, not when it is imported
+if __name__ == "__main__":
+    iface.launch()
+```
+---
+# **Intended Use:**
+The **Food-101-93M** model is intended for:
+- **Recipe Recommendation Engines:** Automatically tagging food images to suggest recipes.
+- **Food Logging & Calorie Tracking Apps:** Categorizing meals based on photos.
+- **Smart Kitchens:** Assisting with food recognition in smart appliances.
+- **Restaurant Menu Digitization:** Auto-classifying dishes for visual menus or ordering systems.
+- **Dataset Labeling:** Enabling automatic annotation of food datasets for training other ML models.