kernel-luso-comfort committed
Commit 99b73a0 · 1 Parent(s): 699e2ed

Refactor model initialization and prediction logic; enhance mock prediction to handle modality and targets

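For orientation: the diff below replaces the module-level init_model()/predict() helpers with a Model wrapper class. A minimal usage sketch of that interface follows; the app-side wiring, file name, modality, and target prompts are illustrative assumptions, not part of this commit.

from PIL import Image

from inference_utils.init_predict import Model

model = Model()
model.init()  # downloads biomedparse_v1.pt (needs HF_TOKEN) and builds the model on CUDA

image = Image.open("example_scan.png")                     # hypothetical input file
overlay = model.predict(image, "CT", ["tumor", "kidney"])  # example modality and target prompts
overlay.save("prediction_overlay.png")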
inference_utils/init_predict.py CHANGED
@@ -10,15 +10,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import os
 from PIL import Image
 from huggingface_hub import hf_hub_download
 import matplotlib.pyplot as plt
 import numpy as np
+import torch
 from inference_utils.inference import interactive_infer_image
-from main import model
-
-
-import gradio as gr
 
 from modeling import build_model
 from modeling.BaseModel import BaseModel
@@ -27,29 +25,40 @@ from utilities.constants import BIOMED_CLASSES
 from utilities.distributed import init_distributed
 
 
-def generate_colors(n):
-    cmap = plt.get_cmap("tab10")
-    colors = [tuple(int(255 * val) for val in cmap(i)[:3]) for i in range(n)]
-    return colors
+class Model:
+    def init(self):
+        self._model = init_model()
 
+    def predict(self, image: Image, modality_type: str, targets: list[str]) -> Image:
+        return predict(self._model, image, targets)
 
-def overlay_masks(image, masks, colors):
-    overlay = image.copy()
-    overlay = np.array(overlay, dtype=np.uint8)
-    for mask, color in zip(masks, colors):
-        overlay[mask > 0] = (overlay[mask > 0] * 0.4 + np.array(color) * 0.6).astype(
-            np.uint8
+
+def init_model():
+    # Download model
+    model_file = hf_hub_download(
+        repo_id="microsoft/BiomedParse",
+        filename="biomedparse_v1.pt",
+        token=os.getenv("HF_TOKEN"),
+    )
+
+    # Initialize model
+    conf_files = "configs/biomedparse_inference.yaml"
+    opt = load_opt_from_config_files([conf_files])
+    opt = init_distributed(opt)
+
+    model = BaseModel(opt, build_model(opt)).from_pretrained(model_file).eval().cuda()
+    with torch.no_grad():
+        model.model.sem_seg_head.predictor.lang_encoder.get_text_embeddings(
+            BIOMED_CLASSES + ["background"], is_eval=True
         )
-    return Image.fromarray(overlay)
+
+    return model
 
 
-def predict(image: gr.Image, prompts: str):
+def predict(model, image: Image, prompts: list[str]):
     if not prompts:
         return None
 
-    # Convert string input to list
-    prompts = [p.strip() for p in prompts.split(",")]
-
     # Convert to RGB if needed
     if image.mode != "RGB":
         image = image.convert("RGB")
@@ -66,23 +75,17 @@ def predict(image: gr.Image, prompts: str):
     return pred_overlay
 
 
-def init_model():
-    # Download model
-    model_file = hf_hub_download(
-        repo_id="microsoft/BiomedParse",
-        filename="biomedparse_v1.pt",
-        token=os.getenv("HF_TOKEN"),
-    )
+def generate_colors(n):
+    cmap = plt.get_cmap("tab10")
+    colors = [tuple(int(255 * val) for val in cmap(i)[:3]) for i in range(n)]
+    return colors
 
-    # Initialize model
-    conf_files = "configs/biomedparse_inference.yaml"
-    opt = load_opt_from_config_files([conf_files])
-    opt = init_distributed(opt)
 
-    model = BaseModel(opt, build_model(opt)).from_pretrained(model_file).eval().cuda()
-    with torch.no_grad():
-        model.model.sem_seg_head.predictor.lang_encoder.get_text_embeddings(
-            BIOMED_CLASSES + ["background"], is_eval=True
+def overlay_masks(image, masks, colors):
+    overlay = image.copy()
+    overlay = np.array(overlay, dtype=np.uint8)
+    for mask, color in zip(masks, colors):
+        overlay[mask > 0] = (overlay[mask > 0] * 0.4 + np.array(color) * 0.6).astype(
+            np.uint8
         )
-
-    return model
+    return Image.fromarray(overlay)
 
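As a standalone sketch of the two helpers now kept at the bottom of init_predict.py, here is how generate_colors and overlay_masks compose; the synthetic image and masks are invented for illustration, and importing them pulls in the full model dependencies.

import numpy as np
from PIL import Image

from inference_utils.init_predict import generate_colors, overlay_masks

image = Image.new("RGB", (128, 128), "black")               # synthetic RGB image
masks = [np.zeros((128, 128), dtype=np.uint8) for _ in range(2)]
masks[0][20:60, 20:60] = 1                                  # fake mask for a first target
masks[1][70:110, 70:110] = 1                                # fake mask for a second target

colors = generate_colors(len(masks))                        # tab10 colors as 0-255 RGB tuples
blended = overlay_masks(image, masks, colors)               # 40% image / 60% color where mask > 0
blended.save("overlay_demo.png")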
inference_utils/init_predict_mock.py CHANGED
@@ -12,48 +12,51 @@
 
 
 from typing import Tuple
-from PIL import Image, ImageDraw, ImageFont
+from PIL import ImageDraw, ImageFont
+from PIL.Image import Image
 import gradio as gr
 import random
 
 
-def init_model():
-    return None
-
-
-def predict(
-    image: Image, modality_type: str, targets: list[str]
-) -> Tuple[gr.Image, str]:
-    # Randomly split targets into found and not found
-    targets_found = random.sample(targets, k=len(targets) // 2)
-    targets_not_found = [t for t in targets if t not in targets_found]
-
-    # Create a copy of the image to draw on
-    image_with_text = image.copy()
-    draw = ImageDraw.Draw(image_with_text)
-
-    # Draw found targets on the image with larger font
-    font_size = 36
-    try:
-        font = ImageFont.truetype("DejaVuSans.ttf", font_size)
-    except OSError:
-        # Fallback to default font if DejaVuSans is not available
-        font = ImageFont.load_default()
-
-    # Calculate starting position from bottom
-    line_height = 50
-    total_height = len(targets_found) * line_height
-    padding = 20
-
-    # Start from bottom and work upwards
-    y_position = image_with_text.height - total_height - padding
-    for target in targets_found:
-        draw.text((20, y_position), target, fill="red", font=font)
-        y_position += line_height
-
-    # Format targets_not_found as a string with one target per line
-    targets_not_found_str = (
-        "\n".join(targets_not_found) if targets_not_found else "All targets were found!"
-    )
-
-    return image_with_text, targets_not_found_str
+class Model:
+    def init(self):
+        pass
+
+    def predict(
+        self, image: Image, modality_type: str, targets: list[str]
+    ) -> Tuple[Image, str]:
+        # Randomly split targets into found and not found
+        targets_found = random.sample(targets, k=len(targets) // 2)
+        targets_not_found = [t for t in targets if t not in targets_found]
+
+        # Create a copy of the image to draw on
+        image_with_text = image.copy()
+        draw = ImageDraw.Draw(image_with_text)
+
+        # Draw found targets on the image with larger font
+        font_size = 36
+        try:
+            font = ImageFont.truetype("DejaVuSans.ttf", font_size)
+        except OSError:
+            # Fallback to default font if DejaVuSans is not available
+            font = ImageFont.load_default()
+
+        # Calculate starting position from bottom
+        line_height = 50
+        total_height = len(targets_found) * line_height
+        padding = 20
+
+        # Start from bottom and work upwards
+        y_position = image_with_text.height - total_height - padding
+        for target in targets_found:
+            draw.text((20, y_position), target, fill="red", font=font)
+            y_position += line_height
+
+        # Format targets_not_found as a string with one target per line
+        targets_not_found_str = (
+            "\n".join(targets_not_found)
+            if targets_not_found
+            else "All targets were found!"
+        )
+
+        return image_with_text, targets_not_found_str
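A quick manual check of the mock interface, assuming the module stays importable as inference_utils.init_predict_mock; the placeholder image and target names are invented for the example.

from PIL import Image
from inference_utils.init_predict_mock import Model

model = Model()
model.init()  # no-op for the mock

scan = Image.new("RGB", (512, 512), "gray")   # placeholder input image
annotated, not_found = model.predict(scan, "CT", ["tumor", "kidney", "liver"])
annotated.save("mock_overlay.png")            # image with the randomly "found" targets drawn in red
print(not_found)                              # remaining targets, one per line, or "All targets were found!"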