Spaces:

origami-digital
/

sidewalk-sam

Sleeping

App Files Files Community

David Vaillant committed on May 5, 2024

Commit

a073fdd

1 Parent(s): 5b4a37c

Basic func.

Browse files

Files changed (3) hide show

baby_shiny.py +102 -0
backend.py +72 -0
checkpoints/bbox_finetune.ckpt +3 -0

baby_shiny.py ADDED Viewed

	@@ -0,0 +1,102 @@

+from shiny import App, Inputs, Outputs, Session, reactive, render, ui
+from shiny.types import FileInfo, ImgData
+import asyncio
+import concurrent.futures
+import backend
+import numpy as np
+from PIL import Image, ImageDraw
+from pathlib import Path
+import tempfile
+def draw_layer_on_image(im: Image) -> Image:
+    """Draws something on top of an image."""
+    # Attempting to use thresholds.
+    threshold: int = 1
+    output_im = np.array(im)
+    # return Image.fromarray(output_im)
+    # The image drawing code.
+    draw = ImageDraw.Draw(im)
+    draw.line((0, 0) + im.size, fill=128, width=5)
+    draw.line((0, im.size[1], im.size[0], 0), fill=128)
+    return im
+# UI:
+# TITLE ELEMENT, centered
+# input, centered.
+# table in middle. Upload, displays image on the left.
+# arrow in the middle, mask on the right.
+card_height = '700px'
+app_ui = ui.page_fixed(
+    ui.input_file("file1", "Upload a sidewalk.", accept=[".jpg", ".png", ".jpeg"], multiple=False),
+    ui.layout_columns(
+        ui.card(
+            ui.card_header("Uploaded Image"),
+            ui.output_image("show_image"),
+            height=card_height
+        ),
+        ui.card(
+            ui.card_header("Image Mask"),
+            # ui.input_task_button("mask_btn", "Process mask"),
+            ui.output_image("samwalk"),
+            height=card_height
+        ),
+    )
+)
+def strip_alpha(image: Image) -> Image:
+    # Create a white background
+    background = Image.new('RGBA', image.size, (255, 255, 255, 255))
+    composite = Image.alpha_composite(background, image)
+    rgb_image = composite.convert('RGB')
+    return rgb_image
+def server(input: Inputs, output: Outputs, session: Session):
+    uploaded_img = None
+    @reactive.calc
+    def parsed_file():
+        file: list[FileInfo] | None = input.file1()
+        if file is None:
+            return
+        return file[0]
+    @render.image
+    def show_image():
+        uploaded_img = parsed_file()
+        if uploaded_img is None:
+            return
+        uploaded_src = uploaded_img['datapath']
+        img: ImgData = {"src": str(uploaded_src), "width": "500px"}
+        return img
+    # @reactive.event(input.mask_btn)
+    @render.image
+    def samwalk():
+        uploaded_file = parsed_file()
+        if uploaded_file is None:
+            return
+        uploaded_src = uploaded_file['datapath']
+        uploaded_img = Image.open(uploaded_src)
+        if uploaded_img.mode == 'RGBA':
+            uploaded_img = strip_alpha(uploaded_img)
+        dirpath = tempfile.mkdtemp()
+        # output_img = async_process_image(uploaded_img)
+        # while output_img is None:
+        #     pass
+        # output_img = output_img.result()
+        #     # return {"src": str("waiting.gif"), "width": "500px"}
+        output_img = backend.process_image(uploaded_img)
+        output_path = dirpath / Path(uploaded_src)
+        output_img.save(output_path)
+        return {"src": str(output_path), "width": "500px"}
+# Shiny application object combining the UI definition and the server function.
+app = App(app_ui, server)

backend.py ADDED Viewed

	@@ -0,0 +1,72 @@

+# backend.py
+import numpy as np
+from PIL import Image, ImageDraw
+import torch
+from transformers import SamModel, SamProcessor
+from torchvision.transforms import v2
+from samgeo.text_sam import LangSAM
+import os
+import logging
+preproc = v2.Compose([
+    v2.PILToTensor(),
+    v2.ToDtype(torch.float32, scale=True),  # to float32 in [0, 1]
+])
+# Load the necessary models.
+device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
+CHECKPOINT_FILE = os.getenv("SAM_FINETUNE_CHECKPOINT", "checkpoints/bbox_finetune.pth")
+processor = SamProcessor.from_pretrained("facebook/sam-vit-base")
+tuned_model = SamModel.from_pretrained("facebook/sam-vit-large").to(device)
+tuned_model.load_state_dict(torch.load(CHECKPOINT_FILE,
+                                       map_location=device))
+langsam_model = LangSAM("vit_l")
+def process_image(image: Image, bbox: list[int, int, int, int] = None) -> Image:
+    logging.info("Logging image information.")
+    if bbox is None:
+        # No bbox information. Use default (filters out zeroes)
+        logging.debug("Using default, null bounding box.")
+        bbox = list(map(float, image.getbbox()))  # List of floats.
+    inputs = processor(preproc(image), input_boxes=[[bbox]],
+                       do_rescale=False, return_tensors="pt")
+    inputs = {k: v.to(device) for k, v in inputs.items()}  # Map objects to our device.
+    mask = get_sidewalk_mask(tuned_model, inputs)
+    # Get tree masks.
+    # Union 'em??
+    return mask
+def get_sidewalk_mask(model, inputs) -> Image:
+    logging.info("Calculating mask.")
+    model.eval()
+    with torch.no_grad():
+        outputs = model(**inputs, multimask_output=False)
+    ## apply sigmoid
+    mask_probabilities = torch.sigmoid(outputs.pred_masks.squeeze(1))
+    ## Convert to numpy for the rest of our stuff.
+    mask_probabilities = mask_probabilities.cpu().numpy().squeeze()
+    ## Filter out smaller probs.
+    mask_probabilities[mask_probabilities < 0.5] = 0
+    ## Map probabilities to color intensity linearly.
+    mask_probabilities *= 255
+    greyscale_img = Image.fromarray(mask_probabilities).convert('L')
+    return greyscale_img
+def get_tree_masks(image: Image):
+    langsam_model.predict(image, "tree", box_threshold=0.24, text_threshold=0.24)
+# masks, boxes, phrases, logits = tuned_model.predict(image_pil, bbox)
+# tree_data = langsam_model.predict(image_pil, text_prompt)
+# def draw_layer_on_image(model, im: Image, text_prompt: str='sidewalk') -> Image:

checkpoints/bbox_finetune.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4c72e371f7cd4644c9d9550649db4a5473ad63c21472b9d0973670d0dff1ff69
+size 1249561500