KarthikAI committed
Commit 06a49e4 · verified · 1 Parent(s): 32b6c5f

Update handler.py

Files changed (1)
  1. handler.py +24 -65
handler.py CHANGED
@@ -1,85 +1,44 @@
 import base64
 import io
-import traceback
 from PIL import Image
 import torch
 from diffusers import StableDiffusionImg2ImgPipeline

 # Global pipeline instance
+torch_device = "cuda" if torch.cuda.is_available() else "cpu"
 pipe = None

 class EndpointHandler:
     def __init__(self, model_dir: str):
-        # Determine device based on CUDA availability
-        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        # model_dir is ignored; HF clones your repo here
+        pass

     def init(self):
-        """
-        Load the InstantID-enhanced Stable Diffusion img2img model once when the endpoint starts.
-        """
         global pipe
         if pipe is None:
+            # Load your InstantID img2img model
             pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
                 "karthikAI/InstantID-i2i",
                 revision="main",
-                torch_dtype=torch.float16,
-                safety_checker=None
-            ).to(self.device)
-            pipe.enable_attention_slicing()
+                torch_dtype=torch.float16
+            ).to(torch_device)

     def inference(self, model_inputs: dict) -> dict:
-        """
-        Run a single img2img inference with detailed error debugging.
-
-        Expects a JSON payload with:
-        - "inputs": base64-encoded input image
-        - "parameters": {
-              "prompt": str,
-              "strength": float,
-              "guidance_scale": float,
-              "num_inference_steps": int,
-          }
-        Returns on success:
-        - "generated_image_base64": base64-encoded PNG
-        On failure:
-        - "error": error message
-        - "traceback": full Python traceback
-        """
-        try:
-            # 1. Decode incoming image
-            b64_img = model_inputs.get("inputs")
-            if not b64_img:
-                raise ValueError("No image data provided under 'inputs'.")
-            image_bytes = base64.b64decode(b64_img)
-            init_img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
-
-            # 2. Extract parameters
-            params = model_inputs.get("parameters", {})
-            prompt = params.get("prompt", "")
-            strength = float(params.get("strength", 0.75))
-            guidance_scale = float(params.get("guidance_scale", 7.5))
-            num_steps = int(params.get("num_inference_steps", 50))
-
-            # 3. Run the img2img pipeline
-            result = pipe(
-                prompt=prompt,
-                image=init_img,
-                strength=strength,
-                guidance_scale=guidance_scale,
-                num_inference_steps=num_steps,
-            )
-            out_img = result.images[0]
-
-            # 4. Encode and return image
-            buffer = io.BytesIO()
-            out_img.save(buffer, format="PNG")
-            generated_b64 = base64.b64encode(buffer.getvalue()).decode("utf-8")
-            return {"generated_image_base64": generated_b64}
-
-        except Exception as e:
-            # Return detailed error info for debugging
-            tb = traceback.format_exc()
-            return {
-                "error": str(e),
-                "traceback": tb
-            }
+        # 1) decode base64 image
+        b64 = model_inputs.get("inputs")
+        if b64 is None:
+            return {"error": "No 'inputs' key with base64 image provided."}
+        img = Image.open(io.BytesIO(base64.b64decode(b64))).convert("RGB")
+
+        # 2) extract prompt
+        prompt = model_inputs.get("parameters", {}).get("prompt", "")
+
+        # 3) minimal call: prompt + image only
+        out = pipe(prompt=prompt, image=img)
+        result_img = out.images[0]
+
+        # 4) encode output
+        buf = io.BytesIO()
+        result_img.save(buf, format="PNG")
+        b64_out = base64.b64encode(buf.getvalue()).decode()
+        return {"generated_image_base64": b64_out}