Spaces:

Nirav-Madhani
/

Octo-1.5-Small

Sleeping

App Files Files Community

Nirav-Madhani committed on Mar 8

Commit

f8cb635

verified ·

1 Parent(s): 89208ac

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -35

app.py CHANGED Viewed

@@ -8,25 +8,20 @@ import os
 import io
 import base64
 from typing import List
-from fastapi.openapi.docs import get_swagger_ui_html
-# Set JAX to use CPU platform (adjust if GPU is needed)
 os.environ['JAX_PLATFORMS'] = 'cpu'
-# Load the model once globally
 model = OctoModel.load_pretrained("hf://rail-berkeley/octo-small-1.5")
 # Initialize FastAPI app
-app = FastAPI(
-    title="Octo Model Inference API",
-    docs_url="/"  # Swagger UI at root
-)
-# Define request body model
 class InferenceRequest(BaseModel):
-    image_base64: List[str]  # List of base64-encoded images in time sequence
     task: str = "pick up the fork"  # Default task
-    window_size: int = 2  # Default window size, configurable
 # Health check endpoint
 @app.get("/health")
@@ -37,14 +32,7 @@ async def health_check():
 @app.post("/predict")
 async def predict(request: InferenceRequest, dataset_name: str = "bridge_dataset"):
     try:
-        # Validate input
-        if len(request.image_base64) < request.window_size:
-            raise HTTPException(
-                status_code=400,
-                detail=f"At least {request.window_size} images required for the specified window size"
-            )
-        # Process images
         images = []
         for img_base64 in request.image_base64:
             if img_base64.startswith("data:image"):
@@ -54,11 +42,11 @@ async def predict(request: InferenceRequest, dataset_name: str = "bridge_dataset
             img = np.array(img)
             images.append(img)
-        # Stack all images and add batch dimension
         img_array = np.stack(images)[np.newaxis, ...]  # Shape: (1, T, 256, 256, 3)
         observation = {
             "image_primary": img_array,
-            "timestep_pad_mask": np.full((1, len(images)), True, dtype=bool)  # Shape: (1, T)
         }
         # Create task and predict actions
@@ -69,20 +57,8 @@ async def predict(request: InferenceRequest, dataset_name: str = "bridge_dataset
             unnormalization_statistics=model.dataset_statistics[dataset_name]["action"],
             rng=jax.random.PRNGKey(0)
         )
-        actions = actions[0]  # Remove batch dimension, Shape: (horizon, action_dim)
-        # Convert to list for JSON response
-        actions_list = actions.tolist()
-        return {"actions": actions_list}
     except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Error processing request: {str(e)}")
-# Custom Swagger UI route (optional)
-@app.get("/docs", include_in_schema=False)
-async def custom_swagger_ui_html():
-    return get_swagger_ui_html(
-        openapi_url=app.openapi_url,
-        title=app.title + " - Swagger UI",
-        oauth2_redirect_url=app.swagger_ui_oauth2_redirect_url,
-    )

 import io
 import base64
 from typing import List
+# Set JAX to use CPU (adjust to GPU if available)
 os.environ['JAX_PLATFORMS'] = 'cpu'
+# Load Octo 1.5 model globally
 model = OctoModel.load_pretrained("hf://rail-berkeley/octo-small-1.5")
 # Initialize FastAPI app
+app = FastAPI(title="Octo 1.5 Inference API")
+# Request body model
 class InferenceRequest(BaseModel):
+    image_base64: List[str]  # List of base64-encoded images
     task: str = "pick up the fork"  # Default task
 # Health check endpoint
 @app.get("/health")
 @app.post("/predict")
 async def predict(request: InferenceRequest, dataset_name: str = "bridge_dataset"):
     try:
+        # Decode and process images
         images = []
         for img_base64 in request.image_base64:
             if img_base64.startswith("data:image"):
             img = np.array(img)
             images.append(img)
+        # Stack images with batch dimension
         img_array = np.stack(images)[np.newaxis, ...]  # Shape: (1, T, 256, 256, 3)
         observation = {
             "image_primary": img_array,
+            "timestep_pad_mask": np.ones((1, len(images)), dtype=bool)  # Shape: (1, T)
         }
         # Create task and predict actions
             unnormalization_statistics=model.dataset_statistics[dataset_name]["action"],
             rng=jax.random.PRNGKey(0)
         )
+        actions = actions[0]  # Remove batch dimension, Shape: (T, action_dim)
+        return {"actions": actions.tolist()}
     except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error: {str(e)}")