# NOTE(review): page-scrape residue, not part of the program: "Spaces: Sleeping Sleeping"
"""Streamlit app: grab one webcam frame and show its estimated depth map.

Streamlit re-executes this whole script on every user interaction, so
expensive or exclusive resources (the model, the webcam) are handled
explicitly below instead of being created unconditionally at the top level.
"""
import cv2
import torch
import numpy as np
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForDepthEstimation
import streamlit as st

MODEL_ID = "depth-anything/Depth-Anything-V2-Small-hf"


@st.cache_resource
def _load_model():
    """Load the image processor and depth model once and reuse them.

    Returns:
        A ``(image_processor, model, device)`` tuple. The model is moved to
        CUDA and cast to half precision when CUDA is available, otherwise it
        stays on the CPU in its default precision.
    """
    target_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    processor = AutoImageProcessor.from_pretrained(MODEL_ID)
    net = AutoModelForDepthEstimation.from_pretrained(MODEL_ID)
    net.to(target_device)
    if target_device.type == "cuda":
        # Half precision for speed; inputs are cast to match in _estimate_depth.
        net = net.half()
    return processor, net, target_device


def _grab_frame(camera_index=0):
    """Read a single BGR frame from the webcam.

    The capture device is always released, even if reading raises, so the
    camera is not left locked between Streamlit reruns.

    Returns:
        The frame as returned by ``cv2.VideoCapture.read``, or ``None`` when
        no frame could be read.
    """
    cap = cv2.VideoCapture(camera_index)
    try:
        ret, frame = cap.read()
    finally:
        cap.release()
    return frame if ret else None


def _estimate_depth(frame_bgr):
    """Run the depth model on a BGR frame.

    Returns:
        A float NumPy array with the same height and width as ``frame_bgr``.
    """
    frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    image = Image.fromarray(frame_rgb)

    inputs = image_processor(images=image, return_tensors="pt")
    # Floating-point inputs must match the model's dtype: after model.half()
    # a float32 input would fail inside the first convolution.
    inputs = {
        name: (
            tensor.to(device=device, dtype=model.dtype)
            if tensor.is_floating_point()
            else tensor.to(device)
        )
        for name, tensor in inputs.items()
    }

    # Model inference (no gradients needed)
    with torch.no_grad():
        predicted_depth = model(**inputs).predicted_depth

    # Resize in float32 so the result does not depend on half-precision
    # support of the interpolation kernel.
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1).float(),
        size=(frame_bgr.shape[0], frame_bgr.shape[1]),  # Match the frame's dimensions
        mode="bicubic",
        align_corners=False,
    )
    return prediction.squeeze().cpu().numpy()


def _colorize_depth(depth_map):
    """Scale a depth map to 0-255 and apply the JET colour map (BGR output)."""
    # Bicubic resizing can overshoot below zero; negatives would wrap around
    # when converted to uint8, so clamp them first.
    clamped = np.clip(depth_map, 0, None)
    peak = float(clamped.max()) if clamped.size else 0.0
    if peak > 0:
        scaled = clamped / peak * 255
    else:
        # Degenerate (all-zero) map: avoid dividing by zero.
        scaled = np.zeros_like(clamped)
    return cv2.applyColorMap(scaled.astype(np.uint8), cv2.COLORMAP_JET)


# Load model and image processor (cached across Streamlit reruns)
image_processor, model, device = _load_model()

# Streamlit App
st.title("Real-time Depth Estimation from Webcam")

# Streamlit button to capture a screenshot; the webcam is only opened on click
if st.button("Capture Screenshot"):
    frame = _grab_frame()
    if frame is None:
        st.error("Could not read a frame from the webcam.")
    else:
        depth_map_colored = _colorize_depth(_estimate_depth(frame))
        # Display the original frame and the depth map in Streamlit.
        # NOTE(review): use_column_width is kept from the original; newer
        # Streamlit releases deprecate it in favour of use_container_width.
        st.image(frame, caption="Original Webcam Image", channels="BGR", use_column_width=True)
        st.image(depth_map_colored, caption="Depth Map", channels="BGR", use_column_width=True)