Spaces:

NextDrought
/

ForestAI-TreeExtraction

Sleeping

DynamicPacific committed on Jul 11, 2025

Commit

67cd09b

1 Parent(s): dbdc03c

Improve tree extraction with contour-based detection

- Replace grid-based approach with contour detection
- Use adaptive NDVI thresholding based on image statistics
- Add morphological operations to clean up tree masks
- Implement area-based filtering (keep only contours covering 0.1% to 10% of the image area)
- Add adaptive confidence scoring
- Include area and confidence metadata in features
- Add OpenCV dependency for contour processing
- Results in more accurate tree shapes and better area coverage

Files changed (2) hide show

requirements.txt +1 -0
utils/advanced_extraction.py +79 -42

requirements.txt CHANGED Viewed

@@ -10,3 +10,4 @@ fiona>=1.9.0
 matplotlib>=3.7.0
 pandas>=2.0.0
 scipy>=1.11.0

 matplotlib>=3.7.0
 pandas>=2.0.0
 scipy>=1.11.0
+opencv-python>=4.8.0

utils/advanced_extraction.py CHANGED Viewed

@@ -3,24 +3,35 @@ import logging
 import numpy as np
 import rasterio
 from rasterio.warp import transform_bounds
 def extract_features_from_geotiff(image_path, output_folder, feature_type="trees"):
-    """Simple feature extraction for HF Spaces."""
     try:
         logging.info(f"Extracting {feature_type} from {image_path}")
         with rasterio.open(image_path) as src:
-            # Simple NDVI calculation
             if src.count >= 3:
                 red = src.read(1).astype(float)
                 green = src.read(2).astype(float)
                 nir = src.read(4).astype(float) if src.count >= 4 else green
                 ndvi = np.divide(nir - red, nir + red + 1e-10)
-                mask = ndvi > 0.2
             else:
                 band = src.read(1)
-                mask = band > np.percentile(band, 60)
             # Get bounds
             bounds = src.bounds
@@ -32,48 +43,74 @@ def extract_features_from_geotiff(image_path, output_folder, feature_type="trees
             else:
                 west, south, east, north = -74.1, 40.6, -73.9, 40.8
-        # Create simple features
         features = []
         height, width = mask.shape
-        grid_size = max(10, min(height, width) // 50)
-        feature_id = 0
-        for y in range(0, height, grid_size):
-            for x in range(0, width, grid_size):
-                cell = mask[y:y+grid_size, x:x+grid_size]
-                if np.sum(cell) > grid_size * grid_size * 0.3:
-                    x_ratio = x / width
-                    y_ratio = y / height
-                    lon1 = west + x_ratio * (east - west)
-                    lat1 = north - y_ratio * (north - south)
-                    x2_ratio = min((x + grid_size) / width, 1.0)
-                    y2_ratio = min((y + grid_size) / height, 1.0)
-                    lon2 = west + x2_ratio * (east - west)
-                    lat2 = north - y2_ratio * (north - south)
-                    polygon_coords = [
-                        [lon1, lat1], [lon2, lat1], [lon2, lat2], [lon1, lat2], [lon1, lat1]
-                    ]
-                    feature = {
-                        "type": "Feature",
-                        "id": feature_id,
-                        "properties": {
-                            "feature_type": feature_type,
-                            "confidence": 0.8
-                        },
-                        "geometry": {
-                            "type": "Polygon",
-                            "coordinates": [polygon_coords]
-                        }
                     }
-                    features.append(feature)
-                    feature_id += 1
         return {
             "type": "FeatureCollection",

 import numpy as np
 import rasterio
 from rasterio.warp import transform_bounds
+import cv2
+from scipy import ndimage
 def extract_features_from_geotiff(image_path, output_folder, feature_type="trees"):
+    """Improved feature extraction with contour-based tree detection."""
     try:
         logging.info(f"Extracting {feature_type} from {image_path}")
         with rasterio.open(image_path) as src:
+            # Enhanced NDVI calculation
             if src.count >= 3:
                 red = src.read(1).astype(float)
                 green = src.read(2).astype(float)
                 nir = src.read(4).astype(float) if src.count >= 4 else green
+                # Improved NDVI calculation
                 ndvi = np.divide(nir - red, nir + red + 1e-10)
+                # Adaptive thresholding based on image statistics
+                ndvi_mean = np.mean(ndvi)
+                ndvi_std = np.std(ndvi)
+                threshold = max(0.3, ndvi_mean + 0.5 * ndvi_std)  # More adaptive threshold
+                mask = ndvi > threshold
             else:
                 band = src.read(1)
+                # Adaptive threshold for single-band images
+                threshold = np.percentile(band, 70)  # Increased from 60
+                mask = band > threshold
             # Get bounds
             bounds = src.bounds
             else:
                 west, south, east, north = -74.1, 40.6, -73.9, 40.8
+        # Convert to uint8 for OpenCV processing
+        mask_uint8 = (mask * 255).astype(np.uint8)
+        # Morphological operations to clean up the mask
+        kernel = np.ones((3, 3), np.uint8)
+        mask_cleaned = cv2.morphologyEx(mask_uint8, cv2.MORPH_CLOSE, kernel)
+        mask_cleaned = cv2.morphologyEx(mask_cleaned, cv2.MORPH_OPEN, kernel)
+        # Find contours instead of using grid
+        contours, _ = cv2.findContours(mask_cleaned, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+        # Filter contours by area and create features
         features = []
         height, width = mask.shape
+        min_area = (height * width) * 0.001  # Minimum 0.1% of image area
+        max_area = (height * width) * 0.1    # Maximum 10% of image area
+        for i, contour in enumerate(contours):
+            area = cv2.contourArea(contour)
+            # Filter by area
+            if area < min_area or area > max_area:
+                continue
+            # Simplify contour for better polygon shape
+            epsilon = 0.02 * cv2.arcLength(contour, True)
+            approx = cv2.approxPolyDP(contour, epsilon, True)
+            # Convert contour points to geographic coordinates
+            polygon_coords = []
+            for point in approx:
+                x, y = point[0]
+                # Convert pixel coordinates to geographic coordinates
+                x_ratio = x / width
+                y_ratio = y / height
+                lon = west + x_ratio * (east - west)
+                lat = north - y_ratio * (north - south)
+                polygon_coords.append([lon, lat])
+            # Close the polygon
+            if len(polygon_coords) > 2:
+                polygon_coords.append(polygon_coords[0])
+                # Calculate confidence based on area and shape
+                area_ratio = area / (height * width)
+                confidence = min(0.95, 0.5 + area_ratio * 10)  # Adaptive confidence
+                feature = {
+                    "type": "Feature",
+                    "id": i,
+                    "properties": {
+                        "feature_type": feature_type,
+                        "confidence": round(confidence, 2),
+                        "area_pixels": int(area),
+                        "area_ratio": round(area_ratio, 4)
+                    },
+                    "geometry": {
+                        "type": "Polygon",
+                        "coordinates": [polygon_coords]
                     }
+                }
+                features.append(feature)
+        logging.info(f"Extracted {len(features)} tree features")
         return {
             "type": "FeatureCollection",