# dhivehi-ocr / detector.py
# alakxender - init - 228e8c1
import os
import json
import subprocess
from typing import Union, List, Dict, Optional
from pathlib import Path
class TextDetector:
    """Wrapper that runs the ``surya_detect`` command and reads its results.

    The detector shells out to ``surya_detect`` via ``subprocess`` and then
    loads the ``results.json`` file from the location this class expects the
    tool to have written it to. The remaining methods are small accessors for
    navigating the parsed results dictionary.
    """

    def __init__(self, output_dir: Optional[str] = None):
        """
        Initialize the text detector.

        Args:
            output_dir: Optional directory to save results. If None, no
                ``--output_dir`` flag is passed to surya_detect.
        """
        self.output_dir = output_dir

    def process_input(self,
                      data_path: Union[str, Path],
                      save_images: bool = False,
                      page_range: Optional[str] = None) -> Dict:
        """
        Process input file or directory using surya_detect.

        Args:
            data_path: Path to image, PDF, or directory of images/PDFs
            save_images: Whether to save images of pages and detected text lines
            page_range: Optional page range to process in PDFs (e.g., "0,5-10,20")

        Returns:
            Dictionary containing detection results

        Raises:
            RuntimeError: If surya_detect exits with a non-zero status.
            FileNotFoundError: If the expected results.json file is missing
                after the command has run.
        """
        # Normalize so both str and Path inputs are handled the same way.
        data_path = Path(data_path)

        # The command is built as an argument list (no shell string), so the
        # path and option values are passed through verbatim.
        cmd = ["surya_detect", str(data_path)]
        if save_images:
            cmd.append("--images")
        if self.output_dir:
            # str() keeps the argument list homogeneous even if a Path was
            # stored in output_dir.
            cmd.extend(["--output_dir", str(self.output_dir)])
        if page_range:
            cmd.extend(["--page_range", page_range])

        try:
            subprocess.run(cmd, check=True)
        except subprocess.CalledProcessError as e:
            # Chain explicitly so the original failure stays attached as the
            # cause of the RuntimeError.
            raise RuntimeError(f"Error running surya_detect: {e}") from e

        return self._read_results(data_path)

    def _read_results(self, data_path: Path) -> Dict:
        """
        Read and parse the results.json file generated by surya_detect.

        Args:
            data_path: Path to the input file/directory

        Returns:
            Dictionary containing detection results

        Raises:
            FileNotFoundError: If no results file exists at the expected path.
        """
        if self.output_dir:
            # surya_detect creates a subdirectory with the input filename
            # (the stem, i.e. the name without its final extension).
            results_path = Path(self.output_dir) / data_path.stem / "results.json"
        else:
            # NOTE(review): this assumes surya_detect writes results.json next
            # to the input when no --output_dir is given - confirm against the
            # installed surya version.
            results_path = data_path.parent / "results.json"

        if not results_path.exists():
            raise FileNotFoundError(f"Results file not found at {results_path}")

        # Decode explicitly as UTF-8 rather than relying on the platform
        # default encoding, which can fail on non-ASCII content.
        with open(results_path, "r", encoding="utf-8") as f:
            return json.load(f)

    def get_text_regions(self, results: Dict, filename: str) -> List[Dict]:
        """
        Extract text regions from detection results for a specific file.

        Args:
            results: Detection results dictionary
            filename: Name of the file to get regions for (without extension)

        Returns:
            List of dictionaries containing text regions for each page

        Raises:
            KeyError: If ``filename`` is not a key of ``results``.
        """
        if filename not in results:
            raise KeyError(f"No results found for file {filename}")
        return results[filename]

    def get_page_regions(self, results: Dict, filename: str, page_num: int) -> Dict:
        """
        Get text regions for a specific page of a file.

        Args:
            results: Detection results dictionary
            filename: Name of the file (without extension)
            page_num: Page number (0-based)

        Returns:
            Dictionary containing text regions for the specified page

        Raises:
            KeyError: If ``filename`` is not a key of ``results``.
            IndexError: If ``page_num`` is negative or past the last page.
        """
        regions = self.get_text_regions(results, filename)
        # Reject negative numbers too: list indexing would otherwise wrap
        # around silently and return a page counted from the end.
        if not 0 <= page_num < len(regions):
            raise IndexError(f"Page {page_num} not found in results")
        return regions[page_num]

    def get_text_lines(self, page_regions: Dict) -> List[Dict]:
        """
        Extract text lines from page regions.

        Args:
            page_regions: Dictionary containing page detection results

        Returns:
            The value stored under the 'bboxes' key, or an empty list when
            that key is absent
        """
        return page_regions.get('bboxes', [])

    def get_vertical_lines(self, page_regions: Dict) -> List[Dict]:
        """
        Extract vertical lines from page regions.

        Args:
            page_regions: Dictionary containing page detection results

        Returns:
            The value stored under the 'vertical_lines' key, or an empty list
            when that key is absent
        """
        return page_regions.get('vertical_lines', [])