Spaces:

alakxender
/

dhivehi-ocr

Running on Zero

App Files Files Community

dhivehi-ocr / detector.py

alakxender

init

228e8c1 3 months ago

raw

history blame contribute delete

4.81 kB

	import os
	import json
	import subprocess
	from typing import Union, List, Dict, Optional
	from pathlib import Path

	class TextDetector:
	    """
	    Thin wrapper around the ``surya_detect`` command-line tool.

	    The detector runs ``surya_detect`` as a subprocess, then loads the
	    ``results.json`` file the tool is expected to write and offers small
	    helpers for navigating the parsed dictionary.
	    """

	    def __init__(self, output_dir: Optional[str] = None):
	        """
	        Initialize the text detector.

	        Args:
	            output_dir: Optional directory to save results. If None, uses default surya_detect output directory.
	        """
	        self.output_dir = output_dir

	    def _build_command(self,
	                       data_path: Path,
	                       save_images: bool,
	                       page_range: Optional[str]) -> List[str]:
	        """
	        Assemble the ``surya_detect`` argument list for one invocation.

	        Args:
	            data_path: Path to image, PDF, or directory of images/PDFs
	            save_images: Whether to pass the ``--images`` flag
	            page_range: Optional value for ``--page_range``

	        Returns:
	            Argument list suitable for ``subprocess.run`` (no shell involved)
	        """
	        cmd = ["surya_detect", str(data_path)]

	        if save_images:
	            cmd.append("--images")

	        if self.output_dir:
	            # str() so a Path-like output_dir still yields a plain string argument.
	            cmd.extend(["--output_dir", str(self.output_dir)])

	        if page_range:
	            cmd.extend(["--page_range", page_range])

	        return cmd

	    def process_input(self,
	                      data_path: Union[str, Path],
	                      save_images: bool = False,
	                      page_range: Optional[str] = None) -> Dict:
	        """
	        Process input file or directory using surya_detect.

	        Args:
	            data_path: Path to image, PDF, or directory of images/PDFs
	            save_images: Whether to save images of pages and detected text lines
	            page_range: Optional page range to process in PDFs (e.g., "0,5-10,20")

	        Returns:
	            Dictionary containing detection results

	        Raises:
	            RuntimeError: If surya_detect exits with a non-zero status.
	            FileNotFoundError: If no results.json can be located afterwards.
	        """
	        # Convert to Path object if string
	        data_path = Path(data_path)

	        cmd = self._build_command(data_path, save_images, page_range)

	        # Run surya_detect; check=True turns a non-zero exit into an exception.
	        # Errors launching the executable itself are not caught here and
	        # propagate unchanged.
	        try:
	            subprocess.run(cmd, check=True)
	        except subprocess.CalledProcessError as e:
	            # Chain explicitly so the original exit status stays attached.
	            raise RuntimeError(f"Error running surya_detect: {e}") from e

	        # Read and return results
	        return self._read_results(data_path)

	    def _read_results(self, data_path: Path) -> Dict:
	        """
	        Read and parse the results.json file generated by surya_detect.

	        Args:
	            data_path: Path to the input file/directory

	        Returns:
	            Dictionary containing detection results

	        Raises:
	            FileNotFoundError: If none of the expected locations holds a results.json.
	        """
	        input_name = data_path.stem

	        # Determine candidate results file paths, most preferred first.
	        if self.output_dir:
	            # surya_detect creates a subdirectory with the input filename
	            candidates = [Path(self.output_dir) / input_name / "results.json"]
	        else:
	            candidates = [
	                # Location this wrapper has always checked first.
	                data_path.parent / "results.json",
	                # NOTE(review): upstream surya is understood to write to
	                # results/surya/<input name>/ (relative to the working
	                # directory) when no --output_dir is given - confirm against
	                # the installed surya version.
	                Path("results") / "surya" / input_name / "results.json",
	            ]

	        results_path = next((path for path in candidates if path.exists()), None)
	        if results_path is None:
	            searched = " or ".join(str(path) for path in candidates)
	            raise FileNotFoundError(f"Results file not found at {searched}")

	        # JSON is read as UTF-8 explicitly rather than with the platform default.
	        with open(results_path, 'r', encoding="utf-8") as f:
	            return json.load(f)

	    def get_text_regions(self, results: Dict, filename: str) -> List[Dict]:
	        """
	        Extract text regions from detection results for a specific file.

	        Args:
	            results: Detection results dictionary
	            filename: Name of the file to get regions for (without extension)

	        Returns:
	            List of dictionaries containing text regions for each page

	        Raises:
	            KeyError: If the results contain no entry for filename.
	        """
	        if filename not in results:
	            raise KeyError(f"No results found for file {filename}")

	        return results[filename]

	    def get_page_regions(self, results: Dict, filename: str, page_num: int) -> Dict:
	        """
	        Get text regions for a specific page of a file.

	        Args:
	            results: Detection results dictionary
	            filename: Name of the file (without extension)
	            page_num: Page number (0-based)

	        Returns:
	            Dictionary containing text regions for the specified page

	        Raises:
	            KeyError: If the results contain no entry for filename.
	            IndexError: If page_num is negative or past the last page.
	        """
	        regions = self.get_text_regions(results, filename)

	        # Reject negatives too: otherwise -1 would silently return the last
	        # page instead of reporting an invalid 0-based page number.
	        if not 0 <= page_num < len(regions):
	            raise IndexError(f"Page {page_num} not found in results")

	        return regions[page_num]

	    def get_text_lines(self, page_regions: Dict) -> List[Dict]:
	        """
	        Extract text lines from page regions.

	        Args:
	            page_regions: Dictionary containing page detection results

	        Returns:
	            Value stored under the 'bboxes' key, or an empty list if absent
	        """
	        return page_regions.get('bboxes', [])

	    def get_vertical_lines(self, page_regions: Dict) -> List[Dict]:
	        """
	        Extract vertical lines from page regions.

	        Args:
	            page_regions: Dictionary containing page detection results

	        Returns:
	            Value stored under the 'vertical_lines' key, or an empty list if absent
	        """
	        return page_regions.get('vertical_lines', [])