import os
import pandas as pd
from PIL import Image
from typing import Dict, Any
from src.utils.path_utils import get_project_root

# Module-level constants.
# Filesystem locations are resolved once, at import time, from the project root.
PROJECT_ROOT = get_project_root()
PREPROCESSED_DIR = PROJECT_ROOT.joinpath("data/preprocessed")

# Field names of a preprocessed record; image fields sit next to their text field.
# NOTE(review): presumably mirrors the column order of the preprocessed CSVs —
# nothing in this part of the file reads HEADERS, so confirm against the writer.
HEADERS = [
    "id",
    "claim", "claim_image",
    "evidence", "evidence_image",
    "category",
    "claim_ocr", "evidence_ocr",
]


def get_preprocessed_data(dataset: str = "train") -> pd.DataFrame:
    """
    Read one preprocessed split from disk into a DataFrame.

    The file is looked up as ``<dataset>.csv`` directly inside
    ``PREPROCESSED_DIR``. The name is not validated beyond checking that the
    resulting file exists.

    Args:
        dataset (str): Name of the split, e.g. 'train' or 'test'.
            Defaults to 'train'.

    Returns:
        pd.DataFrame: The CSV contents as parsed by ``pd.read_csv``.

    Raises:
        FileNotFoundError: If no CSV exists for the requested split.
    """
    csv_path = PREPROCESSED_DIR.joinpath(f"{dataset}.csv")

    # Happy path first; a missing file is reported with the full path below.
    if csv_path.exists():
        return pd.read_csv(csv_path)

    raise FileNotFoundError(f"Preprocessed dataset CSV not found: {csv_path}")


def _load_rgb_image(path: Any, label: str) -> Any:
    """
    Open the image stored at ``path`` and return it converted to RGB.

    Args:
        path (Any): Candidate image location taken from a data row. Anything
            that is not a non-empty ``str``/``bytes``/``os.PathLike`` (for
            example ``None`` or the float NaN pandas uses for empty CSV cells)
            is treated as "no image".
        label (str): Short name used in the failure message ('claim' or
            'evidence').

    Returns:
        Any: The RGB ``PIL.Image.Image``, or ``None`` when the path is missing,
        does not exist on disk, or the file cannot be opened or decoded.
    """
    # NaN is truthy and os.path.exists() raises TypeError on floats, so check
    # the type explicitly instead of relying on truthiness alone.
    if not isinstance(path, (str, bytes, os.PathLike)) or not path:
        return None
    if not os.path.exists(path):
        return None

    try:
        # convert() returns a new, fully loaded image, so the source file can
        # be closed as soon as the with-block ends (prevents leaked handles).
        with Image.open(path) as source:
            return source.convert("RGB")
    except Exception as e:
        # Deliberately best-effort: one unreadable image must not abort the
        # caller; report it and fall back to None.
        print(f"Failed to load {label} image from {path}: {e}")
        return None


def load_images_for_row(row: Dict[str, Any]) -> Dict[str, Any]:
    """
    Load the claim and evidence images for a given row of data.

    The ``claim_image`` and ``evidence_image`` entries of ``row`` are read as
    file paths. Each one is replaced, in a copy of the row, by the loaded RGB
    image, or by ``None`` when the path is absent, empty, NaN, not present on
    disk, or the file fails to load.

    Args:
        row (Dict[str, Any]): A dictionary representing a row of preprocessed data.

    Returns:
        Dict[str, Any]: A copy of the row with ``claim_image`` and
        ``evidence_image`` set to loaded images or ``None``. The input is not
        modified.
    """
    result = row.copy()  # Copy the original row to avoid modifying the input

    for label in ("claim", "evidence"):
        key = f"{label}_image"
        result[key] = _load_rgb_image(row.get(key), label)

    return result