import os
import asyncio
import subprocess
from pathlib import Path

from loguru import logger

from yourbench_space.leaderboard_space.env import INIT_MODELS


ON_SPACES = os.environ.get("system") == "spaces"
OUTPUT_DIR = "/data" if ON_SPACES else "."  # TODO: fix the space folder


def create_eval_file(eval_ds_name: str):
    """Generate a custom lighteval task file for the given evaluation dataset."""
    task_name = eval_ds_name.replace("/", "_")
    template_path = Path("/home/user/app/yourbench_space/lighteval_task/yourbench_task.py")
    subprocess.run(["lighteval", "tasks", "create", str(template_path), task_name, eval_ds_name])


async def run_process(args: list, custom_env=None) -> dict:
    """Run a command asynchronously, capture its output, and return pid/stdout/stderr."""
    process = await asyncio.create_subprocess_exec(
        *args,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
        env=custom_env,
    )
    try:
        await asyncio.wait_for(process.wait(), timeout=350)
    except asyncio.TimeoutError:
        logger.error("Lighteval process timed out")
        # Kill the process so reading stdout/stderr below does not block forever
        process.kill()
        await process.wait()

    stdout = await process.stdout.read()
    stderr = await process.stderr.read()
    return {"pid": process.pid, "stdout": stdout.decode(), "stderr": stderr.decode()}


async def run_evaluations(eval_ds_name: str, org: str, custom_env=None) -> str:
    """Run lighteval on every configured model concurrently and push results to the Hub."""
    task_name = eval_ds_name.replace("/", "_")
    tasks = []
    for model_name, provider in INIT_MODELS:
        args = [
            "lighteval",
            "endpoint",
            "inference-providers",
            f"model={model_name},provider={provider}",
            f"custom|{task_name}|0|0",
            "--custom-tasks",
            f"custom_{task_name}_task.py",
            "--max-samples",
            "30",
            "--output-dir",
            f"{OUTPUT_DIR}",
            "--save-details",
            "--results-org",
            org,
            "--push-to-hub",
        ]
        tasks.append(run_process(args, custom_env))

    # return_exceptions=True captures failed tasks instead of cancelling the whole batch
    processes = await asyncio.gather(*tasks, return_exceptions=True)
    for process in processes:
        if isinstance(process, Exception):
            logger.error(f"Process failed: {process}")
            continue
        logger.info("Logs for process:")
        logger.info(process["stdout"])
        logger.info(process["stderr"])

    if all(not isinstance(result, Exception) for result in processes):
        return "✅"
    return "At least one model failed"