import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score

import pandas as pd
from concurrent.futures import ProcessPoolExecutor
from tqdm import tqdm
from silero_vad import  read_audio, get_speech_timestamps
from functools import partial

from pathlib import Path
# NOTE(review): the string built here is discarded, so this line has no
# effect. It looks like the remnant of a sys.path tweak for a local
# silero-vad checkout — confirm and either finish or remove it.
str(Path().resolve() / "silero-vad/src/silero-vad")
from silero_vad import utils_vad
# from utils_vad import init_jit_model, OnnxWrapper
import torch
# Restrict torch to a single intra-op CPU thread in this process; presumably
# to avoid oversubscribing cores when several worker processes run at once
# — TODO confirm.
torch.set_num_threads(1)

def load_silero_vad(onnx=False, model_file_path=None):
    """Load a VAD model from ``model_file_path``.

    Args:
        onnx: When truthy, the file is wrapped with ``utils_vad.OnnxWrapper``
            (called with ``force_onnx_cpu=True``); otherwise it is loaded
            with ``utils_vad.init_jit_model``.
        model_file_path: Path of the model file, passed straight through to
            the selected loader.

    Returns:
        Whatever the selected ``utils_vad`` loader returns.
    """
    if not onnx:
        return utils_vad.init_jit_model(model_file_path)
    return utils_vad.OnnxWrapper(model_file_path, force_onnx_cpu=True)

def init_worker(model_file_path):
    """Pool initializer: load the model once in the current worker process.

    The loaded (non-ONNX) model is stored in the module-level global
    ``model`` so that later calls in the same process can reuse it.
    """
    global model
    model = load_silero_vad(model_file_path=model_file_path, onnx=False)


def get_vad(file, threshold):
    """Return the speech timestamps detected in one audio file.

    Args:
        file: Path of the audio file. A missing value (as judged by
            ``pd.isna``) short-circuits to ``None`` without reading anything.
        threshold: Forwarded to ``get_speech_timestamps`` as ``threshold``.

    Returns:
        The result of ``get_speech_timestamps`` called with
        ``return_seconds=True``, or ``None`` when ``file`` is missing.

    Relies on the module-level global ``model`` having been set beforehand.
    """
    if not pd.isna(file):
        audio = read_audio(file)
        return get_speech_timestamps(
            audio,
            model,
            threshold=threshold,
            return_seconds=True,
        )
    return None

def process_vad_parallel(df, threshold, column_name, model_file_path, max_workers=8):
    """Run VAD over every path in ``df["audio_path"]`` using a process pool.

    Args:
        df: DataFrame with an ``audio_path`` column. It is modified in place.
        threshold: Passed to ``get_vad`` for every file.
        column_name: Name of the column that receives the results; also used
            in the progress-bar description.
        model_file_path: Model file that each worker loads once through
            ``init_worker`` when the pool starts.
        max_workers: Number of worker processes. Defaults to 8, the value
            that used to be hard-coded.

    Returns:
        The same ``df`` object, with ``column_name`` holding one result per
        row, in row order.
    """
    with ProcessPoolExecutor(
        max_workers=max_workers,
        initializer=init_worker,
        initargs=(model_file_path,),
    ) as executor:
        # Submit everything up front so the pool stays busy, then collect in
        # submission order so the results line up with the DataFrame rows.
        futures = [
            executor.submit(get_vad, file, threshold) for file in df["audio_path"]
        ]
        results = [
            future.result()
            for future in tqdm(futures, total=len(df), desc=f"Processing {column_name}")
        ]
    df[column_name] = results
    return df


def create_frame_labels(segments, duration, frame_size=0.01):
    """Rasterize time segments into a per-frame 0/1 label array.

    Args:
        segments: Iterable of mappings with ``'start'`` and ``'end'`` times,
            in the same unit as ``duration``.
        duration: Total length of the timeline to cover.
        frame_size: Length of one frame, in the same unit as ``duration``.

    Returns:
        A 1-D float ``numpy`` array with one entry per whole frame in
        ``duration``; entries covered by any segment are 1, the rest 0.
        Segment parts that extend past ``duration`` are clipped.
    """
    # Tolerance, in frames, for quotients that land just below an integer.
    boundary_tolerance = 1e-6

    def to_index(seconds):
        # A plain ``int(seconds / frame_size)`` truncates quotients such as
        # 0.29 / 0.01 == 28.999999999999996 down to 28, silently dropping a
        # frame. Adding a tiny tolerance snaps values that are within float
        # noise of a frame boundary onto it, while genuinely fractional
        # positions are still truncated as before.
        return int(seconds / frame_size + boundary_tolerance)

    frames = np.zeros(to_index(duration))
    for seg in segments:
        frames[to_index(seg['start']):to_index(seg['end'])] = 1
    return frames

def compute_auc_roc(df, actual_col, predicted_col, frame_size=0.01):
    """Compute a frame-level ROC AUC between reference and predicted segments.

    Every row of ``df`` is rasterized on its own timeline and the per-row
    frame labels are concatenated before scoring. Flattening all rows onto
    one shared timeline (as a single call to ``create_frame_labels`` over all
    segments would do) lets segments from different rows overwrite each
    other, which is wrong when each row's timestamps are relative to that
    row's own recording.

    Args:
        df: DataFrame whose ``actual_col`` and ``predicted_col`` cells hold
            iterables of mappings with ``'start'`` and ``'end'`` keys.
        actual_col: Column with the reference segments.
        predicted_col: Column with the predicted segments.
        frame_size: Frame length passed to ``create_frame_labels``.

    Returns:
        The value of ``roc_auc_score`` over all frames of all usable rows.

    Raises:
        ValueError: If no row has both reference and predicted segments
            available (``roc_auc_score`` may also raise it, e.g. when the
            reference labels contain a single class).

    Notes:
        * Rows where either cell is ``None``/NaN are skipped instead of
          crashing the iteration.
        * The true audio length is not available here, so each row's timeline
          ends at the latest segment end seen in either column; trailing
          silence after that point is not scored.
        * The predictions passed to ``roc_auc_score`` are hard 0/1 labels,
          not continuous scores.
    """
    def is_missing(cell):
        # Missing cells show up as None or as a float NaN.
        return cell is None or (isinstance(cell, float) and np.isnan(cell))

    gt_parts = []
    pred_parts = []
    for actual, predicted in zip(df[actual_col], df[predicted_col]):
        if is_missing(actual) or is_missing(predicted):
            continue
        ends = [seg['end'] for seg in actual] + [seg['end'] for seg in predicted]
        row_duration = max(ends, default=0)
        gt_parts.append(create_frame_labels(actual, row_duration, frame_size))
        pred_parts.append(create_frame_labels(predicted, row_duration, frame_size))

    if not gt_parts:
        raise ValueError(
            f"no rows with both {actual_col!r} and {predicted_col!r} segments to score"
        )

    gt_labels = np.concatenate(gt_parts)
    pred_labels = np.concatenate(pred_parts)
    return roc_auc_score(gt_labels, pred_labels)


def main():
    """Run both VAD models over ``./val.feather`` and print their AUC-ROC.

    Each model is evaluated at thresholds 0.5 and 0.9; the detected segments
    are stored in one DataFrame column per (model, threshold) pair and then
    scored against the ``speech_ts`` column.
    """
    df = pd.read_feather("./val.feather")

    model_file_path = "/home/sourabh/Desktop/dev/hum-vad/HumAware-VAD/humaware_vad.jit"
    df = process_vad_parallel(df, 0.5, "unhum_vad_output_0.5", model_file_path=model_file_path)
    df = process_vad_parallel(df, 0.9, "unhum_vad_output_0.9", model_file_path=model_file_path)

    model_file_path = "/home/sourabh/Desktop/dev/hum-vad/.venv/lib/python3.12/site-packages/silero_vad/data/silero_vad.jit"
    df = process_vad_parallel(df, 0.5, "silero_vad_output_0.5", model_file_path=model_file_path)
    df = process_vad_parallel(df, 0.9, "silero_vad_output_0.9", model_file_path=model_file_path)

    # (column to score, label printed in front of the score)
    reports = (
        ("unhum_vad_output_0.5", "AUC-ROC Score"),
        ("unhum_vad_output_0.9", "AUC-ROC Score unhum_vad_output_0.9"),
        ("silero_vad_output_0.5", "AUC-ROC Score silero_vad_output_0.5"),
        ("silero_vad_output_0.9", "AUC-ROC Score silero_vad_output_0.9"),
    )
    for column, label in reports:
        auc_roc_score = compute_auc_roc(df, "speech_ts", column)
        print(f"{label}: {auc_roc_score:.4f}")


# The guard is required because worker processes started with the "spawn" or
# "forkserver" start methods re-import this module; without it every worker
# would re-run the whole evaluation and try to start its own pool.
if __name__ == "__main__":
    main()