Code for testing

#23
by DeepNeuralWork - opened

import torch
import torchaudio
import numpy as np
from speechbrain.pretrained import EncoderClassifier
from scipy.spatial.distance import cosine

# ----------------------------
# Load the ECAPA-TDNN model from local folder
# ----------------------------

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
_device = "cuda" if torch.cuda.is_available() else "cpu"

# "./model" is the local directory holding the downloaded HF model files.
classifier = EncoderClassifier.from_hparams(
    source="./model",
    run_opts={"device": _device},
)

def load_audio(file_path):
    """Read an audio file and return it as a single-channel waveform.

    Args:
        file_path: Path handed to ``torchaudio.load``.

    Returns:
        A ``(signal, sr)`` tuple: the waveform tensor (averaged over its
        first axis when that axis has more than one entry) and the sample
        rate reported by ``torchaudio.load``.
    """
    waveform, sample_rate = torchaudio.load(file_path)
    channel_count = waveform.shape[0]
    if channel_count > 1:
        # Stereo → mono: average the channels but keep the channel axis.
        waveform = waveform.mean(dim=0, keepdim=True)
    return waveform, sample_rate

def get_embedding(file_path, target_sr=16000):
    """Extract a speaker embedding from an audio file.

    ``classifier.encode_batch`` is given only the waveform, not its sample
    rate, so the audio is resampled to ``target_sr`` first. Without this,
    files recorded at different rates yield embeddings that are not
    comparable.

    Args:
        file_path: Path to the audio file, loaded through ``load_audio``.
        target_sr: Sample rate in Hz to resample to before encoding.
            The default of 16000 assumes the model in ``./model`` was
            trained on 16 kHz audio -- confirm against the model's
            hyperparameters. Pass ``None`` to skip resampling.

    Returns:
        The embedding as a NumPy array with singleton dimensions removed.
    """
    signal, sr = load_audio(file_path)
    if target_sr is not None and sr != target_sr:
        signal = torchaudio.functional.resample(
            signal, orig_freq=sr, new_freq=target_sr
        )
    embedding = classifier.encode_batch(signal)
    # Detach and move to the CPU so the tensor can be converted to NumPy.
    return embedding.squeeze().detach().cpu().numpy()

def compare_speakers(file1, file2, threshold=0.75):
    """Decide whether two recordings come from the same speaker.

    Both files are embedded with ``get_embedding`` and scored by cosine
    similarity; the score is printed as a side effect.

    Args:
        file1: Path to the first audio file.
        file2: Path to the second audio file.
        threshold: Minimum cosine similarity counted as a match.

    Returns:
        A truthy value when the similarity is at least ``threshold``,
        a falsy value otherwise.
    """
    first_embedding = get_embedding(file1)
    second_embedding = get_embedding(file2)

    # scipy's ``cosine`` is a distance; similarity is its complement.
    cosine_distance = cosine(first_embedding, second_embedding)
    similarity = 1 - cosine_distance
    print(f"Cosine similarity: {similarity:.4f}")

    is_same_speaker = similarity >= threshold
    return is_same_speaker

# ----------------------------
# Example usage
# ----------------------------

# Paths of the two recordings to compare; fill these in before running.
file_a = ""
file_b = ""

if not file_a or not file_b:
    # The placeholders are still empty: give a hint instead of failing
    # inside the audio loader with an unhelpful error.
    print("Set file_a and file_b to the audio files to compare.")
elif compare_speakers(file_a, file_b):
    print("✅ Same speaker")
else:
    print("❌ Different speakers")

Sign up or log in to comment