# Code for testing
import torch
import torchaudio
import numpy as np
from speechbrain.pretrained import EncoderClassifier
from scipy.spatial.distance import cosine
# ----------------------------
# Load the ECAPA-TDNN model from local folder
# ----------------------------
# Build the speaker encoder once at import time; the functions below share it.
classifier = EncoderClassifier.from_hparams(
    # Local folder that holds the downloaded HF model files.
    source="./model",
    # Prefer the GPU when CUDA is usable, otherwise run on the CPU.
    run_opts=dict(device="cuda" if torch.cuda.is_available() else "cpu"),
)
def load_audio(file_path):
    """Read an audio file and return a single-channel waveform.

    Args:
        file_path: Path of the audio file, handed to ``torchaudio.load``.

    Returns:
        A ``(signal, sr)`` tuple: the waveform tensor and the sample rate
        reported by ``torchaudio.load``. When the first dimension of the
        loaded tensor is larger than one (e.g. stereo), the channels are
        averaged into one while that dimension is kept.
    """
    waveform, sample_rate = torchaudio.load(file_path)

    # Already single-channel: nothing to mix down.
    if waveform.shape[0] <= 1:
        return waveform, sample_rate

    # Stereo -> mono: average across the channel axis, keeping it as size 1.
    mono = torch.mean(waveform, dim=0, keepdim=True)
    return mono, sample_rate
def get_embedding(file_path, target_sr=16000):
    """Extract a speaker embedding from an audio file.

    The waveform is resampled to ``target_sr`` before it is encoded:
    ``encode_batch`` takes a raw tensor and has no knowledge of the file's
    sample rate, so audio recorded at another rate would otherwise be fed
    to the model unchanged and yield unreliable embeddings.

    Args:
        file_path: Path of the audio file to embed.
        target_sr: Sample rate in Hz expected by the model. Defaults to
            16000, the rate documented for SpeechBrain's pretrained
            ECAPA-TDNN models; adjust it if the local model differs.

    Returns:
        The embedding as a NumPy array on the CPU, with singleton
        dimensions squeezed out.
    """
    signal, sr = load_audio(file_path)

    # Bring the audio to the rate the model was trained on.
    if sr != target_sr:
        signal = torchaudio.functional.resample(
            signal, orig_freq=sr, new_freq=target_sr
        )

    embedding = classifier.encode_batch(signal)
    return embedding.squeeze().detach().cpu().numpy()
def compare_speakers(file1, file2, threshold=0.75):
    """Decide whether two recordings come from the same speaker.

    Embeds both files, prints their cosine similarity, and compares it
    against ``threshold``.

    Args:
        file1: Path of the first audio file.
        file2: Path of the second audio file.
        threshold: Minimum cosine similarity for the two recordings to be
            treated as the same speaker.

    Returns:
        ``True`` if the similarity is at least ``threshold``, otherwise
        ``False`` (a plain Python ``bool``).
    """
    emb1 = get_embedding(file1)
    emb2 = get_embedding(file2)

    # SciPy's `cosine` is a distance (1 - similarity), so invert it.
    similarity = 1 - cosine(emb1, emb2)
    print(f"Cosine similarity: {similarity:.4f}")

    # Comparing a NumPy scalar yields numpy.bool_; cast so callers get a real
    # bool (identity checks and JSON serialisation then behave as expected).
    return bool(similarity >= threshold)
# ----------------------------
# Example usage
# ----------------------------
# Placeholders: set these to the paths of the two recordings to compare.
file_a = ""
file_b = ""

# Report the verdict. The emoji were mis-decoded in the original, which split
# the first string literal across two lines and made the script unparsable.
if compare_speakers(file_a, file_b):
    print("✅ Same speaker")
else:
    print("❌ Different speakers")