Code for testing

#23

by DeepNeuralWork - opened Aug 12

Aug 12

import torch
import torchaudio
import numpy as np
from speechbrain.pretrained import EncoderClassifier
from scipy.spatial.distance import cosine

# ----------------------------
# Load the ECAPA-TDNN model from local folder
# ----------------------------

# Build the speaker encoder once, at module load, so every call below reuses
# the same model instance. Runs on the GPU when CUDA is available and falls
# back to the CPU otherwise.
classifier = EncoderClassifier.from_hparams(
    source="./model",  # local directory holding the downloaded HF model
    run_opts=dict(device="cuda" if torch.cuda.is_available() else "cpu"),
)

def load_audio(file_path):
    """Read an audio file and return it as a single-channel waveform.

    Args:
        file_path: Location of the audio file, handed to ``torchaudio.load``.

    Returns:
        A ``(waveform, sample_rate)`` pair. When the loaded tensor has more
        than one entry along its first axis, those entries are averaged into
        one (keeping the axis); otherwise the tensor is returned untouched.
        The sample rate is passed through as reported by ``torchaudio.load``.
    """
    waveform, sample_rate = torchaudio.load(file_path)

    # Already a single channel: nothing to mix down.
    if waveform.shape[0] <= 1:
        return waveform, sample_rate

    # Multi-channel (e.g. stereo): average the channels into one.
    mono = waveform.mean(dim=0, keepdim=True)
    return mono, sample_rate

def get_embedding(file_path, target_sr=16000):
    """Extract a speaker embedding from an audio file.

    The waveform is loaded with ``load_audio``, resampled to ``target_sr``
    when the file's sample rate differs, and then encoded with the
    module-level ``classifier``.

    Args:
        file_path: Path to the audio file.
        target_sr: Sample rate in Hz to feed to the encoder. Defaults to
            16000. NOTE(review): SpeechBrain's published ECAPA-TDNN VoxCeleb
            checkpoints are trained on 16 kHz audio -- confirm this matches
            the model stored in ``./model``. Pass ``None`` to skip
            resampling and encode the audio at its native rate.

    Returns:
        The result of ``classifier.encode_batch`` with singleton dimensions
        squeezed out, as a NumPy array on the CPU.
    """
    signal, sr = load_audio(file_path)

    # Previously the sample rate was ignored, so e.g. 44.1 kHz recordings
    # were encoded as if they were at the model's rate, skewing embeddings.
    if target_sr is not None and sr != target_sr:
        signal = torchaudio.functional.resample(
            signal, orig_freq=sr, new_freq=target_sr
        )

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        embedding = classifier.encode_batch(signal)

    return embedding.squeeze().detach().cpu().numpy()

def compare_speakers(file1, file2, threshold=0.75):
    """Decide whether two recordings were spoken by the same person.

    Args:
        file1: Path to the first audio file.
        file2: Path to the second audio file.
        threshold: Minimum cosine similarity for the pair to count as the
            same speaker. Defaults to 0.75.

    Returns:
        Truthy when the cosine similarity of the two embeddings is at least
        ``threshold``, falsy otherwise. The similarity is also printed.
    """
    first_vec, second_vec = get_embedding(file1), get_embedding(file2)

    # scipy's ``cosine`` is a distance; similarity is its complement.
    distance = cosine(first_vec, second_vec)
    similarity = 1 - distance
    print(f"Cosine similarity: {similarity:.4f}")

    return similarity >= threshold

# ----------------------------
# Example usage
# ----------------------------

# Paths of the two recordings to compare; fill these in before running.
file_a = ""
file_b = ""

# Only run the comparison when executed as a script, so importing this
# module for its helpers does not trigger audio loading and inference.
if __name__ == "__main__":
    # The placeholders above are empty by default; fail with a clear message
    # instead of an opaque error from the audio backend.
    if not file_a or not file_b:
        raise SystemExit(
            "Set file_a and file_b to the two audio files to compare."
        )

    if compare_speakers(file_a, file_b):
        print("✅ Same speaker")
    else:
        print("❌ Different speakers")

Upload images, audio, and videos by dragging in the text input, pasting, or clicking here.

Tap or paste here to upload images

· Sign up or log in to comment