 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import numpy as np
import json
from sklearn.decomposition import PCA
import joblib

# File paths and model hyperparameters.
VOICES_JSON_PATH = "voices.json"  # mapping: key -> style vector
ANNOTATIONS_JSON_PATH = "annotations.json"  # list of annotation records
PCA_MODEL_PATH = "pca_model.pkl"  # output path for the trained PCA model
VECTOR_DIMENSION = 256  # Adjust based on your actual vector size
# NOTE(review): VECTOR_DIMENSION is not referenced anywhere below —
# presumably informational or used elsewhere; confirm before relying on it.
N_COMPONENTS = 6  # Number of PCA components for annotated features


def load_json(file_path):
    """Load and parse a JSON file.

    Args:
        file_path (str): Path to the JSON file.

    Returns:
        The parsed JSON content, or an empty dict when the file is
        missing or not valid JSON (best-effort: the error is printed
        and an empty mapping is returned so callers can continue).
    """
    try:
        # Explicit encoding avoids platform-dependent default codecs.
        with open(file_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"Error: {file_path} not found.")
        return {}
    except json.JSONDecodeError:
        print(f"Error: {file_path} is not valid JSON.")
        return {}


def extract_annotated_vectors():
    """
    Load annotations and match annotated features with style vectors.

    Reads VOICES_JSON_PATH (mapping: key -> style vector) and
    ANNOTATIONS_JSON_PATH (list of annotation records), pairing each
    annotation with its style vector by the key embedded in the audio
    file name. Records with a missing vector, a non-1D vector, or
    incomplete rating fields are skipped with a warning.

    Returns:
        np.ndarray: Style vectors, shape (n_samples, vector_dim).
        np.ndarray: Annotated features, shape (n_samples, 6) — the
            gender, tone, pacing, enunciation, quality and style ratings.
        Both are None when no valid pairs were found.
    """
    # Load data
    voices_data = load_json(VOICES_JSON_PATH)
    annotations = load_json(ANNOTATIONS_JSON_PATH)

    style_vectors = []
    annotated_features = []

    # Extract annotated features and match style vectors
    for item in annotations:
        # Key convention: ".../<prefix>-<key>.wav" -> "<key>"
        audio_path = item.get("audio", "")
        key = audio_path.split("/")[-1].split("-")[-1].replace(".wav", "")

        # Skip if the style vector is missing
        if key not in voices_data:
            print(f"Warning: No style vector found for key '{key}'. Skipping.")
            continue

        # Flatten e.g. (1, N) arrays to 1D; reject anything else.
        style_vector = np.array(voices_data[key], dtype=np.float32).squeeze()
        if style_vector.ndim != 1:
            print(f"Skipping vector with unexpected dimensions: {style_vector.shape}")
            continue

        # Extract the six annotated ratings (gender, tone, pacing,
        # enunciation, quality, style). Skip incomplete records instead
        # of crashing, matching the skip-and-warn style above.
        try:
            features = [
                item["gender"][0]["rating"],
                item["tone"][0]["rating"],
                item["pacing"][0]["rating"],
                item["enunciation"][0]["rating"],
                item["quality"][0]["rating"],
                item["style"][0]["rating"],
            ]
        except (KeyError, IndexError, TypeError):
            print(f"Warning: Incomplete annotation for key '{key}'. Skipping.")
            continue

        # Append data
        style_vectors.append(style_vector)
        annotated_features.append(features)

    if not style_vectors or not annotated_features:
        print("Error: No valid style vectors or annotations found.")
        return None, None

    return np.array(style_vectors), np.array(annotated_features)


def train_and_save_pca_model():
    """
    Train the PCA model using annotated style vectors and save the model.

    Fits a PCA with N_COMPONENTS components on the style vectors that
    have annotations, writes the model to PCA_MODEL_PATH, and saves the
    matching annotated features to 'annotated_features.npy' for
    downstream tasks. Errors are reported via print and abort the run.
    """
    # Extract style vectors and annotated features
    style_vectors, annotated_features = extract_annotated_vectors()
    if style_vectors is None or annotated_features is None:
        print("Error: Unable to extract annotated data.")
        return

    # Sanity-check shapes before fitting.
    print(f"Style vectors shape: {style_vectors.shape}")  # (n_samples, vector_dim)
    print(
        f"Annotated features shape: {annotated_features.shape}"
    )  # (n_samples, 6)

    # PCA requires at least as many samples as components; bail out
    # with a clear message instead of letting sklearn raise.
    if len(style_vectors) < N_COMPONENTS:
        print(
            f"Error: Need at least {N_COMPONENTS} samples to fit PCA, "
            f"got {len(style_vectors)}."
        )
        return

    # Train PCA on style vectors
    print(f"Training PCA on {len(style_vectors)} style vectors...")
    pca = PCA(n_components=N_COMPONENTS)
    pca.fit(style_vectors)

    # Save PCA model
    joblib.dump(pca, PCA_MODEL_PATH)
    print(f"PCA model saved to {PCA_MODEL_PATH}.")

    # Save the features so downstream tasks can relate PCA components
    # to the human annotations.
    np.save("annotated_features.npy", annotated_features)
    print("Annotated features saved to 'annotated_features.npy'.")


def load_pca_model():
    """Load the trained PCA model from PCA_MODEL_PATH, or None if absent."""
    try:
        model = joblib.load(PCA_MODEL_PATH)
    except FileNotFoundError:
        print(f"Error: {PCA_MODEL_PATH} not found.")
        return None
    return model


def reduce_to_pca_components(style_vector, pca):
    """
    Project a full-dimensional style vector into PCA space.

    Args:
        style_vector (np.ndarray): Original style vector (256-dim).
        pca (PCA): Trained PCA model.

    Returns:
        np.ndarray: Reduced vector in PCA space (n_components-dim).
    """
    # transform() expects a 2D batch; wrap the single vector, then
    # unwrap the single result row.
    batch = [style_vector]
    reduced = pca.transform(batch)
    return reduced[0]


def reconstruct_from_pca_components(pca_vector, pca):
    """
    Map a PCA-space vector back to the original style-vector space.

    Args:
        pca_vector (np.ndarray): Vector in PCA space (n_components-dim).
        pca (PCA): Trained PCA model.

    Returns:
        np.ndarray: Reconstructed style vector (256-dim).
    """
    # inverse_transform() expects a 2D batch; wrap the single vector,
    # then unwrap the single result row.
    batch = [pca_vector]
    restored = pca.inverse_transform(batch)
    return restored[0]


if __name__ == "__main__":
    # Script entry point: fit the PCA model on annotated style vectors
    # and persist it (plus the annotated features) to disk.
    train_and_save_pca_model()