 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import numpy as np
import json
from sklearn.decomposition import PCA
import joblib

# File paths and model hyperparameters.
VOICES_JSON_PATH = "voices.json"  # mapping: key -> style vector
ANNOTATIONS_JSON_PATH = "annotations.json"  # list of annotation records
PCA_MODEL_PATH = "pca_model.pkl"  # output path for the trained PCA model
VECTOR_DIMENSION = 256  # Adjust based on your actual vector size
# NOTE(review): VECTOR_DIMENSION is not referenced anywhere below —
# presumably informational or used elsewhere; confirm before relying on it.
N_COMPONENTS = 6  # Number of PCA components for annotated features


def load_json(file_path):
    """Load and parse a JSON file.

    Args:
        file_path (str): Path to the JSON file.

    Returns:
        The parsed JSON content, or an empty dict when the file is
        missing or not valid JSON (best-effort: the error is printed
        and an empty mapping is returned so callers can continue).
    """
    try:
        # Explicit encoding avoids platform-dependent default codecs.
        with open(file_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"Error: {file_path} not found.")
        return {}
    except json.JSONDecodeError:
        print(f"Error: {file_path} is not valid JSON.")
        return {}


def extract_annotated_vectors():
    """
    Load annotations and match annotated features with style vectors.

    Reads VOICES_JSON_PATH (mapping: key -> style vector) and
    ANNOTATIONS_JSON_PATH (list of annotation records), pairing each
    annotation with its style vector by the key embedded in the audio
    file name. Records with a missing vector, a non-1D vector, or
    incomplete rating fields are skipped with a warning.

    Returns:
        np.ndarray: Style vectors, shape (n_samples, vector_dim).
        np.ndarray: Annotated features, shape (n_samples, 6) — the
            gender, tone, pacing, enunciation, quality and style ratings.
        Both are None when no valid pairs were found.
    """
    # Load data
    voices_data = load_json(VOICES_JSON_PATH)
    annotations = load_json(ANNOTATIONS_JSON_PATH)

    style_vectors = []
    annotated_features = []

    # Extract annotated features and match style vectors
    for item in annotations:
        # Key convention: ".../<prefix>-<key>.wav" -> "<key>"
        audio_path = item.get("audio", "")
        key = audio_path.split("/")[-1].split("-")[-1].replace(".wav", "")

        # Skip if the style vector is missing
        if key not in voices_data:
            print(f"Warning: No style vector found for key '{key}'. Skipping.")
            continue

        # Flatten e.g. (1, N) arrays to 1D; reject anything else.
        style_vector = np.array(voices_data[key], dtype=np.float32).squeeze()
        if style_vector.ndim != 1:
            print(f"Skipping vector with unexpected dimensions: {style_vector.shape}")
            continue

        # Extract the six annotated ratings (gender, tone, pacing,
        # enunciation, quality, style). Skip incomplete records instead
        # of crashing, matching the skip-and-warn style above.
        try:
            features = [
                item["gender"][0]["rating"],
                item["tone"][0]["rating"],
                item["pacing"][0]["rating"],
                item["enunciation"][0]["rating"],
                item["quality"][0]["rating"],
                item["style"][0]["rating"],
            ]
        except (KeyError, IndexError, TypeError):
            print(f"Warning: Incomplete annotation for key '{key}'. Skipping.")
            continue

        # Append data
        style_vectors.append(style_vector)
        annotated_features.append(features)

    if not style_vectors or not annotated_features:
        print("Error: No valid style vectors or annotations found.")
        return None, None

    return np.array(style_vectors), np.array(annotated_features)


def train_and_save_pca_model():
    """
    Train the PCA model using annotated style vectors and save the model.

    Fits a PCA with N_COMPONENTS components on the style vectors that
    have annotations, writes the model to PCA_MODEL_PATH, and saves the
    matching annotated features to 'annotated_features.npy' for
    downstream tasks. Errors are reported via print and abort the run.
    """
    # Extract style vectors and annotated features
    style_vectors, annotated_features = extract_annotated_vectors()
    if style_vectors is None or annotated_features is None:
        print("Error: Unable to extract annotated data.")
        return

    # Sanity-check shapes before fitting.
    print(f"Style vectors shape: {style_vectors.shape}")  # (n_samples, vector_dim)
    print(
        f"Annotated features shape: {annotated_features.shape}"
    )  # (n_samples, 6)

    # PCA requires at least as many samples as components; bail out
    # with a clear message instead of letting sklearn raise.
    if len(style_vectors) < N_COMPONENTS:
        print(
            f"Error: Need at least {N_COMPONENTS} samples to fit PCA, "
            f"got {len(style_vectors)}."
        )
        return

    # Train PCA on style vectors
    print(f"Training PCA on {len(style_vectors)} style vectors...")
    pca = PCA(n_components=N_COMPONENTS)
    pca.fit(style_vectors)

    # Save PCA model
    joblib.dump(pca, PCA_MODEL_PATH)
    print(f"PCA model saved to {PCA_MODEL_PATH}.")

    # Save the features so downstream tasks can relate PCA components
    # to the human annotations.
    np.save("annotated_features.npy", annotated_features)
    print("Annotated features saved to 'annotated_features.npy'.")


def load_pca_model():
    """Load the trained PCA model from PCA_MODEL_PATH, or None if absent."""
    try:
        model = joblib.load(PCA_MODEL_PATH)
    except FileNotFoundError:
        print(f"Error: {PCA_MODEL_PATH} not found.")
        return None
    return model


def reduce_to_pca_components(style_vector, pca):
    """
    Project a full-dimensional style vector into PCA space.

    Args:
        style_vector (np.ndarray): Original style vector (256-dim).
        pca (PCA): Trained PCA model.

    Returns:
        np.ndarray: Reduced vector in PCA space (n_components-dim).
    """
    # transform() expects a 2D batch; wrap the single vector, then
    # unwrap the single result row.
    batch = [style_vector]
    reduced = pca.transform(batch)
    return reduced[0]


def reconstruct_from_pca_components(pca_vector, pca):
    """
    Map a PCA-space vector back to the original style-vector space.

    Args:
        pca_vector (np.ndarray): Vector in PCA space (n_components-dim).
        pca (PCA): Trained PCA model.

    Returns:
        np.ndarray: Reconstructed style vector (256-dim).
    """
    # inverse_transform() expects a 2D batch; wrap the single vector,
    # then unwrap the single result row.
    batch = [pca_vector]
    restored = pca.inverse_transform(batch)
    return restored[0]


if __name__ == "__main__":
    # Script entry point: fit the PCA model on annotated style vectors
    # and persist it (plus the annotated features) to disk.
    train_and_save_pca_model()