# Spaces: Running | File size: 4,584 Bytes | 0af9841
import numpy as np
import json
from sklearn.decomposition import PCA
import joblib
# File paths
# JSON file whose entries are looked up by key to obtain each style vector.
VOICES_JSON_PATH = "voices.json"
# JSON file iterated for per-audio annotation entries and their ratings.
ANNOTATIONS_JSON_PATH = "annotations.json"
# Location where the fitted PCA model is written and later read back with joblib.
PCA_MODEL_PATH = "pca_model.pkl"
# NOTE(review): not referenced anywhere in the visible code; the actual vector
# size comes from the data itself.
VECTOR_DIMENSION = 256 # Adjust based on your actual vector size
# Passed to PCA(n_components=...); equals the number of ratings collected per
# annotation entry (gender, tone, pacing, enunciation, quality, style).
N_COMPONENTS = 6 # Number of PCA components for annotated features
def load_json(file_path):
    """Load and parse a JSON file.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON content. An empty dict is returned (after printing
        an error) when the file does not exist or cannot be decoded as
        UTF-8 JSON.
    """
    try:
        # JSON is UTF-8 by specification; do not depend on the platform's
        # locale-dependent default text encoding.
        with open(file_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"Error: {file_path} not found.")
        return {}
    except (json.JSONDecodeError, UnicodeDecodeError):
        # Bytes that are not valid UTF-8 are just another form of invalid
        # JSON, so they take the same best-effort path instead of crashing.
        print(f"Error: {file_path} is not valid JSON.")
        return {}
def _rating_features(item):
    """Return the six ratings of one annotation entry, or None if any is missing."""
    fields = ("gender", "tone", "pacing", "enunciation", "quality", "style")
    try:
        # Each field is read as a list whose first element carries "rating".
        return [item[field][0]["rating"] for field in fields]
    except (KeyError, IndexError, TypeError):
        return None


def extract_annotated_vectors():
    """
    Load annotations and match annotated features with style vectors.

    An annotation entry is skipped, with a printed message, when:
      * it is not a JSON object,
      * no style vector exists for the key derived from its "audio" path,
      * its style vector is not numeric or does not squeeze to 1-D,
      * its style vector length differs from the first accepted vector, or
      * any of the six ratings is missing.

    Returns:
        tuple: ``(style_vectors, annotated_features)`` as NumPy arrays with
        one row per accepted annotation; the feature columns are ordered
        gender, tone, pacing, enunciation, quality, style. Returns
        ``(None, None)`` when nothing usable was found.
    """
    # Load data
    voices_data = load_json(VOICES_JSON_PATH)
    annotations = load_json(ANNOTATIONS_JSON_PATH)

    style_vectors = []
    annotated_features = []
    # Length of the first accepted style vector; later vectors must match so
    # the stacked result is a regular 2-D array.
    expected_dim = None

    for item in annotations:
        if not isinstance(item, dict):
            print(f"Warning: Skipping malformed annotation entry: {item!r}")
            continue

        # The lookup key is the last "-"-separated part of the audio file
        # name with ".wav" removed. A missing or null path yields an empty
        # key, which is then reported as "no style vector found".
        audio_path = item.get("audio") or ""
        key = str(audio_path).split("/")[-1].split("-")[-1].replace(".wav", "")

        # Skip if the style vector is missing
        if key not in voices_data:
            print(f"Warning: No style vector found for key '{key}'. Skipping.")
            continue

        # Get the style vector and ensure it's flattened to 1D
        try:
            style_vector = np.array(voices_data[key], dtype=np.float32).squeeze()
        except (TypeError, ValueError):
            print(f"Warning: Style vector for key '{key}' is not numeric. Skipping.")
            continue
        if style_vector.ndim != 1:
            print(f"Skipping vector with unexpected dimensions: {style_vector.shape}")
            continue
        if expected_dim is not None and style_vector.shape[0] != expected_dim:
            print(
                f"Warning: Style vector for key '{key}' has length "
                f"{style_vector.shape[0]}, expected {expected_dim}. Skipping."
            )
            continue

        # Extract annotated features
        # (gender, tone, pacing, enunciation, quality, style)
        features = _rating_features(item)
        if features is None:
            print(f"Warning: Incomplete ratings for key '{key}'. Skipping.")
            continue

        # Append data
        expected_dim = style_vector.shape[0]
        style_vectors.append(style_vector)
        annotated_features.append(features)

    if not style_vectors or not annotated_features:
        print("Error: No valid style vectors or annotations found.")
        return None, None
    return np.array(style_vectors), np.array(annotated_features)
def train_and_save_pca_model():
    """
    Train the PCA model using annotated style vectors and save the model.

    Side effects:
        * Writes the fitted PCA model to ``PCA_MODEL_PATH`` via joblib.
        * Writes the annotated ratings matrix to ``annotated_features.npy``.
        * Prints progress and error messages.

    Returns:
        None. When no annotated data is available, or there is too little
        of it to fit ``N_COMPONENTS`` components, an error is printed and
        nothing is written.
    """
    # Extract style vectors and annotated features
    style_vectors, annotated_features = extract_annotated_vectors()
    if style_vectors is None or annotated_features is None:
        print("Error: Unable to extract annotated data.")
        return

    # Report the shapes: one row per annotated sample in both arrays; the
    # feature array has one column per collected rating (six of them).
    print(f"Style vectors shape: {style_vectors.shape}")
    print(f"Annotated features shape: {annotated_features.shape}")

    # PCA cannot extract more components than min(n_samples, n_features);
    # report that clearly instead of failing inside scikit-learn.
    if style_vectors.ndim != 2 or min(style_vectors.shape) < N_COMPONENTS:
        print(
            f"Error: Need at least {N_COMPONENTS} samples and dimensions to fit "
            f"{N_COMPONENTS} PCA components; got shape {style_vectors.shape}."
        )
        return

    # Train PCA on style vectors
    print(f"Training PCA on {len(style_vectors)} style vectors...")
    pca = PCA(n_components=N_COMPONENTS)
    pca.fit(style_vectors)

    # Save PCA model
    joblib.dump(pca, PCA_MODEL_PATH)
    print(f"PCA model saved to {PCA_MODEL_PATH}.")

    # Optionally save annotated features for downstream tasks
    np.save("annotated_features.npy", annotated_features)
    print("Annotated features saved to 'annotated_features.npy'.")
def load_pca_model():
    """Return the PCA model stored at PCA_MODEL_PATH.

    Prints an error and returns None when the model file does not exist.
    """
    try:
        model = joblib.load(PCA_MODEL_PATH)
    except FileNotFoundError:
        print(f"Error: {PCA_MODEL_PATH} not found.")
        model = None
    return model
def reduce_to_pca_components(style_vector, pca):
    """
    Project a single style vector into PCA space.

    Args:
        style_vector (array-like): One style vector, either flat
            ``(dim,)`` or carrying singleton axes such as ``(1, dim)``.
        pca (PCA): Trained PCA model (anything exposing ``transform``).

    Returns:
        np.ndarray: The 1-D vector of PCA components for the input.
    """
    # Training squeezes stored vectors before fitting, so accept the same
    # un-squeezed layouts here: reshape whatever arrives into one sample row.
    sample = np.asarray(style_vector).reshape(1, -1)
    return pca.transform(sample)[0]
def reconstruct_from_pca_components(pca_vector, pca):
    """
    Map a single PCA-space vector back to the original style-vector space.

    Args:
        pca_vector (array-like): One vector in PCA space, either flat
            ``(n_components,)`` or carrying singleton axes such as
            ``(1, n_components)``.
        pca (PCA): Trained PCA model (anything exposing
            ``inverse_transform``).

    Returns:
        np.ndarray: The reconstructed 1-D style vector.
    """
    # Reshape into a single sample row so both flat and (1, n) inputs work,
    # matching reduce_to_pca_components.
    sample = np.asarray(pca_vector).reshape(1, -1)
    return pca.inverse_transform(sample)[0]
# Script entry point: train and save the PCA model only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    train_and_save_pca_model()
|