{ | |
"model_params": { | |
"decoder": { | |
"resblock_dilation_sizes": [ | |
[ | |
1, | |
3, | |
5 | |
], | |
[ | |
1, | |
3, | |
5 | |
], | |
[ | |
1, | |
3, | |
5 | |
] | |
], | |
"resblock_kernel_sizes": [ | |
3, | |
7, | |
11 | |
], | |
"type": "hifigan", | |
"upsample_initial_channel": 512, | |
"upsample_kernel_sizes": [ | |
20, | |
10, | |
6, | |
4 | |
], | |
"upsample_rates": [ | |
10, | |
5, | |
3, | |
2 | |
] | |
}, | |
"diffusion": { | |
"dist": { | |
"estimate_sigma_data": true, | |
"mean": -3.0, | |
"sigma_data": 0.2, | |
"std": 1.0 | |
}, | |
"embedding_mask_proba": 0.1, | |
"transformer": { | |
"head_features": 64, | |
"multiplier": 2, | |
"num_heads": 8, | |
"num_layers": 3 | |
} | |
}, | |
"dim_in": 64, | |
"dropout": 0.2, | |
"hidden_dim": 512, | |
"max_conv_dim": 512, | |
"max_dur": 50, | |
"multispeaker": false, | |
"n_layer": 3, | |
"n_mels": 80, | |
"n_token": 178, | |
"slm": { | |
"hidden": 768, | |
"initial_channel": 64, | |
"model": "microsoft/wavlm-base-plus", | |
"nlayers": 13, | |
"sr": 16000 | |
}, | |
"style_dim": 128 | |
}, | |
"training_config": { | |
"epochs": 5, | |
"batch_size": 2, | |
"max_len": 620, | |
"optimizer": { | |
"bert_lr": 1e-05, | |
"ft_lr": 0.0001, | |
"lr": 0.0001 | |
}, | |
"loss_params": { | |
"diff_epoch": 1, | |
"joint_epoch": 110, | |
"lambda_F0": 1.0, | |
"lambda_ce": 20.0, | |
"lambda_diff": 1.0, | |
"lambda_dur": 1.0, | |
"lambda_gen": 1.0, | |
"lambda_mel": 5.0, | |
"lambda_mono": 1.0, | |
"lambda_norm": 1.0, | |
"lambda_s2s": 1.0, | |
"lambda_slm": 1.0, | |
"lambda_sty": 1.0 | |
} | |
}, | |
"preprocess_params": { | |
"spect_params": { | |
"hop_length": 300, | |
"n_fft": 2048, | |
"win_length": 1200 | |
}, | |
"sr": 24000 | |
}, | |
"data_params": { | |
"OOD_data": "Data/OOD_texts.txt", | |
"min_length": 50, | |
"root_path": "Data/wavs", | |
"train_data": "Data/train_list.txt", | |
"val_data": "Data/val_list.txt" | |
}, | |
"model_state": { | |
"epoch": 4, | |
"iterations": 2810, | |
"val_loss": 0.48474612832069397 | |
}, | |
"training_metrics": { | |
"train_loss": [], | |
"val_loss": [ | |
56.0, | |
24.0, | |
35.0, | |
43.0, | |
22.0, | |
13.0, | |
21.0, | |
5.0, | |
53.0, | |
45.0, | |
41.0, | |
25.0, | |
3.0, | |
44.0, | |
40.0, | |
18.0, | |
17.0, | |
0.0, | |
9.0, | |
52.0, | |
52.0, | |
7.0, | |
21.0, | |
24.0, | |
21.0, | |
41.0, | |
40.0, | |
0.0, | |
22.0, | |
55.0, | |
35.0, | |
30.0, | |
35.0, | |
5.0, | |
52.0, | |
52.0, | |
0.0, | |
3.0, | |
45.0, | |
31.0, | |
42.0, | |
16.0, | |
5.0, | |
35.0, | |
13.0, | |
51.0, | |
4.0, | |
27.0, | |
44.0, | |
16.0, | |
48.0, | |
11.0, | |
57.0, | |
15.0, | |
27.0, | |
53.0, | |
28.0, | |
57.0, | |
20.0, | |
16.0, | |
1.0, | |
21.0, | |
5.0 | |
], | |
"dur_loss": [ | |
0.502, | |
0.482, | |
0.476, | |
0.466, | |
0.473, | |
0.464, | |
0.464, | |
0.459, | |
0.467, | |
0.47, | |
0.463, | |
0.458, | |
0.498, | |
0.483, | |
0.472, | |
0.473, | |
0.465, | |
0.469, | |
0.459, | |
0.454, | |
0.461, | |
0.458, | |
0.461, | |
0.453, | |
0.457, | |
0.456, | |
0.456, | |
0.455, | |
0.456, | |
0.453, | |
0.452, | |
0.453, | |
0.464, | |
0.468, | |
0.446, | |
0.45, | |
0.449, | |
0.451, | |
0.442, | |
0.438, | |
0.445, | |
0.439, | |
0.524, | |
0.488, | |
0.495, | |
0.486, | |
0.488, | |
0.494, | |
0.484, | |
0.485, | |
0.419, | |
0.417, | |
0.425, | |
0.426, | |
0.429, | |
0.424, | |
0.414, | |
0.426, | |
0.523, | |
0.5, | |
0.5, | |
0.492, | |
0.485 | |
], | |
"F0_loss": [ | |
1.759, | |
1.681, | |
1.706, | |
1.622, | |
1.67, | |
1.749, | |
1.749, | |
1.72, | |
1.733, | |
1.71, | |
1.702, | |
1.661, | |
1.743, | |
1.683, | |
1.642, | |
1.678, | |
1.703, | |
1.679, | |
1.635, | |
1.733, | |
1.648, | |
1.689, | |
1.705, | |
1.693, | |
1.688, | |
1.7, | |
1.716, | |
1.678, | |
1.672, | |
1.696, | |
1.642, | |
1.693, | |
1.536, | |
1.46, | |
1.438, | |
1.463, | |
1.466, | |
1.446, | |
1.453, | |
1.445, | |
1.443, | |
1.441, | |
1.573, | |
1.585, | |
1.659, | |
1.654, | |
1.68, | |
1.614, | |
1.596, | |
1.552, | |
1.095, | |
1.086, | |
1.032, | |
1.022, | |
1.033, | |
1.059, | |
1.036, | |
1.02, | |
2.685, | |
2.684, | |
2.636, | |
2.653, | |
2.774 | |
], | |
"epochs": [ | |
1, | |
2, | |
3, | |
4, | |
5, | |
6, | |
7, | |
8, | |
9, | |
10, | |
11, | |
12, | |
13, | |
14, | |
15, | |
16, | |
17, | |
18, | |
19, | |
20, | |
21, | |
22, | |
23, | |
24, | |
25, | |
26, | |
27, | |
28, | |
29, | |
30, | |
31, | |
32, | |
33, | |
34, | |
35, | |
36, | |
37, | |
38, | |
39, | |
40, | |
41, | |
42, | |
43, | |
44, | |
45, | |
46, | |
47, | |
48, | |
49, | |
50, | |
51, | |
52, | |
53, | |
54, | |
55, | |
56, | |
57, | |
58, | |
59, | |
60, | |
61, | |
62, | |
63 | |
] | |
} | |
} |