{
"model_params": {
"decoder": {
"resblock_dilation_sizes": [
[
1,
3,
5
],
[
1,
3,
5
],
[
1,
3,
5
]
],
"resblock_kernel_sizes": [
3,
7,
11
],
"type": "hifigan",
"upsample_initial_channel": 512,
"upsample_kernel_sizes": [
20,
10,
6,
4
],
"upsample_rates": [
10,
5,
3,
2
]
},
"diffusion": {
"dist": {
"estimate_sigma_data": true,
"mean": -3.0,
"sigma_data": 0.2,
"std": 1.0
},
"embedding_mask_proba": 0.1,
"transformer": {
"head_features": 64,
"multiplier": 2,
"num_heads": 8,
"num_layers": 3
}
},
"dim_in": 64,
"dropout": 0.2,
"hidden_dim": 512,
"max_conv_dim": 512,
"max_dur": 50,
"multispeaker": false,
"n_layer": 3,
"n_mels": 80,
"n_token": 178,
"slm": {
"hidden": 768,
"initial_channel": 64,
"model": "microsoft/wavlm-base-plus",
"nlayers": 13,
"sr": 16000
},
"style_dim": 128
},
"training_config": {
"epochs": 5,
"batch_size": 2,
"max_len": 620,
"optimizer": {
"bert_lr": 1e-05,
"ft_lr": 0.0001,
"lr": 0.0001
},
"loss_params": {
"diff_epoch": 1,
"joint_epoch": 110,
"lambda_F0": 1.0,
"lambda_ce": 20.0,
"lambda_diff": 1.0,
"lambda_dur": 1.0,
"lambda_gen": 1.0,
"lambda_mel": 5.0,
"lambda_mono": 1.0,
"lambda_norm": 1.0,
"lambda_s2s": 1.0,
"lambda_slm": 1.0,
"lambda_sty": 1.0
}
},
"preprocess_params": {
"spect_params": {
"hop_length": 300,
"n_fft": 2048,
"win_length": 1200
},
"sr": 24000
},
"data_params": {
"OOD_data": "Data/OOD_texts.txt",
"min_length": 50,
"root_path": "Data/wavs",
"train_data": "Data/train_list.txt",
"val_data": "Data/val_list.txt"
},
"model_state": {
"epoch": 4,
"iterations": 2810,
"val_loss": 0.48474612832069397
},
"training_metrics": {
"train_loss": [],
"val_loss": [
56.0,
24.0,
35.0,
43.0,
22.0,
13.0,
21.0,
5.0,
53.0,
45.0,
41.0,
25.0,
3.0,
44.0,
40.0,
18.0,
17.0,
0.0,
9.0,
52.0,
52.0,
7.0,
21.0,
24.0,
21.0,
41.0,
40.0,
0.0,
22.0,
55.0,
35.0,
30.0,
35.0,
5.0,
52.0,
52.0,
0.0,
3.0,
45.0,
31.0,
42.0,
16.0,
5.0,
35.0,
13.0,
51.0,
4.0,
27.0,
44.0,
16.0,
48.0,
11.0,
57.0,
15.0,
27.0,
53.0,
28.0,
57.0,
20.0,
16.0,
1.0,
21.0,
5.0
],
"dur_loss": [
0.502,
0.482,
0.476,
0.466,
0.473,
0.464,
0.464,
0.459,
0.467,
0.47,
0.463,
0.458,
0.498,
0.483,
0.472,
0.473,
0.465,
0.469,
0.459,
0.454,
0.461,
0.458,
0.461,
0.453,
0.457,
0.456,
0.456,
0.455,
0.456,
0.453,
0.452,
0.453,
0.464,
0.468,
0.446,
0.45,
0.449,
0.451,
0.442,
0.438,
0.445,
0.439,
0.524,
0.488,
0.495,
0.486,
0.488,
0.494,
0.484,
0.485,
0.419,
0.417,
0.425,
0.426,
0.429,
0.424,
0.414,
0.426,
0.523,
0.5,
0.5,
0.492,
0.485
],
"F0_loss": [
1.759,
1.681,
1.706,
1.622,
1.67,
1.749,
1.749,
1.72,
1.733,
1.71,
1.702,
1.661,
1.743,
1.683,
1.642,
1.678,
1.703,
1.679,
1.635,
1.733,
1.648,
1.689,
1.705,
1.693,
1.688,
1.7,
1.716,
1.678,
1.672,
1.696,
1.642,
1.693,
1.536,
1.46,
1.438,
1.463,
1.466,
1.446,
1.453,
1.445,
1.443,
1.441,
1.573,
1.585,
1.659,
1.654,
1.68,
1.614,
1.596,
1.552,
1.095,
1.086,
1.032,
1.022,
1.033,
1.059,
1.036,
1.02,
2.685,
2.684,
2.636,
2.653,
2.774
],
"epochs": [
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59,
60,
61,
62,
63
]
}
}