# Hyperparameters for a unit-based HiFi-GAN generator (HyperPyYAML format).
#
# Layout:
#   1. Plain scalar/list hyperparameters, referenced below via `!ref <key>`.
#   2. `generator`: the model object, built with `!new:` from those values.
#   3. `modules` / `pretrainer`: expose the same generator object for use and
#      for loading pretrained parameters.

# Input-unit embedding settings.
# NOTE(review): presumably the number of discrete unit ids and the width of
# their embedding vectors -- confirm against UnitHifiganGenerator.
vocab_size: 6001
embedding_dim: 1024

# Generator channel counts. `in_channels` has the same value as
# `embedding_dim` above.
in_channels: 1024
out_channels: 1

# Residual-block configuration: three kernel sizes, each paired with one
# dilation triple.
resblock_type: "1"
resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
resblock_kernel_sizes: [3, 7, 11]

# Upsampling stack: five stages, one kernel size and one factor per stage.
# The factors multiply to 5 * 4 * 4 * 2 * 2 = 320.
upsample_kernel_sizes: [11, 8, 8, 4, 4]
upsample_initial_channel: 512
upsample_factors: [5, 4, 4, 2, 2]

# Remaining generator options.
inference_padding: 5
cond_channels: 0
conv_post_bias: True

# Generator instance. `!new:` constructs the named class, passing the nested
# mapping as keyword arguments; each `!ref <key>` resolves to the top-level
# value of that key defined above.
generator: !new:speechbrain.lobes.models.HifiGAN.UnitHifiganGenerator
  in_channels: !ref <in_channels>
  out_channels: !ref <out_channels>
  resblock_type: !ref <resblock_type>
  resblock_dilation_sizes: !ref <resblock_dilation_sizes>
  resblock_kernel_sizes: !ref <resblock_kernel_sizes>
  upsample_kernel_sizes: !ref <upsample_kernel_sizes>
  upsample_initial_channel: !ref <upsample_initial_channel>
  upsample_factors: !ref <upsample_factors>
  inference_padding: !ref <inference_padding>
  cond_channels: !ref <cond_channels>
  conv_post_bias: !ref <conv_post_bias>
  vocab_size: !ref <vocab_size>
  embedding_dim: !ref <embedding_dim>
  # NOTE(review): these two flags are placed here as generator keyword
  # arguments because they directly follow the generator's other arguments
  # -- confirm they were not meant to be top-level keys.
  duration_predictor: False
  multi_speaker: False

# Modules exposed to the consumer of this file; refers to the generator
# object defined above.
modules:
  generator: !ref <generator>

# Pretrainer whose `loadables` mapping names the same generator object, so
# loaded parameters land in the module listed under `modules`.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
  loadables:
    generator: !ref <generator>