Spaces:

Yehor
/

radtts-uk-bigvgan

Running

App Files Community

radtts-uk-bigvgan / configs / radtts-pp-dap-model.json

Yehor

Init

ea6a7ed 4 months ago

raw

history blame contribute delete

7.16 kB

	{
	"train_config": {
	"output_directory": "outdir_pp_model",
	"epochs": 10000000,
	"optim_algo": "RAdam",
	"learning_rate": 0.001,
	"weight_decay": 1e-06,
	"sigma": 1.0,
	"iters_per_checkpoint": 1000,
	"batch_size": 16,
	"seed": null,
	"checkpoint_path": "",
	"ignore_layers": [],
	"ignore_layers_warmstart": [],
	"finetune_layers": [],
	"include_layers": [],
	"vocoder_config_path": "models/hifigan_22khz_config.json",
	"vocoder_checkpoint_path": "models/hifigan_ljs_generator_v1.pt",
	"log_attribute_samples": true,
	"log_decoder_samples": true,
	"warmstart_checkpoint_path": "outdir_pp/model_100000",
	"use_amp": true,
	"grad_clip_val": 1.0,
	"loss_weights": {
	"blank_logprob": -1,
	"ctc_loss_weight": 0.1,
	"binarization_loss_weight": 1.0,
	"dur_loss_weight": 1.0,
	"f0_loss_weight": 1.0,
	"energy_loss_weight": 1.0,
	"vpred_loss_weight": 1.0
	},
	"binarization_start_iter": 0,
	"kl_loss_start_iter": 0,
	"unfreeze_modules": "all"
	},
	"data_config": {
	"training_files": {
	"LJS": {
	"basedir": "filelists/",
	"audiodir": "wavs",
	"filelist": "3speakers_ukrainian_train_filelist_dc.txt",
	"lmdbpath": ""
	}
	},
	"validation_files": {
	"LJS": {
	"basedir": "filelists/",
	"audiodir": "wavs",
	"filelist": "3speakers_ukrainian_val_filelist_dc.txt",
	"lmdbpath": ""
	}
	},
	"dur_min": 0.1,
	"dur_max": 10.2,
	"sampling_rate": 22050,
	"filter_length": 1024,
	"hop_length": 256,
	"win_length": 1024,
	"n_mel_channels": 80,
	"mel_fmin": 0.0,
	"mel_fmax": 8000.0,
	"f0_min": 80.0,
	"f0_max": 640.0,
	"max_wav_value": 32768.0,
	"use_f0": true,
	"use_log_f0": 0,
	"use_energy_avg": true,
	"use_scaled_energy": true,
	"symbol_set": "ukrainian",
	"cleaner_names": [
	"ukrainian_cleaners"
	],
	"heteronyms_path": "tts_text_processing/heteronyms",
	"phoneme_dict_path": "tts_text_processing/cmudict-0.7b",
	"p_phoneme": 0.0,
	"handle_phoneme": "word",
	"handle_phoneme_ambiguous": "ignore",
	"include_speakers": null,
	"n_frames": -1,
	"betabinom_cache_path": "/home/dmytro_chaplinsky/RAD-TTS/radtts-code/cache",
	"lmdb_cache_path": "",
	"use_attn_prior_masking": true,
	"prepend_space_to_text": true,
	"append_space_to_text": true,
	"add_bos_eos_to_text": false,
	"betabinom_scaling_factor": 1.0,
	"distance_tx_unvoiced": false,
	"mel_noise_scale": 0.0
	},
	"dist_config": {
	"dist_backend": "nccl",
	"dist_url": "tcp://localhost:54321"
	},
	"model_config": {
	"n_speakers": 3,
	"n_speaker_dim": 16,
	"n_text": 185,
	"n_text_dim": 512,
	"n_flows": 8,
	"n_conv_layers_per_step": 4,
	"n_mel_channels": 80,
	"n_hidden": 1024,
	"mel_encoder_n_hidden": 512,
	"dummy_speaker_embedding": false,
	"n_early_size": 2,
	"n_early_every": 2,
	"n_group_size": 2,
	"affine_model": "wavenet",
	"include_modules": "decatndpmvpredapm",
	"scaling_fn": "tanh",
	"matrix_decomposition": "LUS",
	"learn_alignments": true,
	"use_speaker_emb_for_alignment": false,
	"attn_straight_through_estimator": true,
	"use_context_lstm": true,
	"context_lstm_norm": "spectral",
	"context_lstm_w_f0_and_energy": true,
	"text_encoder_lstm_norm": "spectral",
	"n_f0_dims": 1,
	"n_energy_avg_dims": 1,
	"use_first_order_features": false,
	"unvoiced_bias_activation": "relu",
	"decoder_use_partial_padding": true,
	"decoder_use_unvoiced_bias": true,
	"ap_pred_log_f0": true,
	"ap_use_unvoiced_bias": false,
	"ap_use_voiced_embeddings": true,
	"dur_model_config": {
	"name": "dap",
	"hparams": {
	"n_speaker_dim": 16,
	"bottleneck_hparams": {
	"in_dim": 512,
	"reduction_factor": 16,
	"norm": "weightnorm",
	"non_linearity": "relu"
	},
	"take_log_of_input": true,
	"arch_hparams": {
	"out_dim": 1,
	"n_layers": 2,
	"n_channels": 256,
	"kernel_size": 3,
	"p_dropout": 0.25,
	"in_dim": 48
	}
	}
	},
	"f0_model_config": {
	"name": "dap",
	"hparams": {
	"n_speaker_dim": 16,
	"bottleneck_hparams": {
	"in_dim": 512,
	"reduction_factor": 16,
	"norm": "weightnorm",
	"non_linearity": "relu"
	},
	"take_log_of_input": false,
	"use_transformer": false,
	"arch_hparams": {
	"out_dim": 1,
	"n_layers": 2,
	"n_channels": 256,
	"kernel_size": 11,
	"p_dropout": 0.5,
	"in_dim": 48
	}
	}
	},
	"energy_model_config": {
	"name": "dap",
	"hparams": {
	"n_speaker_dim": 16,
	"bottleneck_hparams": {
	"in_dim": 512,
	"reduction_factor": 16,
	"norm": "weightnorm",
	"non_linearity": "relu"
	},
	"take_log_of_input": false,
	"use_transformer": false,
	"arch_hparams": {
	"out_dim": 1,
	"n_layers": 2,
	"n_channels": 256,
	"kernel_size": 3,
	"p_dropout": 0.25,
	"in_dim": 48
	}
	}
	},
	"v_model_config": {
	"name": "dap",
	"hparams": {
	"n_speaker_dim": 16,
	"take_log_of_input": false,
	"bottleneck_hparams": {
	"in_dim": 512,
	"reduction_factor": 16,
	"norm": "weightnorm",
	"non_linearity": "relu"
	},
	"arch_hparams": {
	"out_dim": 1,
	"n_layers": 2,
	"n_channels": 256,
	"kernel_size": 3,
	"p_dropout": 0.5,
	"lstm_type": "",
	"use_linear": 1,
	"in_dim": 48
	}
	}
	}
	}
	}