---
# Diffusion model configuration: dataset/encoder settings (`data`), network
# settings (`model`), vocoder checkpoint, inference sampler, experiment
# environment, training hyper-parameters and the speaker-name -> id table.
#
# NOTE(review): the previous revision had every key collapsed onto a single
# line, which is not parseable YAML. The nesting below is reconstructed from
# key order; confirm section membership against the consuming loader.

data:
  sampling_rate: 44100
  block_size: 512
  duration: 2
  encoder: vec768l12
  cnhubertsoft_gate: 10
  encoder_sample_rate: 16000
  encoder_hop_size: 320
  # NOTE(review): presumably must match the output width of `encoder` above
  # (768 for vec768l12) - confirm.
  encoder_out_channels: 768
  training_files: filelists/train.txt
  validation_files: filelists/val.txt
  extensions:
    - wav
  unit_interpolate_mode: nearest

model:
  type: Diffusion
  n_layers: 20
  n_chans: 512
  n_hidden: 256
  use_pitch_aug: true
  timesteps: 1000
  # NOTE(review): presumably 0 means "no cap on diffusion steps" - confirm.
  k_step_max: 0
  # One speaker is declared in the `spk` table at the bottom of this file.
  n_spk: 1

# NOTE(review): placed at top level; confirm it is not meant to live under
# `model`.
device: cuda

vocoder:
  type: nsf-hifigan
  ckpt: pretrain/nsf_hifigan/model

infer:
  speedup: 10
  method: dpm-solver++

env:
  expdir: logs/44k/diffusion
  gpu_id: 0

train:
  num_workers: 4
  amp_dtype: fp32
  batch_size: 48
  cache_all_data: true
  cache_device: cpu
  cache_fp16: true
  epochs: 100000
  interval_log: 10
  interval_val: 2000
  interval_force_save: 5000
  lr: 0.0001
  decay_step: 100000
  gamma: 0.5
  weight_decay: 0
  save_opt: false

# Speaker name -> integer id.
spk:
  nanami: 0