AaronZ345 committed on
Commit
ba10942
·
verified ·
1 Parent(s): 76663ea

Upload 5 files

Browse files
checkpoints/SAD/model_ckpt_steps_80000.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27c1940a6d328d69772f8689f3656cea95cdc333f91d2ca03847468f497d9d3f
3
+ size 737037749
checkpoints/SDLM/model_ckpt_steps_120000.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:265db88bd7542ea1a628cfa04a97449855a9c29c75b39b8a0ede92ac6bd8b6a3
3
+ size 2650549953
checkpoints/TCSinger/model_ckpt_steps_200000.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0afd0cd3592778e3860d0bce3e4cbd8a9c5d02ea23b7bafebcbd49db521d4706
3
+ size 1386681253
checkpoints/hifigan/config.yaml ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accumulate_grad_batches: 1
2
+ adam_b1: 0.8
3
+ adam_b2: 0.99
4
+ amp: false
5
+ audio_num_mel_bins: 80
6
+ audio_sample_rate: 48000
7
+ aux_context_window: 0
8
+ base_config:
9
+ - egs/egs_bases/singing/hifigan.yaml
10
+ binarization_args:
11
+ reset_phone_dict: true
12
+ reset_word_dict: true
13
+ shuffle: false
14
+ trim_eos_bos: false
15
+ with_align: false
16
+ with_f0: true
17
+ with_f0cwt: false
18
+ with_linear: false
19
+ with_midi: true
20
+ with_spk_embed: false
21
+ with_spk_id: true
22
+ with_txt: false
23
+ with_wav: true
24
+ with_word: false
25
+ binarizer_cls: data_gen.tts.singing.binarize.SingingBinarizer
26
+ binary_data_dir: data/binary/merge-midi-1
27
+ check_val_every_n_epoch: 10
28
+ clip_grad_norm: 1
29
+ clip_grad_value: 0
30
+ datasets:
31
+ - merge#
32
+ debug: false
33
+ dec_ffn_kernel_size: 9
34
+ dec_layers: 4
35
+ dict_dir: ''
36
+ disc_start_steps: 40000
37
+ discriminator_grad_norm: 1
38
+ discriminator_optimizer_params:
39
+ lr: 0.0002
40
+ discriminator_scheduler_params:
41
+ gamma: 0.999
42
+ step_size: 600
43
+ dropout: 0.1
44
+ ds_workers: 1
45
+ enc_ffn_kernel_size: 9
46
+ enc_layers: 4
47
+ endless_ds: true
48
+ eval_max_batches: -1
49
+ ffn_act: gelu
50
+ ffn_padding: SAME
51
+ fft_size: 1024
52
+ fmax: 24000
53
+ fmin: 20
54
+ frames_multiple: 1
55
+ gen_dir_name: ''
56
+ generator_grad_norm: 10
57
+ generator_optimizer_params:
58
+ lr: 0.0002
59
+ generator_scheduler_params:
60
+ gamma: 0.999
61
+ step_size: 600
62
+ griffin_lim_iters: 60
63
+ hidden_size: 256
64
+ hop_size: 256
65
+ infer: false
66
+ lambda_adv: 1.0
67
+ lambda_cdisc: 4.0
68
+ lambda_energy: 0.0
69
+ lambda_f0: 0.0
70
+ lambda_mel: 5.0
71
+ lambda_mel_adv: 1.0
72
+ lambda_ph_dur: 0.0
73
+ lambda_sent_dur: 0.0
74
+ lambda_uv: 0.0
75
+ lambda_word_dur: 0.0
76
+ load_ckpt: ''
77
+ loss_mel_bins: 320
78
+ loud_norm: false
79
+ lr: 2.0
80
+ max_epochs: 1000
81
+ max_frames: 4800
82
+ max_input_tokens: 1550
83
+ max_samples: 8192
84
+ max_sentences: 24
85
+ max_tokens: 20000
86
+ max_updates: 1000000
87
+ max_valid_sentences: 1
88
+ max_valid_tokens: 60000
89
+ mel_loss: ssim:0.5|l1:0.5
90
+ mel_loss_param:
91
+ fft_sizes:
92
+ - 3072
93
+ - 4096
94
+ - 5120
95
+ hop_sizes:
96
+ - 128
97
+ - 128
98
+ - 128
99
+ mel_bin: 320
100
+ win_lengths:
101
+ - 3072
102
+ - 4096
103
+ - 5120
104
+ mel_vmax: 1.5
105
+ mel_vmin: -6
106
+ mfa_version: 2
107
+ min_frames: 0
108
+ min_level_db: -100
109
+ num_ckpt_keep: 3
110
+ num_heads: 2
111
+ num_mels: 80
112
+ num_sanity_val_steps: 10
113
+ num_spk: 100
114
+ num_test_samples: 0
115
+ num_valid_plots: 10
116
+ optimizer_adam_beta1: 0.9
117
+ optimizer_adam_beta2: 0.98
118
+ out_wav_norm: false
119
+ pitch_extractor: parselmouth
120
+ pitch_type: frame
121
+ pre_align_args:
122
+ allow_no_txt: false
123
+ denoise: false
124
+ nsample_per_mfa_group: 1000
125
+ sox_resample: true
126
+ sox_to_wav: false
127
+ trim_sil: false
128
+ txt_processor: zh
129
+ use_tone: false
130
+ pre_align_cls: data_gen.tts.singing.pre_align.SingingPreAlign
131
+ predictor_grad: 0.0
132
+ print_nan_grads: false
133
+ processed_data_dir: ../../data/audio/features/singing/old_data/processed/merge_all_44100
134
+ profile_infer: false
135
+ raw_data_dir: ../../data/audio/wavs/sing/old_data
136
+ ref_level_db: 20
137
+ rename_tmux: true
138
+ resblock: '1'
139
+ resblock_dilation_sizes:
140
+ - - 1
141
+ - 3
142
+ - 5
143
+ - - 1
144
+ - 3
145
+ - 5
146
+ - - 1
147
+ - 3
148
+ - 5
149
+ resblock_kernel_sizes:
150
+ - 3
151
+ - 7
152
+ - 11
153
+ resume_from_checkpoint: 0
154
+ save_best: true
155
+ save_codes: []
156
+ save_f0: true
157
+ save_gt: true
158
+ scheduler: rsqrt
159
+ seed: 1234
160
+ sort_by_len: true
161
+ stft_loss_param:
162
+ fft_sizes:
163
+ - 2048
164
+ - 4096
165
+ - 8192
166
+ hop_sizes:
167
+ - 128
168
+ - 256
169
+ - 256
170
+ win_lengths:
171
+ - 2048
172
+ - 4096
173
+ - 8192
174
+ task_cls: tasks.vocoder.hifigan.HifiGanTask
175
+ tb_log_interval: 100
176
+ test_ids: []
177
+ test_input_dir: ''
178
+ test_num: 0
179
+ test_prefixes: []
180
+ test_set_name: test
181
+ train_set_name: train
182
+ train_sets: ''
183
+ upsample_initial_channel: 512
184
+ upsample_kernel_sizes:
185
+ - 16
186
+ - 8
187
+ - 8
188
+ - 4
189
+ upsample_rates:
190
+ - 8
191
+ - 4
192
+ - 4
193
+ - 2
194
+ use_afl: false
195
+ use_cond_disc: false
196
+ use_different_mel_loss: true
197
+ use_fm_loss: false
198
+ use_gt_dur: true
199
+ use_gt_f0: true
200
+ use_ms_stft: true
201
+ use_pitch_embed: true
202
+ use_ref_enc: true
203
+ use_spec_disc: false
204
+ use_spk_embed: true
205
+ use_spk_id: false
206
+ use_split_spk_id: false
207
+ use_word_input: false
208
+ val_check_interval: 2000
209
+ valid_infer_interval: 10000
210
+ valid_monitor_key: val_loss
211
+ valid_monitor_mode: min
212
+ valid_set_name: valid
213
+ vocoder: pwg
214
+ vocoder_ckpt: ''
215
+ vocoder_denoise_c: 0.0
216
+ warmup_updates: 8000
217
+ weight_decay: 0
218
+ win_length: null
219
+ win_size: 1024
220
+ window: hann
221
+ word_size: 3000
222
+ work_dir: ../../checkpoints/NeuralSeq/1224_0927seg_hifigan_nsf_ccy
checkpoints/hifigan/model_ckpt_steps_1000000.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d02734a6559601ace176d41a4edaeae6408142caaf1476ae42a23f52fe82c44d
3
+ size 1013728024