nmcuong commited on
Commit
b3f704b
·
verified ·
1 Parent(s): 6f84009

Upload 5 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ samples/sample.wav filter=lfs diff=lfs merge=lfs -text
pretrain/DUR_463000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6737cfc2802232cbcf0372ffa184eda6925e2516ec9b873e4c31329b1eeca078
3
+ size 6892002
pretrain/D_463000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d93b2c2ce313a7f1286ffdee845223fc9d30847581ee694b5a20324db7428422
3
+ size 561099642
pretrain/G_463000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf4af007bda8aa94e862d881b6f8c8506c44c00018c7f24a6ebbe8445b2a146c
3
+ size 623777934
pretrain/config.json ADDED
@@ -0,0 +1,321 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "eval_interval": 1000,
5
+ "seed": 52,
6
+ "epochs": 10000,
7
+ "learning_rate": 0.0003,
8
+ "betas": [
9
+ 0.8,
10
+ 0.99
11
+ ],
12
+ "eps": 1e-09,
13
+ "batch_size": 12,
14
+ "fp16_run": false,
15
+ "lr_decay": 0.999875,
16
+ "segment_size": 16384,
17
+ "init_lr_ratio": 1,
18
+ "warmup_epochs": 0,
19
+ "c_mel": 45,
20
+ "c_kl": 1.0,
21
+ "skip_optimizer": true,
22
+ "device": "cuda:0"
23
+ },
24
+ "data": {
25
+ "training_files": "/storage2/melotts/dataset/infore/train.list",
26
+ "validation_files": "/storage2/melotts/dataset/infore/val.list",
27
+ "max_wav_value": 32768.0,
28
+ "sampling_rate": 44100,
29
+ "filter_length": 2048,
30
+ "hop_length": 512,
31
+ "win_length": 2048,
32
+ "n_mel_channels": 128,
33
+ "mel_fmin": 0.0,
34
+ "mel_fmax": null,
35
+ "add_blank": true,
36
+ "n_speakers": 256,
37
+ "cleaned_text": true,
38
+ "spk2id": {
39
+ "VI-default": 0
40
+ }
41
+ },
42
+ "model": {
43
+ "use_spk_conditioned_encoder": true,
44
+ "use_noise_scaled_mas": true,
45
+ "use_mel_posterior_encoder": false,
46
+ "use_duration_discriminator": true,
47
+ "inter_channels": 192,
48
+ "hidden_channels": 192,
49
+ "filter_channels": 768,
50
+ "n_heads": 2,
51
+ "n_layers": 6,
52
+ "n_layers_trans_flow": 3,
53
+ "kernel_size": 3,
54
+ "p_dropout": 0.1,
55
+ "resblock": "1",
56
+ "resblock_kernel_sizes": [
57
+ 3,
58
+ 7,
59
+ 11
60
+ ],
61
+ "resblock_dilation_sizes": [
62
+ [
63
+ 1,
64
+ 3,
65
+ 5
66
+ ],
67
+ [
68
+ 1,
69
+ 3,
70
+ 5
71
+ ],
72
+ [
73
+ 1,
74
+ 3,
75
+ 5
76
+ ]
77
+ ],
78
+ "upsample_rates": [
79
+ 8,
80
+ 8,
81
+ 2,
82
+ 2,
83
+ 2
84
+ ],
85
+ "upsample_initial_channel": 512,
86
+ "upsample_kernel_sizes": [
87
+ 16,
88
+ 16,
89
+ 8,
90
+ 2,
91
+ 2
92
+ ],
93
+ "n_layers_q": 3,
94
+ "use_spectral_norm": false,
95
+ "gin_channels": 256
96
+ },
97
+ "device": "cuda:0",
98
+ "num_languages": 10,
99
+ "num_tones": 24,
100
+ "symbols": [
101
+ "_",
102
+ "\"",
103
+ "(",
104
+ ")",
105
+ "*",
106
+ "/",
107
+ ":",
108
+ "AA",
109
+ "E",
110
+ "EE",
111
+ "En",
112
+ "N",
113
+ "OO",
114
+ "Q",
115
+ "V",
116
+ "[",
117
+ "\\",
118
+ "]",
119
+ "^",
120
+ "a",
121
+ "a:",
122
+ "aa",
123
+ "ae",
124
+ "ah",
125
+ "ai",
126
+ "an",
127
+ "ang",
128
+ "ao",
129
+ "aw",
130
+ "ay",
131
+ "b",
132
+ "by",
133
+ "c",
134
+ "ch",
135
+ "d",
136
+ "dh",
137
+ "dy",
138
+ "e",
139
+ "e:",
140
+ "eh",
141
+ "ei",
142
+ "en",
143
+ "eng",
144
+ "er",
145
+ "ey",
146
+ "f",
147
+ "g",
148
+ "gy",
149
+ "h",
150
+ "hh",
151
+ "hy",
152
+ "i",
153
+ "i0",
154
+ "i:",
155
+ "ia",
156
+ "ian",
157
+ "iang",
158
+ "iao",
159
+ "ie",
160
+ "ih",
161
+ "in",
162
+ "ing",
163
+ "iong",
164
+ "ir",
165
+ "iu",
166
+ "iy",
167
+ "j",
168
+ "jh",
169
+ "k",
170
+ "ky",
171
+ "l",
172
+ "m",
173
+ "my",
174
+ "n",
175
+ "ng",
176
+ "ny",
177
+ "o",
178
+ "o:",
179
+ "ong",
180
+ "ou",
181
+ "ow",
182
+ "oy",
183
+ "p",
184
+ "py",
185
+ "q",
186
+ "r",
187
+ "ry",
188
+ "s",
189
+ "sh",
190
+ "t",
191
+ "th",
192
+ "ts",
193
+ "ty",
194
+ "u",
195
+ "u:",
196
+ "ua",
197
+ "uai",
198
+ "uan",
199
+ "uang",
200
+ "uh",
201
+ "ui",
202
+ "un",
203
+ "uo",
204
+ "uw",
205
+ "v",
206
+ "van",
207
+ "ve",
208
+ "vn",
209
+ "w",
210
+ "x",
211
+ "y",
212
+ "z",
213
+ "zh",
214
+ "zy",
215
+ "~",
216
+ "¡",
217
+ "¿",
218
+ "æ",
219
+ "ç",
220
+ "ð",
221
+ "ø",
222
+ "ŋ",
223
+ "œ",
224
+ "ɐ",
225
+ "ɑ",
226
+ "ɒ",
227
+ "ɔ",
228
+ "ɕ",
229
+ "ə",
230
+ "ɛ",
231
+ "ɜ",
232
+ "ɡ",
233
+ "ɣ",
234
+ "ɥ",
235
+ "ɦ",
236
+ "ɪ",
237
+ "ɫ",
238
+ "ɬ",
239
+ "ɭ",
240
+ "ɯ",
241
+ "ɲ",
242
+ "ɵ",
243
+ "ɸ",
244
+ "ɹ",
245
+ "ɾ",
246
+ "ʁ",
247
+ "ʃ",
248
+ "ʊ",
249
+ "ʌ",
250
+ "ʎ",
251
+ "ʏ",
252
+ "ʑ",
253
+ "ʒ",
254
+ "ʝ",
255
+ "ʲ",
256
+ "ˈ",
257
+ "ˌ",
258
+ "ː",
259
+ "̃",
260
+ "̩",
261
+ "β",
262
+ "θ",
263
+ "ᄀ",
264
+ "k͡p",
265
+ "ˈa",
266
+ "ɤ",
267
+ "ˈɛ",
268
+ "ˈi",
269
+ "ă",
270
+ "ˈe",
271
+ "ʔ",
272
+ "ŋ͡m",
273
+ "tʰ",
274
+ "ɤ̆",
275
+ "ᄌ",
276
+ "ᄍ",
277
+ "ᄎ",
278
+ "ᄏ",
279
+ "ᄐ",
280
+ "ᄑ",
281
+ "ᄒ",
282
+ "ᅡ",
283
+ "ᅢ",
284
+ "ᅣ",
285
+ "ᅤ",
286
+ "ᅥ",
287
+ "ᅦ",
288
+ "ᅧ",
289
+ "ᅨ",
290
+ "ᅩ",
291
+ "ᅪ",
292
+ "ᅫ",
293
+ "ᅬ",
294
+ "ᅭ",
295
+ "ᅮ",
296
+ "ᅯ",
297
+ "ᅰ",
298
+ "ᅱ",
299
+ "ᅲ",
300
+ "ᅳ",
301
+ "ᅴ",
302
+ "ᅵ",
303
+ "ᆨ",
304
+ "ᆫ",
305
+ "ᆮ",
306
+ "ᆯ",
307
+ "ᆷ",
308
+ "ᆸ",
309
+ "ᆼ",
310
+ "ㄸ",
311
+ "!",
312
+ "?",
313
+ "…",
314
+ ",",
315
+ ".",
316
+ "'",
317
+ "-",
318
+ "SP",
319
+ "UNK"
320
+ ]
321
+ }
samples/sample.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9c5aa7e4736909cb08f5afb058d0da1a847e2d5ec9b72220639d02b874b4a23
3
+ size 7978560