Vineel Pratap
commited on
Commit
·
9b10038
1
Parent(s):
471aa6e
update2
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- models/abi/G_100000.pth +3 -0
- models/abi/config.json +87 -0
- models/abi/vocab.txt +53 -0
- models/abp/G_100000.pth +3 -0
- models/abp/config.json +87 -0
- models/abp/vocab.txt +33 -0
- models/aca/G_100000.pth +3 -0
- models/aca/config.json +87 -0
- models/aca/vocab.txt +35 -0
- models/acd/G_100000.pth +3 -0
- models/acd/config.json +87 -0
- models/acd/vocab.txt +28 -0
- models/ace/G_100000.pth +3 -0
- models/ace/config.json +87 -0
- models/ace/vocab.txt +42 -0
- models/acf/G_100000.pth +3 -0
- models/acf/config.json +87 -0
- models/acf/vocab.txt +33 -0
- models/ach/G_100000.pth +3 -0
- models/ach/config.json +87 -0
- models/ach/vocab.txt +28 -0
- models/acn/G_100000.pth +3 -0
- models/acn/config.json +87 -0
- models/acn/vocab.txt +37 -0
- models/acr/G_100000.pth +3 -0
- models/acr/config.json +87 -0
- models/acr/vocab.txt +37 -0
- models/acu/G_100000.pth +3 -0
- models/acu/config.json +87 -0
- models/acu/vocab.txt +35 -0
- models/ade/G_100000.pth +3 -0
- models/ade/config.json +87 -0
- models/ade/vocab.txt +40 -0
- models/adh/G_100000.pth +3 -0
- models/adh/config.json +87 -0
- models/adh/vocab.txt +29 -0
- models/adj/G_100000.pth +3 -0
- models/adj/config.json +87 -0
- models/adj/vocab.txt +46 -0
- models/adx/G_100000.pth +3 -0
- models/adx/config.json +87 -0
- models/adx/vocab.txt +59 -0
- models/aeu/G_100000.pth +3 -0
- models/aeu/config.json +87 -0
- models/aeu/vocab.txt +25 -0
- models/agd/G_100000.pth +3 -0
- models/agd/config.json +87 -0
- models/agd/vocab.txt +30 -0
- models/agg/G_100000.pth +3 -0
- models/agg/config.json +87 -0
models/abi/G_100000.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:10d49b59265a0a5f5d899dde745c21bc0a492f68f3769521d238b23a446f93b2
|
| 3 |
+
size 145496039
|
models/abi/config.json
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"train": {
|
| 3 |
+
"log_interval": 200,
|
| 4 |
+
"eval_interval": 1000,
|
| 5 |
+
"seed": 1234,
|
| 6 |
+
"epochs": 20000,
|
| 7 |
+
"learning_rate": 0.0002,
|
| 8 |
+
"betas": [
|
| 9 |
+
0.8,
|
| 10 |
+
0.99
|
| 11 |
+
],
|
| 12 |
+
"eps": 1e-09,
|
| 13 |
+
"batch_size": 64,
|
| 14 |
+
"fp16_run": true,
|
| 15 |
+
"lr_decay": 0.999875,
|
| 16 |
+
"segment_size": 8192,
|
| 17 |
+
"init_lr_ratio": 1,
|
| 18 |
+
"warmup_epochs": 0,
|
| 19 |
+
"c_mel": 45,
|
| 20 |
+
"c_kl": 1.0
|
| 21 |
+
},
|
| 22 |
+
"data": {
|
| 23 |
+
"training_files": "train.ltr",
|
| 24 |
+
"validation_files": "dev.ltr",
|
| 25 |
+
"text_cleaners": [
|
| 26 |
+
"transliteration_cleaners"
|
| 27 |
+
],
|
| 28 |
+
"max_wav_value": 32768.0,
|
| 29 |
+
"sampling_rate": 16000,
|
| 30 |
+
"filter_length": 1024,
|
| 31 |
+
"hop_length": 256,
|
| 32 |
+
"win_length": 1024,
|
| 33 |
+
"n_mel_channels": 80,
|
| 34 |
+
"mel_fmin": 0.0,
|
| 35 |
+
"mel_fmax": null,
|
| 36 |
+
"add_blank": true,
|
| 37 |
+
"n_speakers": 0,
|
| 38 |
+
"cleaned_text": true
|
| 39 |
+
},
|
| 40 |
+
"model": {
|
| 41 |
+
"inter_channels": 192,
|
| 42 |
+
"hidden_channels": 192,
|
| 43 |
+
"filter_channels": 768,
|
| 44 |
+
"n_heads": 2,
|
| 45 |
+
"n_layers": 6,
|
| 46 |
+
"kernel_size": 3,
|
| 47 |
+
"p_dropout": 0.1,
|
| 48 |
+
"resblock": "1",
|
| 49 |
+
"resblock_kernel_sizes": [
|
| 50 |
+
3,
|
| 51 |
+
7,
|
| 52 |
+
11
|
| 53 |
+
],
|
| 54 |
+
"resblock_dilation_sizes": [
|
| 55 |
+
[
|
| 56 |
+
1,
|
| 57 |
+
3,
|
| 58 |
+
5
|
| 59 |
+
],
|
| 60 |
+
[
|
| 61 |
+
1,
|
| 62 |
+
3,
|
| 63 |
+
5
|
| 64 |
+
],
|
| 65 |
+
[
|
| 66 |
+
1,
|
| 67 |
+
3,
|
| 68 |
+
5
|
| 69 |
+
]
|
| 70 |
+
],
|
| 71 |
+
"upsample_rates": [
|
| 72 |
+
8,
|
| 73 |
+
8,
|
| 74 |
+
2,
|
| 75 |
+
2
|
| 76 |
+
],
|
| 77 |
+
"upsample_initial_channel": 512,
|
| 78 |
+
"upsample_kernel_sizes": [
|
| 79 |
+
16,
|
| 80 |
+
16,
|
| 81 |
+
4,
|
| 82 |
+
4
|
| 83 |
+
],
|
| 84 |
+
"n_layers_q": 3,
|
| 85 |
+
"use_spectral_norm": false
|
| 86 |
+
}
|
| 87 |
+
}
|
models/abi/vocab.txt
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
ɔ
|
| 2 |
+
ê
|
| 3 |
+
ǒ
|
| 4 |
+
|
| 5 |
+
̂
|
| 6 |
+
h
|
| 7 |
+
ě
|
| 8 |
+
i
|
| 9 |
+
ɩ
|
| 10 |
+
k
|
| 11 |
+
á
|
| 12 |
+
̌
|
| 13 |
+
ǐ
|
| 14 |
+
b
|
| 15 |
+
p
|
| 16 |
+
í
|
| 17 |
+
ǔ
|
| 18 |
+
u
|
| 19 |
+
ń
|
| 20 |
+
w
|
| 21 |
+
'
|
| 22 |
+
ί
|
| 23 |
+
f
|
| 24 |
+
ó
|
| 25 |
+
y
|
| 26 |
+
s
|
| 27 |
+
î
|
| 28 |
+
m
|
| 29 |
+
ɛ
|
| 30 |
+
έ
|
| 31 |
+
e
|
| 32 |
+
ʋ
|
| 33 |
+
ḿ
|
| 34 |
+
n
|
| 35 |
+
ú
|
| 36 |
+
o
|
| 37 |
+
d
|
| 38 |
+
â
|
| 39 |
+
ô
|
| 40 |
+
c
|
| 41 |
+
ǎ
|
| 42 |
+
é
|
| 43 |
+
́
|
| 44 |
+
j
|
| 45 |
+
l
|
| 46 |
+
-
|
| 47 |
+
t
|
| 48 |
+
_
|
| 49 |
+
r
|
| 50 |
+
g
|
| 51 |
+
ε
|
| 52 |
+
û
|
| 53 |
+
a
|
models/abp/G_100000.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5a87c726778281f406b80e0383b53e80f0a39fd75f3dba8c50ab17df25b97c76
|
| 3 |
+
size 145480667
|
models/abp/config.json
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"train": {
|
| 3 |
+
"log_interval": 200,
|
| 4 |
+
"eval_interval": 1000,
|
| 5 |
+
"seed": 1234,
|
| 6 |
+
"epochs": 20000,
|
| 7 |
+
"learning_rate": 0.0002,
|
| 8 |
+
"betas": [
|
| 9 |
+
0.8,
|
| 10 |
+
0.99
|
| 11 |
+
],
|
| 12 |
+
"eps": 1e-09,
|
| 13 |
+
"batch_size": 64,
|
| 14 |
+
"fp16_run": true,
|
| 15 |
+
"lr_decay": 0.999875,
|
| 16 |
+
"segment_size": 8192,
|
| 17 |
+
"init_lr_ratio": 1,
|
| 18 |
+
"warmup_epochs": 0,
|
| 19 |
+
"c_mel": 45,
|
| 20 |
+
"c_kl": 1.0
|
| 21 |
+
},
|
| 22 |
+
"data": {
|
| 23 |
+
"training_files": "train.ltr",
|
| 24 |
+
"validation_files": "dev.ltr",
|
| 25 |
+
"text_cleaners": [
|
| 26 |
+
"transliteration_cleaners"
|
| 27 |
+
],
|
| 28 |
+
"max_wav_value": 32768.0,
|
| 29 |
+
"sampling_rate": 16000,
|
| 30 |
+
"filter_length": 1024,
|
| 31 |
+
"hop_length": 256,
|
| 32 |
+
"win_length": 1024,
|
| 33 |
+
"n_mel_channels": 80,
|
| 34 |
+
"mel_fmin": 0.0,
|
| 35 |
+
"mel_fmax": null,
|
| 36 |
+
"add_blank": true,
|
| 37 |
+
"n_speakers": 0,
|
| 38 |
+
"cleaned_text": true
|
| 39 |
+
},
|
| 40 |
+
"model": {
|
| 41 |
+
"inter_channels": 192,
|
| 42 |
+
"hidden_channels": 192,
|
| 43 |
+
"filter_channels": 768,
|
| 44 |
+
"n_heads": 2,
|
| 45 |
+
"n_layers": 6,
|
| 46 |
+
"kernel_size": 3,
|
| 47 |
+
"p_dropout": 0.1,
|
| 48 |
+
"resblock": "1",
|
| 49 |
+
"resblock_kernel_sizes": [
|
| 50 |
+
3,
|
| 51 |
+
7,
|
| 52 |
+
11
|
| 53 |
+
],
|
| 54 |
+
"resblock_dilation_sizes": [
|
| 55 |
+
[
|
| 56 |
+
1,
|
| 57 |
+
3,
|
| 58 |
+
5
|
| 59 |
+
],
|
| 60 |
+
[
|
| 61 |
+
1,
|
| 62 |
+
3,
|
| 63 |
+
5
|
| 64 |
+
],
|
| 65 |
+
[
|
| 66 |
+
1,
|
| 67 |
+
3,
|
| 68 |
+
5
|
| 69 |
+
]
|
| 70 |
+
],
|
| 71 |
+
"upsample_rates": [
|
| 72 |
+
8,
|
| 73 |
+
8,
|
| 74 |
+
2,
|
| 75 |
+
2
|
| 76 |
+
],
|
| 77 |
+
"upsample_initial_channel": 512,
|
| 78 |
+
"upsample_kernel_sizes": [
|
| 79 |
+
16,
|
| 80 |
+
16,
|
| 81 |
+
4,
|
| 82 |
+
4
|
| 83 |
+
],
|
| 84 |
+
"n_layers_q": 3,
|
| 85 |
+
"use_spectral_norm": false
|
| 86 |
+
}
|
| 87 |
+
}
|
models/abp/vocab.txt
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_
|
| 2 |
+
t
|
| 3 |
+
e
|
| 4 |
+
b
|
| 5 |
+
ō
|
| 6 |
+
j
|
| 7 |
+
c
|
| 8 |
+
r
|
| 9 |
+
f
|
| 10 |
+
w
|
| 11 |
+
i
|
| 12 |
+
q
|
| 13 |
+
h
|
| 14 |
+
g
|
| 15 |
+
l
|
| 16 |
+
m
|
| 17 |
+
k
|
| 18 |
+
y
|
| 19 |
+
d
|
| 20 |
+
ā
|
| 21 |
+
s
|
| 22 |
+
'
|
| 23 |
+
a
|
| 24 |
+
n
|
| 25 |
+
x
|
| 26 |
+
6
|
| 27 |
+
o
|
| 28 |
+
-
|
| 29 |
+
p
|
| 30 |
+
u
|
| 31 |
+
|
| 32 |
+
v
|
| 33 |
+
z
|
models/aca/G_100000.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b6e02c38c06f326322663ec0e415aaaee53427226be1df1b3fa0ba91ebd6134
|
| 3 |
+
size 145482239
|
models/aca/config.json
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"train": {
|
| 3 |
+
"log_interval": 200,
|
| 4 |
+
"eval_interval": 1000,
|
| 5 |
+
"seed": 1234,
|
| 6 |
+
"epochs": 20000,
|
| 7 |
+
"learning_rate": 0.0002,
|
| 8 |
+
"betas": [
|
| 9 |
+
0.8,
|
| 10 |
+
0.99
|
| 11 |
+
],
|
| 12 |
+
"eps": 1e-09,
|
| 13 |
+
"batch_size": 64,
|
| 14 |
+
"fp16_run": true,
|
| 15 |
+
"lr_decay": 0.999875,
|
| 16 |
+
"segment_size": 8192,
|
| 17 |
+
"init_lr_ratio": 1,
|
| 18 |
+
"warmup_epochs": 0,
|
| 19 |
+
"c_mel": 45,
|
| 20 |
+
"c_kl": 1.0
|
| 21 |
+
},
|
| 22 |
+
"data": {
|
| 23 |
+
"training_files": "train.ltr",
|
| 24 |
+
"validation_files": "dev.ltr",
|
| 25 |
+
"text_cleaners": [
|
| 26 |
+
"transliteration_cleaners"
|
| 27 |
+
],
|
| 28 |
+
"max_wav_value": 32768.0,
|
| 29 |
+
"sampling_rate": 16000,
|
| 30 |
+
"filter_length": 1024,
|
| 31 |
+
"hop_length": 256,
|
| 32 |
+
"win_length": 1024,
|
| 33 |
+
"n_mel_channels": 80,
|
| 34 |
+
"mel_fmin": 0.0,
|
| 35 |
+
"mel_fmax": null,
|
| 36 |
+
"add_blank": true,
|
| 37 |
+
"n_speakers": 0,
|
| 38 |
+
"cleaned_text": true
|
| 39 |
+
},
|
| 40 |
+
"model": {
|
| 41 |
+
"inter_channels": 192,
|
| 42 |
+
"hidden_channels": 192,
|
| 43 |
+
"filter_channels": 768,
|
| 44 |
+
"n_heads": 2,
|
| 45 |
+
"n_layers": 6,
|
| 46 |
+
"kernel_size": 3,
|
| 47 |
+
"p_dropout": 0.1,
|
| 48 |
+
"resblock": "1",
|
| 49 |
+
"resblock_kernel_sizes": [
|
| 50 |
+
3,
|
| 51 |
+
7,
|
| 52 |
+
11
|
| 53 |
+
],
|
| 54 |
+
"resblock_dilation_sizes": [
|
| 55 |
+
[
|
| 56 |
+
1,
|
| 57 |
+
3,
|
| 58 |
+
5
|
| 59 |
+
],
|
| 60 |
+
[
|
| 61 |
+
1,
|
| 62 |
+
3,
|
| 63 |
+
5
|
| 64 |
+
],
|
| 65 |
+
[
|
| 66 |
+
1,
|
| 67 |
+
3,
|
| 68 |
+
5
|
| 69 |
+
]
|
| 70 |
+
],
|
| 71 |
+
"upsample_rates": [
|
| 72 |
+
8,
|
| 73 |
+
8,
|
| 74 |
+
2,
|
| 75 |
+
2
|
| 76 |
+
],
|
| 77 |
+
"upsample_initial_channel": 512,
|
| 78 |
+
"upsample_kernel_sizes": [
|
| 79 |
+
16,
|
| 80 |
+
16,
|
| 81 |
+
4,
|
| 82 |
+
4
|
| 83 |
+
],
|
| 84 |
+
"n_layers_q": 3,
|
| 85 |
+
"use_spectral_norm": false
|
| 86 |
+
}
|
| 87 |
+
}
|
models/aca/vocab.txt
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
a
|
| 2 |
+
|
|
| 3 |
+
i
|
| 4 |
+
n
|
| 5 |
+
á
|
| 6 |
+
c
|
| 7 |
+
e
|
| 8 |
+
u
|
| 9 |
+
l
|
| 10 |
+
r
|
| 11 |
+
w
|
| 12 |
+
j
|
| 13 |
+
s
|
| 14 |
+
í
|
| 15 |
+
m
|
| 16 |
+
é
|
| 17 |
+
o
|
| 18 |
+
'
|
| 19 |
+
h
|
| 20 |
+
t
|
| 21 |
+
y
|
| 22 |
+
b
|
| 23 |
+
d
|
| 24 |
+
ú
|
| 25 |
+
q
|
| 26 |
+
ó
|
| 27 |
+
p
|
| 28 |
+
—
|
| 29 |
+
g
|
| 30 |
+
f
|
| 31 |
+
z
|
| 32 |
+
v
|
| 33 |
+
x
|
| 34 |
+
ñ
|
| 35 |
+
|
models/acd/G_100000.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d1ffad2d099b4fbfe8b434922abb825e898b105276f5e18bdc794513400f9203
|
| 3 |
+
size 145476854
|
models/acd/config.json
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"train": {
|
| 3 |
+
"log_interval": 200,
|
| 4 |
+
"eval_interval": 1000,
|
| 5 |
+
"seed": 1234,
|
| 6 |
+
"epochs": 20000,
|
| 7 |
+
"learning_rate": 0.0002,
|
| 8 |
+
"betas": [
|
| 9 |
+
0.8,
|
| 10 |
+
0.99
|
| 11 |
+
],
|
| 12 |
+
"eps": 1e-09,
|
| 13 |
+
"batch_size": 64,
|
| 14 |
+
"fp16_run": true,
|
| 15 |
+
"lr_decay": 0.999875,
|
| 16 |
+
"segment_size": 8192,
|
| 17 |
+
"init_lr_ratio": 1,
|
| 18 |
+
"warmup_epochs": 0,
|
| 19 |
+
"c_mel": 45,
|
| 20 |
+
"c_kl": 1.0
|
| 21 |
+
},
|
| 22 |
+
"data": {
|
| 23 |
+
"training_files": "train.ltr",
|
| 24 |
+
"validation_files": "dev.ltr",
|
| 25 |
+
"text_cleaners": [
|
| 26 |
+
"transliteration_cleaners"
|
| 27 |
+
],
|
| 28 |
+
"max_wav_value": 32768.0,
|
| 29 |
+
"sampling_rate": 16000,
|
| 30 |
+
"filter_length": 1024,
|
| 31 |
+
"hop_length": 256,
|
| 32 |
+
"win_length": 1024,
|
| 33 |
+
"n_mel_channels": 80,
|
| 34 |
+
"mel_fmin": 0.0,
|
| 35 |
+
"mel_fmax": null,
|
| 36 |
+
"add_blank": true,
|
| 37 |
+
"n_speakers": 0,
|
| 38 |
+
"cleaned_text": true
|
| 39 |
+
},
|
| 40 |
+
"model": {
|
| 41 |
+
"inter_channels": 192,
|
| 42 |
+
"hidden_channels": 192,
|
| 43 |
+
"filter_channels": 768,
|
| 44 |
+
"n_heads": 2,
|
| 45 |
+
"n_layers": 6,
|
| 46 |
+
"kernel_size": 3,
|
| 47 |
+
"p_dropout": 0.1,
|
| 48 |
+
"resblock": "1",
|
| 49 |
+
"resblock_kernel_sizes": [
|
| 50 |
+
3,
|
| 51 |
+
7,
|
| 52 |
+
11
|
| 53 |
+
],
|
| 54 |
+
"resblock_dilation_sizes": [
|
| 55 |
+
[
|
| 56 |
+
1,
|
| 57 |
+
3,
|
| 58 |
+
5
|
| 59 |
+
],
|
| 60 |
+
[
|
| 61 |
+
1,
|
| 62 |
+
3,
|
| 63 |
+
5
|
| 64 |
+
],
|
| 65 |
+
[
|
| 66 |
+
1,
|
| 67 |
+
3,
|
| 68 |
+
5
|
| 69 |
+
]
|
| 70 |
+
],
|
| 71 |
+
"upsample_rates": [
|
| 72 |
+
8,
|
| 73 |
+
8,
|
| 74 |
+
2,
|
| 75 |
+
2
|
| 76 |
+
],
|
| 77 |
+
"upsample_initial_channel": 512,
|
| 78 |
+
"upsample_kernel_sizes": [
|
| 79 |
+
16,
|
| 80 |
+
16,
|
| 81 |
+
4,
|
| 82 |
+
4
|
| 83 |
+
],
|
| 84 |
+
"n_layers_q": 3,
|
| 85 |
+
"use_spectral_norm": false
|
| 86 |
+
}
|
| 87 |
+
}
|
models/acd/vocab.txt
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
|
| 2 |
+
a
|
| 3 |
+
ɛ
|
| 4 |
+
n
|
| 5 |
+
ɔ
|
| 6 |
+
i
|
| 7 |
+
o
|
| 8 |
+
m
|
| 9 |
+
y
|
| 10 |
+
e
|
| 11 |
+
u
|
| 12 |
+
g
|
| 13 |
+
s
|
| 14 |
+
k
|
| 15 |
+
b
|
| 16 |
+
r
|
| 17 |
+
l
|
| 18 |
+
d
|
| 19 |
+
w
|
| 20 |
+
f
|
| 21 |
+
-
|
| 22 |
+
t
|
| 23 |
+
p
|
| 24 |
+
'
|
| 25 |
+
ŋ
|
| 26 |
+
h
|
| 27 |
+
c
|
| 28 |
+
|
models/ace/G_100000.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d7b520bb31ef5f6fe27b350d1cdfafc5e0e7241fc2235c0e0ffd0d2232184892
|
| 3 |
+
size 145487623
|
models/ace/config.json
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"train": {
|
| 3 |
+
"log_interval": 200,
|
| 4 |
+
"eval_interval": 1000,
|
| 5 |
+
"seed": 1234,
|
| 6 |
+
"epochs": 20000,
|
| 7 |
+
"learning_rate": 0.0002,
|
| 8 |
+
"betas": [
|
| 9 |
+
0.8,
|
| 10 |
+
0.99
|
| 11 |
+
],
|
| 12 |
+
"eps": 1e-09,
|
| 13 |
+
"batch_size": 64,
|
| 14 |
+
"fp16_run": true,
|
| 15 |
+
"lr_decay": 0.999875,
|
| 16 |
+
"segment_size": 8192,
|
| 17 |
+
"init_lr_ratio": 1,
|
| 18 |
+
"warmup_epochs": 0,
|
| 19 |
+
"c_mel": 45,
|
| 20 |
+
"c_kl": 1.0
|
| 21 |
+
},
|
| 22 |
+
"data": {
|
| 23 |
+
"training_files": "train.ltr",
|
| 24 |
+
"validation_files": "dev.ltr",
|
| 25 |
+
"text_cleaners": [
|
| 26 |
+
"transliteration_cleaners"
|
| 27 |
+
],
|
| 28 |
+
"max_wav_value": 32768.0,
|
| 29 |
+
"sampling_rate": 16000,
|
| 30 |
+
"filter_length": 1024,
|
| 31 |
+
"hop_length": 256,
|
| 32 |
+
"win_length": 1024,
|
| 33 |
+
"n_mel_channels": 80,
|
| 34 |
+
"mel_fmin": 0.0,
|
| 35 |
+
"mel_fmax": null,
|
| 36 |
+
"add_blank": true,
|
| 37 |
+
"n_speakers": 0,
|
| 38 |
+
"cleaned_text": true
|
| 39 |
+
},
|
| 40 |
+
"model": {
|
| 41 |
+
"inter_channels": 192,
|
| 42 |
+
"hidden_channels": 192,
|
| 43 |
+
"filter_channels": 768,
|
| 44 |
+
"n_heads": 2,
|
| 45 |
+
"n_layers": 6,
|
| 46 |
+
"kernel_size": 3,
|
| 47 |
+
"p_dropout": 0.1,
|
| 48 |
+
"resblock": "1",
|
| 49 |
+
"resblock_kernel_sizes": [
|
| 50 |
+
3,
|
| 51 |
+
7,
|
| 52 |
+
11
|
| 53 |
+
],
|
| 54 |
+
"resblock_dilation_sizes": [
|
| 55 |
+
[
|
| 56 |
+
1,
|
| 57 |
+
3,
|
| 58 |
+
5
|
| 59 |
+
],
|
| 60 |
+
[
|
| 61 |
+
1,
|
| 62 |
+
3,
|
| 63 |
+
5
|
| 64 |
+
],
|
| 65 |
+
[
|
| 66 |
+
1,
|
| 67 |
+
3,
|
| 68 |
+
5
|
| 69 |
+
]
|
| 70 |
+
],
|
| 71 |
+
"upsample_rates": [
|
| 72 |
+
8,
|
| 73 |
+
8,
|
| 74 |
+
2,
|
| 75 |
+
2
|
| 76 |
+
],
|
| 77 |
+
"upsample_initial_channel": 512,
|
| 78 |
+
"upsample_kernel_sizes": [
|
| 79 |
+
16,
|
| 80 |
+
16,
|
| 81 |
+
4,
|
| 82 |
+
4
|
| 83 |
+
],
|
| 84 |
+
"n_layers_q": 3,
|
| 85 |
+
"use_spectral_norm": false
|
| 86 |
+
}
|
| 87 |
+
}
|
models/ace/vocab.txt
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
|
| 2 |
+
a
|
| 3 |
+
n
|
| 4 |
+
e
|
| 5 |
+
u
|
| 6 |
+
g
|
| 7 |
+
t
|
| 8 |
+
h
|
| 9 |
+
i
|
| 10 |
+
k
|
| 11 |
+
m
|
| 12 |
+
b
|
| 13 |
+
o
|
| 14 |
+
y
|
| 15 |
+
l
|
| 16 |
+
r
|
| 17 |
+
s
|
| 18 |
+
p
|
| 19 |
+
j
|
| 20 |
+
d
|
| 21 |
+
é
|
| 22 |
+
w
|
| 23 |
+
ô
|
| 24 |
+
ë
|
| 25 |
+
-
|
| 26 |
+
c
|
| 27 |
+
ö
|
| 28 |
+
á
|
| 29 |
+
ó
|
| 30 |
+
f
|
| 31 |
+
z
|
| 32 |
+
'
|
| 33 |
+
q
|
| 34 |
+
ú
|
| 35 |
+
`
|
| 36 |
+
0
|
| 37 |
+
6
|
| 38 |
+
4
|
| 39 |
+
3
|
| 40 |
+
1
|
| 41 |
+
2
|
| 42 |
+
|
models/acf/G_100000.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b77a841ab67ccc15b895c9fb24779b94d85b83e5267992dba49a66bcd6923f93
|
| 3 |
+
size 145480679
|
models/acf/config.json
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"train": {
|
| 3 |
+
"log_interval": 200,
|
| 4 |
+
"eval_interval": 1000,
|
| 5 |
+
"seed": 1234,
|
| 6 |
+
"epochs": 20000,
|
| 7 |
+
"learning_rate": 0.0002,
|
| 8 |
+
"betas": [
|
| 9 |
+
0.8,
|
| 10 |
+
0.99
|
| 11 |
+
],
|
| 12 |
+
"eps": 1e-09,
|
| 13 |
+
"batch_size": 64,
|
| 14 |
+
"fp16_run": true,
|
| 15 |
+
"lr_decay": 0.999875,
|
| 16 |
+
"segment_size": 8192,
|
| 17 |
+
"init_lr_ratio": 1,
|
| 18 |
+
"warmup_epochs": 0,
|
| 19 |
+
"c_mel": 45,
|
| 20 |
+
"c_kl": 1.0
|
| 21 |
+
},
|
| 22 |
+
"data": {
|
| 23 |
+
"training_files": "train.ltr",
|
| 24 |
+
"validation_files": "dev.ltr",
|
| 25 |
+
"text_cleaners": [
|
| 26 |
+
"transliteration_cleaners"
|
| 27 |
+
],
|
| 28 |
+
"max_wav_value": 32768.0,
|
| 29 |
+
"sampling_rate": 16000,
|
| 30 |
+
"filter_length": 1024,
|
| 31 |
+
"hop_length": 256,
|
| 32 |
+
"win_length": 1024,
|
| 33 |
+
"n_mel_channels": 80,
|
| 34 |
+
"mel_fmin": 0.0,
|
| 35 |
+
"mel_fmax": null,
|
| 36 |
+
"add_blank": true,
|
| 37 |
+
"n_speakers": 0,
|
| 38 |
+
"cleaned_text": true
|
| 39 |
+
},
|
| 40 |
+
"model": {
|
| 41 |
+
"inter_channels": 192,
|
| 42 |
+
"hidden_channels": 192,
|
| 43 |
+
"filter_channels": 768,
|
| 44 |
+
"n_heads": 2,
|
| 45 |
+
"n_layers": 6,
|
| 46 |
+
"kernel_size": 3,
|
| 47 |
+
"p_dropout": 0.1,
|
| 48 |
+
"resblock": "1",
|
| 49 |
+
"resblock_kernel_sizes": [
|
| 50 |
+
3,
|
| 51 |
+
7,
|
| 52 |
+
11
|
| 53 |
+
],
|
| 54 |
+
"resblock_dilation_sizes": [
|
| 55 |
+
[
|
| 56 |
+
1,
|
| 57 |
+
3,
|
| 58 |
+
5
|
| 59 |
+
],
|
| 60 |
+
[
|
| 61 |
+
1,
|
| 62 |
+
3,
|
| 63 |
+
5
|
| 64 |
+
],
|
| 65 |
+
[
|
| 66 |
+
1,
|
| 67 |
+
3,
|
| 68 |
+
5
|
| 69 |
+
]
|
| 70 |
+
],
|
| 71 |
+
"upsample_rates": [
|
| 72 |
+
8,
|
| 73 |
+
8,
|
| 74 |
+
2,
|
| 75 |
+
2
|
| 76 |
+
],
|
| 77 |
+
"upsample_initial_channel": 512,
|
| 78 |
+
"upsample_kernel_sizes": [
|
| 79 |
+
16,
|
| 80 |
+
16,
|
| 81 |
+
4,
|
| 82 |
+
4
|
| 83 |
+
],
|
| 84 |
+
"n_layers_q": 3,
|
| 85 |
+
"use_spectral_norm": false
|
| 86 |
+
}
|
| 87 |
+
}
|
models/acf/vocab.txt
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
|
| 2 |
+
a
|
| 3 |
+
n
|
| 4 |
+
é
|
| 5 |
+
i
|
| 6 |
+
o
|
| 7 |
+
s
|
| 8 |
+
t
|
| 9 |
+
k
|
| 10 |
+
y
|
| 11 |
+
p
|
| 12 |
+
l
|
| 13 |
+
w
|
| 14 |
+
m
|
| 15 |
+
è
|
| 16 |
+
u
|
| 17 |
+
d
|
| 18 |
+
-
|
| 19 |
+
e
|
| 20 |
+
b
|
| 21 |
+
v
|
| 22 |
+
j
|
| 23 |
+
ò
|
| 24 |
+
z
|
| 25 |
+
f
|
| 26 |
+
ʼ
|
| 27 |
+
h
|
| 28 |
+
g
|
| 29 |
+
c
|
| 30 |
+
r
|
| 31 |
+
—
|
| 32 |
+
'
|
| 33 |
+
|
models/ach/G_100000.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c7cffe85baf83584a9876299052071c35e4604a5192b9ecd6402a88d70a6d7a1
|
| 3 |
+
size 145476861
|
models/ach/config.json
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"train": {
|
| 3 |
+
"log_interval": 200,
|
| 4 |
+
"eval_interval": 1000,
|
| 5 |
+
"seed": 1234,
|
| 6 |
+
"epochs": 20000,
|
| 7 |
+
"learning_rate": 0.0002,
|
| 8 |
+
"betas": [
|
| 9 |
+
0.8,
|
| 10 |
+
0.99
|
| 11 |
+
],
|
| 12 |
+
"eps": 1e-09,
|
| 13 |
+
"batch_size": 64,
|
| 14 |
+
"fp16_run": true,
|
| 15 |
+
"lr_decay": 0.999875,
|
| 16 |
+
"segment_size": 8192,
|
| 17 |
+
"init_lr_ratio": 1,
|
| 18 |
+
"warmup_epochs": 0,
|
| 19 |
+
"c_mel": 45,
|
| 20 |
+
"c_kl": 1.0
|
| 21 |
+
},
|
| 22 |
+
"data": {
|
| 23 |
+
"training_files": "train.ltr",
|
| 24 |
+
"validation_files": "dev.ltr",
|
| 25 |
+
"text_cleaners": [
|
| 26 |
+
"transliteration_cleaners"
|
| 27 |
+
],
|
| 28 |
+
"max_wav_value": 32768.0,
|
| 29 |
+
"sampling_rate": 16000,
|
| 30 |
+
"filter_length": 1024,
|
| 31 |
+
"hop_length": 256,
|
| 32 |
+
"win_length": 1024,
|
| 33 |
+
"n_mel_channels": 80,
|
| 34 |
+
"mel_fmin": 0.0,
|
| 35 |
+
"mel_fmax": null,
|
| 36 |
+
"add_blank": true,
|
| 37 |
+
"n_speakers": 0,
|
| 38 |
+
"cleaned_text": true
|
| 39 |
+
},
|
| 40 |
+
"model": {
|
| 41 |
+
"inter_channels": 192,
|
| 42 |
+
"hidden_channels": 192,
|
| 43 |
+
"filter_channels": 768,
|
| 44 |
+
"n_heads": 2,
|
| 45 |
+
"n_layers": 6,
|
| 46 |
+
"kernel_size": 3,
|
| 47 |
+
"p_dropout": 0.1,
|
| 48 |
+
"resblock": "1",
|
| 49 |
+
"resblock_kernel_sizes": [
|
| 50 |
+
3,
|
| 51 |
+
7,
|
| 52 |
+
11
|
| 53 |
+
],
|
| 54 |
+
"resblock_dilation_sizes": [
|
| 55 |
+
[
|
| 56 |
+
1,
|
| 57 |
+
3,
|
| 58 |
+
5
|
| 59 |
+
],
|
| 60 |
+
[
|
| 61 |
+
1,
|
| 62 |
+
3,
|
| 63 |
+
5
|
| 64 |
+
],
|
| 65 |
+
[
|
| 66 |
+
1,
|
| 67 |
+
3,
|
| 68 |
+
5
|
| 69 |
+
]
|
| 70 |
+
],
|
| 71 |
+
"upsample_rates": [
|
| 72 |
+
8,
|
| 73 |
+
8,
|
| 74 |
+
2,
|
| 75 |
+
2
|
| 76 |
+
],
|
| 77 |
+
"upsample_initial_channel": 512,
|
| 78 |
+
"upsample_kernel_sizes": [
|
| 79 |
+
16,
|
| 80 |
+
16,
|
| 81 |
+
4,
|
| 82 |
+
4
|
| 83 |
+
],
|
| 84 |
+
"n_layers_q": 3,
|
| 85 |
+
"use_spectral_norm": false
|
| 86 |
+
}
|
| 87 |
+
}
|
models/ach/vocab.txt
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
|
| 2 |
+
o
|
| 3 |
+
a
|
| 4 |
+
i
|
| 5 |
+
e
|
| 6 |
+
k
|
| 7 |
+
n
|
| 8 |
+
m
|
| 9 |
+
w
|
| 10 |
+
t
|
| 11 |
+
u
|
| 12 |
+
y
|
| 13 |
+
l
|
| 14 |
+
c
|
| 15 |
+
d
|
| 16 |
+
b
|
| 17 |
+
g
|
| 18 |
+
r
|
| 19 |
+
p
|
| 20 |
+
ŋ
|
| 21 |
+
j
|
| 22 |
+
-
|
| 23 |
+
s
|
| 24 |
+
'
|
| 25 |
+
v
|
| 26 |
+
f
|
| 27 |
+
h
|
| 28 |
+
|
models/acn/G_100000.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:650c7c63e516d2c2c363fdc4f9ce8ac58f6b8c37c75fe1a1e4bbe1d633e8e8f0
|
| 3 |
+
size 145483865
|
models/acn/config.json
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"train": {
|
| 3 |
+
"log_interval": 200,
|
| 4 |
+
"eval_interval": 1000,
|
| 5 |
+
"seed": 1234,
|
| 6 |
+
"epochs": 20000,
|
| 7 |
+
"learning_rate": 0.0002,
|
| 8 |
+
"betas": [
|
| 9 |
+
0.8,
|
| 10 |
+
0.99
|
| 11 |
+
],
|
| 12 |
+
"eps": 1e-09,
|
| 13 |
+
"batch_size": 64,
|
| 14 |
+
"fp16_run": true,
|
| 15 |
+
"lr_decay": 0.999875,
|
| 16 |
+
"segment_size": 8192,
|
| 17 |
+
"init_lr_ratio": 1,
|
| 18 |
+
"warmup_epochs": 0,
|
| 19 |
+
"c_mel": 45,
|
| 20 |
+
"c_kl": 1.0
|
| 21 |
+
},
|
| 22 |
+
"data": {
|
| 23 |
+
"training_files": "train.ltr",
|
| 24 |
+
"validation_files": "dev.ltr",
|
| 25 |
+
"text_cleaners": [
|
| 26 |
+
"transliteration_cleaners"
|
| 27 |
+
],
|
| 28 |
+
"max_wav_value": 32768.0,
|
| 29 |
+
"sampling_rate": 16000,
|
| 30 |
+
"filter_length": 1024,
|
| 31 |
+
"hop_length": 256,
|
| 32 |
+
"win_length": 1024,
|
| 33 |
+
"n_mel_channels": 80,
|
| 34 |
+
"mel_fmin": 0.0,
|
| 35 |
+
"mel_fmax": null,
|
| 36 |
+
"add_blank": true,
|
| 37 |
+
"n_speakers": 0,
|
| 38 |
+
"cleaned_text": true
|
| 39 |
+
},
|
| 40 |
+
"model": {
|
| 41 |
+
"inter_channels": 192,
|
| 42 |
+
"hidden_channels": 192,
|
| 43 |
+
"filter_channels": 768,
|
| 44 |
+
"n_heads": 2,
|
| 45 |
+
"n_layers": 6,
|
| 46 |
+
"kernel_size": 3,
|
| 47 |
+
"p_dropout": 0.1,
|
| 48 |
+
"resblock": "1",
|
| 49 |
+
"resblock_kernel_sizes": [
|
| 50 |
+
3,
|
| 51 |
+
7,
|
| 52 |
+
11
|
| 53 |
+
],
|
| 54 |
+
"resblock_dilation_sizes": [
|
| 55 |
+
[
|
| 56 |
+
1,
|
| 57 |
+
3,
|
| 58 |
+
5
|
| 59 |
+
],
|
| 60 |
+
[
|
| 61 |
+
1,
|
| 62 |
+
3,
|
| 63 |
+
5
|
| 64 |
+
],
|
| 65 |
+
[
|
| 66 |
+
1,
|
| 67 |
+
3,
|
| 68 |
+
5
|
| 69 |
+
]
|
| 70 |
+
],
|
| 71 |
+
"upsample_rates": [
|
| 72 |
+
8,
|
| 73 |
+
8,
|
| 74 |
+
2,
|
| 75 |
+
2
|
| 76 |
+
],
|
| 77 |
+
"upsample_initial_channel": 512,
|
| 78 |
+
"upsample_kernel_sizes": [
|
| 79 |
+
16,
|
| 80 |
+
16,
|
| 81 |
+
4,
|
| 82 |
+
4
|
| 83 |
+
],
|
| 84 |
+
"n_layers_q": 3,
|
| 85 |
+
"use_spectral_norm": false
|
| 86 |
+
}
|
| 87 |
+
}
|
models/acn/vocab.txt
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
u
|
| 2 |
+
k
|
| 3 |
+
g
|
| 4 |
+
a
|
| 5 |
+
_
|
| 6 |
+
—
|
| 7 |
+
d
|
| 8 |
+
w
|
| 9 |
+
i
|
| 10 |
+
o
|
| 11 |
+
-
|
| 12 |
+
b
|
| 13 |
+
e
|
| 14 |
+
n
|
| 15 |
+
t
|
| 16 |
+
y
|
| 17 |
+
p
|
| 18 |
+
s
|
| 19 |
+
z
|
| 20 |
+
x
|
| 21 |
+
m
|
| 22 |
+
h
|
| 23 |
+
c
|
| 24 |
+
|
| 25 |
+
l
|
| 26 |
+
0
|
| 27 |
+
2
|
| 28 |
+
j
|
| 29 |
+
f
|
| 30 |
+
3
|
| 31 |
+
5
|
| 32 |
+
q
|
| 33 |
+
v
|
| 34 |
+
r
|
| 35 |
+
6
|
| 36 |
+
1
|
| 37 |
+
4
|
models/acr/G_100000.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fe163f49adbb77ba29f6a49a80ed6417f00de3ed26fc7bde862ef44fbb7bb706
|
| 3 |
+
size 145483857
|
models/acr/config.json
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"train": {
|
| 3 |
+
"log_interval": 200,
|
| 4 |
+
"eval_interval": 1000,
|
| 5 |
+
"seed": 1234,
|
| 6 |
+
"epochs": 20000,
|
| 7 |
+
"learning_rate": 0.0002,
|
| 8 |
+
"betas": [
|
| 9 |
+
0.8,
|
| 10 |
+
0.99
|
| 11 |
+
],
|
| 12 |
+
"eps": 1e-09,
|
| 13 |
+
"batch_size": 64,
|
| 14 |
+
"fp16_run": true,
|
| 15 |
+
"lr_decay": 0.999875,
|
| 16 |
+
"segment_size": 8192,
|
| 17 |
+
"init_lr_ratio": 1,
|
| 18 |
+
"warmup_epochs": 0,
|
| 19 |
+
"c_mel": 45,
|
| 20 |
+
"c_kl": 1.0
|
| 21 |
+
},
|
| 22 |
+
"data": {
|
| 23 |
+
"training_files": "train.ltr",
|
| 24 |
+
"validation_files": "dev.ltr",
|
| 25 |
+
"text_cleaners": [
|
| 26 |
+
"transliteration_cleaners"
|
| 27 |
+
],
|
| 28 |
+
"max_wav_value": 32768.0,
|
| 29 |
+
"sampling_rate": 16000,
|
| 30 |
+
"filter_length": 1024,
|
| 31 |
+
"hop_length": 256,
|
| 32 |
+
"win_length": 1024,
|
| 33 |
+
"n_mel_channels": 80,
|
| 34 |
+
"mel_fmin": 0.0,
|
| 35 |
+
"mel_fmax": null,
|
| 36 |
+
"add_blank": true,
|
| 37 |
+
"n_speakers": 0,
|
| 38 |
+
"cleaned_text": true
|
| 39 |
+
},
|
| 40 |
+
"model": {
|
| 41 |
+
"inter_channels": 192,
|
| 42 |
+
"hidden_channels": 192,
|
| 43 |
+
"filter_channels": 768,
|
| 44 |
+
"n_heads": 2,
|
| 45 |
+
"n_layers": 6,
|
| 46 |
+
"kernel_size": 3,
|
| 47 |
+
"p_dropout": 0.1,
|
| 48 |
+
"resblock": "1",
|
| 49 |
+
"resblock_kernel_sizes": [
|
| 50 |
+
3,
|
| 51 |
+
7,
|
| 52 |
+
11
|
| 53 |
+
],
|
| 54 |
+
"resblock_dilation_sizes": [
|
| 55 |
+
[
|
| 56 |
+
1,
|
| 57 |
+
3,
|
| 58 |
+
5
|
| 59 |
+
],
|
| 60 |
+
[
|
| 61 |
+
1,
|
| 62 |
+
3,
|
| 63 |
+
5
|
| 64 |
+
],
|
| 65 |
+
[
|
| 66 |
+
1,
|
| 67 |
+
3,
|
| 68 |
+
5
|
| 69 |
+
]
|
| 70 |
+
],
|
| 71 |
+
"upsample_rates": [
|
| 72 |
+
8,
|
| 73 |
+
8,
|
| 74 |
+
2,
|
| 75 |
+
2
|
| 76 |
+
],
|
| 77 |
+
"upsample_initial_channel": 512,
|
| 78 |
+
"upsample_kernel_sizes": [
|
| 79 |
+
16,
|
| 80 |
+
16,
|
| 81 |
+
4,
|
| 82 |
+
4
|
| 83 |
+
],
|
| 84 |
+
"n_layers_q": 3,
|
| 85 |
+
"use_spectral_norm": false
|
| 86 |
+
}
|
| 87 |
+
}
|
models/acr/vocab.txt
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
|
| 2 |
+
a
|
| 3 |
+
i
|
| 4 |
+
'
|
| 5 |
+
k
|
| 6 |
+
r
|
| 7 |
+
u
|
| 8 |
+
e
|
| 9 |
+
j
|
| 10 |
+
n
|
| 11 |
+
o
|
| 12 |
+
c
|
| 13 |
+
l
|
| 14 |
+
h
|
| 15 |
+
t
|
| 16 |
+
q
|
| 17 |
+
w
|
| 18 |
+
x
|
| 19 |
+
m
|
| 20 |
+
b
|
| 21 |
+
s
|
| 22 |
+
y
|
| 23 |
+
p
|
| 24 |
+
z
|
| 25 |
+
d
|
| 26 |
+
—
|
| 27 |
+
ú
|
| 28 |
+
g
|
| 29 |
+
á
|
| 30 |
+
é
|
| 31 |
+
ó
|
| 32 |
+
f
|
| 33 |
+
í
|
| 34 |
+
v
|
| 35 |
+
-
|
| 36 |
+
ñ
|
| 37 |
+
|
models/acu/G_100000.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b73f60dc370d4b9603a8b4f49a39f38b9ba104cc4873804e37a3742f669656bf
|
| 3 |
+
size 145482199
|
models/acu/config.json
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"train": {
|
| 3 |
+
"log_interval": 200,
|
| 4 |
+
"eval_interval": 1000,
|
| 5 |
+
"seed": 1234,
|
| 6 |
+
"epochs": 20000,
|
| 7 |
+
"learning_rate": 0.0002,
|
| 8 |
+
"betas": [
|
| 9 |
+
0.8,
|
| 10 |
+
0.99
|
| 11 |
+
],
|
| 12 |
+
"eps": 1e-09,
|
| 13 |
+
"batch_size": 64,
|
| 14 |
+
"fp16_run": true,
|
| 15 |
+
"lr_decay": 0.999875,
|
| 16 |
+
"segment_size": 8192,
|
| 17 |
+
"init_lr_ratio": 1,
|
| 18 |
+
"warmup_epochs": 0,
|
| 19 |
+
"c_mel": 45,
|
| 20 |
+
"c_kl": 1.0
|
| 21 |
+
},
|
| 22 |
+
"data": {
|
| 23 |
+
"training_files": "train.ltr",
|
| 24 |
+
"validation_files": "dev.ltr",
|
| 25 |
+
"text_cleaners": [
|
| 26 |
+
"transliteration_cleaners"
|
| 27 |
+
],
|
| 28 |
+
"max_wav_value": 32768.0,
|
| 29 |
+
"sampling_rate": 16000,
|
| 30 |
+
"filter_length": 1024,
|
| 31 |
+
"hop_length": 256,
|
| 32 |
+
"win_length": 1024,
|
| 33 |
+
"n_mel_channels": 80,
|
| 34 |
+
"mel_fmin": 0.0,
|
| 35 |
+
"mel_fmax": null,
|
| 36 |
+
"add_blank": true,
|
| 37 |
+
"n_speakers": 0,
|
| 38 |
+
"cleaned_text": true
|
| 39 |
+
},
|
| 40 |
+
"model": {
|
| 41 |
+
"inter_channels": 192,
|
| 42 |
+
"hidden_channels": 192,
|
| 43 |
+
"filter_channels": 768,
|
| 44 |
+
"n_heads": 2,
|
| 45 |
+
"n_layers": 6,
|
| 46 |
+
"kernel_size": 3,
|
| 47 |
+
"p_dropout": 0.1,
|
| 48 |
+
"resblock": "1",
|
| 49 |
+
"resblock_kernel_sizes": [
|
| 50 |
+
3,
|
| 51 |
+
7,
|
| 52 |
+
11
|
| 53 |
+
],
|
| 54 |
+
"resblock_dilation_sizes": [
|
| 55 |
+
[
|
| 56 |
+
1,
|
| 57 |
+
3,
|
| 58 |
+
5
|
| 59 |
+
],
|
| 60 |
+
[
|
| 61 |
+
1,
|
| 62 |
+
3,
|
| 63 |
+
5
|
| 64 |
+
],
|
| 65 |
+
[
|
| 66 |
+
1,
|
| 67 |
+
3,
|
| 68 |
+
5
|
| 69 |
+
]
|
| 70 |
+
],
|
| 71 |
+
"upsample_rates": [
|
| 72 |
+
8,
|
| 73 |
+
8,
|
| 74 |
+
2,
|
| 75 |
+
2
|
| 76 |
+
],
|
| 77 |
+
"upsample_initial_channel": 512,
|
| 78 |
+
"upsample_kernel_sizes": [
|
| 79 |
+
16,
|
| 80 |
+
16,
|
| 81 |
+
4,
|
| 82 |
+
4
|
| 83 |
+
],
|
| 84 |
+
"n_layers_q": 3,
|
| 85 |
+
"use_spectral_norm": false
|
| 86 |
+
}
|
| 87 |
+
}
|
models/acu/vocab.txt
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
a
|
| 2 |
+
|
|
| 3 |
+
i
|
| 4 |
+
n
|
| 5 |
+
u
|
| 6 |
+
t
|
| 7 |
+
r
|
| 8 |
+
m
|
| 9 |
+
k
|
| 10 |
+
s
|
| 11 |
+
e
|
| 12 |
+
h
|
| 13 |
+
j
|
| 14 |
+
c
|
| 15 |
+
w
|
| 16 |
+
y
|
| 17 |
+
p
|
| 18 |
+
g
|
| 19 |
+
o
|
| 20 |
+
í
|
| 21 |
+
—
|
| 22 |
+
ú
|
| 23 |
+
d
|
| 24 |
+
l
|
| 25 |
+
é
|
| 26 |
+
á
|
| 27 |
+
b
|
| 28 |
+
f
|
| 29 |
+
v
|
| 30 |
+
ó
|
| 31 |
+
z
|
| 32 |
+
q
|
| 33 |
+
x
|
| 34 |
+
ñ
|
| 35 |
+
|
models/ade/G_100000.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f15657ac66a20bbf85a3ff8968ac2f5f1306fe124dbf7557c9ebb35f63db43d7
|
| 3 |
+
size 145486083
|
models/ade/config.json
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"train": {
|
| 3 |
+
"log_interval": 200,
|
| 4 |
+
"eval_interval": 1000,
|
| 5 |
+
"seed": 1234,
|
| 6 |
+
"epochs": 20000,
|
| 7 |
+
"learning_rate": 0.0002,
|
| 8 |
+
"betas": [
|
| 9 |
+
0.8,
|
| 10 |
+
0.99
|
| 11 |
+
],
|
| 12 |
+
"eps": 1e-09,
|
| 13 |
+
"batch_size": 64,
|
| 14 |
+
"fp16_run": true,
|
| 15 |
+
"lr_decay": 0.999875,
|
| 16 |
+
"segment_size": 8192,
|
| 17 |
+
"init_lr_ratio": 1,
|
| 18 |
+
"warmup_epochs": 0,
|
| 19 |
+
"c_mel": 45,
|
| 20 |
+
"c_kl": 1.0
|
| 21 |
+
},
|
| 22 |
+
"data": {
|
| 23 |
+
"training_files": "train.ltr",
|
| 24 |
+
"validation_files": "dev.ltr",
|
| 25 |
+
"text_cleaners": [
|
| 26 |
+
"transliteration_cleaners"
|
| 27 |
+
],
|
| 28 |
+
"max_wav_value": 32768.0,
|
| 29 |
+
"sampling_rate": 16000,
|
| 30 |
+
"filter_length": 1024,
|
| 31 |
+
"hop_length": 256,
|
| 32 |
+
"win_length": 1024,
|
| 33 |
+
"n_mel_channels": 80,
|
| 34 |
+
"mel_fmin": 0.0,
|
| 35 |
+
"mel_fmax": null,
|
| 36 |
+
"add_blank": true,
|
| 37 |
+
"n_speakers": 0,
|
| 38 |
+
"cleaned_text": true
|
| 39 |
+
},
|
| 40 |
+
"model": {
|
| 41 |
+
"inter_channels": 192,
|
| 42 |
+
"hidden_channels": 192,
|
| 43 |
+
"filter_channels": 768,
|
| 44 |
+
"n_heads": 2,
|
| 45 |
+
"n_layers": 6,
|
| 46 |
+
"kernel_size": 3,
|
| 47 |
+
"p_dropout": 0.1,
|
| 48 |
+
"resblock": "1",
|
| 49 |
+
"resblock_kernel_sizes": [
|
| 50 |
+
3,
|
| 51 |
+
7,
|
| 52 |
+
11
|
| 53 |
+
],
|
| 54 |
+
"resblock_dilation_sizes": [
|
| 55 |
+
[
|
| 56 |
+
1,
|
| 57 |
+
3,
|
| 58 |
+
5
|
| 59 |
+
],
|
| 60 |
+
[
|
| 61 |
+
1,
|
| 62 |
+
3,
|
| 63 |
+
5
|
| 64 |
+
],
|
| 65 |
+
[
|
| 66 |
+
1,
|
| 67 |
+
3,
|
| 68 |
+
5
|
| 69 |
+
]
|
| 70 |
+
],
|
| 71 |
+
"upsample_rates": [
|
| 72 |
+
8,
|
| 73 |
+
8,
|
| 74 |
+
2,
|
| 75 |
+
2
|
| 76 |
+
],
|
| 77 |
+
"upsample_initial_channel": 512,
|
| 78 |
+
"upsample_kernel_sizes": [
|
| 79 |
+
16,
|
| 80 |
+
16,
|
| 81 |
+
4,
|
| 82 |
+
4
|
| 83 |
+
],
|
| 84 |
+
"n_layers_q": 3,
|
| 85 |
+
"use_spectral_norm": false
|
| 86 |
+
}
|
| 87 |
+
}
|
models/ade/vocab.txt
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
|
| 2 |
+
a
|
| 3 |
+
â
|
| 4 |
+
n
|
| 5 |
+
e
|
| 6 |
+
b
|
| 7 |
+
g
|
| 8 |
+
æ
|
| 9 |
+
i
|
| 10 |
+
t
|
| 11 |
+
w
|
| 12 |
+
ô
|
| 13 |
+
y
|
| 14 |
+
k
|
| 15 |
+
o
|
| 16 |
+
r
|
| 17 |
+
l
|
| 18 |
+
u
|
| 19 |
+
d
|
| 20 |
+
m
|
| 21 |
+
f
|
| 22 |
+
s
|
| 23 |
+
û
|
| 24 |
+
p
|
| 25 |
+
à
|
| 26 |
+
-
|
| 27 |
+
è
|
| 28 |
+
ã
|
| 29 |
+
õ
|
| 30 |
+
ù
|
| 31 |
+
î
|
| 32 |
+
å
|
| 33 |
+
ì
|
| 34 |
+
ü
|
| 35 |
+
ǹ
|
| 36 |
+
ò
|
| 37 |
+
h
|
| 38 |
+
'
|
| 39 |
+
c
|
| 40 |
+
|
models/adh/G_100000.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04406c957a29e5e62b97e3b528868d11cafbec74886e242d56f8b3501a40bebf
|
| 3 |
+
size 145477721
|
models/adh/config.json
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"train": {
|
| 3 |
+
"log_interval": 200,
|
| 4 |
+
"eval_interval": 1000,
|
| 5 |
+
"seed": 1234,
|
| 6 |
+
"epochs": 20000,
|
| 7 |
+
"learning_rate": 0.0002,
|
| 8 |
+
"betas": [
|
| 9 |
+
0.8,
|
| 10 |
+
0.99
|
| 11 |
+
],
|
| 12 |
+
"eps": 1e-09,
|
| 13 |
+
"batch_size": 64,
|
| 14 |
+
"fp16_run": true,
|
| 15 |
+
"lr_decay": 0.999875,
|
| 16 |
+
"segment_size": 8192,
|
| 17 |
+
"init_lr_ratio": 1,
|
| 18 |
+
"warmup_epochs": 0,
|
| 19 |
+
"c_mel": 45,
|
| 20 |
+
"c_kl": 1.0
|
| 21 |
+
},
|
| 22 |
+
"data": {
|
| 23 |
+
"training_files": "train.ltr",
|
| 24 |
+
"validation_files": "dev.ltr",
|
| 25 |
+
"text_cleaners": [
|
| 26 |
+
"transliteration_cleaners"
|
| 27 |
+
],
|
| 28 |
+
"max_wav_value": 32768.0,
|
| 29 |
+
"sampling_rate": 16000,
|
| 30 |
+
"filter_length": 1024,
|
| 31 |
+
"hop_length": 256,
|
| 32 |
+
"win_length": 1024,
|
| 33 |
+
"n_mel_channels": 80,
|
| 34 |
+
"mel_fmin": 0.0,
|
| 35 |
+
"mel_fmax": null,
|
| 36 |
+
"add_blank": true,
|
| 37 |
+
"n_speakers": 0,
|
| 38 |
+
"cleaned_text": true
|
| 39 |
+
},
|
| 40 |
+
"model": {
|
| 41 |
+
"inter_channels": 192,
|
| 42 |
+
"hidden_channels": 192,
|
| 43 |
+
"filter_channels": 768,
|
| 44 |
+
"n_heads": 2,
|
| 45 |
+
"n_layers": 6,
|
| 46 |
+
"kernel_size": 3,
|
| 47 |
+
"p_dropout": 0.1,
|
| 48 |
+
"resblock": "1",
|
| 49 |
+
"resblock_kernel_sizes": [
|
| 50 |
+
3,
|
| 51 |
+
7,
|
| 52 |
+
11
|
| 53 |
+
],
|
| 54 |
+
"resblock_dilation_sizes": [
|
| 55 |
+
[
|
| 56 |
+
1,
|
| 57 |
+
3,
|
| 58 |
+
5
|
| 59 |
+
],
|
| 60 |
+
[
|
| 61 |
+
1,
|
| 62 |
+
3,
|
| 63 |
+
5
|
| 64 |
+
],
|
| 65 |
+
[
|
| 66 |
+
1,
|
| 67 |
+
3,
|
| 68 |
+
5
|
| 69 |
+
]
|
| 70 |
+
],
|
| 71 |
+
"upsample_rates": [
|
| 72 |
+
8,
|
| 73 |
+
8,
|
| 74 |
+
2,
|
| 75 |
+
2
|
| 76 |
+
],
|
| 77 |
+
"upsample_initial_channel": 512,
|
| 78 |
+
"upsample_kernel_sizes": [
|
| 79 |
+
16,
|
| 80 |
+
16,
|
| 81 |
+
4,
|
| 82 |
+
4
|
| 83 |
+
],
|
| 84 |
+
"n_layers_q": 3,
|
| 85 |
+
"use_spectral_norm": false
|
| 86 |
+
}
|
| 87 |
+
}
|
models/adh/vocab.txt
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
|
| 2 |
+
o
|
| 3 |
+
i
|
| 4 |
+
a
|
| 5 |
+
e
|
| 6 |
+
n
|
| 7 |
+
k
|
| 8 |
+
m
|
| 9 |
+
w
|
| 10 |
+
r
|
| 11 |
+
y
|
| 12 |
+
h
|
| 13 |
+
t
|
| 14 |
+
d
|
| 15 |
+
j
|
| 16 |
+
g
|
| 17 |
+
u
|
| 18 |
+
l
|
| 19 |
+
p
|
| 20 |
+
c
|
| 21 |
+
b
|
| 22 |
+
ŋ
|
| 23 |
+
s
|
| 24 |
+
f
|
| 25 |
+
'
|
| 26 |
+
z
|
| 27 |
+
v
|
| 28 |
+
-
|
| 29 |
+
|
models/adj/G_100000.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:256854868625effe7ef0801e3467e22e74986dc465dc3af39f5ee138ffa009a0
|
| 3 |
+
size 145490681
|
models/adj/config.json
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"train": {
|
| 3 |
+
"log_interval": 200,
|
| 4 |
+
"eval_interval": 1000,
|
| 5 |
+
"seed": 1234,
|
| 6 |
+
"epochs": 20000,
|
| 7 |
+
"learning_rate": 0.0002,
|
| 8 |
+
"betas": [
|
| 9 |
+
0.8,
|
| 10 |
+
0.99
|
| 11 |
+
],
|
| 12 |
+
"eps": 1e-09,
|
| 13 |
+
"batch_size": 64,
|
| 14 |
+
"fp16_run": true,
|
| 15 |
+
"lr_decay": 0.999875,
|
| 16 |
+
"segment_size": 8192,
|
| 17 |
+
"init_lr_ratio": 1,
|
| 18 |
+
"warmup_epochs": 0,
|
| 19 |
+
"c_mel": 45,
|
| 20 |
+
"c_kl": 1.0
|
| 21 |
+
},
|
| 22 |
+
"data": {
|
| 23 |
+
"training_files": "train.ltr",
|
| 24 |
+
"validation_files": "dev.ltr",
|
| 25 |
+
"text_cleaners": [
|
| 26 |
+
"transliteration_cleaners"
|
| 27 |
+
],
|
| 28 |
+
"max_wav_value": 32768.0,
|
| 29 |
+
"sampling_rate": 16000,
|
| 30 |
+
"filter_length": 1024,
|
| 31 |
+
"hop_length": 256,
|
| 32 |
+
"win_length": 1024,
|
| 33 |
+
"n_mel_channels": 80,
|
| 34 |
+
"mel_fmin": 0.0,
|
| 35 |
+
"mel_fmax": null,
|
| 36 |
+
"add_blank": true,
|
| 37 |
+
"n_speakers": 0,
|
| 38 |
+
"cleaned_text": true
|
| 39 |
+
},
|
| 40 |
+
"model": {
|
| 41 |
+
"inter_channels": 192,
|
| 42 |
+
"hidden_channels": 192,
|
| 43 |
+
"filter_channels": 768,
|
| 44 |
+
"n_heads": 2,
|
| 45 |
+
"n_layers": 6,
|
| 46 |
+
"kernel_size": 3,
|
| 47 |
+
"p_dropout": 0.1,
|
| 48 |
+
"resblock": "1",
|
| 49 |
+
"resblock_kernel_sizes": [
|
| 50 |
+
3,
|
| 51 |
+
7,
|
| 52 |
+
11
|
| 53 |
+
],
|
| 54 |
+
"resblock_dilation_sizes": [
|
| 55 |
+
[
|
| 56 |
+
1,
|
| 57 |
+
3,
|
| 58 |
+
5
|
| 59 |
+
],
|
| 60 |
+
[
|
| 61 |
+
1,
|
| 62 |
+
3,
|
| 63 |
+
5
|
| 64 |
+
],
|
| 65 |
+
[
|
| 66 |
+
1,
|
| 67 |
+
3,
|
| 68 |
+
5
|
| 69 |
+
]
|
| 70 |
+
],
|
| 71 |
+
"upsample_rates": [
|
| 72 |
+
8,
|
| 73 |
+
8,
|
| 74 |
+
2,
|
| 75 |
+
2
|
| 76 |
+
],
|
| 77 |
+
"upsample_initial_channel": 512,
|
| 78 |
+
"upsample_kernel_sizes": [
|
| 79 |
+
16,
|
| 80 |
+
16,
|
| 81 |
+
4,
|
| 82 |
+
4
|
| 83 |
+
],
|
| 84 |
+
"n_layers_q": 3,
|
| 85 |
+
"use_spectral_norm": false
|
| 86 |
+
}
|
| 87 |
+
}
|
models/adj/vocab.txt
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
|
| 2 |
+
a
|
| 3 |
+
e
|
| 4 |
+
ɛ
|
| 5 |
+
n
|
| 6 |
+
m
|
| 7 |
+
l
|
| 8 |
+
k
|
| 9 |
+
i
|
| 10 |
+
ŋ
|
| 11 |
+
'
|
| 12 |
+
s
|
| 13 |
+
b
|
| 14 |
+
o
|
| 15 |
+
y
|
| 16 |
+
w
|
| 17 |
+
ɔ
|
| 18 |
+
u
|
| 19 |
+
r
|
| 20 |
+
g
|
| 21 |
+
c
|
| 22 |
+
t
|
| 23 |
+
d
|
| 24 |
+
j
|
| 25 |
+
f
|
| 26 |
+
p
|
| 27 |
+
-
|
| 28 |
+
ó
|
| 29 |
+
́
|
| 30 |
+
í
|
| 31 |
+
̀
|
| 32 |
+
á
|
| 33 |
+
h
|
| 34 |
+
é
|
| 35 |
+
ì
|
| 36 |
+
ò
|
| 37 |
+
ú
|
| 38 |
+
à
|
| 39 |
+
ê
|
| 40 |
+
v
|
| 41 |
+
ù
|
| 42 |
+
̂
|
| 43 |
+
è
|
| 44 |
+
z
|
| 45 |
+
ô
|
| 46 |
+
|
models/adx/G_100000.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:62192b382120334b39670d3ae5567159358eb66361588f1b42e6cce52b825e83
|
| 3 |
+
size 145500761
|
models/adx/config.json
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"train": {
|
| 3 |
+
"log_interval": 200,
|
| 4 |
+
"eval_interval": 1000,
|
| 5 |
+
"seed": 1234,
|
| 6 |
+
"epochs": 20000,
|
| 7 |
+
"learning_rate": 0.0002,
|
| 8 |
+
"betas": [
|
| 9 |
+
0.8,
|
| 10 |
+
0.99
|
| 11 |
+
],
|
| 12 |
+
"eps": 1e-09,
|
| 13 |
+
"batch_size": 64,
|
| 14 |
+
"fp16_run": true,
|
| 15 |
+
"lr_decay": 0.999875,
|
| 16 |
+
"segment_size": 8192,
|
| 17 |
+
"init_lr_ratio": 1,
|
| 18 |
+
"warmup_epochs": 0,
|
| 19 |
+
"c_mel": 45,
|
| 20 |
+
"c_kl": 1.0
|
| 21 |
+
},
|
| 22 |
+
"data": {
|
| 23 |
+
"training_files": "train.ltr",
|
| 24 |
+
"validation_files": "dev.ltr",
|
| 25 |
+
"text_cleaners": [
|
| 26 |
+
"transliteration_cleaners"
|
| 27 |
+
],
|
| 28 |
+
"max_wav_value": 32768.0,
|
| 29 |
+
"sampling_rate": 16000,
|
| 30 |
+
"filter_length": 1024,
|
| 31 |
+
"hop_length": 256,
|
| 32 |
+
"win_length": 1024,
|
| 33 |
+
"n_mel_channels": 80,
|
| 34 |
+
"mel_fmin": 0.0,
|
| 35 |
+
"mel_fmax": null,
|
| 36 |
+
"add_blank": true,
|
| 37 |
+
"n_speakers": 0,
|
| 38 |
+
"cleaned_text": true
|
| 39 |
+
},
|
| 40 |
+
"model": {
|
| 41 |
+
"inter_channels": 192,
|
| 42 |
+
"hidden_channels": 192,
|
| 43 |
+
"filter_channels": 768,
|
| 44 |
+
"n_heads": 2,
|
| 45 |
+
"n_layers": 6,
|
| 46 |
+
"kernel_size": 3,
|
| 47 |
+
"p_dropout": 0.1,
|
| 48 |
+
"resblock": "1",
|
| 49 |
+
"resblock_kernel_sizes": [
|
| 50 |
+
3,
|
| 51 |
+
7,
|
| 52 |
+
11
|
| 53 |
+
],
|
| 54 |
+
"resblock_dilation_sizes": [
|
| 55 |
+
[
|
| 56 |
+
1,
|
| 57 |
+
3,
|
| 58 |
+
5
|
| 59 |
+
],
|
| 60 |
+
[
|
| 61 |
+
1,
|
| 62 |
+
3,
|
| 63 |
+
5
|
| 64 |
+
],
|
| 65 |
+
[
|
| 66 |
+
1,
|
| 67 |
+
3,
|
| 68 |
+
5
|
| 69 |
+
]
|
| 70 |
+
],
|
| 71 |
+
"upsample_rates": [
|
| 72 |
+
8,
|
| 73 |
+
8,
|
| 74 |
+
2,
|
| 75 |
+
2
|
| 76 |
+
],
|
| 77 |
+
"upsample_initial_channel": 512,
|
| 78 |
+
"upsample_kernel_sizes": [
|
| 79 |
+
16,
|
| 80 |
+
16,
|
| 81 |
+
4,
|
| 82 |
+
4
|
| 83 |
+
],
|
| 84 |
+
"n_layers_q": 3,
|
| 85 |
+
"use_spectral_norm": false
|
| 86 |
+
}
|
| 87 |
+
}
|
models/adx/vocab.txt
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
ཤ
|
| 2 |
+
ྐ
|
| 3 |
+
ི
|
| 4 |
+
འ
|
| 5 |
+
ཞ
|
| 6 |
+
|
| 7 |
+
ྩ
|
| 8 |
+
པ
|
| 9 |
+
ྫ
|
| 10 |
+
ྱ
|
| 11 |
+
ྭ
|
| 12 |
+
ཙ
|
| 13 |
+
ད
|
| 14 |
+
ཁ
|
| 15 |
+
ྔ
|
| 16 |
+
ྷ
|
| 17 |
+
ུ
|
| 18 |
+
ེ
|
| 19 |
+
ོ
|
| 20 |
+
_
|
| 21 |
+
ྡ
|
| 22 |
+
ཆ
|
| 23 |
+
མ
|
| 24 |
+
ྟ
|
| 25 |
+
ྤ
|
| 26 |
+
ན
|
| 27 |
+
ཉ
|
| 28 |
+
ཛ
|
| 29 |
+
ཊ
|
| 30 |
+
ཏ
|
| 31 |
+
ྦ
|
| 32 |
+
ྨ
|
| 33 |
+
ང
|
| 34 |
+
ཪ
|
| 35 |
+
ཟ
|
| 36 |
+
ས
|
| 37 |
+
ཚ
|
| 38 |
+
ྣ
|
| 39 |
+
ླ
|
| 40 |
+
ཇ
|
| 41 |
+
ྗ
|
| 42 |
+
ཨ
|
| 43 |
+
ྙ
|
| 44 |
+
ྒ
|
| 45 |
+
བ
|
| 46 |
+
ཅ
|
| 47 |
+
ཧ
|
| 48 |
+
ྲ
|
| 49 |
+
ཝ
|
| 50 |
+
ཡ
|
| 51 |
+
ཱ
|
| 52 |
+
ག
|
| 53 |
+
ཀ
|
| 54 |
+
ར
|
| 55 |
+
ཐ
|
| 56 |
+
ཕ
|
| 57 |
+
ྕ
|
| 58 |
+
ལ
|
| 59 |
+
་
|
models/aeu/G_100000.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a2d9b805eb84438ab960cb51ee261fdaf62280f1b1c0ec27449692ecc857ff8f
|
| 3 |
+
size 145474559
|
models/aeu/config.json
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"train": {
|
| 3 |
+
"log_interval": 200,
|
| 4 |
+
"eval_interval": 1000,
|
| 5 |
+
"seed": 1234,
|
| 6 |
+
"epochs": 20000,
|
| 7 |
+
"learning_rate": 0.0002,
|
| 8 |
+
"betas": [
|
| 9 |
+
0.8,
|
| 10 |
+
0.99
|
| 11 |
+
],
|
| 12 |
+
"eps": 1e-09,
|
| 13 |
+
"batch_size": 64,
|
| 14 |
+
"fp16_run": true,
|
| 15 |
+
"lr_decay": 0.999875,
|
| 16 |
+
"segment_size": 8192,
|
| 17 |
+
"init_lr_ratio": 1,
|
| 18 |
+
"warmup_epochs": 0,
|
| 19 |
+
"c_mel": 45,
|
| 20 |
+
"c_kl": 1.0
|
| 21 |
+
},
|
| 22 |
+
"data": {
|
| 23 |
+
"training_files": "train.ltr",
|
| 24 |
+
"validation_files": "dev.ltr",
|
| 25 |
+
"text_cleaners": [
|
| 26 |
+
"transliteration_cleaners"
|
| 27 |
+
],
|
| 28 |
+
"max_wav_value": 32768.0,
|
| 29 |
+
"sampling_rate": 16000,
|
| 30 |
+
"filter_length": 1024,
|
| 31 |
+
"hop_length": 256,
|
| 32 |
+
"win_length": 1024,
|
| 33 |
+
"n_mel_channels": 80,
|
| 34 |
+
"mel_fmin": 0.0,
|
| 35 |
+
"mel_fmax": null,
|
| 36 |
+
"add_blank": true,
|
| 37 |
+
"n_speakers": 0,
|
| 38 |
+
"cleaned_text": true
|
| 39 |
+
},
|
| 40 |
+
"model": {
|
| 41 |
+
"inter_channels": 192,
|
| 42 |
+
"hidden_channels": 192,
|
| 43 |
+
"filter_channels": 768,
|
| 44 |
+
"n_heads": 2,
|
| 45 |
+
"n_layers": 6,
|
| 46 |
+
"kernel_size": 3,
|
| 47 |
+
"p_dropout": 0.1,
|
| 48 |
+
"resblock": "1",
|
| 49 |
+
"resblock_kernel_sizes": [
|
| 50 |
+
3,
|
| 51 |
+
7,
|
| 52 |
+
11
|
| 53 |
+
],
|
| 54 |
+
"resblock_dilation_sizes": [
|
| 55 |
+
[
|
| 56 |
+
1,
|
| 57 |
+
3,
|
| 58 |
+
5
|
| 59 |
+
],
|
| 60 |
+
[
|
| 61 |
+
1,
|
| 62 |
+
3,
|
| 63 |
+
5
|
| 64 |
+
],
|
| 65 |
+
[
|
| 66 |
+
1,
|
| 67 |
+
3,
|
| 68 |
+
5
|
| 69 |
+
]
|
| 70 |
+
],
|
| 71 |
+
"upsample_rates": [
|
| 72 |
+
8,
|
| 73 |
+
8,
|
| 74 |
+
2,
|
| 75 |
+
2
|
| 76 |
+
],
|
| 77 |
+
"upsample_initial_channel": 512,
|
| 78 |
+
"upsample_kernel_sizes": [
|
| 79 |
+
16,
|
| 80 |
+
16,
|
| 81 |
+
4,
|
| 82 |
+
4
|
| 83 |
+
],
|
| 84 |
+
"n_layers_q": 3,
|
| 85 |
+
"use_spectral_norm": false
|
| 86 |
+
}
|
| 87 |
+
}
|
models/aeu/vocab.txt
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
b
|
| 2 |
+
j
|
| 3 |
+
g
|
| 4 |
+
t
|
| 5 |
+
u
|
| 6 |
+
-
|
| 7 |
+
k
|
| 8 |
+
q
|
| 9 |
+
s
|
| 10 |
+
i
|
| 11 |
+
e
|
| 12 |
+
l
|
| 13 |
+
o
|
| 14 |
+
p
|
| 15 |
+
_
|
| 16 |
+
y
|
| 17 |
+
n
|
| 18 |
+
c
|
| 19 |
+
v
|
| 20 |
+
|
| 21 |
+
m
|
| 22 |
+
h
|
| 23 |
+
d
|
| 24 |
+
a
|
| 25 |
+
w
|
models/agd/G_100000.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f3724c7877b954ab715ef920ca771336f4e3cf1e7596db81cbd11893f035588c
|
| 3 |
+
size 145478407
|
models/agd/config.json
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"train": {
|
| 3 |
+
"log_interval": 200,
|
| 4 |
+
"eval_interval": 1000,
|
| 5 |
+
"seed": 1234,
|
| 6 |
+
"epochs": 20000,
|
| 7 |
+
"learning_rate": 0.0002,
|
| 8 |
+
"betas": [
|
| 9 |
+
0.8,
|
| 10 |
+
0.99
|
| 11 |
+
],
|
| 12 |
+
"eps": 1e-09,
|
| 13 |
+
"batch_size": 64,
|
| 14 |
+
"fp16_run": true,
|
| 15 |
+
"lr_decay": 0.999875,
|
| 16 |
+
"segment_size": 8192,
|
| 17 |
+
"init_lr_ratio": 1,
|
| 18 |
+
"warmup_epochs": 0,
|
| 19 |
+
"c_mel": 45,
|
| 20 |
+
"c_kl": 1.0
|
| 21 |
+
},
|
| 22 |
+
"data": {
|
| 23 |
+
"training_files": "train.ltr",
|
| 24 |
+
"validation_files": "dev.ltr",
|
| 25 |
+
"text_cleaners": [
|
| 26 |
+
"transliteration_cleaners"
|
| 27 |
+
],
|
| 28 |
+
"max_wav_value": 32768.0,
|
| 29 |
+
"sampling_rate": 16000,
|
| 30 |
+
"filter_length": 1024,
|
| 31 |
+
"hop_length": 256,
|
| 32 |
+
"win_length": 1024,
|
| 33 |
+
"n_mel_channels": 80,
|
| 34 |
+
"mel_fmin": 0.0,
|
| 35 |
+
"mel_fmax": null,
|
| 36 |
+
"add_blank": true,
|
| 37 |
+
"n_speakers": 0,
|
| 38 |
+
"cleaned_text": true
|
| 39 |
+
},
|
| 40 |
+
"model": {
|
| 41 |
+
"inter_channels": 192,
|
| 42 |
+
"hidden_channels": 192,
|
| 43 |
+
"filter_channels": 768,
|
| 44 |
+
"n_heads": 2,
|
| 45 |
+
"n_layers": 6,
|
| 46 |
+
"kernel_size": 3,
|
| 47 |
+
"p_dropout": 0.1,
|
| 48 |
+
"resblock": "1",
|
| 49 |
+
"resblock_kernel_sizes": [
|
| 50 |
+
3,
|
| 51 |
+
7,
|
| 52 |
+
11
|
| 53 |
+
],
|
| 54 |
+
"resblock_dilation_sizes": [
|
| 55 |
+
[
|
| 56 |
+
1,
|
| 57 |
+
3,
|
| 58 |
+
5
|
| 59 |
+
],
|
| 60 |
+
[
|
| 61 |
+
1,
|
| 62 |
+
3,
|
| 63 |
+
5
|
| 64 |
+
],
|
| 65 |
+
[
|
| 66 |
+
1,
|
| 67 |
+
3,
|
| 68 |
+
5
|
| 69 |
+
]
|
| 70 |
+
],
|
| 71 |
+
"upsample_rates": [
|
| 72 |
+
8,
|
| 73 |
+
8,
|
| 74 |
+
2,
|
| 75 |
+
2
|
| 76 |
+
],
|
| 77 |
+
"upsample_initial_channel": 512,
|
| 78 |
+
"upsample_kernel_sizes": [
|
| 79 |
+
16,
|
| 80 |
+
16,
|
| 81 |
+
4,
|
| 82 |
+
4
|
| 83 |
+
],
|
| 84 |
+
"n_layers_q": 3,
|
| 85 |
+
"use_spectral_norm": false
|
| 86 |
+
}
|
| 87 |
+
}
|
models/agd/vocab.txt
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
3
|
| 2 |
+
p
|
| 3 |
+
y
|
| 4 |
+
f
|
| 5 |
+
ó
|
| 6 |
+
4
|
| 7 |
+
0
|
| 8 |
+
á
|
| 9 |
+
m
|
| 10 |
+
-
|
| 11 |
+
r
|
| 12 |
+
h
|
| 13 |
+
|
| 14 |
+
t
|
| 15 |
+
d
|
| 16 |
+
o
|
| 17 |
+
é
|
| 18 |
+
'
|
| 19 |
+
s
|
| 20 |
+
b
|
| 21 |
+
e
|
| 22 |
+
n
|
| 23 |
+
u
|
| 24 |
+
_
|
| 25 |
+
v
|
| 26 |
+
ú
|
| 27 |
+
í
|
| 28 |
+
a
|
| 29 |
+
i
|
| 30 |
+
k
|
models/agg/G_100000.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b8aa1936c08d0178a46cc741a42e231732c4955096833c58371b5fd4ceefe678
|
| 3 |
+
size 145480667
|
models/agg/config.json
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"train": {
|
| 3 |
+
"log_interval": 200,
|
| 4 |
+
"eval_interval": 1000,
|
| 5 |
+
"seed": 1234,
|
| 6 |
+
"epochs": 20000,
|
| 7 |
+
"learning_rate": 0.0002,
|
| 8 |
+
"betas": [
|
| 9 |
+
0.8,
|
| 10 |
+
0.99
|
| 11 |
+
],
|
| 12 |
+
"eps": 1e-09,
|
| 13 |
+
"batch_size": 64,
|
| 14 |
+
"fp16_run": true,
|
| 15 |
+
"lr_decay": 0.999875,
|
| 16 |
+
"segment_size": 8192,
|
| 17 |
+
"init_lr_ratio": 1,
|
| 18 |
+
"warmup_epochs": 0,
|
| 19 |
+
"c_mel": 45,
|
| 20 |
+
"c_kl": 1.0
|
| 21 |
+
},
|
| 22 |
+
"data": {
|
| 23 |
+
"training_files": "train.ltr",
|
| 24 |
+
"validation_files": "dev.ltr",
|
| 25 |
+
"text_cleaners": [
|
| 26 |
+
"transliteration_cleaners"
|
| 27 |
+
],
|
| 28 |
+
"max_wav_value": 32768.0,
|
| 29 |
+
"sampling_rate": 16000,
|
| 30 |
+
"filter_length": 1024,
|
| 31 |
+
"hop_length": 256,
|
| 32 |
+
"win_length": 1024,
|
| 33 |
+
"n_mel_channels": 80,
|
| 34 |
+
"mel_fmin": 0.0,
|
| 35 |
+
"mel_fmax": null,
|
| 36 |
+
"add_blank": true,
|
| 37 |
+
"n_speakers": 0,
|
| 38 |
+
"cleaned_text": true
|
| 39 |
+
},
|
| 40 |
+
"model": {
|
| 41 |
+
"inter_channels": 192,
|
| 42 |
+
"hidden_channels": 192,
|
| 43 |
+
"filter_channels": 768,
|
| 44 |
+
"n_heads": 2,
|
| 45 |
+
"n_layers": 6,
|
| 46 |
+
"kernel_size": 3,
|
| 47 |
+
"p_dropout": 0.1,
|
| 48 |
+
"resblock": "1",
|
| 49 |
+
"resblock_kernel_sizes": [
|
| 50 |
+
3,
|
| 51 |
+
7,
|
| 52 |
+
11
|
| 53 |
+
],
|
| 54 |
+
"resblock_dilation_sizes": [
|
| 55 |
+
[
|
| 56 |
+
1,
|
| 57 |
+
3,
|
| 58 |
+
5
|
| 59 |
+
],
|
| 60 |
+
[
|
| 61 |
+
1,
|
| 62 |
+
3,
|
| 63 |
+
5
|
| 64 |
+
],
|
| 65 |
+
[
|
| 66 |
+
1,
|
| 67 |
+
3,
|
| 68 |
+
5
|
| 69 |
+
]
|
| 70 |
+
],
|
| 71 |
+
"upsample_rates": [
|
| 72 |
+
8,
|
| 73 |
+
8,
|
| 74 |
+
2,
|
| 75 |
+
2
|
| 76 |
+
],
|
| 77 |
+
"upsample_initial_channel": 512,
|
| 78 |
+
"upsample_kernel_sizes": [
|
| 79 |
+
16,
|
| 80 |
+
16,
|
| 81 |
+
4,
|
| 82 |
+
4
|
| 83 |
+
],
|
| 84 |
+
"n_layers_q": 3,
|
| 85 |
+
"use_spectral_norm": false
|
| 86 |
+
}
|
| 87 |
+
}
|