uxfion committed on
Commit
3585c0d
·
1 Parent(s): 310d9e5

add 0528 model

Browse files
README.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: cc-by-nc-4.0
3
+ ---
4
+
5
+ Disclaimer: For Academic Purposes Only
6
+
7
+ The information provided in this document is for academic purposes only. It is intended for educational and research use, and should not be used for any commercial or legal purposes. The authors do not guarantee the accuracy, completeness, or reliability of the information.
8
+
9
+ 免责声明:仅供学术交流
10
+
11
+ 本文件中的信息仅供学术交流使用。其目的是用于教育和研究,不得用于任何商业或法律目的。作者不保证信息的准确性、完整性或可靠性。
asset/DVAE.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:613cb128adf89188c93ea5880ea0b798e66b1fe6186d0c535d99bcd87bfd6976
3
+ size 27749823
asset/Decoder.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9964e36e840f0e3a748c5f716fe6de6490d2135a5f5155f4a642d51860e2ec38
3
+ size 103718156
asset/GPT.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7d4ee6461ea097a2be23eb40d73fb94ad3b3d39cb64fbb50cb3357fd466cadb
3
+ size 900746442
asset/Vocos.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09a670eda1c08b740013679c7a90ebb7f1a97646ea7673069a6838e6b51d6c58
3
+ size 54363119
asset/tokenizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e911ae7c6a7c27953433f35c44227a67838fe229a1f428503bdb6cd3d1bcc69c
3
+ size 336680
config/decoder.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ dim: 384
4
+
5
+ decoder_config:
6
+ idim: ${dim}
7
+ odim: ${dim}
8
+ hidden: 512
9
+ n_layer: 12
10
+ bn_dim: 128
11
+
12
+ vq_config: null
config/dvae.yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ dim: 512
4
+ decoder_config:
5
+ idim: ${dim}
6
+ odim: ${dim}
7
+ n_layer: 12
8
+ bn_dim: 128
9
+
10
+ vq_config:
11
+ dim: 1024
12
+ levels: [5,5,5,5]
13
+ G: 2
14
+ R: 2
config/gpt.yaml ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ num_audio_tokens: 626
4
+ num_text_tokens: 21178
5
+
6
+ gpt_config:
7
+ hidden_size: 768
8
+ intermediate_size: 3072
9
+ num_attention_heads: 12
10
+ num_hidden_layers: 20
11
+ use_cache: False
12
+ max_position_embeddings: 4096
13
+ # attn_implementation: flash_attention_2
14
+
15
+ spk_emb_dim: 192
16
+ spk_KL: False
17
+ num_audio_tokens: 626
18
+ num_text_tokens: null
19
+ num_vq: 4
20
+
config/path.yaml ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ vocos_config_path: config/vocos.yaml
4
+ vocos_ckpt_path: asset/Vocos.pt
5
+ dvae_config_path: config/dvae.yaml
6
+ dvae_ckpt_path: asset/DVAE.pt
7
+ gpt_config_path: config/gpt.yaml
8
+ gpt_ckpt_path: asset/GPT.pt
9
+ decoder_config_path: config/decoder.yaml
10
+ decoder_ckpt_path: asset/Decoder.pt
11
+ tokenizer_path: asset/tokenizer.pt
config/vocos.yaml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ feature_extractor:
2
+ class_path: vocos.feature_extractors.MelSpectrogramFeatures
3
+ init_args:
4
+ sample_rate: 24000
5
+ n_fft: 1024
6
+ hop_length: 256
7
+ n_mels: 100
8
+ padding: center
9
+
10
+ backbone:
11
+ class_path: vocos.models.VocosBackbone
12
+ init_args:
13
+ input_channels: 100
14
+ dim: 512
15
+ intermediate_dim: 1536
16
+ num_layers: 8
17
+
18
+ head:
19
+ class_path: vocos.heads.ISTFTHead
20
+ init_args:
21
+ dim: 512
22
+ n_fft: 1024
23
+ hop_length: 256
24
+ padding: center