add 0528 model

Files changed (11) hide show

README.md ADDED Viewed

+---
+license: cc-by-nc-4.0
+---
+Disclaimer: For Academic Purposes Only
+The information provided in this document is for academic purposes only. It is intended for educational and research use, and should not be used for any commercial or legal purposes. The authors do not guarantee the accuracy, completeness, or reliability of the information.
+免责声明：仅供学术交流
+本文件中的信息仅供学术交流使用。其目的是用于教育和研究，不得用于任何商业或法律目的。作者不保证信息的准确性、完整性或可靠性。

asset/DVAE.pt ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:613cb128adf89188c93ea5880ea0b798e66b1fe6186d0c535d99bcd87bfd6976
+size 27749823

asset/Decoder.pt ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:9964e36e840f0e3a748c5f716fe6de6490d2135a5f5155f4a642d51860e2ec38
+size 103718156

asset/GPT.pt ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:d7d4ee6461ea097a2be23eb40d73fb94ad3b3d39cb64fbb50cb3357fd466cadb
+size 900746442

asset/Vocos.pt ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:09a670eda1c08b740013679c7a90ebb7f1a97646ea7673069a6838e6b51d6c58
+size 54363119

asset/tokenizer.pt ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:e911ae7c6a7c27953433f35c44227a67838fe229a1f428503bdb6cd3d1bcc69c
+size 336680

config/decoder.yaml ADDED Viewed

+dim: 384  # top-level width; the two ${dim} references below point at a key of this name
+decoder_config:
+  idim: ${dim}  # assumes the loader resolves ${...} interpolation (OmegaConf-style) - confirm
+  odim: ${dim}  # same reference as idim, so both carry the same value
+  hidden: 512
+  n_layer: 12
+  bn_dim: 128
+vq_config: null  # explicitly null here; config/dvae.yaml defines a vq_config mapping instead

config/dvae.yaml ADDED Viewed

+dim: 512  # top-level width; the ${dim} references below point at a key of this name
+decoder_config:
+  idim: ${dim}  # assumes the loader resolves ${...} interpolation - confirm
+  odim: ${dim}
+  n_layer: 12
+  bn_dim: 128
+vq_config:
+  dim: 1024  # key of the vq_config mapping; separate from the top-level `dim`
+  levels: [5, 5, 5, 5]
+  G: 2
+  R: 2

config/gpt.yaml ADDED Viewed

+num_audio_tokens: 626
+num_text_tokens: 21178
+gpt_config:
+  hidden_size: 768
+  intermediate_size: 3072
+  num_attention_heads: 12
+  num_hidden_layers: 20
+  # Canonical lowercase boolean: parsed as a bool by every YAML schema,
+  # whereas `False` is a plain string under strict/JSON-schema loaders.
+  use_cache: false
+  max_position_embeddings: 4096
+  # attn_implementation: flash_attention_2
+  spk_emb_dim: 192
+  spk_KL: false
+  num_audio_tokens: 626  # same value as the top-level num_audio_tokens
+  # NOTE(review): null here while the top-level num_text_tokens is 21178;
+  # presumably filled in by the loading code - confirm.
+  num_text_tokens: null
+  num_vq: 4

config/path.yaml ADDED Viewed

+vocos_config_path: config/vocos.yaml  # all paths here are relative; base directory is presumably the repo root - confirm
+vocos_ckpt_path: asset/Vocos.pt
+dvae_config_path: config/dvae.yaml
+dvae_ckpt_path: asset/DVAE.pt
+gpt_config_path: config/gpt.yaml
+gpt_ckpt_path: asset/GPT.pt
+decoder_config_path: config/decoder.yaml
+decoder_ckpt_path: asset/Decoder.pt
+tokenizer_path: asset/tokenizer.pt  # checkpoint only; no tokenizer config path is listed

config/vocos.yaml ADDED Viewed

+feature_extractor:
+  class_path: vocos.feature_extractors.MelSpectrogramFeatures  # dotted path; presumably instantiated with init_args - confirm against the loader
+  init_args:
+    sample_rate: 24000
+    n_fft: 1024
+    hop_length: 256
+    n_mels: 100  # equals backbone.init_args.input_channels
+    padding: center
+backbone:
+  class_path: vocos.models.VocosBackbone
+  init_args:
+    input_channels: 100  # equals feature_extractor.init_args.n_mels
+    dim: 512
+    intermediate_dim: 1536
+    num_layers: 8
+head:
+  class_path: vocos.heads.ISTFTHead
+  init_args:
+    dim: 512  # equals backbone.init_args.dim
+    n_fft: 1024  # same value as feature_extractor.init_args.n_fft
+    hop_length: 256  # same value as feature_extractor.init_args.hop_length
+    padding: center  # same value as feature_extractor.init_args.padding