amphion
/

MaskGCT

Text-to-Speech

Safetensors

Model card Files Files and versions

xet

Community

RMSnow committed on Apr 13

Commit

3279c63

verified ·

1 Parent(s): e9ea266

Create model-index.json

Browse files

Files changed (1) hide show

model-index.json +68 -0

model-index.json ADDED Viewed

	@@ -0,0 +1,68 @@

+{
+    "name": "MaskGCT",
+    "_name_or_path": "amphion/MaskGCT",
+    "modelId": "amphion/MaskGCT",
+    "architectures": [
+        "MaskGCTModel"
+    ],
+    "model_type": "maskgct",
+    "task_specific_params": {
+        "text-to-speech": {
+            "supported_tasks": [
+                "zero-shot-tts",
+                "non-autoregressive-tts"
+            ]
+        }
+    },
+    "tags": [
+        "text-to-speech",
+        "safetensors"
+    ],
+    "pipeline_tag": "text-to-speech",
+    "language": [
+        "en",
+        "zh"
+    ],
+    "license": "cc-by-nc-4.0",
+    "datasets": [
+        "Emilia-100k"
+    ],
+    "model_structure": {
+        "semantic_codec": {
+            "type": "w2v-bert-2.0",
+            "description": "Converting speech to semantic tokens"
+        },
+        "acoustic_codec": {
+            "type": "codec",
+            "description": "Converting speech to acoustic tokens and reconstructing waveform"
+        },
+        "maskgct_t2s": {
+            "type": "transformer",
+            "description": "Predicting semantic tokens with text and prompt semantic tokens"
+        },
+        "maskgct_s2a": {
+            "variants": {
+                "1layer": {
+                    "type": "transformer",
+                    "description": "Single-layer model for acoustic token prediction"
+                },
+                "full": {
+                    "type": "transformer",
+                    "description": "Full model for acoustic token prediction"
+                }
+            },
+            "description": "Predicting acoustic tokens conditioned on semantic tokens"
+        }
+    },
+    "training_data": {
+        "total_hours": 100000,
+        "language_split": {
+            "english": 50000,
+            "chinese": 50000
+        }
+    },
+    "paper": {
+        "title": "MaskGCT: Zero-Shot Text-to-Speech with Masked Generative Codec Transformer",
+        "arxiv_id": "2409.00750"
+    }
+}