Zyphra
/

Zonos-v0.1-transformer

+{
+    "backbone": {
+        "d_model": 2048,
+        "d_intermediate": 0,
+        "attn_mlp_d_intermediate": 8192,
+        "n_layer": 26,
+        "ssm_cfg": {},
+        "attn_layer_idx": [
+            0,
+            1,
+            2,
+            3,
+            4,
+            5,
+            6,
+            7,
+            8,
+            9,
+            10,
+            11,
+            12,
+            13,
+            14,
+            15,
+            16,
+            17,
+            18,
+            19,
+            20,
+            21,
+            22,
+            23,
+            24,
+            25
+        ],
+        "attn_cfg": {
+            "causal": true,
+            "num_heads": 16,
+            "num_heads_kv": 4,
+            "rotary_emb_dim": 128,
+            "rotary_emb_interleaved": true,
+            "qkv_proj_bias": false,
+            "out_proj_bias": false
+        },
+        "rms_norm": false,
+        "residual_in_fp32": false,
+        "norm_epsilon": 1e-05
+    },
+    "prefix_conditioner": {
+        "conditioners": [
+            {
+                "type": "EspeakPhonemeConditioner",
+                "name": "espeak"
+            },
+            {
+                "cond_dim": 128,
+                "uncond_type": "learned",
+                "projection": "linear",
+                "type": "PassthroughConditioner",
+                "name": "speaker"
+            },
+            {
+                "input_dim": 8,
+                "uncond_type": "learned",
+                "type": "FourierConditioner",
+                "name": "emotion"
+            },
+            {
+                "min_val": 0,
+                "max_val": 24000,
+                "uncond_type": "learned",
+                "type": "FourierConditioner",
+                "name": "fmax"
+            },
+            {
+                "min_val": 0,
+                "max_val": 400,
+                "uncond_type": "learned",
+                "type": "FourierConditioner",
+                "name": "pitch_std"
+            },
+            {
+                "min_val": 0,
+                "max_val": 40,
+                "uncond_type": "learned",
+                "type": "FourierConditioner",
+                "name": "speaking_rate"
+            },
+            {
+                "min_val": -1,
+                "max_val": 126,
+                "uncond_type": "learned",
+                "type": "IntegerConditioner",
+                "name": "language_id"
+            }
+        ],
+        "projection": "linear"
+    },
+    "eos_token_id": 1024,
+    "masked_token_id": 1025
+}