bezzam
/

xcodec-hubert-librispeech

Feature Extraction

Model card Files and versions Community

bezzam HF Staff committed on Aug 16

Commit

ead4046

·

verified ·

1 Parent(s): a2349ba

Upload model

Files changed (2) hide show

config.json +135 -0
model.safetensors +3 -0

config.json ADDED Viewed

	@@ -0,0 +1,135 @@

+{
+  "acoustic_model_config": {
+    "codebook_dim": 8,
+    "codebook_loss_weight": 1.0,
+    "codebook_size": 1024,
+    "commitment_loss_weight": 0.25,
+    "decoder_hidden_size": 1024,
+    "downsampling_ratios": [
+      8,
+      5,
+      4,
+      2
+    ],
+    "encoder_hidden_size": 64,
+    "hidden_size": 256,
+    "hop_length": 320,
+    "model_type": "dac",
+    "n_codebooks": 9,
+    "quantizer_dropout": 0,
+    "sampling_rate": 16000,
+    "upsampling_ratios": [
+      8,
+      5,
+      4,
+      2
+    ]
+  },
+  "architectures": [
+    "XcodecModel"
+  ],
+  "block_dilations": [
+    1,
+    1
+  ],
+  "channel_ratios": [
+    1,
+    1
+  ],
+  "codebook_dim": 1024,
+  "codebook_size": 1024,
+  "decoder_channels": 768,
+  "encoder_channels": 768,
+  "initializer_range": 0.02,
+  "input_channels": 768,
+  "kernel_size": 3,
+  "model_type": "xcodec",
+  "output_channels": 768,
+  "sample_rate": 16000,
+  "semantic_model_config": {
+    "_name_or_path": "facebook/hubert-base-ls960",
+    "activation_dropout": 0.1,
+    "apply_spec_augment": true,
+    "architectures": [
+      "HubertModel"
+    ],
+    "attention_dropout": 0.1,
+    "classifier_proj_size": 256,
+    "conv_bias": false,
+    "conv_dim": [
+      512,
+      512,
+      512,
+      512,
+      512,
+      512,
+      512
+    ],
+    "conv_kernel": [
+      10,
+      3,
+      3,
+      3,
+      3,
+      2,
+      2
+    ],
+    "conv_pos_batch_norm": false,
+    "conv_stride": [
+      5,
+      2,
+      2,
+      2,
+      2,
+      2,
+      2
+    ],
+    "ctc_loss_reduction": "sum",
+    "ctc_zero_infinity": false,
+    "do_stable_layer_norm": false,
+    "feat_extract_activation": "gelu",
+    "feat_extract_dropout": 0.0,
+    "feat_extract_norm": "group",
+    "feat_proj_dropout": 0.1,
+    "feat_proj_layer_norm": true,
+    "final_dropout": 0.1,
+    "gradient_checkpointing": false,
+    "hidden_act": "gelu",
+    "hidden_dropout": 0.1,
+    "hidden_dropout_prob": 0.1,
+    "hidden_size": 768,
+    "initializer_range": 0.02,
+    "intermediate_size": 3072,
+    "layer_norm_eps": 1e-05,
+    "layerdrop": 0.1,
+    "mask_feature_length": 10,
+    "mask_feature_min_masks": 0,
+    "mask_feature_prob": 0.0,
+    "mask_time_length": 10,
+    "mask_time_min_masks": 2,
+    "mask_time_prob": 0.05,
+    "model_type": "hubert",
+    "num_attention_heads": 12,
+    "num_conv_pos_embedding_groups": 16,
+    "num_conv_pos_embeddings": 128,
+    "num_feat_extract_layers": 7,
+    "num_hidden_layers": 12,
+    "tokenizer_class": "Wav2Vec2CTCTokenizer",
+    "use_weighted_layer_sum": false,
+    "vocab_size": 32
+  },
+  "strides": [
+    1,
+    1
+  ],
+  "target_bandwidths": [
+    0.5,
+    1,
+    1.5,
+    2,
+    4
+  ],
+  "torch_dtype": "float32",
+  "transformers_version": "4.56.0.dev0",
+  "unit_kernel_size": 3
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6191971294c216bf6fb7f38fb39e69267a2e3eea6b5469a0952806923b901ce5
+size 709991220