# ################################
# Model: Best-RQ
# Authors: Jarod Duret 2024
# ################################
sample_rate: 16000
n_fft: 512
n_mels: 80
win_length: 32
hop_length: 10
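
# At 16 kHz, the 32 ms analysis window spans 512 samples (matching n_fft)
# and the 10 ms hop yields 100 feature frames per second of audio.
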
####################### Model parameters ###########################
# Transformer
d_model: 768
nhead: 8
num_encoder_layers: 12
num_decoder_layers: 0
d_ffn: 2048
transformer_dropout: 0.1
activation: !name:torch.nn.GELU
output_neurons: 5000
encoder_layerdrop: 0.0
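
# Note: output_neurons is forwarded below as tgt_vocab, which TransformerASR
# requires even though num_decoder_layers: 0 means no decoder is built. In a
# Best-RQ setup it plausibly matches the codebook size of the random-projection
# quantizer that produces the pretraining targets (an assumption, not stated
# in this file).
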
compute_features: !new:speechbrain.lobes.features.Fbank
    sample_rate: !ref <sample_rate>
    n_fft: !ref <n_fft>
    n_mels: !ref <n_mels>
    hop_length: !ref <hop_length>
    win_length: !ref <win_length>
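
# Fbank chains an STFT, spectral magnitude, and a mel filterbank, returning
# log-mel features shaped (batch, n_frames, n_mels).
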
normalizer: !new:speechbrain.processing.features.InputNormalization
    norm_type: sentence
    update_until_epoch: 0
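
# norm_type "sentence" normalizes each utterance with its own mean/std;
# update_until_epoch: 0 presumably keeps any running statistics frozen from
# the start (per-utterance statistics do not depend on them anyway).
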
############################## Models ################################
latent_extractor: !new:speechbrain.lobes.models.convolution.ConvolutionFrontEnd
    input_shape: (8, 10, 80)
    num_blocks: 2
    num_layers_per_block: 1
    out_channels: (64, 32)
    kernel_sizes: (3, 3)
    strides: (2, 2)
    residuals: (False, False)
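
# Shape check: each of the two blocks halves time and frequency (stride 2),
# so 80 mel bins become 80/2/2 = 20, and the final 32 channels flatten to
# 20 * 32 = 640 features per frame, matching input_size below. The
# (8, 10, 80) input_shape is only a dummy (batch, time, n_mels) example used
# to infer layer dimensions at construction time.
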
latent_encoder: !new:speechbrain.lobes.models.transformer.TransformerASR.TransformerASR
    input_size: 640
    tgt_vocab: !ref <output_neurons>
    d_model: !ref <d_model>
    nhead: !ref <nhead>
    num_encoder_layers: !ref <num_encoder_layers>
    num_decoder_layers: !ref <num_decoder_layers>
    d_ffn: !ref <d_ffn>
    dropout: !ref <transformer_dropout>
    activation: !ref <activation>
    conformer_activation: !ref <activation>
    encoder_module: conformer
    attention_type: RelPosMHAXL
    normalize_before: True
    causal: False
    layerdrop_prob: !ref <encoder_layerdrop>
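
# The encoder is a non-causal (bidirectional) Conformer using relative
# positional multi-head attention (RelPosMHAXL); layerdrop_prob 0.0 disables
# stochastic layer dropping.
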
# Wrap the transformer in an encoder-only wrapper so the decoder is never
# run (this model has no decoder).
encoder_wrapper: !new:speechbrain.lobes.models.transformer.TransformerASR.EncoderWrapper
    transformer: !ref <latent_encoder>
# encoder: !new:speechbrain.nnet.containers.LengthsCapableSequential
#     latent_extractor: !ref <latent_extractor>
#     encoder_wrapper: !ref <encoder_wrapper>

model: !new:torch.nn.ModuleList
    - [!ref <latent_extractor>, !ref <encoder_wrapper>]
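
# Grouping the frontend and the wrapped encoder in one ModuleList lets the
# pretrainer below load both parameter sets from a single "model" checkpoint.
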
modules:
    normalizer: !ref <normalizer>
    extractor: !ref <latent_extractor>
    encoder: !ref <encoder_wrapper>
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
    loadables:
        model: !ref <model>
        normalizer: !ref <normalizer>
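
# A minimal loading sketch (assumptions, not part of this file: it is saved
# locally as "hyperparams.yaml", speechbrain and hyperpyyaml are installed,
# and pretrained weights are fetched separately, since no paths are declared
# for the pretrainer above):
#
#     import torch
#     from hyperpyyaml import load_hyperpyyaml
#
#     with open("hyperparams.yaml") as f:
#         hparams = load_hyperpyyaml(f)
#     wav = torch.randn(1, 16000)               # 1 s of dummy audio
#     feats = hparams["compute_features"](wav)  # -> (1, ~100, 80) log-mel frames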