emiyasstar
/

librispeech

Model card Files Files and versions

xet

Community

emiyasstar committed on Jul 28, 2022

Commit

9edce30

1 Parent(s): 089b429

Upload train_conformer_pretrain_w2v.yaml

Browse files

Files changed (1) hide show

train_conformer_pretrain_w2v.yaml +95 -0

train_conformer_pretrain_w2v.yaml ADDED Viewed

	@@ -0,0 +1,95 @@

+# network architecture
+# encoder related
+encoder: conformer
+encoder_conf:
+    output_size: 512    # dimension of attention
+    attention_heads: 8
+    linear_units: 2048  # the number of units of position-wise feed forward
+    num_blocks: 12      # the number of encoder blocks
+    dropout_rate: 0.1
+    positional_dropout_rate: 0.0
+    attention_dropout_rate: 0.0
+    input_layer: conv2d # encoder input type; you can choose conv2d, conv2d6, or conv2d8
+    normalize_before: true
+    cnn_module_kernel: 31
+    use_cnn_module: True
+    activation_type: 'swish'
+    pos_enc_layer_type: 'rel_pos'
+    selfattention_layer_type: 'rel_selfattn'
+# decoder related
+decoder: transformer
+decoder_conf:
+    attention_heads: 8
+    linear_units: 2048
+    num_blocks: 6
+    dropout_rate: 0.1
+    positional_dropout_rate: 0.0
+    self_attention_dropout_rate: 0.0
+    src_attention_dropout_rate: 0.0
+# hybrid CTC/attention
+model_conf:
+    ctc_weight: 1.0
+    lsm_weight: 0.1     # label smoothing option
+    length_normalized_loss: false
+# use raw_wav or kaldi feature
+raw_wav: true
+# dataset related
+dataset_conf:
+    filter_conf:
+        max_length: 2000
+        min_length: 50
+        token_max_length: 400
+        token_min_length: 1
+    resample_conf:
+        resample_rate: 16000
+    speed_perturb: false
+    fbank_conf:
+        num_mel_bins: 80
+        frame_shift: 10
+        frame_length: 25
+        dither: 1.0
+    spec_aug: false
+    spec_aug_conf:
+        num_t_mask: 3
+        num_f_mask: 2
+        max_t: 50
+        max_f: 10
+    shuffle: true
+    shuffle_conf:
+        shuffle_size: 1500
+    sort: true
+    sort_conf:
+        sort_size: 500  # sort_size should be less than shuffle_size
+    batch_conf:
+        batch_type: 'dynamic' # static or dynamic
+        max_frames_in_batch: 20000
+        batch_size: 3
+pretrain: True
+wav2vec_conf:
+    pretrain: True
+    quantize_targets: True
+    project_targets: True
+    latent_vars: 320
+    latent_dim: 512
+    latent_groups: 2
+    w2v_ext_loss: True
+    w2v_loss_weights: [1.5,0]
+    mask: True
+    mask_prob: 0.65
+grad_clip: 5
+accum_grad: 4
+max_epoch: 280
+log_interval: 100
+optim: adam
+optim_conf:
+    lr: 0.002
+scheduler: warmuplr     # pytorch v1.1.0+ required
+scheduler_conf:
+    warmup_steps: 25000