File size: 1,078 Bytes
56a1295
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# Root model definition.
# NOTE(review): `_target_` looks like a Hydra-style dotted import path naming
# the class to instantiate — confirm against the code that loads this file.
_target_: modules.astral_quantization.default_model.AstralQuantizer
# Names of the tokenizer and the SSL model.
# NOTE(review): presumably Hugging Face Hub repository ids — confirm.
tokenizer_name: 'openai/whisper-small'
ssl_model_name: 'facebook/hubert-large-ll60k'
# NOTE(review): presumably the index of the SSL model layer whose output is
# used as features — confirm against AstralQuantizer.
ssl_output_layer: 18
# Encoder stage settings.
# NOTE(review): `_target_` looks like a Hydra-style dotted import path naming
# the class to instantiate — confirm against the code that loads this file.
encoder:
  _target_: modules.astral_quantization.convnext.ConvNeXtV2Stage
  # Same value as quantizer.dim and asr_decoder.in_channels in this file.
  # NOTE(review): presumably these must be kept in sync — confirm.
  dim: 512
  num_blocks: 12
  # 1536 = 3 x dim
  intermediate_dim: 1536
  dilation: 1
  # NOTE(review): presumably the width of the incoming SSL features — confirm.
  input_dim: 1024
# Quantizer settings.
# NOTE(review): `_target_` looks like a Hydra-style dotted import path naming
# the class to instantiate — confirm against the code that loads this file.
quantizer:
  _target_: modules.astral_quantization.bsq.BinarySphericalQuantize
  codebook_size: 2048  # codebook size, must be a power of 2 (2048 = 2^11)
  # Same value as encoder.dim and decoder.dim in this file.
  dim: 512
  entropy_loss_weight: 0.1
  diversity_gamma: 1.0
  # Booleans use the canonical lowercase spelling so that every YAML loader
  # and linter reads them the same way.
  spherical: true
  enable_entropy_loss: true
  soft_entropy_loss: true
# Decoder stage settings. Uses the same `_target_` class and the same
# dim / num_blocks / intermediate_dim / dilation values as `encoder`, with
# `output_dim` in place of `input_dim` and an additional `gin_channels`.
decoder:
  _target_: modules.astral_quantization.convnext.ConvNeXtV2Stage
  dim: 512
  num_blocks: 12
  # 1536 = 3 x dim
  intermediate_dim: 1536
  dilation: 1
  # Same value as encoder.input_dim in this file.
  output_dim: 1024
  # NOTE(review): presumably the width of a global conditioning input —
  # confirm against ConvNeXtV2Stage.
  gin_channels: 192
# ASR decoder settings.
# NOTE(review): `_target_` looks like a Hydra-style dotted import path naming
# the class to instantiate — confirm against the code that loads this file.
asr_decoder:
  _target_: modules.astral_quantization.asr_decoder.ASRDecoder
  # 768 / 12 = 64 — NOTE(review): presumably the per-head width; confirm.
  hidden_dim: 768
  num_heads: 12
  depth: 12
  block_size: 4096
  # Same value as quantizer.dim in this file.
  in_channels: 512
  # Both bos_id and eos_id are smaller than n_vocab.
  # NOTE(review): whether these values match the tokenizer named by the
  # top-level `tokenizer_name` cannot be checked from this file — confirm.
  n_vocab: 51866
  bos_id: 50528
  eos_id: 50527
  # Both dropout rates are set to zero.
  dropout_rate: 0.0
  attn_dropout_rate: 0.0