# Model configuration for the "large" preset of the `ddit` model type.
# One key per line: several `key: value` pairs on a single line is not valid
# YAML, and an inline ` #` would hide every key that follows it.
name: large
type: ddit
hidden_size: 1280
cond_dim: 128
length: 1024

# We try to roughly match parameter count.
# NOTE(review): `n_blocks` is filled in by the `adjust_n_blocks` resolver,
# which is defined outside this file; presumably it starts from
# `base_n_blocks` - confirm against the resolver's implementation.
base_n_blocks: 28
n_blocks: ${adjust_n_blocks:}

n_heads: 20
scale_by_sigma: true
dropout: 0.1
tie_word_embeddings: false

# 36 1280 20
# NOTE(review): the line above is an original author note; presumably a
# reference (blocks, hidden_size, n_heads) triple - confirm.