{
  "_class_name": "Transformer2DModel",
  "_diffusers_version": "0.27.2",
  "in_channels": 8,
  "num_layers": 24,
  "inner_dim": 2560,
  "attention_head_dim": 128,
  "num_attention_heads": 20,
  "mlp_ratio": 2.5,
  "out_channels": 8,
  "max_position": 32768,
  "rope_theta": 1000000.0,
  "speaker_embedding_dim": 512,
  "text_embedding_dim": 768,
  "ssl_encoder_depths": [8, 8],
  "ssl_names": ["mert", "m-hubert"],
  "ssl_latent_dims": [1024, 768],
  "patch_size": [16, 1],
  "max_height": 16,
  "max_width": 32768,
  "lyric_encoder_vocab_size": 6693,
  "lyric_hidden_size": 1024
}
|