{
  "__name__": "Config: Transformer config for WanModel_S2V",
  "_class_name": "WanModel_S2V",
  "_diffusers_version": "0.34.0",
  "adain_mode": "attn_norm",
  "add_last_motion": true,
  "audio_dim": 1024,
  "audio_inject_layers": [
    0,
    4,
    8,
    12,
    16,
    20,
    24,
    27,
    30,
    33,
    36,
    39
  ],
  "cond_dim": 16,
  "dim": 5120,
  "enable_adain": true,
  "enable_framepack": true,
  "enable_motioner": false,
  "enable_tsm": false,
  "eps": 1e-06,
  "ffn_dim": 13824,
  "framepack_drop_mode": "padd",
  "freq_dim": 256,
  "in_dim": 16,
  "model_type": "s2v",
  "motion_token_num": 1024,
  "num_audio_token": 4,
  "num_heads": 40,
  "num_layers": 40,
  "out_dim": 16,
  "text_len": 512,
  "trainable_token_pos_emb": false,
  "zero_init": true,
  "zero_timestep": true
}