{
  "_class_name": "UNetModel",
  "_diffusers_version": "0.30.2",
  "addition_attention": true,
  "attention_resolutions": [
    4,
    2,
    1
  ],
  "channel_mult": [
    1,
    2,
    4,
    4
  ],
  "context_dim": 1024,
  "conv_resample": true,
  "default_fps": 24,
  "dims": 2,
  "dropout": 0.1,
  "fps_condition": true,
  "image_cross_attention": true,
  "image_cross_attention_scale_learnable": false,
  "in_channels": 8,
  "masked_layer_fusion": true,
  "model_channels": 320,
  "num_head_channels": 64,
  "num_heads": -1,
  "num_res_blocks": 2,
  "out_channels": 4,
  "resblock_updown": false,
  "temporal_attention": true,
  "temporal_conv": true,
  "temporal_length": 16,
  "temporal_selfatt_only": true,
  "tempspatial_aware": false,
  "transformer_depth": 1,
  "use_causal_attention": false,
  "use_checkpoint": true,
  "use_linear": true,
  "use_relative_position": false,
  "use_scale_shift_norm": false
}