YuxueYang
		
	committed on
		
		
					Commit 
							
							·
						
						0a92d82
	
1
								Parent(s):
							
							fd90f34
								
Upload safetensor
Browse files- .gitattributes +1 -0
 - README.md +26 -0
 - demos.gif +3 -0
 - image_projector/config.json +14 -0
 - image_projector/diffusion_pytorch_model.safetensors +3 -0
 - layer_controlnet/config.json +49 -0
 - layer_controlnet/diffusion_pytorch_model.safetensors +3 -0
 - scheduler/scheduler_config.json +19 -0
 - unet/config.json +43 -0
 - unet/diffusion_pytorch_model.safetensors +3 -0
 - vae/config.json +25 -0
 - vae/diffusion_pytorch_model.safetensors +3 -0
 - vae_dualref/config.json +25 -0
 - vae_dualref/diffusion_pytorch_model.safetensors +3 -0
 
    	
        .gitattributes
    CHANGED
    
    | 
         @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text 
     | 
|
| 33 | 
         
             
            *.zip filter=lfs diff=lfs merge=lfs -text
         
     | 
| 34 | 
         
             
            *.zst filter=lfs diff=lfs merge=lfs -text
         
     | 
| 35 | 
         
             
            *tfevents* filter=lfs diff=lfs merge=lfs -text
         
     | 
| 
         | 
| 
         | 
|
| 33 | 
         
             
            *.zip filter=lfs diff=lfs merge=lfs -text
         
     | 
| 34 | 
         
             
            *.zst filter=lfs diff=lfs merge=lfs -text
         
     | 
| 35 | 
         
             
            *tfevents* filter=lfs diff=lfs merge=lfs -text
         
     | 
| 36 | 
         
            +
            *.gif filter=lfs diff=lfs merge=lfs -text
         
     | 
    	
        README.md
    CHANGED
    
    | 
         @@ -1,3 +1,29 @@ 
     | 
|
| 1 | 
         
             
            ---
         
     | 
| 2 | 
         
             
            license: mit
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 3 | 
         
             
            ---
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
             
            ---
         
     | 
| 2 | 
         
             
            license: mit
         
     | 
| 3 | 
         
            +
            tags:
         
     | 
| 4 | 
         
            +
             - video generation
         
     | 
| 5 | 
         
            +
             - CreateAI
         
     | 
| 6 | 
         
            +
            pipeline_tag: image-to-video
         
     | 
| 7 | 
         
            +
            library_name: diffusers
         
     | 
| 8 | 
         
             
            ---
         
     | 
| 9 | 
         
            +
             
     | 
| 10 | 
         
            +
            # LayerAnimate-Mix
         
     | 
| 11 | 
         
            +
             
     | 
| 12 | 
         
            +
            [Project](https://layeranimate.github.io) | [Github](https://github.com/IamCreateAI/LayerAnimate) | [Paper](https://arxiv.org/abs/2501.08295)
         
     | 
| 13 | 
         
            +
             
     | 
| 14 | 
         
            +
            <div align="center"> <img src='demos.gif' alt="LayerAnimate demos"></div>
         
     | 
| 15 | 
         
            +
             
     | 
| 16 | 
         
            +
            **LayerAnimate** is a novel video diffusion framework with a layer-aware architecture that enables the manipulation of individual layers through layer-level controls.
         
     | 
| 17 | 
         
            +
             
     | 
| 18 | 
         
            +
            ## Citation
         
     | 
| 19 | 
         
            +
             
     | 
| 20 | 
         
            +
            If you find our work helpful, please consider citing it as follows.
         
     | 
| 21 | 
         
            +
             
     | 
| 22 | 
         
            +
            ```bib
         
     | 
| 23 | 
         
            +
            @article{yang2025layeranimate,
         
     | 
| 24 | 
         
            +
              author    = {Yang, Yuxue and Fan, Lue and Lin, Zuzeng and Wang, Feng and Zhang, Zhaoxiang},
         
     | 
| 25 | 
         
            +
              title     = {LayerAnimate: Layer-specific Control for Animation},
         
     | 
| 26 | 
         
            +
              journal   = {arXiv preprint arXiv:2501.08295},
         
     | 
| 27 | 
         
            +
              year      = {2025},
         
     | 
| 28 | 
         
            +
            }
         
     | 
| 29 | 
         
            +
            ```
         
     | 
    	
        demos.gif
    ADDED
    
    
											 
									 | 
									
								
											Git LFS Details
  | 
									
    	
        image_projector/config.json
    ADDED
    
    | 
         @@ -0,0 +1,14 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
              "_class_name": "Resampler",
         
     | 
| 3 | 
         
            +
              "_diffusers_version": "0.30.2",
         
     | 
| 4 | 
         
            +
              "_name_or_path": "models/i2v",
         
     | 
| 5 | 
         
            +
              "depth": 4,
         
     | 
| 6 | 
         
            +
              "dim": 1024,
         
     | 
| 7 | 
         
            +
              "dim_head": 64,
         
     | 
| 8 | 
         
            +
              "embedding_dim": 1280,
         
     | 
| 9 | 
         
            +
              "ff_mult": 4,
         
     | 
| 10 | 
         
            +
              "heads": 12,
         
     | 
| 11 | 
         
            +
              "num_queries": 16,
         
     | 
| 12 | 
         
            +
              "output_dim": 1024,
         
     | 
| 13 | 
         
            +
              "video_length": 16
         
     | 
| 14 | 
         
            +
            }
         
     | 
    	
        image_projector/diffusion_pytorch_model.safetensors
    ADDED
    
    | 
         @@ -0,0 +1,3 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:893a58eb98b1b2ed33ccd635ebf8ddad825ff20d0959b582964fd03cc8e37e30
         
     | 
| 3 | 
         
            +
            size 97579608
         
     | 
    	
        layer_controlnet/config.json
    ADDED
    
    | 
         @@ -0,0 +1,49 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
              "_class_name": "LayerControlNet",
         
     | 
| 3 | 
         
            +
              "_diffusers_version": "0.30.2",
         
     | 
| 4 | 
         
            +
              "addition_attention": true,
         
     | 
| 5 | 
         
            +
              "attention_resolutions": [
         
     | 
| 6 | 
         
            +
                4,
         
     | 
| 7 | 
         
            +
                2,
         
     | 
| 8 | 
         
            +
                1
         
     | 
| 9 | 
         
            +
              ],
         
     | 
| 10 | 
         
            +
              "channel_mult": [
         
     | 
| 11 | 
         
            +
                1,
         
     | 
| 12 | 
         
            +
                2,
         
     | 
| 13 | 
         
            +
                4
         
     | 
| 14 | 
         
            +
              ],
         
     | 
| 15 | 
         
            +
              "condition_channels": {
         
     | 
| 16 | 
         
            +
                "motion_score": 2,
         
     | 
| 17 | 
         
            +
                "sketch": 4,
         
     | 
| 18 | 
         
            +
                "trajectory": 3
         
     | 
| 19 | 
         
            +
              },
         
     | 
| 20 | 
         
            +
              "context_dim": 1024,
         
     | 
| 21 | 
         
            +
              "control_injection_mode": "add",
         
     | 
| 22 | 
         
            +
              "conv_resample": true,
         
     | 
| 23 | 
         
            +
              "default_fps": 24,
         
     | 
| 24 | 
         
            +
              "dims": 2,
         
     | 
| 25 | 
         
            +
              "dropout": 0.1,
         
     | 
| 26 | 
         
            +
              "fps_condition": true,
         
     | 
| 27 | 
         
            +
              "ignore_noisy_latents": true,
         
     | 
| 28 | 
         
            +
              "image_cross_attention": true,
         
     | 
| 29 | 
         
            +
              "image_cross_attention_scale_learnable": false,
         
     | 
| 30 | 
         
            +
              "in_channels": 5,
         
     | 
| 31 | 
         
            +
              "model_channels": 320,
         
     | 
| 32 | 
         
            +
              "num_head_channels": 64,
         
     | 
| 33 | 
         
            +
              "num_heads": -1,
         
     | 
| 34 | 
         
            +
              "num_res_blocks": 2,
         
     | 
| 35 | 
         
            +
              "out_channels": 4,
         
     | 
| 36 | 
         
            +
              "resblock_updown": false,
         
     | 
| 37 | 
         
            +
              "temporal_attention": true,
         
     | 
| 38 | 
         
            +
              "temporal_conv": true,
         
     | 
| 39 | 
         
            +
              "temporal_length": 16,
         
     | 
| 40 | 
         
            +
              "temporal_selfatt_only": true,
         
     | 
| 41 | 
         
            +
              "tempspatial_aware": false,
         
     | 
| 42 | 
         
            +
              "transformer_depth": 1,
         
     | 
| 43 | 
         
            +
              "use_causal_attention": false,
         
     | 
| 44 | 
         
            +
              "use_checkpoint": true,
         
     | 
| 45 | 
         
            +
              "use_linear": true,
         
     | 
| 46 | 
         
            +
              "use_relative_position": false,
         
     | 
| 47 | 
         
            +
              "use_scale_shift_norm": false,
         
     | 
| 48 | 
         
            +
              "use_vae_for_trajectory": false
         
     | 
| 49 | 
         
            +
            }
         
     | 
    	
        layer_controlnet/diffusion_pytorch_model.safetensors
    ADDED
    
    | 
         @@ -0,0 +1,3 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:370fda8bb902ca1fe3c58811240d608a8b8f6ff22e70efba9c1048d8fe368480
         
     | 
| 3 | 
         
            +
            size 682530392
         
     | 
    	
        scheduler/scheduler_config.json
    ADDED
    
    | 
         @@ -0,0 +1,19 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
              "_class_name": "DDIMScheduler",
         
     | 
| 3 | 
         
            +
              "_diffusers_version": "0.30.2",
         
     | 
| 4 | 
         
            +
              "beta_end": 0.012,
         
     | 
| 5 | 
         
            +
              "beta_schedule": "scaled_linear",
         
     | 
| 6 | 
         
            +
              "beta_start": 0.00085,
         
     | 
| 7 | 
         
            +
              "clip_sample": false,
         
     | 
| 8 | 
         
            +
              "clip_sample_range": 1.0,
         
     | 
| 9 | 
         
            +
              "dynamic_thresholding_ratio": 0.995,
         
     | 
| 10 | 
         
            +
              "num_train_timesteps": 1000,
         
     | 
| 11 | 
         
            +
              "prediction_type": "v_prediction",
         
     | 
| 12 | 
         
            +
              "rescale_betas_zero_snr": true,
         
     | 
| 13 | 
         
            +
              "sample_max_value": 1.0,
         
     | 
| 14 | 
         
            +
              "set_alpha_to_one": true,
         
     | 
| 15 | 
         
            +
              "steps_offset": 1,
         
     | 
| 16 | 
         
            +
              "thresholding": false,
         
     | 
| 17 | 
         
            +
              "timestep_spacing": "leading",
         
     | 
| 18 | 
         
            +
              "trained_betas": null
         
     | 
| 19 | 
         
            +
            }
         
     | 
    	
        unet/config.json
    ADDED
    
    | 
         @@ -0,0 +1,43 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
              "_class_name": "UNetModel",
         
     | 
| 3 | 
         
            +
              "_diffusers_version": "0.30.2",
         
     | 
| 4 | 
         
            +
              "addition_attention": true,
         
     | 
| 5 | 
         
            +
              "attention_resolutions": [
         
     | 
| 6 | 
         
            +
                4,
         
     | 
| 7 | 
         
            +
                2,
         
     | 
| 8 | 
         
            +
                1
         
     | 
| 9 | 
         
            +
              ],
         
     | 
| 10 | 
         
            +
              "channel_mult": [
         
     | 
| 11 | 
         
            +
                1,
         
     | 
| 12 | 
         
            +
                2,
         
     | 
| 13 | 
         
            +
                4,
         
     | 
| 14 | 
         
            +
                4
         
     | 
| 15 | 
         
            +
              ],
         
     | 
| 16 | 
         
            +
              "context_dim": 1024,
         
     | 
| 17 | 
         
            +
              "conv_resample": true,
         
     | 
| 18 | 
         
            +
              "default_fps": 24,
         
     | 
| 19 | 
         
            +
              "dims": 2,
         
     | 
| 20 | 
         
            +
              "dropout": 0.1,
         
     | 
| 21 | 
         
            +
              "fps_condition": true,
         
     | 
| 22 | 
         
            +
              "image_cross_attention": true,
         
     | 
| 23 | 
         
            +
              "image_cross_attention_scale_learnable": false,
         
     | 
| 24 | 
         
            +
              "in_channels": 8,
         
     | 
| 25 | 
         
            +
              "masked_layer_fusion": true,
         
     | 
| 26 | 
         
            +
              "model_channels": 320,
         
     | 
| 27 | 
         
            +
              "num_head_channels": 64,
         
     | 
| 28 | 
         
            +
              "num_heads": -1,
         
     | 
| 29 | 
         
            +
              "num_res_blocks": 2,
         
     | 
| 30 | 
         
            +
              "out_channels": 4,
         
     | 
| 31 | 
         
            +
              "resblock_updown": false,
         
     | 
| 32 | 
         
            +
              "temporal_attention": true,
         
     | 
| 33 | 
         
            +
              "temporal_conv": true,
         
     | 
| 34 | 
         
            +
              "temporal_length": 16,
         
     | 
| 35 | 
         
            +
              "temporal_selfatt_only": true,
         
     | 
| 36 | 
         
            +
              "tempspatial_aware": false,
         
     | 
| 37 | 
         
            +
              "transformer_depth": 1,
         
     | 
| 38 | 
         
            +
              "use_causal_attention": false,
         
     | 
| 39 | 
         
            +
              "use_checkpoint": true,
         
     | 
| 40 | 
         
            +
              "use_linear": true,
         
     | 
| 41 | 
         
            +
              "use_relative_position": false,
         
     | 
| 42 | 
         
            +
              "use_scale_shift_norm": false
         
     | 
| 43 | 
         
            +
            }
         
     | 
    	
        unet/diffusion_pytorch_model.safetensors
    ADDED
    
    | 
         @@ -0,0 +1,3 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:7681439cf7fb436df807b8262f06541bfcc056d293ec2e52352bf2c64c973b37
         
     | 
| 3 | 
         
            +
            size 3001024704
         
     | 
    	
        vae/config.json
    ADDED
    
    | 
         @@ -0,0 +1,25 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
              "_class_name": "AutoencoderKL",
         
     | 
| 3 | 
         
            +
              "_diffusers_version": "0.30.3",
         
     | 
| 4 | 
         
            +
              "ddconfig": {
         
     | 
| 5 | 
         
            +
                "attn_resolutions": [],
         
     | 
| 6 | 
         
            +
                "ch": 128,
         
     | 
| 7 | 
         
            +
                "ch_mult": [
         
     | 
| 8 | 
         
            +
                  1,
         
     | 
| 9 | 
         
            +
                  2,
         
     | 
| 10 | 
         
            +
                  4,
         
     | 
| 11 | 
         
            +
                  4
         
     | 
| 12 | 
         
            +
                ],
         
     | 
| 13 | 
         
            +
                "double_z": true,
         
     | 
| 14 | 
         
            +
                "dropout": 0.0,
         
     | 
| 15 | 
         
            +
                "in_channels": 3,
         
     | 
| 16 | 
         
            +
                "num_res_blocks": 2,
         
     | 
| 17 | 
         
            +
                "out_ch": 3,
         
     | 
| 18 | 
         
            +
                "resolution": 256,
         
     | 
| 19 | 
         
            +
                "z_channels": 4
         
     | 
| 20 | 
         
            +
              },
         
     | 
| 21 | 
         
            +
              "embed_dim": 4,
         
     | 
| 22 | 
         
            +
              "image_key": "image",
         
     | 
| 23 | 
         
            +
              "input_dim": 4,
         
     | 
| 24 | 
         
            +
              "use_checkpoint": false
         
     | 
| 25 | 
         
            +
            }
         
     | 
    	
        vae/diffusion_pytorch_model.safetensors
    ADDED
    
    | 
         @@ -0,0 +1,3 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:7053f0e04f33a165d6c20c27727eb7238676ffdc290ca0cb924acdc080b89ae3
         
     | 
| 3 | 
         
            +
            size 334641012
         
     | 
    	
        vae_dualref/config.json
    ADDED
    
    | 
         @@ -0,0 +1,25 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
              "_class_name": "AutoencoderKL_Dualref",
         
     | 
| 3 | 
         
            +
              "_diffusers_version": "0.11.1",
         
     | 
| 4 | 
         
            +
              "ddconfig": {
         
     | 
| 5 | 
         
            +
                "attn_resolutions": [],
         
     | 
| 6 | 
         
            +
                "ch": 128,
         
     | 
| 7 | 
         
            +
                "ch_mult": [
         
     | 
| 8 | 
         
            +
                  1,
         
     | 
| 9 | 
         
            +
                  2,
         
     | 
| 10 | 
         
            +
                  4,
         
     | 
| 11 | 
         
            +
                  4
         
     | 
| 12 | 
         
            +
                ],
         
     | 
| 13 | 
         
            +
                "double_z": true,
         
     | 
| 14 | 
         
            +
                "dropout": 0.0,
         
     | 
| 15 | 
         
            +
                "in_channels": 3,
         
     | 
| 16 | 
         
            +
                "num_res_blocks": 2,
         
     | 
| 17 | 
         
            +
                "out_ch": 3,
         
     | 
| 18 | 
         
            +
                "resolution": 256,
         
     | 
| 19 | 
         
            +
                "z_channels": 4
         
     | 
| 20 | 
         
            +
              },
         
     | 
| 21 | 
         
            +
              "embed_dim": 4,
         
     | 
| 22 | 
         
            +
              "image_key": "image",
         
     | 
| 23 | 
         
            +
              "input_dim": 4,
         
     | 
| 24 | 
         
            +
              "use_checkpoint": false
         
     | 
| 25 | 
         
            +
            }
         
     | 
    	
        vae_dualref/diffusion_pytorch_model.safetensors
    ADDED
    
    | 
         @@ -0,0 +1,3 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:ec01046000da70640a5f146bc18e9438181ec3ee9cd27c71ab2da3ca9ea8bdde
         
     | 
| 3 | 
         
            +
            size 399810404
         
     |