clone3 committed on May 12

Commit

f68e035

verified ·

1 Parent(s): e7d006e

Model Init

Browse files

Files changed (37) hide show

.gitattributes +37 -35
README.md +73 -0
assets/leffa.png +3 -0
assets/teaser.png +3 -0
assets/vis_result.png +3 -0
densepose/Base-DensePose-RCNN-FPN.yaml +48 -0
densepose/densepose_rcnn_R_50_FPN_s1x.yaml +8 -0
densepose/model_final_162be9.pkl +3 -0
examples/garment/01449_00.jpg +3 -0
examples/garment/01486_00.jpg +3 -0
examples/garment/01853_00.jpg +3 -0
examples/garment/02070_00.jpg +3 -0
examples/garment/03553_00.jpg +3 -0
examples/person1/01350_00.jpg +3 -0
examples/person1/01376_00.jpg +3 -0
examples/person1/01416_00.jpg +3 -0
examples/person1/05976_00.jpg +3 -0
examples/person1/06094_00.jpg +3 -0
examples/person2/01850_00.jpg +3 -0
examples/person2/01875_00.jpg +3 -0
examples/person2/02532_00.jpg +3 -0
examples/person2/02902_00.jpg +3 -0
examples/person2/05346_00.jpg +3 -0
humanparsing/parsing_atr.onnx +3 -0
humanparsing/parsing_lip.onnx +3 -0
openpose/body_pose_model.pth +3 -0
pose_transfer.pth +3 -0
schp/exp-schp-201908261155-lip.pth +3 -0
schp/exp-schp-201908301523-atr.pth +3 -0
stable-diffusion-inpainting/scheduler/scheduler_config.json +13 -0
stable-diffusion-inpainting/unet/config.json +36 -0
stable-diffusion-inpainting/vae/config.json +29 -0
stable-diffusion-xl-1.0-inpainting-0.1/scheduler/scheduler_config.json +18 -0
stable-diffusion-xl-1.0-inpainting-0.1/unet/config.json +78 -0
stable-diffusion-xl-1.0-inpainting-0.1/vae/config.json +32 -0
virtual_tryon.pth +3 -0
virtual_tryon_dc.pth +3 -0

.gitattributes CHANGED Viewed

@@ -1,35 +1,37 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+*.jpg filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,3 +1,76 @@
 ---
 license: mit
 ---

 ---
 license: mit
+pipeline_tag: image-to-image
 ---
+# *Leffa*: Learning Flow Fields in Attention for Controllable Person Image Generation
+[📚 Paper](https://arxiv.org/abs/2412.08486) - [🤖 Code](https://github.com/franciszzj/Leffa) - [🔥 Demo](https://huggingface.co/spaces/franciszzj/Leffa) - [🤗 Model](https://huggingface.co/franciszzj/Leffa)
+Star ⭐ us if you like it!
+## News
+- 09/Jan/2025. Inference defaults to float16, generating an image in 6 seconds (on A100).
+- 02/Jan/2025. Update the mask generator to improve results. Add ref unet acceleration, boosting prediction speed by 30%. Include more controls in Advanced Options to enhance user experience. Enable intermediate result output for easier development. Enjoy using it!
+- 18/Dec/2024. Thanks to @[StartHua](https://github.com/StartHua) for integrating Leffa into ComfyUI! Here is the [repo](https://github.com/StartHua/Comfyui_leffa)!
+- 16/Dec/2024. The virtual try-on [model](https://huggingface.co/franciszzj/Leffa/blob/main/virtual_tryon_dc.pth) trained on DressCode is released.
+- 12/Dec/2024. The HuggingFace [demo](https://huggingface.co/spaces/franciszzj/Leffa) and [models](https://huggingface.co/franciszzj/Leffa) (virtual try-on model trained on VITON-HD and pose transfer model trained on DeepFashion) are released.
+- 11/Dec/2024. The [arXiv](https://arxiv.org/abs/2412.08486) version of the paper is released.
+*[Leffa](https://en.wiktionary.org/wiki/leffa)* is a unified framework for controllable person image generation that enables precise manipulation of both appearance (i.e., virtual try-on) and pose (i.e., pose transfer).
+<div align="center">
+  <img src="https://huggingface.co/franciszzj/Leffa/resolve/main/assets/teaser.png" width="100%" height="100%"/>
+</div>
+## Abstract
+Controllable person image generation aims to generate a person image conditioned on reference images, allowing precise control over the person’s appearance or pose. However, prior methods often distort fine-grained textural details from the reference image, despite achieving high overall image quality. We attribute these distortions to inadequate attention to corresponding regions in the reference image. To address this, we thereby propose **le**arning **f**low **f**ields in **a**ttention (***Leffa***), which explicitly guides the target query to attend to the correct reference key in the attention layer during training. Specifically, it is realized via a regularization loss on top of the attention map within a diffusion-based baseline. Our extensive experiments show that *Leffa* achieves state-of-the-art performance in controlling appearance (virtual try-on) and pose (pose transfer), significantly reducing fine-grained detail distortion while maintaining high image quality. Additionally, we show that our loss is model-agnostic and can be used to improve the performance of other diffusion models.
+## Method
+An overview of our *Leffa* training pipeline for controllable person image generation. The left is our diffusion-based baseline; the right is our *Leffa* loss. Note that $I_{src}$ and $I_{tgt}$ are the same image during training.
+<div align="center">
+  <img src="https://huggingface.co/franciszzj/Leffa/resolve/main/assets/leffa.png" width="100%" height="100%"/>
+</div>
+## Visualization
+Qualitative visual results comparison with other methods. The input person image for the pose transfer is generated using our method in the virtual try-on. The visualization results demonstrate that our method not only generates high-quality images but also greatly reduces the distortion of fine-grained details.
+<div align="center">
+  <img src="https://huggingface.co/franciszzj/Leffa/resolve/main/assets/vis_result.png" width="100%" height="100%"/>
+</div>
+## Installation
+Create a conda environment and install requirements:
+```shell
+conda create -n leffa python=3.10
+conda activate leffa
+cd Leffa
+pip install -r requirements.txt
+```
+## Gradio App
+Run locally:
+```shell
+python app.py
+```
+## Evaluation
+We use this [code](https://github.com/franciszzj/VtonEval) for metric evaluation.
+## Acknowledgement
+Our code is based on [Diffusers](https://github.com/huggingface/diffusers) and [Transformers](https://github.com/huggingface/transformers).
+We use [SCHP](https://github.com/GoGoDuck912/Self-Correction-Human-Parsing/tree/master) and [DensePose](https://github.com/facebookresearch/DensePose) to generate masks and densepose in our [Demo](https://huggingface.co/spaces/franciszzj/Leffa).
+We also referred to the code of [IDM-VTON](https://github.com/yisol/IDM-VTON) and [CatVTON](https://github.com/Zheng-Chong/CatVTON).
+## Citation
+If you find our work helpful or inspiring, please feel free to cite it.
+```bibtex
+@article{zhou2024learning,
+  title={Learning Flow Fields in Attention for Controllable Person Image Generation},
+  author={Zhou, Zijian and Liu, Shikun and Han, Xiao and Liu, Haozhe and Ng, Kam Woh and Xie, Tian and Cong, Yuren and Li, Hang and Xu, Mengmeng and Pérez-Rúa, Juan-Manuel and Patel, Aditya and Xiang, Tao and Shi, Miaojing and He, Sen},
+  journal={arXiv preprint arXiv:2412.08486},
+  year={2024},
+}
+```

assets/leffa.png ADDED Viewed

Git LFS Details

SHA256: d972a56cb9e26e8d8da7a0d7ad432823deea3d054b2ee86cb5924c504a6c3c8d
Pointer size: 131 Bytes
Size of remote file: 996 kB

assets/teaser.png ADDED Viewed

Git LFS Details

SHA256: bdbe1adca64c2e3c724e49b0716d1184748d90e8fc59efe6c0522c3b6f9e0061
Pointer size: 132 Bytes
Size of remote file: 3.01 MB

assets/vis_result.png ADDED Viewed

Git LFS Details

SHA256: 9487c022ac15b2657d590988d4f9b64733e601815f5c78e34c67b2fb9492358e
Pointer size: 132 Bytes
Size of remote file: 4.77 MB

densepose/Base-DensePose-RCNN-FPN.yaml ADDED Viewed

	@@ -0,0 +1,48 @@

+VERSION: 2
+MODEL:
+  META_ARCHITECTURE: "GeneralizedRCNN"
+  BACKBONE:
+    NAME: "build_resnet_fpn_backbone"
+  RESNETS:
+    OUT_FEATURES: ["res2", "res3", "res4", "res5"]
+  FPN:
+    IN_FEATURES: ["res2", "res3", "res4", "res5"]
+  ANCHOR_GENERATOR:
+    SIZES: [[32], [64], [128], [256], [512]]  # One size for each in feature map
+    ASPECT_RATIOS: [[0.5, 1.0, 2.0]]  # Three aspect ratios (same for all in feature maps)
+  RPN:
+    IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
+    PRE_NMS_TOPK_TRAIN: 2000  # Per FPN level
+    PRE_NMS_TOPK_TEST: 1000  # Per FPN level
+    # Detectron1 uses 2000 proposals per-batch,
+    # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
+    # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
+    POST_NMS_TOPK_TRAIN: 1000
+    POST_NMS_TOPK_TEST: 1000
+  DENSEPOSE_ON: True
+  ROI_HEADS:
+    NAME: "DensePoseROIHeads"
+    IN_FEATURES: ["p2", "p3", "p4", "p5"]
+    NUM_CLASSES: 1
+  ROI_BOX_HEAD:
+    NAME: "FastRCNNConvFCHead"
+    NUM_FC: 2
+    POOLER_RESOLUTION: 7
+    POOLER_SAMPLING_RATIO: 2
+    POOLER_TYPE: "ROIAlign"
+  ROI_DENSEPOSE_HEAD:
+    NAME: "DensePoseV1ConvXHead"
+    POOLER_TYPE: "ROIAlign"
+    NUM_COARSE_SEGM_CHANNELS: 2
+DATASETS:
+  TRAIN: ("densepose_coco_2014_train", "densepose_coco_2014_valminusminival")
+  TEST: ("densepose_coco_2014_minival",)
+SOLVER:
+  IMS_PER_BATCH: 16
+  BASE_LR: 0.01
+  STEPS: (60000, 80000)
+  MAX_ITER: 90000
+  WARMUP_FACTOR: 0.1
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)

densepose/densepose_rcnn_R_50_FPN_s1x.yaml ADDED Viewed

	@@ -0,0 +1,8 @@

+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  RESNETS:
+    DEPTH: 50
+SOLVER:
+  MAX_ITER: 130000
+  STEPS: (100000, 120000)

densepose/model_final_162be9.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b8a7382001b16e453bad95ca9dbc68ae8f2b839b304cf90eaf5c27fbdb4dae91
+size 255757821

examples/garment/01449_00.jpg ADDED Viewed

Git LFS Details

SHA256: 5fdbe45ddd029dece0e1ff5647f2668cee2041709c00ee35287be3875cb481f9
Pointer size: 130 Bytes
Size of remote file: 91.3 kB

examples/garment/01486_00.jpg ADDED Viewed

Git LFS Details

SHA256: 71a19af30f25afd1741058f7a9407138550a8daf1d80735a5e5708c67d27539a
Pointer size: 130 Bytes
Size of remote file: 60.2 kB

examples/garment/01853_00.jpg ADDED Viewed

Git LFS Details

SHA256: 6eb95f38f7218470672c58f04f517e8307067263163907dc704dc50a0a448818
Pointer size: 130 Bytes
Size of remote file: 66.8 kB

examples/garment/02070_00.jpg ADDED Viewed

Git LFS Details

SHA256: 69ed1fab8693941dfb2a71649e76590e81a964b5fe1b295e5ec874631f6e4460
Pointer size: 130 Bytes
Size of remote file: 64.8 kB

examples/garment/03553_00.jpg ADDED Viewed

Git LFS Details

SHA256: 68e6e8357138574fa65ed8c19602101cafbef04d773069de344ec47999481c61
Pointer size: 131 Bytes
Size of remote file: 140 kB

examples/person1/01350_00.jpg ADDED Viewed

Git LFS Details

SHA256: 49dae5eeb91487a77538a839f841eba657a064901674051c655d3074a7d0b78d
Pointer size: 130 Bytes
Size of remote file: 99.8 kB

examples/person1/01376_00.jpg ADDED Viewed

Git LFS Details

SHA256: c8bb7065f83f6ebb4c51acbfee18ad291473676774063a779ad0db8c12d875de
Pointer size: 131 Bytes
Size of remote file: 111 kB

examples/person1/01416_00.jpg ADDED Viewed

Git LFS Details

SHA256: 6b7e83ece90a58255fd6b758929d5e6cce5e9fe52cee9f2639566ec0e6dfc7bf
Pointer size: 131 Bytes
Size of remote file: 130 kB

examples/person1/05976_00.jpg ADDED Viewed

Git LFS Details

SHA256: f8f89e8888cdac8c870e69237d3c4444cee05654b1ec61c508bafd2350bb6b64
Pointer size: 131 Bytes
Size of remote file: 142 kB

examples/person1/06094_00.jpg ADDED Viewed

Git LFS Details

SHA256: 1f651f17985c4cda7b3ae13db0de9ca42858ae1a2341a3ba2e6ed6fc77ae56af
Pointer size: 131 Bytes
Size of remote file: 113 kB

examples/person2/01850_00.jpg ADDED Viewed

Git LFS Details

SHA256: 21a1b4dd00e1484833f00f712f3b98ca9f21dea50f1eba0fcdb47bd83ff47da0
Pointer size: 131 Bytes
Size of remote file: 135 kB

examples/person2/01875_00.jpg ADDED Viewed

Git LFS Details

SHA256: 41b546ff50cc815f36d17e61fd5187b62759886b7df72e31df6ac75bb34565e3
Pointer size: 130 Bytes
Size of remote file: 83.7 kB

examples/person2/02532_00.jpg ADDED Viewed

Git LFS Details

SHA256: 8cdc3d62b11bdcf8958a8fa9cf5b19eece71c259b5d50f83059e69b0242a735e
Pointer size: 131 Bytes
Size of remote file: 137 kB

examples/person2/02902_00.jpg ADDED Viewed

Git LFS Details

SHA256: 804f40e17336e1cbdfc94c3955a2d00f0610cc763003e0dec428ab1f31317bfc
Pointer size: 131 Bytes
Size of remote file: 113 kB

examples/person2/05346_00.jpg ADDED Viewed

Git LFS Details

SHA256: 609c5482fc7b29e14b2167b214b54602b589d6576f8dc00ee8f3d625887b948f
Pointer size: 131 Bytes
Size of remote file: 131 kB

humanparsing/parsing_atr.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:04c7d1d070d0e0ae943d86b18cb5aaaea9e278d97462e9cfb270cbbe4cd977f4
+size 266859305

humanparsing/parsing_lip.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8436e1dae96e2601c373d1ace29c8f0978b16357d9038c17a8ba756cca376dbc
+size 266863411

openpose/body_pose_model.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:25a948c16078b0f08e236bda51a385d855ef4c153598947c28c0d47ed94bb746
+size 209267595

pose_transfer.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3d8c4acf1e6737a846a67690ac4e109fc7cf4bc7a8186fe4f73115274182a766
+size 20876062782

schp/exp-schp-201908261155-lip.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:24fa3254ceeb74c8435458994a64b522fb439a3635b7b86ff470457e0413da00
+size 267449349

schp/exp-schp-201908301523-atr.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e9d7c91ce3b4e7133df56b599fc817b533e3439c5e8d282a59126d2fda339a2a
+size 267445237

stable-diffusion-inpainting/scheduler/scheduler_config.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "_class_name": "DDIMScheduler",
+  "_diffusers_version": "0.6.0.dev0",
+  "beta_end": 0.012,
+  "beta_schedule": "scaled_linear",
+  "beta_start": 0.00085,
+  "clip_sample": false,
+  "num_train_timesteps": 1000,
+  "set_alpha_to_one": false,
+  "steps_offset": 1,
+  "trained_betas": null,
+  "skip_prk_steps": true
+}

stable-diffusion-inpainting/unet/config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.6.0.dev0",
+  "act_fn": "silu",
+  "attention_head_dim": 8,
+  "block_out_channels": [
+    320,
+    640,
+    1280,
+    1280
+  ],
+  "center_input_sample": false,
+  "cross_attention_dim": 768,
+  "down_block_types": [
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D",
+    "DownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 9,
+  "layers_per_block": 2,
+  "mid_block_scale_factor": 1,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "out_channels": 4,
+  "sample_size": 64,
+  "up_block_types": [
+    "UpBlock2D",
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D"
+  ]
+}

stable-diffusion-inpainting/vae/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "_class_name": "AutoencoderKL",
+  "_diffusers_version": "0.6.0.dev0",
+  "act_fn": "silu",
+  "block_out_channels": [
+    128,
+    256,
+    512,
+    512
+  ],
+  "down_block_types": [
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D"
+  ],
+  "in_channels": 3,
+  "latent_channels": 4,
+  "layers_per_block": 2,
+  "norm_num_groups": 32,
+  "out_channels": 3,
+  "sample_size": 256,
+  "up_block_types": [
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D"
+  ]
+}

stable-diffusion-xl-1.0-inpainting-0.1/scheduler/scheduler_config.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "_class_name": "EulerDiscreteScheduler",
+  "_diffusers_version": "0.21.0.dev0",
+  "beta_end": 0.012,
+  "beta_schedule": "scaled_linear",
+  "beta_start": 0.00085,
+  "clip_sample": false,
+  "interpolation_type": "linear",
+  "num_train_timesteps": 1000,
+  "prediction_type": "epsilon",
+  "sample_max_value": 1.0,
+  "set_alpha_to_one": false,
+  "skip_prk_steps": true,
+  "steps_offset": 1,
+  "timestep_spacing": "leading",
+  "trained_betas": null,
+  "use_karras_sigmas": false
+}

stable-diffusion-xl-1.0-inpainting-0.1/unet/config.json ADDED Viewed

	@@ -0,0 +1,78 @@

+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.21.0.dev0",
+  "_name_or_path": "valhalla/sdxl-inpaint-ema",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "decay": 0.9999,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 9,
+  "inv_gamma": 1.0,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "min_decay": 0.0,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "optimization_step": 37000,
+  "out_channels": 4,
+  "power": 0.6666666666666666,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "update_after_step": 0,
+  "use_ema_warmup": false,
+  "use_linear_projection": true
+}

stable-diffusion-xl-1.0-inpainting-0.1/vae/config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "_class_name": "AutoencoderKL",
+  "_diffusers_version": "0.21.0.dev0",
+  "_name_or_path": "madebyollin/sdxl-vae-fp16-fix",
+  "act_fn": "silu",
+  "block_out_channels": [
+    128,
+    256,
+    512,
+    512
+  ],
+  "down_block_types": [
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D"
+  ],
+  "force_upcast": false,
+  "in_channels": 3,
+  "latent_channels": 4,
+  "layers_per_block": 2,
+  "norm_num_groups": 32,
+  "out_channels": 3,
+  "sample_size": 512,
+  "scaling_factor": 0.13025,
+  "up_block_types": [
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D"
+  ]
+}

virtual_tryon.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f4d4b31dafac19ec5cc4ccbe307d4eb61b76a52f98aaef78fc02b4405c406351
+size 7211553158

virtual_tryon_dc.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c42a83129b885528457a49f9bb9bcb02b5c96e8c7d007421e29e45c73ce626bf
+size 7211553158