# Lauma_wan_2_2_14B_lora / config.yaml
# Uploaded by svjack (folder upload using huggingface_hub).
# Commit 517526d (verified).
---
# Training job for the "Lauma_wan_2_2_14B_lora" LoRA.
# Paths point into an ai-toolkit checkout, so this is presumably an
# ai-toolkit job file - confirm against that tool's config schema.
# NOTE(review): the nesting below was rebuilt from a copy whose indentation
# had been lost; verify it against the original before relying on it.
job: extension
config:
  name: Lauma_wan_2_2_14B_lora
  process:
    - type: ui_trainer
      training_folder: /home/featurize/ai-toolkit/output
      sqlite_db_path: /home/featurize/ai-toolkit/aitk_db.db
      device: cuda
      trigger_word: null
      performance_log_every: 10

      # LoRA adapter settings; alpha equals rank for both linear and conv.
      network:
        type: lora
        linear: 32
        linear_alpha: 32
        conv: 16
        conv_alpha: 16
        lokr_full_rank: true
        lokr_factor: -1
        network_kwargs:
          ignore_if_contains: []

      # Checkpoint output settings.
      save:
        dtype: bf16
        save_every: 250
        max_step_saves_to_keep: 4000
        save_format: diffusers
        push_to_hub: false

      # Two datasets with identical settings apart from folder_path and
      # num_frames.
      # NOTE(review): do_i2v is true on both entries while model.arch below
      # is wan22_14b:t2v - confirm this flag is intended for a T2V run.
      datasets:
        # Single-frame entries (folder name suggests captioned images).
        - folder_path: /home/featurize/ai-toolkit/datasets/Lauma_images_captioned
          control_path: null
          mask_path: null
          mask_min_value: 0.1
          default_caption: ''
          caption_ext: txt
          caption_dropout_rate: 0.05
          cache_latents_to_disk: false
          is_reg: false
          network_weight: 1
          resolution:
            - 512
          controls: []
          shrink_video_to_frames: true
          num_frames: 1
          do_i2v: true
          flip_x: false
          flip_y: false
        # 16-frame entries (folder name suggests captioned videos).
        - folder_path: /home/featurize/ai-toolkit/datasets/Lauma_videos_captioned
          control_path: null
          mask_path: null
          mask_min_value: 0.1
          default_caption: ''
          caption_ext: txt
          caption_dropout_rate: 0.05
          cache_latents_to_disk: false
          is_reg: false
          network_weight: 1
          resolution:
            - 512
          controls: []
          shrink_video_to_frames: true
          num_frames: 16
          do_i2v: true
          flip_x: false
          flip_y: false

      # Optimisation settings.
      train:
        batch_size: 1
        bypass_guidance_embedding: false
        steps: 30000
        gradient_accumulation: 1
        train_unet: true
        train_text_encoder: false
        gradient_checkpointing: true
        noise_scheduler: flowmatch
        optimizer: adamw8bit
        timestep_type: linear
        content_or_style: balanced
        optimizer_params:
          weight_decay: 0.0001
        unload_text_encoder: false
        cache_text_embeddings: true
        lr: 0.0001
        ema_config:
          use_ema: false
          ema_decay: 0.99
        skip_first_sample: true
        disable_sampling: true
        dtype: bf16
        diff_output_preservation: false
        diff_output_preservation_multiplier: 1
        diff_output_preservation_class: person
        switch_boundary_every: 1

      # Base model location and quantisation.
      # NOTE(review): the qtype adapter file name contains "t2i" while arch
      # is t2v - presumably intentional; confirm against the adapter repo.
      model:
        name_or_path: /home/featurize/download/Wan2.2-T2V-A14B-Diffusers-bf16
        quantize: true
        qtype: uint4|ostris/accuracy_recovery_adapters/wan22_14b_t2i_torchao_uint4.safetensors
        quantize_te: true
        qtype_te: qfloat8
        arch: wan22_14b:t2v
        low_vram: true
        model_kwargs:
          train_high_noise: true
          train_low_noise: true

      # Preview sampling settings.
      # NOTE(review): train.disable_sampling is true above, so these prompts
      # presumably go unused during this run - confirm.
      sample:
        sampler: flowmatch
        sample_every: 250
        width: 1024
        height: 1024
        # Wrapped prompts are plain multi-line scalars: each line break folds
        # to a single space when parsed.
        samples:
          - prompt: woman with red hair, playing chess at the park, bomb going
              off in the background
          - prompt: a woman holding a coffee cup, in a beanie, sitting at a cafe
          - prompt: a horse is a DJ at a night club, fish eye lens, smoke
              machine, lazer lights, holding a martini
          - prompt: a man showing off his cool new t shirt at the beach, a shark
              is jumping out of the water in the background
          - prompt: a bear building a log cabin in the snow covered mountains
          - prompt: woman playing the guitar, on stage, singing a song, laser
              lights, punk rocker
          - prompt: hipster man with a beard, building a chair, in a wood shop
          - prompt: photo of a man, white background, medium shot, modeling
              clothing, studio lighting, white backdrop
          - prompt: a man holding a sign that says, 'this is a sign'
          - prompt: a bulldog, in a post apocalyptic world, with a shotgun, in a
              leather jacket, in a desert, with a motorcycle
        neg: ''
        seed: 42
        walk_seed: true
        guidance_scale: 4
        sample_steps: 25
        num_frames: 41
        fps: 16

meta:
  name: Lauma_wan_2_2_14B_lora
  # Quoted so the version stays a string rather than the float 1.0.
  version: '1.0'