# Provenance (hosting-page residue): uploaded by davidrd123, commit message "Upload 3 files", commit 759d882 (verified).
# Run locations. These are machine-specific absolute paths; change them for your setup.
output_dir = "/root/diffusion-pipe/output/goya_drawings_qwen"
dataset = "/root/diffusion-pipe/my_configs/dataset.toml"

# Training settings.
epochs = 1000
micro_batch_size_per_gpu = 4
gradient_accumulation_steps = 1
pipeline_stages = 1
gradient_clipping = 1
# warmup_steps is left unset (commented out).
#warmup_steps = 100

# Evaluation settings. The epoch-based interval is the active one; the
# step-based alternative is left commented out.
eval_every_n_epochs = 1
#eval_every_n_steps = 100
eval_before_first_step = true
eval_micro_batch_size_per_gpu = 1
eval_gradient_accumulation_steps = 1

# Saving and checkpointing. Both an epoch-based and a step-based save interval
# are set. Checkpointing uses the minute-based interval; the epoch-based
# alternative is left commented out.
save_every_n_steps = 250
save_every_n_epochs = 500
checkpoint_every_n_minutes = 30
#checkpoint_every_n_epochs = 1
save_dtype = "bfloat16"

# Miscellaneous settings.
activation_checkpointing = "unsloth"
partition_method = "parameters"
caching_batch_size = 8
steps_per_print = 1
[model]
type = "qwen_image"
# Machine-specific absolute path; change it for your setup.
diffusers_path = "/root/diffusion-pipe/imagegen_models/wan/Qwen-Image"
dtype = "bfloat16"
timestep_sample_method = "logit_normal"
# transformer_dtype is left unset (commented out): using full BF16 for best
# quality on B200.
# transformer_dtype = "bfloat16"
# Adapter settings.
[adapter]
type = "lora"
dtype = "bfloat16"
rank = 32
# Automagic should work fine without any tuning. AdamW with a properly tuned
# learning rate can probably give better results; to try it, replace the
# active table below with the commented-out alternative (only one [optimizer]
# table may be defined at a time).
[optimizer]
type = "automagic"
weight_decay = 0.01

# Alternative optimizer (disabled):
# [optimizer]
# type = "AdamW8bitKahan"
# lr = 2e-5
# betas = [0.9, 0.99]
# weight_decay = 0.01