# Training configuration for diffusion-pipe: Qwen-Image LoRA fine-tune
# on a Goya drawings dataset.
# Change these paths to match your environment.
output_dir = '/root/diffusion-pipe/output/goya_drawings_qwen'
dataset = '/root/diffusion-pipe/my_configs/dataset.toml'

# training settings
# Upper bound on training length; the save/checkpoint intervals below
# produce restorable snapshots long before 1000 epochs complete.
epochs = 1000
micro_batch_size_per_gpu = 4
# Single pipeline stage: no pipeline (model) parallelism across GPUs.
pipeline_stages = 1
gradient_accumulation_steps = 1
# Gradient-norm clip threshold (written as an integer; presumably read
# as 1.0 by the trainer — confirm the consumer accepts an int here).
gradient_clipping = 1
#warmup_steps = 100

# eval settings
eval_every_n_epochs = 1
#eval_every_n_steps = 100
# Run one eval pass before any training step to record a baseline loss.
eval_before_first_step = true
eval_micro_batch_size_per_gpu = 1
eval_gradient_accumulation_steps = 1

# misc settings
# Epoch-based saves are rare (every 500 epochs); the step-based save
# every 250 steps is the one that fires frequently in practice.
save_every_n_epochs = 500
save_every_n_steps = 250
#checkpoint_every_n_epochs = 1
# Wall-clock-based training-state checkpoints for crash/preemption resume.
checkpoint_every_n_minutes = 30
# 'unsloth' activation checkpointing — presumably the Unsloth-style
# recompute variant that trades compute for lower VRAM; see consumer docs.
activation_checkpointing = 'unsloth'
# Balance pipeline stages by parameter count (DeepSpeed-style option;
# has no visible effect here with pipeline_stages = 1).
partition_method = 'parameters'
# Dtype used when writing saved weights to disk.
save_dtype = 'bfloat16'
# Batch size for the one-time latent/text-embedding caching pass.
caching_batch_size = 8
# Log training metrics every step.
steps_per_print = 1


[model]
type = 'qwen_image'
# Change this path to your local checkpoint location.
# NOTE(review): the directory sits under 'imagegen_models/wan/' but holds
# the Qwen-Image Diffusers checkpoint — verify the path is intentional.
diffusers_path = '/root/diffusion-pipe/imagegen_models/wan/Qwen-Image'
# Compute/load dtype for the model weights.
dtype = 'bfloat16'
# transformer_dtype = 'bfloat16'  # Using full BF16 for best quality on B200
# Draw training timesteps from a logit-normal distribution rather than
# uniformly (presumably the flow-matching-style sampling — see consumer docs).
timestep_sample_method = 'logit_normal'

# LoRA adapter: only the low-rank adapter weights are trained/saved,
# not the full model.
[adapter]
type = 'lora'
# LoRA rank — adapter capacity; higher rank = more trainable parameters.
rank = 32
dtype = 'bfloat16'

# Automagic should work fine and requires no tuning, but you can probably get
# better results using AdamW with a properly tuned learning rate.
[optimizer]
# 'automagic' — self-tuning optimizer; no learning rate key required here.
type = 'automagic'
weight_decay = 0.01

# Alternative: AdamW (8-bit with Kahan summation). Uncomment this table and
# comment out the one above to use it; lr/betas below are starting points.
# [optimizer]
# type = 'AdamW8bitKahan'
# lr = 2e-5
# betas = [0.9, 0.99]
# weight_decay = 0.01