davidrd123 committed · Commit 808eea7 · verified · 1 Parent(s): 6c7f90e

Upload 24 files

step1000/adapter_config.json ADDED
@@ -0,0 +1,47 @@
+ {
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": null,
+ "bias": "none",
+ "corda_config": null,
+ "eva_config": null,
+ "exclude_modules": null,
+ "fan_in_fan_out": false,
+ "inference_mode": false,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 32,
+ "lora_bias": false,
+ "lora_dropout": 0.0,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "qalora_group_size": 16,
+ "r": 32,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "to_v",
+ "to_q",
+ "add_k_proj",
+ "add_q_proj",
+ "proj",
+ "img_mod.1",
+ "to_out.0",
+ "to_k",
+ "txt_mod.1",
+ "net.2",
+ "add_v_proj",
+ "to_add_out"
+ ],
+ "target_parameters": null,
+ "task_type": null,
+ "trainable_token_indices": null,
+ "use_dora": false,
+ "use_qalora": false,
+ "use_rslora": false
+ }
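
A minimal sketch, assuming the repo has been cloned locally with git-lfs, of how the PEFT adapter config and weights at step1000 could be inspected. The local "step1000/" paths mirror this commit's layout; nothing here is part of the training pipeline itself.

import json

from safetensors import safe_open  # requires safetensors (and torch for framework="pt")

with open("step1000/adapter_config.json") as f:
    cfg = json.load(f)

# r is the LoRA rank and lora_alpha the scaling numerator; scale = alpha / r = 1.0 here.
print(cfg["r"], cfg["lora_alpha"], cfg["target_modules"])

# Peek at a few tensors without loading the full ~590 MB file into memory.
with safe_open("step1000/adapter_model.safetensors", framework="pt") as st:
    for name in list(st.keys())[:4]:
        # lora_A has r rows, lora_B has r columns, so both should show 32 somewhere.
        print(name, tuple(st.get_tensor(name).shape))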
step1000/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:643f865fa3f8139987824553e405e780d376b7470a768ea3e12a9b1cda47f1d8
+ size 590058432
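
A minimal, stdlib-only sketch of how this git-lfs pointer could be checked against a locally downloaded adapter_model.safetensors: the pointer records the blob's SHA-256 and size, so both are easy to verify after `git lfs pull`.

import hashlib
import os

path = "step1000/adapter_model.safetensors"
expected_oid = "643f865fa3f8139987824553e405e780d376b7470a768ea3e12a9b1cda47f1d8"
expected_size = 590058432

digest = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        digest.update(chunk)

assert os.path.getsize(path) == expected_size
assert digest.hexdigest() == expected_oid
print("LFS pointer matches the downloaded blob")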
step1000/qwen.toml ADDED
@@ -0,0 +1,55 @@
+ # Change these paths
+ output_dir = '/root/diffusion-pipe/output/goya_drawings_qwen'
+ dataset = '/root/diffusion-pipe/my_configs/dataset.toml'
+
+ # training settings
+ epochs = 1000
+ micro_batch_size_per_gpu = 4
+ pipeline_stages = 1
+ gradient_accumulation_steps = 1
+ gradient_clipping = 1
+ #warmup_steps = 100
+
+ # eval settings
+ eval_every_n_epochs = 1
+ #eval_every_n_steps = 100
+ eval_before_first_step = true
+ eval_micro_batch_size_per_gpu = 1
+ eval_gradient_accumulation_steps = 1
+
+ # misc settings
+ save_every_n_epochs = 500
+ save_every_n_steps = 250
+ #checkpoint_every_n_epochs = 1
+ checkpoint_every_n_minutes = 30
+ activation_checkpointing = 'unsloth'
+ partition_method = 'parameters'
+ save_dtype = 'bfloat16'
+ caching_batch_size = 8
+ steps_per_print = 1
+
+
+ [model]
+ type = 'qwen_image'
+ # Change this path
+ diffusers_path = '/root/diffusion-pipe/imagegen_models/wan/Qwen-Image'
+ dtype = 'bfloat16'
+ # transformer_dtype = 'bfloat16' # Using full BF16 for best quality on B200
+ timestep_sample_method = 'logit_normal'
+
+ [adapter]
+ type = 'lora'
+ rank = 32
+ dtype = 'bfloat16'
+
+ # Automagic should work fine and requires no tuning, but you can probably get better results using AdamW
+ # with a properly tuned learning rate.
+ [optimizer]
+ type = 'automagic'
+ weight_decay = 0.01
+
+ # [optimizer]
+ # type = 'AdamW8bitKahan'
+ # lr = 2e-5
+ # betas = [0.9, 0.99]
+ # weight_decay = 0.01
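
A minimal sketch, using only the standard library (tomllib needs Python 3.11+), of how this diffusion-pipe config could be parsed to sanity-check the effective batch size before launching training. num_gpus is an assumption; the commit does not record the data-parallel world size.

import tomllib

with open("step1000/qwen.toml", "rb") as f:
    cfg = tomllib.load(f)

num_gpus = 1  # hypothetical; set to the actual number of GPUs used
effective_batch = (
    cfg["micro_batch_size_per_gpu"]
    * cfg["gradient_accumulation_steps"]
    * num_gpus
)
print("effective batch size per optimizer step:", effective_batch)
print("adapter:", cfg["adapter"]["type"], "rank", cfg["adapter"]["rank"])
print("optimizer:", cfg["optimizer"]["type"])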
step2000/adapter_config.json ADDED
@@ -0,0 +1,47 @@
+ {
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": null,
+ "bias": "none",
+ "corda_config": null,
+ "eva_config": null,
+ "exclude_modules": null,
+ "fan_in_fan_out": false,
+ "inference_mode": false,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 32,
+ "lora_bias": false,
+ "lora_dropout": 0.0,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "qalora_group_size": 16,
+ "r": 32,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "to_v",
+ "to_q",
+ "add_k_proj",
+ "add_q_proj",
+ "proj",
+ "img_mod.1",
+ "to_out.0",
+ "to_k",
+ "txt_mod.1",
+ "net.2",
+ "add_v_proj",
+ "to_add_out"
+ ],
+ "target_parameters": null,
+ "task_type": null,
+ "trainable_token_indices": null,
+ "use_dora": false,
+ "use_qalora": false,
+ "use_rslora": false
+ }
step2000/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4b78e4acc5b9077771a2fbcca9c27949b7a8c6f48d42cc6d3da2b14e0278b2ea
+ size 590058432
step2000/qwen.toml ADDED
@@ -0,0 +1,55 @@
+ # Change these paths
+ output_dir = '/root/diffusion-pipe/output/goya_drawings_qwen'
+ dataset = '/root/diffusion-pipe/my_configs/dataset.toml'
+
+ # training settings
+ epochs = 1000
+ micro_batch_size_per_gpu = 4
+ pipeline_stages = 1
+ gradient_accumulation_steps = 1
+ gradient_clipping = 1
+ #warmup_steps = 100
+
+ # eval settings
+ eval_every_n_epochs = 1
+ #eval_every_n_steps = 100
+ eval_before_first_step = true
+ eval_micro_batch_size_per_gpu = 1
+ eval_gradient_accumulation_steps = 1
+
+ # misc settings
+ save_every_n_epochs = 500
+ save_every_n_steps = 250
+ #checkpoint_every_n_epochs = 1
+ checkpoint_every_n_minutes = 30
+ activation_checkpointing = 'unsloth'
+ partition_method = 'parameters'
+ save_dtype = 'bfloat16'
+ caching_batch_size = 8
+ steps_per_print = 1
+
+
+ [model]
+ type = 'qwen_image'
+ # Change this path
+ diffusers_path = '/root/diffusion-pipe/imagegen_models/wan/Qwen-Image'
+ dtype = 'bfloat16'
+ # transformer_dtype = 'bfloat16' # Using full BF16 for best quality on B200
+ timestep_sample_method = 'logit_normal'
+
+ [adapter]
+ type = 'lora'
+ rank = 32
+ dtype = 'bfloat16'
+
+ # Automagic should work fine and requires no tuning, but you can probably get better results using AdamW
+ # with a properly tuned learning rate.
+ [optimizer]
+ type = 'automagic'
+ weight_decay = 0.01
+
+ # [optimizer]
+ # type = 'AdamW8bitKahan'
+ # lr = 2e-5
+ # betas = [0.9, 0.99]
+ # weight_decay = 0.01
step3000/adapter_config.json ADDED
@@ -0,0 +1,47 @@
+ {
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": null,
+ "bias": "none",
+ "corda_config": null,
+ "eva_config": null,
+ "exclude_modules": null,
+ "fan_in_fan_out": false,
+ "inference_mode": false,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 32,
+ "lora_bias": false,
+ "lora_dropout": 0.0,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "qalora_group_size": 16,
+ "r": 32,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "net.2",
+ "txt_mod.1",
+ "to_q",
+ "to_add_out",
+ "proj",
+ "to_v",
+ "add_q_proj",
+ "to_out.0",
+ "add_v_proj",
+ "img_mod.1",
+ "to_k",
+ "add_k_proj"
+ ],
+ "target_parameters": null,
+ "task_type": null,
+ "trainable_token_indices": null,
+ "use_dora": false,
+ "use_qalora": false,
+ "use_rslora": false
+ }
step3000/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e7a269e4ad28fe6220069b29323630c49d27aca238df7ed919ef628440281f90
+ size 590058432
step3000/qwen.toml ADDED
@@ -0,0 +1,55 @@
+ # Change these paths
+ output_dir = '/root/diffusion-pipe/output/goya_drawings_qwen'
+ dataset = '/root/diffusion-pipe/my_configs/dataset.toml'
+
+ # training settings
+ epochs = 1000
+ micro_batch_size_per_gpu = 4
+ pipeline_stages = 1
+ gradient_accumulation_steps = 1
+ gradient_clipping = 1
+ #warmup_steps = 100
+
+ # eval settings
+ eval_every_n_epochs = 1
+ #eval_every_n_steps = 100
+ eval_before_first_step = true
+ eval_micro_batch_size_per_gpu = 1
+ eval_gradient_accumulation_steps = 1
+
+ # misc settings
+ save_every_n_epochs = 500
+ save_every_n_steps = 250
+ #checkpoint_every_n_epochs = 1
+ checkpoint_every_n_minutes = 30
+ activation_checkpointing = 'unsloth'
+ partition_method = 'parameters'
+ save_dtype = 'bfloat16'
+ caching_batch_size = 8
+ steps_per_print = 1
+
+
+ [model]
+ type = 'qwen_image'
+ # Change this path
+ diffusers_path = '/root/diffusion-pipe/imagegen_models/wan/Qwen-Image'
+ dtype = 'bfloat16'
+ # transformer_dtype = 'bfloat16' # Using full BF16 for best quality on B200
+ timestep_sample_method = 'logit_normal'
+
+ [adapter]
+ type = 'lora'
+ rank = 32
+ dtype = 'bfloat16'
+
+ # Automagic should work fine and requires no tuning, but you can probably get better results using AdamW
+ # with a properly tuned learning rate.
+ [optimizer]
+ type = 'automagic'
+ weight_decay = 0.01
+
+ # [optimizer]
+ # type = 'AdamW8bitKahan'
+ # lr = 2e-5
+ # betas = [0.9, 0.99]
+ # weight_decay = 0.01
step4000/adapter_config.json ADDED
@@ -0,0 +1,47 @@
+ {
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": null,
+ "bias": "none",
+ "corda_config": null,
+ "eva_config": null,
+ "exclude_modules": null,
+ "fan_in_fan_out": false,
+ "inference_mode": false,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 32,
+ "lora_bias": false,
+ "lora_dropout": 0.0,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "qalora_group_size": 16,
+ "r": 32,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "net.2",
+ "txt_mod.1",
+ "to_q",
+ "to_add_out",
+ "proj",
+ "to_v",
+ "add_q_proj",
+ "to_out.0",
+ "add_v_proj",
+ "img_mod.1",
+ "to_k",
+ "add_k_proj"
+ ],
+ "target_parameters": null,
+ "task_type": null,
+ "trainable_token_indices": null,
+ "use_dora": false,
+ "use_qalora": false,
+ "use_rslora": false
+ }
step4000/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6c7333df0384ecda13289d43dec5158e362c9c6bd098d60b07941d512ae60825
+ size 590058432
step4000/qwen.toml ADDED
@@ -0,0 +1,55 @@
+ # Change these paths
+ output_dir = '/root/diffusion-pipe/output/goya_drawings_qwen'
+ dataset = '/root/diffusion-pipe/my_configs/dataset.toml'
+
+ # training settings
+ epochs = 1000
+ micro_batch_size_per_gpu = 4
+ pipeline_stages = 1
+ gradient_accumulation_steps = 1
+ gradient_clipping = 1
+ #warmup_steps = 100
+
+ # eval settings
+ eval_every_n_epochs = 1
+ #eval_every_n_steps = 100
+ eval_before_first_step = true
+ eval_micro_batch_size_per_gpu = 1
+ eval_gradient_accumulation_steps = 1
+
+ # misc settings
+ save_every_n_epochs = 500
+ save_every_n_steps = 250
+ #checkpoint_every_n_epochs = 1
+ checkpoint_every_n_minutes = 30
+ activation_checkpointing = 'unsloth'
+ partition_method = 'parameters'
+ save_dtype = 'bfloat16'
+ caching_batch_size = 8
+ steps_per_print = 1
+
+
+ [model]
+ type = 'qwen_image'
+ # Change this path
+ diffusers_path = '/root/diffusion-pipe/imagegen_models/wan/Qwen-Image'
+ dtype = 'bfloat16'
+ # transformer_dtype = 'bfloat16' # Using full BF16 for best quality on B200
+ timestep_sample_method = 'logit_normal'
+
+ [adapter]
+ type = 'lora'
+ rank = 32
+ dtype = 'bfloat16'
+
+ # Automagic should work fine and requires no tuning, but you can probably get better results using AdamW
+ # with a properly tuned learning rate.
+ [optimizer]
+ type = 'automagic'
+ weight_decay = 0.01
+
+ # [optimizer]
+ # type = 'AdamW8bitKahan'
+ # lr = 2e-5
+ # betas = [0.9, 0.99]
+ # weight_decay = 0.01
step5000/adapter_config.json ADDED
@@ -0,0 +1,47 @@
+ {
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": null,
+ "bias": "none",
+ "corda_config": null,
+ "eva_config": null,
+ "exclude_modules": null,
+ "fan_in_fan_out": false,
+ "inference_mode": false,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 32,
+ "lora_bias": false,
+ "lora_dropout": 0.0,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "qalora_group_size": 16,
+ "r": 32,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "net.2",
+ "txt_mod.1",
+ "to_q",
+ "to_add_out",
+ "proj",
+ "to_v",
+ "add_q_proj",
+ "to_out.0",
+ "add_v_proj",
+ "img_mod.1",
+ "to_k",
+ "add_k_proj"
+ ],
+ "target_parameters": null,
+ "task_type": null,
+ "trainable_token_indices": null,
+ "use_dora": false,
+ "use_qalora": false,
+ "use_rslora": false
+ }
step5000/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4761aae489e9911b01b7b037d3c4a4d35e74c94fdcacb617ec623f7ff07cb22c
+ size 590058432
step5000/qwen.toml ADDED
@@ -0,0 +1,55 @@
+ # Change these paths
+ output_dir = '/root/diffusion-pipe/output/goya_drawings_qwen'
+ dataset = '/root/diffusion-pipe/my_configs/dataset.toml'
+
+ # training settings
+ epochs = 1000
+ micro_batch_size_per_gpu = 4
+ pipeline_stages = 1
+ gradient_accumulation_steps = 1
+ gradient_clipping = 1
+ #warmup_steps = 100
+
+ # eval settings
+ eval_every_n_epochs = 1
+ #eval_every_n_steps = 100
+ eval_before_first_step = true
+ eval_micro_batch_size_per_gpu = 1
+ eval_gradient_accumulation_steps = 1
+
+ # misc settings
+ save_every_n_epochs = 500
+ save_every_n_steps = 250
+ #checkpoint_every_n_epochs = 1
+ checkpoint_every_n_minutes = 30
+ activation_checkpointing = 'unsloth'
+ partition_method = 'parameters'
+ save_dtype = 'bfloat16'
+ caching_batch_size = 8
+ steps_per_print = 1
+
+
+ [model]
+ type = 'qwen_image'
+ # Change this path
+ diffusers_path = '/root/diffusion-pipe/imagegen_models/wan/Qwen-Image'
+ dtype = 'bfloat16'
+ # transformer_dtype = 'bfloat16' # Using full BF16 for best quality on B200
+ timestep_sample_method = 'logit_normal'
+
+ [adapter]
+ type = 'lora'
+ rank = 32
+ dtype = 'bfloat16'
+
+ # Automagic should work fine and requires no tuning, but you can probably get better results using AdamW
+ # with a properly tuned learning rate.
+ [optimizer]
+ type = 'automagic'
+ weight_decay = 0.01
+
+ # [optimizer]
+ # type = 'AdamW8bitKahan'
+ # lr = 2e-5
+ # betas = [0.9, 0.99]
+ # weight_decay = 0.01
step6000/adapter_config.json ADDED
@@ -0,0 +1,47 @@
+ {
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": null,
+ "bias": "none",
+ "corda_config": null,
+ "eva_config": null,
+ "exclude_modules": null,
+ "fan_in_fan_out": false,
+ "inference_mode": false,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 32,
+ "lora_bias": false,
+ "lora_dropout": 0.0,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "qalora_group_size": 16,
+ "r": 32,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "net.2",
+ "txt_mod.1",
+ "to_q",
+ "to_add_out",
+ "proj",
+ "to_v",
+ "add_q_proj",
+ "to_out.0",
+ "add_v_proj",
+ "img_mod.1",
+ "to_k",
+ "add_k_proj"
+ ],
+ "target_parameters": null,
+ "task_type": null,
+ "trainable_token_indices": null,
+ "use_dora": false,
+ "use_qalora": false,
+ "use_rslora": false
+ }
step6000/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5df6f916a4494362ced8c6c754fbc73f55112b706705f7a66f779d91dc053aa9
+ size 590058432
step6000/qwen.toml ADDED
@@ -0,0 +1,55 @@
+ # Change these paths
+ output_dir = '/root/diffusion-pipe/output/goya_drawings_qwen'
+ dataset = '/root/diffusion-pipe/my_configs/dataset.toml'
+
+ # training settings
+ epochs = 1000
+ micro_batch_size_per_gpu = 4
+ pipeline_stages = 1
+ gradient_accumulation_steps = 1
+ gradient_clipping = 1
+ #warmup_steps = 100
+
+ # eval settings
+ eval_every_n_epochs = 1
+ #eval_every_n_steps = 100
+ eval_before_first_step = true
+ eval_micro_batch_size_per_gpu = 1
+ eval_gradient_accumulation_steps = 1
+
+ # misc settings
+ save_every_n_epochs = 500
+ save_every_n_steps = 250
+ #checkpoint_every_n_epochs = 1
+ checkpoint_every_n_minutes = 30
+ activation_checkpointing = 'unsloth'
+ partition_method = 'parameters'
+ save_dtype = 'bfloat16'
+ caching_batch_size = 8
+ steps_per_print = 1
+
+
+ [model]
+ type = 'qwen_image'
+ # Change this path
+ diffusers_path = '/root/diffusion-pipe/imagegen_models/wan/Qwen-Image'
+ dtype = 'bfloat16'
+ # transformer_dtype = 'bfloat16' # Using full BF16 for best quality on B200
+ timestep_sample_method = 'logit_normal'
+
+ [adapter]
+ type = 'lora'
+ rank = 32
+ dtype = 'bfloat16'
+
+ # Automagic should work fine and requires no tuning, but you can probably get better results using AdamW
+ # with a properly tuned learning rate.
+ [optimizer]
+ type = 'automagic'
+ weight_decay = 0.01
+
+ # [optimizer]
+ # type = 'AdamW8bitKahan'
+ # lr = 2e-5
+ # betas = [0.9, 0.99]
+ # weight_decay = 0.01
step7000/adapter_config.json ADDED
@@ -0,0 +1,47 @@
+ {
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": null,
+ "bias": "none",
+ "corda_config": null,
+ "eva_config": null,
+ "exclude_modules": null,
+ "fan_in_fan_out": false,
+ "inference_mode": false,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 32,
+ "lora_bias": false,
+ "lora_dropout": 0.0,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "qalora_group_size": 16,
+ "r": 32,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "net.2",
+ "txt_mod.1",
+ "to_q",
+ "to_add_out",
+ "proj",
+ "to_v",
+ "add_q_proj",
+ "to_out.0",
+ "add_v_proj",
+ "img_mod.1",
+ "to_k",
+ "add_k_proj"
+ ],
+ "target_parameters": null,
+ "task_type": null,
+ "trainable_token_indices": null,
+ "use_dora": false,
+ "use_qalora": false,
+ "use_rslora": false
+ }
step7000/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f093c308672d95cc16a95276fe24bf2750847c1bb0d9ddb186da0504589c629f
+ size 590058432
step7000/qwen.toml ADDED
@@ -0,0 +1,55 @@
+ # Change these paths
+ output_dir = '/root/diffusion-pipe/output/goya_drawings_qwen'
+ dataset = '/root/diffusion-pipe/my_configs/dataset.toml'
+
+ # training settings
+ epochs = 1000
+ micro_batch_size_per_gpu = 4
+ pipeline_stages = 1
+ gradient_accumulation_steps = 1
+ gradient_clipping = 1
+ #warmup_steps = 100
+
+ # eval settings
+ eval_every_n_epochs = 1
+ #eval_every_n_steps = 100
+ eval_before_first_step = true
+ eval_micro_batch_size_per_gpu = 1
+ eval_gradient_accumulation_steps = 1
+
+ # misc settings
+ save_every_n_epochs = 500
+ save_every_n_steps = 250
+ #checkpoint_every_n_epochs = 1
+ checkpoint_every_n_minutes = 30
+ activation_checkpointing = 'unsloth'
+ partition_method = 'parameters'
+ save_dtype = 'bfloat16'
+ caching_batch_size = 8
+ steps_per_print = 1
+
+
+ [model]
+ type = 'qwen_image'
+ # Change this path
+ diffusers_path = '/root/diffusion-pipe/imagegen_models/wan/Qwen-Image'
+ dtype = 'bfloat16'
+ # transformer_dtype = 'bfloat16' # Using full BF16 for best quality on B200
+ timestep_sample_method = 'logit_normal'
+
+ [adapter]
+ type = 'lora'
+ rank = 32
+ dtype = 'bfloat16'
+
+ # Automagic should work fine and requires no tuning, but you can probably get better results using AdamW
+ # with a properly tuned learning rate.
+ [optimizer]
+ type = 'automagic'
+ weight_decay = 0.01
+
+ # [optimizer]
+ # type = 'AdamW8bitKahan'
+ # lr = 2e-5
+ # betas = [0.9, 0.99]
+ # weight_decay = 0.01
step8000/adapter_config.json ADDED
@@ -0,0 +1,47 @@
+ {
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": null,
+ "bias": "none",
+ "corda_config": null,
+ "eva_config": null,
+ "exclude_modules": null,
+ "fan_in_fan_out": false,
+ "inference_mode": false,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 32,
+ "lora_bias": false,
+ "lora_dropout": 0.0,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "qalora_group_size": 16,
+ "r": 32,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "net.2",
+ "txt_mod.1",
+ "to_q",
+ "to_add_out",
+ "proj",
+ "to_v",
+ "add_q_proj",
+ "to_out.0",
+ "add_v_proj",
+ "img_mod.1",
+ "to_k",
+ "add_k_proj"
+ ],
+ "target_parameters": null,
+ "task_type": null,
+ "trainable_token_indices": null,
+ "use_dora": false,
+ "use_qalora": false,
+ "use_rslora": false
+ }
step8000/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c0b42ffadf135da58da90612b8eeac7c0b4284998b30be3a85fe0f59a074a33b
+ size 590058432
step8000/qwen.toml ADDED
@@ -0,0 +1,55 @@
+ # Change these paths
+ output_dir = '/root/diffusion-pipe/output/goya_drawings_qwen'
+ dataset = '/root/diffusion-pipe/my_configs/dataset.toml'
+
+ # training settings
+ epochs = 1000
+ micro_batch_size_per_gpu = 4
+ pipeline_stages = 1
+ gradient_accumulation_steps = 1
+ gradient_clipping = 1
+ #warmup_steps = 100
+
+ # eval settings
+ eval_every_n_epochs = 1
+ #eval_every_n_steps = 100
+ eval_before_first_step = true
+ eval_micro_batch_size_per_gpu = 1
+ eval_gradient_accumulation_steps = 1
+
+ # misc settings
+ save_every_n_epochs = 500
+ save_every_n_steps = 250
+ #checkpoint_every_n_epochs = 1
+ checkpoint_every_n_minutes = 30
+ activation_checkpointing = 'unsloth'
+ partition_method = 'parameters'
+ save_dtype = 'bfloat16'
+ caching_batch_size = 8
+ steps_per_print = 1
+
+
+ [model]
+ type = 'qwen_image'
+ # Change this path
+ diffusers_path = '/root/diffusion-pipe/imagegen_models/wan/Qwen-Image'
+ dtype = 'bfloat16'
+ # transformer_dtype = 'bfloat16' # Using full BF16 for best quality on B200
+ timestep_sample_method = 'logit_normal'
+
+ [adapter]
+ type = 'lora'
+ rank = 32
+ dtype = 'bfloat16'
+
+ # Automagic should work fine and requires no tuning, but you can probably get better results using AdamW
+ # with a properly tuned learning rate.
+ [optimizer]
+ type = 'automagic'
+ weight_decay = 0.01
+
+ # [optimizer]
+ # type = 'AdamW8bitKahan'
+ # lr = 2e-5
+ # betas = [0.9, 0.99]
+ # weight_decay = 0.01
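
A minimal sketch of how one of these adapter checkpoints might be applied at inference time. It assumes a diffusers build with Qwen-Image support and PEFT-backed LoRA loading (load_lora_weights); the prompt, adapter_name, and local checkpoint path below are illustrative and not part of this commit.

import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained("Qwen/Qwen-Image", torch_dtype=torch.bfloat16)
pipe.to("cuda")

# Point this at a checkpoint directory containing adapter_config.json and
# adapter_model.safetensors, e.g. a local clone of this repo.
pipe.load_lora_weights("step8000", adapter_name="goya_drawings")

image = pipe(prompt="a drawing in the style of Goya", num_inference_steps=30).images[0]
image.save("sample.png")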