davidrd123 committed 15 days ago

Commit

808eea7

verified ·

1 Parent(s): 6c7f90e

Upload 24 files

Browse files

Files changed (24) hide show

step1000/adapter_config.json +47 -0
step1000/adapter_model.safetensors +3 -0
step1000/qwen.toml +55 -0
step2000/adapter_config.json +47 -0
step2000/adapter_model.safetensors +3 -0
step2000/qwen.toml +55 -0
step3000/adapter_config.json +47 -0
step3000/adapter_model.safetensors +3 -0
step3000/qwen.toml +55 -0
step4000/adapter_config.json +47 -0
step4000/adapter_model.safetensors +3 -0
step4000/qwen.toml +55 -0
step5000/adapter_config.json +47 -0
step5000/adapter_model.safetensors +3 -0
step5000/qwen.toml +55 -0
step6000/adapter_config.json +47 -0
step6000/adapter_model.safetensors +3 -0
step6000/qwen.toml +55 -0
step7000/adapter_config.json +47 -0
step7000/adapter_model.safetensors +3 -0
step7000/qwen.toml +55 -0
step8000/adapter_config.json +47 -0
step8000/adapter_model.safetensors +3 -0
step8000/qwen.toml +55 -0

step1000/adapter_config.json ADDED Viewed

	@@ -0,0 +1,47 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": null,
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": false,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_bias": false,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "qalora_group_size": 16,
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "to_v",
+    "to_q",
+    "add_k_proj",
+    "add_q_proj",
+    "proj",
+    "img_mod.1",
+    "to_out.0",
+    "to_k",
+    "txt_mod.1",
+    "net.2",
+    "add_v_proj",
+    "to_add_out"
+  ],
+  "target_parameters": null,
+  "task_type": null,
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

step1000/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:643f865fa3f8139987824553e405e780d376b7470a768ea3e12a9b1cda47f1d8
+size 590058432

step1000/qwen.toml ADDED Viewed

	@@ -0,0 +1,55 @@

+# Change these paths
+output_dir = '/root/diffusion-pipe/output/goya_drawings_qwen'
+dataset = '/root/diffusion-pipe/my_configs/dataset.toml'
+# training settings
+epochs = 1000
+micro_batch_size_per_gpu = 4
+pipeline_stages = 1
+gradient_accumulation_steps = 1
+gradient_clipping = 1
+#warmup_steps = 100
+# eval settings
+eval_every_n_epochs = 1
+#eval_every_n_steps = 100
+eval_before_first_step = true
+eval_micro_batch_size_per_gpu = 1
+eval_gradient_accumulation_steps = 1
+# misc settings
+save_every_n_epochs = 500
+save_every_n_steps = 250
+#checkpoint_every_n_epochs = 1
+checkpoint_every_n_minutes = 30
+activation_checkpointing = 'unsloth'
+partition_method = 'parameters'
+save_dtype = 'bfloat16'
+caching_batch_size = 8
+steps_per_print = 1
+[model]
+type = 'qwen_image'
+# Change this path
+diffusers_path = '/root/diffusion-pipe/imagegen_models/wan/Qwen-Image'
+dtype = 'bfloat16'
+# transformer_dtype = 'bfloat16'  # Using full BF16 for best quality on B200
+timestep_sample_method = 'logit_normal'
+[adapter]
+type = 'lora'
+rank = 32
+dtype = 'bfloat16'
+# Automagic should work fine and requires no tuning, but you can probably get better results using AdamW
+# with a properly tuned learning rate.
+[optimizer]
+type = 'automagic'
+weight_decay = 0.01
+# [optimizer]
+# type = 'AdamW8bitKahan'
+# lr = 2e-5
+# betas = [0.9, 0.99]
+# weight_decay = 0.01

step2000/adapter_config.json ADDED Viewed

	@@ -0,0 +1,47 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": null,
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": false,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_bias": false,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "qalora_group_size": 16,
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "to_v",
+    "to_q",
+    "add_k_proj",
+    "add_q_proj",
+    "proj",
+    "img_mod.1",
+    "to_out.0",
+    "to_k",
+    "txt_mod.1",
+    "net.2",
+    "add_v_proj",
+    "to_add_out"
+  ],
+  "target_parameters": null,
+  "task_type": null,
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

step2000/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4b78e4acc5b9077771a2fbcca9c27949b7a8c6f48d42cc6d3da2b14e0278b2ea
+size 590058432

step2000/qwen.toml ADDED Viewed

	@@ -0,0 +1,55 @@

+# Change these paths
+output_dir = '/root/diffusion-pipe/output/goya_drawings_qwen'
+dataset = '/root/diffusion-pipe/my_configs/dataset.toml'
+# training settings
+epochs = 1000
+micro_batch_size_per_gpu = 4
+pipeline_stages = 1
+gradient_accumulation_steps = 1
+gradient_clipping = 1
+#warmup_steps = 100
+# eval settings
+eval_every_n_epochs = 1
+#eval_every_n_steps = 100
+eval_before_first_step = true
+eval_micro_batch_size_per_gpu = 1
+eval_gradient_accumulation_steps = 1
+# misc settings
+save_every_n_epochs = 500
+save_every_n_steps = 250
+#checkpoint_every_n_epochs = 1
+checkpoint_every_n_minutes = 30
+activation_checkpointing = 'unsloth'
+partition_method = 'parameters'
+save_dtype = 'bfloat16'
+caching_batch_size = 8
+steps_per_print = 1
+[model]
+type = 'qwen_image'
+# Change this path
+diffusers_path = '/root/diffusion-pipe/imagegen_models/wan/Qwen-Image'
+dtype = 'bfloat16'
+# transformer_dtype = 'bfloat16'  # Using full BF16 for best quality on B200
+timestep_sample_method = 'logit_normal'
+[adapter]
+type = 'lora'
+rank = 32
+dtype = 'bfloat16'
+# Automagic should work fine and requires no tuning, but you can probably get better results using AdamW
+# with a properly tuned learning rate.
+[optimizer]
+type = 'automagic'
+weight_decay = 0.01
+# [optimizer]
+# type = 'AdamW8bitKahan'
+# lr = 2e-5
+# betas = [0.9, 0.99]
+# weight_decay = 0.01

step3000/adapter_config.json ADDED Viewed

	@@ -0,0 +1,47 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": null,
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": false,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_bias": false,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "qalora_group_size": 16,
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "net.2",
+    "txt_mod.1",
+    "to_q",
+    "to_add_out",
+    "proj",
+    "to_v",
+    "add_q_proj",
+    "to_out.0",
+    "add_v_proj",
+    "img_mod.1",
+    "to_k",
+    "add_k_proj"
+  ],
+  "target_parameters": null,
+  "task_type": null,
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

step3000/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e7a269e4ad28fe6220069b29323630c49d27aca238df7ed919ef628440281f90
+size 590058432

step3000/qwen.toml ADDED Viewed

	@@ -0,0 +1,55 @@

+# Change these paths
+output_dir = '/root/diffusion-pipe/output/goya_drawings_qwen'
+dataset = '/root/diffusion-pipe/my_configs/dataset.toml'
+# training settings
+epochs = 1000
+micro_batch_size_per_gpu = 4
+pipeline_stages = 1
+gradient_accumulation_steps = 1
+gradient_clipping = 1
+#warmup_steps = 100
+# eval settings
+eval_every_n_epochs = 1
+#eval_every_n_steps = 100
+eval_before_first_step = true
+eval_micro_batch_size_per_gpu = 1
+eval_gradient_accumulation_steps = 1
+# misc settings
+save_every_n_epochs = 500
+save_every_n_steps = 250
+#checkpoint_every_n_epochs = 1
+checkpoint_every_n_minutes = 30
+activation_checkpointing = 'unsloth'
+partition_method = 'parameters'
+save_dtype = 'bfloat16'
+caching_batch_size = 8
+steps_per_print = 1
+[model]
+type = 'qwen_image'
+# Change this path
+diffusers_path = '/root/diffusion-pipe/imagegen_models/wan/Qwen-Image'
+dtype = 'bfloat16'
+# transformer_dtype = 'bfloat16'  # Using full BF16 for best quality on B200
+timestep_sample_method = 'logit_normal'
+[adapter]
+type = 'lora'
+rank = 32
+dtype = 'bfloat16'
+# Automagic should work fine and requires no tuning, but you can probably get better results using AdamW
+# with a properly tuned learning rate.
+[optimizer]
+type = 'automagic'
+weight_decay = 0.01
+# [optimizer]
+# type = 'AdamW8bitKahan'
+# lr = 2e-5
+# betas = [0.9, 0.99]
+# weight_decay = 0.01

step4000/adapter_config.json ADDED Viewed

	@@ -0,0 +1,47 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": null,
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": false,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_bias": false,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "qalora_group_size": 16,
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "net.2",
+    "txt_mod.1",
+    "to_q",
+    "to_add_out",
+    "proj",
+    "to_v",
+    "add_q_proj",
+    "to_out.0",
+    "add_v_proj",
+    "img_mod.1",
+    "to_k",
+    "add_k_proj"
+  ],
+  "target_parameters": null,
+  "task_type": null,
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

step4000/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6c7333df0384ecda13289d43dec5158e362c9c6bd098d60b07941d512ae60825
+size 590058432

step4000/qwen.toml ADDED Viewed

	@@ -0,0 +1,55 @@

+# Change these paths
+output_dir = '/root/diffusion-pipe/output/goya_drawings_qwen'
+dataset = '/root/diffusion-pipe/my_configs/dataset.toml'
+# training settings
+epochs = 1000
+micro_batch_size_per_gpu = 4
+pipeline_stages = 1
+gradient_accumulation_steps = 1
+gradient_clipping = 1
+#warmup_steps = 100
+# eval settings
+eval_every_n_epochs = 1
+#eval_every_n_steps = 100
+eval_before_first_step = true
+eval_micro_batch_size_per_gpu = 1
+eval_gradient_accumulation_steps = 1
+# misc settings
+save_every_n_epochs = 500
+save_every_n_steps = 250
+#checkpoint_every_n_epochs = 1
+checkpoint_every_n_minutes = 30
+activation_checkpointing = 'unsloth'
+partition_method = 'parameters'
+save_dtype = 'bfloat16'
+caching_batch_size = 8
+steps_per_print = 1
+[model]
+type = 'qwen_image'
+# Change this path
+diffusers_path = '/root/diffusion-pipe/imagegen_models/wan/Qwen-Image'
+dtype = 'bfloat16'
+# transformer_dtype = 'bfloat16'  # Using full BF16 for best quality on B200
+timestep_sample_method = 'logit_normal'
+[adapter]
+type = 'lora'
+rank = 32
+dtype = 'bfloat16'
+# Automagic should work fine and requires no tuning, but you can probably get better results using AdamW
+# with a properly tuned learning rate.
+[optimizer]
+type = 'automagic'
+weight_decay = 0.01
+# [optimizer]
+# type = 'AdamW8bitKahan'
+# lr = 2e-5
+# betas = [0.9, 0.99]
+# weight_decay = 0.01

step5000/adapter_config.json ADDED Viewed

	@@ -0,0 +1,47 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": null,
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": false,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_bias": false,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "qalora_group_size": 16,
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "net.2",
+    "txt_mod.1",
+    "to_q",
+    "to_add_out",
+    "proj",
+    "to_v",
+    "add_q_proj",
+    "to_out.0",
+    "add_v_proj",
+    "img_mod.1",
+    "to_k",
+    "add_k_proj"
+  ],
+  "target_parameters": null,
+  "task_type": null,
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

step5000/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4761aae489e9911b01b7b037d3c4a4d35e74c94fdcacb617ec623f7ff07cb22c
+size 590058432

step5000/qwen.toml ADDED Viewed

	@@ -0,0 +1,55 @@

+# Change these paths
+output_dir = '/root/diffusion-pipe/output/goya_drawings_qwen'
+dataset = '/root/diffusion-pipe/my_configs/dataset.toml'
+# training settings
+epochs = 1000
+micro_batch_size_per_gpu = 4
+pipeline_stages = 1
+gradient_accumulation_steps = 1
+gradient_clipping = 1
+#warmup_steps = 100
+# eval settings
+eval_every_n_epochs = 1
+#eval_every_n_steps = 100
+eval_before_first_step = true
+eval_micro_batch_size_per_gpu = 1
+eval_gradient_accumulation_steps = 1
+# misc settings
+save_every_n_epochs = 500
+save_every_n_steps = 250
+#checkpoint_every_n_epochs = 1
+checkpoint_every_n_minutes = 30
+activation_checkpointing = 'unsloth'
+partition_method = 'parameters'
+save_dtype = 'bfloat16'
+caching_batch_size = 8
+steps_per_print = 1
+[model]
+type = 'qwen_image'
+# Change this path
+diffusers_path = '/root/diffusion-pipe/imagegen_models/wan/Qwen-Image'
+dtype = 'bfloat16'
+# transformer_dtype = 'bfloat16'  # Using full BF16 for best quality on B200
+timestep_sample_method = 'logit_normal'
+[adapter]
+type = 'lora'
+rank = 32
+dtype = 'bfloat16'
+# Automagic should work fine and requires no tuning, but you can probably get better results using AdamW
+# with a properly tuned learning rate.
+[optimizer]
+type = 'automagic'
+weight_decay = 0.01
+# [optimizer]
+# type = 'AdamW8bitKahan'
+# lr = 2e-5
+# betas = [0.9, 0.99]
+# weight_decay = 0.01

step6000/adapter_config.json ADDED Viewed

	@@ -0,0 +1,47 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": null,
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": false,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_bias": false,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "qalora_group_size": 16,
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "net.2",
+    "txt_mod.1",
+    "to_q",
+    "to_add_out",
+    "proj",
+    "to_v",
+    "add_q_proj",
+    "to_out.0",
+    "add_v_proj",
+    "img_mod.1",
+    "to_k",
+    "add_k_proj"
+  ],
+  "target_parameters": null,
+  "task_type": null,
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

step6000/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5df6f916a4494362ced8c6c754fbc73f55112b706705f7a66f779d91dc053aa9
+size 590058432

step6000/qwen.toml ADDED Viewed

	@@ -0,0 +1,55 @@

+# Change these paths
+output_dir = '/root/diffusion-pipe/output/goya_drawings_qwen'
+dataset = '/root/diffusion-pipe/my_configs/dataset.toml'
+# training settings
+epochs = 1000
+micro_batch_size_per_gpu = 4
+pipeline_stages = 1
+gradient_accumulation_steps = 1
+gradient_clipping = 1
+#warmup_steps = 100
+# eval settings
+eval_every_n_epochs = 1
+#eval_every_n_steps = 100
+eval_before_first_step = true
+eval_micro_batch_size_per_gpu = 1
+eval_gradient_accumulation_steps = 1
+# misc settings
+save_every_n_epochs = 500
+save_every_n_steps = 250
+#checkpoint_every_n_epochs = 1
+checkpoint_every_n_minutes = 30
+activation_checkpointing = 'unsloth'
+partition_method = 'parameters'
+save_dtype = 'bfloat16'
+caching_batch_size = 8
+steps_per_print = 1
+[model]
+type = 'qwen_image'
+# Change this path
+diffusers_path = '/root/diffusion-pipe/imagegen_models/wan/Qwen-Image'
+dtype = 'bfloat16'
+# transformer_dtype = 'bfloat16'  # Using full BF16 for best quality on B200
+timestep_sample_method = 'logit_normal'
+[adapter]
+type = 'lora'
+rank = 32
+dtype = 'bfloat16'
+# Automagic should work fine and requires no tuning, but you can probably get better results using AdamW
+# with a properly tuned learning rate.
+[optimizer]
+type = 'automagic'
+weight_decay = 0.01
+# [optimizer]
+# type = 'AdamW8bitKahan'
+# lr = 2e-5
+# betas = [0.9, 0.99]
+# weight_decay = 0.01

step7000/adapter_config.json ADDED Viewed

	@@ -0,0 +1,47 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": null,
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": false,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_bias": false,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "qalora_group_size": 16,
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "net.2",
+    "txt_mod.1",
+    "to_q",
+    "to_add_out",
+    "proj",
+    "to_v",
+    "add_q_proj",
+    "to_out.0",
+    "add_v_proj",
+    "img_mod.1",
+    "to_k",
+    "add_k_proj"
+  ],
+  "target_parameters": null,
+  "task_type": null,
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

step7000/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f093c308672d95cc16a95276fe24bf2750847c1bb0d9ddb186da0504589c629f
+size 590058432

step7000/qwen.toml ADDED Viewed

	@@ -0,0 +1,55 @@

+# Change these paths
+output_dir = '/root/diffusion-pipe/output/goya_drawings_qwen'
+dataset = '/root/diffusion-pipe/my_configs/dataset.toml'
+# training settings
+epochs = 1000
+micro_batch_size_per_gpu = 4
+pipeline_stages = 1
+gradient_accumulation_steps = 1
+gradient_clipping = 1
+#warmup_steps = 100
+# eval settings
+eval_every_n_epochs = 1
+#eval_every_n_steps = 100
+eval_before_first_step = true
+eval_micro_batch_size_per_gpu = 1
+eval_gradient_accumulation_steps = 1
+# misc settings
+save_every_n_epochs = 500
+save_every_n_steps = 250
+#checkpoint_every_n_epochs = 1
+checkpoint_every_n_minutes = 30
+activation_checkpointing = 'unsloth'
+partition_method = 'parameters'
+save_dtype = 'bfloat16'
+caching_batch_size = 8
+steps_per_print = 1
+[model]
+type = 'qwen_image'
+# Change this path
+diffusers_path = '/root/diffusion-pipe/imagegen_models/wan/Qwen-Image'
+dtype = 'bfloat16'
+# transformer_dtype = 'bfloat16'  # Using full BF16 for best quality on B200
+timestep_sample_method = 'logit_normal'
+[adapter]
+type = 'lora'
+rank = 32
+dtype = 'bfloat16'
+# Automagic should work fine and requires no tuning, but you can probably get better results using AdamW
+# with a properly tuned learning rate.
+[optimizer]
+type = 'automagic'
+weight_decay = 0.01
+# [optimizer]
+# type = 'AdamW8bitKahan'
+# lr = 2e-5
+# betas = [0.9, 0.99]
+# weight_decay = 0.01

step8000/adapter_config.json ADDED Viewed

	@@ -0,0 +1,47 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": null,
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": false,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_bias": false,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "qalora_group_size": 16,
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "net.2",
+    "txt_mod.1",
+    "to_q",
+    "to_add_out",
+    "proj",
+    "to_v",
+    "add_q_proj",
+    "to_out.0",
+    "add_v_proj",
+    "img_mod.1",
+    "to_k",
+    "add_k_proj"
+  ],
+  "target_parameters": null,
+  "task_type": null,
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

step8000/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c0b42ffadf135da58da90612b8eeac7c0b4284998b30be3a85fe0f59a074a33b
+size 590058432

step8000/qwen.toml ADDED Viewed

	@@ -0,0 +1,55 @@

+# Change these paths
+output_dir = '/root/diffusion-pipe/output/goya_drawings_qwen'
+dataset = '/root/diffusion-pipe/my_configs/dataset.toml'
+# training settings
+epochs = 1000
+micro_batch_size_per_gpu = 4
+pipeline_stages = 1
+gradient_accumulation_steps = 1
+gradient_clipping = 1
+#warmup_steps = 100
+# eval settings
+eval_every_n_epochs = 1
+#eval_every_n_steps = 100
+eval_before_first_step = true
+eval_micro_batch_size_per_gpu = 1
+eval_gradient_accumulation_steps = 1
+# misc settings
+save_every_n_epochs = 500
+save_every_n_steps = 250
+#checkpoint_every_n_epochs = 1
+checkpoint_every_n_minutes = 30
+activation_checkpointing = 'unsloth'
+partition_method = 'parameters'
+save_dtype = 'bfloat16'
+caching_batch_size = 8
+steps_per_print = 1
+[model]
+type = 'qwen_image'
+# Change this path
+diffusers_path = '/root/diffusion-pipe/imagegen_models/wan/Qwen-Image'
+dtype = 'bfloat16'
+# transformer_dtype = 'bfloat16'  # Using full BF16 for best quality on B200
+timestep_sample_method = 'logit_normal'
+[adapter]
+type = 'lora'
+rank = 32
+dtype = 'bfloat16'
+# Automagic should work fine and requires no tuning, but you can probably get better results using AdamW
+# with a properly tuned learning rate.
+[optimizer]
+type = 'automagic'
+weight_decay = 0.01
+# [optimizer]
+# type = 'AdamW8bitKahan'
+# lr = 2e-5
+# betas = [0.9, 0.99]
+# weight_decay = 0.01