Diffusers
Safetensors
English
File size: 2,638 Bytes
4c2f224
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# Run identity and filesystem locations.
# NOTE(review): both paths below are absolute and site-specific
# (/nfs/usrhome/...); presumably they need overriding on any other
# machine — confirm.
wandb_group: lidm_cardiacnet
vae_path: /nfs/usrhome/khmuhammad/Echodream/models/vae
output_dir: /nfs/usrhome/khmuhammad/Echodream/experiments/lidm_cardiacnet
# Shared values. Other entries in this file pull them in through
# ${globals.<key>} interpolations (the dataset params and the
# validation_fps / validation_frames settings).
globals:
  outputs: [image, class_id]
  target_nframes: 64
  target_fps: 32
# Dataset entries; this file lists a single one, CardiacNetLatent.
datasets:
- name: CardiacNetLatent
  active: true
  params:
    # NOTE(review): absolute, site-specific path.
    root: /nfs/usrhome/khmuhammad/Echodream/data/latents/cardiacnet
    target_resolution: 14  # same value as unet.sample_size in this file
    # The remaining params are interpolated from the `globals` mapping.
    outputs: ${globals.outputs}
    target_nframes: ${globals.target_nframes}
    target_fps: ${globals.target_fps}
# Denoising network. `_class_name` names the model class; the other keys
# are presumably forwarded to it as constructor arguments — confirm
# against the training script.
unet:
  _class_name: UNet2DConditionModel

  # Input / output geometry.
  sample_size: 14
  in_channels: 4
  out_channels: 4
  center_input_sample: false

  # Timestep embedding.
  time_embedding_type: positional
  flip_sin_to_cos: true
  freq_shift: 0

  # Block layout. Each of the three lists below has four entries, and
  # up_block_types lists the Up counterparts of down_block_types in
  # reverse order.
  block_out_channels: [128, 256, 256, 512]
  down_block_types:
  - AttnDownBlock2D
  - AttnDownBlock2D
  - AttnDownBlock2D
  - DownBlock2D
  up_block_types:
  - UpBlock2D
  - AttnUpBlock2D
  - AttnUpBlock2D
  - AttnUpBlock2D
  layers_per_block: 2
  mid_block_scale_factor: 1
  downsample_padding: 1

  # Activation, dropout and normalisation.
  act_fn: silu
  dropout: 0.0
  norm_eps: 1.0e-05

  # NOTE(review): none of the block types listed above is a CrossAttn*
  # variant, yet cross_attention_dim is set — confirm how the class_id
  # conditioning actually reaches the model.
  cross_attention_dim: 1
# Noise scheduler. `_class_name` names the scheduler class; the other keys
# are presumably forwarded to it as constructor arguments — confirm.
noise_scheduler:
  _class_name: DDPMScheduler
  num_train_timesteps: 1000
  prediction_type: v_prediction

  # Beta schedule and variance.
  beta_schedule: linear
  beta_start: 0.0001
  beta_end: 0.02
  variance_type: fixed_small

  # Sample clipping is enabled with a range of 4.0.
  clip_sample: true
  clip_sample_range: 4.0

  # Thresholding is disabled; the two values after the flag presumably
  # only matter when it is switched on — confirm.
  thresholding: false
  dynamic_thresholding_ratio: 0.995
  sample_max_value: 1.0

  # Timestep selection.
  timestep_spacing: leading
  steps_offset: 0
# ---- Run setup -------------------------------------------------------
training_mode: diffusion
seed: 42
train_batch_size: 256
dataloader_num_workers: 16
gradient_accumulation_steps: 1
# NOTE(review): a step budget and an epoch budget are both set to 5000;
# which one takes precedence depends on the training script — confirm.
max_train_steps: 5000
num_train_epochs: 5000

# ---- Conditioning ----------------------------------------------------
training_conditioning_type: class_id
# The key is spelled with a double "n"; left untouched because the
# consumer presumably looks it up under this exact name.
drop_conditionning: 0.1
condition_guidance_scale: 5.0
noise_offset: 0.0

# ---- Optimizer and learning rate -------------------------------------
learning_rate: 0.0003
scale_lr: false
# NOTE(review): if the schedule is built with diffusers' get_scheduler,
# the plain `constant` schedule takes no warmup, so lr_warmup_steps would
# have no effect (`constant_with_warmup` is the variant that uses it) —
# confirm against the training script.
lr_scheduler: constant
lr_warmup_steps: 500
use_8bit_adam: false
adam_beta1: 0.9
adam_beta2: 0.999
adam_weight_decay: 0.01
adam_epsilon: 1.0e-08
max_grad_norm: 1.0

# ---- Precision, memory and EMA ---------------------------------------
mixed_precision: fp16
allow_tf32: true
gradient_checkpointing: false
enable_xformers_memory_efficient_attention: false
use_ema: true

# ---- Validation ------------------------------------------------------
validation_steps: 100
validation_timesteps: 128
# NOTE(review): two count-like keys carry the same value (4); confirm
# which one the validation loop reads.
validation_count: 4
num_validation_samples: 4
# NOTE(review): validation_guidance (1.0) differs from
# condition_guidance_scale (5.0) set in this file — confirm which is
# applied at validation time.
validation_guidance: 1.0
validation_conditioning_type: class_id
# Interpolated from the `globals` mapping.
validation_fps: ${globals.target_fps}
validation_frames: ${globals.target_nframes}

# ---- Logging and checkpoints -----------------------------------------
report_to: wandb
tracker_project_name: echo-dream
logging_dir: logs
checkpointing_steps: 100
checkpoints_total_limit: 50
resume_from_checkpoint: null

# ---- Text encoder ----------------------------------------------------
# All three paths point at the same CLIP identifier.
# NOTE(review): both conditioning types in this file are class_id, so
# these entries are presumably unused for this run — confirm.
text_encoder_path: openai/clip-vit-large-patch14
pretrained_model_name_or_path: openai/clip-vit-large-patch14
tokenizer_path: openai/clip-vit-large-patch14
train_text_encoder: false