# Hugging Face file view: EchoPath / lvdm / config.yaml
# Repo tags: Diffusers, Safetensors, English
# Latest commit: 4c2f224 (verified), "Upload folder using huggingface_hub" (marshallhamzah)
# Page links: raw, history, blame
# File size: 2.95 kB
# Run identity and filesystem locations.
# NOTE(review): both paths are absolute and live under
# /nfs/usrhome/khmuhammad, so they must be overridden on any other machine.

# Presumably the Weights & Biases run group (`report_to: wandb` is set
# further down) - TODO confirm.
wandb_group: 'lvdm_cardiacnet'

# Where this experiment writes its outputs.
output_dir: '/nfs/usrhome/khmuhammad/Echodream/experiments/cardiacnet_with_both_cfg'

# Location of the VAE; the dataset below is read from a `latents` directory,
# so this is presumably the matching encoder/decoder - TODO confirm.
vae_path: '/nfs/usrhome/khmuhammad/Echodream/models/vae'
# Shared clip settings. Other sections read these through `${globals.*}`
# interpolations (`${globals.target_fps}`, `${globals.target_nframes}`,
# `${globals.outputs}`), so all three keys must be nested under `globals`.
globals:
  target_fps: 32
  target_nframes: 64
  # Presumably the fields each dataset sample is asked to return - TODO confirm.
  outputs:
    - video
    - class_id
    - image
# Dataset list. Each entry has a `name`, an `active` flag and a `params`
# mapping (presumably the arguments handed to the dataset class - TODO confirm).
# NOTE(review): the nesting was rebuilt from a listing that had lost its
# indentation; `params` is assumed to own every key from `root` through
# `outputs` - verify against the data loader.
datasets:
  - name: CardiacNetLatent
    active: true
    params:
      root: /nfs/usrhome/khmuhammad/Echodream/data/latents/cardiacnet
      target_fps: ${globals.target_fps}
      target_nframes: ${globals.target_nframes}
      # Same value as `sample_size: 14` in the unet section.
      target_resolution: 14
      outputs: ${globals.outputs}
# Denoising network definition. `_class_name` names the model class; the
# remaining keys are presumably its constructor arguments - TODO confirm.
unet:
  _class_name: UNetSpatioTemporalConditionModel
  addition_time_embed_dim: 1
  # The four per-level settings (block_out_channels, down_block_types,
  # num_attention_heads, up_block_types) each list exactly 4 entries.
  block_out_channels: [128, 256, 256, 512]
  cross_attention_dim: 1
  down_block_types:
    - CrossAttnDownBlockSpatioTemporal
    - CrossAttnDownBlockSpatioTemporal
    - CrossAttnDownBlockSpatioTemporal
    - DownBlockSpatioTemporal
  # NOTE(review): in_channels (8) is twice out_channels (4); presumably the
  # noisy latent is concatenated with a conditioning latent - TODO confirm.
  in_channels: 8
  layers_per_block: 2
  num_attention_heads: [8, 16, 16, 32]
  num_frames: ${globals.target_nframes}
  out_channels: 4
  projection_class_embeddings_input_dim: 1
  # Same value as the dataset's `target_resolution: 14`.
  sample_size: 14
  transformer_layers_per_block: 1
  up_block_types:
    - UpBlockSpatioTemporal
    - CrossAttnUpBlockSpatioTemporal
    - CrossAttnUpBlockSpatioTemporal
    - CrossAttnUpBlockSpatioTemporal
# Noise schedule. `_class_name` names the scheduler class; the remaining keys
# are presumably its constructor arguments - TODO confirm.
# NOTE(review): the section is assumed to end at `steps_offset`, with
# `training_mode` beginning the top-level training options - verify.
noise_scheduler:
  _class_name: DDPMScheduler
  num_train_timesteps: 1000
  beta_start: 0.0001
  beta_end: 0.02
  beta_schedule: linear
  variance_type: fixed_small
  clip_sample: true
  clip_sample_range: 4.0
  prediction_type: v_prediction
  # Thresholding is disabled. The diffusers DDPMScheduler docs describe the
  # two settings below as applying only when `thresholding` is true.
  thresholding: false
  dynamic_thresholding_ratio: 0.995
  sample_max_value: 1.0
  timestep_spacing: leading
  steps_offset: 0
# ---- Training objective and conditioning ----
training_mode: diffusion
training_conditioning_type: class_id
noise_offset: 0.1
# NOTE(review): the key is spelled with a double "n". It is left untouched
# because the consuming script presumably looks up this exact name.
drop_conditionning: 0.1

# ---- Batching and run length ----
train_batch_size: 4
gradient_accumulation_steps: 1
dataloader_num_workers: 16
max_train_steps: 100000

# ---- Learning-rate schedule ----
learning_rate: 1.0e-05
scale_lr: false
# NOTE(review): `constant` is paired with a non-zero warmup below. Whether the
# warmup is honoured depends on the training script's scheduler factory
# (diffusers distinguishes `constant` from `constant_with_warmup`) - confirm.
lr_scheduler: constant
lr_warmup_steps: 500

# ---- Optimizer ----
use_8bit_adam: false
adam_beta1: 0.9
adam_beta2: 0.999
adam_weight_decay: 0.01
adam_epsilon: 1.0e-08
max_grad_norm: 1.0

# ---- Precision, memory and weight averaging ----
mixed_precision: fp16
allow_tf32: true
gradient_checkpointing: false
enable_xformers_memory_efficient_attention: false
use_ema: true

# ---- Logging ----
# Relative path; what it is resolved against is decided by the training script.
logging_dir: logs
# ---- Validation ----
# `validation_conditioning_type` picks which of the per-sample lists below is
# presumably used; with `class_id` selected, the LVEF and text lists look
# unused for this run - TODO confirm.
validation_conditioning_type: class_id

# Each per-sample list has 4 entries, matching `num_validation_samples`.
num_validation_samples: 4
validation_lvefs: [0.0, 0.4, 0.7, 1.0]
# NOTE(review): class 2 appears twice and no fourth class id is listed;
# confirm this is intentional rather than a typo.
validation_class_ids: [0, 1, 2, 2]
# Kept as quoted strings, not floats.
validation_texts: ['0.0', '0.4', '0.7', '1.0']

# Clip shape follows the shared globals.
validation_fps: ${globals.target_fps}
validation_frames: ${globals.target_nframes}

# Sampling settings.
validation_timesteps: 128
validation_guidance: 1.0

# Same interval as `checkpointing_steps: 1000`.
validation_steps: 1000
# ---- Experiment tracking ----
report_to: 'wandb'
tracker_project_name: 'echo-dream'
seed: 42

# ---- Checkpointing ----
checkpointing_steps: 1000
checkpoints_total_limit: 50
resume_from_checkpoint: null

# ---- Text encoder ----
# All three entries name the same CLIP checkpoint.
# NOTE(review): `pretrained_model_name_or_path` also points at the CLIP
# checkpoint; confirm the training script only uses it for text components.
text_encoder_path: 'openai/clip-vit-large-patch14'
tokenizer_path: 'openai/clip-vit-large-patch14'
pretrained_model_name_or_path: 'openai/clip-vit-large-patch14'
train_text_encoder: false

# ---- Guidance ----
# Separate scales for the class and frame conditions.
use_separate_guidance: true
guidance_scale_class: 5.0
guidance_scale_frame: 1.0

# ---- Epoch count ----
# NOTE(review): `max_train_steps: 100000` is also set. 2858 looks like a
# value derived from that step budget rather than a hand-chosen limit;
# which of the two takes precedence depends on the training script - confirm.
num_train_epochs: 2858