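# Source path: DreamVoice/dreamvoice/src/configs/diffvc_cross.yaml
# Hosting-page residue kept for provenance (not part of the configuration):
#   uploader: Higobeatz
#   commit: "Initial commit" (0a97d6c)
#   page links: raw / history / blame / contribute / delete
#   file size: 926 Bytes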
# Configuration for the "cross" system variant (see `system` below).
# NOTE(review): the nesting in this file was reconstructed from the key names
# because the original indentation had been lost; confirm the hierarchy
# against the code that loads this config.
version: 1.0

system: "cross"

model:
  # NOTE(review): presumably settings for the content/conditioning embedding
  # module; verify against the consumer.
  cls_embedding:
    content_dim: 768
    content_hidden: 256
    use_pitch: false

  # Key names here match the constructor arguments of the Hugging Face
  # diffusers `UNet2DConditionModel`; presumably forwarded to it -- confirm.
  unet:
    sample_size: [128, 256]
    # NOTE(review): 257 looks like 1 + content_hidden (256) -- confirm.
    in_channels: 257
    out_channels: 1
    layers_per_block: 2
    # One entry per down/up block: four entries, matching the four block
    # types listed in each sequence below.
    block_out_channels: [128, 256, 256, 512]
    down_block_types:
      [
        "DownBlock2D",
        "DownBlock2D",
        "CrossAttnDownBlock2D",
        "CrossAttnDownBlock2D",
      ]
    up_block_types:
      [
        "CrossAttnUpBlock2D",
        "CrossAttnUpBlock2D",
        "UpBlock2D",
        "UpBlock2D",
      ]
    attention_head_dim: 32
    # Same value as cls_embedding.content_dim (768).
    cross_attention_dim: 768

# Noise-schedule settings. Several key names (beta_schedule,
# rescale_betas_zero_snr, timestep_spacing, clip_sample, prediction_type)
# match diffusers scheduler arguments; presumably forwarded to one -- confirm.
# NOTE(review): assumed to be a top-level section, not a child of `model`.
scheduler:
  num_train_steps: 1000
  beta_schedule: 'linear'
  beta_start: 0.0001
  beta_end: 0.02
  num_infer_steps: 50
  rescale_betas_zero_snr: true
  timestep_spacing: "trailing"
  clip_sample: false
  prediction_type: 'v_prediction'
  # NOTE(review): placement of `scale`/`shift` under `scheduler` is assumed;
  # presumably input normalization constants -- verify against the consumer.
  scale: 2.75
  shift: 5.80