unidisc/configs/experiments/jan_cub.yaml
aswerdlow's picture
Initial commit
131da64
raw
history blame contribute delete
970 Bytes
# @package _global_
#
# Experiment overrides: `medium` model trained on `cub2011_custom` at
# resolution 256 with a `titok128` VAE.
#
# NOTE(review): nesting below was reconstructed by attaching each run of keys
# to the section header directly above it — confirm against the base config
# schema before relying on it.

defaults:
  - override /model: medium
  - override /lr_scheduler: cosine_with_hard_restarts_schedule_with_warmup

loader:
  batch_size: 16
  eval_batch_size: 16
  # Larger than batch_size; presumably reached via multiple devices and/or
  # gradient accumulation — TODO confirm how the trainer derives this.
  desired_global_batch_size: 128
  num_workers: 4

trainer:
  ckpt_steps: 5000
  val_check_interval: 100
  use_legacy_update_batch_fn: true
  mask_txt_only: true
  # NOTE(review): looks like a probability (0.15) — confirm semantics.
  mask_entire_modality: 0.15
  ema: 0.9999
  use_custom_ema: true
  force_enable_checkpointing: true
  skip_early_checkpointing: false
  force_after_eos_padding: false

checkpointing:
  checkpoints_total_limit: 20

# Values for the scheduler selected in `defaults` above.
lr_scheduler:
  num_warmup_steps: 10000
  num_training_steps: 400000
  num_cycles: 80

data:
  resolution: 256
  train: cub2011_custom
  use_weighted_tensordict_sampler: false

model:
  vae_type: titok128
  txt_length: 18
  img_length: 128
  rope_2d: false
  # NOTE(review): force_text_vocab_size (5450) and text_vocab_size (5451)
  # differ by one — presumably intentional (e.g. an extra special token);
  # verify against the tokenizer setup.
  force_text_vocab_size: 5450
  text_vocab_size: 5451
  image_vocab_size: 8192
  attn_dropout: 0.1

optim:
  lr: 1.0e-04
  weight_decay: 0.2
  beta2: 0.99