# @package _global_

defaults:
  - /model: small

model:
  downscale_ratio: 16
  image_vocab_size: 8192
  vae_type: magvit
  use_custom_vae_ckpt: null
  custom_vae_name: null
  img_length: 256
  txt_length: 128
  image_model: true
  text_model: true
  unified_model: true
  image_model_fid_eval: false
  force_argmax_valid_indices: true
  use_pretrained_img_emb: false
  codebook_embed_dim: 256
  qk_norm: true
  norm_type: rms
  sandwich_normalization: true
  zero_linear_init: false
  modality_embed: true
  rope_2d: false
  use_spda_attn: true
  force_optimized_native_attn: true
  freeze_txt_emb: false
  add_labels: null
  txt_dropout: null
  text_vocab_size: 32001
  use_flex_attention: true
  flex_attention_txt_masking_prob: 0.1
  flex_attention_img_masking_prob: 0.1
  linear_factor: 1
data:
  train: combined_tokens
  valid: ${.train}
  n_duplicate_train: null
  wrap: true
  streaming: false
  precache: false
  tokenizer_name_or_path: NousResearch/Llama-2-7b-hf
  resolution: 256
  block_size: 128
  n_val_samples: null
  unpaired: false
  n_duplicate_val: null
  save_train_dataloader: true
  save_validation_dataloader: true
  iterable: false
  webdataset_iterable: false
  webdataset_indexed: false
  dataset_type: null
  tokens_flip_collate: false
  n_train_samples: null
  raw_data_dir: null
  tokenizers_parallelism: false
  token_data_dir: null
  force_disable_shuffle: false
  keep_tensordict_on_disk: true
  use_custom_tensordict_collate: true
  force_mp_spawn: false
  enable_cuda_in_tensordict_collate: false
  use_weighted_tensordict_sampler: true
  fraction_txt_data: 0.0
  data_dir_train:
  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/scratch_ssd_tokens/datacomp1b_8_magvit
    weight: -1
    name: datacomp1b_8_magvit_train
  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/scratch_ssd_tokens/cc12m_tokens_train_256
    weight: -1
    name: cc12m_tokens_train_256
  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/grogu/HPDv2_image_reward_v1_v2_v3_magvit
    weight: -1
    name: HPDv2_image_reward_v1_v2_v3_magvit
  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/grogu/pick_score_sac_prompts_v1_v2_v3_magvit
    weight: -1
    name: pick_score_sac_prompts_v1_v2_v3_magvit
  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/grogu/datacomp1b_0_1_6_magvit
    weight: -1
    name: datacomp1b_0_1_6_magvit
  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/grogu/laion400m_magvit_part_0
    weight: -1
    name: laion400m_magvit_part_0
  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/grogu/laion400m_magvit_part_1
    weight: -1
    name: laion400m_magvit_part_1
  data_dir_val:
  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/scratch_ssd_tokens/datacomp1b_8_magvit_val
    weight: 1
    name: datacomp1b_8_magvit_val
  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/scratch_ssd_tokens/cc12m_tokens_val_256
    weight: 1
    name: cc12m_tokens_val_256
  tokenize_vqvae_in_dataloader: false
  val:
    .train: null
  use_token_dataset: true
  image_dataset: tglcourse/lsun_church_train
  image_data_train: null
  image_data_val: null
  keep_hf_dataset_in_memory: true
  allow_label: false
  disable_text_modality: true
  force_raw_train_images: false
  aggressive_aug: true
  allow_aug_vqvae_dataloader: true
  move_tensordict_to_shm: false
  force_full_attention_mask: false
eval:
  generate_samples: false
  compute_generative_perplexity: false
  log_every_n_evals: 10
  log_every_n_fid: 20
  limit_val_batches_manual: 16
  perplexity_batch_size: ${loader.eval_batch_size}
  num_masking_viz_batches: -1
  max_num_fid_batches_per_device: ${eval:'8192 // (${trainer.devices} * ${loader.eval_batch_size})'}
  cfg: null
  class_conditional_fid: false
  force_cfg_value: true
  split_cfg_batches: true
  fid_mode: clean
  clean_fid_precomputed_name: lsun_church
  clean_fid_precomputed_split: trainfull
  clean_fid_precomputed_res: 256
trainer:
  log_every_n_steps: 10
  val_check_interval: 1000
  custom_ddp_bf16: true
  scale_lr_by_batch_size: false
  limit_val_batches: 16
  use_gradient_checkpointing: false
  log_seperate_modal_losses: true
  softmin_snr: 5
  text_loss_weight: 1.0
  img_loss_weight: null
  low_precision_loss: false
  compile: false
  multimodal_batches: true
  compile_fullgraph: false
  log_grad_norm_every_n_steps: 10
  mask_entire_modality: 0.1
  force_shift_image_batches: false
  ckpt_steps: 10000
  ckpt_every_n_minutes: -1
  ignore_text_in_unified: false
  disable_all_eval_generation: false
  eval_on_start: false
  ckpt_model_only: false
  ema: 0.0
  use_custom_ema: false
  log_flops: false
  disable_distributed_torchmetrics: true
  restart_on_failure: true
  force_null_sigma: true
  allow_null_sigma: true
  compile_flag_pos_emb: true
  add_label: false
  first_token_dropout: null
  force_shift_raw_image_batches: true
  txt_dropout: 0.1
  disable_ddp_optimizer: true
optim:
  lr: 0.0003
  weight_decay: 0.05
loader:
  batch_size: 64
  eval_batch_size: ${loader.batch_size}
  num_workers: 1
  desired_global_batch_size: 512
  persistent_workers: true
  pin_memory: true
  num_eval_workers: 1
sampling:
  steps: ${model.length}
  num_sample_batches: 2
  max_sampling_steps: ${model.length}
wandb:
  mode: online
lr_scheduler:
  num_warmup_steps: 5000
checkpointing:
  checkpoints_total_limit: 4