# @package _global_

# Experiment override config: unified image + text model trained on
# pre-tokenized (MAGVIT) image/text token datasets.
#
# NOTE(review): the nesting below was reconstructed from the flat key order.
# `model`, `trainer` and `loader` are confirmed top-level by the absolute
# interpolations that reference them; the placement of the remaining sections
# follows token order — confirm against the base config.

defaults:
  - /model: small

model:
  downscale_ratio: 16
  image_vocab_size: 8192
  vae_type: magvit
  use_custom_vae_ckpt: null
  custom_vae_name: null
  img_length: 256
  txt_length: 128
  image_model: true
  text_model: true
  unified_model: true
  image_model_fid_eval: false
  force_argmax_valid_indices: true
  use_pretrained_img_emb: false
  codebook_embed_dim: 256
  qk_norm: true
  norm_type: rms
  sandwich_normalization: true
  zero_linear_init: false
  modality_embed: true
  rope_2d: false
  use_spda_attn: true
  force_optimized_native_attn: true
  freeze_txt_emb: false
  add_labels: null
  txt_dropout: null
  text_vocab_size: 32001
  use_flex_attention: true
  flex_attention_txt_masking_prob: 0.1
  flex_attention_img_masking_prob: 0.1
  linear_factor: 1

data:
  train: combined_tokens
  # Relative interpolation: resolves to the sibling `train` key above.
  valid: ${.train}
  n_duplicate_train: null
  wrap: true
  streaming: false
  precache: false
  tokenizer_name_or_path: NousResearch/Llama-2-7b-hf
  resolution: 256
  block_size: 128
  n_val_samples: null
  unpaired: false
  n_duplicate_val: null
  save_train_dataloader: true
  save_validation_dataloader: true
  iterable: false
  webdataset_iterable: false
  webdataset_indexed: false
  dataset_type: null
  tokens_flip_collate: false
  n_train_samples: null
  raw_data_dir: null
  tokenizers_parallelism: false
  token_data_dir: null
  force_disable_shuffle: false
  keep_tensordict_on_disk: true
  use_custom_tensordict_collate: true
  force_mp_spawn: false
  enable_cuda_in_tensordict_collate: false
  use_weighted_tensordict_sampler: true
  fraction_txt_data: 0.0
  # Training token directories. `oc.env` has no fallback here, so
  # DIFFUSION_DATA_DIR must be set in the environment when these resolve.
  data_dir_train:
    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/scratch_ssd_tokens/datacomp1b_8_magvit
      weight: -1
      name: datacomp1b_8_magvit_train
    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/scratch_ssd_tokens/cc12m_tokens_train_256
      weight: -1
      name: cc12m_tokens_train_256
    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/grogu/HPDv2_image_reward_v1_v2_v3_magvit
      weight: -1
      name: HPDv2_image_reward_v1_v2_v3_magvit
    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/grogu/pick_score_sac_prompts_v1_v2_v3_magvit
      weight: -1
      name: pick_score_sac_prompts_v1_v2_v3_magvit
    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/grogu/datacomp1b_0_1_6_magvit
      weight: -1
      name: datacomp1b_0_1_6_magvit
    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/grogu/laion400m_magvit_part_0
      weight: -1
      name: laion400m_magvit_part_0
    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/grogu/laion400m_magvit_part_1
      weight: -1
      name: laion400m_magvit_part_1
  # Validation token directories.
  data_dir_val:
    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/scratch_ssd_tokens/datacomp1b_8_magvit_val
      weight: 1
      name: datacomp1b_8_magvit_val
    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/scratch_ssd_tokens/cc12m_tokens_val_256
      weight: 1
      name: cc12m_tokens_val_256
  tokenize_vqvae_in_dataloader: false
  # NOTE(review): `val` holding a single `.train: null` entry looks like a
  # leftover of a mistyped override (compare `valid: ${.train}` above). Kept
  # as-is to preserve the original content — confirm whether it can be removed.
  val:
    .train: null
  use_token_dataset: true
  image_dataset: tglcourse/lsun_church_train
  image_data_train: null
  image_data_val: null
  keep_hf_dataset_in_memory: true
  allow_label: false
  disable_text_modality: true
  force_raw_train_images: false
  aggressive_aug: true
  allow_aug_vqvae_dataloader: true
  move_tensordict_to_shm: false
  force_full_attention_mask: false

eval:
  generate_samples: false
  compute_generative_perplexity: false
  log_every_n_evals: 10
  log_every_n_fid: 20
  limit_val_batches_manual: 16
  perplexity_batch_size: ${loader.eval_batch_size}
  num_masking_viz_batches: -1
  # `eval` is not a built-in OmegaConf resolver; presumably the application
  # registers it before this config is resolved — TODO confirm.
  max_num_fid_batches_per_device: ${eval:'8192 // (${trainer.devices} * ${loader.eval_batch_size})'}
  cfg: null
  class_conditional_fid: false
  force_cfg_value: true
  split_cfg_batches: true
  fid_mode: clean
  clean_fid_precomputed_name: lsun_church
  clean_fid_precomputed_split: trainfull
  clean_fid_precomputed_res: 256

trainer:
  log_every_n_steps: 10
  val_check_interval: 1000
  custom_ddp_bf16: true
  scale_lr_by_batch_size: false
  limit_val_batches: 16
  use_gradient_checkpointing: false
  # Key spelling ("seperate") is kept verbatim: it is read by name at runtime.
  log_seperate_modal_losses: true
  softmin_snr: 5
  text_loss_weight: 1.0
  img_loss_weight: null
  low_precision_loss: false
  compile: false
  multimodal_batches: true
  compile_fullgraph: false
  log_grad_norm_every_n_steps: 10
  mask_entire_modality: 0.1
  force_shift_image_batches: false
  ckpt_steps: 10000
  ckpt_every_n_minutes: -1
  ignore_text_in_unified: false
  disable_all_eval_generation: false
  eval_on_start: false
  ckpt_model_only: false
  ema: 0.0
  use_custom_ema: false
  log_flops: false
  disable_distributed_torchmetrics: true
  restart_on_failure: true
  force_null_sigma: true
  allow_null_sigma: true
  compile_flag_pos_emb: true
  add_label: false
  first_token_dropout: null
  force_shift_raw_image_batches: true
  txt_dropout: 0.1
  disable_ddp_optimizer: true

optim:
  lr: 0.0003
  weight_decay: 0.05

loader:
  batch_size: 64
  # Evaluation reuses the training batch size unless overridden.
  eval_batch_size: ${loader.batch_size}
  num_workers: 1
  desired_global_batch_size: 512
  persistent_workers: true
  pin_memory: true
  num_eval_workers: 1

sampling:
  # NOTE(review): `model.length` is not defined in this file (only
  # `img_length` / `txt_length` are); presumably it comes from the
  # `/model: small` default or is computed at runtime — confirm.
  steps: ${model.length}
  num_sample_batches: 2
  max_sampling_steps: ${model.length}

wandb:
  mode: online

lr_scheduler:
  num_warmup_steps: 5000

checkpointing:
  checkpoints_total_limit: 4