# Hydra composition: start from the base vq16 text-to-image config,
# then swap the model group to the extra-large variant.
defaults:
  - vq16_t2i
  - override /model: extra_large
|
data:
  train: combined_tokens
  valid: ${.train}  # validate on the same mixture as training
  precache: false
  streaming: false
  resolution: 256   # image side length in pixels before tokenization
  block_size: 128   # max text token length
  tokenizer_name_or_path: NousResearch/Llama-2-7b-hf
  wrap: true
  iterable: false
  webdataset_iterable: false
  webdataset_indexed: false
  unpaired: false
  dataset_type: null
  tokens_flip_collate: false
  # null means "use the full split" / "no duplication" — TODO confirm against loader code
  n_val_samples: null
  n_train_samples: null
  n_duplicate_train: null
  n_duplicate_val: null
  raw_data_dir: null
  save_train_dataloader: true
  save_validation_dataloader: true
  tokenizers_parallelism: false
  token_data_dir: null
  force_disable_shuffle: false
  use_custom_tensordict_collate: true
  use_weighted_tensordict_sampler: true
  force_mp_spawn: false
  enable_cuda_in_tensordict_collate: false
  use_token_dataset: true
  keep_tensordict_on_disk: true
  move_tensordict_to_shm: false
  add_text_to_weighted_sampler: false
  # Pre-tokenized training shards rooted at $DIFFUSION_DATA_DIR; `weight`
  # is the relative sampling weight for the weighted sampler
  # (journeydb is upweighted 10x relative to the other sources).
  data_dir_train:
    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_matrix/pixelprose_tokens
      weight: 1.0
      name: pixelprose
    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_grogu/journeydb_train
      weight: 10.0
      name: journeydb_train
    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_grogu/datacomp_1b_datacomp1b_0_tokens
      weight: 1.0
      name: datacomp0
    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_grogu/datacomp_1b_datacomp1b_1_tokens
      weight: 1.0
      name: datacomp1
    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_matrix/datacomp_1b_datacomp1b_2_tokens
      weight: 1.0
      name: datacomp2
    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_grogu/datacomp_1b_datacomp1b_3_tokens
      weight: 1.0
      name: datacomp3
    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_matrix/datacomp_1b_datacomp1b_4_tokens
      weight: 1.0
      name: datacomp4
    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_matrix/datacomp_1b_datacomp1b_5_tokens
      weight: 1.0
      name: datacomp5
    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_grogu/datacomp_1b_datacomp1b_6_tokens
      weight: 1.0
      name: datacomp6
  # Validation reuses the pixelprose shards (placeholder name).
  data_dir_val:
    - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_matrix/pixelprose_tokens
      weight: 1.0
      name: dummy_1
|
model:
  # Sequence-length bookkeeping (resolved by the custom `eval` resolver):
  # image tokens form a (resolution // downscale_ratio)^2 grid; text tokens
  # are only allocated when the unified (text+image) model is enabled.
  img_length: ${eval:'(${data.resolution} // ${model.downscale_ratio})**2'}
  txt_length: ${eval:'${data.block_size} if ${.unified_model} else 0'}
  length: ${eval:'${.txt_length} + ${.img_length}'}
  unified_model: true
  image_model: true
  text_model: true
  image_model_fid_eval: false
  force_argmax_valid_indices: true
  use_pretrained_img_emb: false
  rope_2d: true
  modality_embed: true
  norm_type: rms
  qk_norm: true
  sandwich_normalization: true
  # NOTE(review): looks like Llama-2's 32000-token vocab plus one added
  # token — confirm against the tokenizer setup.
  text_vocab_size: 32001
|
loader:
  batch_size: 8  # per-device train batch size
  eval_batch_size: ${eval:'${.batch_size} // 2'}
  desired_global_batch_size: 512  # gradient accumulation presumably derived from this — TODO confirm
  persistent_workers: true  # no effect while num_workers is 0
  pin_memory: false
  num_workers: 0
  num_eval_workers: 0
|
eval:
  # -1 disables the corresponding periodic logging — TODO confirm sentinel semantics
  log_every_n_evals: -1
  log_every_n_fid: -1
  limit_val_batches_manual: 16
  generate_samples: true
  compute_generative_perplexity: false
  perplexity_batch_size: ${loader.eval_batch_size}
  cfg: 5.0  # classifier-free guidance scale for sampling
  num_val_metrics_standalone_samples: -1
  num_val_metrics_standalone_batches_per_device: -1
  # Per-scorer weights for the auto-enhance reward.
  auto_enhance_reward_config:
    dfn_score: 1.0
    laion_aesthetic_score: 1.0
|
trainer:
  log_flops: false
  log_every_n_steps: 10
  custom_ddp_bf16: true
  log_seperate_modal_losses: true  # key spelling matches consuming code; do not "fix" to separate
  limit_val_batches: 16
  softmin_snr: 5
  # Per-modality loss weights for the joint objective.
  text_loss_weight: 1.0
  img_loss_weight: 0.6
  use_gradient_checkpointing: false
  # Checkpoint every 20k steps, plus a time-based checkpoint every 3 hours.
  ckpt_steps: 20000
  ckpt_every_n_minutes: 180
  ckpt_recent_timeout_minutes: 10
  use_custom_ema: false
  ema: 0.0  # EMA disabled
  fsdp: true
  restart_on_failure: true
  eval_on_start: false
  # Very large sentinel: step-based validation effectively never triggers.
  val_check_interval: 100000000000
  scale_lr_by_batch_size: false
  watch_gradients: false
  compile: true
  # presumably the probability of masking an entire modality — TODO confirm
  mask_entire_modality: 0.15
  compile_flag_pos_emb: true
  multimodal_batches: true
|
optim:
  lr: 0.0001

sampling:
  steps: 128  # denoising/sampling steps per generated batch
  num_sample_batches: 2

wandb:
  mode: online

checkpointing:
  checkpoints_total_limit: 10
  use_automatic_naming: false

lr_scheduler:
  num_warmup_steps: 10000