Spaces:

aswerdlow
/

unidisc

Running on L4

App Files Files Community

unidisc / configs /experiments /large_scale_train_high_res_interleaved.yaml

aswerdlow

Initial commit

131da64 22 days ago

raw

history blame contribute delete

3.36 kB


# @package _global_

	data:
	move_tensordict_to_shm: false
	enable_cuda_in_tensordict_collate: false
	force_mp_spawn: false
	resolution: 512
	add_text_to_weighted_sampler: false

	add_image_gen_tokens: true
	use_packing_collate: true
	dynamic_packing_lengths: true
	remove_txt_img_padding: true
	require_sample_ids: true
	block_size: ${model.length}
	disable_mask_after_eos: true
	add_image_token: true
	use_slow_tokenizer: true
	force_seed: true

	data_dir_train:
	- dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/matrix/HPDv2_image_reward_v1_v2_v3/train
	weight: 0.5
	name: HPDv2_image_reward_v1_v2_v3 # 3593248
	- dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/grogu/pick_score_sac_prompts_v1_v2_v3_512
	weight: 1.0
	name: pick_score_sac_prompts_v1_v2_v3_512 # 9330810
	- dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_matrix/pixelprose_tokens
	weight: 1.0
	name: pixelprose_tokens # 6627589
	- dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/babel/cambrian_10m_v5
	weight: 1.0
	name: cambrian_10m_v5 # 8215264
	- dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/scratch_ssd_tokens/datacomp1b_7_512
	weight: 1.0
	name: datacomp1b_7_512 # 23955209
	- dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_matrix/datacomp_1b_datacomp1b_2_tokens
	weight: 0.5
	name: datacomp_1b_datacomp1b_2_tokens # 10161505
	- dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_matrix/datacomp_1b_datacomp1b_4_tokens
	weight: 0.5
	name: datacomp_1b_datacomp1b_4_tokens # 27895717
	- dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/babel/mmc4_fewer_faces_v0
	weight: 2.0
	name: mmc4_fewer_faces_v0 # 22605524
	- dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_matrix/datacomp_1b_datacomp1b_5_tokens
	weight: 0.5
	name: datacomp_1b_datacomp1b_5_tokens
	- dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_grogu/datacomp_1b_datacomp1b_0_tokens
	weight: 0.5
	name: datacomp_1b_datacomp1b_0_tokens
	- dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_grogu/datacomp_1b_datacomp1b_1_tokens
	weight: 0.5
	name: datacomp_1b_datacomp1b_1_tokens
	- dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/babel/cosmopedia_2_v0
	weight: 1.0
	name: cosmopedia_v2
	- dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/babel/fineweb_edu_dedup_v0
	weight: 1.0
	name: fineweb_edu_dedup
	data_dir_val:
	- dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/matrix/gecko_eval_512
	weight: 1.0
	name: gecko_eval_512

	trainer:
	text_loss_weight: 1.0
	img_loss_weight: 0.2
	mask_entire_modality: 0.2

	force_full_attention_mask: false
	force_full_attention_mask_loss_only: false
	disable_all_eval_generation: true
	interleaved: true
	interleaved_training_flex_attention: true
	force_convert_to_dict: true
	val_check_interval: -1
	use_gradient_checkpointing: true
	disable_all_checkpointing: false
	set_max_txt_loss_ratio: true
	gradient_clip_val: 1.0
	skip_early_checkpointing: false
	bypass_load_from_state_dicts_if_resuming: true

	loader:
	num_workers: 4
	num_eval_workers: 4

	lr_scheduler:
	num_warmup_steps: 5000

	model:
	linear_factor: 2
	use_flex_attention: true
	use_spda_attn: true

	length: 1536
	txt_length: ${.length}
	img_length: ${.length}

	eval:
	generate_samples: false
	disable_visualization: true