# edx-xl / config.toml (uploaded by jxm via huggingface_hub, commit c664036)
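# Editorial note: the grouping comments below are inferred from the key names;
# they are assumptions, not documentation shipped with the original config.

# Run bookkeeping: Weights & Biases logging toggle (1 = enabled) and RNG seed.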
use_wandb = 1
seed = 1
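
# Adapter architecture: "dit" presumably selects a DiT-style translator with a
# 1024-dim adapter, depth-4 blocks, and L2-normalized embeddings.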
style = "dit"
d_adapter = 1024
normalize_embeddings = 1
depth = 4
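
# Embedding spaces to train on; n_embs_per_batch = 2 likely means each batch
# pairs two of the listed models. max_seq_length caps input length in tokens,
# and depth_transform = 12 is presumably the depth of the transform network.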
embs = [ "gte", "gtr", "stella", "sentence-t5", "e5", "sbert", "clip", "stella-big", "snowflake", "ember",]
n_embs_per_batch = 2
max_seq_length = 512
depth_transform = 12
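
# Optimization: learning rate 5e-5 (the optimizer itself is not specified
# here), train and validation batch size 64, gradient clipping at norm 10.0,
# no gradient accumulation; a checkpoint is saved every 500 steps.
# dataset = "bge" presumably refers to a BGE training corpus.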
lr = 5e-5
bs = 64
save_every = 500
epochs = 100.0
val_bs = 64
dataset = "bge"
max_grad_norm = 10.0
gradient_accumulation_steps = 1
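
# Loss weights: the contrastive and translation ("trans") terms are enabled at
# 1; "vsp" and "cc" (presumably vector space preservation and cycle
# consistency) are disabled at 0.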
loss_coefficient_contrastive = 1
loss_coefficient_vsp = 0
loss_coefficient_trans = 1
loss_coefficient_cc = 0
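
# Evaluation and batch construction: eval_steps = 999999 effectively disables
# periodic evaluation; batches are presumably built from clusters of 512
# examples sampled within a single domain; 100 learning-rate warmup steps.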
eval_steps = 999999
cluster_size = 512
cluster_strategy = "cluster_within_domain"
warmup_steps = 100
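
# Weights & Biases project and run name.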
wandb_project = "edx-2"
wandb_name = "dit-pretrain-p2-4-long"
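
# Checkpointing: the "{}" in save_dir is presumably filled in at runtime and
# is left as-is here; training resumes from the dit-pretrain-32 state dict.
# num_params records the model size (~2.6B parameters).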
save_dir = "checkpoints/{}/"
state_dict_dir = "checkpoints/dit-pretrain-32/model.pt"
num_params = 2610419544