Image Feature Extraction
Birder
PyTorch
vit_reg4_m16_rms_avg_i-jepa / training_args.json
hassonofer's picture
Upload training_args.json
8d4b123 verified
{
"cmdline": "birder/scripts/train_i_jepa.py --network vit_reg4_m16_rms_avg --predictor-depth 6 --opt adamw --lr 0.001 --lr-scheduler cosine --lr-cosine-min 1e-6 --epochs 60 --warmup-epochs 5 --batch-size 320 --wd 0.04 --wd-end 0.4 --norm-wd 0 --amp --amp-dtype bfloat16 --compile --compile-opt --wds --wds-info /mnt/data/ssl_micro_packed/_info.json",
"network": "vit_reg4_m16_rms_avg",
"net_param": null,
"model_config": null,
"predictor_embed_dim": 384,
"predictor_num_heads": 12,
"predictor_depth": 6,
"compile": true,
"compile_opt": true,
"opt": "adamw",
"momentum": 0.9,
"nesterov": false,
"opt_eps": null,
"opt_betas": null,
"opt_alpha": null,
"lr": 0.001,
"bias_lr": null,
"lr_scale": null,
"lr_scale_type": "linear",
"wd": 0.04,
"wd_end": 0.4,
"norm_wd": 0.0,
"bias_weight_decay": null,
"transformer_embedding_decay": null,
"layer_decay": null,
"lr_scheduler_update": "epoch",
"lr_scheduler": "cosine",
"lr_step_size": 40,
"lr_steps": null,
"lr_step_gamma": 0.75,
"lr_cosine_min": 1e-06,
"lr_power": 1.0,
"grad_accum_steps": 1,
"channels": 3,
"size": [
224,
224
],
"batch_size": 320,
"warmup_epochs": 5,
"aug_type": "birder",
"aug_level": 1,
"use_grayscale": false,
"ra_num_ops": 2,
"ra_magnitude": 9,
"augmix_severity": 3,
"resize_min_scale": 0.35,
"re_prob": null,
"simple_crop": false,
"rgb_mode": "birder",
"epochs": 60,
"stop_epoch": 61,
"save_frequency": 1,
"keep_last": null,
"resume_epoch": null,
"load_states": false,
"load_scheduler": false,
"tag": null,
"log_interval": 100,
"num_workers": 16,
"img_loader": "tv",
"prefetch_factor": null,
"model_dtype": "float32",
"amp": true,
"amp_dtype": "bfloat16",
"fast_matmul": false,
"grad_anomaly_detection": false,
"world_size": 2,
"dist_url": "env://",
"find_unused_parameters": false,
"clip_grad_norm": null,
"gpu": 0,
"cpu": false,
"use_deterministic_algorithms": false,
"plot_lr": false,
"no_summary": false,
"data_path": [],
"wds": true,
"wds_info": "/mnt/data/ssl_micro_packed/_info.json",
"wds_cache_dir": null,
"wds_train_size": null,
"wds_split": "training",
"rank": 0,
"distributed": true,
"dist_backend": "nccl"
}