panda / training_info.json

final checkpoint files

f38d112 verified 24 days ago

13.5 kB

	{
	"model_config": {
	"mode": "predict",
	"context_length": 512,
	"prediction_length": 128,
	"distribution_output": null,
	"loss": "mse",
	"huber_delta": 1.0,
	"patch_length": 16,
	"patch_stride": 16,
	"num_hidden_layers": 8,
	"d_model": 512,
	"num_attention_heads": 8,
	"channel_attention": true,
	"ffn_dim": 512,
	"norm_type": "rmsnorm",
	"norm_eps": 1e-05,
	"attention_dropout": 0.0,
	"positional_dropout": 0.0,
	"path_dropout": 0.0,
	"ff_dropout": 0.0,
	"bias": true,
	"activation_function": "gelu",
	"pre_norm": true,
	"use_cls_token": false,
	"init_std": 0.02,
	"scaling": "std",
	"do_mask_input": null,
	"mask_type": "random",
	"random_mask_ratio": 0.5,
	"num_forecast_mask_patches": 3,
	"channel_consistent_masking": false,
	"unmasked_channel_indices": null,
	"mask_value": 0,
	"pooling_type": "mean",
	"head_dropout": 0.0,
	"num_parallel_samples": 100,
	"channel_rope": false,
	"max_wavelength": 500,
	"rope_percent": 0.75,
	"pretrained_encoder_path": null,
	"use_dynamics_embedding": true,
	"num_poly_feats": 120,
	"poly_degrees": 2,
	"rff_trainable": false,
	"rff_scale": 1.0,
	"num_rff": 256
	},
	"train_config": {
	"seed": 99,
	"max_steps": 100000,
	"save_steps": 50000,
	"log_steps": 1000,
	"per_device_train_batch_size": 1024,
	"gradient_accumulation_steps": 1,
	"max_grad_norm": 1.0,
	"dataloader_num_workers": 16,
	"dataloader_prefetch_factor": 2,
	"tf32": false,
	"torch_compile": true,
	"optim": "adamw_torch_fused",
	"learning_rate": 0.001,
	"lr_scheduler_type": "cosine",
	"warmup_ratio": 0.1,
	"weight_decay": 0.0,
	"output_dir": "/stor/work/AMDG_Gilpin_Summer2024/checkpoints/",
	"ddp_backend": "nccl",
	"ddp_find_unused_parameters": false,
	"remove_unused_columns": false
	},
	"all_config": {
	"run_name": "pft_chattn_emb_w_poly",
	"wandb": {
	"log": true,
	"project_name": "dystformer",
	"entity": "gilpinlab",
	"group_name": "fine-tuning",
	"resume": false,
	"tags": null
	},
	"patchtst": {
	"mode": "predict",
	"context_length": 512,
	"prediction_length": 128,
	"distribution_output": null,
	"loss": "mse",
	"huber_delta": 1.0,
	"patch_length": 16,
	"patch_stride": 16,
	"num_hidden_layers": 8,
	"d_model": 512,
	"num_attention_heads": 8,
	"channel_attention": true,
	"ffn_dim": 512,
	"norm_type": "rmsnorm",
	"norm_eps": 1e-05,
	"attention_dropout": 0.0,
	"positional_dropout": 0.0,
	"path_dropout": 0.0,
	"ff_dropout": 0.0,
	"bias": true,
	"activation_function": "gelu",
	"pre_norm": true,
	"use_cls_token": false,
	"init_std": 0.02,
	"scaling": "std",
	"do_mask_input": null,
	"mask_type": "random",
	"random_mask_ratio": 0.5,
	"num_forecast_mask_patches": 3,
	"channel_consistent_masking": false,
	"unmasked_channel_indices": null,
	"mask_value": 0,
	"pooling_type": "mean",
	"head_dropout": 0.0,
	"num_parallel_samples": 100,
	"channel_rope": false,
	"max_wavelength": 500,
	"rope_percent": 0.75,
	"pretrained_encoder_path": null,
	"use_dynamics_embedding": true,
	"num_poly_feats": 120,
	"poly_degrees": 2,
	"rff_trainable": false,
	"rff_scale": 1.0,
	"num_rff": 256
	},
	"chronos": {
	"model_id": "amazon/chronos-t5-mini",
	"model_type": "seq2seq",
	"random_init": false,
	"tie_embeddings": true,
	"context_length": 512,
	"prediction_length": 64,
	"num_samples": 20,
	"n_tokens": 4096,
	"n_special_tokens": 2,
	"pad_token_id": 0,
	"eos_token_id": 1,
	"use_eos_token": true,
	"tokenizer_class": "MeanScaleUniformBins",
	"tokenizer_kwargs": {
	"low_limit": -15.0,
	"high_limit": 15.0
	},
	"temperature": 1.0,
	"top_k": 50,
	"top_p": 1.0
	},
	"train": {
	"seed": 99,
	"max_steps": 100000,
	"save_steps": 50000,
	"log_steps": 1000,
	"per_device_train_batch_size": 1024,
	"gradient_accumulation_steps": 1,
	"max_grad_norm": 1.0,
	"dataloader_num_workers": 16,
	"dataloader_prefetch_factor": 2,
	"tf32": false,
	"torch_compile": true,
	"optim": "adamw_torch_fused",
	"learning_rate": 0.001,
	"lr_scheduler_type": "cosine",
	"warmup_ratio": 0.1,
	"weight_decay": 0.0,
	"output_dir": "/stor/work/AMDG_Gilpin_Summer2024/checkpoints/",
	"ddp_backend": "nccl",
	"ddp_find_unused_parameters": false,
	"remove_unused_columns": false
	},
	"scheduler": {
	"enabled": false,
	"schedule_value_name": "noise_scale",
	"schedule_name": "cosine",
	"epoch_stop": 0.5,
	"init_value": 1.0,
	"final_value": 0.0,
	"eps": 0.008,
	"num_steps": 4,
	"decay_rate": 8.0
	},
	"eval": {
	"mode": "predict",
	"data_path": "/stor/work/AMDG_Gilpin_Summer2024/data/test/",
	"checkpoint_path": "/stor/work/AMDG_Gilpin_Summer2024/checkpoints",
	"device": "cuda:7",
	"torch_dtype": "float32",
	"batch_size": 32,
	"num_systems": 10,
	"sliding_context": false,
	"metric_names": [
	"mse",
	"mae",
	"smape",
	"r2_score",
	"spearman"
	],
	"forecast_save_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/eval/forecasts",
	"labels_save_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/eval/labels",
	"completions_save_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/eval/completions",
	"patch_input_save_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/eval/patch_input",
	"timestep_masks_save_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/eval/timestep_masks",
	"metrics_save_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/eval/metrics",
	"metrics_fname": "metrics.json",
	"overwrite": false,
	"seed": 42,
	"parallel_sample_reduction": "mean",
	"limit_prediction_length": true,
	"prediction_length": 64,
	"num_test_instances": 1,
	"window_style": "sampled",
	"window_stride": 1,
	"split_coords": false,
	"verbose": false,
	"use_channel_sampler": false,
	"channel_sampler": {
	"num_channels": 3,
	"num_samples": 2
	}
	},
	"run_metrics": {
	"wandb_run_id": null,
	"plot_dir": "figs",
	"save_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/eval/run_metrics",
	"save_fname": "metrics.json"
	},
	"train_data_dirs": [
	"/stor/work/AMDG_Gilpin_Summer2024/data/final_skew40/train",
	"/stor/work/AMDG_Gilpin_Summer2024/data/final_skew40/train_z5_z10",
	"/stor/work/AMDG_Gilpin_Summer2024/data/final_base40/train",
	"/stor/work/AMDG_Gilpin_Summer2024/data/final_base40/train_z5_z10"
	],
	"extra_train_data_paths": null,
	"probability": null,
	"shuffle_buffer_length": 100000,
	"min_past": 60,
	"max_missing_prop": 0.9,
	"fixed_dim": 3,
	"augmentations": {
	"augmentation_rate": 0.2,
	"probabilities": [
	0.3333333333333333,
	0.3333333333333333,
	0.3333333333333333,
	0.0,
	0.0
	],
	"dim_range": [
	3,
	8
	],
	"lag_range": [
	1,
	10
	],
	"phase_surrogate_cutoff": 1.0,
	"mode_range": [
	5,
	15
	],
	"max_wavenumber": 10.0,
	"max_amp": 10.0
	},
	"sampling": {
	"data_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/",
	"sys_class": "continuous_no_delay",
	"test_split": 0.3,
	"split_prefix": null,
	"rseed": 999,
	"ic_rseed": 888,
	"num_points": 4096,
	"num_periods": 40,
	"num_periods_min": 20,
	"num_periods_max": 60,
	"num_ics": 1,
	"num_param_perturbations": 4,
	"param_scale": 0.5,
	"split_coords": false,
	"standardize": false,
	"verbose": false,
	"multiprocessing": true,
	"debug_system": null,
	"silence_integration_errors": false,
	"save_params": true,
	"save_traj_stats": false,
	"ignore_probability": 0.0,
	"sign_match_probability": 0.5,
	"atol": 1e-10,
	"rtol": 1e-09,
	"reference_traj": {
	"length": 4096,
	"transient": 0.5,
	"n_periods": 40,
	"atol": 1e-07,
	"rtol": 1e-06
	}
	},
	"validator": {
	"enable": true,
	"verbose": false,
	"transient_time_frac": 0.05,
	"plot_save_dir": null,
	"save_failed_trajs": false,
	"attractor_tests": [
	"check_not_linear",
	"check_boundedness",
	"check_not_fixed_point",
	"check_zero_one_test",
	"check_power_spectrum",
	"check_stationarity"
	]
	},
	"events": {
	"max_duration": 300,
	"instability_threshold": 10000.0,
	"min_step": 1e-10,
	"verbose": true
	},
	"skew": {
	"num_pairs": 5000,
	"pairs_rseed": 123,
	"sys_idx_low": 0,
	"sys_idx_high": null,
	"normalization_strategy": "flow_rms",
	"randomize_driver_indices": true,
	"transform_scales": true,
	"train_nonskew_path": null,
	"test_nonskew_path": null,
	"coupling_map_type": "additive",
	"coupling_map": {
	"transform_scales": false,
	"randomize_driver_indices": true,
	"normalization_strategy": "flow_rms",
	"random_seed": 0
	}
	},
	"analysis": {
	"data_dir": "/stor/work/AMDG_Gilpin_Summer2024/data",
	"split": "copy/final_skew40/train",
	"num_samples": 1,
	"one_dim_target": false,
	"save_dir": "outputs",
	"plots_dir": "figures",
	"compute_quantile_limits": false,
	"compute_max_lyapunov_exponents": false,
	"filter_ensemble": true,
	"filter_json_fname": "failed_samples",
	"verbose": true,
	"attractor_tests": [
	"check_zero_one_test"
	],
	"check_not_transient": {
	"max_transient_prop": 0.2,
	"atol": 0.001
	},
	"check_stationarity": {
	"p_value": 0.05
	},
	"check_boundedness": {
	"threshold": 10000.0,
	"max_zscore": 5,
	"eps": 1e-10
	},
	"check_zero_one_test": {
	"threshold": 0.2,
	"strategy": "score"
	}
	}
	},
	"job_info": {
	"cuda_available": true,
	"device_count": 4,
	"device_names": {
	"0": "AMD Instinct MI100",
	"1": "AMD Instinct MI100",
	"2": "AMD Instinct MI100",
	"3": "AMD Instinct MI100"
	},
	"mem_info": {
	"0": [
	4438360064,
	34342961152
	],
	"1": [
	4429185024,
	34342961152
	],
	"2": [
	4456448000,
	34342961152
	],
	"3": [
	4462739456,
	34342961152
	]
	},
	"torchelastic_launched": true,
	"world_size": 4,
	"python_version": "3.11.9 (main, Apr 19 2024, 16:48:06) [GCC 11.2.0]",
	"torch_version": "2.2.2+rocm5.7",
	"numpy_version": "1.26.4",
	"gluonts_version": "0.15.1",
	"transformers_version": "4.40.1",
	"accelerate_version": "0.34.2"
	}
	}