{
  "batch_size": 18,
  "num_epochs": 4,
  "lr": 5e-5,
  "seq_len": 256,
  "d_model": 256,
  "N": 4,
  "h": 4,
  "train": "dataset/openweb_fine.jsonl",
  "test": "dataset/openweb_fine.jsonl",
  "d_ff": 1024,
  "dropout": 0.1,
  "model_folder": "./",
  "model_basename": "",
  "preload": "weights",
  "tokenizer_file": "openweb2.tokenizer.json",
  "experiment_name": "runs/openweb2",
  "dataset": "dataset/dataset_general.jsonl",
  "loss_file": "openweb2/losses.jsonl",
  "fine_dataset": "dataset/fine_tune.jsonl",
  "fine_epochs": 0
}