{
  "batch_size": 18,
  "num_epochs": 4,
  "lr": 5e-5,
  "seq_len": 256,
  "d_model": 256,
  "N": 4,
  "h": 4,
  "train": "dataset/openweb_fine.jsonl",
  "test": "dataset/openweb_fine.jsonl",
  "d_ff": 1024,
  "dropout": 0.1,
  "model_folder": "openweb2",
  "model_basename": "openweb2-",
  "preload": "03",
  "tokenizer_file": "openweb2.tokenizer.json",
  "experiment_name": "runs/openweb2",
  "dataset": "dataset/dataset_general.jsonl",
  "loss_file": "openweb2/losses.jsonl",
  "fine_dataset": "dataset/fine_tune.jsonl",
  "fine_epochs": 0
}