{
  "batch_size": 18,
  "num_epochs": 4,
  "lr": 5e-5,
  "seq_len": 256,
  "d_model": 256,
  "N": 4,
  "h": 4,
  "train": "dataset/openweb_fine.jsonl",
  "test": "dataset/openweb_fine.jsonl",
  "d_ff": 1024,
  "dropout": 0.1,
  "model_folder": "openweb2",
  "model_basename": "openweb2-",
  "preload": "03",
  "tokenizer_file": "openweb2.tokenizer.json",
  "experiment_name": "runs/openweb2",
  "dataset": "dataset/dataset_general.jsonl",
  "loss_file": "openweb2/losses.jsonl",
  "fine_dataset": "dataset/fine_tune.jsonl",
  "fine_epochs": 0
}