{"max_len": 4096, "train_max_len": 512, "vocab_size": 10000, "d_model": 816, "n_heads": 8, "n_layers": 6, "d_ff": 2048, "epoch": 10, "loss": 0.13120031296321585} |
{"max_len": 4096, "train_max_len": 512, "vocab_size": 10000, "d_model": 816, "n_heads": 8, "n_layers": 6, "d_ff": 2048, "epoch": 10, "loss": 0.13120031296321585} |