{
  "attention_backend": "PyTorch SDPA (Flash/Efficient/Math)",
  "src_tokenizer_type": "ByteLevelBPE",
  "tgt_tokenizer_type": "Unigram",
  "src_vocab_size_estimate": 10000,
  "tgt_vocab_size_estimate": 14938,
  "emb_size": 1536,
  "nhead": 8,
  "ffn_hid_dim": 4096,
  "num_encoder_layers": 8,
  "num_decoder_layers": 8,
  "dropout": 0.1,
  "max_len": 384,
  "batch_size_per_gpu": 128,
  "effective_batch_size": 768,
  "num_epochs": 50,
  "learning_rate": 5e-05,
  "weight_decay": 0.01,
  "grad_clip_norm": 1.0,
  "validation_split": 0.05,
  "random_seed": 42,
  "patience": 5,
  "precision": "bf16-mixed",
  "gpus": 6,
  "strategy": "ddp",
  "num_workers": 2,
  "pytorch_version": "2.6.0+cu124",
  "sdpa_available": true,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "pad_token_id": 0,
  "tgt_vocab_size": 14938,
  "src_vocab_size": 10000
}
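
For reference, below is a minimal sketch of how this config could be consumed to instantiate the encoder-decoder model it describes, using PyTorch's built-in `nn.Transformer`, which on PyTorch 2.x routes attention through `torch.nn.functional.scaled_dot_product_attention`, i.e. the Flash/Efficient/Math backends named in `attention_backend`. The file name `config.json`, the `Seq2SeqTransformer` class, and the omission of positional encodings are illustrative assumptions, not the original training code.

```python
import json

import torch
import torch.nn as nn


class Seq2SeqTransformer(nn.Module):
    """Encoder-decoder Transformer built from the config above (a sketch).

    NOTE: positional encodings are omitted for brevity; the actual model
    presumably adds sinusoidal or learned position information.
    """

    def __init__(self, cfg: dict):
        super().__init__()
        self.pad_id = cfg["pad_token_id"]
        self.src_emb = nn.Embedding(
            cfg["src_vocab_size"], cfg["emb_size"], padding_idx=self.pad_id
        )
        self.tgt_emb = nn.Embedding(
            cfg["tgt_vocab_size"], cfg["emb_size"], padding_idx=self.pad_id
        )
        # nn.Transformer dispatches attention through SDPA on PyTorch 2.x,
        # matching "attention_backend" in the config.
        self.transformer = nn.Transformer(
            d_model=cfg["emb_size"],
            nhead=cfg["nhead"],
            num_encoder_layers=cfg["num_encoder_layers"],
            num_decoder_layers=cfg["num_decoder_layers"],
            dim_feedforward=cfg["ffn_hid_dim"],
            dropout=cfg["dropout"],
            batch_first=True,
        )
        self.generator = nn.Linear(cfg["emb_size"], cfg["tgt_vocab_size"])

    def forward(self, src_ids: torch.Tensor, tgt_ids: torch.Tensor) -> torch.Tensor:
        # Causal mask so each target position attends only to its prefix.
        tgt_mask = nn.Transformer.generate_square_subsequent_mask(
            tgt_ids.size(1), device=tgt_ids.device
        )
        out = self.transformer(
            self.src_emb(src_ids),
            self.tgt_emb(tgt_ids),
            tgt_mask=tgt_mask,
            src_key_padding_mask=src_ids.eq(self.pad_id),
            tgt_key_padding_mask=tgt_ids.eq(self.pad_id),
        )
        return self.generator(out)  # (batch, tgt_len, tgt_vocab_size)


if __name__ == "__main__":
    with open("config.json") as f:  # hypothetical file holding the JSON above
        cfg = json.load(f)
    model = Seq2SeqTransformer(cfg)
    src = torch.randint(3, cfg["src_vocab_size"], (2, 16))
    tgt = torch.randint(3, cfg["tgt_vocab_size"], (2, 12))
    print(model(src, tgt).shape)  # torch.Size([2, 12, 14938])
```

The config is internally consistent with plain DDP scaling: `effective_batch_size` (768) equals `batch_size_per_gpu` (128) times `gpus` (6), with no gradient accumulation implied.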