{
  "attn_dropout_p": 0.0,
  "d_model": 832,
  "ff_dim": 2048,
  "ffn_dropout_p": 0.2,
  "learn_te": true,
  "n_heads": 16,
  "n_layers": 12,
  "resid_dropout_p": 0.2,
  "s1_bits": 10,
  "s2_bits": 10,
  "token_dropout_p": 0.0
}
{
  "attn_dropout_p": 0.0,
  "d_model": 832,
  "ff_dim": 2048,
  "ffn_dropout_p": 0.2,
  "learn_te": true,
  "n_heads": 16,
  "n_layers": 12,
  "resid_dropout_p": 0.2,
  "s1_bits": 10,
  "s2_bits": 10,
  "token_dropout_p": 0.0
}