from transformers import PretrainedConfig

repo_name = "BeardedMonster/SabiYarn-125M"


class GPTJXConfig(PretrainedConfig):
    model_type = "nanogpt-j"

    def __init__(
        self,
        block_size: int = 1024,
        vocab_size: int = 52050,  # nanoGPT uses 50304 (GPT-2's 50257 padded to a multiple of 64 for efficiency); SabiYarn uses 52050
        n_layer: int = 12,
        n_head: int = 12,
        n_embd: int = 768,
        dropout: float = 0.0,
        bias: bool = False,  # True: bias in Linears and LayerNorms, like GPT-2. False: a bit better and faster
        **kwargs,
    ):
        self.block_size = block_size
        self.vocab_size = vocab_size
        self.n_layer = n_layer
        self.n_head = n_head
        self.n_embd = n_embd
        self.dropout = dropout
        self.bias = bias
        super().__init__(**kwargs)
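

# --- Usage sketch (not part of the original file) ---
# A minimal, hedged example of how a custom config like this is typically wired
# into transformers: register it under its model_type so AutoConfig can resolve
# it, then instantiate and save it. The local directory name below is made up.
if __name__ == "__main__":
    from transformers import AutoConfig

    # Map the custom model_type string to this config class so that
    # AutoConfig.from_pretrained can later resolve "nanogpt-j" to GPTJXConfig.
    AutoConfig.register("nanogpt-j", GPTJXConfig)

    # Build a config with the defaults above and persist it as config.json.
    config = GPTJXConfig()
    config.save_pretrained("./sabiyarn-125m-config")  # hypothetical local path
    print(config)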