|
from transformers import PretrainedConfig
|
|
|
|
# Hugging Face Hub repository identifier for this model.
# NOTE(review): presumably consumed by push_to_hub / from_pretrained calls
# elsewhere in the project — confirm against callers; nothing in this file uses it.
repo_name = "BeardedMonster/SabiYarn-125M"
|
|
|
|
class GPTJXConfig(PretrainedConfig):
    """Configuration for the GPT-JX ("nanogpt-j") model architecture.

    Holds the architectural hyperparameters of a GPT-style transformer and
    plugs into the Hugging Face config machinery via :class:`PretrainedConfig`,
    so instances can be saved/loaded with ``save_pretrained`` /
    ``from_pretrained``.

    Args:
        block_size: Maximum sequence length (context window). Default 1024.
        vocab_size: Size of the token vocabulary. Default 52050.
        n_layer: Number of transformer blocks. Default 12.
        n_head: Number of attention heads per block. Default 12.
        n_embd: Embedding / hidden dimension. Default 768.
        dropout: Dropout probability. Default 0.0.
        bias: Whether linear/normalization layers carry bias terms.
            Default False.
        **kwargs: Extra arguments forwarded to ``PretrainedConfig.__init__``.
    """

    # Identifier used by the transformers auto-class registry.
    model_type = "nanogpt-j"

    def __init__(
        self,
        block_size: int = 1024,
        vocab_size: int = 52050,
        n_layer: int = 12,
        n_head: int = 12,
        n_embd: int = 768,
        dropout: float = 0.0,
        bias: bool = False,
        **kwargs,
    ):
        # Record the architecture hyperparameters on the instance first,
        # then hand the remaining keyword arguments to the base class
        # (same ordering as the original implementation).
        self.block_size = block_size
        self.vocab_size = vocab_size
        self.n_layer = n_layer
        self.n_head = n_head
        self.n_embd = n_embd
        self.dropout = dropout
        self.bias = bias
        super().__init__(**kwargs)
|
|
|
|
|