PulkundwarP committed (verified)
Commit 93e6aa5 · Parent: 24dca49

Upload 4 files

Files changed (4):
  1. __init__.py +5 -0
  2. config.json +9 -0
  3. configuration_gpt.py +22 -0
  4. modeling_gpt.py +143 -0
__init__.py ADDED
@@ -0,0 +1,5 @@
+ from .configuration_gpt import GPTConfig
+ from .modeling_gpt import GPT
+
+ GPTConfig.register_for_auto_class()
+ GPT.register_for_auto_class("AutoModelForCausalLM")
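
register_for_auto_class marks both classes so that a subsequent save_pretrained records auto_map entries in config.json; once those entries are present in the uploaded config, the repository can be loaded through the auto classes. A minimal loading sketch (the repo id below is a placeholder, not confirmed by this commit):

from transformers import AutoModelForCausalLM

# Placeholder repo id; trust_remote_code lets transformers execute the custom
# configuration_gpt.py / modeling_gpt.py files shipped with the repository.
model = AutoModelForCausalLM.from_pretrained(
    "PulkundwarP/custom-gpt",
    trust_remote_code=True,
)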
config.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "block_size": 768,
+   "dropout": 0.1,
+   "model_type": "custom_gpt",
+   "n_embd": 768,
+   "n_head": 8,
+   "n_layer": 8,
+   "vocab_size": 50304
+ }
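
These values override the defaults declared in GPTConfig (note vocab_size is 50304 here versus the 50257 default). A quick sketch of reading them back, assuming it is run from the repository directory so configuration_gpt.py is importable as a top-level module:

from configuration_gpt import GPTConfig

cfg = GPTConfig.from_pretrained(".")   # "." = the directory containing this config.json
assert cfg.model_type == "custom_gpt"
assert cfg.vocab_size == 50304         # overrides the 50257 default
assert cfg.n_layer == 8 and cfg.n_head == 8 and cfg.n_embd == 768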
configuration_gpt.py ADDED
@@ -0,0 +1,22 @@
+ from transformers import PretrainedConfig
+
+
+ class GPTConfig(PretrainedConfig):
+     """
+     Configuration class for the custom GPT model.
+     """
+     model_type = "custom_gpt"
+
+     def __init__(
+         self,
+         block_size: int = 768,
+         vocab_size: int = 50257,
+         n_layer: int = 8,
+         n_head: int = 8,
+         n_embd: int = 768,
+         dropout: float = 0.1,
+         **kwargs,
+     ):
+         # Model hyperparameters; any extra keys are handled by PretrainedConfig.
+         self.block_size = block_size
+         self.vocab_size = vocab_size
+         self.n_layer = n_layer
+         self.n_head = n_head
+         self.n_embd = n_embd
+         self.dropout = dropout
+         super().__init__(**kwargs)
+
+     @classmethod
+     def from_pretrained(cls, *args, **kwargs):
+         """
+         Delegates to PretrainedConfig.from_pretrained; no custom loading logic is required.
+         """
+         return super().from_pretrained(*args, **kwargs)
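
For completeness, a small round-trip sketch (the directory name below is arbitrary); save_pretrained writes the hyperparameters out as config.json and from_pretrained reads them back:

from configuration_gpt import GPTConfig

cfg = GPTConfig(vocab_size=50304)                      # everything else keeps the defaults above
cfg.save_pretrained("checkpoint_dir")                  # writes checkpoint_dir/config.json
reloaded = GPTConfig.from_pretrained("checkpoint_dir")
assert reloaded.vocab_size == 50304 and reloaded.n_embd == 768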
modeling_gpt.py ADDED
@@ -0,0 +1,143 @@
+ import os
+
+ import torch
+ import torch.nn as nn
+ from torch.nn import functional as F
+ from transformers import PreTrainedModel, AutoConfig, AutoModelForCausalLM
+ from huggingface_hub import HfApi
+
+ # Relative import so the file works both as Hub remote code and inside the package
+ from .configuration_gpt import GPTConfig
+
+
+ # Define the CausalSelfAttention class
+ class CausalSelfAttention(nn.Module):
+     def __init__(self, config):
+         super().__init__()
+         assert config.n_embd % config.n_head == 0
+         # Single projection producing query, key and value in one matmul
+         self.c_attn = nn.Linear(config.n_embd, 3 * config.n_embd)
+         self.c_proj = nn.Linear(config.n_embd, config.n_embd)
+         self.c_proj.NANOGPT_SCALE_INIT = 1  # flag picked up by GPT._init_weights
+         self.n_head = config.n_head
+         self.n_embd = config.n_embd
+
+     def forward(self, x):
+         B, T, C = x.size()
+         qkv = self.c_attn(x)
+         q, k, v = qkv.split(self.n_embd, dim=2)
+         k = k.view(B, T, self.n_head, C // self.n_head).transpose(1, 2)
+         q = q.view(B, T, self.n_head, C // self.n_head).transpose(1, 2)
+         v = v.view(B, T, self.n_head, C // self.n_head).transpose(1, 2)
+         # Fused attention kernel with a causal mask
+         y = F.scaled_dot_product_attention(q, k, v, is_causal=True)
+         y = y.transpose(1, 2).contiguous().view(B, T, C)
+         y = self.c_proj(y)
+         return y
+
+
+ # Define the MLP class
+ class MLP(nn.Module):
+     def __init__(self, config):
+         super().__init__()
+         self.c_fc = nn.Linear(config.n_embd, 4 * config.n_embd)
+         self.gelu = nn.GELU(approximate='tanh')
+         self.c_proj = nn.Linear(4 * config.n_embd, config.n_embd)
+         self.c_proj.NANOGPT_SCALE_INIT = 1
+
+     def forward(self, x):
+         x = self.c_fc(x)
+         x = self.gelu(x)
+         x = self.c_proj(x)
+         return x
+
+
+ # Define the Block class (pre-norm transformer block with residual connections)
+ class Block(nn.Module):
+     def __init__(self, config):
+         super().__init__()
+         self.ln_1 = nn.LayerNorm(config.n_embd)
+         self.attn = CausalSelfAttention(config)
+         self.ln_2 = nn.LayerNorm(config.n_embd)
+         self.mlp = MLP(config)
+
+     def forward(self, x):
+         x = x + self.attn(self.ln_1(x))
+         x = x + self.mlp(self.ln_2(x))
+         return x
+
+
+ # Define the GPT class
+ class GPT(PreTrainedModel):
+     config_class = GPTConfig
+
+     def __init__(self, config):
+         super().__init__(config)
+         self.config = config
+         self.transformer = nn.ModuleDict(dict(
+             wte=nn.Embedding(config.vocab_size, config.n_embd),
+             wpe=nn.Embedding(config.block_size, config.n_embd),
+             h=nn.ModuleList([Block(config) for _ in range(config.n_layer)]),
+             ln_f=nn.LayerNorm(config.n_embd),
+         ))
+         self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
+         # Weight tying between the token embedding and the output head
+         self.transformer.wte.weight = self.lm_head.weight
+         self.apply(self._init_weights)
+
+     def _init_weights(self, module):
+         if isinstance(module, nn.Linear):
+             std = 0.02
+             if hasattr(module, 'NANOGPT_SCALE_INIT'):
+                 # Scale down residual projections by 1/sqrt(2 * n_layer)
+                 std *= (2 * self.config.n_layer) ** -0.5
+             torch.nn.init.normal_(module.weight, mean=0.0, std=std)
+             if module.bias is not None:
+                 torch.nn.init.zeros_(module.bias)
+         elif isinstance(module, nn.Embedding):
+             torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
+
+     def forward(self, idx, targets=None):
+         B, T = idx.size()
+         assert T <= self.config.block_size, f"Cannot forward sequence of length {T}, block size is only {self.config.block_size}"
+         pos = torch.arange(0, T, dtype=torch.long, device=idx.device)
+         pos_emb = self.transformer.wpe(pos)
+         tok_emb = self.transformer.wte(idx)
+         x = tok_emb + pos_emb
+         for block in self.transformer.h:
+             x = block(x)
+         x = self.transformer.ln_f(x)
+         logits = self.lm_head(x)
+         loss = None
+         if targets is not None:
+             loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1))
+         return logits, loss
+
+     def save_pretrained(self, save_directory):
+         # Save in the standard transformers format, plus a plain state_dict copy
+         super().save_pretrained(save_directory)
+         torch.save(self.state_dict(), os.path.join(save_directory, "pytorch_model.bin"))
+
+     @classmethod
+     def from_pretrained(cls, *args, **kwargs):
+         return super().from_pretrained(*args, **kwargs)
+
+     def push_to_hub(self, repo_id, private=False, commit_message="Push model to hub"):
+         # Save the model locally
+         self.save_pretrained(repo_id)
+
+         # Use HfApi to push the model to the Hugging Face Hub
+         api = HfApi()
+         api.create_repo(repo_id=repo_id, repo_type="model", private=private, exist_ok=True)
+         api.upload_folder(
+             folder_path=repo_id,
+             repo_id=repo_id,
+             repo_type="model",
+             commit_message=commit_message
+         )
+
+
+ # Register the custom classes with the auto factories
+ AutoConfig.register("custom_gpt", GPTConfig)
+ AutoModelForCausalLM.register(GPTConfig, GPT)
+
+ if __name__ == "__main__":
+     # Quick sanity check; run as "python -m <package>.modeling_gpt"
+     config = GPTConfig()
+     model = GPT(config)
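
A short smoke-test sketch for the model defined above. It assumes the four files live together in an importable package directory; the package name custom_gpt below is a placeholder, and all shapes are arbitrary:

import torch
from custom_gpt import GPT, GPTConfig   # placeholder package name; __init__.py re-exports both

config = GPTConfig(vocab_size=50304, block_size=768, n_layer=8, n_head=8, n_embd=768)
model = GPT(config)
model.eval()

idx = torch.randint(0, config.vocab_size, (2, 32))       # 2 sequences of 32 token ids each
targets = torch.randint(0, config.vocab_size, (2, 32))
with torch.no_grad():
    logits, loss = model(idx, targets)

print(logits.shape)   # torch.Size([2, 32, 50304])
print(loss.item())    # roughly ln(50304) ≈ 10.8 for a freshly initialized model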