abancp committed
Commit e6bd9b6
1 Parent(s): 196a50e

ready for deploy
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
+ -----BEGIN CERTIFICATE-----
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+ -----END CERTIFICATE-----
__pycache__/inference.cpython-312.pyc CHANGED
Binary files a/__pycache__/inference.cpython-312.pyc and b/__pycache__/inference.cpython-312.pyc differ
 
__pycache__/inference_fine_tune.cpython-312.pyc ADDED
Binary file (7.63 kB).
 
app.py ADDED
@@ -0,0 +1,19 @@
+ import gradio as gr
+ from inference_fine_tune import generate_response # your generator-based inference code
+
+ # This function streams the response
+ def chat_interface(prompt):
+     return generate_response(prompt) # returns a generator
+
+ with gr.Blocks() as demo:
+     gr.Markdown("## Chat with the Model")
+     with gr.Row():
+         inp = gr.Textbox(label="Your Prompt", placeholder="Enter your message...", lines=3)
+         out = gr.Textbox(label="Model Response", lines=10)
+
+     # Stream response
+     btn = gr.Button("Send")
+     btn.click(chat_interface, inputs=inp, outputs=out)
+
+ # Launch for Hugging Face Spaces
+ demo.launch(share=True)
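
Note: with this commit, generate_response in inference_fine_tune.py returns one complete string rather than yielding tokens, so the "streams the response" / "returns a generator" comments in app.py no longer match its behaviour. For reference, a minimal sketch of what a streaming handler could look like, assuming generate_response still yielded one decoded token at a time (Gradio treats a generator handler as a streaming update; component names here mirror app.py but the handler body is illustrative):

import gradio as gr
from inference_fine_tune import generate_response  # assumed here to be a generator

def chat_interface(prompt):
    partial = ""
    for token in generate_response(prompt):  # only valid if generate_response yields tokens
        partial += token
        yield partial  # each yield replaces the output textbox contents

with gr.Blocks() as demo:
    inp = gr.Textbox(label="Your Prompt", lines=3)
    out = gr.Textbox(label="Model Response", lines=10)
    gr.Button("Send").click(chat_interface, inputs=inp, outputs=out)

demo.launch()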
inference.py CHANGED
@@ -137,12 +137,13 @@ def generate_response(prompt:str):
         next_token = torch.multinomial(probs, num_samples=1)
         next_token = top_k_indices.gather(-1, next_token)
         word = tokenizer.decode([next_token.item()])
-        yield word
+        # yield word
         decoder_input = torch.cat([decoder_input, next_token], dim=1)
         if decoder_input.shape[1] > config['seq_len']:
             decoder_input = decoder_input[:,-config['seq_len']:]
         if next_token.item() == eos_token_id:
             break
+    return decoder_input

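Since generate_response in inference.py now returns the raw decoder_input tensor (prompt tokens plus generated tokens) instead of yielding decoded words, the caller has to decode it itself. A minimal caller-side sketch, assuming the tokenizers Tokenizer file named in openweb.config.json; the prompt string is illustrative:

from tokenizers import Tokenizer
from inference import generate_response

tokenizer = Tokenizer.from_file("openweb2.tokenizer.json")
output_ids = generate_response("hello")               # tensor of shape (1, seq_len)
text = tokenizer.decode(output_ids.squeeze(0).tolist())
print(text)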
inference_fine_tune.py CHANGED
@@ -103,8 +103,8 @@ def run_model(config):
        raise FileNotFoundError("Model File not found : "+ model_path)

def generate_response(prompt:str):
+    print("Prompt : ",prompt)
    config = get_config("./openweb.config.json")
-    print(config)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    tokenizer = get_tokenizer(config)
    pad_token_id = tokenizer.token_to_id("<pad>")
@@ -117,15 +117,13 @@ def generate_response(prompt:str):
    model.eval()
    state = torch.load(model_path)
    model.load_state_dict(state['model_state_dict'])
-
-    # input_tokens = tokenizer.encode(prompt).ids[:-1]
+    word = ""
+    input_tokens = tokenizer.encode(prompt).ids
    input_tokens.extend([user_token_id] + input_tokens + [ai_token_id] )
    if len(input_tokens) > config['seq_len']:
        print(f"exceeding max length of input : {config['seq_len']}")
        exit()
    input_tokens = torch.tensor(input_tokens)
-    print(input_tokens)
-    print(tokenizer.decode(input_tokens))
    decoder_input = input_tokens.to(device)
    if decoder_input.dim() == 1:
        decoder_input = decoder_input.unsqueeze(0)
@@ -135,7 +133,6 @@
    while decoder_input.shape[1] < 2000 :
        # Apply causal mask based on current decoder_input length
        # decoder_mask = (decoder_input != pad_token_id).unsqueeze(0).int() & causal_mask(decoder_input.size(1)).type_as(input_mask).to(device)
-        print(decoder_input)
        # Get model output
        out = model.decode(decoder_input)
        logits = model.project(out[:, -1]) # Get logits for last token
@@ -144,13 +141,15 @@
        probs = torch.softmax(top_k_logits, dim=-1)
        next_token = torch.multinomial(probs, num_samples=1)
        next_token = top_k_indices.gather(-1, next_token)
-        word = tokenizer.decode([next_token.item()])
-        yield word
+        word += tokenizer.decode([next_token.item()])
+
        decoder_input = torch.cat([decoder_input, next_token], dim=1)
        if decoder_input.shape[1] > config['seq_len']:
            decoder_input = decoder_input[:,-config['seq_len']:]
        if next_token.item() == eos_token_id:
            break
+    print("Output : ",word)
+    return word

if __name__ == "__main__":
    config = get_config("openweb.config.json")
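
The generation loop above draws each next token from the top-k logits of the last position. The snippet below isolates that step as a standalone helper; it mirrors the topk/softmax/multinomial/gather lines in the diff, but the value of k is illustrative since it is set elsewhere in the repo and not shown here:

import torch

def sample_top_k(logits: torch.Tensor, k: int = 50) -> torch.Tensor:
    # Keep only the k highest-scoring vocabulary entries for the last position.
    top_k_logits, top_k_indices = torch.topk(logits, k, dim=-1)
    # Renormalise over the kept entries and draw one sample.
    probs = torch.softmax(top_k_logits, dim=-1)
    next_token = torch.multinomial(probs, num_samples=1)  # index within the top-k set
    # Map the sampled position back to a vocabulary id.
    return top_k_indices.gather(-1, next_token)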
openweb.config.json CHANGED
@@ -10,9 +10,9 @@
     "test": "dataset/openweb_fine.jsonl",
     "d_ff": 1024,
     "dropout": 0.1,
-    "model_folder": "openweb2",
-    "model_basename": "openweb2-",
-    "preload": "03",
+    "model_folder": "./",
+    "model_basename": "",
+    "preload": "weights",
     "tokenizer_file": "openweb2.tokenizer.json",
     "experiment_name": "runs/openweb2",
     "dataset": "dataset/dataset_general.jsonl",