ready for deploy
Files changed:
- .gradio/certificate.pem +31 -0
- __pycache__/inference.cpython-312.pyc +0 -0
- __pycache__/inference_fine_tune.cpython-312.pyc +0 -0
- app.py +19 -0
- inference.py +2 -1
- inference_fine_tune.py +7 -8
- openweb.config.json +3 -3
.gradio/certificate.pem
ADDED
@@ -0,0 +1,31 @@
+-----BEGIN CERTIFICATE-----
+MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+-----END CERTIFICATE-----
__pycache__/inference.cpython-312.pyc
CHANGED
Binary files a/__pycache__/inference.cpython-312.pyc and b/__pycache__/inference.cpython-312.pyc differ
__pycache__/inference_fine_tune.cpython-312.pyc
ADDED
Binary file (7.63 kB).
app.py
ADDED
@@ -0,0 +1,19 @@
+import gradio as gr
+from inference_fine_tune import generate_response  # your generator-based inference code
+
+# This function streams the response
+def chat_interface(prompt):
+    return generate_response(prompt)  # returns a generator
+
+with gr.Blocks() as demo:
+    gr.Markdown("## Chat with the Model")
+    with gr.Row():
+        inp = gr.Textbox(label="Your Prompt", placeholder="Enter your message...", lines=3)
+        out = gr.Textbox(label="Model Response", lines=10)
+
+    # Stream response
+    btn = gr.Button("Send")
+    btn.click(chat_interface, inputs=inp, outputs=out)
+
+# Launch for Hugging Face Spaces
+demo.launch(share=True)
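Note on streaming: as committed, chat_interface simply returns the string built by generate_response, so the output box is filled in one shot rather than streamed, despite the comments. If token-by-token streaming is wanted, Gradio accepts generator functions as event handlers and pushes each yielded value into the output component. A minimal sketch, assuming generate_response is switched back to yielding words (as inference.py did before this commit):

import gradio as gr
from inference_fine_tune import generate_response  # assumption: yields words one at a time

def chat_interface(prompt):
    partial = ""
    for word in generate_response(prompt):
        partial += word
        yield partial  # each yield updates the Textbox, producing a streaming effect

with gr.Blocks() as demo:
    inp = gr.Textbox(label="Your Prompt", lines=3)
    out = gr.Textbox(label="Model Response", lines=10)
    gr.Button("Send").click(chat_interface, inputs=inp, outputs=out)

demo.launch()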
inference.py
CHANGED
@@ -137,12 +137,13 @@ def generate_response(prompt:str):
         next_token = torch.multinomial(probs, num_samples=1)
         next_token = top_k_indices.gather(-1, next_token)
         word = tokenizer.decode([next_token.item()])
-        yield word
+        # yield word
         decoder_input = torch.cat([decoder_input, next_token], dim=1)
         if decoder_input.shape[1] > config['seq_len']:
             decoder_input = decoder_input[:,-config['seq_len']:]
         if next_token.item() == eos_token_id:
             break
+    return decoder_input
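With yield word commented out, this generate_response is no longer a generator: it runs the whole sampling loop and then returns the decoder_input tensor of token ids. Any caller now has to decode that tensor itself; a minimal sketch of the caller side, assuming access to the same tokenizer instance used inside the function:

# hypothetical caller-side decoding for the reworked inference.generate_response
token_ids = generate_response("Hello there")            # returns a (1, seq_len) tensor of ids
text = tokenizer.decode(token_ids.squeeze(0).tolist())  # convert ids back to text
print(text)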
inference_fine_tune.py
CHANGED
@@ -103,8 +103,8 @@ def run_model(config):
         raise FileNotFoundError("Model File not found : "+ model_path)
 
 def generate_response(prompt:str):
+    print("Prompt : ",prompt)
     config = get_config("./openweb.config.json")
-    print(config)
     device = "cuda" if torch.cuda.is_available() else "cpu"
     tokenizer = get_tokenizer(config)
     pad_token_id = tokenizer.token_to_id("<pad>")
@@ -117,15 +117,13 @@ def generate_response(prompt:str):
     model.eval()
     state = torch.load(model_path)
     model.load_state_dict(state['model_state_dict'])
-
-
+    word = ""
+    input_tokens = tokenizer.encode(prompt).ids
     input_tokens.extend([user_token_id] + input_tokens + [ai_token_id] )
     if len(input_tokens) > config['seq_len']:
         print(f"exceeding max length of input : {config['seq_len']}")
         exit()
     input_tokens = torch.tensor(input_tokens)
-    print(input_tokens)
-    print(tokenizer.decode(input_tokens))
     decoder_input = input_tokens.to(device)
     if decoder_input.dim() == 1:
         decoder_input = decoder_input.unsqueeze(0)
@@ -135,7 +133,6 @@ def generate_response(prompt:str):
     while decoder_input.shape[1] < 2000 :
         # Apply causal mask based on current decoder_input length
         # decoder_mask = (decoder_input != pad_token_id).unsqueeze(0).int() & causal_mask(decoder_input.size(1)).type_as(input_mask).to(device)
-        print(decoder_input)
         # Get model output
         out = model.decode(decoder_input)
         logits = model.project(out[:, -1]) # Get logits for last token
@@ -144,13 +141,15 @@ def generate_response(prompt:str):
         probs = torch.softmax(top_k_logits, dim=-1)
         next_token = torch.multinomial(probs, num_samples=1)
         next_token = top_k_indices.gather(-1, next_token)
-        word
-
+        word += tokenizer.decode([next_token.item()])
+
         decoder_input = torch.cat([decoder_input, next_token], dim=1)
         if decoder_input.shape[1] > config['seq_len']:
             decoder_input = decoder_input[:,-config['seq_len']:]
         if next_token.item() == eos_token_id:
             break
+    print("Output : ",word)
+    return word
 
 if __name__ == "__main__":
     config = get_config("openweb.config.json")
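The sampling lines above use top_k_logits and top_k_indices, which are defined just outside the shown context; they presumably come from a torch.topk call on the last-token logits. A minimal sketch of that step, with the top_k value as an assumption (it is not visible in this diff):

top_k = 50  # assumed value; the real setting sits outside the shown hunks
top_k_logits, top_k_indices = torch.topk(logits, k=top_k, dim=-1)   # keep the k best logits
probs = torch.softmax(top_k_logits, dim=-1)                         # distribution over those k
next_token = torch.multinomial(probs, num_samples=1)                # sample within the top-k set
next_token = top_k_indices.gather(-1, next_token)                   # map back to vocabulary ids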
openweb.config.json
CHANGED
@@ -10,9 +10,9 @@
     "test": "dataset/openweb_fine.jsonl",
     "d_ff": 1024,
     "dropout": 0.1,
-    "model_folder": "
-    "model_basename": "
-    "preload": "
+    "model_folder": "./",
+    "model_basename": "",
+    "preload": "weights",
     "tokenizer_file": "openweb2.tokenizer.json",
     "experiment_name": "runs/openweb2",
     "dataset": "dataset/dataset_general.jsonl",
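These values point checkpoint loading at a weights file in the Space root: model_folder "./", an empty model_basename, and preload "weights". The helper that joins them is not part of this diff, so the exact filename is an assumption; a sketch of the usual pattern, with the helper name and the ".pt" suffix as placeholders:

from pathlib import Path

def resolve_checkpoint(config):
    # hypothetical helper: model_folder + model_basename + preload + suffix
    filename = f"{config['model_basename']}{config['preload']}.pt"   # assumed ".pt" suffix
    return str(Path(config['model_folder']) / filename)

# with this config: resolve_checkpoint(...) -> "weights.pt" in the repository root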