ready for deploy
Files changed:
- .gradio/certificate.pem +31 -0
- __pycache__/inference.cpython-312.pyc +0 -0
- __pycache__/inference_fine_tune.cpython-312.pyc +0 -0
- app.py +19 -0
- inference.py +2 -1
- inference_fine_tune.py +7 -8
- openweb.config.json +3 -3
.gradio/certificate.pem
ADDED
@@ -0,0 +1,31 @@
+-----BEGIN CERTIFICATE-----
+MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+-----END CERTIFICATE-----
__pycache__/inference.cpython-312.pyc
CHANGED
Binary files a/__pycache__/inference.cpython-312.pyc and b/__pycache__/inference.cpython-312.pyc differ
__pycache__/inference_fine_tune.cpython-312.pyc
ADDED
Binary file (7.63 kB).
app.py
ADDED
@@ -0,0 +1,19 @@
+import gradio as gr
+from inference_fine_tune import generate_response  # your generator-based inference code
+
+# This function streams the response
+def chat_interface(prompt):
+    return generate_response(prompt)  # returns a generator
+
+with gr.Blocks() as demo:
+    gr.Markdown("## Chat with the Model")
+    with gr.Row():
+        inp = gr.Textbox(label="Your Prompt", placeholder="Enter your message...", lines=3)
+        out = gr.Textbox(label="Model Response", lines=10)
+
+    # Stream response
+    btn = gr.Button("Send")
+    btn.click(chat_interface, inputs=inp, outputs=out)
+
+# Launch for Hugging Face Spaces
+demo.launch(share=True)
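Note on streaming: as committed, chat_interface simply returns the string built by generate_response, so the output box is filled in one shot rather than streamed, despite the comments. If token-by-token streaming is wanted, Gradio accepts generator functions as event handlers and pushes each yielded value into the output component. A minimal sketch, assuming generate_response is switched back to yielding words (as inference.py did before this commit):

import gradio as gr
from inference_fine_tune import generate_response  # assumption: yields words one at a time

def chat_interface(prompt):
    partial = ""
    for word in generate_response(prompt):
        partial += word
        yield partial  # each yield updates the Textbox, producing a streaming effect

with gr.Blocks() as demo:
    inp = gr.Textbox(label="Your Prompt", lines=3)
    out = gr.Textbox(label="Model Response", lines=10)
    gr.Button("Send").click(chat_interface, inputs=inp, outputs=out)

demo.launch()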
inference.py
CHANGED
@@ -137,12 +137,13 @@ def generate_response(prompt:str):
         next_token = torch.multinomial(probs, num_samples=1)
         next_token = top_k_indices.gather(-1, next_token)
         word = tokenizer.decode([next_token.item()])
-        yield word
+        # yield word
         decoder_input = torch.cat([decoder_input, next_token], dim=1)
         if decoder_input.shape[1] > config['seq_len']:
             decoder_input = decoder_input[:,-config['seq_len']:]
         if next_token.item() == eos_token_id:
             break
+    return decoder_input
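With yield word commented out, this generate_response is no longer a generator: it runs the whole sampling loop and then returns the decoder_input tensor of token ids. Any caller now has to decode that tensor itself; a minimal sketch of the caller side, assuming access to the same tokenizer instance used inside the function:

# hypothetical caller-side decoding for the reworked inference.generate_response
token_ids = generate_response("Hello there")            # returns a (1, seq_len) tensor of ids
text = tokenizer.decode(token_ids.squeeze(0).tolist())  # convert ids back to text
print(text)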
inference_fine_tune.py
CHANGED
@@ -103,8 +103,8 @@ def run_model(config):
         raise FileNotFoundError("Model File not found : "+ model_path)
 
 def generate_response(prompt:str):
+    print("Prompt : ",prompt)
     config = get_config("./openweb.config.json")
-    print(config)
     device = "cuda" if torch.cuda.is_available() else "cpu"
     tokenizer = get_tokenizer(config)
     pad_token_id = tokenizer.token_to_id("<pad>")
@@ -117,15 +117,13 @@ def generate_response(prompt:str):
     model.eval()
     state = torch.load(model_path)
     model.load_state_dict(state['model_state_dict'])
-
-
+    word = ""
+    input_tokens = tokenizer.encode(prompt).ids
     input_tokens.extend([user_token_id] + input_tokens + [ai_token_id] )
     if len(input_tokens) > config['seq_len']:
         print(f"exceeding max length of input : {config['seq_len']}")
         exit()
     input_tokens = torch.tensor(input_tokens)
-    print(input_tokens)
-    print(tokenizer.decode(input_tokens))
     decoder_input = input_tokens.to(device)
     if decoder_input.dim() == 1:
         decoder_input = decoder_input.unsqueeze(0)
@@ -135,7 +133,6 @@ def generate_response(prompt:str):
     while decoder_input.shape[1] < 2000 :
         # Apply causal mask based on current decoder_input length
         # decoder_mask = (decoder_input != pad_token_id).unsqueeze(0).int() & causal_mask(decoder_input.size(1)).type_as(input_mask).to(device)
-        print(decoder_input)
         # Get model output
         out = model.decode(decoder_input)
         logits = model.project(out[:, -1]) # Get logits for last token
@@ -144,13 +141,15 @@ def generate_response(prompt:str):
         probs = torch.softmax(top_k_logits, dim=-1)
         next_token = torch.multinomial(probs, num_samples=1)
         next_token = top_k_indices.gather(-1, next_token)
-        word
-
+        word += tokenizer.decode([next_token.item()])
+
         decoder_input = torch.cat([decoder_input, next_token], dim=1)
         if decoder_input.shape[1] > config['seq_len']:
             decoder_input = decoder_input[:,-config['seq_len']:]
         if next_token.item() == eos_token_id:
             break
+    print("Output : ",word)
+    return word
 
 if __name__ == "__main__":
     config = get_config("openweb.config.json")
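The sampling lines above use top_k_logits and top_k_indices, which are defined just outside the shown context; they presumably come from a torch.topk call on the last-token logits. A minimal sketch of that step, with the top_k value as an assumption (it is not visible in this diff):

top_k = 50  # assumed value; the real setting sits outside the shown hunks
top_k_logits, top_k_indices = torch.topk(logits, k=top_k, dim=-1)   # keep the k best logits
probs = torch.softmax(top_k_logits, dim=-1)                         # distribution over those k
next_token = torch.multinomial(probs, num_samples=1)                # sample within the top-k set
next_token = top_k_indices.gather(-1, next_token)                   # map back to vocabulary ids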
openweb.config.json
CHANGED
@@ -10,9 +10,9 @@
     "test": "dataset/openweb_fine.jsonl",
     "d_ff": 1024,
     "dropout": 0.1,
-    "model_folder": "
-    "model_basename": "
-    "preload": "
+    "model_folder": "./",
+    "model_basename": "",
+    "preload": "weights",
     "tokenizer_file": "openweb2.tokenizer.json",
     "experiment_name": "runs/openweb2",
     "dataset": "dataset/dataset_general.jsonl",
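These values point checkpoint loading at a weights file in the Space root: model_folder "./", an empty model_basename, and preload "weights". The helper that joins them is not part of this diff, so the exact filename is an assumption; a sketch of the usual pattern, with the helper name and the ".pt" suffix as placeholders:

from pathlib import Path

def resolve_checkpoint(config):
    # hypothetical helper: model_folder + model_basename + preload + suffix
    filename = f"{config['model_basename']}{config['preload']}.pt"   # assumed ".pt" suffix
    return str(Path(config['model_folder']) / filename)

# with this config: resolve_checkpoint(...) -> "weights.pt" in the repository root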