TejAndrewsACC committed on
Commit 072eb93 · verified · 1 Parent(s): 8466c9d

Update app.py

Files changed (1)
  1. app.py +31 -113
app.py CHANGED
@@ -1,115 +1,33 @@
- import gradio as gr
- from transformers import AutoModelForCausalLM, AutoTokenizer
  import torch
- import torch.nn as nn
- import os
 
- # 'use_auth_token' is deprecated; pass 'token' instead, per the latest transformers warning
- model_name = "nvidia/Hymba-1.5B-Instruct"
- hf_token = os.getenv("DOWNLOAD")
-
- # Install sentencepiece if necessary:
- # pip install sentencepiece
-
- # Hymba's remote code needs these dependencies installed first:
- # pip install flash_attn causal_conv1d mamba_ssm einops
-
- # Load the tokenizer and model with authentication and remote code trust
- tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
- model = AutoModelForCausalLM.from_pretrained(model_name, token=hf_token, trust_remote_code=True)
-
- # Auxiliary networks; defined but never used by the chat handler below
- class LargeNeuralNetwork(nn.Module):
-     def __init__(self):
-         super(LargeNeuralNetwork, self).__init__()
-         self.layer1 = nn.Linear(512, 2048)
-         self.layer2 = nn.Linear(2048, 4096)
-         self.layer3 = nn.Linear(4096, 8192)
-         self.layer4 = nn.Linear(8192, 16384)
-         self.layer5 = nn.Linear(16384, 32768)
-         self.relu = nn.ReLU()
-         self.output = nn.Linear(32768, 1)
-
-     def forward(self, x):
-         x = self.relu(self.layer1(x))
-         x = self.relu(self.layer2(x))
-         x = self.relu(self.layer3(x))
-         x = self.relu(self.layer4(x))
-         x = self.relu(self.layer5(x))
-         return self.output(x)
-
- class LargeRecurrentNN(nn.Module):
-     def __init__(self):
-         super(LargeRecurrentNN, self).__init__()
-         self.rnn = nn.RNN(input_size=512, hidden_size=2048, num_layers=3, batch_first=True)
-         self.fc = nn.Linear(2048, 1)
-
-     def forward(self, x):
-         h0 = torch.zeros(3, x.size(0), 2048).to(x.device)
-         out, _ = self.rnn(x, h0)
-         out = self.fc(out[:, -1, :])
-         return out
-
- class LargeConvolutionalNN(nn.Module):
-     def __init__(self):
-         super(LargeConvolutionalNN, self).__init__()
-         self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
-         self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
-         self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
-         self.fc1 = nn.Linear(128*32*32, 1024)
-         self.fc2 = nn.Linear(1024, 1)
-         self.relu = nn.ReLU()
-
-     def forward(self, x):
-         x = self.relu(self.conv1(x))
-         x = self.relu(self.conv2(x))
-         x = self.relu(self.conv3(x))
-         x = x.view(x.size(0), -1)
-         x = self.relu(self.fc1(x))
-         return self.fc2(x)
-
- class PhiModel(nn.Module):
-     def __init__(self):
-         super(PhiModel, self).__init__()
-         self.fc = nn.Linear(512, 1024)
-
-     def forward(self, x):
-         return self.fc(x)
-
- class GeneticAlgorithm(nn.Module):
-     def __init__(self):
-         super(GeneticAlgorithm, self).__init__()
-         self.fc = nn.Linear(512, 1024)
-
-     def forward(self, x):
-         return self.fc(x)
-
- system_message = "You are Surefire Pulse AGI ACC 4.500, created by the ACC and Tej Andrews, the owner of the ACC. Your personal name is Pulse."
-
- def chat(message, history):
-     prompt = f"{system_message}\n\n"
-
-     for msg in history:
-         prompt += f"User: {msg[0]}\nAssistant: {msg[1]}\n"
-
-     prompt += f"User: {message}\nAssistant:"
-
-     inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
-     outputs = model.generate(**inputs, max_new_tokens=150)
-
-     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-     history.append((message, response))
-
-     return response, history
-
- gr.ChatInterface(
-     fn=chat,
-     type="messages",
-     title="Chatbot",
-     description="Interact with the AI assistant."
- ).launch()
+ from transformers import AutoModelForCausalLM, AutoTokenizer, StopStringCriteria, StoppingCriteriaList
  import torch
 
+ # Load the tokenizer and model
+ repo_name = "nvidia/Hymba-1.5B-Instruct"
+
+ tokenizer = AutoTokenizer.from_pretrained(repo_name, trust_remote_code=True)
+ model = AutoModelForCausalLM.from_pretrained(repo_name, trust_remote_code=True)
+ model = model.cuda().to(torch.bfloat16)
+
+ # Chat with Hymba: read a single prompt from stdin
+ prompt = input()
+
+ messages = [
+     {"role": "system", "content": "You are a helpful assistant."}
+ ]
+ messages.append({"role": "user", "content": prompt})
+
+ # Apply the chat template and move the inputs to the GPU
+ tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to('cuda')
+ # Stop generation once the end-of-sequence string is produced
+ stopping_criteria = StoppingCriteriaList([StopStringCriteria(tokenizer=tokenizer, stop_strings="</s>")])
+ outputs = model.generate(
+     tokenized_chat,
+     max_new_tokens=256,
+     do_sample=False,  # greedy decoding; the temperature below has no effect
+     temperature=0.7,
+     use_cache=True,
+     stopping_criteria=stopping_criteria
+ )
+ # Decode only the newly generated tokens, skipping the prompt
+ input_length = tokenized_chat.shape[1]
+ response = tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
+
+ print(f"Model response: {response}")