TejAndrewsACC committed on
Commit 072eb93 · verified · 1 Parent(s): 8466c9d

Update app.py

Files changed (1)
  1. app.py +31 -113
app.py CHANGED
@@ -1,115 +1,33 @@
- import gradio as gr
- from transformers import AutoModelForCausalLM, AutoTokenizer
  import torch
- import torch.nn as nn
- import os
 
- # 'use_auth_token' is deprecated; pass 'token' instead, per the latest transformers warning
- model_name = "nvidia/Hymba-1.5B-Instruct"
- hf_token = os.getenv("DOWNLOAD")
-
- # Install sentencepiece if necessary:
- # pip install sentencepiece
-
- # Hymba's remote code needs these dependencies installed first:
- # pip install flash_attn causal_conv1d mamba_ssm einops
-
- # Load the tokenizer and model with authentication and remote code trust
- tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
- model = AutoModelForCausalLM.from_pretrained(model_name, token=hf_token, trust_remote_code=True)
-
- # Auxiliary networks; defined but never used by the chat handler below
- class LargeNeuralNetwork(nn.Module):
-     def __init__(self):
-         super(LargeNeuralNetwork, self).__init__()
-         self.layer1 = nn.Linear(512, 2048)
-         self.layer2 = nn.Linear(2048, 4096)
-         self.layer3 = nn.Linear(4096, 8192)
-         self.layer4 = nn.Linear(8192, 16384)
-         self.layer5 = nn.Linear(16384, 32768)
-         self.relu = nn.ReLU()
-         self.output = nn.Linear(32768, 1)
-
-     def forward(self, x):
-         x = self.relu(self.layer1(x))
-         x = self.relu(self.layer2(x))
-         x = self.relu(self.layer3(x))
-         x = self.relu(self.layer4(x))
-         x = self.relu(self.layer5(x))
-         return self.output(x)
-
- class LargeRecurrentNN(nn.Module):
-     def __init__(self):
-         super(LargeRecurrentNN, self).__init__()
-         self.rnn = nn.RNN(input_size=512, hidden_size=2048, num_layers=3, batch_first=True)
-         self.fc = nn.Linear(2048, 1)
-
-     def forward(self, x):
-         h0 = torch.zeros(3, x.size(0), 2048).to(x.device)
-         out, _ = self.rnn(x, h0)
-         out = self.fc(out[:, -1, :])
-         return out
-
- class LargeConvolutionalNN(nn.Module):
-     def __init__(self):
-         super(LargeConvolutionalNN, self).__init__()
-         self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
-         self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
-         self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
-         self.fc1 = nn.Linear(128*32*32, 1024)
-         self.fc2 = nn.Linear(1024, 1)
-         self.relu = nn.ReLU()
-
-     def forward(self, x):
-         x = self.relu(self.conv1(x))
-         x = self.relu(self.conv2(x))
-         x = self.relu(self.conv3(x))
-         x = x.view(x.size(0), -1)
-         x = self.relu(self.fc1(x))
-         return self.fc2(x)
-
- class PhiModel(nn.Module):
-     def __init__(self):
-         super(PhiModel, self).__init__()
-         self.fc = nn.Linear(512, 1024)
-
-     def forward(self, x):
-         return self.fc(x)
-
- class GeneticAlgorithm(nn.Module):
-     def __init__(self):
-         super(GeneticAlgorithm, self).__init__()
-         self.fc = nn.Linear(512, 1024)
-
-     def forward(self, x):
-         return self.fc(x)
-
- system_message = "You are Surefire Pulse AGI ACC 4.500, created by the ACC and Tej Andrews, the owner of the ACC. Your personal name is Pulse."
-
- def chat(message, history):
-     prompt = f"{system_message}\n\n"
-
-     for msg in history:
-         prompt += f"User: {msg[0]}\nAssistant: {msg[1]}\n"
-
-     prompt += f"User: {message}\nAssistant:"
-
-     inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
-     outputs = model.generate(**inputs, max_new_tokens=150)
-
-     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-     history.append((message, response))
-
-     return response, history
-
- gr.ChatInterface(
-     fn=chat,
-     type="messages",
-     title="Chatbot",
-     description="Interact with the AI assistant."
- ).launch()
+ from transformers import AutoModelForCausalLM, AutoTokenizer, StopStringCriteria, StoppingCriteriaList
  import torch
 
+ # Load the tokenizer and model
+ repo_name = "nvidia/Hymba-1.5B-Instruct"
+
+ tokenizer = AutoTokenizer.from_pretrained(repo_name, trust_remote_code=True)
+ model = AutoModelForCausalLM.from_pretrained(repo_name, trust_remote_code=True)
+ model = model.cuda().to(torch.bfloat16)
+
+ # Chat with Hymba: read a single prompt from stdin
+ prompt = input()
+
+ messages = [
+     {"role": "system", "content": "You are a helpful assistant."}
+ ]
+ messages.append({"role": "user", "content": prompt})
+
+ # Apply the chat template and move the inputs to the GPU
+ tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to('cuda')
+ # Stop generation once the end-of-sequence string is produced
+ stopping_criteria = StoppingCriteriaList([StopStringCriteria(tokenizer=tokenizer, stop_strings="</s>")])
+ outputs = model.generate(
+     tokenized_chat,
+     max_new_tokens=256,
+     do_sample=False,  # greedy decoding; the temperature below has no effect
+     temperature=0.7,
+     use_cache=True,
+     stopping_criteria=stopping_criteria
+ )
+ # Decode only the newly generated tokens, skipping the prompt
+ input_length = tokenized_chat.shape[1]
+ response = tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
+
+ print(f"Model response: {response}")