Spaces: sanketchaudhary10 (Runtime error)

sanketchaudhary10 committed · Commit 2a24040 · 1 Parent(s): 01d8c2b

Adding files
- __init__.py +0 -0
- chat_logic.py +33 -0
- main.py +23 -0
- model.py +25 -0
- utils.py +6 -0
__init__.py ADDED

File without changes
chat_logic.py ADDED

@@ -0,0 +1,33 @@
+from model import tokenizer, model
+
+HISTORY = []  # Initialize an empty history list
+
+def chat(message, history):
+    """
+    Handles user input, generates a response using the model, and updates the chat history.
+    """
+    # Combine history with the current message
+    conversation = "\n".join(history) + f"\nUser: {message}\nAssistant:"
+
+    # Tokenize and generate response
+    inputs = tokenizer(conversation, return_tensors="pt", truncation=True, max_length=1024).to("cuda")
+    outputs = model.generate(inputs.input_ids, max_length=1024, temperature=0.7, do_sample=True)
+    reply = tokenizer.decode(outputs[:, inputs.input_ids.shape[-1]:][0], skip_special_tokens=True)
+
+    # Update history
+    update_history(message, reply)
+    return reply
+
+def update_history(message, reply):
+    """
+    Update the global history with the latest message and reply.
+    """
+    global HISTORY
+    HISTORY.append(f"User: {message}")
+    HISTORY.append(f"Assistant: {reply}")
+
+def get_history():
+    """
+    Retrieve the chat history as a string.
+    """
+    return "\n".join(HISTORY)
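
As committed, chat_logic.py has two likely failure points: "from model import tokenizer, model" assumes model.py defines those names (the version in this commit only defines a generator pipeline, see below), and gr.ChatInterface typically passes history as a list of (user, assistant) pairs, so "\n".join(history) fails with a TypeError from the second turn onward. In addition, max_length=1024 in generate counts the prompt itself, so a long conversation leaves almost no room for the reply. A minimal sketch of a chat() that works with the pair format, assuming model.py really does export a working tokenizer and model:

from model import tokenizer, model

def chat(message, history):
    # Flatten gr.ChatInterface's list of (user, assistant) pairs into a transcript.
    turns = []
    for user_msg, bot_msg in history:
        turns.append(f"User: {user_msg}")
        turns.append(f"Assistant: {bot_msg}")
    conversation = "\n".join(turns) + f"\nUser: {message}\nAssistant:"

    inputs = tokenizer(conversation, return_tensors="pt",
                       truncation=True, max_length=1024).to(model.device)
    # max_new_tokens bounds only the reply, independent of prompt length.
    outputs = model.generate(**inputs, max_new_tokens=256,
                             temperature=0.7, do_sample=True)
    reply = tokenizer.decode(outputs[0, inputs.input_ids.shape[-1]:],
                             skip_special_tokens=True)
    update_history(message, reply)
    return reply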
main.py ADDED

@@ -0,0 +1,23 @@
+import gradio as gr
+from chat_logic import chat, get_history
+
+def launch_gradio_ui():
+    with gr.Blocks() as gui:
+        gr.Markdown("## Chat With Llama 3.1-8B")
+
+        with gr.Row():
+            with gr.Column(scale=3):
+                chat_interface = gr.ChatInterface(fn=chat)
+
+            with gr.Column(scale=1):
+                gr.Markdown("### Message History")
+                history_display = gr.Textbox(label="Chat History", lines=27, interactive=False)
+                refresh_button = gr.Button("Refresh History")
+
+        # Update history display when the button is clicked
+        refresh_button.click(get_history, [], history_display)
+
+    gui.launch(share=True)
+
+if __name__ == "__main__":
+    launch_gradio_ui()
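
The refresh wiring passes positional arguments: get_history runs with no inputs and its return value fills the history textbox. The equivalent keyword form (same behaviour, just more explicit) would be:

    refresh_button.click(fn=get_history, inputs=None, outputs=history_display)

One small point: when the app runs inside a Hugging Face Space it is already served publicly, so share=True is generally redundant there and gui.launch() alone should be enough.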
model.py ADDED

@@ -0,0 +1,25 @@
+# from transformers import AutoTokenizer, AutoModelForCausalLM
+
+# def load_model(model_name="meta-llama/Llama-3.1-8B"):
+#     """
+#     Load the Hugging Face Llama model and tokenizer.
+#     """
+#     tokenizer = AutoTokenizer.from_pretrained(model_name)
+#     model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype="auto")
+#     return tokenizer, model
+
+# # Initialize model and tokenizer
+# tokenizer, model = load_model()
+
+from transformers import pipeline
+
+# Replace with your Hugging Face API token
+api_token = "your_huggingface_api_token"
+
+# Load the model using the API
+generator = pipeline(
+    "text-generation",
+    model="meta-llama/Llama-3.1-8B",
+    use_auth_token=api_token
+)
+
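
Only the commented-out block here defines tokenizer and model, yet chat_logic.py imports exactly those names; as committed, importing chat_logic raises ImportError before the UI ever starts, which matches the Space's "Runtime error" status. Hard-coding a placeholder API token in the file is also best avoided. A hedged sketch of one way to make the import resolve, assuming the Space has access to the gated meta-llama/Llama-3.1-8B weights, an HF_TOKEN secret, and enough GPU memory (recent transformers releases accept token= in place of the deprecated use_auth_token=):

import os
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_NAME = "meta-llama/Llama-3.1-8B"
hf_token = os.environ.get("HF_TOKEN")  # read the access token from a Space secret

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",     # spread the 8B weights across available devices
    torch_dtype="auto",
    token=hf_token,
)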
utils.py ADDED

@@ -0,0 +1,6 @@
+def truncate_conversation(history, max_tokens=1024):
+    """
+    Truncate the conversation history to fit within the token limit.
+    """
+    truncated_history = history[-max_tokens:]
+    return truncated_history
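
Note that history[-max_tokens:] keeps the last max_tokens elements (characters if history is a string, turns if it is a list), not tokens, and nothing else in this commit imports the helper yet. A token-aware variant is sketched below; the function name is hypothetical and it assumes the tokenizer from model.py is available:

def truncate_conversation_by_tokens(history, tokenizer, max_tokens=1024):
    """Keep the most recent turns that fit within max_tokens."""
    kept, total = [], 0
    for turn in reversed(history):            # walk from newest to oldest
        n = len(tokenizer.encode(turn))
        if total + n > max_tokens:
            break
        kept.append(turn)
        total += n
    return list(reversed(kept))               # restore chronological order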