File size: 1,516 Bytes
4f0fba3 cd52c0b 33560b1 657cc09 4f0fba3 33560b1 6b9c8bf 4f0fba3 33560b1 cd52c0b 7f50726 cd52c0b 46a1867 cd52c0b 4f0fba3 cd52c0b 657cc09 7f50726 4f0fba3 657cc09 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
import gradio as gr
import os
import requests
from llama_cpp import Llama
# Remote GGUF model (Llama-2-7B, Q5_K_S quantization — a multi-GB file) and
# the local path it is cached at. /home/user/app is presumably the app's
# persistent working dir on the hosting platform — verify against deployment.
MODEL_URL = "https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q5_K_S.gguf"
MODEL_PATH = "/home/user/app/llama-2-7b.Q5_K_S.gguf" # Local storage path
# Function to download model if not present
def download_model():
    """Download the GGUF model to MODEL_PATH if it is not already there.

    Streams the response in 8 KiB chunks so the multi-GB file is never held
    in memory.

    Raises:
        requests.HTTPError: if the server answers with an error status
            (prevents saving an HTML error page as the "model", which would
            permanently satisfy the exists() check).
        requests.RequestException: on connection/timeout failures.
    """
    if os.path.exists(MODEL_PATH):
        return
    print("Downloading model...")
    try:
        # timeout guards against a hung connection; the with-block closes
        # the response even if writing fails partway through.
        with requests.get(MODEL_URL, stream=True, timeout=60) as response:
            response.raise_for_status()
            with open(MODEL_PATH, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # skip keep-alive chunks
                        f.write(chunk)
    except Exception:
        # Don't leave a truncated file behind — it would pass the
        # exists() check on the next run and be loaded as a corrupt model.
        if os.path.exists(MODEL_PATH):
            os.remove(MODEL_PATH)
        raise
    print("Model downloaded successfully!")
# Fetch the model at import time, before the Gradio app starts serving,
# so the first chat request doesn't block on a multi-GB download.
download_model()
# Define OmniAI's chat function (handles two arguments: user message + history)
_llm = None  # lazily-initialized singleton; loading the GGUF file is expensive


def _get_llm():
    """Load the Llama model on first use and reuse it for every later call.

    The original code constructed a new Llama instance (re-reading the
    multi-GB model file) on every chat message; caching it here makes each
    request after the first fast while keeping startup lazy.
    """
    global _llm
    if _llm is None:
        _llm = Llama(model_path=MODEL_PATH, n_ctx=4096, n_batch=256, flash_attn=True)
    return _llm


def omni_ai_chat(user_message, history):
    """Gradio chat handler: answer the latest user message.

    Args:
        user_message: The newest prompt from the user.
        history: Prior turns supplied by gr.ChatInterface — currently
            unused; each prompt is answered without conversation context.

    Returns:
        The model's completion text, stripped of surrounding whitespace,
        or an error string shown in the chat UI if loading/inference fails.
    """
    try:
        llm = _get_llm()
        response = llm(user_message)  # Process only the latest prompt
        return response["choices"][0]["text"].strip()
    except Exception as e:
        # Surface the failure in the chat window instead of crashing the app.
        return f"Error loading AI model: {str(e)}"
# Build the chat UI. type="messages" selects the current message-dict
# history format, avoiding Gradio's deprecated tuple-format warning.
chatbot = gr.ChatInterface(
    fn=omni_ai_chat,
    title="OmniAI - Cloud AI",
    description="Your personal AI assistant, running entirely in the cloud!",
    type="messages",
)
# Start the web server.
chatbot.launch()
|