File size: 1,516 Bytes
4f0fba3 cd52c0b 33560b1 657cc09 4f0fba3 33560b1 6b9c8bf 4f0fba3 33560b1 cd52c0b 7f50726 cd52c0b 46a1867 cd52c0b 4f0fba3 cd52c0b 657cc09 7f50726 4f0fba3 657cc09 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
import gradio as gr
import os
import requests
from llama_cpp import Llama
# Remote GGUF model (Llama-2-7B, Q5_K_S quantization — a multi-GB file) and
# the local path it is cached at. /home/user/app is presumably the app's
# persistent working dir on the hosting platform — verify against deployment.
MODEL_URL = "https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q5_K_S.gguf"
MODEL_PATH = "/home/user/app/llama-2-7b.Q5_K_S.gguf" # Local storage path
# Function to download model if not present
def download_model():
    """Download the GGUF model to MODEL_PATH if it is not already there.

    Streams the response in 8 KiB chunks so the multi-GB file is never held
    in memory.

    Raises:
        requests.HTTPError: if the server answers with an error status
            (prevents saving an HTML error page as the "model", which would
            permanently satisfy the exists() check).
        requests.RequestException: on connection/timeout failures.
    """
    if os.path.exists(MODEL_PATH):
        return
    print("Downloading model...")
    try:
        # timeout guards against a hung connection; the with-block closes
        # the response even if writing fails partway through.
        with requests.get(MODEL_URL, stream=True, timeout=60) as response:
            response.raise_for_status()
            with open(MODEL_PATH, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # skip keep-alive chunks
                        f.write(chunk)
    except Exception:
        # Don't leave a truncated file behind — it would pass the
        # exists() check on the next run and be loaded as a corrupt model.
        if os.path.exists(MODEL_PATH):
            os.remove(MODEL_PATH)
        raise
    print("Model downloaded successfully!")
# Fetch the model at import time, before the Gradio app starts serving,
# so the first chat request doesn't block on a multi-GB download.
download_model()
# Define OmniAI's chat function (handles two arguments: user message + history)
_llm = None  # lazily-initialized singleton; loading the GGUF file is expensive


def _get_llm():
    """Load the Llama model on first use and reuse it for every later call.

    The original code constructed a new Llama instance (re-reading the
    multi-GB model file) on every chat message; caching it here makes each
    request after the first fast while keeping startup lazy.
    """
    global _llm
    if _llm is None:
        _llm = Llama(model_path=MODEL_PATH, n_ctx=4096, n_batch=256, flash_attn=True)
    return _llm


def omni_ai_chat(user_message, history):
    """Gradio chat handler: answer the latest user message.

    Args:
        user_message: The newest prompt from the user.
        history: Prior turns supplied by gr.ChatInterface — currently
            unused; each prompt is answered without conversation context.

    Returns:
        The model's completion text, stripped of surrounding whitespace,
        or an error string shown in the chat UI if loading/inference fails.
    """
    try:
        llm = _get_llm()
        response = llm(user_message)  # Process only the latest prompt
        return response["choices"][0]["text"].strip()
    except Exception as e:
        # Surface the failure in the chat window instead of crashing the app.
        return f"Error loading AI model: {str(e)}"
# Build the chat UI. type="messages" selects the current message-dict
# history format, avoiding Gradio's deprecated tuple-format warning.
chatbot = gr.ChatInterface(
    fn=omni_ai_chat,
    title="OmniAI - Cloud AI",
    description="Your personal AI assistant, running entirely in the cloud!",
    type="messages",
)
# Start the web server.
chatbot.launch()
|