File size: 1,516 Bytes
4f0fba3
cd52c0b
33560b1
657cc09
4f0fba3
33560b1
6b9c8bf
 
4f0fba3
33560b1
 
 
 
 
 
 
 
 
 
 
 
 
cd52c0b
7f50726
cd52c0b
46a1867
cd52c0b
 
 
 
4f0fba3
cd52c0b
657cc09
7f50726
 
4f0fba3
657cc09
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import gradio as gr
import os
import requests
from llama_cpp import Llama

# Define model URL & local path
# Q5_K_S = 5-bit quantized GGUF build of Llama-2-7B (~4.6 GB download).
MODEL_URL = "https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q5_K_S.gguf"
MODEL_PATH = "/home/user/app/llama-2-7b.Q5_K_S.gguf"  # Local storage path

# Function to download model if not present
def download_model():
    """Fetch the GGUF model to MODEL_PATH if it is not already on disk.

    Streams the response in 8 KiB chunks to keep memory flat, writes to a
    ".part" temporary file, and only renames it into place once the download
    completes — so an interrupted run cannot leave a truncated file that the
    existence check would mistake for a complete model next time.

    Raises:
        requests.HTTPError: if the server returns an error status (the
            original code would have saved the HTML error page as the model).
    """
    if os.path.exists(MODEL_PATH):
        return
    print("Downloading model...")
    tmp_path = MODEL_PATH + ".part"
    with requests.get(MODEL_URL, stream=True, timeout=60) as response:
        response.raise_for_status()  # fail fast instead of writing an error body
        with open(tmp_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:  # skip keep-alive chunks
                    f.write(chunk)
    os.replace(tmp_path, MODEL_PATH)  # atomic publish of the finished file
    print("Model downloaded successfully!")

# Download model before launching OmniAI
# NOTE: runs at import time, so startup blocks until the multi-GB file exists.
download_model()

# Define OmniAI's chat function (handles two arguments: user message + history)
# Cache the loaded model at module level: the original code re-read the
# multi-GB GGUF file from disk on every single message.
_LLM = None

def omni_ai_chat(user_message, history):
    """Generate a completion for the latest user message.

    Args:
        user_message: The prompt text just entered by the user.
        history: Prior chat turns supplied by gr.ChatInterface (unused —
            each prompt is processed independently, as before).

    Returns:
        The model's completion text, stripped of surrounding whitespace,
        or an error string if loading/generation fails.
    """
    global _LLM
    try:
        if _LLM is None:  # lazy-load once, then reuse across chat turns
            _LLM = Llama(model_path=MODEL_PATH, n_ctx=4096, n_batch=256, flash_attn=True)
        # Explicit max_tokens: llama-cpp-python's __call__ default is 16,
        # which truncated almost every reply.
        response = _LLM(user_message, max_tokens=512)
        return response["choices"][0]["text"].strip()
    except Exception as e:
        return f"Error loading AI model: {str(e)}"

# Build the Gradio chat UI. type="messages" opts into the current
# message-dict history format, silencing the deprecated tuple-format warning.
chatbot = gr.ChatInterface(
    fn=omni_ai_chat,
    title="OmniAI - Cloud AI",
    description="Your personal AI assistant, running entirely in the cloud!",
    type="messages",
)

# Launch the app!
chatbot.launch()