import os

import gradio as gr
import requests
from llama_cpp import Llama

MODEL_URL = "https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q5_K_S.gguf"
MODEL_PATH = "/home/user/app/llama-2-7b.Q5_K_S.gguf"
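# /home/user/app is the app directory on a Hugging Face Space; if you run this
# elsewhere, point MODEL_PATH at any writable location instead.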


def download_model():
    """Fetch the GGUF weights once; later runs reuse the file already on disk."""
    if not os.path.exists(MODEL_PATH):
        print("Downloading model...")
        response = requests.get(MODEL_URL, stream=True)
        response.raise_for_status()  # fail fast instead of saving an HTML error page
        with open(MODEL_PATH, "wb") as f:
            # Stream in 8 KiB chunks so the multi-gigabyte file is never held in memory.
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
        print("Model downloaded successfully!")


download_model()

# Create the model once at startup: constructing Llama loads several gigabytes
# of weights, so doing it inside the chat handler would repeat that cost on
# every message. n_ctx=4096 sets the context window and n_batch=256 the
# prompt-processing batch size.
llm = Llama(model_path=MODEL_PATH, n_ctx=4096, n_batch=256, flash_attn=True)
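# Note: without an n_gpu_layers argument, inference runs entirely on CPU; on
# GPU hardware, passing n_gpu_layers=-1 should offload all layers. flash_attn
# only takes effect on builds compiled with flash-attention support.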


def omni_ai_chat(user_message, history):
    try:
        # llama-cpp-python defaults to a very small max_tokens (16), which
        # would cut replies short; raise the cap so completions can reach a
        # natural stopping point.
        response = llm(user_message, max_tokens=512)
        return response["choices"][0]["text"].strip()
    except Exception as e:
        return f"Error generating response: {str(e)}"
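
# `history` is accepted but ignored above: llama-2-7b is a base completion
# model, so each reply sees only the latest message. One possible sketch for
# folding prior turns into the prompt (with type="messages", history arrives
# as a list of {"role": ..., "content": ...} dicts):
#
#     def build_prompt(user_message, history):
#         turns = [f"{m['role']}: {m['content']}" for m in history]
#         turns.append(f"user: {user_message}\nassistant:")
#         return "\n".join(turns)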


chatbot = gr.ChatInterface(
    fn=omni_ai_chat,
    title="OmniAI - Cloud AI",
    description="Your personal AI assistant, running entirely in the cloud!",
    type="messages",
)

chatbot.launch()
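
# When running outside Hugging Face Spaces, launch() serves on localhost only;
# chatbot.launch(share=True) should give you a temporary public URL instead.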