import os

import gradio as gr
import requests
from llama_cpp import Llama

# Model URL and local storage path
MODEL_URL = "https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q5_K_S.gguf"
MODEL_PATH = "/home/user/app/llama-2-7b.Q5_K_S.gguf"  # Local storage path


# Download the model file if it is not already present
def download_model():
    if not os.path.exists(MODEL_PATH):
        print("Downloading model...")
        response = requests.get(MODEL_URL, stream=True)
        response.raise_for_status()  # Fail early on a bad HTTP status
        with open(MODEL_PATH, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
        print("Model downloaded successfully!")


# Download the model before launching OmniAI
download_model()

# Load the model once at startup rather than inside the chat function:
# reloading a 7B GGUF file on every request adds seconds of latency per reply.
llm = Llama(model_path=MODEL_PATH, n_ctx=4096, n_batch=256, flash_attn=True)


# OmniAI's chat function (ChatInterface passes two arguments: user message + history)
def omni_ai_chat(user_message, history):
    try:
        # Process only the latest prompt. max_tokens raises llama-cpp-python's
        # default completion length (16 tokens), which would truncate replies.
        response = llm(user_message, max_tokens=256)
        return response["choices"][0]["text"].strip()
    except Exception as e:
        return f"Error generating response: {str(e)}"


# Set up the Gradio chatbot UI
chatbot = gr.ChatInterface(
    fn=omni_ai_chat,
    title="OmniAI - Cloud AI",
    description="Your personal AI assistant, running entirely in the cloud!",
    type="messages",  # Avoids the deprecated tuple-format warning
)

# Launch the app!
chatbot.launch()
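
# --- Optional: streaming variant (a sketch, not part of the original app) ---
# gr.ChatInterface also accepts a generator function and renders partial output
# as tokens arrive, and llama-cpp-python's __call__ supports stream=True, which
# yields completion chunks shaped like the full response. The function below is
# a hypothetical drop-in replacement for omni_ai_chat, assuming the same
# module-level `llm` instance defined above; to try it, pass
# fn=omni_ai_chat_stream to gr.ChatInterface instead.
def omni_ai_chat_stream(user_message, history):
    partial = ""
    for chunk in llm(user_message, max_tokens=256, stream=True):
        partial += chunk["choices"][0]["text"]
        yield partial  # Gradio replaces the displayed reply with each yield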