import os

import gradio as gr
import requests
from llama_cpp import Llama

MODEL_URL = "https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q5_K_S.gguf"
MODEL_PATH = "/home/user/app/llama-2-7b.Q5_K_S.gguf"
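# /home/user/app is the app directory on a Hugging Face Space; if you run this
# elsewhere, point MODEL_PATH at any writable location instead.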


def download_model():
    """Fetch the GGUF weights once; later runs reuse the file already on disk."""
    if not os.path.exists(MODEL_PATH):
        print("Downloading model...")
        response = requests.get(MODEL_URL, stream=True)
        response.raise_for_status()  # fail fast instead of saving an HTML error page
        with open(MODEL_PATH, "wb") as f:
            # Stream in 8 KiB chunks so the multi-gigabyte file is never held in memory.
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
        print("Model downloaded successfully!")


download_model()

# Create the model once at startup: constructing Llama loads several gigabytes
# of weights, so doing it inside the chat handler would repeat that cost on
# every message. n_ctx=4096 sets the context window and n_batch=256 the
# prompt-processing batch size.
llm = Llama(model_path=MODEL_PATH, n_ctx=4096, n_batch=256, flash_attn=True)
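# Note: without an n_gpu_layers argument, inference runs entirely on CPU; on
# GPU hardware, passing n_gpu_layers=-1 should offload all layers. flash_attn
# only takes effect on builds compiled with flash-attention support.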


def omni_ai_chat(user_message, history):
    try:
        # llama-cpp-python defaults to a very small max_tokens (16), which
        # would cut replies short; raise the cap so completions can reach a
        # natural stopping point.
        response = llm(user_message, max_tokens=512)
        return response["choices"][0]["text"].strip()
    except Exception as e:
        return f"Error generating response: {str(e)}"
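
# `history` is accepted but ignored above: llama-2-7b is a base completion
# model, so each reply sees only the latest message. One possible sketch for
# folding prior turns into the prompt (with type="messages", history arrives
# as a list of {"role": ..., "content": ...} dicts):
#
#     def build_prompt(user_message, history):
#         turns = [f"{m['role']}: {m['content']}" for m in history]
#         turns.append(f"user: {user_message}\nassistant:")
#         return "\n".join(turns)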


chatbot = gr.ChatInterface(
    fn=omni_ai_chat,
    title="OmniAI - Cloud AI",
    description="Your personal AI assistant, running entirely in the cloud!",
    type="messages",
)

chatbot.launch()
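
# When running outside Hugging Face Spaces, launch() serves on localhost only;
# chatbot.launch(share=True) should give you a temporary public URL instead.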