"""Minimal FastAPI service that exposes a local llama.cpp binary over HTTP."""

import asyncio
import functools
import os
import subprocess

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware

# llama.cpp invocation settings. Both paths are relative, so they are
# resolved against the working directory of the server process.
LLAMA_BINARY = "./llama.cpp/main"
MODEL_PATH = "model.gguf"
MAX_TOKENS = 128
THREADS = 4  # Adjust based on CPU
# Upper bound on a single generation so a hung process cannot pin a request.
GENERATION_TIMEOUT_SECONDS = 300
# Cap on how much of the child's stderr is echoed back in an error response.
_STDERR_TAIL_CHARS = 500

app = FastAPI()

# Allow all origins (replace with your website domain in production)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.post("/generate")
async def generate_text(prompt: str):
    """Run llama.cpp once for ``prompt`` and return what it wrote to stdout.

    Args:
        prompt: Text handed to the llama.cpp binary through its ``-p`` option.
            It is passed as a single argv element (no shell is involved).

    Returns:
        A dict ``{"response": <captured stdout of the process>}``.

    Raises:
        HTTPException: 504 if the process exceeds
            ``GENERATION_TIMEOUT_SECONDS``; 500 if it cannot be started or
            exits with a non-zero status.
    """
    cmd = [
        LLAMA_BINARY,
        "-m", MODEL_PATH,
        "-p", prompt,
        "-n", str(MAX_TOKENS),  # Max tokens
        "-t", str(THREADS),     # Threads
    ]
    run_llama = functools.partial(
        subprocess.run,
        cmd,
        capture_output=True,
        text=True,
        # Decode explicitly instead of relying on the locale; "replace" keeps
        # a byte sequence cut off mid-character from failing the request.
        encoding="utf-8",
        errors="replace",
        timeout=GENERATION_TIMEOUT_SECONDS,
    )

    try:
        # subprocess.run blocks until the child exits; running it in the
        # default executor keeps the event loop free for other requests.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(None, run_llama)
    except subprocess.TimeoutExpired as e:
        raise HTTPException(
            status_code=504,
            detail=f"Generation timed out after {GENERATION_TIMEOUT_SECONDS} seconds",
        ) from e
    except Exception as e:
        # Top-level boundary: e.g. the binary is missing or not executable.
        raise HTTPException(status_code=500, detail=str(e)) from e

    # Checked outside the try block so this HTTPException is not re-wrapped
    # by the generic handler above.
    if result.returncode != 0:
        stderr_tail = result.stderr.strip()[-_STDERR_TAIL_CHARS:]
        raise HTTPException(
            status_code=500,
            detail=f"llama.cpp exited with code {result.returncode}: {stderr_tail}",
        )

    return {"response": result.stdout}