from pathlib import Path
import os

from dotenv import load_dotenv
from fastapi import FastAPI
from pydantic import BaseModel
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# Load environment variables from .env
load_dotenv()
HUGGINGFACE_HUB_TOKEN = os.environ.get("HUGGINGFACE_HUB_TOKEN")

app = FastAPI(title="Company Assistant API")

# Local model path
MODEL_DIR = Path("models/llama-3.2-1b")
MODEL_DIR.mkdir(parents=True, exist_ok=True)
MODEL_PATH = MODEL_DIR / "llama-3.2-1b-instruct-q4_k_m.gguf"

# Download the GGUF model if it is not already present. Using local_dir
# places the file directly at MODEL_PATH, so the existence check works on
# subsequent runs (cache_dir would store it in HF's nested cache layout
# and the file would be re-downloaded every start).
if not MODEL_PATH.exists():
    print("Downloading the Llama 3.2-1B GGUF model...")
    hf_hub_download(
        repo_id="hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF",
        filename="llama-3.2-1b-instruct-q4_k_m.gguf",
        local_dir=str(MODEL_DIR),
        token=HUGGINGFACE_HUB_TOKEN,
    )
    print("Model downloaded!")

# Initialize the model
llm = Llama(model_path=str(MODEL_PATH))

# Request body schema
class Message(BaseModel):
    text: str

@app.get("/")
def root():
    return {"message": "Company Assistant API is running!"}

@app.post("/chat")
def chat(message: Message):
    # llama_cpp returns a completion dict; the generated text lives in
    # result["choices"][0]["text"], so return just that string.
    result = llm(prompt=message.text, max_tokens=200)
    return {"reply": result["choices"][0]["text"]}
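# A minimal sketch for running and exercising the API locally. This assumes
# uvicorn is installed and that this file is named main.py; the host, port,
# and sample payload below are illustrative, not part of the original app.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)

# Example request once the server is up:
#   curl -X POST http://localhost:8000/chat \
#        -H "Content-Type: application/json" \
#        -d '{"text": "Hello, who are you?"}'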