Update app.py to include an additional print statement for debugging the chat function and to return the full result from the model.
3b89341
from fastapi import FastAPI
from pydantic import BaseModel
from llama_cpp import Llama
from pathlib import Path
import os
from dotenv import load_dotenv
from huggingface_hub import hf_hub_download
# Load environment variables from .env
load_dotenv()
HUGGINGFACE_HUB_TOKEN = os.environ.get("HUGGINGFACE_HUB_TOKEN")
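# The token is read from a local .env file; a minimal sketch of its contents
# (the value below is a hypothetical placeholder, the variable name matches
# the lookup above):
#
#   HUGGINGFACE_HUB_TOKEN=hf_xxxxxxxxxxxxxxxxxxxx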
app = FastAPI(title="Company Assistant API")
# Local model directory and expected file path
MODEL_DIR = Path("models/llama-3.2-1b")
MODEL_DIR.mkdir(parents=True, exist_ok=True)
MODEL_PATH = MODEL_DIR / "llama-3.2-1b-instruct-q4_k_m.gguf"
print("hello")  # startup debug marker
# Download the model on first run if it is not already present locally
if not MODEL_PATH.exists():
    print("Downloading the Llama 3.2-1B GGUF model...")
    # Use local_dir (not cache_dir) so the file lands at MODEL_PATH directly;
    # with cache_dir the file is nested inside the Hugging Face cache layout
    # and the MODEL_PATH.exists() check above would never succeed on later runs.
    MODEL_PATH = hf_hub_download(
        repo_id="hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF",
        filename="llama-3.2-1b-instruct-q4_k_m.gguf",
        local_dir=str(MODEL_DIR),
        token=HUGGINGFACE_HUB_TOKEN,
    )
    print("Model downloaded!")
# Initialize the llama.cpp model
llm = Llama(model_path=str(MODEL_PATH))
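# Note: Llama() is constructed with library defaults here (small context
# window, automatic thread count). A hedged sketch of an explicit
# configuration, using standard llama-cpp-python keyword arguments:
#
#   llm = Llama(model_path=str(MODEL_PATH), n_ctx=2048, n_threads=4)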
# Request body schema
class Message(BaseModel):
    text: str
@app.get("/")
def root():
    return {"message": "Company Assistant API is up!"}
@app.post("/chat")
def chat(message: Message):
    print(message)  # debug: log the incoming request body
    result = llm(prompt=message.text, max_tokens=200)
    print("result:", result)  # debug: log the full completion
    # Return the full llama.cpp completion dict, not just the generated text
    return {"reply": result}
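As a quick smoke test, a minimal client-side sketch (assuming the app is served locally with uvicorn app:app on the default port 8000 and that the requests library is installed; the route and request schema come from the code above):

import requests

# POST a chat message; the body must match the Message model ({"text": ...})
resp = requests.post("http://127.0.0.1:8000/chat", json={"text": "Hello!"})

# "reply" holds the full llama.cpp completion dict, so the generated text
# sits under choices[0]["text"]
print(resp.json()["reply"]["choices"][0]["text"])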