brezzagabriele committed
Commit b2bc09b · 1 Parent(s): 611f683

Update app.py to change model directory and filename for Llama 3.2-1B, replace requests with huggingface_hub for model downloading, and add print statements for debugging. Modify requirements.txt to include huggingface_hub and ensure proper package installation.
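For reference, a minimal sketch of the download pattern this commit switches to. The repo_id and filename are taken from the diff below; the standalone framing and the cache_dir value are illustrative assumptions, not the repo's exact code:

from huggingface_hub import hf_hub_download

# Fetch the GGUF file from the Hub (reusing a cached copy when present)
# and return the local filesystem path of the downloaded file.
model_path = hf_hub_download(
    repo_id="hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF",
    filename="llama-3.2-1b-instruct-q4_k_m.gguf",
    cache_dir="models/llama-3.2-1b",  # assumption: mirrors MODEL_DIR in the diff
    token=None,                       # only needed for gated or private repos
)
print(model_path)

Unlike the old requests-based loop, hf_hub_download handles auth headers, retries, and caching itself, which is why the commit can drop the manual streaming code.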

Files changed (2):
  1. app.py +14 -16
  2. requirements.txt +4 -3
app.py CHANGED
@@ -2,9 +2,9 @@ from fastapi import FastAPI
from pydantic import BaseModel
from llama_cpp import Llama
from pathlib import Path
- import requests
import os
from dotenv import load_dotenv
+ from huggingface_hub import hf_hub_download

# Load variables from .env
load_dotenv()
@@ -13,25 +13,22 @@ HUGGINGFACE_HUB_TOKEN = os.environ.get("HUGGINGFACE_HUB_TOKEN")
app = FastAPI(title="Company Assistant API")

# Local model path
- MODEL_DIR = Path("models/3B-instruct")
- MODEL_PATH = MODEL_DIR / "llama-3.2-3b-instruct.gguf"
- # Direct URL to the GGUF model on Hugging Face
- MODEL_URL = "https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct/resolve/main/llama-3.2-3b-instruct.gguf"
-
+ MODEL_DIR = Path("models/llama-3.2-1b")
MODEL_DIR.mkdir(parents=True, exist_ok=True)

+ MODEL_PATH = MODEL_DIR / "llama-3.2-1b-instruct-q4_k_m.gguf"
+
+ print('ciao')
+
# Download the model if it does not exist
if not MODEL_PATH.exists():
-     print("Downloading the Llama 3.2 3B Instruct GGUF model...")
-     headers = {}
-     if HUGGINGFACE_HUB_TOKEN:
-         headers["Authorization"] = f"Bearer {HUGGINGFACE_HUB_TOKEN}"
-
-     response = requests.get(MODEL_URL, headers=headers, stream=True)
-     response.raise_for_status()
-     with open(MODEL_PATH, "wb") as f:
-         for chunk in response.iter_content(chunk_size=8192):
-             f.write(chunk)
+     print("Downloading the Llama 3.2-1B GGUF model...")
+     MODEL_PATH = hf_hub_download(
+         repo_id="hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF",
+         filename="llama-3.2-1b-instruct-q4_k_m.gguf",
+         cache_dir=str(MODEL_DIR),
+         token=HUGGINGFACE_HUB_TOKEN
+     )
    print("Model downloaded!")

# Initialize the model
@@ -47,5 +44,6 @@ def root():

@app.post("/chat")
def chat(message: Message):
+     print(message)
    result = llm(prompt=message.text, max_tokens=200)
    return {"reply": result['text']}
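One subtlety in the new code, noted here as a hedged sketch rather than as part of the commit: when cache_dir is set, hf_hub_download stores the file under a nested cache layout (models--<org>--<repo>/snapshots/<revision>/...), so the precomputed MODEL_DIR / "llama-3.2-1b-instruct-q4_k_m.gguf" will not exist on the next start and the download branch runs again. A local_dir-based variant keeps the file at a stable path; repo_id and filename come from the diff, the rest is an assumed alternative:

from pathlib import Path
from huggingface_hub import hf_hub_download

MODEL_DIR = Path("models/llama-3.2-1b")
MODEL_PATH = MODEL_DIR / "llama-3.2-1b-instruct-q4_k_m.gguf"

if not MODEL_PATH.exists():
    # local_dir materializes the file directly at MODEL_DIR/<filename>,
    # so MODEL_PATH stays valid across restarts.
    hf_hub_download(
        repo_id="hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF",
        filename="llama-3.2-1b-instruct-q4_k_m.gguf",
        local_dir=str(MODEL_DIR),
    )

Relatedly, llama-cpp-python's Llama.__call__ returns an OpenAI-style completion dict, so the generated text normally lives at result["choices"][0]["text"] rather than result["text"].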
requirements.txt CHANGED
@@ -1,6 +1,7 @@
fastapi
- uvicorn[standard]
pydantic
llama-cpp-python
- requests
- python-dotenv
+ python-dotenv
+ huggingface_hub
+ uvicorn
+ requests
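Since the final requirements keep both uvicorn and requests, a minimal way to serve the app and exercise the /chat endpoint. The host and port are assumptions (Hugging Face Spaces conventionally expose 7860), and the "text" field name comes from message.text in the diff:

# serve.py (assumed entry point; the Space may start uvicorn differently)
import uvicorn

if __name__ == "__main__":
    uvicorn.run("app:app", host="0.0.0.0", port=7860)

# client.py
import requests

resp = requests.post("http://localhost:7860/chat", json={"text": "Hello"})
resp.raise_for_status()
print(resp.json()["reply"])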