Commit b2bc09b
Parent(s): 611f683

Update app.py to change model directory and filename for Llama 3.2-1B, replace requests with huggingface_hub for model downloading, and add print statements for debugging. Modify requirements.txt to include huggingface_hub and ensure proper package installation.

Files changed:
- app.py +14 -16
- requirements.txt +4 -3
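
The commit replaces the manual requests streaming download with huggingface_hub. As a minimal sketch of that approach, assuming the repo_id, filename, and cache directory shown in the diff below (reading the token from an environment variable is also an assumption), the download boils down to:

import os
from pathlib import Path
from huggingface_hub import hf_hub_download

# hf_hub_download stores the file in its cache layout under cache_dir and
# returns the full path to the downloaded file.
model_path = hf_hub_download(
    repo_id="hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF",
    filename="llama-3.2-1b-instruct-q4_k_m.gguf",
    cache_dir=str(Path("models/llama-3.2-1b")),     # assumed local cache directory
    token=os.environ.get("HUGGINGFACE_HUB_TOKEN"),  # optional for public repos
)
print(model_path)

Because the returned path points inside the cache layout (a snapshots/ subtree of cache_dir) rather than to the hand-built file path, reassigning MODEL_PATH to the return value, as the diff does, is what lets the later Llama initialization find the file.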
app.py
CHANGED
@@ -2,9 +2,9 @@ from fastapi import FastAPI
 from pydantic import BaseModel
 from llama_cpp import Llama
 from pathlib import Path
-import requests
 import os
 from dotenv import load_dotenv
+from huggingface_hub import hf_hub_download

 # Load variables from .env
 load_dotenv()
@@ -13,25 +13,22 @@ HUGGINGFACE_HUB_TOKEN = os.environ.get("HUGGINGFACE_HUB_TOKEN")
 app = FastAPI(title="Company Assistant API")

 # Local model path
-MODEL_DIR = Path("models/
-MODEL_PATH = MODEL_DIR / "llama-3.2-3b-instruct.gguf"
-# Direct URL to the GGUF model on Hugging Face
-MODEL_URL = "https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct/resolve/main/llama-3.2-3b-instruct.gguf"
-
+MODEL_DIR = Path("models/llama-3.2-1b")
 MODEL_DIR.mkdir(parents=True, exist_ok=True)

+MODEL_PATH = MODEL_DIR / "llama-3.2-1b-instruct-q4_k_m.gguf"
+
+print('ciao')
+
 # Download the model if it doesn't exist
 if not MODEL_PATH.exists():
-    print("Scaricando il modello GGUF Llama 3.2
-
-
-
-
-
-
-    with open(MODEL_PATH, "wb") as f:
-        for chunk in response.iter_content(chunk_size=8192):
-            f.write(chunk)
+    print("Downloading the Llama 3.2-1B GGUF model...")
+    MODEL_PATH = hf_hub_download(
+        repo_id="hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF",
+        filename="llama-3.2-1b-instruct-q4_k_m.gguf",
+        cache_dir=str(MODEL_DIR),
+        token=HUGGINGFACE_HUB_TOKEN
+    )
     print("Model downloaded!")

 # Initialize the model
@@ -47,5 +44,6 @@ def root():

 @app.post("/chat")
 def chat(message: Message):
+    print(message)
     result = llm(prompt=message.text, max_tokens=200)
     return {"reply": result['text']}
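
One thing the diff leaves untouched: the /chat handler still reads result['text'], but llama-cpp-python's Llama.__call__ returns an OpenAI-style completion dict, so the generated text normally sits under choices. A minimal sketch of the expected shape (the model path and n_ctx are assumptions, not values from the Space):

from llama_cpp import Llama

# Assumed path; in the Space this would be the value returned by hf_hub_download
MODEL_PATH = "models/llama-3.2-1b-instruct-q4_k_m.gguf"

llm = Llama(model_path=MODEL_PATH, n_ctx=2048)  # n_ctx chosen arbitrarily for the sketch

# The call returns a dict like {"id": ..., "choices": [{"text": ...}], "usage": {...}}
result = llm(prompt="Hello, who are you?", max_tokens=200)
print(result["choices"][0]["text"])

If that shape holds here, result['text'] in the handler would raise a KeyError at request time.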
requirements.txt
CHANGED
@@ -1,6 +1,7 @@
 fastapi
-uvicorn[standard]
 pydantic
 llama-cpp-python
-
-
+python-dotenv
+huggingface_hub
+uvicorn
+requests
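
The new requirements mirror what app.py now imports: python-dotenv backs the load_dotenv() call that exposes HUGGINGFACE_HUB_TOKEN, huggingface_hub provides hf_hub_download, and uvicorn serves the FastAPI app; requests stays listed even though its import was removed. A small sketch of the dotenv pattern the code relies on (the .env contents are illustrative):

import os
from dotenv import load_dotenv

# Reads key=value pairs from a local .env file (e.g. HUGGINGFACE_HUB_TOKEN=hf_xxx)
# into os.environ without overwriting variables that are already set.
load_dotenv()
token = os.environ.get("HUGGINGFACE_HUB_TOKEN")  # None if unset, which is fine for public repos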