brezzagabriele committed
Commit b2bc09b · 1 Parent(s): 611f683

Update app.py to change model directory and filename for Llama 3.2-1B, replace requests with huggingface_hub for model downloading, and add print statements for debugging. Modify requirements.txt to include huggingface_hub and ensure proper package installation.
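For reference, a minimal sketch of the download pattern this commit switches to. The repo_id and filename are taken from the diff below; the standalone framing and the cache_dir value are illustrative assumptions, not the repo's exact code:

from huggingface_hub import hf_hub_download

# Fetch the GGUF file from the Hub (reusing a cached copy when present)
# and return the local filesystem path of the downloaded file.
model_path = hf_hub_download(
    repo_id="hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF",
    filename="llama-3.2-1b-instruct-q4_k_m.gguf",
    cache_dir="models/llama-3.2-1b",  # assumption: mirrors MODEL_DIR in the diff
    token=None,                       # only needed for gated or private repos
)
print(model_path)

Unlike the old requests-based loop, hf_hub_download handles auth headers, retries, and caching itself, which is why the commit can drop the manual streaming code.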

Files changed (2):
  1. app.py +14 -16
  2. requirements.txt +4 -3
app.py CHANGED
@@ -2,9 +2,9 @@ from fastapi import FastAPI
from pydantic import BaseModel
from llama_cpp import Llama
from pathlib import Path
- import requests
import os
from dotenv import load_dotenv
+ from huggingface_hub import hf_hub_download

# Load variables from .env
load_dotenv()
@@ -13,25 +13,22 @@ HUGGINGFACE_HUB_TOKEN = os.environ.get("HUGGINGFACE_HUB_TOKEN")
app = FastAPI(title="Company Assistant API")

# Local model path
- MODEL_DIR = Path("models/3B-instruct")
- MODEL_PATH = MODEL_DIR / "llama-3.2-3b-instruct.gguf"
- # Direct URL to the GGUF model on Hugging Face
- MODEL_URL = "https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct/resolve/main/llama-3.2-3b-instruct.gguf"
-
+ MODEL_DIR = Path("models/llama-3.2-1b")
MODEL_DIR.mkdir(parents=True, exist_ok=True)

+ MODEL_PATH = MODEL_DIR / "llama-3.2-1b-instruct-q4_k_m.gguf"
+
+ print('ciao')
+
# Download the model if it does not exist
if not MODEL_PATH.exists():
-     print("Downloading the Llama 3.2 3B Instruct GGUF model...")
-     headers = {}
-     if HUGGINGFACE_HUB_TOKEN:
-         headers["Authorization"] = f"Bearer {HUGGINGFACE_HUB_TOKEN}"
-
-     response = requests.get(MODEL_URL, headers=headers, stream=True)
-     response.raise_for_status()
-     with open(MODEL_PATH, "wb") as f:
-         for chunk in response.iter_content(chunk_size=8192):
-             f.write(chunk)
+     print("Downloading the Llama 3.2-1B GGUF model...")
+     MODEL_PATH = hf_hub_download(
+         repo_id="hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF",
+         filename="llama-3.2-1b-instruct-q4_k_m.gguf",
+         cache_dir=str(MODEL_DIR),
+         token=HUGGINGFACE_HUB_TOKEN
+     )
    print("Model downloaded!")

# Initialize the model
@@ -47,5 +44,6 @@ def root():

@app.post("/chat")
def chat(message: Message):
+     print(message)
    result = llm(prompt=message.text, max_tokens=200)
    return {"reply": result['text']}
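One subtlety in the new code, noted here as a hedged sketch rather than as part of the commit: when cache_dir is set, hf_hub_download stores the file under a nested cache layout (models--<org>--<repo>/snapshots/<revision>/...), so the precomputed MODEL_DIR / "llama-3.2-1b-instruct-q4_k_m.gguf" will not exist on the next start and the download branch runs again. A local_dir-based variant keeps the file at a stable path; repo_id and filename come from the diff, the rest is an assumed alternative:

from pathlib import Path
from huggingface_hub import hf_hub_download

MODEL_DIR = Path("models/llama-3.2-1b")
MODEL_PATH = MODEL_DIR / "llama-3.2-1b-instruct-q4_k_m.gguf"

if not MODEL_PATH.exists():
    # local_dir materializes the file directly at MODEL_DIR/<filename>,
    # so MODEL_PATH stays valid across restarts.
    hf_hub_download(
        repo_id="hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF",
        filename="llama-3.2-1b-instruct-q4_k_m.gguf",
        local_dir=str(MODEL_DIR),
    )

Relatedly, llama-cpp-python's Llama.__call__ returns an OpenAI-style completion dict, so the generated text normally lives at result["choices"][0]["text"] rather than result["text"].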
requirements.txt CHANGED
@@ -1,6 +1,7 @@
fastapi
- uvicorn[standard]
pydantic
llama-cpp-python
- requests
- python-dotenv
+ python-dotenv
+ huggingface_hub
+ uvicorn
+ requests
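Since the final requirements keep both uvicorn and requests, a minimal way to serve the app and exercise the /chat endpoint. The host and port are assumptions (Hugging Face Spaces conventionally expose 7860), and the "text" field name comes from message.text in the diff:

# serve.py (assumed entry point; the Space may start uvicorn differently)
import uvicorn

if __name__ == "__main__":
    uvicorn.run("app:app", host="0.0.0.0", port=7860)

# client.py
import requests

resp = requests.post("http://localhost:7860/chat", json={"text": "Hello"})
resp.raise_for_status()
print(resp.json()["reply"])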