derekl35 (HF Staff) committed · verified
Commit 6febc7c · 1 parent: e9b7b43

Update app.py

Files changed (1): app.py (+8 −5)
app.py CHANGED
@@ -27,8 +27,8 @@ def _load_agg_stats() -> dict:
     except json.JSONDecodeError:
         print(f"Warning: {AGG_FILE} is corrupted. Starting with empty stats.")
         return {"8-bit bnb": {"attempts": 0, "correct": 0}, "4-bit bnb": {"attempts": 0, "correct": 0}}
-    return {"8-bit bnb": {"attempts": 0, "correct": 0},
-            "4-bit bnb": {"attempts": 0, "correct": 0}}
+    return {"8-bit bnb": {"attempts": 157, "correct": 74},
+            "4-bit bnb": {"attempts": 159, "correct": 78}}
 
 def _save_agg_stats(stats: dict) -> None:
     with InterProcessLock(str(LOCK_FILE)):
@@ -59,7 +59,8 @@ def load_bf16_pipeline():
         torch_dtype=torch.bfloat16,
         token=HF_TOKEN
     )
-    pipe.to(DEVICE)
+    # pipe.to(DEVICE)
+    pipe.enable_model_cpu_offload()
     end_time = time.time()
     mem_reserved = torch.cuda.memory_reserved(0)/1024**3 if DEVICE == "cuda" else 0
     print(f"BF16 Pipeline loaded in {end_time - start_time:.2f}s. Memory reserved: {mem_reserved:.2f} GB")
@@ -80,7 +81,8 @@ def load_bnb_8bit_pipeline():
         MODEL_ID,
         torch_dtype=torch.bfloat16
     )
-    pipe.to(DEVICE)
+    # pipe.to(DEVICE)
+    pipe.enable_model_cpu_offload()
     end_time = time.time()
     mem_reserved = torch.cuda.memory_reserved(0)/1024**3 if DEVICE == "cuda" else 0
     print(f"8-bit BNB pipeline loaded in {end_time - start_time:.2f}s. Memory reserved: {mem_reserved:.2f} GB")
@@ -101,7 +103,8 @@ def load_bnb_4bit_pipeline():
         MODEL_ID,
         torch_dtype=torch.bfloat16
    )
-    pipe.to(DEVICE)
+    # pipe.to(DEVICE)
+    pipe.enable_model_cpu_offload()
     end_time = time.time()
     mem_reserved = torch.cuda.memory_reserved(0)/1024**3 if DEVICE == "cuda" else 0
     print(f"4-bit BNB pipeline loaded in {end_time - start_time:.2f}s. Memory reserved: {mem_reserved:.2f} GB")
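The pattern this commit switches each loader to: instead of moving the whole pipeline onto the GPU with pipe.to(DEVICE), it calls enable_model_cpu_offload(), which keeps idle components on the CPU and moves them to the GPU only when they run. Below is a minimal sketch of that pattern, assuming a diffusers DiffusionPipeline; the MODEL_ID value is a placeholder (app.py defines its own MODEL_ID, DEVICE, and HF_TOKEN), and this is not the full loader from the app.

    # Sketch only: one loader following the pipe.to(DEVICE) -> enable_model_cpu_offload() change.
    import time
    import torch
    from diffusers import DiffusionPipeline

    MODEL_ID = "your/model-id"  # placeholder; app.py's real MODEL_ID is defined elsewhere
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

    def load_bf16_pipeline():
        start_time = time.time()
        pipe = DiffusionPipeline.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16)
        # pipe.to(DEVICE)               # old behavior: pin the entire pipeline on the GPU
        pipe.enable_model_cpu_offload()  # new behavior: offload idle submodules to CPU (requires accelerate)
        end_time = time.time()
        mem_reserved = torch.cuda.memory_reserved(0) / 1024**3 if DEVICE == "cuda" else 0
        print(f"BF16 Pipeline loaded in {end_time - start_time:.2f}s. Memory reserved: {mem_reserved:.2f} GB")
        return pipe

Because offloading loads components lazily, the reserved-memory figure printed right after loading is expected to be lower than with pipe.to(DEVICE), at the cost of some host-to-device transfer time during inference.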