Spaces:
Running
on
A100
Update app.py
Browse files
app.py
CHANGED
@@ -27,8 +27,8 @@ def _load_agg_stats() -> dict:
|
|
27 |
except json.JSONDecodeError:
|
28 |
print(f"Warning: {AGG_FILE} is corrupted. Starting with empty stats.")
|
29 |
return {"8-bit bnb": {"attempts": 0, "correct": 0}, "4-bit bnb": {"attempts": 0, "correct": 0}}
|
30 |
-
return {"8-bit bnb": {"attempts": […truncated in page extraction…]
|
31 |
-
"4-bit bnb": {"attempts": […truncated in page extraction…]
|
32 |
|
33 |
def _save_agg_stats(stats: dict) -> None:
|
34 |
with InterProcessLock(str(LOCK_FILE)):
|
@@ -59,7 +59,8 @@ def load_bf16_pipeline():
|
|
59 |
torch_dtype=torch.bfloat16,
|
60 |
token=HF_TOKEN
|
61 |
)
|
62 |
-
pipe.to(DEVICE)
|
|
|
63 |
end_time = time.time()
|
64 |
mem_reserved = torch.cuda.memory_reserved(0)/1024**3 if DEVICE == "cuda" else 0
|
65 |
print(f"BF16 Pipeline loaded in {end_time - start_time:.2f}s. Memory reserved: {mem_reserved:.2f} GB")
|
@@ -80,7 +81,8 @@ def load_bnb_8bit_pipeline():
|
|
80 |
MODEL_ID,
|
81 |
torch_dtype=torch.bfloat16
|
82 |
)
|
83 |
-
pipe.to(DEVICE)
|
|
|
84 |
end_time = time.time()
|
85 |
mem_reserved = torch.cuda.memory_reserved(0)/1024**3 if DEVICE == "cuda" else 0
|
86 |
print(f"8-bit BNB pipeline loaded in {end_time - start_time:.2f}s. Memory reserved: {mem_reserved:.2f} GB")
|
@@ -101,7 +103,8 @@ def load_bnb_4bit_pipeline():
|
|
101 |
MODEL_ID,
|
102 |
torch_dtype=torch.bfloat16
|
103 |
)
|
104 |
-
pipe.to(DEVICE)
|
|
|
105 |
end_time = time.time()
|
106 |
mem_reserved = torch.cuda.memory_reserved(0)/1024**3 if DEVICE == "cuda" else 0
|
107 |
print(f"4-bit BNB pipeline loaded in {end_time - start_time:.2f}s. Memory reserved: {mem_reserved:.2f} GB")
|
|
|
27 |
except json.JSONDecodeError:
|
28 |
print(f"Warning: {AGG_FILE} is corrupted. Starting with empty stats.")
|
29 |
return {"8-bit bnb": {"attempts": 0, "correct": 0}, "4-bit bnb": {"attempts": 0, "correct": 0}}
|
30 |
+
return {"8-bit bnb": {"attempts": 157, "correct": 74},
|
31 |
+
"4-bit bnb": {"attempts": 159, "correct": 78}}
|
32 |
|
33 |
def _save_agg_stats(stats: dict) -> None:
|
34 |
with InterProcessLock(str(LOCK_FILE)):
|
|
|
59 |
torch_dtype=torch.bfloat16,
|
60 |
token=HF_TOKEN
|
61 |
)
|
62 |
+
# pipe.to(DEVICE)
|
63 |
+
pipe.enable_model_cpu_offload()
|
64 |
end_time = time.time()
|
65 |
mem_reserved = torch.cuda.memory_reserved(0)/1024**3 if DEVICE == "cuda" else 0
|
66 |
print(f"BF16 Pipeline loaded in {end_time - start_time:.2f}s. Memory reserved: {mem_reserved:.2f} GB")
|
|
|
81 |
MODEL_ID,
|
82 |
torch_dtype=torch.bfloat16
|
83 |
)
|
84 |
+
# pipe.to(DEVICE)
|
85 |
+
pipe.enable_model_cpu_offload()
|
86 |
end_time = time.time()
|
87 |
mem_reserved = torch.cuda.memory_reserved(0)/1024**3 if DEVICE == "cuda" else 0
|
88 |
print(f"8-bit BNB pipeline loaded in {end_time - start_time:.2f}s. Memory reserved: {mem_reserved:.2f} GB")
|
|
|
103 |
MODEL_ID,
|
104 |
torch_dtype=torch.bfloat16
|
105 |
)
|
106 |
+
# pipe.to(DEVICE)
|
107 |
+
pipe.enable_model_cpu_offload()
|
108 |
end_time = time.time()
|
109 |
mem_reserved = torch.cuda.memory_reserved(0)/1024**3 if DEVICE == "cuda" else 0
|
110 |
print(f"4-bit BNB pipeline loaded in {end_time - start_time:.2f}s. Memory reserved: {mem_reserved:.2f} GB")
|