update(core): fix code
Browse files- __pycache__/utils.cpython-312.pyc +0 -0
- app.py +37 -23
- utils.py +51 -67
__pycache__/utils.cpython-312.pyc
ADDED
Binary file (4.1 kB). View file
|
|
app.py
CHANGED
@@ -1,69 +1,83 @@
|
|
1 |
import gradio as gr
|
2 |
|
3 |
-
from utils import
|
4 |
|
5 |
|
6 |
-
class
|
7 |
"""Class to manage model loading and perplexity calculation state."""
|
8 |
|
9 |
def __init__(self):
|
10 |
-
self.
|
11 |
|
12 |
def load_models(self, checkpoint_input_str: str) -> str:
|
13 |
"""Load models from a comma-separated string of checkpoint names."""
|
14 |
-
|
15 |
-
|
16 |
]
|
17 |
|
18 |
-
if not
|
19 |
return "Please enter at least one model checkpoint name."
|
20 |
|
21 |
try:
|
22 |
-
self.
|
23 |
return "Models loaded successfully!"
|
24 |
except Exception as e:
|
25 |
return f"Model loading failed: {e}"
|
26 |
|
27 |
-
def
|
|
|
|
|
|
|
|
|
28 |
"""Calculate perplexity using the loaded models."""
|
29 |
-
if self.
|
30 |
return "Please load models first."
|
31 |
-
|
|
|
32 |
try:
|
33 |
-
|
34 |
-
return result
|
35 |
except Exception as e:
|
36 |
return f"Perplexity calculation failed: {e}"
|
37 |
|
38 |
|
39 |
-
def
|
40 |
"""Create and return the Gradio interface."""
|
41 |
-
manager =
|
42 |
|
43 |
with gr.Blocks() as demo:
|
44 |
-
gr.Markdown("# LLM
|
45 |
|
46 |
-
|
47 |
-
label="Checkpoints",
|
48 |
-
value=", ".join(checkpoints),
|
49 |
)
|
50 |
|
51 |
load_btn = gr.Button("Load Models", variant="primary")
|
52 |
-
perplexity_btn = gr.Button("Compute PPL")
|
53 |
|
54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
perplexity_output = gr.JSON(label="PPL Results")
|
56 |
|
57 |
# Connect event handlers
|
58 |
load_btn.click(
|
59 |
-
fn=manager.load_models,
|
|
|
|
|
60 |
)
|
61 |
|
62 |
-
perplexity_btn.click(
|
|
|
|
|
|
|
|
|
63 |
|
64 |
return demo
|
65 |
|
66 |
|
67 |
if __name__ == "__main__":
|
68 |
-
demo =
|
69 |
demo.launch()
|
|
|
1 |
import gradio as gr
|
2 |
|
3 |
+
from utils import load_model, log_perplexity
|
4 |
|
5 |
|
6 |
+
class Manager:
|
7 |
"""Class to manage model loading and perplexity calculation state."""
|
8 |
|
9 |
def __init__(self):
|
10 |
+
self.loaded = None
|
11 |
|
12 |
def load_models(self, checkpoint_input_str: str) -> str:
|
13 |
"""Load models from a comma-separated string of checkpoint names."""
|
14 |
+
checkpoints = [
|
15 |
+
ckpt.strip() for ckpt in checkpoint_input_str.split(",") if ckpt.strip()
|
16 |
]
|
17 |
|
18 |
+
if not checkpoints:
|
19 |
return "Please enter at least one model checkpoint name."
|
20 |
|
21 |
try:
|
22 |
+
self.loaded = load_model(checkpoints)
|
23 |
return "Models loaded successfully!"
|
24 |
except Exception as e:
|
25 |
return f"Model loading failed: {e}"
|
26 |
|
27 |
+
def perplexity(
|
28 |
+
self,
|
29 |
+
num_samples: int | None = None,
|
30 |
+
sample_length: int | None = None,
|
31 |
+
) -> dict | str:
|
32 |
"""Calculate perplexity using the loaded models."""
|
33 |
+
if self.loaded is None:
|
34 |
return "Please load models first."
|
35 |
+
if num_samples is None or sample_length is None:
|
36 |
+
return "Please set the number of samples and sample length."
|
37 |
try:
|
38 |
+
return log_perplexity(self.loaded, num_samples, sample_length)
|
|
|
39 |
except Exception as e:
|
40 |
return f"Perplexity calculation failed: {e}"
|
41 |
|
42 |
|
43 |
+
def make_interface() -> gr.Blocks:
    """Create and return the Gradio interface.

    The layout consists of a checkpoint textbox with a load button, two
    numeric inputs (sample count and sample length) with a compute button,
    a status textbox, and a JSON panel for the results. A single
    ``Manager`` instance backs both buttons, so models loaded by the first
    button are the ones evaluated by the second.

    Returns:
        The assembled ``gr.Blocks`` app, not yet launched.
    """
    manager = Manager()

    with gr.Blocks() as demo:
        gr.Markdown("# LLM PPLs")

        checkpoints = gr.Textbox(
            label="Checkpoints", value="HuggingFaceTB/SmolLM2-135M"
        )

        load_btn = gr.Button("Load Models", variant="primary")

        with gr.Row():
            # precision=0 makes Gradio hand the handler ints rather than
            # floats; the values are later used in range() and slicing.
            num_samples = gr.Number(
                label="Number of Samples", value=1500, precision=0, minimum=1
            )
            sample_length = gr.Number(
                label="Sample Length", value=128, precision=0, minimum=1
            )

        perplexity_btn = gr.Button("Compute PPLs")

        load_output = gr.Textbox(label="Model Loading Status")
        perplexity_output = gr.JSON(label="PPL Results")

        # Connect event handlers
        load_btn.click(
            fn=manager.load_models,
            inputs=checkpoints,
            outputs=load_output,
        )

        perplexity_btn.click(
            fn=manager.perplexity,
            inputs=[num_samples, sample_length],
            outputs=perplexity_output,
        )

    return demo
|
79 |
|
80 |
|
81 |
# Script entry point: build the Gradio interface and launch it.
if __name__ == "__main__":
|
82 |
+
demo = make_interface()
|
83 |
demo.launch()
|
utils.py
CHANGED
@@ -1,94 +1,78 @@
|
|
1 |
from typing import Final
|
2 |
|
|
|
3 |
import numpy as np
|
4 |
import torch
|
5 |
import ujson as json
|
6 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
7 |
|
8 |
-
|
9 |
-
|
10 |
|
11 |
-
checkpoints = ["HuggingFaceTB/SmolLM2-135M"] # Inputs
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
-
def load_model(checkpoints: list[str]) -> dict:
|
15 |
-
tokenizers = [
|
16 |
-
AutoTokenizer.from_pretrained(checkpoint) for checkpoint in checkpoints
|
17 |
-
]
|
18 |
|
|
|
|
|
19 |
models = [
|
20 |
-
AutoModelForCausalLM.from_pretrained(
|
21 |
-
|
22 |
-
device_map="auto",
|
23 |
-
torch_dtype=torch.bfloat16,
|
24 |
-
)
|
25 |
-
.to(dev)
|
26 |
-
.eval()
|
27 |
-
for checkpoint in checkpoints
|
28 |
]
|
29 |
|
30 |
# Load the models and tokenizers into a dictionary
|
31 |
return {
|
32 |
-
|
33 |
-
for
|
34 |
}
|
35 |
|
36 |
|
37 |
-
def
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
num_samples: Final[int] = 500 # Sample size for perplexity calculation
|
47 |
-
sample_length: Final[int] = 100 # Maximum length of text to consider for perplexity
|
48 |
-
|
49 |
-
loaded = load_model(checkpoints)
|
50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
-
|
53 |
-
|
54 |
-
ppls = {checkpoint: [] for checkpoint in loaded.keys()}
|
55 |
for i in range(num_samples):
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
# Calculate perplexity for each model
|
63 |
-
current_ppls = {}
|
64 |
-
for checkpoint, info in loaded.items():
|
65 |
-
ppl = _perplexity(
|
66 |
-
info["model"],
|
67 |
-
info["tokenizer"],
|
68 |
-
text,
|
69 |
-
)
|
70 |
-
current_ppls[checkpoint] = ppl
|
71 |
-
|
72 |
-
# Filter out outliers
|
73 |
-
if all(1 < ppl < 1e4 for ppl in current_ppls.values()):
|
74 |
-
for checkpoint, ppl in current_ppls.items():
|
75 |
-
ppls[checkpoint].append(ppl)
|
76 |
-
|
77 |
-
# Convert perplexity into log scale
|
78 |
-
log_ppls: dict = {checkpoint: np.log(ppl) for checkpoint, ppl in ppls.items()}
|
79 |
|
80 |
# Calculate the mean perplexity for each model
|
81 |
-
|
82 |
-
checkpoint: np.mean(ppl) for checkpoint, ppl in log_ppls.items()
|
83 |
-
}
|
84 |
|
85 |
# Calculate the standard deviation of perplexity for each model
|
86 |
-
|
87 |
-
checkpoint: np.std(ppl) for checkpoint, ppl in log_ppls.items()
|
88 |
-
}
|
89 |
|
90 |
-
return {
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
from typing import Final
|
2 |
|
3 |
+
import gradio as gr
|
4 |
import numpy as np
|
5 |
import torch
|
6 |
import ujson as json
|
7 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
8 |
|
9 |
+
# Device that models and input tensors are moved to: CUDA when available,
# otherwise CPU.
_dev: Final = "cuda" if torch.cuda.is_available() else "cpu"
|
10 |
+
# dtype passed as ``torch_dtype`` when loading the models.
_dtype: Final = torch.bfloat16
|
11 |
|
|
|
12 |
|
13 |
+
def _perplexity(model, tokenizer, text) -> float:
    """Return the log-perplexity of ``text`` under ``model``.

    The text is tokenized (truncated to 512 tokens) and fed to the model
    with the input ids as labels; the resulting loss is returned. Since
    perplexity is ``exp(loss)``, the loss itself is the log-perplexity.

    Args:
        model: Callable model accepting ``(input_ids, labels=...)`` and
            exposing a ``device`` attribute and a ``loss`` on its output
            (as Hugging Face causal LMs do).
        tokenizer: Callable tokenizer returning an object with an
            ``input_ids`` tensor.
        text: The string to score.

    Returns:
        The loss as a plain Python ``float``.
    """
    encodings = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    # Follow the model's own device so the inputs always land where the
    # weights are.
    input_ids = encodings.input_ids.to(model.device)
    with torch.no_grad():
        outputs = model(input_ids, labels=input_ids)
    # log(exp(loss)) == loss. Returning it directly avoids a float32 round
    # trip that overflows to inf for large losses and loses precision.
    return float(outputs.loss.item())
|
20 |
|
|
|
|
|
|
|
|
|
21 |
|
22 |
+
def load_model(checkpoints: list[str]) -> dict:
    """Load a tokenizer and a causal LM for every checkpoint name.

    Args:
        checkpoints: Checkpoint identifiers passed to ``from_pretrained``.
            Repeated names are loaded only once.

    Returns:
        A dict mapping each checkpoint name to
        ``{"model": <model in eval mode on _dev>, "tokenizer": <tokenizer>}``,
        ordered by first occurrence in ``checkpoints``.
    """
    # De-duplicate while keeping order: a repeated name would otherwise load
    # the same weights twice only for the dict below to keep one copy.
    unique_ckpts = list(dict.fromkeys(checkpoints))

    tokenizers = [AutoTokenizer.from_pretrained(c) for c in unique_ckpts]
    # NOTE(review): device_map="auto" is combined with an explicit
    # ``.to(_dev)`` below; confirm both are intended.
    models = [
        AutoModelForCausalLM.from_pretrained(c, device_map="auto", torch_dtype=_dtype)
        for c in unique_ckpts
    ]

    # Load the models and tokenizers into a dictionary
    return {
        ckpt: {"model": model.to(_dev).eval(), "tokenizer": tokenizer}
        for ckpt, model, tokenizer in zip(unique_ckpts, models, tokenizers)
    }
|
34 |
|
35 |
|
36 |
+
def log_perplexity(
    loaded: dict,
    num_samples: int,
    sample_length: int,
    progress=gr.Progress(),
) -> dict:
    """Compute per-model log-perplexity statistics over sample texts.

    Texts are read from ``texts.json`` (a JSON list of strings), stripped,
    cut to ``sample_length`` characters, and the first ``num_samples`` of
    them are scored with every loaded model.

    Args:
        loaded: Mapping of checkpoint name to a dict with ``"model"`` and
            ``"tokenizer"`` entries, as returned by ``load_model``.
        num_samples: Number of texts to evaluate; capped at the number of
            non-empty texts available.
        sample_length: Maximum number of characters kept from each text.
        progress: Gradio progress tracker, called once per sample.

    Returns:
        ``{"ppls": ..., "means": ..., "stds": ...}`` where ``ppls`` maps
        each checkpoint to its list of retained log-perplexities and
        ``means`` / ``stds`` map each checkpoint to the mean and standard
        deviation of that list (``nan`` when the list is empty).
    """
    # UI number widgets may deliver floats; range/slicing need ints.
    num_samples = int(num_samples)
    sample_length = int(sample_length)

    # Initialize a dictionary to store perplexity
    ppls: dict[str, list] = {ckpt: [] for ckpt in loaded}

    # Initialize samples (context manager so the file handle is closed)
    with open("texts.json", "r", encoding="utf-8") as f:
        raw_texts = json.load(f)
    texts: list[str] = [
        stripped[:sample_length]
        for stripped in (text.strip() for text in raw_texts)
        if stripped
    ]

    # Never index past the available texts (previously an IndexError).
    num_samples = max(0, min(num_samples, len(texts)))

    # ``_perplexity`` returns log-perplexity, so the outlier bounds
    # 1 < ppl < 1e4 are expressed in log space: 0 < log(ppl) < log(1e4).
    upper_bound = float(np.log(1e4))

    # Start the iteration
    progress(0, desc="Starting")
    for i, text in enumerate(texts[:num_samples]):
        progress(i / num_samples, desc="Processing samples")
        for ckpt, info in loaded.items():  # Calculate perplexity for each model
            log_ppl = float(_perplexity(info["model"], info["tokenizer"], text))
            if 0 < log_ppl < upper_bound:  # Filter out outliers
                ppls[ckpt].append(log_ppl)

    nan = float("nan")

    # Calculate the mean perplexity for each model (nan if nothing retained,
    # without triggering NumPy's empty-slice warning)
    means: dict = {
        ckpt: float(np.mean(vals)) if vals else nan for ckpt, vals in ppls.items()
    }

    # Calculate the standard deviation of perplexity for each model
    stds: dict = {
        ckpt: float(np.std(vals)) if vals else nan for ckpt, vals in ppls.items()
    }

    return {"ppls": ppls, "means": means, "stds": stds}
|
68 |
+
|
69 |
+
|
70 |
+
if __name__ == "__main__":
    from pprint import pprint

    # Example usage: score one small checkpoint and pretty-print the stats.
    num_samples, sample_length = 500, 128
    checkpoints = ["HuggingFaceTB/SmolLM2-135M"]
    loaded = load_model(checkpoints)
    pprint(log_perplexity(loaded, num_samples, sample_length))
|