chore(misc): update files
Browse files- app.py +67 -4
- texts.json +0 -0
- utils.py +94 -0
app.py
CHANGED
|
@@ -1,7 +1,70 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
|
| 3 |
-
|
| 4 |
-
return "Hello " + name + "!!"
|
| 5 |
|
| 6 |
-
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
|
| 3 |
+
from utils import checkpoints, load_model, log_perplexity
|
|
|
|
| 4 |
|
| 5 |
+
|
| 6 |
+
class ModelManager:
|
| 7 |
+
"""Class to manage model loading and perplexity calculation state."""
|
| 8 |
+
|
| 9 |
+
def __init__(self):
|
| 10 |
+
self.loaded_models = None
|
| 11 |
+
|
| 12 |
+
def load_models(self, checkpoint_input_str: str) -> str:
|
| 13 |
+
"""Load models from a comma-separated string of checkpoint names."""
|
| 14 |
+
checkpoint_list = [
|
| 15 |
+
c.strip() for c in checkpoint_input_str.split(",") if c.strip()
|
| 16 |
+
]
|
| 17 |
+
|
| 18 |
+
if not checkpoint_list:
|
| 19 |
+
return "Please enter at least one model checkpoint name."
|
| 20 |
+
|
| 21 |
+
try:
|
| 22 |
+
self.loaded_models = load_model(checkpoint_list)
|
| 23 |
+
return "Models loaded successfully!"
|
| 24 |
+
except Exception as e:
|
| 25 |
+
return f"Model loading failed: {e}"
|
| 26 |
+
|
| 27 |
+
def calculate_perplexity(self) -> dict | str:
|
| 28 |
+
"""Calculate perplexity using the loaded models."""
|
| 29 |
+
if self.loaded_models is None:
|
| 30 |
+
return "Please load models first."
|
| 31 |
+
|
| 32 |
+
try:
|
| 33 |
+
result = log_perplexity()
|
| 34 |
+
return result
|
| 35 |
+
except Exception as e:
|
| 36 |
+
return f"Perplexity calculation failed: {e}"
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def create_interface() -> gr.Blocks:
    """Build the Gradio app: a checkpoint box, two buttons and two outputs.

    A single ``ModelManager`` instance is created per interface and its bound
    methods are used as the click handlers.

    Returns:
        The assembled ``gr.Blocks`` object (not yet launched).
    """
    state = ModelManager()

    with gr.Blocks() as app:
        # Header text.
        gr.Markdown("# Language Model Perplexity Calculator (Sequential Version)")
        gr.Markdown("Enter Hugging Face model checkpoint names (comma-separated).")

        # Input, pre-filled with the default checkpoints from utils.
        default_names = ", ".join(checkpoints)
        checkpoint_box = gr.Textbox(
            value=default_names,
            label="Checkpoints (e.g. Qwen/Qwen2.5-14B-Instruct)",
        )

        # Action buttons.
        load_button = gr.Button("Load Models", variant="primary")
        ppl_button = gr.Button("Calculate Perplexity")

        # Output widgets.
        status_box = gr.Textbox(label="Model Loading Status", interactive=False)
        results_view = gr.JSON(label="Perplexity Results")

        # Wire the buttons to the manager's methods.
        load_button.click(
            fn=state.load_models,
            inputs=checkpoint_box,
            outputs=status_box,
        )
        ppl_button.click(
            fn=state.calculate_perplexity,
            outputs=results_view,
        )

    return app
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
if __name__ == "__main__":
    # Script entry point: build the interface and serve it with Gradio's
    # default launch settings.
    demo = create_interface()
    demo.launch()
|
texts.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
utils.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Final
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
import torch
|
| 5 |
+
import ujson as json
|
| 6 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 7 |
+
|
| 8 |
+
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
dev: Final = "cuda" if torch.cuda.is_available() else "cpu"

# Evaluation corpus, read once at import time. The context manager closes the
# file handle (the bare open() call left it open), and the explicit encoding
# avoids depending on the platform default.
with open("texts.json", "r", encoding="utf-8") as _texts_file:
    texts: Final = json.load(_texts_file)

checkpoints = ["HuggingFaceTB/SmolLM2-135M"]  # Inputs
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def load_model(checkpoints: list[str]) -> dict:
    """Load a tokenizer and a causal language model for each checkpoint.

    Args:
        checkpoints: Hugging Face checkpoint names. Duplicates are loaded
            only once.

    Returns:
        A dict mapping each checkpoint name to
        ``{"model": <model in eval mode>, "tokenizer": <tokenizer>}``.
    """
    loaded_models = {}
    # dict.fromkeys() drops duplicate names while keeping their order, so a
    # repeated checkpoint is not downloaded and instantiated twice only to be
    # collapsed into a single dict entry afterwards.
    for checkpoint in dict.fromkeys(checkpoints):
        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        # Place the whole model on `dev` at load time. The previous
        # combination of device_map="auto" followed by .to(dev) asked
        # accelerate to pick a placement and then moved the dispatched model
        # again, which accelerate warns about and which fails when modules
        # have been offloaded.
        model = AutoModelForCausalLM.from_pretrained(
            checkpoint,
            device_map=dev,
            torch_dtype=torch.bfloat16,
        ).eval()
        loaded_models[checkpoint] = {"model": model, "tokenizer": tokenizer}

    return loaded_models
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def _perplexity(model, tokenizer, text):
    """Return ``exp(loss)`` of ``model`` on ``text`` as a Python float.

    The text is tokenized (truncated to at most 512 tokens), moved to ``dev``
    and fed to the model with the input ids doubling as labels. The ``loss``
    the model reports is presumably the mean token-level cross-entropy, as is
    usual for Hugging Face causal LMs; verify against the model class in use.
    """
    token_ids = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    ).input_ids.to(dev)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        loss_value = model(token_ids, labels=token_ids).loss.item()

    return torch.exp(torch.tensor(loss_value)).item()
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
# How many leading entries of `texts` are scored.
num_samples: Final[int] = 1500
# Each text is cut to this many characters (a string slice, not a token
# count) before it is scored.
sample_length: Final[int] = 128

# The default checkpoints are loaded once, when this module is imported.
loaded = load_model(checkpoints)
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def log_perplexity(models: dict | None = None) -> dict:
    """Score the evaluation texts with every model and summarise the results.

    Args:
        models: Mapping of checkpoint name to
            ``{"model": ..., "tokenizer": ...}`` as returned by
            ``load_model``. Defaults to the module-level ``loaded`` mapping,
            so existing no-argument calls behave as before.

    Returns:
        A dict with three entries, each keyed by checkpoint name:

        * ``"ppls"``: per-text perplexities that passed the outlier filter.
        * ``"mean_ppls"``: mean of the natural log of those perplexities.
        * ``"std_ppls"``: standard deviation of the natural log of those
          perplexities.

        The two statistics are NaN for a checkpoint with no surviving
        samples.
    """
    if models is None:
        models = loaded

    ppls = {checkpoint: [] for checkpoint in models}

    # Slice rather than index over range(num_samples): a corpus with fewer
    # than num_samples entries must not raise IndexError.
    for raw_text in texts[:num_samples]:
        text = raw_text.strip()[:sample_length]
        if not text:
            continue

        # Score the same text with every model.
        current_ppls = {
            checkpoint: _perplexity(info["model"], info["tokenizer"], text)
            for checkpoint, info in models.items()
        }

        # Keep a sample only when it is in range for *all* models, so every
        # checkpoint ends up with the same set of texts. NaN fails the
        # comparison and is dropped as well.
        if all(1 < ppl < 1e4 for ppl in current_ppls.values()):
            for checkpoint, ppl in current_ppls.items():
                ppls[checkpoint].append(ppl)

    mean_log_ppls: dict = {}
    std_log_ppls: dict = {}
    for checkpoint, values in ppls.items():
        if values:
            log_values = np.log(values)
            # Plain floats keep the result trivially serialisable.
            mean_log_ppls[checkpoint] = float(np.mean(log_values))
            std_log_ppls[checkpoint] = float(np.std(log_values))
        else:
            # No surviving samples: report NaN without triggering numpy's
            # "mean of empty slice" warning.
            mean_log_ppls[checkpoint] = float("nan")
            std_log_ppls[checkpoint] = float("nan")

    return {
        "ppls": ppls,
        "mean_ppls": mean_log_ppls,
        "std_ppls": std_log_ppls,
    }
|