sthenno committed on
Commit
f1d3bf6
·
1 Parent(s): a9011a0

chore(misc): update files

Browse files
Files changed (3) hide show
  1. app.py +67 -4
  2. texts.json +0 -0
  3. utils.py +94 -0
app.py CHANGED
@@ -1,7 +1,70 @@
1
  import gradio as gr
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
 
3
+ from utils import checkpoints, load_model, log_perplexity
 
4
 
5
+
6
+ class ModelManager:
7
+ """Class to manage model loading and perplexity calculation state."""
8
+
9
+ def __init__(self):
10
+ self.loaded_models = None
11
+
12
+ def load_models(self, checkpoint_input_str: str) -> str:
13
+ """Load models from a comma-separated string of checkpoint names."""
14
+ checkpoint_list = [
15
+ c.strip() for c in checkpoint_input_str.split(",") if c.strip()
16
+ ]
17
+
18
+ if not checkpoint_list:
19
+ return "Please enter at least one model checkpoint name."
20
+
21
+ try:
22
+ self.loaded_models = load_model(checkpoint_list)
23
+ return "Models loaded successfully!"
24
+ except Exception as e:
25
+ return f"Model loading failed: {e}"
26
+
27
+ def calculate_perplexity(self) -> dict | str:
28
+ """Calculate perplexity using the loaded models."""
29
+ if self.loaded_models is None:
30
+ return "Please load models first."
31
+
32
+ try:
33
+ result = log_perplexity()
34
+ return result
35
+ except Exception as e:
36
+ return f"Perplexity calculation failed: {e}"
37
+
38
+
39
def create_interface() -> gr.Blocks:
    """Build the Gradio Blocks UI and wire its buttons to a ModelManager.

    Returns:
        The assembled (not yet launched) ``gr.Blocks`` application.
    """
    model_manager = ModelManager()

    with gr.Blocks() as ui:
        # Components are created in display order: heading, hint, input,
        # the two action buttons, then the two output areas.
        gr.Markdown("# Language Model Perplexity Calculator (Sequential Version)")
        gr.Markdown("Enter Hugging Face model checkpoint names (comma-separated).")

        default_checkpoints = ", ".join(checkpoints)
        checkpoints_box = gr.Textbox(
            label="Checkpoints (e.g. Qwen/Qwen2.5-14B-Instruct)",
            value=default_checkpoints,
        )

        load_button = gr.Button("Load Models", variant="primary")
        ppl_button = gr.Button("Calculate Perplexity")

        status_box = gr.Textbox(label="Model Loading Status", interactive=False)
        results_view = gr.JSON(label="Perplexity Results")

        # Loading reads the text box and reports into the status box.
        load_button.click(
            fn=model_manager.load_models,
            inputs=checkpoints_box,
            outputs=status_box,
        )
        # The evaluation takes no UI inputs; its result goes to the JSON view.
        ppl_button.click(
            fn=model_manager.calculate_perplexity,
            outputs=results_view,
        )

    return ui
66
+
67
+
68
if __name__ == "__main__":
    # Build the UI and start the Gradio server only when run as a script.
    create_interface().launch()
texts.json ADDED
The diff for this file is too large to render. See raw diff
 
utils.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Final
2
+
3
+ import numpy as np
4
+ import torch
5
+ import ujson as json
6
+ from transformers import AutoModelForCausalLM, AutoTokenizer
7
+
8
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
dev: Final = "cuda" if torch.cuda.is_available() else "cpu"

# Evaluation corpus. Read through a context manager so the file handle is
# closed deterministically, and decode explicitly as UTF-8 rather than the
# platform's default encoding.
with open("texts.json", "r", encoding="utf-8") as _texts_file:
    texts: Final = json.load(_texts_file)

# Default checkpoints, loaded at import time further down this module.
checkpoints = ["HuggingFaceTB/SmolLM2-135M"]  # Inputs
12
+
13
+
14
def load_model(checkpoints: list[str]) -> dict:
    """Load a tokenizer and a causal language model for each checkpoint.

    Args:
        checkpoints: Hugging Face checkpoint identifiers. A name that
            appears more than once is loaded only once.

    Returns:
        A dict mapping each checkpoint name to
        ``{"model": <model in eval mode>, "tokenizer": <tokenizer>}``,
        in first-seen order. Empty input yields an empty dict.
    """
    loaded_models = {}

    # dict.fromkeys de-duplicates while preserving order, so a repeated name
    # does not trigger a second (expensive) load of the same weights.
    for checkpoint in dict.fromkeys(checkpoints):
        tokenizer = AutoTokenizer.from_pretrained(checkpoint)

        # Place the weights with a single instruction. The previous
        # device_map="auto" followed by .to(dev) gave two conflicting
        # placements, and moving a dispatched model afterwards is not
        # supported; loading straight onto `dev` yields the same final
        # placement without the conflict.
        model = AutoModelForCausalLM.from_pretrained(
            checkpoint,
            device_map=dev,
            torch_dtype=torch.bfloat16,
        ).eval()

        loaded_models[checkpoint] = {"model": model, "tokenizer": tokenizer}

    return loaded_models
35
+
36
+
37
def _perplexity(model, tokenizer, text):
    """Return the perplexity of ``text`` under ``model`` as a Python float.

    Args:
        model: Callable as ``model(input_ids, labels=input_ids)`` returning
            an object with a scalar ``loss`` tensor, and exposing a
            ``device`` attribute.
        tokenizer: Callable as ``tokenizer(text, return_tensors="pt", ...)``
            returning an object with an ``input_ids`` tensor.
        text: The string to score; tokenization is truncated to 512 tokens.

    Returns:
        ``exp(loss)`` for the tokenized text.
    """
    encodings = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)

    # Send the inputs to wherever the model's weights live rather than to a
    # module-level device constant, so the two can never disagree.
    input_ids = encodings.input_ids.to(model.device)

    with torch.no_grad():
        outputs = model(input_ids, labels=input_ids)

    # Exponentiate the loss as a float32 CPU scalar; this matches the value
    # obtained by round-tripping through a Python float and a new tensor.
    return torch.exp(outputs.loss.detach().float().cpu()).item()
44
+
45
+
46
# Evaluation budget: how many entries of `texts` are scored.
num_samples: Final[int] = 1500
# Each text is cut to this many characters (not tokens) before scoring.
sample_length: Final[int] = 128

# The default checkpoints are loaded eagerly at import time;
# log_perplexity() uses this module-level dict.
loaded = load_model(checkpoints)
50
+
51
+
52
def log_perplexity(models: "dict | None" = None) -> dict:
    """Score the sample texts with every model and summarise log-perplexity.

    Args:
        models: Mapping of checkpoint name to
            ``{"model": ..., "tokenizer": ...}`` (the shape returned by
            ``load_model``). Defaults to the module-level ``loaded`` models,
            which keeps the original zero-argument call working.

    Returns:
        A dict with three entries, each keyed by checkpoint name:

        * ``"ppls"``: raw perplexities of the samples that were kept.
        * ``"mean_ppls"``: mean of the natural log of those perplexities.
        * ``"std_ppls"``: standard deviation of the same log values.

        The mean and standard deviation are NaN for a checkpoint with no
        kept samples.
    """
    if models is None:
        models = loaded

    # Per-model list of perplexities for the samples that survive filtering.
    ppls = {checkpoint: [] for checkpoint in models}

    # Slice rather than index up to num_samples so that a corpus shorter
    # than num_samples is simply scored in full instead of raising IndexError.
    for raw_text in texts[:num_samples]:
        stripped = raw_text.strip()
        if not stripped:
            continue
        text = stripped[:sample_length]

        # Calculate perplexity for each model on the same text.
        current_ppls = {
            checkpoint: _perplexity(info["model"], info["tokenizer"], text)
            for checkpoint, info in models.items()
        }

        # Filter out outliers. A sample is kept only when every model gives
        # an in-range value, so the per-model lists stay aligned sample by
        # sample; NaN fails the comparison and is dropped as well.
        if all(1 < ppl < 1e4 for ppl in current_ppls.values()):
            for checkpoint, ppl in current_ppls.items():
                ppls[checkpoint].append(ppl)

    # Summarise in log scale; plain floats keep the result JSON-friendly.
    mean_log_ppls: dict = {}
    std_log_ppls: dict = {}
    for checkpoint, values in ppls.items():
        if values:
            log_values = np.log(values)
            mean_log_ppls[checkpoint] = float(np.mean(log_values))
            std_log_ppls[checkpoint] = float(np.std(log_values))
        else:
            # Nothing survived filtering: report NaN explicitly instead of
            # letting numpy warn about reducing an empty array.
            mean_log_ppls[checkpoint] = float("nan")
            std_log_ppls[checkpoint] = float("nan")

    return {
        "ppls": ppls,
        "mean_ppls": mean_log_ppls,
        "std_ppls": std_log_ppls,
    }