Amber Tanaka committed on
Commit
cdccabc
·
unverified ·
1 Parent(s): ac15cf4

Update table legend to use new names + styling (#66)

Browse files
Files changed (4) hide show
  1. aliases.py +4 -4
  2. content.py +2 -0
  3. leaderboard_transformer.py +2 -2
  4. ui_components.py +14 -14
aliases.py CHANGED
@@ -1,5 +1,5 @@
1
- CANONICAL_OPENNESS_OPEN_OPEN_WEIGHTS = "Open Source + Open Weights"
2
- CANONICAL_OPENNESS_OPEN_CLOSED_WEIGHTS = "Open Source"
3
  CANONICAL_OPENNESS_CLOSED_API_AVAILABLE = "API Available"
4
  CANONICAL_OPENNESS_CLOSED_UI_ONLY = "Closed"
5
 
@@ -10,8 +10,8 @@ CANONICAL_TOOL_USAGE_FULLY_CUSTOM = "Fully Custom"
10
 
11
 
12
  OPENNESS_ALIASES = {
13
- CANONICAL_OPENNESS_OPEN_OPEN_WEIGHTS: {"Open source & open weights"},
14
- CANONICAL_OPENNESS_OPEN_CLOSED_WEIGHTS: {"Open source & closed weights"},
15
  CANONICAL_OPENNESS_CLOSED_API_AVAILABLE: {"Closed source & API available"},
16
  CANONICAL_OPENNESS_CLOSED_UI_ONLY: {"Closed source & UI only"}
17
  }
 
1
+ CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS = "Open Source + Open Weights"
2
+ CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS = "Open Source"
3
  CANONICAL_OPENNESS_CLOSED_API_AVAILABLE = "API Available"
4
  CANONICAL_OPENNESS_CLOSED_UI_ONLY = "Closed"
5
 
 
10
 
11
 
12
  OPENNESS_ALIASES = {
13
+ CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS: {"Open source & open weights"},
14
+ CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS: {"Open source & closed weights"},
15
  CANONICAL_OPENNESS_CLOSED_API_AVAILABLE: {"Closed source & API available"},
16
  CANONICAL_OPENNESS_CLOSED_UI_ONLY: {"Closed source & UI only"}
17
  }
content.py CHANGED
@@ -490,6 +490,8 @@ span.wrap[tabindex="0"][role="button"][data-editable="false"] {
490
  }
491
  .benchmark-title{
492
  color: var(--color-primary-pink);
 
 
493
  }
494
  .dark .benchmark-title{
495
  color: var(--color-primary-green);
 
490
  }
491
  .benchmark-title{
492
  color: var(--color-primary-pink);
493
+ margin-top: 50px;
494
+ font-size: 20px;
495
  }
496
  .dark .benchmark-title{
497
  color: var(--color-primary-green);
leaderboard_transformer.py CHANGED
@@ -344,8 +344,8 @@ def _plot_scatter_plotly(
344
  # These include aliases for openness categories,
345
  # so multiple names might correspond to the same color.
346
  color_map = {
347
- aliases.CANONICAL_OPENNESS_OPEN_OPEN_WEIGHTS: "deeppink",
348
- aliases.CANONICAL_OPENNESS_OPEN_CLOSED_WEIGHTS: "coral",
349
  aliases.CANONICAL_OPENNESS_CLOSED_API_AVAILABLE: "yellow",
350
  aliases.CANONICAL_OPENNESS_CLOSED_UI_ONLY: "white",
351
  }
 
344
  # These include aliases for openness categories,
345
  # so multiple names might correspond to the same color.
346
  color_map = {
347
+ aliases.CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS: "deeppink",
348
+ aliases.CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS: "coral",
349
  aliases.CANONICAL_OPENNESS_CLOSED_API_AVAILABLE: "yellow",
350
  aliases.CANONICAL_OPENNESS_CLOSED_UI_ONLY: "white",
351
  }
ui_components.py CHANGED
@@ -40,12 +40,12 @@ AGENTEVAL_MANIFEST_NAME = "agenteval.json"
40
  os.makedirs(EXTRACTED_DATA_DIR, exist_ok=True)
41
  # Global variables
42
  COMBINED_ICON_MAP = {
43
- aliases.CANONICAL_OPENNESS_OPEN_OPEN_WEIGHTS: {
44
  aliases.CANONICAL_TOOL_USAGE_STANDARD: "assets/os-ow-standard.svg",
45
  aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: "assets/os-ow-equivalent.svg",
46
  aliases.CANONICAL_TOOL_USAGE_FULLY_CUSTOM: "assets/os-ow-custom.svg",
47
  },
48
- aliases.CANONICAL_OPENNESS_OPEN_CLOSED_WEIGHTS: {
49
  aliases.CANONICAL_TOOL_USAGE_STANDARD: "assets/os-standard.svg",
50
  aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: "assets/os-equivalent.svg",
51
  aliases.CANONICAL_TOOL_USAGE_FULLY_CUSTOM: "assets/os-custom.svg",
@@ -77,13 +77,13 @@ for canonical_openness, openness_aliases in aliases.OPENNESS_ALIASES.items():
77
 
78
  OPENNESS_SVG_MAP = {
79
  "Open Source + Open Weights": "assets/os-ow-legend.svg",
80
- "Open Source": "assets/os-legend.svg",
81
  "API Available": "assets/api-legend.svg",
82
- "Closed": "assets/c-legend.svg",
83
  }
84
  TOOLING_SVG_MAP = {
85
  "Standard": "assets/standard-legend.svg",
86
- "Custom with Standard Search": "assets/equivalent-legend.svg",
87
  "Fully Custom": "assets/custom-legend.svg",
88
  }
89
 
@@ -142,9 +142,9 @@ def build_openness_tooltip_content() -> str:
142
  """
143
  descriptions = {
144
  "Open Source + Open Weights": "Both code and ML models are open",
145
- "Open Source": "Code is open but uses an ML model with closed-weights",
146
  "API Available": "No access to code; API access only",
147
- "Closed": "No access to code or API; UI access only",
148
  }
149
  html_items = []
150
  for name, path in OPENNESS_SVG_MAP.items():
@@ -186,7 +186,7 @@ def build_tooling_tooltip_content() -> str:
186
  """Generates the inner HTML for the Agent Tooling tooltip card."""
187
  descriptions = {
188
  "Standard": "Uses only predefined tools from the evaluation environment (as defined in Inspect's state.tools).",
189
- "Custom with Standard Search": "Custom tools for accessing an equivalent underlying environment:",
190
  "Fully Custom": "Uses tools beyond constraints of Standard or Custom interface",
191
  }
192
  custom_interface_sub_list = """
@@ -201,7 +201,7 @@ def build_tooling_tooltip_content() -> str:
201
  desc = descriptions.get(name, "")
202
 
203
  # Check if this is the special case that needs a sub-list
204
- sub_list_html = custom_interface_sub_list if name == "Custom with Standard Search" else ""
205
 
206
  html_items.append(f"""
207
  <div class="tooltip-legend-item">
@@ -301,7 +301,7 @@ def create_legend_markdown(which_table: str) -> str:
301
  descriptions_tooltip_content = build_descriptions_tooltip_content(which_table)
302
  trophy_uri = get_svg_as_data_uri("assets/trophy.svg")
303
  legend_markdown = f"""
304
- <div style="display: flex; flex-wrap: wrap; align-items: flex-start; gap: 10px; font-size: 14px; padding-bottom: 8px;">
305
 
306
  <div> <!-- Container for the Pareto section -->
307
  <b>Pareto</b>
@@ -310,8 +310,8 @@ def create_legend_markdown(which_table: str) -> str:
310
  <span class="tooltip-card">{pareto_tooltip_content}</span>
311
  </span>
312
  <div style="margin-top: 8px; display: flex; align-items: center; gap: 6px;">
313
- <img src="{trophy_uri}" alt="On frontier" style="width: 25px; height: 25px;">
314
- <span>On frontier</span>
315
  </div>
316
  </div>
317
 
@@ -325,7 +325,7 @@ def create_legend_markdown(which_table: str) -> str:
325
  <div class="tooltip-items-container">{openness_tooltip_content}</div>
326
  </span>
327
  </span>
328
- <div style="display: flex; flex-wrap: wrap; align-items: center; gap: 16px; margin-top: 8px;">{openness_html}</div>
329
  </div>
330
 
331
  <div> <!-- Container for the Tooling section -->
@@ -338,7 +338,7 @@ def create_legend_markdown(which_table: str) -> str:
338
  <div class="tooltip-items-container">{tooling_tooltip_content}</div>
339
  </span>
340
  </span>
341
- <div style="display: flex; flex-wrap: wrap; align-items: center; gap: 16px; margin-top: 8px;">{tooling_html}</div>
342
  </div>
343
 
344
  <div><!-- Container for the Column Descriptions section -->
 
40
  os.makedirs(EXTRACTED_DATA_DIR, exist_ok=True)
41
  # Global variables
42
  COMBINED_ICON_MAP = {
43
+ aliases.CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS: {
44
  aliases.CANONICAL_TOOL_USAGE_STANDARD: "assets/os-ow-standard.svg",
45
  aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: "assets/os-ow-equivalent.svg",
46
  aliases.CANONICAL_TOOL_USAGE_FULLY_CUSTOM: "assets/os-ow-custom.svg",
47
  },
48
+ aliases.CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS: {
49
  aliases.CANONICAL_TOOL_USAGE_STANDARD: "assets/os-standard.svg",
50
  aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: "assets/os-equivalent.svg",
51
  aliases.CANONICAL_TOOL_USAGE_FULLY_CUSTOM: "assets/os-custom.svg",
 
77
 
78
  OPENNESS_SVG_MAP = {
79
  "Open Source + Open Weights": "assets/os-ow-legend.svg",
80
+ "Open Source + Closed Weights": "assets/os-legend.svg",
81
  "API Available": "assets/api-legend.svg",
82
+ "Closed Source & UI only": "assets/c-legend.svg",
83
  }
84
  TOOLING_SVG_MAP = {
85
  "Standard": "assets/standard-legend.svg",
86
+ "Custom Interface": "assets/equivalent-legend.svg",
87
  "Fully Custom": "assets/custom-legend.svg",
88
  }
89
 
 
142
  """
143
  descriptions = {
144
  "Open Source + Open Weights": "Both code and ML models are open",
145
+ "Open Source + Closed Weights": "Code is open but uses an ML model with closed-weights",
146
  "API Available": "No access to code; API access only",
147
+ "Closed Source + UI Only": "No access to code or API; UI access only",
148
  }
149
  html_items = []
150
  for name, path in OPENNESS_SVG_MAP.items():
 
186
  """Generates the inner HTML for the Agent Tooling tooltip card."""
187
  descriptions = {
188
  "Standard": "Uses only predefined tools from the evaluation environment (as defined in Inspect's state.tools).",
189
+ "Custom Interface": "Custom tools for accessing an equivalent underlying environment:",
190
  "Fully Custom": "Uses tools beyond constraints of Standard or Custom interface",
191
  }
192
  custom_interface_sub_list = """
 
201
  desc = descriptions.get(name, "")
202
 
203
  # Check if this is the special case that needs a sub-list
204
+ sub_list_html = custom_interface_sub_list if name == "Custom Interface" else ""
205
 
206
  html_items.append(f"""
207
  <div class="tooltip-legend-item">
 
301
  descriptions_tooltip_content = build_descriptions_tooltip_content(which_table)
302
  trophy_uri = get_svg_as_data_uri("assets/trophy.svg")
303
  legend_markdown = f"""
304
+ <div style="display: flex; flex-wrap: wrap; align-items: flex-start; gap: 0px; font-size: 13px; padding-bottom: 8px;">
305
 
306
  <div> <!-- Container for the Pareto section -->
307
  <b>Pareto</b>
 
310
  <span class="tooltip-card">{pareto_tooltip_content}</span>
311
  </span>
312
  <div style="margin-top: 8px; display: flex; align-items: center; gap: 6px;">
313
+ <img src="{trophy_uri}" alt="On Frontier" style="width: 25px; height: 25px;">
314
+ <span>On Frontier</span>
315
  </div>
316
  </div>
317
 
 
325
  <div class="tooltip-items-container">{openness_tooltip_content}</div>
326
  </span>
327
  </span>
328
+ <div style="display: flex; flex-wrap: wrap; align-items: center; gap: 6px; margin-top: 8px;">{openness_html}</div>
329
  </div>
330
 
331
  <div> <!-- Container for the Tooling section -->
 
338
  <div class="tooltip-items-container">{tooling_tooltip_content}</div>
339
  </span>
340
  </span>
341
+ <div style="display: flex; flex-wrap: wrap; align-items: center; gap: 6px; margin-top: 8px;">{tooling_html}</div>
342
  </div>
343
 
344
  <div><!-- Container for the Column Descriptions section -->