Spaces:
Paused
Paused
Amber Tanaka
committed on
Update table legend to use new names + styling (#66)
Browse files
- aliases.py +4 -4
- content.py +2 -0
- leaderboard_transformer.py +2 -2
- ui_components.py +14 -14
aliases.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
-
|
2 |
-
|
3 |
CANONICAL_OPENNESS_CLOSED_API_AVAILABLE = "API Available"
|
4 |
CANONICAL_OPENNESS_CLOSED_UI_ONLY = "Closed"
|
5 |
|
@@ -10,8 +10,8 @@ CANONICAL_TOOL_USAGE_FULLY_CUSTOM = "Fully Custom"
|
|
10 |
|
11 |
|
12 |
OPENNESS_ALIASES = {
|
13 |
-
|
14 |
-
|
15 |
CANONICAL_OPENNESS_CLOSED_API_AVAILABLE: {"Closed source & API available"},
|
16 |
CANONICAL_OPENNESS_CLOSED_UI_ONLY: {"Closed source & UI only"}
|
17 |
}
|
|
|
1 |
+
CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS = "Open Source + Open Weights"
|
2 |
+
CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS = "Open Source"
|
3 |
CANONICAL_OPENNESS_CLOSED_API_AVAILABLE = "API Available"
|
4 |
CANONICAL_OPENNESS_CLOSED_UI_ONLY = "Closed"
|
5 |
|
|
|
10 |
|
11 |
|
12 |
OPENNESS_ALIASES = {
|
13 |
+
CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS: {"Open source & open weights"},
|
14 |
+
CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS: {"Open source & closed weights"},
|
15 |
CANONICAL_OPENNESS_CLOSED_API_AVAILABLE: {"Closed source & API available"},
|
16 |
CANONICAL_OPENNESS_CLOSED_UI_ONLY: {"Closed source & UI only"}
|
17 |
}
|
content.py
CHANGED
@@ -490,6 +490,8 @@ span.wrap[tabindex="0"][role="button"][data-editable="false"] {
|
|
490 |
}
|
491 |
.benchmark-title{
|
492 |
color: var(--color-primary-pink);
|
|
|
|
|
493 |
}
|
494 |
.dark .benchmark-title{
|
495 |
color: var(--color-primary-green);
|
|
|
490 |
}
|
491 |
.benchmark-title{
|
492 |
color: var(--color-primary-pink);
|
493 |
+
margin-top: 50px;
|
494 |
+
font-size: 20px;
|
495 |
}
|
496 |
.dark .benchmark-title{
|
497 |
color: var(--color-primary-green);
|
leaderboard_transformer.py
CHANGED
@@ -344,8 +344,8 @@ def _plot_scatter_plotly(
|
|
344 |
# These include aliases for openness categories,
|
345 |
# so multiple names might correspond to the same color.
|
346 |
color_map = {
|
347 |
-
aliases.
|
348 |
-
aliases.
|
349 |
aliases.CANONICAL_OPENNESS_CLOSED_API_AVAILABLE: "yellow",
|
350 |
aliases.CANONICAL_OPENNESS_CLOSED_UI_ONLY: "white",
|
351 |
}
|
|
|
344 |
# These include aliases for openness categories,
|
345 |
# so multiple names might correspond to the same color.
|
346 |
color_map = {
|
347 |
+
aliases.CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS: "deeppink",
|
348 |
+
aliases.CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS: "coral",
|
349 |
aliases.CANONICAL_OPENNESS_CLOSED_API_AVAILABLE: "yellow",
|
350 |
aliases.CANONICAL_OPENNESS_CLOSED_UI_ONLY: "white",
|
351 |
}
|
ui_components.py
CHANGED
@@ -40,12 +40,12 @@ AGENTEVAL_MANIFEST_NAME = "agenteval.json"
|
|
40 |
os.makedirs(EXTRACTED_DATA_DIR, exist_ok=True)
|
41 |
# Global variables
|
42 |
COMBINED_ICON_MAP = {
|
43 |
-
aliases.
|
44 |
aliases.CANONICAL_TOOL_USAGE_STANDARD: "assets/os-ow-standard.svg",
|
45 |
aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: "assets/os-ow-equivalent.svg",
|
46 |
aliases.CANONICAL_TOOL_USAGE_FULLY_CUSTOM: "assets/os-ow-custom.svg",
|
47 |
},
|
48 |
-
aliases.
|
49 |
aliases.CANONICAL_TOOL_USAGE_STANDARD: "assets/os-standard.svg",
|
50 |
aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: "assets/os-equivalent.svg",
|
51 |
aliases.CANONICAL_TOOL_USAGE_FULLY_CUSTOM: "assets/os-custom.svg",
|
@@ -77,13 +77,13 @@ for canonical_openness, openness_aliases in aliases.OPENNESS_ALIASES.items():
|
|
77 |
|
78 |
OPENNESS_SVG_MAP = {
|
79 |
"Open Source + Open Weights": "assets/os-ow-legend.svg",
|
80 |
-
"Open Source": "assets/os-legend.svg",
|
81 |
"API Available": "assets/api-legend.svg",
|
82 |
-
"Closed": "assets/c-legend.svg",
|
83 |
}
|
84 |
TOOLING_SVG_MAP = {
|
85 |
"Standard": "assets/standard-legend.svg",
|
86 |
-
"Custom
|
87 |
"Fully Custom": "assets/custom-legend.svg",
|
88 |
}
|
89 |
|
@@ -142,9 +142,9 @@ def build_openness_tooltip_content() -> str:
|
|
142 |
"""
|
143 |
descriptions = {
|
144 |
"Open Source + Open Weights": "Both code and ML models are open",
|
145 |
-
"Open Source": "Code is open but uses an ML model with closed-weights",
|
146 |
"API Available": "No access to code; API access only",
|
147 |
-
"Closed": "No access to code or API; UI access only",
|
148 |
}
|
149 |
html_items = []
|
150 |
for name, path in OPENNESS_SVG_MAP.items():
|
@@ -186,7 +186,7 @@ def build_tooling_tooltip_content() -> str:
|
|
186 |
"""Generates the inner HTML for the Agent Tooling tooltip card."""
|
187 |
descriptions = {
|
188 |
"Standard": "Uses only predefined tools from the evaluation environment (as defined in Inspect's state.tools).",
|
189 |
-
"Custom
|
190 |
"Fully Custom": "Uses tools beyond constraints of Standard or Custom interface",
|
191 |
}
|
192 |
custom_interface_sub_list = """
|
@@ -201,7 +201,7 @@ def build_tooling_tooltip_content() -> str:
|
|
201 |
desc = descriptions.get(name, "")
|
202 |
|
203 |
# Check if this is the special case that needs a sub-list
|
204 |
-
sub_list_html = custom_interface_sub_list if name == "Custom
|
205 |
|
206 |
html_items.append(f"""
|
207 |
<div class="tooltip-legend-item">
|
@@ -301,7 +301,7 @@ def create_legend_markdown(which_table: str) -> str:
|
|
301 |
descriptions_tooltip_content = build_descriptions_tooltip_content(which_table)
|
302 |
trophy_uri = get_svg_as_data_uri("assets/trophy.svg")
|
303 |
legend_markdown = f"""
|
304 |
-
<div style="display: flex; flex-wrap: wrap; align-items: flex-start; gap:
|
305 |
|
306 |
<div> <!-- Container for the Pareto section -->
|
307 |
<b>Pareto</b>
|
@@ -310,8 +310,8 @@ def create_legend_markdown(which_table: str) -> str:
|
|
310 |
<span class="tooltip-card">{pareto_tooltip_content}</span>
|
311 |
</span>
|
312 |
<div style="margin-top: 8px; display: flex; align-items: center; gap: 6px;">
|
313 |
-
<img src="{trophy_uri}" alt="On
|
314 |
-
<span>On
|
315 |
</div>
|
316 |
</div>
|
317 |
|
@@ -325,7 +325,7 @@ def create_legend_markdown(which_table: str) -> str:
|
|
325 |
<div class="tooltip-items-container">{openness_tooltip_content}</div>
|
326 |
</span>
|
327 |
</span>
|
328 |
-
<div style="display: flex; flex-wrap: wrap; align-items: center; gap:
|
329 |
</div>
|
330 |
|
331 |
<div> <!-- Container for the Tooling section -->
|
@@ -338,7 +338,7 @@ def create_legend_markdown(which_table: str) -> str:
|
|
338 |
<div class="tooltip-items-container">{tooling_tooltip_content}</div>
|
339 |
</span>
|
340 |
</span>
|
341 |
-
<div style="display: flex; flex-wrap: wrap; align-items: center; gap:
|
342 |
</div>
|
343 |
|
344 |
<div><!-- Container for the Column Descriptions section -->
|
|
|
40 |
os.makedirs(EXTRACTED_DATA_DIR, exist_ok=True)
|
41 |
# Global variables
|
42 |
COMBINED_ICON_MAP = {
|
43 |
+
aliases.CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS: {
|
44 |
aliases.CANONICAL_TOOL_USAGE_STANDARD: "assets/os-ow-standard.svg",
|
45 |
aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: "assets/os-ow-equivalent.svg",
|
46 |
aliases.CANONICAL_TOOL_USAGE_FULLY_CUSTOM: "assets/os-ow-custom.svg",
|
47 |
},
|
48 |
+
aliases.CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS: {
|
49 |
aliases.CANONICAL_TOOL_USAGE_STANDARD: "assets/os-standard.svg",
|
50 |
aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: "assets/os-equivalent.svg",
|
51 |
aliases.CANONICAL_TOOL_USAGE_FULLY_CUSTOM: "assets/os-custom.svg",
|
|
|
77 |
|
78 |
OPENNESS_SVG_MAP = {
|
79 |
"Open Source + Open Weights": "assets/os-ow-legend.svg",
|
80 |
+
"Open Source + Closed Weights": "assets/os-legend.svg",
|
81 |
"API Available": "assets/api-legend.svg",
|
82 |
+
"Closed Source & UI only": "assets/c-legend.svg",
|
83 |
}
|
84 |
TOOLING_SVG_MAP = {
|
85 |
"Standard": "assets/standard-legend.svg",
|
86 |
+
"Custom Interface": "assets/equivalent-legend.svg",
|
87 |
"Fully Custom": "assets/custom-legend.svg",
|
88 |
}
|
89 |
|
|
|
142 |
"""
|
143 |
descriptions = {
|
144 |
"Open Source + Open Weights": "Both code and ML models are open",
|
145 |
+
"Open Source + Closed Weights": "Code is open but uses an ML model with closed-weights",
|
146 |
"API Available": "No access to code; API access only",
|
147 |
+
"Closed Source + UI Only": "No access to code or API; UI access only",
|
148 |
}
|
149 |
html_items = []
|
150 |
for name, path in OPENNESS_SVG_MAP.items():
|
|
|
186 |
"""Generates the inner HTML for the Agent Tooling tooltip card."""
|
187 |
descriptions = {
|
188 |
"Standard": "Uses only predefined tools from the evaluation environment (as defined in Inspect's state.tools).",
|
189 |
+
"Custom Interface": "Custom tools for accessing an equivalent underlying environment:",
|
190 |
"Fully Custom": "Uses tools beyond constraints of Standard or Custom interface",
|
191 |
}
|
192 |
custom_interface_sub_list = """
|
|
|
201 |
desc = descriptions.get(name, "")
|
202 |
|
203 |
# Check if this is the special case that needs a sub-list
|
204 |
+
sub_list_html = custom_interface_sub_list if name == "Custom Interface" else ""
|
205 |
|
206 |
html_items.append(f"""
|
207 |
<div class="tooltip-legend-item">
|
|
|
301 |
descriptions_tooltip_content = build_descriptions_tooltip_content(which_table)
|
302 |
trophy_uri = get_svg_as_data_uri("assets/trophy.svg")
|
303 |
legend_markdown = f"""
|
304 |
+
<div style="display: flex; flex-wrap: wrap; align-items: flex-start; gap: 0px; font-size: 13px; padding-bottom: 8px;">
|
305 |
|
306 |
<div> <!-- Container for the Pareto section -->
|
307 |
<b>Pareto</b>
|
|
|
310 |
<span class="tooltip-card">{pareto_tooltip_content}</span>
|
311 |
</span>
|
312 |
<div style="margin-top: 8px; display: flex; align-items: center; gap: 6px;">
|
313 |
+
<img src="{trophy_uri}" alt="On Frontier" style="width: 25px; height: 25px;">
|
314 |
+
<span>On Frontier</span>
|
315 |
</div>
|
316 |
</div>
|
317 |
|
|
|
325 |
<div class="tooltip-items-container">{openness_tooltip_content}</div>
|
326 |
</span>
|
327 |
</span>
|
328 |
+
<div style="display: flex; flex-wrap: wrap; align-items: center; gap: 6px; margin-top: 8px;">{openness_html}</div>
|
329 |
</div>
|
330 |
|
331 |
<div> <!-- Container for the Tooling section -->
|
|
|
338 |
<div class="tooltip-items-container">{tooling_tooltip_content}</div>
|
339 |
</span>
|
340 |
</span>
|
341 |
+
<div style="display: flex; flex-wrap: wrap; align-items: center; gap: 6px; margin-top: 8px;">{tooling_html}</div>
|
342 |
</div>
|
343 |
|
344 |
<div><!-- Container for the Column Descriptions section -->
|