# Hugging Face Space page header (status: Running) -- not part of the module code.
from dataclasses import dataclass
from enum import Enum
@dataclass
class Task:
    """Describe one leaderboard column and where its value lives in the results JSON.

    Instances are built positionally as ``Task(benchmark, metric, col_name)``,
    so the class needs the generated ``__init__`` that ``@dataclass`` provides.
    """

    benchmark: str  # task key in the results JSON file
    metric: str  # metric key in the results JSON file
    col_name: str  # name to display in the leaderboard
# Select your tasks here
# ---------------------------------------------------
# TODO: the original note on this line was mis-encoded (non-ASCII text lost);
# presumably it concerned the metrics listed below -- confirm with the author.
class Tasks(Enum):
    """Enumerate the columns shown on the leaderboard.

    Each member's value is a ``Task`` holding, in order: the task key in the
    results JSON file, the metric key in the results JSON file, and the name
    to display in the leaderboard.
    """

    # The arrow in the first display name was mis-encoded in the source text
    # and is restored here as an upward arrow.
    # NOTE(review): the exact arrow direction was lost in the garbling --
    # confirm it is meant to be the upward arrow.
    task0 = Task("Score_avg", "score", "Score_Avg ⬆️")
    task1 = Task("Score_gpt", "score", "Score_GPT")
    task2 = Task("Score_cog", "score", "Score_COG")
    task3 = Task("Score_cpm", "score", "Score_CPM")
    # The length column reads a different metric key ("scoreL") than the scores.
    task4 = Task("Length_Avg", "scoreL", "Length_Avg")
NUM_FEWSHOT = 0  # Change with your few shot
# ---------------------------------------------------
# TODO: title
# HTML heading for the leaderboard title: a flex-centred <h1> whose middle
# span paints the text with a purple gradient.
# NOTE(review): the two "π" glyphs look like an emoji that was mis-encoded;
# the intended character cannot be recovered from this file -- confirm and
# replace.
TITLE = """
<h1 align="center" id="space-title" style="font-family: 'Arial', sans-serif;
font-size: 42px; font-weight: bold;
display: flex; justify-content: center; align-items: center; gap: 12px;">
<span style="font-size: 35px;">π</span>
<span style="background: linear-gradient(90deg, #A45EE5, #C085F6, #D8B9FF);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;">
CapArena-Auto Leaderboard
</span>
<span style="font-size: 35px;">π</span>
</h1>
"""
# introduction text
def get_INTRODUCTION_TEXT(model_num: int, LAST_UPDATED: str, paper_link="TODO"):
    """Build the HTML info bar: paper link, model count, and last-updated stamp.

    Args:
        model_num: Value rendered after the "MODELS:" label.
        LAST_UPDATED: Value rendered after the "UPDATED:" label. Earlier this
            argument was ignored and a hard-coded "3-15" was shown instead.
        paper_link: Target of the paper anchor's ``href``. The default
            "TODO" is a placeholder, not a working URL.

    Returns:
        An HTML fragment (a single flex ``<div>``) as a string. Arguments are
        interpolated verbatim, without HTML escaping.
    """
    # NOTE(review): the "π" before "Paper" looks like a mis-encoded emoji;
    # the intended character cannot be recovered from this file -- confirm.
    return f"""
<div style="display: flex; flex-wrap: wrap; gap: 10px; align-items: center;">
<!-- Paper icon with custom link -->
<a href="{paper_link}" target="_blank">
π Paper
</a>
<span style="margin: 0 10px;">|</span>
<span style="font-weight: bold;">MODELS:</span> {model_num}
<span style="margin: 0 10px;">|</span>
<span style="font-weight: bold;">UPDATED:</span> {LAST_UPDATED}
</div>
"""
# TODO
# Collapsible (<details>) HTML block explaining what CapArena-Auto measures.
# A plain string literal: there are no placeholders, so no f-prefix is needed.
# NOTE(review): the glyphs before "Metric Explanations" look like a
# mis-encoded emoji; the intended character cannot be recovered from this
# file -- confirm and replace.
INTRODUCE_BENCHMARK = """
<details style="margin: 10px 0; padding: 10px;">
<summary style="cursor: pointer; font-size: 18px; color: #2c3e50; font-weight: bold; transition: color 0.3s;">
π¬ Metric Explanations
</summary>
<div style="color: #2c3e50; border-left: 4px solid #2980b9; padding-left: 12px; margin-top: 8px;">
<p>
<strong>CapArena-Auto</strong> is an arena-style automated evaluation benchmark for detailed captioning.
It includes <strong>600 evaluation images</strong> and assesses model performance through
<em>pairwise battles</em> with three baseline models. The final score is calculated by <strong>GPT4o-as-a-Judge</strong>.
</p>
</div>
</details>
"""
# TODO: About
# Centred HTML heading that links to the CapArena GitHub repository.
# A plain string literal: there are no placeholders, so no f-prefix is needed.
LLM_BENCHMARKS_TEXT = """
<div style="text-align: center; margin: 20px;">
<h2 style="color: #2c3e50; font-family: Arial, sans-serif;"> See details in
<a href="https://github.com/njucckevin/CapArena" target="_blank"
style="color: #2980b9; text-decoration: none; font-weight: bold;">
CapArena
</a>
</h2>
</div>
"""
# Placeholder text constants. The two triple-quoted values are intentionally
# empty apart from a single newline.
EVALUATION_QUEUE_TEXT = """
"""

CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
# Raw string so that a citation snippet pasted here later keeps its
# backslashes untouched.
CITATION_BUTTON_TEXT = r"""
"""