# Hugging Face Spaces page status when this file was captured: Running
"""A Gradio app that renders a static leaderboard. It is served as a Hugging Face Space."""
import ast | |
import argparse | |
import glob | |
import pickle | |
import plotly | |
import gradio as gr | |
import numpy as np | |
import pandas as pd | |
import gradio as gr | |
import pandas as pd | |
from pathlib import Path | |
import json | |
from constants import * | |
from datetime import datetime, timezone | |
# from datasets import Dataset, load_dataset, concatenate_datasets | |
import os, uuid | |
from utils_display import model_info | |
from constants import column_names, LEADERBOARD_REMARKS, DEFAULT_K, LEADERBOARD_REMARKS_MAIN | |
import pytz | |
from data_utils import post_processing | |
# Placeholder for the "last updated" timestamp; nothing in this module-level
# section computes it.
LAST_UPDATED = None


def _read_markdown(filename):
    """Return the full text of a markdown fragment, decoded as UTF-8.

    The path is resolved against the current working directory, exactly as a
    bare ``open(filename)`` would. The encoding is pinned so the result does
    not depend on the platform's locale default.
    """
    with open(filename, "r", encoding="utf-8") as f:
        return f.read()


# The intro fragment ("_intro.md") is intentionally not loaded; the intro
# text stays empty.
INTRO_MD = ""
ABOUT_MD = _read_markdown("_about_us.md")
HEADER_MD = _read_markdown("_header.md")
METRICS_MD = _read_markdown("_metrics.md")

# Leaderboard table; assigned by data_load() before the UI is built.
original_df = None

# Every model name known to utils_display.model_info.
available_models = list(model_info)
def df_filters(mode_selection_radio, show_open_source_model_only):
    """Return the leaderboard rows for the selected decoding mode, with a rank column.

    Args:
        mode_selection_radio: One of ``"greedy"``, ``"sampling (Temp=0.5)"`` or
            ``"all"``; selects which values of the ``Mode`` column are kept.
        show_open_source_model_only: Accepted for interface compatibility;
            currently unused.

    Returns:
        A new DataFrame whose first column (named ``""``) numbers the rows
        1..N in their existing order. The module-level ``original_df`` is
        read but never modified.

    Raises:
        KeyError: If ``mode_selection_radio`` is not a known mode.
    """
    modes = {
        "greedy": ["greedy"],
        "sampling (Temp=0.5)": ["sampling"],
        "all": ["greedy", "sampling"],
    }
    # Rows whose model name carries the "โ" marker are hidden.
    # NOTE(review): the marker looks like a mis-encoded symbol -- confirm the
    # intended character against the data.
    # regex=False matches the marker literally; na=False keeps rows with a
    # missing model name instead of raising on the negation.
    flagged = original_df["Model"].str.contains("โ", regex=False, na=False)
    visible = original_df[~flagged]

    # Copy before inserting so the rank column never lands on a slice that
    # pandas may still associate with the source frame.
    selected = visible[visible["Mode"].isin(modes[mode_selection_radio])].copy()
    selected.insert(0, "", range(1, 1 + len(selected)))
    return selected
def _gstr(text):
    """Wrap ``text`` in an invisible ``gr.Text`` component.

    This lets a constant string be supplied where an event handler expects a
    component in its ``inputs`` list.
    """
    hidden_text = gr.Text(text, visible=False)
    return hidden_text
def _tab_leaderboard():
    """Lay out the leaderboard tab: a decoding-mode selector above the results table.

    Called from ``build_demo`` inside the ``gr.Blocks``/``gr.TabItem`` context.
    The initial table content comes from ``df_filters``, which reads the
    module-level ``original_df``, so ``data_load`` must have run first.
    """
    default_mode = "greedy"
    default_main_df = df_filters(default_mode, False)

    with gr.Row():
        with gr.Column(scale=5):
            mode_selection_radio = gr.Radio(
                ["greedy", "all"],
                show_label=False,
                elem_id="rank-column-radio",
                value=default_mode,
            )

    leaderboard_table = gr.components.Dataframe(
        value=default_main_df,
        # NOTE(review): four datatypes are listed but ten column widths --
        # confirm both against the actual number of displayed columns.
        datatype=["number", "markdown", "markdown", "number"],
        height=6000,
        elem_id="leaderboard-table",
        interactive=False,
        visible=True,
        column_widths=[50, 260, 100, 100, 120, 120, 100, 100, 110, 100],
        wrap=True,
    )

    # Re-filter the table whenever the mode changes. The hidden empty text
    # component fills df_filters' second (currently unused) parameter.
    mode_selection_radio.change(
        fn=df_filters,
        inputs=[mode_selection_radio, _gstr("")],
        outputs=[leaderboard_table],
    )
def _tab_submit(): | |
pass | |
def build_demo():
    """Assemble the Gradio app: banner, the three tabs, and the citation accordion.

    Side effect: stores the build time (US/Pacific, ``%Y-%m-%d %H:%M:%S``) in
    the module-level ``LAST_UPDATED``.

    Returns:
        The constructed ``gr.Blocks`` instance; the caller is responsible for
        launching it.
    """
    # Without this declaration the assignment below would only bind a
    # function-local name and the module-level LAST_UPDATED would stay None.
    global LAST_UPDATED

    with gr.Blocks(theme=gr.themes.Soft(), css=css, js=js_light) as demo:
        gr.HTML(BANNER, elem_id="banner")

        LAST_UPDATED = datetime.now(pytz.timezone("US/Pacific")).strftime("%Y-%m-%d %H:%M:%S")
        # The header markdown (HEADER_MD with its {model_num} / {LAST_UPDATED}
        # placeholders) is currently not rendered.

        # NOTE(review): the leading glyphs in the labels below look
        # mis-encoded -- confirm the intended emoji.
        with gr.Tabs(elem_classes="tab-buttons"):
            with gr.TabItem("๐ Leaderboard", elem_id="od-benchmark-tab-table", id=0):
                _tab_leaderboard()
            with gr.TabItem("๐ Submit Your Results", elem_id="od-benchmark-tab-table", id=3):
                _tab_submit()
            with gr.TabItem("๐ฎ About Us", elem_id="od-benchmark-tab-table", id=4):
                gr.Markdown(ABOUT_MD, elem_classes="markdown-text")

        with gr.Row():
            with gr.Accordion("๐ Citation", open=False, elem_classes="accordion-label"):
                gr.Textbox(
                    value=CITATION_TEXT,
                    lines=7,
                    label="Copy the BibTeX snippet to cite this source",
                    elem_id="citation-button",
                    show_copy_button=True,
                )

    return demo
def _floatify(value):
    """Return ``float(value)`` when the conversion succeeds, else ``value`` unchanged."""
    try:
        return float(value)
    except (TypeError, ValueError, OverflowError):
        # Not numeric (text, None, nested data) or too large for a float:
        # keep the original value.
        return value


def data_load(result_file):
    """Load the results JSON and store the post-processed table in ``original_df``.

    The file is expected to hold a list of records (dicts). Every field that
    ``float()`` accepts is converted to a float; all other fields are kept
    as-is. The resulting DataFrame is passed through ``post_processing`` and
    assigned to the module-level ``original_df``.

    Args:
        result_file: Path to the JSON results file.
    """
    global original_df
    print(f"Loading {result_file}")

    column_names_main = column_names.copy()
    main_ordered_columns = ORDERED_COLUMN_NAMES
    click_url = True

    with open(result_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Build fresh records instead of rewriting each dict while iterating it.
    records = [
        {key: _floatify(value) for key, value in record.items()}
        for record in data
    ]

    original_df = pd.DataFrame(records)
    original_df = post_processing(
        original_df,
        column_names_main,
        ordered_columns=main_ordered_columns,
        click_url=click_url,
        rank_column=RANKING_COLUMN,
    )
if __name__ == "__main__":
    # Script entry point: parse the CLI flags, load the results table, then
    # build and launch the Gradio app.
    parser = argparse.ArgumentParser()
    parser.add_argument("--share", action="store_true")
    parser.add_argument(
        "--result_file",
        help="Path to results table",
        default="ZeroEval-main/result_dirs/zebra-grid.summary.json",
    )
    args = parser.parse_args()

    data_load(args.result_file)
    # Echo the loaded table to stdout before the UI starts.
    print(original_df)

    demo = build_demo()
    demo.launch(share=args.share, height=3000, width="100%")