import gradio as gr
from gradio_rangeslider import RangeSlider
import core
from style import CSS, LANG_SYMBOLS, T_SYMBOLS, TITLE
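
# Returns the model-type checkbox group (pretrained / chat) and the range
# slider for selecting the parameter count in billions; shared by the
# accuracy, zero-shot and translation tabs.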
def create_model_controls():
with gr.Row():
with gr.Column():
model_types = gr.CheckboxGroup(
label="Select model type",
choices=[
(
f"Pretrained {T_SYMBOLS['pretrained']}",
T_SYMBOLS["pretrained"],
),
(f"Chat {T_SYMBOLS['chat']}", T_SYMBOLS["chat"]),
],
value=list(T_SYMBOLS.values()),
)
with gr.Column():
model_sizes = RangeSlider(minimum=0, maximum=150, value=(7, 8),
label="Select the number of parameters (B)")
return model_types, model_sizes
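
# Returns the language checkbox group for the given language choices; also
# adds buttons to select or deselect all languages at once.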
def create_language_controls(lang_choices):
with gr.Row():
langs_bar = gr.CheckboxGroup(
choices=[(LANG_SYMBOLS.get(l, l), l) for l in lang_choices],
value=lang_choices,
label="Select languages to average over",
elem_id="column-select",
interactive=True,
scale=6,
)
with gr.Column(scale=1):
clear = gr.ClearButton(
langs_bar,
value="Deselect all languages",
size="sm",
scale=1,
)
select = gr.Button(
value="Select all languages",
size="sm",
scale=1,
)
select.click(
lambda: gr.CheckboxGroup(value=lang_choices),
inputs=[],
outputs=langs_bar,
)
return langs_bar
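
# Returns the task checkbox group for the given tab id, pre-populated with
# all available task groups; also adds select-all and deselect-all buttons.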
def create_task_controls(tab_id):
with gr.Row():
shown_tasks = gr.CheckboxGroup(
choices=core.get_available_task_groups(core.get_selected_task_type(tab_id), True),
value=core.get_available_task_groups(core.get_selected_task_type(tab_id), True),
label="Select tasks to show",
elem_id="column-select",
interactive=True,
scale=50,
)
clear = gr.ClearButton(
shown_tasks,
value="Deselect all tasks",
size="sm",
scale=1,
)
select = gr.Button(
value="Select all tasks",
size="sm",
scale=1,
)
select.click(
lambda: gr.CheckboxGroup(
value=core.get_available_task_groups(core.get_selected_task_type(tab_id), True)),
inputs=[],
outputs=shown_tasks,
)
return shown_tasks
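
# Build the app: a blue Default theme and one Blocks page with a tab per leaderboard.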
theme = gr.themes.Default(
primary_hue="blue",
).set(
button_border_width='*block_border_width'
)
demo = gr.Blocks(css=CSS, theme=theme)
with demo:
gr.HTML(TITLE)
    gr.Markdown(
        "This is a collection of multilingual evaluation results obtained using our fork of the "
        "LM-evaluation-harness (https://github.com/OpenGPTX/lm-evaluation-harness), based on v1 of "
        "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard. "
        "Benchmarks are currently available in 21 European languages; Irish, Maltese, and Croatian are still missing.",
        elem_classes="markdown-text",
    )
selected_tab = gr.State(value=0)
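    # Four leaderboards: few-shot accuracy, zero-shot accuracy, translation, and MT-Bench.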
with gr.Tabs(elem_classes="tab-buttons") as tabs:
with gr.TabItem(
"πŸ… LLM accuracy benchmark",
elem_id="llm-benchmark-tab-table-acc",
id=0,
) as acc:
with gr.Column():
with gr.Row():
search_bar = gr.Textbox(
label="Search models",
placeholder=" πŸ” Separate multiple queries with ';' and press ENTER...",
show_label=True,
elem_id="search-bar",
)
model_types, model_sizes = create_model_controls()
langs_bar = create_language_controls(core.languages_list)
shown_tasks = create_task_controls(0)
leaderboard_table = gr.Dataframe(datatype=["str", "markdown", "number"])
with gr.TabItem(
"πŸ… LLM accuracy benchmark (Zero-Shot)",
elem_id="llm-benchmark-tab-table-acc-zeroshot",
id=3,
) as acc_zero_shot:
with gr.Column():
with gr.Row():
search_bar_zero_shot = gr.Textbox(
label="Search models",
placeholder=" πŸ” Separate multiple queries with ';' and press ENTER...",
show_label=True,
elem_id="search-bar",
)
model_types_zero_shot, model_sizes_zero_shot = create_model_controls()
langs_bar_zero_shot = create_language_controls(core.languages_list)
shown_tasks_zero_shot = create_task_controls(1)
leaderboard_table_zero_shot = gr.Dataframe(datatype=["str", "markdown", "number"])
with gr.TabItem(
"🌐 LLM translation benchmark",
elem_id="llm-benchmark-tab-table-misc",
id=1,
) as misc:
with gr.Column():
with gr.Row():
search_bar_misc = gr.Textbox(
label="Search models",
placeholder=" πŸ” Separate multiple queries with ';' and press ENTER...",
show_label=True,
elem_id="search-bar",
)
model_types_misc, model_sizes_misc = create_model_controls()
langs_bar_misc = create_language_controls(core.languages_list)
shown_tasks_misc = create_task_controls(3)
leaderboard_table_misc = gr.Dataframe(datatype=["str", "markdown", "number"])
with gr.TabItem(
"🌐 LLM MT-Bench benchmark",
elem_id="llm-benchmark-tab-table-mtbench",
id=2,
) as mtbench:
with gr.Column():
with gr.Row():
search_bar_mtbench = gr.Textbox(
label="Search models",
placeholder=" πŸ” Separate multiple queries with ';' and press ENTER...",
show_label=True,
elem_id="search-bar",
)
langs_bar_mtbench = create_language_controls(core.mt_bench_language_list)
leaderboard_table_mtbench = gr.Dataframe(datatype=["str", "markdown", "number"])
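
    # Attach core.update_df to each tab's filter controls so its table refreshes
    # whenever a filter changes. Inputs follow the same pattern per tab:
    # leaderboard id, selected tasks, search query, selected languages, size
    # range, a boolean State, and model types (the MT-Bench tab omits the
    # controls it does not have).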
for comp, fn in [
(search_bar, "submit"),
(langs_bar, "change"),
(shown_tasks, "change"),
(model_types, "change"),
(model_sizes, "change"),
]:
getattr(comp, fn)(
core.update_df,
[gr.State(value=0), shown_tasks, search_bar, langs_bar, model_sizes, gr.State(value=True), model_types],
leaderboard_table,
)
for comp, fn in [
(search_bar_zero_shot, "submit"),
(model_types_zero_shot, "change"),
(langs_bar_zero_shot, "change"),
(shown_tasks_zero_shot, "change"),
(model_sizes_zero_shot, "change")
]:
getattr(comp, fn)(
core.update_df,
[gr.State(value=1), shown_tasks_zero_shot, search_bar_zero_shot, langs_bar_zero_shot,
model_sizes_zero_shot, gr.State(value=False), model_types_zero_shot],
leaderboard_table_zero_shot,
)
for comp, fn in [
(search_bar_misc, "submit"),
(langs_bar_misc, "change"),
(shown_tasks_misc, "change"),
(model_types_misc, "change"),
(model_sizes_misc, "change"),
]:
getattr(comp, fn)(
core.update_df,
[gr.State(value=2), shown_tasks_misc, search_bar_misc, langs_bar_misc, model_sizes_misc,
gr.State(value=False), model_types_misc],
leaderboard_table_misc,
)
for comp, fn in [
(search_bar_mtbench, "submit"),
(langs_bar_mtbench, "change"),
]:
getattr(comp, fn)(
core.update_df,
[gr.State(value=3),
gr.State(value=core.get_available_task_groups(core.get_selected_task_type(2), False)),
search_bar_mtbench, langs_bar_mtbench, gr.State(value=False)],
leaderboard_table_mtbench,
)
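
    # Fill each leaderboard table once when the page first loads, using the
    # default filter values defined above.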
    demo.load(
        fn=core.update_df,
        inputs=[gr.State(value=0), shown_tasks, search_bar, langs_bar, model_sizes, gr.State(value=True), model_types],
        outputs=leaderboard_table,
    )
    demo.load(
        fn=core.update_df,
        inputs=[gr.State(value=1), shown_tasks_zero_shot, search_bar_zero_shot, langs_bar_zero_shot,
                model_sizes_zero_shot, gr.State(value=False), model_types_zero_shot],
        outputs=leaderboard_table_zero_shot,
    )
    demo.load(
        fn=core.update_df,
        inputs=[gr.State(value=2), shown_tasks_misc, search_bar_misc, langs_bar_misc, model_sizes_misc,
                gr.State(value=False), model_types_misc],
        outputs=leaderboard_table_misc,
    )
# We do not have a checkbox for model_type in mt_bench, hence there is no model_types variable
    demo.load(
        fn=core.update_df,
        inputs=[gr.State(value=3),
                gr.State(value=core.get_available_task_groups(core.get_selected_task_type(2), False)),
                search_bar_mtbench, langs_bar_mtbench, gr.State(value=False)],
        outputs=leaderboard_table_mtbench,
    )
demo.launch()