|
import streamlit as st |
|
import pandas as pd |
|
import numpy as np |
|
from html import escape |
|
|
|
|
|
# Render the app using Streamlit's wide page layout.
st.set_page_config(layout="wide")

# Per-column rendering rules passed to ``st.dataframe``: the two counters get
# an emoji suffix, the URL columns are shown as links.
column_config = {
    "Downloads": st.column_config.NumberColumn("Downloads", format="%d 📥"),
    "Likes": st.column_config.NumberColumn("Likes", format="%d ❤️"),
}

# All URL columns share the same presentation: a link labelled "Open".
for _url_column in ("Hugging Face URL", "Arxiv URL", "PapersWithCode URL"):
    column_config[_url_column] = st.column_config.LinkColumn(
        _url_column, display_text="Open"
    )
|
|
|
|
|
# Default CSV file name (a relative path, resolved against the working directory).
_DEFAULT_CSV_PATH = 'HuggingFaceBenchmarkDatasetsWithTags - Copy of HuggingFaceBenchmarkDatasetsWithTags (1).csv'


@st.cache_data
def load_data(file_path=_DEFAULT_CSV_PATH):
    """Read the benchmark CSV and normalise its column types.

    Args:
        file_path: Path of the CSV file to read. Defaults to the file name
            that was previously hard-coded, so ``load_data()`` behaves as
            before.

    Returns:
        A DataFrame in which ``Created At`` / ``Last Modified`` are parsed
        as datetimes, the download/like/example/size columns are numeric
        (values that cannot be parsed become NaT/NaN), empty strings are
        NaN, and the ``Card Data`` / ``Model Card README`` columns are
        removed when present.
    """
    data = pd.read_csv(file_path, na_values=['NA', ''])

    # errors='coerce' turns unparseable cells into NaT/NaN instead of raising.
    for column in ('Created At', 'Last Modified'):
        data[column] = pd.to_datetime(data[column], errors='coerce')

    for column in ('Downloads', 'Likes', 'Total Examples', 'Dataset Size (bytes)'):
        data[column] = pd.to_numeric(data[column], errors='coerce')

    # Treat any remaining empty strings as missing values.
    data = data.replace("", np.nan)

    # errors='ignore': a CSV that lacks these columns simply has nothing to
    # drop, which should not abort loading.
    return data.drop(columns=['Card Data', 'Model Card README'], errors='ignore')
|
|
|
def escape_html(val):
    """HTML-escape ``val`` when it is a string; return any other value unchanged."""
    if not isinstance(val, str):
        return val
    return escape(val)
|
|
|
# Benchmark table (``load_data`` is cached) used by every widget below.
df = load_data()

# Page header and free-text search box in the main area.
st.title('Bench1k: LLM Benchmarks & Evals Database')
st.subheader('Explore 1,327+ benchmarks. By default, sorted by # of downloads.')
st.write("Use the sidebar to apply filters.")
search_query = st.text_input("Search benchmarks by keyword")


def _distinct_values(column):
    """Return the distinct non-null values of ``df[column]`` as a list."""
    return df[column].dropna().unique().tolist()


def _value_range(column):
    """Return ``(min, max)`` of ``df[column]``, skipping NaN entries."""
    values = df[column]
    return values.min(skipna=True), values.max(skipna=True)


# Categorical filters: one sidebar multiselect per column, offering every
# distinct cell value found in that column.
task_ids = _distinct_values('Task IDs')
selected_task_id = st.sidebar.multiselect('Filter by Task IDs', task_ids)

task_categories = _distinct_values('Task Categories')
selected_task_category = st.sidebar.multiselect('Filter by Task Categories', task_categories)

licenses = _distinct_values('Licenses')
selected_license = st.sidebar.multiselect('Filter by License', licenses)

# Numeric filters: range sliders that start out spanning the whole column.
# Only the likes bounds are converted to int.
min_likes, max_likes = (int(bound) for bound in _value_range('Likes'))
selected_likes = st.sidebar.slider('Filter by Likes', min_likes, max_likes, (min_likes, max_likes))

min_size, max_size = _value_range('Dataset Size (bytes)')
selected_size = st.sidebar.slider('Filter by Dataset Size (bytes)', min_size, max_size, (min_size, max_size))

min_examples, max_examples = _value_range('Total Examples')
selected_examples = st.sidebar.slider('Filter by Total Examples', min_examples, max_examples, (min_examples, max_examples))
|
|
|
def _matches_selection(cell, selected):
    """Return True when a tag cell matches at least one selected option.

    A cell matches when the whole cell equals a selected option, or when one
    of its comma-separated entries does. Surrounding whitespace is ignored on
    both sides, so ``"a, b"`` matches a selection of ``"b"`` and a selection
    of ``"a, b"``.
    """
    text = str(cell).strip()
    entries = {entry.strip() for entry in text.split(',')}
    entries.add(text)
    return any(choice.strip() in entries for choice in selected)


def _filter_by_range(frame, column, selected, lower_bound, upper_bound):
    """Keep the rows of ``frame`` whose ``column`` lies in the selected range.

    While the selection still spans the full ``[lower_bound, upper_bound]``
    range nothing is filtered, so rows with a missing value in ``column`` are
    kept. Once the range is narrowed, rows with a missing value are excluded
    because ``Series.between`` is False for NaN.
    """
    low, high = selected
    if low > lower_bound or high < upper_bound:
        return frame[frame[column].between(low, high)]
    return frame


# Start from a copy so that later column edits can never reach ``df``.
filtered_df = df.copy()

if search_query:
    search_cols = df.select_dtypes(include=[object]).columns
    # regex=False: the keyword is matched literally, so input such as "c++"
    # or "(" cannot raise a regular-expression error.
    keyword_hits = filtered_df[search_cols].apply(
        lambda column: column.str.contains(search_query, case=False, na=False, regex=False)
    )
    filtered_df = filtered_df[keyword_hits.any(axis=1)]

if selected_task_id:
    mask_task_id = filtered_df['Task IDs'].apply(
        lambda cell: _matches_selection(cell, selected_task_id)
    )
    filtered_df = filtered_df[mask_task_id]

if selected_task_category:
    mask_task_category = filtered_df['Task Categories'].apply(
        lambda cell: _matches_selection(cell, selected_task_category)
    )
    filtered_df = filtered_df[mask_task_category]

if selected_license:
    filtered_df = filtered_df[filtered_df['Licenses'].isin(selected_license)]

if selected_likes:
    filtered_df = _filter_by_range(filtered_df, 'Likes', selected_likes, min_likes, max_likes)

if selected_size:
    filtered_df = _filter_by_range(
        filtered_df, 'Dataset Size (bytes)', selected_size, min_size, max_size
    )

if selected_examples:
    filtered_df = _filter_by_range(
        filtered_df, 'Total Examples', selected_examples, min_examples, max_examples
    )
|
|
|
def clean_html_sensitive_content(val):
    """Strip every character except alphanumerics, space, ``-`` and ``_``.

    Non-string values are returned unchanged.
    """
    if not isinstance(val, str):
        return val
    kept_symbols = (' ', '-', '_')
    return ''.join(ch for ch in val if ch.isalnum() or ch in kept_symbols)
|
|
|
# Strip everything except alphanumerics, spaces, '-' and '_' from the tag
# columns before display. ``assign`` builds a new frame rather than writing
# into the filtered selection, which avoids pandas' SettingWithCopyWarning;
# the column order is unchanged.
filtered_df = filtered_df.assign(**{
    column: filtered_df[column].apply(clean_html_sensitive_content)
    for column in ('Task IDs', 'Task Categories')
})

# Show the filtered table with the custom column renderers and no index.
st.dataframe(filtered_df, column_config=column_config, hide_index=True)

st.sidebar.info("Use the filters above to explore different aspects of the benchmark datasets.")
|
|
|
|
|
@st.cache_data
def convert_df(df):
    """Serialise ``df`` to CSV text and return it as UTF-8 encoded bytes."""
    csv_text = df.to_csv()
    return csv_text.encode('utf-8')
|
|
|
# CSV bytes of the complete (unfiltered) ``df``.
csv = convert_df(df)

# Offer those bytes as a file download.
_download_options = {
    "label": "Download database as CSV",
    "data": csv,
    "file_name": 'bench1k_database_full.csv',
    "mime": 'text/csv',
}
st.download_button(**_download_options)