Refactor build_demo function to enhance the DeathMath leaderboard interface; update labels to Russian, improve layout, and add visualization options for model comparisons
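The analytics tab added in the diff below draws its comparison charts with create_plot and with create_radar_plot imported from src.radial.radial; that helper's implementation is not part of this change. As a rough sketch of what such a radar-comparison function could look like, assuming the leaderboard dataframe has a "model" column plus per-category score columns (math_score and physics_score here are placeholders, not names taken from the repo):

import pandas as pd
import plotly.graph_objects as go


def create_radar_plot(selected_models, df=None):
    # Stand-in data; the real app would pass its leaderboard dataframe instead.
    if df is None:
        df = pd.DataFrame({
            "model": ["example/model-1", "example/model-2"],
            "math_score": [0.62, 0.48],      # hypothetical column
            "physics_score": [0.55, 0.51],   # hypothetical column
        })
    metrics = ["math_score", "physics_score"]

    fig = go.Figure()
    for _, row in df[df["model"].isin(selected_models)].iterrows():
        # One closed polygon per selected model, one radial axis per metric.
        fig.add_trace(go.Scatterpolar(
            r=[row[m] for m in metrics],
            theta=metrics,
            fill="toself",
            name=row["model"],
        ))
    fig.update_layout(polar=dict(radialaxis=dict(range=[0, 1])), showlegend=True)
    return fig

gr.Plot accepts a Plotly figure directly, so a function with this shape can serve both the initial render (gr.Plot(create_radar_plot(model_dropdown.value))) and the model_dropdown.change callback wired up at the end of the diff.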
app.py
CHANGED
@@ -121,87 +121,71 @@ def update_plot(selected_models):
     return create_plot(selected_models)

 def build_demo():
+    """
+    Строит интерфейс лидерборда DeathMath
+    """
+    # Загружаем данные для лидерборда
     download_openbench()
-
+
+    # Создаем интерфейс с настройками темы
+    demo = gr.Blocks(
+        title="DeathMath Leaderboard",
+        css=custom_css,
+        theme=gr.themes.Default(
+            primary_hue="indigo",
+            secondary_hue="purple",
+        )
+    )
+
+    # Получаем данные для лидерборда
     leaderboard_df = build_leadearboard_df()
+
+    # Строим интерфейс
     with demo:
+        # Заголовок и введение
         gr.HTML(TITLE)
         gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

+        # Основные вкладки
         with gr.Tabs(elem_classes="tab-buttons"):
-
+            # Вкладка лидерборда
+            with gr.TabItem("🏅 Лидерборд", elem_id="llm-benchmark-tab-table", id=0):
+                gr.Markdown("### Таблица результатов моделей DeathMath")
+
+                # Таблица с результатами
                 Leaderboard(
                     value=leaderboard_df,
                     datatype=[c.type for c in fields(AutoEvalColumn)],
                     select_columns=SelectColumns(
                         default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
                         cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden or c.dummy],
-                        label="
+                        label="Выберите колонки для отображения:",
                     ),
                     search_columns=[
                         AutoEvalColumn.model.name,
-                        # AutoEvalColumn.fullname.name,
-                        # AutoEvalColumn.license.name
                     ],
                 )

-            #
-
-            # with gr.TabItem("❗FAQ", elem_id="llm-benchmark-tab-table", id=2):
-            #     gr.Markdown(FAQ_TEXT, elem_classes="markdown-text")
-
-            with gr.TabItem("🚀 Submit ", elem_id="llm-benchmark-tab-table", id=3):
+            # Вкладка для отправки результатов
+            with gr.TabItem("🚀 Отправить результаты", elem_id="submit-tab", id=1):
                 with gr.Row():
                     gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
-
-                gr.Markdown("# ✨ Submit your model here!", elem_classes="markdown-text")
-
+
                 with gr.Column():
-
-
-                    # file_path = file.name.split("/")[-1] if "/" in file.name else file.name
-                    # logging.info("New submition: file saved to %s", file_path)
-                    # with open(file.name, "r") as f:
-                    #     v=json.load(f)
-                    #     new_file = v['results']
-                    #     new_file['model'] = mn+"/"+su
-                    #     new_file['moviesmc']=new_file['moviemc']["acc,none"]
-                    #     new_file['musicmc']=new_file['musicmc']["acc,none"]
-                    #     new_file['booksmc']=new_file['bookmc']["acc,none"]
-                    #     new_file['lawmc']=new_file['lawmc']["acc,none"]
-                    #     # name = v['config']["model_args"].split('=')[1].split(',')[0]
-                    #     new_file['model_dtype'] = v['config']["model_dtype"]
-                    #     new_file['ppl'] = 0
-                    #     new_file.pop('moviemc')
-                    #     new_file.pop('bookmc')
-                    #     buf = BytesIO()
-                    #     buf.write(json.dumps(new_file).encode('utf-8'))
-                    #     API.upload_file(
-                    #         path_or_fileobj=buf,
-                    #         path_in_repo="model_data/external/" + su+mn + ".json",
-                    #         repo_id="Vikhrmodels/s-openbench-eval",
-                    #         repo_type="dataset",
-                    #     )
-                    #     os.environ[RESET_JUDGEMENT_ENV] = "1"
-                    #     return file.name
-                    # gr.LoginButton()
-                    model_name_textbox = gr.Textbox(label="Model name")
-                    # submitter_username = gr.Textbox(label="Username")
-
-                    # def toggle_upload_button(model_name, username):
-                    #     return bool(model_name) and bool(username)
-                    file_output = gr.File(label="Drag and drop JSON file judgment here", type="filepath")
-                    # upload_button = gr.Button("Click to Upload & Submit Answers", elem_id="upload_button",variant='primary')
+                    model_name_textbox = gr.Textbox(label="Название модели")
+                    file_output = gr.File(label="Перетащите JSON файл с результатами сюда", type="filepath")
                     uploaded_file = gr.State()
                     file_path = gr.State()
+
                 with gr.Row():
                     with gr.Column():
-                        out = gr.Textbox("
+                        out = gr.Textbox("Здесь будет показан статус отправки")
                     with gr.Column():
-
-
-                        submit_button = gr.Button("
-
+                        login_button = gr.LoginButton(elem_id="oauth-button")
+
+                submit_button = gr.Button("Отправить результаты", elem_id="submit_button", variant='primary')
+
+                # Обработчики событий
                 file_output.upload(
                     handle_file_upload,
                     file_output,
@@ -214,24 +198,49 @@ def build_demo():
                     [out]
                 )

-
-
+            # Вкладка с аналитикой
+            with gr.TabItem("📊 Аналитика", elem_id="analytics-tab", id=2):
+                with gr.Column():
+                    if len(leaderboard_df) > 0:
                         model_dropdown = gr.Dropdown(
                             choices=leaderboard_df["model"].tolist(),
-                            label="
-                            value=leaderboard_df["model"].tolist(),
+                            label="Модели",
+                            value=leaderboard_df["model"].tolist()[:5] if len(leaderboard_df) >= 5 else leaderboard_df["model"].tolist(),
+                            multiselect=True,
+                            info="Выберите модели для сравнения"
+                        )
+                    else:
+                        model_dropdown = gr.Dropdown(
+                            choices=["example/model-1", "example/model-2"],
+                            label="Модели",
+                            value=["example/model-1", "example/model-2"],
                             multiselect=True,
-                            info="
+                            info="Выберите модели для сравнения"
                         )
-
-
-
+
+                    # Вкладки для разных типов визуализации
+                    with gr.Tabs():
+                        with gr.TabItem("Столбчатая диаграмма"):
+                            bar_plot = gr.Plot(create_plot(model_dropdown.value))
+
+                        with gr.TabItem("Радарная диаграмма"):
+                            from src.radial.radial import create_radar_plot
+                            radar_plot = gr.Plot(create_radar_plot(model_dropdown.value))
+
+                    # Обновление графиков при изменении выбранных моделей
+                    model_dropdown.change(
+                        fn=create_plot,
+                        inputs=[model_dropdown],
+                        outputs=[bar_plot]
+                    )
+
                     model_dropdown.change(
-
-
-
+                        fn=create_radar_plot,
+                        inputs=[model_dropdown],
+                        outputs=[radar_plot]
                     )
-
+
+    return demo


 # print(os.system('cd src/gen && ../../.venv/bin/python gen_judgment.py'))
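The submission tab wires file_output.upload(handle_file_upload, file_output, ...) into the out status textbox, but handle_file_upload itself is defined outside the hunks shown above, so its exact signature is not visible here. A minimal sketch, assuming the callback only has to validate the uploaded JSON and return a status string (the real handler may also fill the uploaded_file and file_path states):

import json


def handle_file_upload(path):
    # gr.File(type="filepath") passes the callback a path string.
    if not path:
        return "No file uploaded"
    try:
        with open(path, "r", encoding="utf-8") as f:
            results = json.load(f)
    except (OSError, json.JSONDecodeError) as exc:
        return f"Could not read the results file: {exc}"
    # The returned string is shown in the `out` status textbox.
    return f"File accepted: {len(results)} top-level fields"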