Anonymous committed on
Commit
9c4c35f
·
1 Parent(s): 406ade6

Refactor build_demo function to enhance the DeathMath leaderboard interface; update labels to Russian, improve layout, and add visualization options for model comparisons

Browse files
Files changed (1) hide show
  1. app.py +75 -66
app.py CHANGED
@@ -121,87 +121,71 @@ def update_plot(selected_models):
121
  return create_plot(selected_models)
122
 
123
  def build_demo():
 
 
 
 
124
  download_openbench()
125
- demo = gr.Blocks(title="Small Shlepa", css=custom_css)
 
 
 
 
 
 
 
 
 
 
 
126
  leaderboard_df = build_leadearboard_df()
 
 
127
  with demo:
 
128
  gr.HTML(TITLE)
129
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
130
 
 
131
  with gr.Tabs(elem_classes="tab-buttons"):
132
- with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
 
 
 
 
133
  Leaderboard(
134
  value=leaderboard_df,
135
  datatype=[c.type for c in fields(AutoEvalColumn)],
136
  select_columns=SelectColumns(
137
  default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
138
  cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden or c.dummy],
139
- label="Select Columns to Display:",
140
  ),
141
  search_columns=[
142
  AutoEvalColumn.model.name,
143
- # AutoEvalColumn.fullname.name,
144
- # AutoEvalColumn.license.name
145
  ],
146
  )
147
 
148
- # with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=1):
149
- # gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
150
- # with gr.TabItem("❗FAQ", elem_id="llm-benchmark-tab-table", id=2):
151
- # gr.Markdown(FAQ_TEXT, elem_classes="markdown-text")
152
-
153
- with gr.TabItem("🚀 Submit ", elem_id="llm-benchmark-tab-table", id=3):
154
  with gr.Row():
155
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
156
- with gr.Row():
157
- gr.Markdown("# ✨ Submit your model here!", elem_classes="markdown-text")
158
-
159
  with gr.Column():
160
-
161
- # def upload_file(file,su,mn):
162
- # file_path = file.name.split("/")[-1] if "/" in file.name else file.name
163
- # logging.info("New submition: file saved to %s", file_path)
164
- # with open(file.name, "r") as f:
165
- # v=json.load(f)
166
- # new_file = v['results']
167
- # new_file['model'] = mn+"/"+su
168
- # new_file['moviesmc']=new_file['moviemc']["acc,none"]
169
- # new_file['musicmc']=new_file['musicmc']["acc,none"]
170
- # new_file['booksmc']=new_file['bookmc']["acc,none"]
171
- # new_file['lawmc']=new_file['lawmc']["acc,none"]
172
- # # name = v['config']["model_args"].split('=')[1].split(',')[0]
173
- # new_file['model_dtype'] = v['config']["model_dtype"]
174
- # new_file['ppl'] = 0
175
- # new_file.pop('moviemc')
176
- # new_file.pop('bookmc')
177
- # buf = BytesIO()
178
- # buf.write(json.dumps(new_file).encode('utf-8'))
179
- # API.upload_file(
180
- # path_or_fileobj=buf,
181
- # path_in_repo="model_data/external/" + su+mn + ".json",
182
- # repo_id="Vikhrmodels/s-openbench-eval",
183
- # repo_type="dataset",
184
- # )
185
- # os.environ[RESET_JUDGEMENT_ENV] = "1"
186
- # return file.name
187
- # gr.LoginButton()
188
- model_name_textbox = gr.Textbox(label="Model name")
189
- # submitter_username = gr.Textbox(label="Username")
190
-
191
- # def toggle_upload_button(model_name, username):
192
- # return bool(model_name) and bool(username)
193
- file_output = gr.File(label="Drag and drop JSON file judgment here", type="filepath")
194
- # upload_button = gr.Button("Click to Upload & Submit Answers", elem_id="upload_button",variant='primary')
195
  uploaded_file = gr.State()
196
  file_path = gr.State()
 
197
  with gr.Row():
198
  with gr.Column():
199
- out = gr.Textbox("Статус отправки")
200
  with gr.Column():
201
- login_button = gr.LoginButton(elem_id="oauth-button")
202
-
203
- submit_button = gr.Button("Submit File", elem_id="submit_button", variant='primary')
204
-
 
205
  file_output.upload(
206
  handle_file_upload,
207
  file_output,
@@ -214,24 +198,49 @@ def build_demo():
214
  [out]
215
  )
216
 
217
- with gr.TabItem("📊 Analytics", elem_id="llm-benchmark-tab-table", id=4):
218
- with gr.Column():
 
 
219
  model_dropdown = gr.Dropdown(
220
  choices=leaderboard_df["model"].tolist(),
221
- label="Models",
222
- value=leaderboard_df["model"].tolist(),
 
 
 
 
 
 
 
 
223
  multiselect=True,
224
- info="Select models"
225
  )
226
- with gr.Column():
227
- plot = gr.Plot(update_plot(model_dropdown.value))
228
- # plot = gr.Plot()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
  model_dropdown.change(
230
- fn=update_plot,
231
- inputs=[model_dropdown],
232
- outputs=[plot]
233
  )
234
- return demo
 
235
 
236
 
237
  # print(os.system('cd src/gen && ../../.venv/bin/python gen_judgment.py'))
 
121
  return create_plot(selected_models)
122
 
123
  def build_demo():
124
+ """
125
+ Строит интерфейс лидерборда DeathMath
126
+ """
127
+ # Загружаем данные для лидерборда
128
  download_openbench()
129
+
130
+ # Создаем интерфейс с настройками темы
131
+ demo = gr.Blocks(
132
+ title="DeathMath Leaderboard",
133
+ css=custom_css,
134
+ theme=gr.themes.Default(
135
+ primary_hue="indigo",
136
+ secondary_hue="purple",
137
+ )
138
+ )
139
+
140
+ # Получаем данные для лидерборда
141
  leaderboard_df = build_leadearboard_df()
142
+
143
+ # Строим интерфейс
144
  with demo:
145
+ # Заголовок и введение
146
  gr.HTML(TITLE)
147
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
148
 
149
+ # Основные вкладки
150
  with gr.Tabs(elem_classes="tab-buttons"):
151
+ # Вкладка лидерборда
152
+ with gr.TabItem("🏅 Лидерборд", elem_id="llm-benchmark-tab-table", id=0):
153
+ gr.Markdown("### Таблица результатов моделей DeathMath")
154
+
155
+ # Таблица с результатами
156
  Leaderboard(
157
  value=leaderboard_df,
158
  datatype=[c.type for c in fields(AutoEvalColumn)],
159
  select_columns=SelectColumns(
160
  default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
161
  cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden or c.dummy],
162
+ label="Выберите колонки для отображения:",
163
  ),
164
  search_columns=[
165
  AutoEvalColumn.model.name,
 
 
166
  ],
167
  )
168
 
169
+ # Вкладка для отправки результатов
170
+ with gr.TabItem("🚀 Отправить результаты", elem_id="submit-tab", id=1):
 
 
 
 
171
  with gr.Row():
172
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
173
+
 
 
174
  with gr.Column():
175
+ model_name_textbox = gr.Textbox(label="Название модели")
176
+ file_output = gr.File(label="Перетащите JSON файл с результатами сюда", type="filepath")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  uploaded_file = gr.State()
178
  file_path = gr.State()
179
+
180
  with gr.Row():
181
  with gr.Column():
182
+ out = gr.Textbox("Здесь будет показан статус отправки")
183
  with gr.Column():
184
+ login_button = gr.LoginButton(elem_id="oauth-button")
185
+
186
+ submit_button = gr.Button("Отправить результаты", elem_id="submit_button", variant='primary')
187
+
188
+ # Обработчики событий
189
  file_output.upload(
190
  handle_file_upload,
191
  file_output,
 
198
  [out]
199
  )
200
 
201
+ # Вкладка с аналитикой
202
+ with gr.TabItem("📊 Аналитика", elem_id="analytics-tab", id=2):
203
+ with gr.Column():
204
+ if len(leaderboard_df) > 0:
205
  model_dropdown = gr.Dropdown(
206
  choices=leaderboard_df["model"].tolist(),
207
+ label="Модели",
208
+ value=leaderboard_df["model"].tolist()[:5] if len(leaderboard_df) >= 5 else leaderboard_df["model"].tolist(),
209
+ multiselect=True,
210
+ info="Выберите модели для сравнения"
211
+ )
212
+ else:
213
+ model_dropdown = gr.Dropdown(
214
+ choices=["example/model-1", "example/model-2"],
215
+ label="Модели",
216
+ value=["example/model-1", "example/model-2"],
217
  multiselect=True,
218
+ info="Выберите модели для сравнения"
219
  )
220
+
221
+ # Вкладки для разных типов визуализации
222
+ with gr.Tabs():
223
+ with gr.TabItem("Столбчатая диаграмма"):
224
+ bar_plot = gr.Plot(create_plot(model_dropdown.value))
225
+
226
+ with gr.TabItem("Радарная диаграмма"):
227
+ from src.radial.radial import create_radar_plot
228
+ radar_plot = gr.Plot(create_radar_plot(model_dropdown.value))
229
+
230
+ # Обновление графиков при изменении выбранных моделей
231
+ model_dropdown.change(
232
+ fn=create_plot,
233
+ inputs=[model_dropdown],
234
+ outputs=[bar_plot]
235
+ )
236
+
237
  model_dropdown.change(
238
+ fn=create_radar_plot,
239
+ inputs=[model_dropdown],
240
+ outputs=[radar_plot]
241
  )
242
+
243
+ return demo
244
 
245
 
246
  # print(os.system('cd src/gen && ../../.venv/bin/python gen_judgment.py'))