Refactor file submission logic and enhance error handling in submit_file function; update leaderboard and README generation
app.py
CHANGED
@@ -20,31 +20,73 @@ def handle_file_upload(file):
     with open(file.name, "r") as f:
         v = json.load(f)
     return v, file_path
+
 def submit_file(v, file_path, mn, profile: gr.OAuthProfile | None):
+    """
+    Handles the upload of files with user model results
+
+    Args:
+        v: Result data loaded from the uploaded JSON
+        file_path: Path to the uploaded file
+        mn: Model name provided by the user
+        profile: HuggingFace user profile
+
+    Returns:
+        str: Success or error message
+    """
     if profile is None:
-        return "Hub Login Required"
-    … (22 removed lines not shown in this view)
+        return "Hub Login Required: Войдите в HuggingFace, чтобы загрузить результаты"
+
+    try:
+        # Check that the required fields are present
+        required_fields = ["score", "math_score", "physics_score"]
+        for field in required_fields:
+            if field not in v:
+                return f"Error: Отсутствует обязательное поле '{field}' в JSON файле"
+
+        # Build a new record to store in the dataset
+        new_file = {
+            "model_name": profile.username + "/" + mn,
+            "score": float(v["score"]),
+            "math_score": float(v["math_score"]),
+            "physics_score": float(v["physics_score"]),
+            "total_tokens": int(v.get("total_tokens", 0)),
+            "evaluation_time": float(v.get("evaluation_time", 0.0)),
+            "system_prompt": v.get("system_prompt", "Вы - полезный помощник по математике и физике. Ответьте на русском языке.")
+        }
+
+        # Validate the score values
+        if not (0 <= new_file["score"] <= 1 and
+                0 <= new_file["math_score"] <= 1 and
+                0 <= new_file["physics_score"] <= 1):
+            return "Error: Все значения оценок должны быть в диапазоне от 0 до 1"
+
+        # Build a unique file name from the username and the model name
+        safe_filename = profile.username + "_" + mn.replace("/", "_").replace(" ", "_") + ".json"
+
+        # Create the JSON in memory and upload it to the repository
+        buf = BytesIO()
+        buf.write(json.dumps(new_file, ensure_ascii=False).encode('utf-8'))
+
+        # Upload the file to the repository
+        API.upload_file(
+            path_or_fileobj=buf,
+            path_in_repo="model_data/external/" + safe_filename,
+            repo_id="Vikhrmodels/DeathMath-leaderboard-data",
+            repo_type="dataset",
+        )
+
+        # Set the flag so the leaderboard gets refreshed
+        os.environ[RESET_JUDGEMENT_ENV] = "1"
+
+        # Log the successful upload
+        logging.info(f"Successfully uploaded model results: {new_file['model_name']}")
+
+        return f"Success! Результаты модели '{mn}' успешно отправлены в лидерборд DeathMath."
+
+    except Exception as e:
+        logging.error(f"Error submitting file: {e}")
+        return f"Error: Произошла ошибка при отправке файла: {str(e)}"
 
 from src.display.about import (
     INTRODUCTION_TEXT,
@@ -198,88 +240,183 @@ def build_demo():
 
 def update_board():
     need_reset = os.environ.get(RESET_JUDGEMENT_ENV)
-    logging.info("Updating the …
+    logging.info("Updating the leaderboard: %s", need_reset)
     if need_reset != "1":
-    … (2 removed lines not shown in this view)
+        return
+
     os.environ[RESET_JUDGEMENT_ENV] = "0"
-    … (11 removed lines not shown in this view)
-    try:
-    … (2 removed lines not shown in this view)
-    except Exception as e:
-    … (4 removed lines not shown in this view)
-        data_list …
-    … (7 removed lines not shown in this view)
-        path_or_fileobj="genned.json",
-        path_in_repo="leaderboard.json",
-        repo_id="Vikhrmodels/ …
-        repo_type="dataset",
-    )
-
-
-
-
-    # subprocess.run(["python3", gen_judgement_file], check=True)
-
+
+    try:
+        # Download the latest data from the repository
+        download_dataset("Vikhrmodels/DeathMath-leaderboard-data", "m_data")
+        logging.info("Successfully downloaded model evaluation data")
+
+        # Collect data from all model files
+        data_list = []
+        seen_models = set()  # for tracking duplicates
+
+        for file in glob.glob("./m_data/model_data/external/*.json"):
+            try:
+                with open(file) as f:
+                    data = json.load(f)
+
+                # Check that the required fields are present
+                model_name = data.get("model_name", "")
+                if not model_name:
+                    logging.error(f"Failed to parse {file}: 'model_name' not found")
+                    continue
+
+                # Prevent duplicate models
+                model_base_name = model_name.split("/")[-1].split("_v")[0]
+                if model_base_name in seen_models:
+                    logging.info(f"Skipping duplicate model: {model_name}")
+                    continue
+
+                seen_models.add(model_base_name)
+
+                # Add the model to the list
+                data_list.append({
+                    "model_name": model_name,
+                    "score": float(data.get("score", 0.0)),
+                    "math_score": float(data.get("math_score", 0.0)),
+                    "physics_score": float(data.get("physics_score", 0.0)),
+                    "total_tokens": int(data.get("total_tokens", 0)),
+                    "evaluation_time": float(data.get("evaluation_time", 0.0)),
+                    "system_prompt": data.get("system_prompt",
+                        "Вы - полезный помощник по математике и физике. Ответьте на русском языке.")
+                })
+
+            except Exception as e:
+                logging.error(f"Failed to process file {file}: {e}")
+                continue
+
+        # If there is data, sort it by total score and save it
+        if data_list:
+            # Sort by total score
+            data_list.sort(key=lambda x: x["score"], reverse=True)
+
+            # Save the updated leaderboard
+            with open("genned.json", "w", encoding="utf-8") as f:
+                json.dump(data_list, f, ensure_ascii=False, indent=2)
+
+            # Upload the updated leaderboard to the repository
+            API.upload_file(
+                path_or_fileobj="genned.json",
+                path_in_repo="leaderboard.json",
+                repo_id="Vikhrmodels/DeathMath-leaderboard-metainfo",
+                repo_type="dataset",
+            )
+            logging.info(f"Updated leaderboard with {len(data_list)} models")
+
+            # Generate the README with the leaderboard table
+            update_readme(data_list)
+
+    except Exception as e:
+        logging.error(f"Error updating leaderboard: {e}")
+
+
+def update_readme(data_list):
+    """
+    Generates README.md with the leaderboard table
+    """
+    try:
+        import pandas as pd
+        from datetime import datetime
+
+        # Build a DataFrame for convenient table formatting
+        df = pd.DataFrame(data_list)
+
+        # Format the numeric columns
+        for col in ["score", "math_score", "physics_score"]:
+            if col in df.columns:
+                df[col] = df[col].apply(lambda x: f"{x:.3f}")
+
+        if "total_tokens" in df.columns:
+            df["total_tokens"] = df["total_tokens"].apply(lambda x: f"{int(x):,}")
+
+        if "evaluation_time" in df.columns:
+            df["evaluation_time"] = df["evaluation_time"].apply(lambda x: f"{x:.1f}s")
+
+        # Build the README content
+        current_date = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+        readme_content = f"""---
+title: DeathMath Leaderboard
+emoji: 🔢
+colorFrom: indigo
+colorTo: purple
+sdk: gradio
+sdk_version: "4.20.0"
+app_file: app.py
+pinned: false
+hf_oauth: true
+---
+# DeathMath Leaderboard
+
+DeathMath - это бенчмарк для оценки способности моделей решать сложные математические и физические задачи на русском языке.
+
+## Текущий лидерборд
+
+Последнее обновление: {current_date}
+
+| Модель | Общий балл | Математика | Физика | Токены | Время оценки |
+|--------|------------|------------|---------|---------|--------------|
+"""
+
+        # Append the table rows
+        for _, row in df.iterrows():
+            model_name = row.get("model_name", "")
+            readme_content += f"| {model_name} | {row.get('score', 'N/A')} | {row.get('math_score', 'N/A')} | {row.get('physics_score', 'N/A')} | {row.get('total_tokens', 'N/A')} | {row.get('evaluation_time', 'N/A')} |\n"
+
+        readme_content += """
+## Как принять участие в бенчмарке
+
+Для участия в бенчмарке DeathMath:
+
+1. Клонируйте репозиторий и запустите тесты вашей модели
+2. Загрузите результаты через [HuggingFace Space](https://huggingface.co/spaces/Vikhrmodels/DeathMath-leaderboard)
+3. Дождитесь проверки и добавления результатов в лидерборд
+
+## Формат результатов
+
+Результаты должны быть в формате JSON со следующей структурой:
+```json
+{
+    "score": 0.586,
+    "math_score": 0.8,
+    "physics_score": 0.373,
+    "total_tokens": 1394299,
+    "evaluation_time": 4533.2,
+    "system_prompt": "Вы - полезный помощник по математике и физике. Ответьте на русском языке."
+}
+```
+
+## Лицензия
+
+Бенчмарк распространяется под лицензией Apache 2.0
+"""
+
+        # Save the README
+        with open("README.md", "w", encoding="utf-8") as f:
+            f.write(readme_content)
+
+        # Upload the README to the repository
+        API.upload_file(
+            path_or_fileobj="README.md",
+            path_in_repo="README.md",
+            repo_id="Vikhrmodels/DeathMath-leaderboard-metainfo",
+            repo_type="dataset",
+        )
+
+        logging.info("Updated README with leaderboard table")
+
+    except Exception as e:
+        logging.error(f"Failed to update README: {e}")
+
+
+# Simply call the main update_board function
 def update_board_():
-
-    logging.info("Updating the judgement: %s", need_reset)
-    if need_reset != "1":
-        # return
-        pass
-    os.environ[RESET_JUDGEMENT_ENV] = "0"
-
-    # `shutil.rmtree("./m_data")` is a Python command that removes a directory and all its contents
-    # recursively. In this specific context, it is used to delete the directory named "m_data" along
-    # with all its files and subdirectories. This command helps in cleaning up the existing data in
-    # the "m_data" directory before downloading new dataset files into it.
-    # shutil.rmtree("./m_data")
-    # shutil.rmtree("./data")
-    download_dataset("Vikhrmodels/s-openbench-eval", "m_data")
-    data_list = [{"musicmc": 0.3021276595744681, "lawmc": 0.2800829875518672, "model": "apsys/saiga_3_8b", "moviesmc": 0.3472222222222222, "booksmc": 0.2800829875518672, "model_dtype": "torch.float16", "ppl": 0, 'mmluproru':0}]
-    for file in glob.glob("./m_data/model_data/external/*.json"):
-        with open(file) as f:
-            try:
-                data = json.load(f)
-                data_list.append(data)
-            except Exception as e:
-                pass # data was badly formatted, should not fail
-    print("DATALIST,", data_list)
-
-    if len(data_list)>1:
-        data_list.pop(0)
-
-    if len(data_list)>4:
-        with open("genned.json", "w") as f:
-            json.dump(data_list, f)
-
-
-    API.upload_file(
-        path_or_fileobj="genned.json",
-        path_in_repo="leaderboard.json",
-        repo_id="Vikhrmodels/s-shlepa-metainfo",
-        repo_type="dataset",
-    )
+    update_board()
 
 if __name__ == "__main__":
     os.environ[RESET_JUDGEMENT_ENV] = "1"
@@ -290,4 +427,5 @@ if __name__ == "__main__":
     scheduler.start()
 
     demo_app = build_demo()
-    … (1 removed line not shown in this view)
+    # Fix the launch parameters for compatibility with WebhookServer
+    demo_app.launch(debug=True)
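For reference, the reworked `submit_file` accepts a results JSON that must contain `score`, `math_score` and `physics_score` (each between 0 and 1) and may also carry `total_tokens`, `evaluation_time` and `system_prompt`. A minimal local pre-check that mirrors those validations might look like the sketch below; the script itself and the `results.json` file name are illustrative and not part of this commit.

```python
import json

# Required fields and the 0..1 range check mirror submit_file above.
REQUIRED_FIELDS = ["score", "math_score", "physics_score"]


def check_results(path: str) -> None:
    """Validate a DeathMath results file before submitting it through the Space UI."""
    with open(path, encoding="utf-8") as f:
        results = json.load(f)

    # Same required-field check as submit_file
    missing = [field for field in REQUIRED_FIELDS if field not in results]
    if missing:
        raise ValueError(f"Missing required fields: {missing}")

    # Same 0..1 range check as submit_file
    for field in REQUIRED_FIELDS:
        value = float(results[field])
        if not 0 <= value <= 1:
            raise ValueError(f"{field} must be between 0 and 1, got {value}")

    print("Results file looks valid.")


if __name__ == "__main__":
    check_results("results.json")  # illustrative file name
```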
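The leaderboard refresh is driven by the `RESET_JUDGEMENT_ENV` flag: `submit_file` sets it to `"1"` after a successful upload, and `update_board` only rebuilds and re-uploads `leaderboard.json` when the flag is set, clearing it before rebuilding. The scheduler construction itself lies outside the changed hunks; a typical wiring, assuming APScheduler's `BackgroundScheduler` (an assumption, since the diff only shows `scheduler.start()`), could look like this:

```python
from apscheduler.schedulers.background import BackgroundScheduler

# Assumed wiring: the diff only shows scheduler.start(), not how the job is registered.
# update_board_ exits early unless RESET_JUDGEMENT_ENV == "1", so the periodic job is cheap
# and the leaderboard is rebuilt only after a new submission flips the flag.
scheduler = BackgroundScheduler()
scheduler.add_job(update_board_, "interval", minutes=10)  # the interval is an assumption
scheduler.start()
```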
|