Commit 4d13673 · Parent: 92d8154
David Pomerenke committed

Add Dockerfile

.dockerignore ADDED
@@ -0,0 +1,5 @@
+.git
+.cache
+.venv
+.env
+frontend/node_modules
Dockerfile ADDED
@@ -0,0 +1,17 @@
+FROM node:20-alpine AS build
+WORKDIR /frontend
+COPY frontend/package.json frontend/package-lock.json ./
+RUN npm ci
+COPY frontend/public/ public/
+COPY frontend/src/ src/
+RUN npm run build
+
+FROM --platform=linux/amd64 ghcr.io/astral-sh/uv:python3.12-bookworm
+WORKDIR /app
+COPY pyproject.toml uv.lock ./
+RUN uv sync --frozen --no-dev
+COPY evals/ evals/
+COPY --from=build /frontend/build /app/frontend/build
+COPY results.json datasets.json ./
+EXPOSE 8000
+CMD ["uv", "run", "--no-dev", "evals/backend.py"]
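Note, not part of the commit: a minimal sketch of building and running this image locally, assuming Docker is installed; the image tag and the helper script name are arbitrary.

```python
# build_and_run.py: hypothetical local helper (not in the repository)
import subprocess

IMAGE = "ai-language-monitor"  # assumed tag; any name works

# Build the two-stage image from the repository root.
subprocess.run(["docker", "build", "-t", IMAGE, "."], check=True)

# Run it, publishing the port the backend listens on (PORT defaults to 8000).
subprocess.run(
    ["docker", "run", "--rm", "-p", "8000:8000", "-e", "PORT=8000", IMAGE],
    check=True,
)
```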
README.md CHANGED
@@ -1,3 +1,40 @@
+---
+title: AI Language Monitor
+emoji: 🌍
+colorFrom: purple
+colorTo: pink
+sdk: static
+license: cc-by-sa-4.0
+short_description: Evaluating LLM performance across all human languages.
+datasets:
+- openlanguagedata/flores_plus
+- google/fleurs
+- mozilla-foundation/common_voice_1_0
+models:
+- meta-llama/Llama-3.3-70B-Instruct
+- mistralai/Mistral-Small-24B-Instruct-2501
+- deepseek-ai/DeepSeek-V3
+- microsoft/phi-4
+- openai/whisper-large-v3
+- google/gemma-3-27b-it
+tags:
+- leaderboard
+- submission:manual
+- test:public
+- judge:auto
+- modality:text
+- modality:artefacts
+- eval:generation
+- language:English
+- language:German
+---
+
+<!--
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+For tag meaning, see https://huggingface.co/spaces/leaderboards/LeaderboardsExplorer
+-->
+
+
 [![Hugging Face](https://img.shields.io/badge/🤗%20Hugging%20Face-Space-purple)](https://huggingface.co/spaces/datenlabor-bmz/ai-language-monitor)
 
 # AI Language Monitor 🌍
data/datasets.json DELETED
@@ -1,484 +0,0 @@
-[
-  {
-    "name": "FLORES+",
-    "author": "Meta",
-    "author_url": "https://ai.meta.com",
-    "url": "https://huggingface.co/datasets/openlanguagedata/flores_plus",
-    "n_languages": 200,
-    "tasks": [
-      "translation",
-      "classification",
-      "language_modeling"
-    ],
-    "parallel": true,
-    "base": "FLORES",
-    "implemented": true
-  },
-  {
-    "name": "FLEURS",
-    "author": "Meta",
-    "author_url": "https://ai.meta.com",
-    "url": "https://huggingface.co/datasets/google/fleurs",
-    "n_languages": 102,
-    "tasks": [
-      "speech_recognition"
-    ],
-    "parallel": true,
-    "base": "FLORES",
-    "implemented": true
-  },
-  {
-    "name": "CommonVoice",
-    "author": "Mozilla",
-    "author_url": "https://mozilla.ai",
-    "url": "https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0",
-    "n_languages": 124,
-    "tasks": [
-      "speech_recognition"
-    ],
-    "parallel": null
-  },
-  {
-    "name": "MMMLU",
-    "author": "OpenAI",
-    "author_url": "https://openai.com",
-    "url": "https://huggingface.co/datasets/openai/MMMLU",
-    "n_languages": "14",
-    "tasks": [
-      "question_answering"
-    ],
-    "parallel": true,
-    "base": "MMLU"
-  },
-  {
-    "name": "AfriMMLU",
-    "author": "Masakhane",
-    "author_url": "https://www.masakhane.io",
-    "url": "https://huggingface.co/datasets/masakhane/afrimmlu",
-    "n_languages": "17",
-    "tasks": [
-      "question_answering"
-    ],
-    "parallel": true,
-    "base": "MMLU"
-  },
-  {
-    "name": "Okapi MMLU",
-    "author": "Academic",
-    "author_url": null,
-    "url": "https://huggingface.co/datasets/jon-tow/okapi_mmlu",
-    "n_languages": 16,
-    "tasks": [
-      "question_answering"
-    ],
-    "parallel": true,
-    "base": "MMLU"
-  },
-  {
-    "name": "MMLU-X",
-    "author": "OpenGPT-X",
-    "author_url": null,
-    "url": "https://huggingface.co/datasets/openGPT-X/mmlux",
-    "n_languages": 20,
-    "tasks": [
-      "question_answering"
-    ],
-    "parallel": true,
-    "base": "MMLU"
-  },
-  {
-    "name": "Global MMLU",
-    "author": "Cohere",
-    "author_url": "https://cohere.com",
-    "url": "https://huggingface.co/datasets/CohereForAI/Global-MMLU",
-    "n_languages": 42,
-    "tasks": [
-      "question_answering"
-    ],
-    "parallel": true,
-    "base": "MMLU"
-  },
-  {
-    "name": "MGSM",
-    "author": "Google",
-    "author_url": "https://google.com",
-    "url": "https://huggingface.co/datasets/juletxara/mgsm",
-    "n_languages": 10,
-    "tasks": [
-      "math"
-    ],
-    "parallel": true,
-    "base": "MGSM"
-  },
-  {
-    "name": "AfriMGSM",
-    "author": "Masakhane",
-    "author_url": "https://www.masakhane.io",
-    "url": "https://huggingface.co/datasets/masakhane/afrimgsm",
-    "n_languages": 18,
-    "tasks": [
-      "math"
-    ],
-    "parallel": true,
-    "base": "MGSM"
-  },
-  {
-    "name": "GSM8K-X",
-    "author": "OpenGPT-X",
-    "author_url": null,
-    "url": "https://huggingface.co/datasets/openGPT-X/gsm8kx",
-    "n_languages": 20,
-    "tasks": [
-      "math"
-    ],
-    "parallel": true,
-    "base": "MGSM"
-  },
-  {
-    "name": "Okapi ARC Challenge",
-    "author": "Academic",
-    "author_url": null,
-    "url": "https://huggingface.co/datasets/jon-tow/okapi_arc_challenge",
-    "n_languages": 31,
-    "tasks": [
-      "question_answering"
-    ],
-    "parallel": true,
-    "base": "AI2 ARC"
-  },
-  {
-    "name": "Uhuru ARC Easy",
-    "author": "Masakhane",
-    "author_url": "https://www.masakhane.io",
-    "url": "https://huggingface.co/datasets/masakhane/uhura-arc-easy",
-    "n_languages": 6,
-    "tasks": [
-      "question_answering"
-    ],
-    "parallel": true,
-    "base": "AI2 ARC"
-  },
-  {
-    "name": "Arc-X",
-    "author": "OpenGPT-X",
-    "author_url": null,
-    "url": "https://huggingface.co/datasets/openGPT-X/arcx",
-    "n_languages": 20,
-    "tasks": [
-      "question_answering"
-    ],
-    "parallel": true,
-    "base": "AI2 ARC"
-  },
-  {
-    "name": "Okapi TruthfulQA",
-    "author": "Academic",
-    "author_url": null,
-    "url": "https://huggingface.co/datasets/jon-tow/okapi_truthfulqa/tree/main/data",
-    "n_languages": 31,
-    "tasks": [
-      "question_answering"
-    ],
-    "parallel": true,
-    "base": "TruthfulQA"
-  },
-  {
-    "name": "Uhura TruthfulQA",
-    "author": "Masakhane",
-    "author_url": "https://www.masakhane.io",
-    "url": "https://huggingface.co/datasets/masakhane/uhura-truthfulqa",
-    "n_languages": 6,
-    "tasks": [
-      "question_answering"
-    ],
-    "parallel": true,
-    "base": "TruthfulQA"
-  },
-  {
-    "name": "TruthfulQA-X",
-    "author": "OpenGPT-X",
-    "author_url": null,
-    "url": "https://huggingface.co/datasets/openGPT-X/truthfulqax",
-    "n_languages": 20,
-    "tasks": [
-      "question_answering"
-    ],
-    "parallel": true,
-    "base": "TruthfulQA"
-  },
-  {
-    "name": "XNLI",
-    "author": "Meta",
-    "author_url": "https://ai.meta.com",
-    "url": "https://huggingface.co/datasets/facebook/xnli",
-    "n_languages": 14,
-    "tasks": [
-      "classification"
-    ],
-    "parallel": true,
-    "base": "XNLI"
-  },
-  {
-    "name": "AfriXNLI",
-    "author": "Masakhane",
-    "author_url": "https://www.masakhane.io",
-    "url": "https://huggingface.co/datasets/masakhane/afrixnli",
-    "n_languages": 18,
-    "tasks": [
-      "classification"
-    ],
-    "parallel": true,
-    "base": "XNLI"
-  },
-  {
-    "name": "Okapi HellaSwag",
-    "author": "Academic",
-    "author_url": null,
-    "url": "https://huggingface.co/datasets/jon-tow/okapi_hellaswag",
-    "n_languages": 31,
-    "tasks": [
-      "question_answering"
-    ],
-    "parallel": true,
-    "base": "HellaSwag"
-  },
-  {
-    "name": "HellaSwag-X",
-    "author": "OpenGPT-X",
-    "author_url": null,
-    "url": "https://huggingface.co/datasets/openGPT-X/hellaswagx",
-    "n_languages": 20,
-    "tasks": [
-      "question_answering"
-    ],
-    "parallel": true,
-    "base": "HellaSwag"
-  },
-  {
-    "name": "WikiANN / PAN-X",
-    "author": "Academic",
-    "author_url": null,
-    "url": "https://huggingface.co/datasets/unimelb-nlp/wikiann",
-    "n_languages": 176,
-    "tasks": [
-      "ner"
-    ],
-    "parallel": false
-  },
-  {
-    "name": "MSVAMP",
-    "author": "Microsoft",
-    "author_url": "https://microsoft.ai",
-    "url": "https://huggingface.co/datasets/Mathoctopus/MSVAMP",
-    "n_languages": 10,
-    "tasks": [
-      "math"
-    ],
-    "parallel": true
-  },
-  {
-    "name": "XLSUM",
-    "author": "Academic",
-    "author_url": null,
-    "url": "https://huggingface.co/datasets/csebuetnlp/xlsum",
-    "n_languages": 45,
-    "tasks": [
-      "summarization"
-    ],
-    "parallel": true
-  },
-  {
-    "name": "SEA-IFEVAL",
-    "author": "AI Singapore",
-    "author_url": "https://aisingapore.org",
-    "url": "https://huggingface.co/datasets/aisingapore/instruction_following-ifeval",
-    "n_languages": 7,
-    "tasks": [
-      "instruction_following"
-    ],
-    "parallel": true,
-    "base": "IFEVAL"
-  },
-  {
-    "name": "XTREME",
-    "author": "Google",
-    "author_url": "https://google.com",
-    "url": "https://huggingface.co/datasets/google/xtreme",
-    "n_languages": 40,
-    "tasks": [
-      "translation",
-      "classification",
-      "question_answering",
-      "ner"
-    ],
-    "parallel": null
-  },
-  {
-    "name": "XGLUE",
-    "author": "Microsoft",
-    "author_url": "https://microsoft.ai",
-    "url": "https://huggingface.co/datasets/microsoft/xglue",
-    "n_languages": 18,
-    "tasks": [
-      "pos"
-    ],
-    "parallel": null,
-    "base": "GLUE"
-  },
-  {
-    "name": "IndicGLUE",
-    "author": "AI4Bharat",
-    "author_url": "https://models.ai4bharat.org",
-    "url": "https://huggingface.co/datasets/ai4bharat/indic_glue",
-    "n_languages": 11,
-    "tasks": [
-      "question_answering"
-    ],
-    "parallel": null,
-    "base": "GLUE"
-  },
-  {
-    "name": "Opus Gnome",
-    "author": "Helsinki NLP",
-    "author_url": null,
-    "url": "https://huggingface.co/datasets/Helsinki-NLP/opus_gnome",
-    "n_languages": 187,
-    "tasks": [
-      "translation"
-    ],
-    "parallel": true
-  },
-  {
-    "name": "Opus Paracrawl",
-    "author": "Helsinki NLP",
-    "author_url": null,
-    "url": "https://huggingface.co/datasets/Helsinki-NLP/opus_paracrawl",
-    "n_languages": 43,
-    "tasks": [
-      "translation"
-    ],
-    "parallel": false
-  },
-  {
-    "name": "CCAligned",
-    "author": "Meta",
-    "author_url": "https://ai.meta.com",
-    "url": "https://huggingface.co/datasets/ahelk/ccaligned_multilingual",
-    "n_languages": 137,
-    "tasks": [
-      "translation"
-    ],
-    "parallel": false
-  },
-  {
-    "name": "OPUS Collection",
-    "author": "Helsinki NLP",
-    "author_url": null,
-    "url": "https://opus.nlpl.eu",
-    "n_languages": 747,
-    "tasks": [
-      "translation"
-    ],
-    "parallel": false
-  },
-  {
-    "name": "MasakhaNER",
-    "author": "Masakhane",
-    "author_url": "https://www.masakhane.io",
-    "url": "https://huggingface.co/datasets/masakhane/masakhaner",
-    "n_languages": 10,
-    "tasks": [
-      "ner"
-    ],
-    "parallel": null
-  },
-  {
-    "name": "Multilingual Sentiments",
-    "author": "Academic",
-    "author_url": null,
-    "url": "https://huggingface.co/datasets/tyqiangz/multilingual-sentiments",
-    "n_languages": 12,
-    "tasks": [
-      "sentiment_analysis"
-    ],
-    "parallel": null
-  },
-  {
-    "name": "CulturaX",
-    "author": "Academic",
-    "author_url": null,
-    "url": "https://huggingface.co/datasets/uonlp/CulturaX",
-    "n_languages": 167,
-    "tasks": [
-      "language_modeling"
-    ],
-    "parallel": false
-  },
-  {
-    "name": "Tülu 3 SFT Mixture",
-    "author": "AllenAI",
-    "author_url": "https://allenai.org",
-    "url": "https://huggingface.co/datasets/allenai/tulu-3-sft-mixture",
-    "n_languages": 70,
-    "tasks": [
-      "instruction_following"
-    ],
-    "parallel": false
-  },
-  {
-    "name": "xP3",
-    "author": "BigScience",
-    "author_url": "https://bigscience.huggingface.co",
-    "url": "https://huggingface.co/datasets/bigscience/xP3",
-    "n_languages": 46,
-    "tasks": [
-      "instruction_following"
-    ],
-    "parallel": false
-  },
-  {
-    "name": "Aya",
-    "author": "Cohere",
-    "author_url": "https://cohere.com",
-    "url": "https://huggingface.co/datasets/CohereForAI/aya_dataset",
-    "n_languages": 65,
-    "tasks": [
-      "instruction_following"
-    ],
-    "parallel": null
-  },
-  {
-    "name": "Lanfrica",
-    "author": "Lanfrica",
-    "author_url": "https://lanfrica.com",
-    "url": "https://lanfrica.com/records?language=yor&task=machine%20translation",
-    "n_languages": 2200,
-    "tasks": [
-      "datasets"
-    ],
-    "parallel": null
-  },
-  {
-    "name": "HuggingFace Languages",
-    "author": "HuggingFace",
-    "author_url": "https://huggingface.co",
-    "url": "https://huggingface.co/languages",
-    "n_languages": 4680,
-    "tasks": [
-      "datasets",
-      "models"
-    ],
-    "parallel": null
-  },
-  {
-    "name": "HuggingFace Multilingual Datasets",
-    "author": "HuggingFace",
-    "author_url": "https://huggingface.co",
-    "url": "https://huggingface.co/datasets?other=multilinguality:multilingual",
-    "n_languages": 2012,
-    "tasks": [
-      "datasets"
-    ],
-    "parallel": false
-  }
-]
evals/backend.py CHANGED
@@ -1,5 +1,5 @@
 import json
-
+import os
 import numpy as np
 import pandas as pd
 import uvicorn
@@ -9,10 +9,14 @@ from fastapi.middleware.gzip import GZipMiddleware
 from fastapi.responses import JSONResponse
 from fastapi.staticfiles import StaticFiles
 
-from languages import languages
-from models import models
 from countries import make_country_table
 
+with open("results.json", "r") as f:
+    results = json.load(f)
+scores = pd.DataFrame(results["scores"])
+languages = pd.DataFrame(results["languages"])
+models = pd.DataFrame(results["models"])
+
 def mean(lst):
     return sum(lst) / len(lst) if lst else None
 
@@ -30,7 +34,6 @@ def make_model_table(df, models):
     df["average"] = df[task_metrics].mean(axis=1)
     df = df.sort_values(by="average", ascending=False).reset_index()
     df = pd.merge(df, models, left_on="model", right_on="id", how="left")
-    df["creation_date"] = df["creation_date"].dt.strftime("%Y-%m-%d")
     df["rank"] = df.index + 1
     df = df[
         [
@@ -85,9 +88,6 @@ app = FastAPI()
 app.add_middleware(CORSMiddleware, allow_origins=["*"])
 app.add_middleware(GZipMiddleware, minimum_size=1000)
 
-with open("results.json", "r") as f:
-    results = pd.DataFrame(json.load(f))
-
 
 def serialize(df):
     return df.replace({np.nan: None}).to_dict(orient="records")
@@ -99,11 +99,11 @@ async def data(request: Request):
     data = json.loads(body)
     selected_languages = data.get("selectedLanguages", {})
     df = (
-        results.groupby(["model", "bcp_47", "task", "metric"]).mean().reset_index()
+        scores.groupby(["model", "bcp_47", "task", "metric"]).mean().reset_index()
     )
     # lang_results = pd.merge(languages, lang_results, on="bcp_47", how="outer")
     language_table = make_language_table(df, languages)
-    datasets_df = pd.read_json("data/datasets.json")
+    datasets_df = pd.read_json("datasets.json")
    if selected_languages:
         # the filtering is only applied for the model table and the country data
         df = df[df["bcp_47"].isin(lang["bcp_47"] for lang in selected_languages)]
@@ -117,7 +117,7 @@ async def data(request: Request):
     }
     return JSONResponse(content=all_tables)
 
-app.mount("/", StaticFiles(directory="frontend/public", html=True), name="frontend")
+app.mount("/", StaticFiles(directory="frontend/build", html=True), name="frontend")
 
 if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=8000)
+    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 8000)))
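Note, not part of the commit: the backend now reads one results.json bundle at import time instead of a bare list of score records. A sketch of the expected layout follows; the top-level keys and the score columns (model, bcp_47, task, metric) are taken from the diff, while all concrete values and any other fields are invented.

```python
# sketch of the results.json bundle that evals/backend.py now loads at import time
import json

import pandas as pd

bundle = {
    "languages": [{"bcp_47": "de", "language_name": "German"}],            # fields invented
    "models": [{"id": "microsoft/phi-4", "creation_date": "2024-12-12"}],  # fields invented
    "scores": [
        {"model": "microsoft/phi-4", "bcp_47": "de", "task": "translation",
         "metric": "bleu", "score": 0.42}  # the "score" column is an assumption
    ],
}
with open("results.json", "w") as f:
    json.dump(bundle, f, indent=2, ensure_ascii=False)

# what the backend then does with the bundle
with open("results.json", "r") as f:
    results = json.load(f)
scores = pd.DataFrame(results["scores"])
languages = pd.DataFrame(results["languages"])
models = pd.DataFrame(results["models"])
print(scores.groupby(["model", "bcp_47", "task", "metric"]).mean().reset_index())
```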
evals/countries.py CHANGED
@@ -1,26 +1,11 @@
 import re
-import xml.etree.ElementTree as ET
 from collections import defaultdict
 from joblib.memory import Memory
 import pandas as pd
 from language_data.population_data import LANGUAGE_SPEAKING_POPULATION
-from language_data.util import data_filename
 
 cache = Memory(location=".cache", verbose=0).cache
 
-@cache
-def get_population_data():
-    filename = data_filename("supplementalData.xml")
-    root = ET.fromstring(open(filename).read())
-    territories = root.findall("./territoryInfo/territory")
-
-    data = {}
-    for territory in territories:
-        t_code = territory.attrib["type"]
-        t_population = float(territory.attrib["population"])
-        data[t_code] = t_population
-    return data
-
 
 def population(bcp_47):
     items = {
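Note, not part of the commit: the removed helper parsed CLDR's supplementalData.xml by hand, while the import that stays already bundles a speaker-population table. A rough sketch of what it exposes; the exact tags and counts depend on the installed language-data release.

```python
# LANGUAGE_SPEAKING_POPULATION maps language tags to estimated speaker counts;
# which tags appear is an assumption about the installed language-data version
from language_data.population_data import LANGUAGE_SPEAKING_POPULATION

top = sorted(LANGUAGE_SPEAKING_POPULATION.items(), key=lambda kv: kv[1], reverse=True)[:5]
for tag, speakers in top:
    print(tag, speakers)
```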
evals/main.py CHANGED
@@ -1,10 +1,13 @@
 import asyncio
 import json
 
+import numpy as np
+import pandas as pd
+from tqdm.asyncio import tqdm_asyncio
+
 from languages import languages
 from models import model_fast, models
 from tasks import tasks
-from tqdm.asyncio import tqdm_asyncio
 
 # ===== config =====
 
@@ -33,11 +36,18 @@ async def evaluate():
     ]
     return await tqdm_asyncio.gather(*results, miniters=1)
 
-
+def serialize(df):
+    return df.replace({np.nan: None, pd.NA: None}).to_dict(orient="records")
 
 async def main():
+    models["creation_date"] = models["creation_date"].apply(lambda x: x.isoformat())
     results = await evaluate()
     results = [r for group in results for r in group]
+    results = {
+        "languages": serialize(languages),
+        "models": serialize(models),
+        "scores": results,
+    }
     with open("results.json", "w") as f:
         json.dump(results, f, indent=2, ensure_ascii=False)
 
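Note, not part of the commit: the serialize helper and the creation_date conversion exist because NaN and pandas timestamps are not JSON-serializable. A small reproduction with invented data; the helper mirrors the one added above.

```python
# NaN -> None and Timestamp -> ISO string, so json.dump succeeds
import json

import numpy as np
import pandas as pd

models = pd.DataFrame(
    {"id": ["microsoft/phi-4"], "creation_date": [pd.Timestamp("2024-12-12")], "size_b": [np.nan]}
)

def serialize(df):
    return df.replace({np.nan: None, pd.NA: None}).to_dict(orient="records")

models["creation_date"] = models["creation_date"].apply(lambda x: x.isoformat())
print(json.dumps(serialize(models)))
# expected: [{"id": "microsoft/phi-4", "creation_date": "2024-12-12T00:00:00", "size_b": null}]
```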
 
frontend/public/README.md DELETED
@@ -1,35 +0,0 @@
----
-title: AI Language Monitor
-emoji: 🌍
-colorFrom: purple
-colorTo: pink
-sdk: static
-license: cc-by-sa-4.0
-short_description: Evaluating LLM performance across all human languages.
-datasets:
-- openlanguagedata/flores_plus
-- google/fleurs
-- mozilla-foundation/common_voice_1_0
-models:
-- meta-llama/Llama-3.3-70B-Instruct
-- mistralai/Mistral-Small-24B-Instruct-2501
-- deepseek-ai/DeepSeek-V3
-- microsoft/phi-4
-- openai/whisper-large-v3
-- google/gemma-3-27b-it
-tags:
-- leaderboard
-- submission:manual
-- test:public
-- judge:auto
-- modality:text
-- modality:artefacts
-- eval:generation
-- language:English
-- language:German
----
-
-<!--
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
-For tag meaning, see https://huggingface.co/spaces/leaderboards/LeaderboardsExplorer
--->
frontend/src/App.js CHANGED
@@ -62,10 +62,12 @@ function App () {
         <p style={{ fontSize: '1.15rem', color: '#555', marginTop: '0' }}>
           Tracking language proficiency of AI models for every language
         </p>
-        <AutoComplete
-          languages={data?.language_table}
-          onComplete={items => setSelectedLanguages(items)}
-        />
+        {data && (
+          <AutoComplete
+            languages={data?.language_table}
+            onComplete={items => setSelectedLanguages(items)}
+          />
+        )}
       </header>
       <main
         style={{
frontend/src/components/AutoComplete.js CHANGED
@@ -5,8 +5,8 @@ const AutoComplete = ({ languages, onComplete }) => {
   const [suggestions, setSuggestions] = useState([])
 
   const exampleCodes = ['de', 'fr', 'ar', 'hi', 'sw', 'fa']
-  const exampleLanguages = languages?.filter(item =>
-    exampleCodes.includes(item.bcp_47)
+  const exampleLanguages = exampleCodes.map(code =>
+    languages?.find(item => item.bcp_47 === code)
   )
 
   const search = e => {
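Note, not part of the commit: the switch from filter to map plus find changes two things: the example languages now follow the order of exampleCodes rather than the order of the table, and a code that is missing from the table yields an empty slot instead of being silently dropped. A Python analogue with invented data:

```python
# analogue of the JS change: filter (old behaviour) vs. map + find (new behaviour)
languages = [{"bcp_47": "fr"}, {"bcp_47": "de"}]
example_codes = ["de", "fr", "xx"]

old = [item for item in languages if item["bcp_47"] in example_codes]
new = [next((item for item in languages if item["bcp_47"] == code), None)
       for code in example_codes]

print(old)  # [{'bcp_47': 'fr'}, {'bcp_47': 'de'}]        (table order)
print(new)  # [{'bcp_47': 'de'}, {'bcp_47': 'fr'}, None]  (exampleCodes order, gap for 'xx')
```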
pyproject.toml CHANGED
@@ -9,6 +9,8 @@ dependencies = [
     "uvicorn>=0.34.0",
     "pandas>=2.2.3",
     "numpy>=2.1.2",
+    "joblib>=1.4.2",
+    "language-data>=1.3.0",
 ]
 
 [tool.uv]
results.json CHANGED
The diff for this file is too large to render.
 
uv.lock CHANGED
@@ -845,6 +845,8 @@ version = "0.1.0"
 source = { virtual = "." }
 dependencies = [
     { name = "fastapi" },
+    { name = "joblib" },
+    { name = "language-data" },
     { name = "numpy" },
     { name = "pandas" },
     { name = "uvicorn" },
@@ -874,6 +876,8 @@ dev = [
 [package.metadata]
 requires-dist = [
     { name = "fastapi", specifier = ">=0.115.8" },
+    { name = "joblib", specifier = ">=1.4.2" },
+    { name = "language-data", specifier = ">=1.3.0" },
     { name = "numpy", specifier = ">=2.1.2" },
     { name = "pandas", specifier = ">=2.2.3" },
     { name = "uvicorn", specifier = ">=0.34.0" },