David Pomerenke committed
Commit 4d13673 · Parent(s): 92d8154

Add Dockerfile
Browse files
- .dockerignore +5 -0
- Dockerfile +17 -0
- README.md +37 -0
- data/datasets.json +0 -484
- evals/backend.py +11 -11
- evals/countries.py +0 -15
- evals/main.py +12 -2
- frontend/public/README.md +0 -35
- frontend/src/App.js +6 -4
- frontend/src/components/AutoComplete.js +2 -2
- pyproject.toml +2 -0
- results.json +0 -0
- uv.lock +4 -0
.dockerignore
ADDED
@@ -0,0 +1,5 @@
+.git
+.cache
+.venv
+.env
+frontend/node_modules
Dockerfile
ADDED
@@ -0,0 +1,17 @@
+FROM node:20-alpine AS build
+WORKDIR /frontend
+COPY frontend/package.json frontend/package-lock.json ./
+RUN npm ci
+COPY frontend/public/ public/
+COPY frontend/src/ src/
+RUN npm run build
+
+FROM --platform=linux/amd64 ghcr.io/astral-sh/uv:python3.12-bookworm
+WORKDIR /app
+COPY pyproject.toml uv.lock ./
+RUN uv sync --frozen --no-dev
+COPY evals/ evals/
+COPY --from=build /frontend/build /app/frontend/build
+COPY results.json datasets.json ./
+EXPOSE 8000
+CMD ["uv", "run", "--no-dev", "evals/backend.py"]
README.md
CHANGED
@@ -1,3 +1,40 @@
+---
+title: AI Language Monitor
+emoji: 🌍
+colorFrom: purple
+colorTo: pink
+sdk: static
+license: cc-by-sa-4.0
+short_description: Evaluating LLM performance across all human languages.
+datasets:
+- openlanguagedata/flores_plus
+- google/fleurs
+- mozilla-foundation/common_voice_1_0
+models:
+- meta-llama/Llama-3.3-70B-Instruct
+- mistralai/Mistral-Small-24B-Instruct-2501
+- deepseek-ai/DeepSeek-V3
+- microsoft/phi-4
+- openai/whisper-large-v3
+- google/gemma-3-27b-it
+tags:
+- leaderboard
+- submission:manual
+- test:public
+- judge:auto
+- modality:text
+- modality:artefacts
+- eval:generation
+- language:English
+- language:German
+---
+
+<!--
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+For tag meaning, see https://huggingface.co/spaces/leaderboards/LeaderboardsExplorer
+-->
+
+
 [](https://huggingface.co/spaces/datenlabor-bmz/ai-language-monitor)
 
 # AI Language Monitor 🌍
data/datasets.json
DELETED
@@ -1,484 +0,0 @@
-[
-  {
-    "name": "FLORES+",
-    "author": "Meta",
-    "author_url": "https://ai.meta.com",
-    "url": "https://huggingface.co/datasets/openlanguagedata/flores_plus",
-    "n_languages": 200,
-    "tasks": [
-      "translation",
-      "classification",
-      "language_modeling"
-    ],
-    "parallel": true,
-    "base": "FLORES",
-    "implemented": true
-  },
-  {
-    "name": "FLEURS",
-    "author": "Meta",
-    "author_url": "https://ai.meta.com",
-    "url": "https://huggingface.co/datasets/google/fleurs",
-    "n_languages": 102,
-    "tasks": [
-      "speech_recognition"
-    ],
-    "parallel": true,
-    "base": "FLORES",
-    "implemented": true
-  },
-  {
-    "name": "CommonVoice",
-    "author": "Mozilla",
-    "author_url": "https://mozilla.ai",
-    "url": "https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0",
-    "n_languages": 124,
-    "tasks": [
-      "speech_recognition"
-    ],
-    "parallel": null
-  },
-  {
-    "name": "MMMLU",
-    "author": "OpenAI",
-    "author_url": "https://openai.com",
-    "url": "https://huggingface.co/datasets/openai/MMMLU",
-    "n_languages": "14",
-    "tasks": [
-      "question_answering"
-    ],
-    "parallel": true,
-    "base": "MMLU"
-  },
-  {
-    "name": "AfriMMLU",
-    "author": "Masakhane",
-    "author_url": "https://www.masakhane.io",
-    "url": "https://huggingface.co/datasets/masakhane/afrimmlu",
-    "n_languages": "17",
-    "tasks": [
-      "question_answering"
-    ],
-    "parallel": true,
-    "base": "MMLU"
-  },
-  {
-    "name": "Okapi MMLU",
-    "author": "Academic",
-    "author_url": null,
-    "url": "https://huggingface.co/datasets/jon-tow/okapi_mmlu",
-    "n_languages": 16,
-    "tasks": [
-      "question_answering"
-    ],
-    "parallel": true,
-    "base": "MMLU"
-  },
-  {
-    "name": "MMLU-X",
-    "author": "OpenGPT-X",
-    "author_url": null,
-    "url": "https://huggingface.co/datasets/openGPT-X/mmlux",
-    "n_languages": 20,
-    "tasks": [
-      "question_answering"
-    ],
-    "parallel": true,
-    "base": "MMLU"
-  },
-  {
-    "name": "Global MMLU",
-    "author": "Cohere",
-    "author_url": "https://cohere.com",
-    "url": "https://huggingface.co/datasets/CohereForAI/Global-MMLU",
-    "n_languages": 42,
-    "tasks": [
-      "question_answering"
-    ],
-    "parallel": true,
-    "base": "MMLU"
-  },
-  {
-    "name": "MGSM",
-    "author": "Google",
-    "author_url": "https://google.com",
-    "url": "https://huggingface.co/datasets/juletxara/mgsm",
-    "n_languages": 10,
-    "tasks": [
-      "math"
-    ],
-    "parallel": true,
-    "base": "MGSM"
-  },
-  {
-    "name": "AfriMGSM",
-    "author": "Masakhane",
-    "author_url": "https://www.masakhane.io",
-    "url": "https://huggingface.co/datasets/masakhane/afrimgsm",
-    "n_languages": 18,
-    "tasks": [
-      "math"
-    ],
-    "parallel": true,
-    "base": "MGSM"
-  },
-  {
-    "name": "GSM8K-X",
-    "author": "OpenGPT-X",
-    "author_url": null,
-    "url": "https://huggingface.co/datasets/openGPT-X/gsm8kx",
-    "n_languages": 20,
-    "tasks": [
-      "math"
-    ],
-    "parallel": true,
-    "base": "MGSM"
-  },
-  {
-    "name": "Okapi ARC Challenge",
-    "author": "Academic",
-    "author_url": null,
-    "url": "https://huggingface.co/datasets/jon-tow/okapi_arc_challenge",
-    "n_languages": 31,
-    "tasks": [
-      "question_answering"
-    ],
-    "parallel": true,
-    "base": "AI2 ARC"
-  },
-  {
-    "name": "Uhuru ARC Easy",
-    "author": "Masakhane",
-    "author_url": "https://www.masakhane.io",
-    "url": "https://huggingface.co/datasets/masakhane/uhura-arc-easy",
-    "n_languages": 6,
-    "tasks": [
-      "question_answering"
-    ],
-    "parallel": true,
-    "base": "AI2 ARC"
-  },
-  {
-    "name": "Arc-X",
-    "author": "OpenGPT-X",
-    "author_url": null,
-    "url": "https://huggingface.co/datasets/openGPT-X/arcx",
-    "n_languages": 20,
-    "tasks": [
-      "question_answering"
-    ],
-    "parallel": true,
-    "base": "AI2 ARC"
-  },
-  {
-    "name": "Okapi TruthfulQA",
-    "author": "Academic",
-    "author_url": null,
-    "url": "https://huggingface.co/datasets/jon-tow/okapi_truthfulqa/tree/main/data",
-    "n_languages": 31,
-    "tasks": [
-      "question_answering"
-    ],
-    "parallel": true,
-    "base": "TruthfulQA"
-  },
-  {
-    "name": "Uhura TruthfulQA",
-    "author": "Masakhane",
-    "author_url": "https://www.masakhane.io",
-    "url": "https://huggingface.co/datasets/masakhane/uhura-truthfulqa",
-    "n_languages": 6,
-    "tasks": [
-      "question_answering"
-    ],
-    "parallel": true,
-    "base": "TruthfulQA"
-  },
-  {
-    "name": "TruthfulQA-X",
-    "author": "OpenGPT-X",
-    "author_url": null,
-    "url": "https://huggingface.co/datasets/openGPT-X/truthfulqax",
-    "n_languages": 20,
-    "tasks": [
-      "question_answering"
-    ],
-    "parallel": true,
-    "base": "TruthfulQA"
-  },
-  {
-    "name": "XNLI",
-    "author": "Meta",
-    "author_url": "https://ai.meta.com",
-    "url": "https://huggingface.co/datasets/facebook/xnli",
-    "n_languages": 14,
-    "tasks": [
-      "classification"
-    ],
-    "parallel": true,
-    "base": "XNLI"
-  },
-  {
-    "name": "AfriXNLI",
-    "author": "Masakhane",
-    "author_url": "https://www.masakhane.io",
-    "url": "https://huggingface.co/datasets/masakhane/afrixnli",
-    "n_languages": 18,
-    "tasks": [
-      "classification"
-    ],
-    "parallel": true,
-    "base": "XNLI"
-  },
-  {
-    "name": "Okapi HellaSwag",
-    "author": "Academic",
-    "author_url": null,
-    "url": "https://huggingface.co/datasets/jon-tow/okapi_hellaswag",
-    "n_languages": 31,
-    "tasks": [
-      "question_answering"
-    ],
-    "parallel": true,
-    "base": "HellaSwag"
-  },
-  {
-    "name": "HellaSwag-X",
-    "author": "OpenGPT-X",
-    "author_url": null,
-    "url": "https://huggingface.co/datasets/openGPT-X/hellaswagx",
-    "n_languages": 20,
-    "tasks": [
-      "question_answering"
-    ],
-    "parallel": true,
-    "base": "HellaSwag"
-  },
-  {
-    "name": "WikiANN / PAN-X",
-    "author": "Academic",
-    "author_url": null,
-    "url": "https://huggingface.co/datasets/unimelb-nlp/wikiann",
-    "n_languages": 176,
-    "tasks": [
-      "ner"
-    ],
-    "parallel": false
-  },
-  {
-    "name": "MSVAMP",
-    "author": "Microsoft",
-    "author_url": "https://microsoft.ai",
-    "url": "https://huggingface.co/datasets/Mathoctopus/MSVAMP",
-    "n_languages": 10,
-    "tasks": [
-      "math"
-    ],
-    "parallel": true
-  },
-  {
-    "name": "XLSUM",
-    "author": "Academic",
-    "author_url": null,
-    "url": "https://huggingface.co/datasets/csebuetnlp/xlsum",
-    "n_languages": 45,
-    "tasks": [
-      "summarization"
-    ],
-    "parallel": true
-  },
-  {
-    "name": "SEA-IFEVAL",
-    "author": "AI Singapore",
-    "author_url": "https://aisingapore.org",
-    "url": "https://huggingface.co/datasets/aisingapore/instruction_following-ifeval",
-    "n_languages": 7,
-    "tasks": [
-      "instruction_following"
-    ],
-    "parallel": true,
-    "base": "IFEVAL"
-  },
-  {
-    "name": "XTREME",
-    "author": "Google",
-    "author_url": "https://google.com",
-    "url": "https://huggingface.co/datasets/google/xtreme",
-    "n_languages": 40,
-    "tasks": [
-      "translation",
-      "classification",
-      "question_answering",
-      "ner"
-    ],
-    "parallel": null
-  },
-  {
-    "name": "XGLUE",
-    "author": "Microsoft",
-    "author_url": "https://microsoft.ai",
-    "url": "https://huggingface.co/datasets/microsoft/xglue",
-    "n_languages": 18,
-    "tasks": [
-      "pos"
-    ],
-    "parallel": null,
-    "base": "GLUE"
-  },
-  {
-    "name": "IndicGLUE",
-    "author": "AI4Bharat",
-    "author_url": "https://models.ai4bharat.org",
-    "url": "https://huggingface.co/datasets/ai4bharat/indic_glue",
-    "n_languages": 11,
-    "tasks": [
-      "question_answering"
-    ],
-    "parallel": null,
-    "base": "GLUE"
-  },
-  {
-    "name": "Opus Gnome",
-    "author": "Helsinki NLP",
-    "author_url": null,
-    "url": "https://huggingface.co/datasets/Helsinki-NLP/opus_gnome",
-    "n_languages": 187,
-    "tasks": [
-      "translation"
-    ],
-    "parallel": true
-  },
-  {
-    "name": "Opus Paracrawl",
-    "author": "Helsinki NLP",
-    "author_url": null,
-    "url": "https://huggingface.co/datasets/Helsinki-NLP/opus_paracrawl",
-    "n_languages": 43,
-    "tasks": [
-      "translation"
-    ],
-    "parallel": false
-  },
-  {
-    "name": "CCAligned",
-    "author": "Meta",
-    "author_url": "https://ai.meta.com",
-    "url": "https://huggingface.co/datasets/ahelk/ccaligned_multilingual",
-    "n_languages": 137,
-    "tasks": [
-      "translation"
-    ],
-    "parallel": false
-  },
-  {
-    "name": "OPUS Collection",
-    "author": "Helsinki NLP",
-    "author_url": null,
-    "url": "https://opus.nlpl.eu",
-    "n_languages": 747,
-    "tasks": [
-      "translation"
-    ],
-    "parallel": false
-  },
-  {
-    "name": "MasakhaNER",
-    "author": "Masakhane",
-    "author_url": "https://www.masakhane.io",
-    "url": "https://huggingface.co/datasets/masakhane/masakhaner",
-    "n_languages": 10,
-    "tasks": [
-      "ner"
-    ],
-    "parallel": null
-  },
-  {
-    "name": "Multilingual Sentiments",
-    "author": "Academic",
-    "author_url": null,
-    "url": "https://huggingface.co/datasets/tyqiangz/multilingual-sentiments",
-    "n_languages": 12,
-    "tasks": [
-      "sentiment_analysis"
-    ],
-    "parallel": null
-  },
-  {
-    "name": "CulturaX",
-    "author": "Academic",
-    "author_url": null,
-    "url": "https://huggingface.co/datasets/uonlp/CulturaX",
-    "n_languages": 167,
-    "tasks": [
-      "language_modeling"
-    ],
-    "parallel": false
-  },
-  {
-    "name": "Tülu 3 SFT Mixture",
-    "author": "AllenAI",
-    "author_url": "https://allenai.org",
-    "url": "https://huggingface.co/datasets/allenai/tulu-3-sft-mixture",
-    "n_languages": 70,
-    "tasks": [
-      "instruction_following"
-    ],
-    "parallel": false
-  },
-  {
-    "name": "xP3",
-    "author": "BigScience",
-    "author_url": "https://bigscience.huggingface.co",
-    "url": "https://huggingface.co/datasets/bigscience/xP3",
-    "n_languages": 46,
-    "tasks": [
-      "instruction_following"
-    ],
-    "parallel": false
-  },
-  {
-    "name": "Aya",
-    "author": "Cohere",
-    "author_url": "https://cohere.com",
-    "url": "https://huggingface.co/datasets/CohereForAI/aya_dataset",
-    "n_languages": 65,
-    "tasks": [
-      "instruction_following"
-    ],
-    "parallel": null
-  },
-  {
-    "name": "Lanfrica",
-    "author": "Lanfrica",
-    "author_url": "https://lanfrica.com",
-    "url": "https://lanfrica.com/records?language=yor&task=machine%20translation",
-    "n_languages": 2200,
-    "tasks": [
-      "datasets"
-    ],
-    "parallel": null
-  },
-  {
-    "name": "HuggingFace Languages",
-    "author": "HuggingFace",
-    "author_url": "https://huggingface.co",
-    "url": "https://huggingface.co/languages",
-    "n_languages": 4680,
-    "tasks": [
-      "datasets",
-      "models"
-    ],
-    "parallel": null
-  },
-  {
-    "name": "HuggingFace Multilingual Datasets",
-    "author": "HuggingFace",
-    "author_url": "https://huggingface.co",
-    "url": "https://huggingface.co/datasets?other=multilinguality:multilingual",
-    "n_languages": 2012,
-    "tasks": [
-      "datasets"
-    ],
-    "parallel": false
-  }
-]
evals/backend.py
CHANGED
@@ -1,5 +1,5 @@
 import json
-
+import os
 import numpy as np
 import pandas as pd
 import uvicorn
@@ -9,10 +9,14 @@ from fastapi.middleware.gzip import GZipMiddleware
 from fastapi.responses import JSONResponse
 from fastapi.staticfiles import StaticFiles
 
-from languages import languages
-from models import models
 from countries import make_country_table
 
+with open("results.json", "r") as f:
+    results = json.load(f)
+scores = pd.DataFrame(results["scores"])
+languages = pd.DataFrame(results["languages"])
+models = pd.DataFrame(results["models"])
+
 def mean(lst):
     return sum(lst) / len(lst) if lst else None
 
@@ -30,7 +34,6 @@ def make_model_table(df, models):
     df["average"] = df[task_metrics].mean(axis=1)
     df = df.sort_values(by="average", ascending=False).reset_index()
     df = pd.merge(df, models, left_on="model", right_on="id", how="left")
-    df["creation_date"] = df["creation_date"].dt.strftime("%Y-%m-%d")
     df["rank"] = df.index + 1
     df = df[
         [
@@ -85,9 +88,6 @@ app = FastAPI()
 app.add_middleware(CORSMiddleware, allow_origins=["*"])
 app.add_middleware(GZipMiddleware, minimum_size=1000)
 
-with open("results.json", "r") as f:
-    results = pd.DataFrame(json.load(f))
-
 
 def serialize(df):
     return df.replace({np.nan: None}).to_dict(orient="records")
@@ -99,11 +99,11 @@ async def data(request: Request):
     data = json.loads(body)
     selected_languages = data.get("selectedLanguages", {})
     df = (
-
+        scores.groupby(["model", "bcp_47", "task", "metric"]).mean().reset_index()
     )
     # lang_results = pd.merge(languages, lang_results, on="bcp_47", how="outer")
     language_table = make_language_table(df, languages)
-    datasets_df = pd.read_json("
+    datasets_df = pd.read_json("datasets.json")
     if selected_languages:
         # the filtering is only applied for the model table and the country data
         df = df[df["bcp_47"].isin(lang["bcp_47"] for lang in selected_languages)]
@@ -117,7 +117,7 @@
     }
     return JSONResponse(content=all_tables)
 
-app.mount("/", StaticFiles(directory="frontend/
+app.mount("/", StaticFiles(directory="frontend/build", html=True), name="frontend")
 
 if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=8000)
+    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 8000)))
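
Note (not part of the diff): a minimal sketch of the aggregation backend.py now runs on the flat score records. The grouping columns are taken from the groupby call above; the name of the numeric value column ("score") is an assumption for illustration.

import pandas as pd

# Each raw record is one measurement; repeated measurements are averaged
# into one row per (model, language, task, metric).
scores = pd.DataFrame([
    {"model": "microsoft/phi-4", "bcp_47": "de", "task": "translation", "metric": "bleu", "score": 0.40},
    {"model": "microsoft/phi-4", "bcp_47": "de", "task": "translation", "metric": "bleu", "score": 0.44},
])
df = scores.groupby(["model", "bcp_47", "task", "metric"]).mean().reset_index()
print(df)  # one aggregated row with the mean score
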
evals/countries.py
CHANGED
@@ -1,26 +1,11 @@
 import re
-import xml.etree.ElementTree as ET
 from collections import defaultdict
 from joblib.memory import Memory
 import pandas as pd
 from language_data.population_data import LANGUAGE_SPEAKING_POPULATION
-from language_data.util import data_filename
 
 cache = Memory(location=".cache", verbose=0).cache
 
-@cache
-def get_population_data():
-    filename = data_filename("supplementalData.xml")
-    root = ET.fromstring(open(filename).read())
-    territories = root.findall("./territoryInfo/territory")
-
-    data = {}
-    for territory in territories:
-        t_code = territory.attrib["type"]
-        t_population = float(territory.attrib["population"])
-        data[t_code] = t_population
-    return data
-
 
 def population(bcp_47):
     items = {
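
Note (not part of the diff): countries.py keeps the joblib disk cache shown above, which is why joblib and language-data are added to pyproject.toml and uv.lock below. A minimal sketch of how that decorator behaves; the decorated function here is purely illustrative.

from joblib.memory import Memory

cache = Memory(location=".cache", verbose=0).cache  # same setup as in countries.py

@cache
def expensive(x):
    # Runs only on a cache miss; later calls with the same argument are read back from .cache.
    return x * 2

print(expensive(21))
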
evals/main.py
CHANGED
@@ -1,10 +1,13 @@
 import asyncio
 import json
 
+import numpy as np
+import pandas as pd
+from tqdm.asyncio import tqdm_asyncio
+
 from languages import languages
 from models import model_fast, models
 from tasks import tasks
-from tqdm.asyncio import tqdm_asyncio
 
 # ===== config =====
 
@@ -33,11 +36,18 @@ async def evaluate():
     ]
     return await tqdm_asyncio.gather(*results, miniters=1)
 
-
+def serialize(df):
+    return df.replace({np.nan: None, pd.NA: None}).to_dict(orient="records")
 
 async def main():
+    models["creation_date"] = models["creation_date"].apply(lambda x: x.isoformat())
     results = await evaluate()
     results = [r for group in results for r in group]
+    results = {
+        "languages": serialize(languages),
+        "models": serialize(models),
+        "scores": results,
+    }
    with open("results.json", "w") as f:
        json.dump(results, f, indent=2, ensure_ascii=False)
 
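
Note (not part of the diff): the results.json layout that evals/main.py now writes and evals/backend.py reads back. The top-level keys and the record column names come from the diffs above; the individual field values are illustrative placeholders.

import json

results = {
    "languages": [{"bcp_47": "de"}],                                       # serialize(languages)
    "models": [{"id": "microsoft/phi-4", "creation_date": "2024-01-01"}],  # serialize(models)
    "scores": [{"model": "microsoft/phi-4", "bcp_47": "de", "task": "translation", "metric": "bleu"}],
}
with open("results.json", "w") as f:
    json.dump(results, f, indent=2, ensure_ascii=False)
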
frontend/public/README.md
DELETED
@@ -1,35 +0,0 @@
----
-title: AI Language Monitor
-emoji: 🌍
-colorFrom: purple
-colorTo: pink
-sdk: static
-license: cc-by-sa-4.0
-short_description: Evaluating LLM performance across all human languages.
-datasets:
-- openlanguagedata/flores_plus
-- google/fleurs
-- mozilla-foundation/common_voice_1_0
-models:
-- meta-llama/Llama-3.3-70B-Instruct
-- mistralai/Mistral-Small-24B-Instruct-2501
-- deepseek-ai/DeepSeek-V3
-- microsoft/phi-4
-- openai/whisper-large-v3
-- google/gemma-3-27b-it
-tags:
-- leaderboard
-- submission:manual
-- test:public
-- judge:auto
-- modality:text
-- modality:artefacts
-- eval:generation
-- language:English
-- language:German
----
-
-<!--
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
-For tag meaning, see https://huggingface.co/spaces/leaderboards/LeaderboardsExplorer
--->
frontend/src/App.js
CHANGED
@@ -62,10 +62,12 @@ function App () {
         <p style={{ fontSize: '1.15rem', color: '#555', marginTop: '0' }}>
           Tracking language proficiency of AI models for every language
         </p>
-
-
-
-
+        {data && (
+          <AutoComplete
+            languages={data?.language_table}
+            onComplete={items => setSelectedLanguages(items)}
+          />
+        )}
       </header>
       <main
         style={{
frontend/src/components/AutoComplete.js
CHANGED
@@ -5,8 +5,8 @@ const AutoComplete = ({ languages, onComplete }) => {
   const [suggestions, setSuggestions] = useState([])
 
   const exampleCodes = ['de', 'fr', 'ar', 'hi', 'sw', 'fa']
-  const exampleLanguages =
-
+  const exampleLanguages = exampleCodes.map(code =>
+    languages?.find(item => item.bcp_47 === code)
   )
 
   const search = e => {
pyproject.toml
CHANGED
@@ -9,6 +9,8 @@ dependencies = [
    "uvicorn>=0.34.0",
    "pandas>=2.2.3",
    "numpy>=2.1.2",
+    "joblib>=1.4.2",
+    "language-data>=1.3.0",
 ]
 
 [tool.uv]
results.json
CHANGED
The diff for this file is too large to render.
See raw diff
uv.lock
CHANGED
@@ -845,6 +845,8 @@ version = "0.1.0"
 source = { virtual = "." }
 dependencies = [
     { name = "fastapi" },
+    { name = "joblib" },
+    { name = "language-data" },
     { name = "numpy" },
     { name = "pandas" },
     { name = "uvicorn" },
@@ -874,6 +876,8 @@ dev = [
 [package.metadata]
 requires-dist = [
     { name = "fastapi", specifier = ">=0.115.8" },
+    { name = "joblib", specifier = ">=1.4.2" },
+    { name = "language-data", specifier = ">=1.3.0" },
     { name = "numpy", specifier = ">=2.1.2" },
     { name = "pandas", specifier = ">=2.2.3" },
     { name = "uvicorn", specifier = ">=0.34.0" },