[ { "name": "FLORES+", "author": "Meta", "author_url": "https://ai.meta.com", "url": "https://huggingface.co/datasets/openlanguagedata/flores_plus", "n_languages": 200, "tasks": [ "translation", "classification" ], "parallel": true, "translation": "human", "base": "FLORES", "implemented": true, "group": "Translation" }, { "name": "CCAligned", "author": "Meta", "author_url": "https://ai.meta.com", "url": "https://huggingface.co/datasets/ahelk/ccaligned_multilingual", "n_languages": 137, "tasks": [ "translation" ], "parallel": false, "group": "Translation" }, { "name": "OPUS Collection", "author": "Helsinki NLP", "author_url": null, "url": "https://opus.nlpl.eu", "n_languages": 747, "tasks": [ "translation" ], "parallel": false, "group": "Translation" }, { "name": "Global MMLU", "author": "Cohere", "author_url": "https://cohere.com", "url": "https://huggingface.co/datasets/CohereForAI/Global-MMLU", "n_languages": 42, "languages": [ "am", "ar", "bn", "cs", "de", "el", "en", "es", "fa", "fil", "fr", "ha", "he", "hi", "id", "ig", "it", "ja", "ko", "ky", "lt", "mg", "ms", "ne", "nl", "ny", "pl", "pt", "ro", "ru", "si", "sn", "so", "sr", "sv", "sw", "te", "tr", "uk", "vi", "yo", "zh" ], "tasks": [ "question_answering" ], "parallel": true, "translation": "mixed", "base": "MMLU", "implemented": true, "group": "Multitask Language Understanding" }, { "name": "MMMLU", "author": "OpenAI", "author_url": "https://openai.com", "url": "https://huggingface.co/datasets/openai/MMMLU", "n_languages": 14, "languages": [ "ar", "bn", "de", "es", "fr", "hi", "id", "it", "ja", "ko", "pt", "sw", "yo", "zh" ], "tasks": [ "question_answering" ], "parallel": true, "translation": "human", "base": "MMLU", "implemented": true, "group": "Multitask Language Understanding" }, { "name": "AfriMMLU", "author": "Masakhane", "author_url": "https://www.masakhane.io", "url": "https://huggingface.co/datasets/masakhane/afrimmlu", "n_languages": 18, "languages": [ "am", "en", "ee", "fr", "ha", "ig", "rw", 
"ln", "lg", "om", "sn", "st", "sw", "tw", "wo", "xh", "yo", "zu" ], "tasks": [ "question_answering" ], "parallel": true, "translation": "human", "base": "MMLU", "implemented": true, "group": "Multitask Language Understanding" }, { "name": "Okapi MMLU", "author": "Academic", "author_url": null, "url": "https://huggingface.co/datasets/jon-tow/okapi_mmlu", "n_languages": 31, "languages": [ "ar", "bn", "ca", "da", "de", "es", "eu", "fr", "gu", "hi", "hr", "hu", "hy", "id", "it", "kn", "ml", "mr", "ne", "nl", "pt", "ro", "ru", "sk", "sr", "sv", "ta", "te", "uk", "vi", "zh" ], "tasks": [ "question_answering" ], "parallel": true, "translation": "machine", "base": "MMLU", "implemented": true, "group": "Multitask Language Understanding" }, { "name": "MMLU-X", "author": "OpenGPT-X", "author_url": "https://opengpt-x.de", "url": "https://huggingface.co/datasets/openGPT-X/mmlux", "n_languages": 20, "languages": [ "bg", "cs", "da", "de", "el", "es", "et", "fi", "fr", "hu", "it", "lt", "lv", "nl", "pl", "pt", "ro", "sk", "sl", "sv" ], "tasks": [ "question_answering" ], "parallel": true, "translation": "machine", "base": "MMLU", "implemented": false, "group": "Multitask Language Understanding" }, { "name": "FLEURS", "author": "Google", "author_url": "https://google.com", "url": "https://huggingface.co/datasets/google/fleurs", "n_languages": 102, "tasks": [ "speech_recognition" ], "parallel": true, "translation": "human", "base": "FLORES", "implemented": false, "group": "Speech Recognition" }, { "name": "CommonVoice", "author": "Mozilla", "author_url": "https://blog.mozilla.ai", "url": "https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0", "n_languages": 124, "tasks": [ "speech_recognition" ], "parallel": null, "translation": "human", "group": "Speech Recognition" }, { "name": "WorldCuisines", "author": "Academic", "author_url": "https://worldcuisines.github.io", "url": "https://huggingface.co/datasets/worldcuisines/vqa", "n_languages": 30, "tasks": [ 
"visual_question_answering" ], "parallel": null, "group": "Visual Question Answering" }, { "name": "CVQA", "author": "Academic", "author_url": null, "url": "https://huggingface.co/datasets/afaji/cvqa", "n_languages": 39, "tasks": [ "visual_question_answering" ], "parallel": null, "group": "Visual Question Answering" }, { "name": "Uhura ARC Easy", "author": "Masakhane", "author_url": "https://www.masakhane.io", "url": "https://huggingface.co/datasets/masakhane/uhura-arc-easy", "n_languages": 6, "tasks": [ "question_answering" ], "parallel": true, "translation": "human", "base": "AI2 ARC", "implemented": false, "group": "Abstract Reasoning" }, { "name": "Okapi ARC Challenge", "author": "Academic", "author_url": null, "url": "https://huggingface.co/datasets/jon-tow/okapi_arc_challenge", "n_languages": 31, "tasks": [ "question_answering" ], "parallel": true, "translation": "machine", "base": "AI2 ARC", "implemented": false, "group": "Abstract Reasoning" }, { "name": "Arc-X", "author": "OpenGPT-X", "author_url": "https://opengpt-x.de", "url": "https://huggingface.co/datasets/openGPT-X/arcx", "n_languages": 20, "tasks": [ "question_answering" ], "parallel": true, "translation": "machine", "base": "AI2 ARC", "implemented": false, "group": "Abstract Reasoning" }, { "name": "Uhura TruthfulQA", "author": "Masakhane", "author_url": "https://www.masakhane.io", "url": "https://huggingface.co/datasets/masakhane/uhura-truthfulqa", "n_languages": 6, "tasks": [ "question_answering" ], "parallel": true, "translation": "human", "base": "TruthfulQA", "implemented": false, "group": "Truthfulness" }, { "name": "Okapi TruthfulQA", "author": "Academic", "author_url": null, "url": "https://huggingface.co/datasets/jon-tow/okapi_truthfulqa/tree/main/data", "n_languages": 31, "tasks": [ "question_answering" ], "parallel": true, "translation": "machine", "base": "TruthfulQA", "implemented": false, "group": "Truthfulness" }, { "name": "TruthfulQA-X", "author": "OpenGPT-X", "author_url": 
"https://opengpt-x.de", "url": "https://huggingface.co/datasets/openGPT-X/truthfulqax", "n_languages": 20, "tasks": [ "question_answering" ], "parallel": true, "translation": "machine", "base": "TruthfulQA", "implemented": false, "group": "Truthfulness" }, { "name": "XNLI", "author": "Meta", "author_url": "https://ai.meta.com", "url": "https://huggingface.co/datasets/facebook/xnli", "n_languages": 14, "tasks": [ "classification", "logic" ], "parallel": true, "base": "MNLI", "group": "Natural Language Inference" }, { "name": "AfriXNLI", "author": "Masakhane", "author_url": "https://www.masakhane.io", "url": "https://huggingface.co/datasets/masakhane/afrixnli", "n_languages": 18, "tasks": [ "classification", "logic" ], "parallel": true, "translation": "human", "base": "MNLI", "implemented": false, "group": "Natural Language Inference" }, { "name": "XGLUE", "author": "Microsoft", "author_url": "https://microsoft.ai", "url": "https://huggingface.co/datasets/microsoft/xglue", "n_languages": 18, "tasks": [ "pos" ], "parallel": null, "base": "GLUE", "group": "General Language Understanding" }, { "name": "IndicGLUE", "author": "AI4Bharat", "author_url": "https://models.ai4bharat.org", "url": "https://huggingface.co/datasets/ai4bharat/indic_glue", "n_languages": 11, "tasks": [ "question_answering" ], "parallel": null, "base": "GLUE", "group": "General Language Understanding" }, { "name": "Okapi HellaSwag", "author": "Academic", "author_url": null, "url": "https://huggingface.co/datasets/jon-tow/okapi_hellaswag", "n_languages": 31, "tasks": [ "question_answering" ], "parallel": true, "translation": "machine", "base": "HellaSwag", "implemented": false, "group": "Adversarial Language Modelling" }, { "name": "HellaSwag-X", "author": "OpenGPT-X", "author_url": "https://opengpt-x.de", "url": "https://huggingface.co/datasets/openGPT-X/hellaswagx", "n_languages": 20, "tasks": [ "question_answering" ], "parallel": true, "translation": "machine", "base": "HellaSwag", "implemented": 
false, "group": "Adversarial Language Modelling" }, { "name": "MGSM", "author": "Google", "author_url": "https://google.com", "url": "https://huggingface.co/datasets/juletxara/mgsm", "n_languages": 10, "tasks": [ "math" ], "parallel": true, "base": "MGSM", "group": "Grade School Math" }, { "name": "AfriMGSM", "author": "Masakhane", "author_url": "https://www.masakhane.io", "url": "https://huggingface.co/datasets/masakhane/afrimgsm", "n_languages": 18, "tasks": [ "math" ], "parallel": true, "translation": "human", "base": "MGSM", "implemented": false, "group": "Grade School Math" }, { "name": "GSM8K-X", "author": "OpenGPT-X", "author_url": "https://opengpt-x.de", "url": "https://huggingface.co/datasets/openGPT-X/gsm8kx", "n_languages": 20, "tasks": [ "math" ], "parallel": true, "translation": "machine", "base": "MGSM", "implemented": false, "group": "Grade School Math" }, { "name": "WikiANN / PAN-X", "author": "Academic", "author_url": null, "url": "https://huggingface.co/datasets/unimelb-nlp/wikiann", "n_languages": 176, "tasks": [ "ner" ], "parallel": false, "group": "Named Entity Recognition" }, { "name": "MasakhaNER", "author": "Masakhane", "author_url": "https://www.masakhane.io", "url": "https://huggingface.co/datasets/masakhane/masakhaner", "n_languages": 10, "tasks": [ "ner" ], "parallel": null, "group": "Named Entity Recognition" }, { "name": "Tülu 3 SFT Mixture", "author": "AllenAI", "author_url": "https://allenai.org", "url": "https://huggingface.co/datasets/allenai/tulu-3-sft-mixture", "n_languages": 70, "tasks": [ "instruction_following" ], "parallel": false, "group": "Instruction Following" }, { "name": "xP3", "author": "BigScience", "author_url": "https://bigscience.huggingface.co", "url": "https://huggingface.co/datasets/bigscience/xP3", "n_languages": 46, "tasks": [ "instruction_following" ], "parallel": false, "group": "Instruction Following" }, { "name": "Aya", "author": "Cohere", "author_url": "https://cohere.com", "url": 
"https://huggingface.co/datasets/CohereForAI/aya_dataset", "n_languages": 65, "tasks": [ "instruction_following" ], "parallel": null, "group": "Instruction Following" }, { "name": "SEA-IFEVAL", "author": "AI Singapore", "author_url": "https://aisingapore.org", "url": "https://huggingface.co/datasets/aisingapore/instruction_following-ifeval", "n_languages": 7, "tasks": [ "instruction_following" ], "parallel": true, "base": "IFEVAL", "group": "Instruction Following" }, { "name": "Babel-670", "author": "Academic", "author_url": null, "url": "https://github.com/UBC-NLP/Babel-670-Language-Identification", "n_languages": 670, "tasks": [ "language_identification" ], "parallel": false, "group": "Other Tasks" }, { "name": "CulturaX", "author": "Academic", "author_url": null, "url": "https://huggingface.co/datasets/uonlp/CulturaX", "n_languages": 167, "tasks": [ "language_modeling" ], "parallel": false, "group": "Other Tasks" }, { "name": "XTREME", "author": "Google", "author_url": "https://google.com", "url": "https://huggingface.co/datasets/google/xtreme", "n_languages": 40, "tasks": [ "translation", "classification", "question_answering", "ner" ], "parallel": null, "group": "Other Tasks" }, { "name": "XLSUM", "author": "Academic", "author_url": null, "url": "https://huggingface.co/datasets/csebuetnlp/xlsum", "n_languages": 45, "tasks": [ "summarization" ], "parallel": true, "group": "Other Tasks" }, { "name": "MSVAMP", "author": "Microsoft", "author_url": "https://microsoft.ai", "url": "https://huggingface.co/datasets/Mathoctopus/MSVAMP", "n_languages": 10, "tasks": [ "math" ], "parallel": true, "group": "Other Tasks" }, { "name": "Multilingual Sentiments", "author": "Academic", "author_url": null, "url": "https://huggingface.co/datasets/tyqiangz/multilingual-sentiments", "n_languages": 12, "tasks": [ "sentiment_analysis" ], "parallel": null, "group": "Other Tasks" }, { "name": "Lanfrica", "author": "Lanfrica", "author_url": "https://lanfrica.com", "url": 
"https://lanfrica.com/records?language=yor&task=machine%20translation", "n_languages": 2200, "tasks": [ "datasets" ], "parallel": null, "group": "Dataset Collections" }, { "name": "HuggingFace Languages", "author": "HuggingFace", "author_url": "https://huggingface.co", "url": "https://huggingface.co/languages", "n_languages": 4680, "tasks": [ "datasets", "models" ], "parallel": null, "group": "Dataset Collections" }, { "name": "HuggingFace Multilingual Datasets", "author": "HuggingFace", "author_url": "https://huggingface.co", "url": "https://huggingface.co/datasets?other=multilinguality:multilingual", "n_languages": 2012, "tasks": [ "datasets" ], "parallel": false, "group": "Dataset Collections" } ]