|
[ |
|
{ |
|
"name": "FLORES+", |
|
"author": "Meta", |
|
"url": "https://huggingface.co/datasets/openlanguagedata/flores_plus", |
|
"n_languages": 185, |
|
"tasks": [ |
|
"translation", |
|
"classification", |
|
"language_modeling" |
|
], |
|
"parallel": true, |
|
"base": "FLORES", |
|
"implemented": true |
|
}, |
|
{ |
|
"name": "FLEURS", |
|
"author": "Meta", |
|
"url": "https://huggingface.co/datasets/google/fleurs", |
|
"n_languages": 102, |
|
"tasks": [ |
|
"speech_recognition" |
|
], |
|
"parallel": true, |
|
"base": "FLORES", |
|
"implemented": true |
|
}, |
|
{ |
|
"name": "CommonVoice", |
|
"author": "Mozilla", |
|
"url": "https://huggingface.co/datasets/mozilla-foundation/common_voice_1_0", |
|
"n_languages": 231, |
|
"tasks": [ |
|
"speech_recognition" |
|
], |
|
"parallel": null |
|
}, |
|
{ |
|
"name": "MMMLU", |
|
"author": "OpenAI", |
|
"url": "https://huggingface.co/datasets/openai/MMMLU", |
|
"n_languages": "14", |
|
"tasks": [ |
|
"question_answering" |
|
], |
|
"parallel": true, |
|
"base": "MMLU" |
|
}, |
|
{ |
|
"name": "AfriMMLU", |
|
"author": "Masakhane", |
|
"url": "https://huggingface.co/datasets/masakhane/afrimmlu", |
|
"n_languages": "17", |
|
"tasks": [ |
|
"question_answering" |
|
], |
|
"parallel": true, |
|
"base": "MMLU" |
|
}, |
|
{ |
|
"name": "Okapi MMLU", |
|
"author": "Okapi", |
|
"url": "https://huggingface.co/datasets/jon-tow/okapi_mmlu", |
|
"n_languages": 16, |
|
"tasks": [ |
|
"question_answering" |
|
], |
|
"parallel": true, |
|
"base": "MMLU" |
|
}, |
|
{ |
|
"name": "Global MMLU", |
|
"author": "Cohere", |
|
"url": "https://huggingface.co/datasets/CohereForAI/Global-MMLU", |
|
"n_languages": 42, |
|
"tasks": [ |
|
"question_answering" |
|
], |
|
"parallel": true, |
|
"base": "MMLU" |
|
}, |
|
{ |
|
"name": "MGSM", |
|
"author": "Google", |
|
"url": "https://huggingface.co/datasets/juletxara/mgsm", |
|
"n_languages": 10, |
|
"tasks": [ |
|
"math" |
|
], |
|
"parallel": true, |
|
"base": "MGSM" |
|
}, |
|
{ |
|
"name": "AfriMGSM", |
|
"author": "Masakhane", |
|
"url": "https://huggingface.co/datasets/masakhane/afrimgsm", |
|
"n_languages": 18, |
|
"tasks": [ |
|
"math" |
|
], |
|
"parallel": true, |
|
"base": "MGSM" |
|
}, |
|
{ |
|
"name": "Okapi ARC Challenge", |
|
"author": "Okapi", |
|
"url": "https://huggingface.co/datasets/jon-tow/okapi_arc_challenge", |
|
"n_languages": 31, |
|
"tasks": [ |
|
"question_answering" |
|
], |
|
"parallel": true, |
|
"base": "AI2 ARC" |
|
}, |
|
{ |
|
"name": "Uhuru ARC Easy", |
|
"author": "Masakhane", |
|
"url": "https://huggingface.co/datasets/masakhane/uhura-arc-easy", |
|
"n_languages": 6, |
|
"tasks": [ |
|
"question_answering" |
|
], |
|
"parallel": true, |
|
"base": "AI2 ARC" |
|
}, |
|
{ |
|
"name": "Okapi TruthfulQA", |
|
"author": "Okapi", |
|
"url": "https://huggingface.co/datasets/jon-tow/okapi_truthfulqa/tree/main/data", |
|
"n_languages": 31, |
|
"tasks": [ |
|
"question_answering" |
|
], |
|
"parallel": true, |
|
"base": "TruthfulQA" |
|
}, |
|
{ |
|
"name": "Uhura TruthfulQA", |
|
"author": "Masakhane", |
|
"url": "https://huggingface.co/datasets/masakhane/uhura-truthfulqa", |
|
"n_languages": 6, |
|
"tasks": [ |
|
"question_answering" |
|
], |
|
"parallel": true, |
|
"base": "TruthfulQA" |
|
}, |
|
{ |
|
"name": "XNLI", |
|
"author": "Meta", |
|
"url": "https://huggingface.co/datasets/facebook/xnli", |
|
"n_languages": 14, |
|
"tasks": [ |
|
"classification" |
|
], |
|
"parallel": true, |
|
"base": "XNLI" |
|
}, |
|
{ |
|
"name": "AfriXNLI", |
|
"author": "Masakhane", |
|
"url": "https://huggingface.co/datasets/masakhane/afrixnli", |
|
"n_languages": 18, |
|
"tasks": [ |
|
"classification" |
|
], |
|
"parallel": true, |
|
"base": "XNLI" |
|
}, |
|
{ |
|
"name": "Okapi HellaSwag", |
|
"author": "Okapi", |
|
"url": "https://huggingface.co/datasets/jon-tow/okapi_hellaswag", |
|
"n_languages": 31, |
|
"tasks": [ |
|
"question_answering" |
|
], |
|
"parallel": true, |
|
"base": "HellaSwag" |
|
}, |
|
{ |
|
"name": "WikiANN / PAN-X", |
|
"author": "Academic", |
|
"url": "https://huggingface.co/datasets/unimelb-nlp/wikiann", |
|
"n_languages": 176, |
|
"tasks": [ |
|
"ner" |
|
], |
|
"parallel": false |
|
}, |
|
{ |
|
"name": "MSVAMP", |
|
"author": "Microsoft", |
|
"url": "https://huggingface.co/datasets/Mathoctopus/MSVAMP", |
|
"n_languages": 10, |
|
"tasks": [ |
|
"math" |
|
], |
|
"parallel": true |
|
}, |
|
{ |
|
"name": "XLSUM", |
|
"author": "Academic", |
|
"url": "https://huggingface.co/datasets/csebuetnlp/xlsum", |
|
"n_languages": 45, |
|
"tasks": [ |
|
"summarization" |
|
], |
|
"parallel": true |
|
}, |
|
{ |
|
"name": "SEA-IFEVAL", |
|
"author": "AI Singapore", |
|
"url": "https://huggingface.co/datasets/aisingapore/instruction_following-ifeval", |
|
"n_languages": 7, |
|
"tasks": [ |
|
"instruction_following" |
|
], |
|
"parallel": true, |
|
"base": "IFEVAL" |
|
}, |
|
{ |
|
"name": "XTREME", |
|
"author": "Google", |
|
"url": "https://huggingface.co/datasets/google/xtreme", |
|
"n_languages": 40, |
|
"tasks": [ |
|
"translation", |
|
"classification", |
|
"question_answering", |
|
"ner" |
|
], |
|
"parallel": null |
|
}, |
|
{ |
|
"name": "XGLUE", |
|
"author": "Microsoft", |
|
"url": "https://huggingface.co/datasets/microsoft/xglue", |
|
"n_languages": 18, |
|
"tasks": [ |
|
"pos" |
|
], |
|
"parallel": null, |
|
"base": "GLUE" |
|
}, |
|
{ |
|
"name": "IndicGLUE", |
|
"author": "AI4Bharat", |
|
"url": "https://huggingface.co/datasets/ai4bharat/indic_glue", |
|
"n_languages": 11, |
|
"tasks": [ |
|
"question_answering" |
|
], |
|
"parallel": null, |
|
"base": "GLUE" |
|
}, |
|
{ |
|
"name": "Opus Gnome", |
|
"author": "Helsinki NLP", |
|
"url": "https://huggingface.co/datasets/Helsinki-NLP/opus_gnome", |
|
"n_languages": 187, |
|
"tasks": [ |
|
"translation" |
|
], |
|
"parallel": true |
|
}, |
|
{ |
|
"name": "Opus Paracrawl", |
|
"author": "Helsinki NLP", |
|
"url": "https://huggingface.co/datasets/Helsinki-NLP/opus_paracrawl", |
|
"n_languages": 43, |
|
"tasks": [ |
|
"translation" |
|
], |
|
"parallel": false |
|
}, |
|
{ |
|
"name": "CCAligned", |
|
"author": "Meta", |
|
"url": "https://huggingface.co/datasets/ahelk/ccaligned_multilingual", |
|
"n_languages": 137, |
|
"tasks": [ |
|
"translation" |
|
], |
|
"parallel": false |
|
}, |
|
{ |
|
"name": "OPUS Collection", |
|
"author": "Helsinki NLP", |
|
"url": "https://opus.nlpl.eu/", |
|
"n_languages": 747, |
|
"tasks": [ |
|
"translation" |
|
], |
|
"parallel": false |
|
}, |
|
{ |
|
"name": "MasakhaNER", |
|
"author": "Masakhane", |
|
"url": "https://huggingface.co/datasets/masakhane/masakhaner", |
|
"n_languages": 10, |
|
"tasks": [ |
|
"ner" |
|
], |
|
"parallel": null |
|
}, |
|
{ |
|
"name": "Multilingual Sentiments", |
|
"url": "https://huggingface.co/datasets/tyqiangz/multilingual-sentiments", |
|
"n_languages": 12, |
|
"tasks": [ |
|
"sentiment_analysis" |
|
], |
|
"parallel": null |
|
}, |
|
{ |
|
"name": "CulturaX", |
|
"author": "Academic", |
|
"url": "https://huggingface.co/datasets/uonlp/CulturaX", |
|
"n_languages": 167, |
|
"tasks": [ |
|
"language_modeling" |
|
], |
|
"parallel": false |
|
}, |
|
{ |
|
"name": "T\u00fclu 3 SFT Mixture", |
|
"author": "AllenAI", |
|
"url": "https://huggingface.co/datasets/allenai/tulu-3-sft-mixture", |
|
"n_languages": 70, |
|
"tasks": [ |
|
"instruction_following" |
|
], |
|
"parallel": false |
|
}, |
|
{ |
|
"name": "xP3", |
|
"author": "BigScience", |
|
"url": "https://huggingface.co/datasets/bigscience/xP3", |
|
"n_languages": 46, |
|
"tasks": [ |
|
"instruction_following" |
|
], |
|
"parallel": false |
|
}, |
|
{ |
|
"name": "Aya", |
|
"author": "Cohere", |
|
"url": "https://huggingface.co/datasets/CohereForAI/aya_dataset", |
|
"n_languages": 65, |
|
"tasks": [ |
|
"instruction_following" |
|
], |
|
"parallel": null |
|
}, |
|
{ |
|
"name": "Lanfrica", |
|
"author": "Lanfrica", |
|
"url": "https://lanfrica.com/records?language=yor&task=machine%20translation", |
|
"n_languages": 2200, |
|
"tasks": [ |
|
"datasets" |
|
], |
|
"parallel": null |
|
}, |
|
{ |
|
"name": "HuggingFace Languages", |
|
"author": "HuggingFace", |
|
"url": "https://huggingface.co/languages", |
|
"n_languages": 4680, |
|
"tasks": [ |
|
"datasets", |
|
"models" |
|
], |
|
"parallel": null |
|
}, |
|
{ |
|
"name": "HuggingFace Multilingual Datasets", |
|
"author": "HuggingFace", |
|
"url": "https://huggingface.co/datasets?other=multilinguality:multilingual", |
|
"n_languages": null, |
|
"tasks": [ |
|
"datasets" |
|
], |
|
"parallel": false |
|
} |
|
] |