[
{
"name": "FLORES+",
"author": "Meta",
"url": "https://huggingface.co/datasets/openlanguagedata/flores_plus",
"n_languages": 185,
"tasks": [
"translation",
"classification",
"language_modeling"
],
"parallel": true,
"base": "FLORES",
"implemented": true
},
{
"name": "FLEURS",
"author": "Meta",
"url": "https://huggingface.co/datasets/google/fleurs",
"n_languages": 102,
"tasks": [
"speech_recognition"
],
"parallel": true,
"base": "FLORES",
"implemented": true
},
{
"name": "CommonVoice",
"author": "Mozilla",
"url": "https://huggingface.co/datasets/mozilla-foundation/common_voice_1_0",
"n_languages": 231,
"tasks": [
"speech_recognition"
],
"parallel": null
},
{
"name": "MMMLU",
"author": "OpenAI",
"url": "https://huggingface.co/datasets/openai/MMMLU",
"n_languages": "14",
"tasks": [
"question_answering"
],
"parallel": true,
"base": "MMLU"
},
{
"name": "AfriMMLU",
"author": "Masakhane",
"url": "https://huggingface.co/datasets/masakhane/afrimmlu",
"n_languages": "17",
"tasks": [
"question_answering"
],
"parallel": true,
"base": "MMLU"
},
{
"name": "Okapi MMLU",
"author": "Okapi",
"url": "https://huggingface.co/datasets/jon-tow/okapi_mmlu",
"n_languages": 16,
"tasks": [
"question_answering"
],
"parallel": true,
"base": "MMLU"
},
{
"name": "Global MMLU",
"author": "Cohere",
"url": "https://huggingface.co/datasets/CohereForAI/Global-MMLU",
"n_languages": 42,
"tasks": [
"question_answering"
],
"parallel": true,
"base": "MMLU"
},
{
"name": "MGSM",
"author": "Google",
"url": "https://huggingface.co/datasets/juletxara/mgsm",
"n_languages": 10,
"tasks": [
"math"
],
"parallel": true,
"base": "MGSM"
},
{
"name": "AfriMGSM",
"author": "Masakhane",
"url": "https://huggingface.co/datasets/masakhane/afrimgsm",
"n_languages": 18,
"tasks": [
"math"
],
"parallel": true,
"base": "MGSM"
},
{
"name": "Okapi ARC Challenge",
"author": "Okapi",
"url": "https://huggingface.co/datasets/jon-tow/okapi_arc_challenge",
"n_languages": 31,
"tasks": [
"question_answering"
],
"parallel": true,
"base": "AI2 ARC"
},
{
"name": "Uhuru ARC Easy",
"author": "Masakhane",
"url": "https://huggingface.co/datasets/masakhane/uhura-arc-easy",
"n_languages": 6,
"tasks": [
"question_answering"
],
"parallel": true,
"base": "AI2 ARC"
},
{
"name": "Okapi TruthfulQA",
"author": "Okapi",
"url": "https://huggingface.co/datasets/jon-tow/okapi_truthfulqa/tree/main/data",
"n_languages": 31,
"tasks": [
"question_answering"
],
"parallel": true,
"base": "TruthfulQA"
},
{
"name": "Uhura TruthfulQA",
"author": "Masakhane",
"url": "https://huggingface.co/datasets/masakhane/uhura-truthfulqa",
"n_languages": 6,
"tasks": [
"question_answering"
],
"parallel": true,
"base": "TruthfulQA"
},
{
"name": "XNLI",
"author": "Meta",
"url": "https://huggingface.co/datasets/facebook/xnli",
"n_languages": 14,
"tasks": [
"classification"
],
"parallel": true,
"base": "XNLI"
},
{
"name": "AfriXNLI",
"author": "Masakhane",
"url": "https://huggingface.co/datasets/masakhane/afrixnli",
"n_languages": 18,
"tasks": [
"classification"
],
"parallel": true,
"base": "XNLI"
},
{
"name": "IndicXNLI",
"author": "AI4Bharat",
"url": "https://huggingface.co/datasets/Divyanshu/indicxnli",
"n_languages": 11,
"tasks": [
"classification"
],
"parallel": true,
"base": "XNLI"
},
{
"name": "Okapi HellaSwag",
"author": "Okapi",
"url": "https://huggingface.co/datasets/jon-tow/okapi_hellaswag",
"n_languages": 31,
"tasks": [
"question_answering"
],
"parallel": true,
"base": "HellaSwag"
},
{
"name": "XTREME",
"author": "Google",
"url": "https://huggingface.co/datasets/google/xtreme",
"n_languages": 40,
"tasks": [
"translation",
"classification",
"question_answering",
"ner"
],
"parallel": null
},
{
"name": "IndicXTREME",
"author": "AI4Bharat",
"url": "https://huggingface.co/collections/ai4bharat/indicxtreme-66c59f576386ba2955650030",
"n_languages": 20,
"tasks": [
"translation",
"classification",
"question_answering",
"ner"
],
"parallel": null
},
{
"name": "XGLUE",
"author": "Microsoft",
"url": "https://huggingface.co/datasets/microsoft/xglue",
"n_languages": 18,
"tasks": [
"pos"
],
"parallel": null,
"base": "GLUE"
},
{
"name": "IndicGLUE",
"author": "AI4Bharat",
"url": "https://huggingface.co/datasets/ai4bharat/indic_glue",
"n_languages": 11,
"tasks": [
"question_answering"
],
"parallel": null,
"base": "GLUE"
},
{
"name": "CCAligned",
"author": "Meta",
"url": "https://huggingface.co/datasets/ahelk/ccaligned_multilingual",
"n_languages": 137,
"tasks": [
"translation"
],
"parallel": false
},
{
"name": "Opus Gnome",
"author": "Helsinki NLP",
"url": "https://huggingface.co/datasets/Helsinki-NLP/opus_gnome",
"n_languages": 187,
"tasks": [
"translation"
],
"parallel": true
},
{
"name": "Opus Paracrawl",
"author": "Helsinki NLP",
"url": "https://huggingface.co/datasets/Helsinki-NLP/opus_paracrawl",
"n_languages": 43,
"tasks": [
"translation"
],
"parallel": false
},
{
"name": "OPUS Collection",
"author": "Helsinki NLP",
"url": "https://opus.nlpl.eu/",
"n_languages": 747,
"tasks": [
"translation"
],
"parallel": false
},
{
"name": "WikiANN / PAN-X",
"author": "Academic",
"url": "https://huggingface.co/datasets/unimelb-nlp/wikiann",
"n_languages": 176,
"tasks": [
"ner"
],
"parallel": false
},
{
"name": "MSVAMP",
"author": "Microsoft",
"url": "https://huggingface.co/datasets/Mathoctopus/MSVAMP",
"n_languages": 10,
"tasks": [
"math"
],
"parallel": true
},
{
"name": "XLSUM",
"author": "Academic",
"url": "https://huggingface.co/datasets/csebuetnlp/xlsum",
"n_languages": 45,
"tasks": [
"summarization"
],
"parallel": true
},
{
"name": "SEA-IFEVAL",
"author": "AI Singapore",
"url": "https://huggingface.co/datasets/aisingapore/instruction_following-ifeval",
"n_languages": 7,
"tasks": [
"instruction_following"
],
"parallel": true,
"base": "IFEVAL"
},
{
"name": "MasakhaNER",
"author": "Masakhane",
"url": "https://huggingface.co/datasets/masakhane/masakhaner",
"n_languages": 10,
"tasks": [
"ner"
],
"parallel": null
},
{
"name": "Multilingual Sentiments",
"url": "https://huggingface.co/datasets/tyqiangz/multilingual-sentiments",
"n_languages": 12,
"tasks": [
"sentiment_analysis"
],
"parallel": null
},
{
"name": "CulturaX",
"author": "Academic",
"url": "https://huggingface.co/datasets/uonlp/CulturaX",
"n_languages": 167,
"tasks": [
"language_modeling"
],
"parallel": false
},
{
"name": "T\u00fclu 3 SFT Mixture",
"author": "AllenAI",
"url": "https://huggingface.co/datasets/allenai/tulu-3-sft-mixture",
"n_languages": 70,
"tasks": [
"instruction_following"
],
"parallel": false
},
{
"name": "xP3",
"author": "BigScience",
"url": "https://huggingface.co/datasets/bigscience/xP3",
"n_languages": 46,
"tasks": [
"instruction_following"
],
"parallel": false
},
{
"name": "Aya",
"author": "Cohere",
"url": "https://huggingface.co/datasets/CohereForAI/aya_dataset",
"n_languages": 65,
"tasks": [
"instruction_following"
],
"parallel": null
},
{
"name": "Lanfrica",
"author": "Lanfrica",
"url": "https://lanfrica.com/records?language=yor&task=machine%20translation",
"n_languages": 2200,
"tasks": [
"datasets"
],
"parallel": null
},
{
"name": "HuggingFace Languages",
"author": "HuggingFace",
"url": "https://huggingface.co/languages",
"n_languages": 4680,
"tasks": [
"datasets",
"models"
],
"parallel": null
},
{
"name": "HuggingFace Multilingual Datasets",
"author": "HuggingFace",
"url": "https://huggingface.co/datasets?other=multilinguality:multilingual",
"n_languages": null,
"tasks": [
"datasets"
],
"parallel": false
}
]