zhuohan-7's picture
Upload folder using huggingface_hub
71dd565 verified
{
"builder_name": "common_voice_17_0",
"citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n",
"config_name": "ta",
"dataset_name": "common_voice_17_0",
"dataset_size": 221361139,
"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak. The dataset currently consists of 20408 validated hours of speech in 124 languages, but more voices and languages are always added.",
"download_checksums": {
"https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0/resolve/main/n_shards.json": {
"num_bytes": 17491,
"checksum": null
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0/resolve/main/audio/ta/train/ta_train_0.tar": {
"num_bytes": 1598955520,
"checksum": null
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0/resolve/main/audio/ta/train/ta_train_1.tar": {
"num_bytes": 224542720,
"checksum": null
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0/resolve/main/audio/ta/dev/ta_dev_0.tar": {
"num_bytes": 434257920,
"checksum": null
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0/resolve/main/audio/ta/test/ta_test_0.tar": {
"num_bytes": 454778880,
"checksum": null
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0/resolve/main/audio/ta/other/ta_other_0.tar": {
"num_bytes": 1560514560,
"checksum": null
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0/resolve/main/audio/ta/other/ta_other_1.tar": {
"num_bytes": 1515827200,
"checksum": null
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0/resolve/main/audio/ta/other/ta_other_2.tar": {
"num_bytes": 495831040,
"checksum": null
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0/resolve/main/audio/ta/invalidated/ta_invalidated_0.tar": {
"num_bytes": 231424000,
"checksum": null
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0/resolve/main/audio/ta/validated/ta_validated_0.tar": {
"num_bytes": 1447434240,
"checksum": null
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0/resolve/main/audio/ta/validated/ta_validated_1.tar": {
"num_bytes": 1530644480,
"checksum": null
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0/resolve/main/audio/ta/validated/ta_validated_2.tar": {
"num_bytes": 1654978560,
"checksum": null
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0/resolve/main/audio/ta/validated/ta_validated_3.tar": {
"num_bytes": 652861440,
"checksum": null
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0/resolve/main/transcript/ta/train.tsv": {
"num_bytes": 19608830,
"checksum": null
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0/resolve/main/transcript/ta/dev.tsv": {
"num_bytes": 5203704,
"checksum": null
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0/resolve/main/transcript/ta/test.tsv": {
"num_bytes": 4944646,
"checksum": null
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0/resolve/main/transcript/ta/other.tsv": {
"num_bytes": 39470943,
"checksum": null
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0/resolve/main/transcript/ta/invalidated.tsv": {
"num_bytes": 2499761,
"checksum": null
},
"https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0/resolve/main/transcript/ta/validated.tsv": {
"num_bytes": 56763398,
"checksum": null
}
},
"download_size": 11930559333,
"features": {
"context": {
"sampling_rate": 16000,
"_type": "Audio"
},
"instruction": {
"dtype": "string",
"_type": "Value"
},
"answer": {
"dtype": "string",
"_type": "Value"
},
"audio_length": {
"dtype": "float64",
"_type": "Value"
},
"language": {
"dtype": "string",
"_type": "Value"
}
},
"homepage": "https://commonvoice.mozilla.org/en/datasets",
"license": "https://creativecommons.org/publicdomain/zero/1.0/",
"size_in_bytes": 12151920472,
"splits": {
"train": {
"name": "train",
"num_bytes": 33336098,
"num_examples": 45587,
"dataset_name": "common_voice_17_0"
},
"validation": {
"name": "validation",
"num_bytes": 8797317,
"num_examples": 12095,
"dataset_name": "common_voice_17_0"
},
"test": {
"name": "test",
"num_bytes": 8556167,
"num_examples": 12074,
"dataset_name": "common_voice_17_0"
},
"other": {
"name": "other",
"num_bytes": 67773267,
"num_examples": 93989,
"dataset_name": "common_voice_17_0"
},
"invalidated": {
"name": "invalidated",
"num_bytes": 4282268,
"num_examples": 5693,
"dataset_name": "common_voice_17_0"
},
"validated": {
"name": "validated",
"num_bytes": 98616022,
"num_examples": 135391,
"dataset_name": "common_voice_17_0"
}
},
"version": {
"version_str": "17.0.0",
"major": 17,
"minor": 0,
"patch": 0
}
}