zhuohan-7's picture
Upload folder using huggingface_hub
71dd565 verified
{
"builder_name": "common_voice_16_0",
"citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n",
"config_name": "zh-CN",
"dataset_name": "common_voice_16_0",
"dataset_size": 423983727,
"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak. The dataset currently consists of 19159 validated hours of speech in 119 languages, but more voices and languages are always added.",
"download_checksums": {
"https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/n_shards.json": {
"num_bytes": 17487,
"checksum": null
},
"https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/train/zh-CN_train_0.tar": {
"num_bytes": 1157220864,
"checksum": null
},
"https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/dev/zh-CN_dev_0.tar": {
"num_bytes": 436442624,
"checksum": null
},
"https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/test/zh-CN_test_0.tar": {
"num_bytes": 506296320,
"checksum": null
},
"https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/other/zh-CN_other_0.tar": {
"num_bytes": 1252570624,
"checksum": null
},
"https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/other/zh-CN_other_1.tar": {
"num_bytes": 1216365056,
"checksum": null
},
"https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/other/zh-CN_other_2.tar": {
"num_bytes": 1057693696,
"checksum": null
},
"https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/other/zh-CN_other_3.tar": {
"num_bytes": 1037878784,
"checksum": null
},
"https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/other/zh-CN_other_4.tar": {
"num_bytes": 1006488064,
"checksum": null
},
"https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/other/zh-CN_other_5.tar": {
"num_bytes": 951297024,
"checksum": null
},
"https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/other/zh-CN_other_6.tar": {
"num_bytes": 1054305280,
"checksum": null
},
"https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/other/zh-CN_other_7.tar": {
"num_bytes": 1079122944,
"checksum": null
},
"https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/other/zh-CN_other_8.tar": {
"num_bytes": 1057605632,
"checksum": null
},
"https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/other/zh-CN_other_9.tar": {
"num_bytes": 1054744064,
"checksum": null
},
"https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/other/zh-CN_other_10.tar": {
"num_bytes": 1037184512,
"checksum": null
},
"https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/other/zh-CN_other_11.tar": {
"num_bytes": 1081821184,
"checksum": null
},
"https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/other/zh-CN_other_12.tar": {
"num_bytes": 1144596992,
"checksum": null
},
"https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/other/zh-CN_other_13.tar": {
"num_bytes": 1211527680,
"checksum": null
},
"https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/other/zh-CN_other_14.tar": {
"num_bytes": 1190928384,
"checksum": null
},
"https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/other/zh-CN_other_15.tar": {
"num_bytes": 299709952,
"checksum": null
},
"https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/invalidated/zh-CN_invalidated_0.tar": {
"num_bytes": 1000983552,
"checksum": null
},
"https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/invalidated/zh-CN_invalidated_1.tar": {
"num_bytes": 416471552,
"checksum": null
},
"https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/transcript/zh-CN/train.tsv": {
"num_bytes": 7373507,
"checksum": null
},
"https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/transcript/zh-CN/dev.tsv": {
"num_bytes": 2567399,
"checksum": null
},
"https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/transcript/zh-CN/test.tsv": {
"num_bytes": 2457920,
"checksum": null
},
"https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/transcript/zh-CN/other.tsv": {
"num_bytes": 137605043,
"checksum": null
},
"https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/transcript/zh-CN/invalidated.tsv": {
"num_bytes": 13622503,
"checksum": null
}
},
"download_size": 20414898643,
"features": {
"context": {
"sampling_rate": 16000,
"_type": "Audio"
},
"instruction": {
"dtype": "string",
"_type": "Value"
},
"answer": {
"dtype": "string",
"_type": "Value"
},
"audio_length": {
"dtype": "float64",
"_type": "Value"
},
"language": {
"dtype": "string",
"_type": "Value"
}
},
"homepage": "https://commonvoice.mozilla.org/en/datasets",
"license": "https://creativecommons.org/publicdomain/zero/1.0/",
"size_in_bytes": 20838882370,
"splits": {
"train": {
"name": "train",
"num_bytes": 17963235,
"num_examples": 29406,
"dataset_name": "common_voice_16_0"
},
"validation": {
"name": "validation",
"num_bytes": 6351483,
"num_examples": 10626,
"dataset_name": "common_voice_16_0"
},
"test": {
"name": "test",
"num_bytes": 6263265,
"num_examples": 10626,
"dataset_name": "common_voice_16_0"
},
"other": {
"name": "other",
"num_bytes": 358056452,
"num_examples": 610981,
"dataset_name": "common_voice_16_0"
},
"invalidated": {
"name": "invalidated",
"num_bytes": 35349292,
"num_examples": 58386,
"dataset_name": "common_voice_16_0"
}
},
"version": {
"version_str": "15.0.0",
"major": 15,
"minor": 0,
"patch": 0
}
}