{ "builder_name": "common_voice_16_0", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "config_name": "zh-CN", "dataset_name": "common_voice_16_0", "dataset_size": 423983727, "description": "Common Voice is Mozilla's initiative to help teach machines how real people speak. The dataset currently consists of 19159 validated hours of speech in 119 languages, but more voices and languages are always added.", "download_checksums": { "https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/n_shards.json": { "num_bytes": 17487, "checksum": null }, "https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/train/zh-CN_train_0.tar": { "num_bytes": 1157220864, "checksum": null }, "https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/dev/zh-CN_dev_0.tar": { "num_bytes": 436442624, "checksum": null }, "https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/test/zh-CN_test_0.tar": { "num_bytes": 506296320, "checksum": null }, "https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/other/zh-CN_other_0.tar": { "num_bytes": 1252570624, "checksum": null }, "https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/other/zh-CN_other_1.tar": { "num_bytes": 1216365056, "checksum": null }, "https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/other/zh-CN_other_2.tar": { "num_bytes": 1057693696, "checksum": null }, "https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/other/zh-CN_other_3.tar": { "num_bytes": 1037878784, "checksum": null }, "https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/other/zh-CN_other_4.tar": { "num_bytes": 1006488064, "checksum": null }, "https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/other/zh-CN_other_5.tar": { "num_bytes": 951297024, "checksum": null }, "https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/other/zh-CN_other_6.tar": { "num_bytes": 1054305280, "checksum": null }, "https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/other/zh-CN_other_7.tar": { "num_bytes": 1079122944, "checksum": null }, "https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/other/zh-CN_other_8.tar": { "num_bytes": 1057605632, "checksum": null }, "https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/other/zh-CN_other_9.tar": { "num_bytes": 1054744064, "checksum": null }, "https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/other/zh-CN_other_10.tar": { "num_bytes": 1037184512, "checksum": null }, "https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/other/zh-CN_other_11.tar": { "num_bytes": 1081821184, "checksum": null }, "https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/other/zh-CN_other_12.tar": { "num_bytes": 1144596992, "checksum": null }, "https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/other/zh-CN_other_13.tar": { "num_bytes": 1211527680, "checksum": null }, "https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/other/zh-CN_other_14.tar": { "num_bytes": 1190928384, "checksum": null }, "https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/other/zh-CN_other_15.tar": { "num_bytes": 299709952, "checksum": null }, "https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/invalidated/zh-CN_invalidated_0.tar": { "num_bytes": 1000983552, "checksum": null }, "https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/audio/zh-CN/invalidated/zh-CN_invalidated_1.tar": { "num_bytes": 416471552, "checksum": null }, "https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/transcript/zh-CN/train.tsv": { "num_bytes": 7373507, "checksum": null }, "https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/transcript/zh-CN/dev.tsv": { "num_bytes": 2567399, "checksum": null }, "https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/transcript/zh-CN/test.tsv": { "num_bytes": 2457920, "checksum": null }, "https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/transcript/zh-CN/other.tsv": { "num_bytes": 137605043, "checksum": null }, "https://huggingface.co/datasets/fsicoli/common_voice_16_0/resolve/main/transcript/zh-CN/invalidated.tsv": { "num_bytes": 13622503, "checksum": null } }, "download_size": 20414898643, "features": { "context": { "sampling_rate": 16000, "_type": "Audio" }, "instruction": { "dtype": "string", "_type": "Value" }, "answer": { "dtype": "string", "_type": "Value" }, "audio_length": { "dtype": "float64", "_type": "Value" }, "language": { "dtype": "string", "_type": "Value" } }, "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://creativecommons.org/publicdomain/zero/1.0/", "size_in_bytes": 20838882370, "splits": { "train": { "name": "train", "num_bytes": 17963235, "num_examples": 29406, "dataset_name": "common_voice_16_0" }, "validation": { "name": "validation", "num_bytes": 6351483, "num_examples": 10626, "dataset_name": "common_voice_16_0" }, "test": { "name": "test", "num_bytes": 6263265, "num_examples": 10626, "dataset_name": "common_voice_16_0" }, "other": { "name": "other", "num_bytes": 358056452, "num_examples": 610981, "dataset_name": "common_voice_16_0" }, "invalidated": { "name": "invalidated", "num_bytes": 35349292, "num_examples": 58386, "dataset_name": "common_voice_16_0" } }, "version": { "version_str": "15.0.0", "major": 15, "minor": 0, "patch": 0 } }