{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "UNK", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "PAD", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "WORD_BOUNDARY", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "UTT_BOUNDARY", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "Sequence", "normalizers": [ { "type": "Strip", "strip_left": true, "strip_right": true } ] }, "pre_tokenizer": { "type": "WhitespaceSplit" }, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "UTT_BOUNDARY", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } } ], "pair": [ { "SpecialToken": { "id": "UTT_BOUNDARY", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "UTT_BOUNDARY", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } } ], "special_tokens": { "UTT_BOUNDARY": { "id": "UTT_BOUNDARY", "ids": [ 3 ], "tokens": [ "UTT_BOUNDARY" ] } } }, "decoder": null, "model": { "type": "WordLevel", "vocab": { "UNK": 0, "PAD": 1, "WORD_BOUNDARY": 2, "UTT_BOUNDARY": 3, "j": 4, "ɐ˥": 5, "t": 6, "k": 7, "ɐu˧˥": 8, "i˨": 9, "n": 10, "i˧˩̰": 11, "y˨": 12, "s": 13, "ɐ˨": 14, "p": 15, "ts": 16, "ɐu˥": 17, "ɪ̞˧˥": 18, "ŋ": 19, "ɵ˧": 20, "a̞˧": 21, "l": 22, "ʊ̟˥": 23, "a̞˧˩̰": 24, "ɛ˥": 25, "ei˩˧": 26, "w": 27, "a̞˨": 28, "ɐi˧˥": 29, "a̞˧˥": 30, "m̩˧˥": 31, "m": 32, "ou˥": 33, "ei˥": 34, "i˧": 35, "ɔ̽˧˥": 36, "tʰ": 37, "i˥": 38, "f": 39, "aːĭ˧": 40, "h": 41, "ɵy˧": 42, "a̞˥": 43, "ei˧˩̰": 44, "ou˨": 45, "ɔ̽˧": 46, "ɐi˧˩̰": 47, "u˧": 48, "ɔːĭ˥": 49, "ɐu˨": 50, "ei˧˥": 51, "ɐi˨": 52, "ʊ̟˧˩̰": 53, "ʊ̟˨": 54, "a̞˩˧": 55, "ou˧˥": 56, "aːĭ˧˥": 57, "ɔ̽˨": 58, "ɛ˩˧": 59, "ɪ̞˨": 60, "iːŭ˧": 61, "ɛ˧˩̰": 62, "m̩˧˩̰": 63, "ɵ˧˥": 64, "ei˧": 65, "ɐu˧˩̰": 66, "m̩˧": 67, "ɐ˧˥": 68, "ɐu˩˧": 69, "ɐi˥": 70, "ɔ̽˥": 71, "ɔ̽˧˩̰": 72, "ɔːĭ˧": 73, "ou˩˧": 74, "m̩˥": 75, "ɐ˧": 76, "tsʰ": 77, "ɛ˧˥": 78, "i˧˥": 79, "ɔ̽˩˧": 80, "kʰ": 81, "ɐ˧˩̰": 82, "aːŭ˧˥": 83, "pʰ": 84, "aːĭ˧˩̰": 85, "ɵy˩˧": 86, "ɛ˧": 87, "u˧˥": 88, "ɛ˨": 89, "ʊ̟˧": 90, "u˥": 91, "m̩˩˧": 92, "aːŭ˧": 93, "œ̞˩˧": 94, "i˩˧": 95, "ɪ̞˧˩̰": 96, "u˨": 97, "ɪ̞˥": 98, "iːŭ˧˩̰": 99, "œ̞˧˥": 100, "y˧": 101, "uːĭ˩˧": 102, "uːĭ˥": 103, "ɵy˧˥": 104, "y˧˩̰": 105, "ɔːĭ˧˥": 106, "ɛ": 107, "ou˧": 108, "ei˨": 109, "ɵ˥": 110, "u˧˩̰": 111, "y˥": 112, "œ̞˥": 113, "œ̞˧˩̰": 114, "aːĭ˨": 115, "ɐ˩˧": 116, "œ̞˧": 117, "uːĭ˧˥": 118, "ɐu˧": 119, "ɐi˩˧": 120, "ɐi˧": 121, "ou˧˩̰": 122, "aːĭ˥": 123, "aːŭ˥": 124, "ŋ˩˧": 125, "y˧˥": 126, "iːŭ˥": 127, "ɔːĭ˨": 128, "ʊ̟˧˥": 129, "iːŭ˧˥": 130, "ɵy˥": 131, "ɔːĭ˧˩̰": 132, "uːĭ˧": 133, "ɵy˧˩̰": 134, "œ̞˨": 135, "m̩˨": 136, "aːŭ˧˩̰": 137, "y˩˧": 138, "aːŭ˩˧": 139, "aːĭ˩˧": 140, "uːĭ˨": 141, "ɵy˨": 142, "aːŭ˨": 143, "ɪ̞˧": 144, "ɵ˨": 145, "iːŭ˩˧": 146, "iːŭ˨": 147, "ɵ˧˩̰": 148, "uːĭ˧˩̰": 149, "u˩˧": 150, "ŋ˧˩̰": 151 }, "unk_token": "UNK" } }