{ | |
"version": "1.0", | |
"truncation": null, | |
"padding": null, | |
"added_tokens": [ | |
{ | |
"id": 0, | |
"content": "UNK", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 1, | |
"content": "PAD", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 2, | |
"content": "WORD_BOUNDARY", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 3, | |
"content": "UTT_BOUNDARY", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
} | |
], | |
"normalizer": { | |
"type": "Sequence", | |
"normalizers": [ | |
{ | |
"type": "Strip", | |
"strip_left": true, | |
"strip_right": true | |
} | |
] | |
}, | |
"pre_tokenizer": { | |
"type": "WhitespaceSplit" | |
}, | |
"post_processor": { | |
"type": "TemplateProcessing", | |
"single": [ | |
{ | |
"SpecialToken": { | |
"id": "UTT_BOUNDARY", | |
"type_id": 0 | |
} | |
}, | |
{ | |
"Sequence": { | |
"id": "A", | |
"type_id": 0 | |
} | |
} | |
], | |
"pair": [ | |
{ | |
"SpecialToken": { | |
"id": "UTT_BOUNDARY", | |
"type_id": 0 | |
} | |
}, | |
{ | |
"Sequence": { | |
"id": "A", | |
"type_id": 0 | |
} | |
}, | |
{ | |
"SpecialToken": { | |
"id": "UTT_BOUNDARY", | |
"type_id": 0 | |
} | |
}, | |
{ | |
"Sequence": { | |
"id": "B", | |
"type_id": 1 | |
} | |
} | |
], | |
"special_tokens": { | |
"UTT_BOUNDARY": { | |
"id": "UTT_BOUNDARY", | |
"ids": [ | |
3 | |
], | |
"tokens": [ | |
"UTT_BOUNDARY" | |
] | |
} | |
} | |
}, | |
"decoder": null, | |
"model": { | |
"type": "WordLevel", | |
"vocab": { | |
"UNK": 0, | |
"PAD": 1, | |
"WORD_BOUNDARY": 2, | |
"UTT_BOUNDARY": 3, | |
"j": 4, | |
"ɐ˥": 5, | |
"t": 6, | |
"k": 7, | |
"ɐu˧˥": 8, | |
"i˨": 9, | |
"n": 10, | |
"i˧˩̰": 11, | |
"y˨": 12, | |
"s": 13, | |
"ɐ˨": 14, | |
"p": 15, | |
"ts": 16, | |
"ɐu˥": 17, | |
"ɪ̞˧˥": 18, | |
"ŋ": 19, | |
"ɵ˧": 20, | |
"a̞˧": 21, | |
"l": 22, | |
"ʊ̟˥": 23, | |
"a̞˧˩̰": 24, | |
"ɛ˥": 25, | |
"ei˩˧": 26, | |
"w": 27, | |
"a̞˨": 28, | |
"ɐi˧˥": 29, | |
"a̞˧˥": 30, | |
"m̩˧˥": 31, | |
"m": 32, | |
"ou˥": 33, | |
"ei˥": 34, | |
"i˧": 35, | |
"ɔ̽˧˥": 36, | |
"tʰ": 37, | |
"i˥": 38, | |
"f": 39, | |
"aːĭ˧": 40, | |
"h": 41, | |
"ɵy˧": 42, | |
"a̞˥": 43, | |
"ei˧˩̰": 44, | |
"ou˨": 45, | |
"ɔ̽˧": 46, | |
"ɐi˧˩̰": 47, | |
"u˧": 48, | |
"ɔːĭ˥": 49, | |
"ɐu˨": 50, | |
"ei˧˥": 51, | |
"ɐi˨": 52, | |
"ʊ̟˧˩̰": 53, | |
"ʊ̟˨": 54, | |
"a̞˩˧": 55, | |
"ou˧˥": 56, | |
"aːĭ˧˥": 57, | |
"ɔ̽˨": 58, | |
"ɛ˩˧": 59, | |
"ɪ̞˨": 60, | |
"iːŭ˧": 61, | |
"ɛ˧˩̰": 62, | |
"m̩˧˩̰": 63, | |
"ɵ˧˥": 64, | |
"ei˧": 65, | |
"ɐu˧˩̰": 66, | |
"m̩˧": 67, | |
"ɐ˧˥": 68, | |
"ɐu˩˧": 69, | |
"ɐi˥": 70, | |
"ɔ̽˥": 71, | |
"ɔ̽˧˩̰": 72, | |
"ɔːĭ˧": 73, | |
"ou˩˧": 74, | |
"m̩˥": 75, | |
"ɐ˧": 76, | |
"tsʰ": 77, | |
"ɛ˧˥": 78, | |
"i˧˥": 79, | |
"ɔ̽˩˧": 80, | |
"kʰ": 81, | |
"ɐ˧˩̰": 82, | |
"aːŭ˧˥": 83, | |
"pʰ": 84, | |
"aːĭ˧˩̰": 85, | |
"ɵy˩˧": 86, | |
"ɛ˧": 87, | |
"u˧˥": 88, | |
"ɛ˨": 89, | |
"ʊ̟˧": 90, | |
"u˥": 91, | |
"m̩˩˧": 92, | |
"aːŭ˧": 93, | |
"œ̞˩˧": 94, | |
"i˩˧": 95, | |
"ɪ̞˧˩̰": 96, | |
"u˨": 97, | |
"ɪ̞˥": 98, | |
"iːŭ˧˩̰": 99, | |
"œ̞˧˥": 100, | |
"y˧": 101, | |
"uːĭ˩˧": 102, | |
"uːĭ˥": 103, | |
"ɵy˧˥": 104, | |
"y˧˩̰": 105, | |
"ɔːĭ˧˥": 106, | |
"ɛ": 107, | |
"ou˧": 108, | |
"ei˨": 109, | |
"ɵ˥": 110, | |
"u˧˩̰": 111, | |
"y˥": 112, | |
"œ̞˥": 113, | |
"œ̞˧˩̰": 114, | |
"aːĭ˨": 115, | |
"ɐ˩˧": 116, | |
"œ̞˧": 117, | |
"uːĭ˧˥": 118, | |
"ɐu˧": 119, | |
"ɐi˩˧": 120, | |
"ɐi˧": 121, | |
"ou˧˩̰": 122, | |
"aːĭ˥": 123, | |
"aːŭ˥": 124, | |
"ŋ˩˧": 125, | |
"y˧˥": 126, | |
"iːŭ˥": 127, | |
"ɔːĭ˨": 128, | |
"ʊ̟˧˥": 129, | |
"iːŭ˧˥": 130, | |
"ɵy˥": 131, | |
"ɔːĭ˧˩̰": 132, | |
"uːĭ˧": 133, | |
"ɵy˧˩̰": 134, | |
"œ̞˨": 135, | |
"m̩˨": 136, | |
"aːŭ˧˩̰": 137, | |
"y˩˧": 138, | |
"aːŭ˩˧": 139, | |
"aːĭ˩˧": 140, | |
"uːĭ˨": 141, | |
"ɵy˨": 142, | |
"aːŭ˨": 143, | |
"ɪ̞˧": 144, | |
"ɵ˨": 145, | |
"iːŭ˩˧": 146, | |
"iːŭ˨": 147, | |
"ɵ˧˩̰": 148, | |
"uːĭ˧˩̰": 149, | |
"u˩˧": 150, | |
"ŋ˧˩̰": 151 | |
}, | |
"unk_token": "UNK" | |
} | |
} |