|
{ |
|
"version": "1.0", |
|
"truncation": null, |
|
"padding": null, |
|
"added_tokens": [ |
|
{ |
|
"id": 0, |
|
"special": true, |
|
"content": "[STOP]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false |
|
}, |
|
{ |
|
"id": 1, |
|
"special": true, |
|
"content": "[UNK]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false |
|
}, |
|
{ |
|
"id": 2, |
|
"special": true, |
|
"content": "[SPACE]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false |
|
}, |
|
{ |
|
"id": 255, |
|
"special": true, |
|
"content": "[START]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false |
|
}, |
|
{ |
|
"id": 604, |
|
"content": "[UH]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 605, |
|
"content": "[UM]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 606, |
|
"content": "[giggle]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 607, |
|
"content": "[laughter]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 608, |
|
"content": "[guffaw]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 609, |
|
"content": "[inhale]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 610, |
|
"content": "[exhale]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 611, |
|
"content": "[sigh]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 612, |
|
"content": "[cry]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 613, |
|
"content": "[bark]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 614, |
|
"content": "[howl]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 615, |
|
"content": "[meow]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 616, |
|
"content": "[singing]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 617, |
|
"content": "[music]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 618, |
|
"content": "[whistle]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 619, |
|
"content": "[humming]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 620, |
|
"content": "[gasp]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 621, |
|
"content": "[groan]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 622, |
|
"content": "[whisper]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 623, |
|
"content": "[mumble]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 624, |
|
"content": "[sniff]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 625, |
|
"content": "[sneeze]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 626, |
|
"content": "[cough]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 627, |
|
"content": "[snore]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 628, |
|
"content": "[chew]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 629, |
|
"content": "[sip]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 630, |
|
"content": "[clear_throat]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 631, |
|
"content": "[kiss]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 632, |
|
"content": "[shhh]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 633, |
|
"content": "[gibberish]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 634, |
|
"content": "[fr]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 635, |
|
"content": "[es]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 636, |
|
"content": "[de]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 637, |
|
"content": "[it]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 638, |
|
"content": "[ipa]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 639, |
|
"content": "[end_of_label]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 695, |
|
"content": "[PLACEHOLDER55]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 696, |
|
"content": "[PLACEHOLDER56]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 697, |
|
"content": "[PLACEHOLDER57]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 698, |
|
"content": "[PLACEHOLDER58]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 699, |
|
"content": "[PLACEHOLDER59]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 700, |
|
"content": "[PLACEHOLDER60]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 701, |
|
"content": "[PLACEHOLDER61]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 702, |
|
"content": "[PLACEHOLDER62]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 703, |
|
"content": "[PLACEHOLDER63]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
} |
|
], |
|
"normalizer": null, |
|
"pre_tokenizer": { |
|
"type": "Whitespace" |
|
}, |
|
"post_processor": { |
|
"type": "TemplateProcessing", |
|
"single": [ |
|
{ "SpecialToken": { "id": "BOS", "type_id": 0 } }, |
|
{ "Sequence": { "id": "A", "type_id": 0 } }, |
|
{ "SpecialToken": { "id": "EOS", "type_id": 0 } } |
|
], |
|
"pair": [ |
|
{ "SpecialToken": { "id": "BOS", "type_id": 0 } }, |
|
{ "Sequence": { "id": "A", "type_id": 0 } }, |
|
{ "SpecialToken": { "id": "EOS", "type_id": 0 } }, |
|
{ "SpecialToken": { "id": "BOS", "type_id": 1 } }, |
|
{ "Sequence": { "id": "B", "type_id": 1 } }, |
|
{ "SpecialToken": { "id": "EOS", "type_id": 1 } } |
|
], |
|
"special_tokens": { |
|
"BOS": { |
|
"id": "BOS", |
|
"ids": [255], |
|
"tokens": ["<s>"] |
|
}, |
|
"EOS": { |
|
"id": "EOS", |
|
"ids": [0], |
|
"tokens": ["</s>"] |
|
} |
|
} |
|
}, |
|
"decoder": null, |
|
"model": { |
|
"type": "BPE", |
|
"dropout": null, |
|
"unk_token": "[UNK]", |
|
"continuing_subword_prefix": null, |
|
"end_of_word_suffix": null, |
|
"fuse_unk": false, |
|
"vocab": { |
|
"[STOP]": 0, |
|
"[UNK]": 1, |
|
"[SPACE]": 2, |
|
"!": 3, |
|
"'": 4, |
|
"(": 5, |
|
")": 6, |
|
",": 7, |
|
"-": 8, |
|
".": 9, |
|
"/": 10, |
|
":": 11, |
|
";": 12, |
|
"?": 13, |
|
"a": 14, |
|
"b": 15, |
|
"c": 16, |
|
"d": 17, |
|
"e": 18, |
|
"f": 19, |
|
"g": 20, |
|
"h": 21, |
|
"i": 22, |
|
"j": 23, |
|
"k": 24, |
|
"l": 25, |
|
"m": 26, |
|
"n": 27, |
|
"o": 28, |
|
"p": 29, |
|
"q": 30, |
|
"r": 31, |
|
"s": 32, |
|
"t": 33, |
|
"u": 34, |
|
"v": 35, |
|
"w": 36, |
|
"x": 37, |
|
"y": 38, |
|
"z": 39, |
|
"th": 40, |
|
"in": 41, |
|
"the": 42, |
|
"an": 43, |
|
"er": 44, |
|
"ou": 45, |
|
"re": 46, |
|
"on": 47, |
|
"at": 48, |
|
"ed": 49, |
|
"en": 50, |
|
"to": 51, |
|
"ing": 52, |
|
"and": 53, |
|
"is": 54, |
|
"as": 55, |
|
"al": 56, |
|
"or": 57, |
|
"of": 58, |
|
"ar": 59, |
|
"it": 60, |
|
"es": 61, |
|
"he": 62, |
|
"st": 63, |
|
"le": 64, |
|
"om": 65, |
|
"se": 66, |
|
"be": 67, |
|
"ad": 68, |
|
"ow": 69, |
|
"ly": 70, |
|
"ch": 71, |
|
"wh": 72, |
|
"that": 73, |
|
"you": 74, |
|
"li": 75, |
|
"ve": 76, |
|
"ac": 77, |
|
"ti": 78, |
|
"ld": 79, |
|
"me": 80, |
|
"was": 81, |
|
"gh": 82, |
|
"id": 83, |
|
"ll": 84, |
|
"wi": 85, |
|
"ent": 86, |
|
"for": 87, |
|
"ay": 88, |
|
"ro": 89, |
|
"ver": 90, |
|
"ic": 91, |
|
"her": 92, |
|
"ke": 93, |
|
"his": 94, |
|
"no": 95, |
|
"ut": 96, |
|
"un": 97, |
|
"ir": 98, |
|
"lo": 99, |
|
"we": 100, |
|
"ri": 101, |
|
"ha": 102, |
|
"with": 103, |
|
"ght": 104, |
|
"out": 105, |
|
"im": 106, |
|
"ion": 107, |
|
"all": 108, |
|
"ab": 109, |
|
"one": 110, |
|
"ne": 111, |
|
"ge": 112, |
|
"ould": 113, |
|
"ter": 114, |
|
"mo": 115, |
|
"had": 116, |
|
"ce": 117, |
|
"she": 118, |
|
"go": 119, |
|
"sh": 120, |
|
"ur": 121, |
|
"am": 122, |
|
"so": 123, |
|
"pe": 124, |
|
"my": 125, |
|
"de": 126, |
|
"are": 127, |
|
"but": 128, |
|
"ome": 129, |
|
"fr": 130, |
|
"ther": 131, |
|
"fe": 132, |
|
"su": 133, |
|
"do": 134, |
|
"con": 135, |
|
"te": 136, |
|
"ain": 137, |
|
"ere": 138, |
|
"po": 139, |
|
"if": 140, |
|
"they": 141, |
|
"us": 142, |
|
"ag": 143, |
|
"tr": 144, |
|
"now": 145, |
|
"oun": 146, |
|
"this": 147, |
|
"have": 148, |
|
"not": 149, |
|
"sa": 150, |
|
"il": 151, |
|
"up": 152, |
|
"thing": 153, |
|
"from": 154, |
|
"ap": 155, |
|
"him": 156, |
|
"ack": 157, |
|
"ation": 158, |
|
"ant": 159, |
|
"our": 160, |
|
"op": 161, |
|
"like": 162, |
|
"ust": 163, |
|
"ess": 164, |
|
"bo": 165, |
|
"ok": 166, |
|
"ul": 167, |
|
"ind": 168, |
|
"ex": 169, |
|
"com": 170, |
|
"some": 171, |
|
"there": 172, |
|
"ers": 173, |
|
"co": 174, |
|
"res": 175, |
|
"man": 176, |
|
"ard": 177, |
|
"pl": 178, |
|
"wor": 179, |
|
"way": 180, |
|
"tion": 181, |
|
"fo": 182, |
|
"ca": 183, |
|
"were": 184, |
|
"by": 185, |
|
"ate": 186, |
|
"pro": 187, |
|
"ted": 188, |
|
"ound": 189, |
|
"own": 190, |
|
"would": 191, |
|
"ts": 192, |
|
"what": 193, |
|
"qu": 194, |
|
"ally": 195, |
|
"ight": 196, |
|
"ck": 197, |
|
"gr": 198, |
|
"when": 199, |
|
"ven": 200, |
|
"can": 201, |
|
"ough": 202, |
|
"ine": 203, |
|
"end": 204, |
|
"per": 205, |
|
"ous": 206, |
|
"od": 207, |
|
"ide": 208, |
|
"know": 209, |
|
"ty": 210, |
|
"very": 211, |
|
"si": 212, |
|
"ak": 213, |
|
"who": 214, |
|
"about": 215, |
|
"ill": 216, |
|
"them": 217, |
|
"est": 218, |
|
"red": 219, |
|
"ye": 220, |
|
"could": 221, |
|
"ong": 222, |
|
"your": 223, |
|
"their": 224, |
|
"em": 225, |
|
"just": 226, |
|
"other": 227, |
|
"into": 228, |
|
"any": 229, |
|
"whi": 230, |
|
"um": 231, |
|
"tw": 232, |
|
"ast": 233, |
|
"der": 234, |
|
"did": 235, |
|
"ie": 236, |
|
"been": 237, |
|
"ace": 238, |
|
"ink": 239, |
|
"ity": 240, |
|
"back": 241, |
|
"ting": 242, |
|
"br": 243, |
|
"more": 244, |
|
"ake": 245, |
|
"pp": 246, |
|
"then": 247, |
|
"sp": 248, |
|
"el": 249, |
|
"use": 250, |
|
"bl": 251, |
|
"said": 252, |
|
"over": 253, |
|
"get": 254, |
|
"[START]": 255, |
|
"\"": 256, |
|
"#": 257, |
|
"$": 258, |
|
"%": 259, |
|
"&": 260, |
|
"*": 261, |
|
"+": 262, |
|
"0": 263, |
|
"1": 264, |
|
"2": 265, |
|
"3": 266, |
|
"4": 267, |
|
"5": 268, |
|
"6": 269, |
|
"7": 270, |
|
"8": 271, |
|
"9": 272, |
|
"<": 273, |
|
"=": 274, |
|
">": 275, |
|
"@": 276, |
|
"A": 277, |
|
"B": 278, |
|
"C": 279, |
|
"D": 280, |
|
"E": 281, |
|
"F": 282, |
|
"G": 283, |
|
"H": 284, |
|
"I": 285, |
|
"J": 286, |
|
"K": 287, |
|
"L": 288, |
|
"M": 289, |
|
"N": 290, |
|
"O": 291, |
|
"P": 292, |
|
"Q": 293, |
|
"R": 294, |
|
"S": 295, |
|
"T": 296, |
|
"U": 297, |
|
"V": 298, |
|
"W": 299, |
|
"X": 300, |
|
"Y": 301, |
|
"Z": 302, |
|
"[": 303, |
|
"\\": 304, |
|
"]": 305, |
|
"^": 306, |
|
"_": 307, |
|
"`": 308, |
|
"{": 309, |
|
"|": 310, |
|
"}": 311, |
|
"~": 312, |
|
"‐": 313, |
|
"‑": 314, |
|
"‒": 315, |
|
"–": 316, |
|
"—": 317, |
|
"―": 318, |
|
"‖": 319, |
|
"‗": 320, |
|
"‘": 321, |
|
"’": 322, |
|
"‚": 323, |
|
"‛": 324, |
|
"“": 325, |
|
"”": 326, |
|
"„": 327, |
|
"‟": 328, |
|
" ": 329, |
|
"¡": 330, |
|
"¢": 331, |
|
"£": 332, |
|
"¤": 333, |
|
"¥": 334, |
|
"¦": 335, |
|
"§": 336, |
|
"¨": 337, |
|
"©": 338, |
|
"ª": 339, |
|
"«": 340, |
|
"¬": 341, |
|
"": 342, |
|
"®": 343, |
|
"¯": 344, |
|
"°": 345, |
|
"±": 346, |
|
"²": 347, |
|
"³": 348, |
|
"´": 349, |
|
"µ": 350, |
|
"¶": 351, |
|
"·": 352, |
|
"¸": 353, |
|
"¹": 354, |
|
"º": 355, |
|
"»": 356, |
|
"¼": 357, |
|
"½": 358, |
|
"¾": 359, |
|
"¿": 360, |
|
"À": 361, |
|
"Á": 362, |
|
"Â": 363, |
|
"Ã": 364, |
|
"Ä": 365, |
|
"Å": 366, |
|
"Æ": 367, |
|
"Ç": 368, |
|
"È": 369, |
|
"É": 370, |
|
"Ê": 371, |
|
"Ë": 372, |
|
"Ì": 373, |
|
"Í": 374, |
|
"Î": 375, |
|
"Ï": 376, |
|
"Ð": 377, |
|
"Ñ": 378, |
|
"Ò": 379, |
|
"Ó": 380, |
|
"Ô": 381, |
|
"Õ": 382, |
|
"Ö": 383, |
|
"×": 384, |
|
"Ø": 385, |
|
"Ù": 386, |
|
"Ú": 387, |
|
"Û": 388, |
|
"Ü": 389, |
|
"Ý": 390, |
|
"Þ": 391, |
|
"ß": 392, |
|
"à": 393, |
|
"á": 394, |
|
"â": 395, |
|
"ã": 396, |
|
"ä": 397, |
|
"å": 398, |
|
"æ": 399, |
|
"ç": 400, |
|
"è": 401, |
|
"é": 402, |
|
"ê": 403, |
|
"ë": 404, |
|
"ì": 405, |
|
"í": 406, |
|
"î": 407, |
|
"ï": 408, |
|
"ð": 409, |
|
"ñ": 410, |
|
"ò": 411, |
|
"ó": 412, |
|
"ô": 413, |
|
"õ": 414, |
|
"ö": 415, |
|
"÷": 416, |
|
"ø": 417, |
|
"ù": 418, |
|
"ú": 419, |
|
"û": 420, |
|
"ü": 421, |
|
"ý": 422, |
|
"þ": 423, |
|
"ÿ": 424, |
|
"ɐ": 425, |
|
"ɑ": 426, |
|
"ɒ": 427, |
|
"ɓ": 428, |
|
"ɔ": 429, |
|
"ɕ": 430, |
|
"ɖ": 431, |
|
"ɗ": 432, |
|
"ɘ": 433, |
|
"ə": 434, |
|
"ɚ": 435, |
|
"ɛ": 436, |
|
"ɜ": 437, |
|
"ɝ": 438, |
|
"ɞ": 439, |
|
"ɟ": 440, |
|
"ɠ": 441, |
|
"ɡ": 442, |
|
"ɢ": 443, |
|
"ɣ": 444, |
|
"ɤ": 445, |
|
"ɥ": 446, |
|
"ɦ": 447, |
|
"ɧ": 448, |
|
"ɨ": 449, |
|
"ɩ": 450, |
|
"ɪ": 451, |
|
"ɫ": 452, |
|
"ɬ": 453, |
|
"ɭ": 454, |
|
"ɮ": 455, |
|
"ɯ": 456, |
|
"ɰ": 457, |
|
"ɱ": 458, |
|
"ɲ": 459, |
|
"ɳ": 460, |
|
"ɴ": 461, |
|
"ɵ": 462, |
|
"ɶ": 463, |
|
"ɷ": 464, |
|
"ɸ": 465, |
|
"ɹ": 466, |
|
"ɺ": 467, |
|
"ɻ": 468, |
|
"ɼ": 469, |
|
"ɽ": 470, |
|
"ɾ": 471, |
|
"ɿ": 472, |
|
"ʀ": 473, |
|
"ʁ": 474, |
|
"ʂ": 475, |
|
"ʃ": 476, |
|
"ʄ": 477, |
|
"ʅ": 478, |
|
"ʆ": 479, |
|
"ʇ": 480, |
|
"ʈ": 481, |
|
"ʉ": 482, |
|
"ʊ": 483, |
|
"ʋ": 484, |
|
"ʌ": 485, |
|
"ʍ": 486, |
|
"ʎ": 487, |
|
"ʏ": 488, |
|
"ʐ": 489, |
|
"ʑ": 490, |
|
"ʒ": 491, |
|
"ʓ": 492, |
|
"ʔ": 493, |
|
"ʕ": 494, |
|
"ʖ": 495, |
|
"ʗ": 496, |
|
"ʘ": 497, |
|
"ʙ": 498, |
|
"ʚ": 499, |
|
"ʛ": 500, |
|
"ʜ": 501, |
|
"ʝ": 502, |
|
"ʞ": 503, |
|
"ʟ": 504, |
|
"ʠ": 505, |
|
"ʡ": 506, |
|
"ʢ": 507, |
|
"ʣ": 508, |
|
"ʤ": 509, |
|
"ʥ": 510, |
|
"ʦ": 511, |
|
"ʧ": 512, |
|
"ʨ": 513, |
|
"ʩ": 514, |
|
"ʪ": 515, |
|
"ʫ": 516, |
|
"ʬ": 517, |
|
"ʭ": 518, |
|
"ʮ": 519, |
|
"ʯ": 520, |
|
"ʰ": 521, |
|
"ʱ": 522, |
|
"ʲ": 523, |
|
"ʳ": 524, |
|
"ʴ": 525, |
|
"ʵ": 526, |
|
"ʶ": 527, |
|
"ʷ": 528, |
|
"ʸ": 529, |
|
"ʹ": 530, |
|
"ʺ": 531, |
|
"ʻ": 532, |
|
"ʼ": 533, |
|
"ʽ": 534, |
|
"ʾ": 535, |
|
"ʿ": 536, |
|
"ˀ": 537, |
|
"ˁ": 538, |
|
"˂": 539, |
|
"˃": 540, |
|
"˄": 541, |
|
"˅": 542, |
|
"ˆ": 543, |
|
"ˇ": 544, |
|
"ˈ": 545, |
|
"ˉ": 546, |
|
"ˊ": 547, |
|
"ˋ": 548, |
|
"ˌ": 549, |
|
"ˍ": 550, |
|
"ˎ": 551, |
|
"ˏ": 552, |
|
"ː": 553, |
|
"ˑ": 554, |
|
"˒": 555, |
|
"˓": 556, |
|
"˔": 557, |
|
"˕": 558, |
|
"˖": 559, |
|
"˗": 560, |
|
"˘": 561, |
|
"˙": 562, |
|
"˚": 563, |
|
"˛": 564, |
|
"˜": 565, |
|
"˝": 566, |
|
"˞": 567, |
|
"˟": 568, |
|
"ˠ": 569, |
|
"ˡ": 570, |
|
"ˢ": 571, |
|
"ˣ": 572, |
|
"ˤ": 573, |
|
"˥": 574, |
|
"˦": 575, |
|
"˧": 576, |
|
"˨": 577, |
|
"˩": 578, |
|
"˪": 579, |
|
"˫": 580, |
|
"ˬ": 581, |
|
"˭": 582, |
|
"ˮ": 583, |
|
"˯": 584, |
|
"˰": 585, |
|
"˱": 586, |
|
"˲": 587, |
|
"˳": 588, |
|
"˴": 589, |
|
"˵": 590, |
|
"˶": 591, |
|
"˷": 592, |
|
"˸": 593, |
|
"˹": 594, |
|
"˺": 595, |
|
"˻": 596, |
|
"˼": 597, |
|
"˽": 598, |
|
"˾": 599, |
|
"˿": 600, |
|
"ā": 601, |
|
"ō": 602, |
|
"…": 603, |
|
"[UH]": 604, |
|
"[UM]": 605, |
|
"[giggle]": 606, |
|
"[laughter]": 607, |
|
"[guffaw]": 608, |
|
"[inhale]": 609, |
|
"[exhale]": 610, |
|
"[sigh]": 611, |
|
"[cry]": 612, |
|
"[bark]": 613, |
|
"[howl]": 614, |
|
"[meow]": 615, |
|
"[singing]": 616, |
|
"[music]": 617, |
|
"[whistle]": 618, |
|
"[humming]": 619, |
|
"[gasp]": 620, |
|
"[groan]": 621, |
|
"[whisper]": 622, |
|
"[mumble]": 623, |
|
"[sniff]": 624, |
|
"[sneeze]": 625, |
|
"[cough]": 626, |
|
"[snore]": 627, |
|
"[chew]": 628, |
|
"[sip]": 629, |
|
"[clear_throat]": 630, |
|
"[kiss]": 631, |
|
"[shhh]": 632, |
|
"[gibberish]": 633, |
|
"[fr]": 634, |
|
"[es]": 635, |
|
"[de]": 636, |
|
"[it]": 637, |
|
"[ipa]": 638, |
|
"[end_of_label]": 639, |
|
"ŋ": 640, |
|
"ᵻ": 641, |
|
"θ": 642, |
|
"̩": 643, |
|
"\u0303": 644, |
|
"ɑː": 645, |
|
"iː": 646, |
|
"uː": 647, |
|
"ɜː": 648, |
|
"ɔː": 649, |
|
"oː": 650, |
|
"eɪ": 651, |
|
"oʊ": 652, |
|
"aɪ": 653, |
|
"aʊ": 654, |
|
"ɔɪ": 655, |
|
"dʒ": 656, |
|
"tʃ": 657, |
|
"ɪŋ": 658, |
|
"ᵻd": 659, |
|
"ˈiː": 660, |
|
"ˌiː": 661, |
|
"ˈɪ": 662, |
|
"ˌɪ": 663, |
|
"ˈeɪ": 664, |
|
"ˌeɪ": 665, |
|
"ˈɛ": 666, |
|
"ˌɛ": 667, |
|
"ˈæ": 668, |
|
"ˌæ": 669, |
|
"ˈɑː": 670, |
|
"ˌɑː": 671, |
|
"ˈɔː": 672, |
|
"ˌɔː": 673, |
|
"oːɹ": 674, |
|
"ˈoːɹ": 675, |
|
"ˌoːɹ": 676, |
|
"ˈoʊ": 677, |
|
"ˌoʊ": 678, |
|
"ˈʊ": 679, |
|
"ˌʊ": 680, |
|
"ˈuː": 681, |
|
"ˌuː": 682, |
|
"ˈɜː": 683, |
|
"ˌɜː": 684, |
|
"ˈʌ": 685, |
|
"ˌʌ": 686, |
|
"ˈaɪ": 687, |
|
"ˌaɪ": 688, |
|
"ˈaʊ": 689, |
|
"ˌaʊ": 690, |
|
"ˈɔɪ": 691, |
|
"ˌɔɪ": 692, |
|
"ˈɚ": 693, |
|
"ˌɐ": 694, |
|
"[PLACEHOLDER55]": 695, |
|
"[PLACEHOLDER56]": 696, |
|
"[PLACEHOLDER57]": 697, |
|
"[PLACEHOLDER58]": 698, |
|
"[PLACEHOLDER59]": 699, |
|
"[PLACEHOLDER60]": 700, |
|
"[PLACEHOLDER61]": 701, |
|
"[PLACEHOLDER62]": 702, |
|
"[PLACEHOLDER63]": 703 |
|
}, |
|
"merges": [ |
|
"t h", |
|
"i n", |
|
"th e", |
|
"a n", |
|
"e r", |
|
"o u", |
|
"r e", |
|
"o n", |
|
"a t", |
|
"e d", |
|
"e n", |
|
"t o", |
|
"in g", |
|
"an d", |
|
"i s", |
|
"a s", |
|
"a l", |
|
"o r", |
|
"o f", |
|
"a r", |
|
"i t", |
|
"e s", |
|
"h e", |
|
"s t", |
|
"l e", |
|
"o m", |
|
"s e", |
|
"b e", |
|
"a d", |
|
"o w", |
|
"l y", |
|
"c h", |
|
"w h", |
|
"th at", |
|
"y ou", |
|
"l i", |
|
"v e", |
|
"a c", |
|
"t i", |
|
"l d", |
|
"m e", |
|
"w as", |
|
"g h", |
|
"i d", |
|
"l l", |
|
"w i", |
|
"en t", |
|
"f or", |
|
"a y", |
|
"r o", |
|
"v er", |
|
"i c", |
|
"h er", |
|
"k e", |
|
"h is", |
|
"n o", |
|
"u t", |
|
"u n", |
|
"i r", |
|
"l o", |
|
"w e", |
|
"r i", |
|
"h a", |
|
"wi th", |
|
"gh t", |
|
"ou t", |
|
"i m", |
|
"i on", |
|
"al l", |
|
"a b", |
|
"on e", |
|
"n e", |
|
"g e", |
|
"ou ld", |
|
"t er", |
|
"m o", |
|
"h ad", |
|
"c e", |
|
"s he", |
|
"g o", |
|
"s h", |
|
"u r", |
|
"a m", |
|
"s o", |
|
"p e", |
|
"m y", |
|
"d e", |
|
"a re", |
|
"b ut", |
|
"om e", |
|
"f r", |
|
"the r", |
|
"f e", |
|
"s u", |
|
"d o", |
|
"c on", |
|
"t e", |
|
"a in", |
|
"er e", |
|
"p o", |
|
"i f", |
|
"the y", |
|
"u s", |
|
"a g", |
|
"t r", |
|
"n ow", |
|
"ou n", |
|
"th is", |
|
"ha ve", |
|
"no t", |
|
"s a", |
|
"i l", |
|
"u p", |
|
"th ing", |
|
"fr om", |
|
"a p", |
|
"h im", |
|
"ac k", |
|
"at ion", |
|
"an t", |
|
"ou r", |
|
"o p", |
|
"li ke", |
|
"u st", |
|
"es s", |
|
"b o", |
|
"o k", |
|
"u l", |
|
"in d", |
|
"e x", |
|
"c om", |
|
"s ome", |
|
"the re", |
|
"er s", |
|
"c o", |
|
"re s", |
|
"m an", |
|
"ar d", |
|
"p l", |
|
"w or", |
|
"w ay", |
|
"ti on", |
|
"f o", |
|
"c a", |
|
"w ere", |
|
"b y", |
|
"at e", |
|
"p ro", |
|
"t ed", |
|
"oun d", |
|
"ow n", |
|
"w ould", |
|
"t s", |
|
"wh at", |
|
"q u", |
|
"al ly", |
|
"i ght", |
|
"c k", |
|
"g r", |
|
"wh en", |
|
"v en", |
|
"c an", |
|
"ou gh", |
|
"in e", |
|
"en d", |
|
"p er", |
|
"ou s", |
|
"o d", |
|
"id e", |
|
"k now", |
|
"t y", |
|
"ver y", |
|
"s i", |
|
"a k", |
|
"wh o", |
|
"ab out", |
|
"i ll", |
|
"the m", |
|
"es t", |
|
"re d", |
|
"y e", |
|
"c ould", |
|
"on g", |
|
"you r", |
|
"the ir", |
|
"e m", |
|
"j ust", |
|
"o ther", |
|
"in to", |
|
"an y", |
|
"wh i", |
|
"u m", |
|
"t w", |
|
"as t", |
|
"d er", |
|
"d id", |
|
"i e", |
|
"be en", |
|
"ac e", |
|
"in k", |
|
"it y", |
|
"b ack", |
|
"t ing", |
|
"b r", |
|
"mo re", |
|
"a ke", |
|
"p p", |
|
"the n", |
|
"s p", |
|
"e l", |
|
"u se", |
|
"b l", |
|
"sa id", |
|
"o ver", |
|
"ge t", |
|
"ɑ ː", |
|
"i ː", |
|
"u ː", |
|
"ɜ ː", |
|
"ɔ ː", |
|
"o ː", |
|
"e ɪ", |
|
"o ʊ", |
|
"a ɪ", |
|
"a ʊ", |
|
"ɔ ɪ", |
|
"d ʒ", |
|
"t ʃ", |
|
"ɪ ŋ", |
|
"ᵻ d", |
|
"ˈ iː", |
|
"ˌ iː", |
|
"ˈ ɪ", |
|
"ˌ ɪ", |
|
"ˈ eɪ", |
|
"ˌ eɪ", |
|
"ˈ ɛ", |
|
"ˌ ɛ", |
|
"ˈ æ", |
|
"ˌ æ", |
|
"ˈ ɑː", |
|
"ˌ ɑː", |
|
"ˈ ɔː", |
|
"ˌ ɔː", |
|
"oː ɹ", |
|
"ˈ oːɹ", |
|
"ˌ oːɹ", |
|
"ˈ oʊ", |
|
"ˌ oʊ", |
|
"ˈ ʊ", |
|
"ˌ ʊ", |
|
"ˈ uː", |
|
"ˌ uː", |
|
"ˈ ɜː", |
|
"ˌ ɜː", |
|
"ˈ ʌ", |
|
"ˌ ʌ", |
|
"ˈ aɪ", |
|
"ˌ aɪ", |
|
"ˈ aʊ", |
|
"ˌ aʊ", |
|
"ˈ ɔɪ", |
|
"ˌ ɔɪ", |
|
"ˈ ɚ", |
|
"ˌ ɐ" |
|
] |
|
} |
|
} |