chatterbox / tokenizer_jp.json
Jmica's picture
Upload 2 files
4659030 verified
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "[STOP]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "[UNK]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "[SPACE]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 256,
"content": "[START]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "Whitespace"
},
"post_processor": null,
"decoder": null,
"model": {
"type": "BPE",
"dropout": null,
"unk_token": "[UNK]",
"continuing_subword_prefix": null,
"end_of_word_suffix": null,
"fuse_unk": false,
"byte_fallback": false,
"vocab": {
"[STOP]": 0,
"[UNK]": 1,
"[SPACE]": 2,
"!": 3,
"%": 4,
"&": 5,
"'": 6,
",": 7,
"-": 8,
".": 9,
"0": 10,
"1": 11,
"2": 12,
"3": 13,
"4": 14,
"5": 15,
"6": 16,
"7": 17,
"8": 18,
"9": 19,
"?": 20,
"a": 21,
"b": 22,
"c": 23,
"d": 24,
"e": 25,
"f": 26,
"g": 27,
"h": 28,
"i": 29,
"j": 30,
"k": 31,
"l": 32,
"m": 33,
"n": 34,
"o": 35,
"p": 36,
"q": 37,
"r": 38,
"s": 39,
"t": 40,
"u": 41,
"v": 42,
"w": 43,
"x": 44,
"y": 45,
"z": 46,
"ร—": 47,
"ฮฒ": 48,
"ะฐ": 49,
"ะฑ": 50,
"ะธ": 51,
"ะบ": 52,
"ะป": 53,
"ะพ": 54,
"ะฟ": 55,
"ั": 56,
"ั‹": 57,
"ุŸ": 58,
"โ€ฆ": 59,
"โ„ƒ": 60,
"โ…ฑ": 61,
"โ‘ ": 62,
"โ‘ก": 63,
"โ‘ฃ": 64,
"โ‘ค": 65,
"โ—‹": 66,
"ใ€": 67,
"ใ€‚": 68,
"ใ€…": 69,
"ใ€‡": 70,
"ใ€Œ": 71,
"ใ€": 72,
"ใ€œ": 73,
"ใ": 74,
"ใ‚": 75,
"ใƒ": 76,
"ใ„": 77,
"ใ…": 78,
"ใ†": 79,
"ใ‡": 80,
"ใˆ": 81,
"ใ‰": 82,
"ใŠ": 83,
"ใ‹": 84,
"ใŒ": 85,
"ใ": 86,
"ใŽ": 87,
"ใ": 88,
"ใ": 89,
"ใ‘": 90,
"ใ’": 91,
"ใ“": 92,
"ใ”": 93,
"ใ•": 94,
"ใ–": 95,
"ใ—": 96,
"ใ˜": 97,
"ใ™": 98,
"ใš": 99,
"ใ›": 100,
"ใœ": 101,
"ใ": 102,
"ใž": 103,
"ใŸ": 104,
"ใ ": 105,
"ใก": 106,
"ใข": 107,
"ใฃ": 108,
"ใค": 109,
"ใฅ": 110,
"ใฆ": 111,
"ใง": 112,
"ใจ": 113,
"ใฉ": 114,
"ใช": 115,
"ใซ": 116,
"ใฌ": 117,
"ใญ": 118,
"ใฎ": 119,
"ใฏ": 120,
"ใฐ": 121,
"ใฑ": 122,
"ใฒ": 123,
"ใณ": 124,
"ใด": 125,
"ใต": 126,
"ใถ": 127,
"ใท": 128,
"ใธ": 129,
"ใน": 130,
"ใบ": 131,
"ใป": 132,
"ใผ": 133,
"ใฝ": 134,
"ใพ": 135,
"ใฟ": 136,
"ใ‚€": 137,
"ใ‚": 138,
"ใ‚‚": 139,
"ใ‚ƒ": 140,
"ใ‚„": 141,
"ใ‚…": 142,
"ใ‚†": 143,
"ใ‚‡": 144,
"ใ‚ˆ": 145,
"ใ‚‰": 146,
"ใ‚Š": 147,
"ใ‚‹": 148,
"ใ‚Œ": 149,
"ใ‚": 150,
"ใ‚": 151,
"ใ‚’": 152,
"ใ‚“": 153,
"ใ‚”": 154,
"ใ‚–": 155,
"ใƒป": 156,
"ใƒผ": 157,
"ใฃใฆ": 158,
"ใฃใŸ": 159,
"ใ‚‡ใ†": 160,
"ใ—ใŸ": 161,
"ใ•ใ‚“": 162,
"ใ‹ใ‚‰": 163,
"ใ—ใฆ": 164,
"ใชใ„": 165,
"ใงใ™": 166,
"ใชใ‚“": 167,
"ใ„ใ†": 168,
"ใพใ™": 169,
"ใจใ†": 170,
"ใŸใ„": 171,
"ใใ†": 172,
"ใŸใ—": 173,
"ใฃใจ": 174,
"ใ“ใจ": 175,
"ใ‹ใ‚“": 176,
"ใ‹ใ„": 177,
"ใ„ใŸ": 178,
"ใŠใ‚‚": 179,
"ใ“ใ†": 180,
"ใ‚ใŸใ—": 181,
"ใกใ‚ƒ": 182,
"ใ‚“ใ ": 183,
"ใ‚ใ‚Š": 184,
"ใ‚ˆใ†": 185,
"ใ‚“ใช": 186,
"ใพใ—ใŸ": 187,
"ใ„ใฎ": 188,
"ใ‚…ใ†": 189,
"ใกใ‚‡": 190,
"ใ‘ใฉ": 191,
"ใ‚ใ‚‹": 192,
"ใ•ใ„": 193,
"ใ˜ใ‚ƒ": 194,
"ใ‚‰ใ„": 195,
"ใ‚“ใฎ": 196,
"ใกใ‚‡ใฃใจ": 197,
"ใ„ใ‚‹": 198,
"ใ‚“ใง": 199,
"ใจใ‹": 200,
"ใ“ใฎ": 201,
"ใซใช": 202,
"ใใฎ": 203,
"ใ„ใฆ": 204,
"ใชใ‹": 205,
"ใ—ใ‚‡ใ†": 206,
"ใ—ใ‚‡": 207,
"ใจใŠใ‚‚": 208,
"ใ‚ใ‚ŠใŒ": 209,
"ใใ‚Œ": 210,
"ใจใ„ใ†": 211,
"ใชใ‚“ใ‹": 212,
"ใฟใช": 213,
"ใฏใ„": 214,
"ใ‚‚ใ†": 215,
"ใ—ใ‚“": 216,
"ใฎใง": 217,
"ใ‚ใ‚ŠใŒใจใ†": 218,
"ใปใ†": 219,
"ใ‹ใช": 220,
"ใ„ใพใ™": 221,
"ใ“ใ‚Œ": 222,
"ใ“ใ‚“": 223,
"ใ˜ใ‚‡ใ†": 224,
"ใŒใ„": 225,
"ใ—ใ„": 226,
"ใฉใ†": 227,
"ใงใ‚‚": 228,
"ใฟใŸใ„": 229,
"ใ›ใ‚“": 230,
"ใฏใช": 231,
"ใ™ใ‚‹": 232,
"ใ‚Œใฆ": 233,
"ใ›ใ„": 234,
"ใจใ": 235,
"ใ‚ใฎ": 236,
"ใ—ใ‚ƒ": 237,
"ใ‚‚ใฎ": 238,
"ใ ใ„": 239,
"ใ‚“ใงใ™": 240,
"ใ‚“ใฏ": 241,
"ใ‚ใ†": 242,
"ใปใ‚“": 243,
"ใŸใฎ": 244,
"ใ„ใ„": 245,
"ใฟใŸใ„ใช": 246,
"ใ ใฃใŸ": 247,
"ใซใ‚“": 248,
"ใฃใฆใ„ใ†": 249,
"ใ„ใฎใ‚Š": 250,
"ใœใ‚“": 251,
"ใ„ใŸใ ": 252,
"ใซใก": 253,
"ใงใ": 254,
"ใงใฏ": 255,
"[START]": 256
},
"merges": [
"ใฃ ใฆ",
"ใฃ ใŸ",
"ใ‚‡ ใ†",
"ใ— ใŸ",
"ใ• ใ‚“",
"ใ‹ ใ‚‰",
"ใ— ใฆ",
"ใช ใ„",
"ใง ใ™",
"ใช ใ‚“",
"ใ„ ใ†",
"ใพ ใ™",
"ใจ ใ†",
"ใŸ ใ„",
"ใ ใ†",
"ใŸ ใ—",
"ใฃ ใจ",
"ใ“ ใจ",
"ใ‹ ใ‚“",
"ใ‹ ใ„",
"ใ„ ใŸ",
"ใŠ ใ‚‚",
"ใ“ ใ†",
"ใ‚ ใŸใ—",
"ใก ใ‚ƒ",
"ใ‚“ ใ ",
"ใ‚ ใ‚Š",
"ใ‚ˆ ใ†",
"ใ‚“ ใช",
"ใพ ใ—ใŸ",
"ใ„ ใฎ",
"ใ‚… ใ†",
"ใก ใ‚‡",
"ใ‘ ใฉ",
"ใ‚ ใ‚‹",
"ใ• ใ„",
"ใ˜ ใ‚ƒ",
"ใ‚‰ ใ„",
"ใ‚“ ใฎ",
"ใกใ‚‡ ใฃใจ",
"ใ„ ใ‚‹",
"ใ‚“ ใง",
"ใจ ใ‹",
"ใ“ ใฎ",
"ใซ ใช",
"ใ ใฎ",
"ใ„ ใฆ",
"ใช ใ‹",
"ใ— ใ‚‡ใ†",
"ใ— ใ‚‡",
"ใจ ใŠใ‚‚",
"ใ‚ใ‚Š ใŒ",
"ใ ใ‚Œ",
"ใจ ใ„ใ†",
"ใชใ‚“ ใ‹",
"ใฟ ใช",
"ใฏ ใ„",
"ใ‚‚ ใ†",
"ใ— ใ‚“",
"ใฎ ใง",
"ใ‚ใ‚ŠใŒ ใจใ†",
"ใป ใ†",
"ใ‹ ใช",
"ใ„ ใพใ™",
"ใ“ ใ‚Œ",
"ใ“ ใ‚“",
"ใ˜ ใ‚‡ใ†",
"ใŒ ใ„",
"ใ— ใ„",
"ใฉ ใ†",
"ใง ใ‚‚",
"ใฟ ใŸใ„",
"ใ› ใ‚“",
"ใฏ ใช",
"ใ™ ใ‚‹",
"ใ‚Œ ใฆ",
"ใ› ใ„",
"ใจ ใ",
"ใ‚ ใฎ",
"ใ— ใ‚ƒ",
"ใ‚‚ ใฎ",
"ใ  ใ„",
"ใ‚“ ใงใ™",
"ใ‚“ ใฏ",
"ใ‚ ใ†",
"ใป ใ‚“",
"ใŸ ใฎ",
"ใ„ ใ„",
"ใฟใŸใ„ ใช",
"ใ  ใฃใŸ",
"ใซ ใ‚“",
"ใฃใฆ ใ„ใ†",
"ใ„ใฎ ใ‚Š",
"ใœ ใ‚“",
"ใ„ใŸ ใ ",
"ใซ ใก",
"ใง ใ",
"ใง ใฏ"
],
"language": "multi"
}
}