{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[CLS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "[SEP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "[MASK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 5, "content": "[BLANK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "Sequence", "normalizers": [ { "type": "NFKC" }, { "type": "Lowercase" }, { "type": "StripAccents" }, { "type": "Replace", "pattern": { "String": "" }, "content": "[UNK]" }, { "type": "Replace", "pattern": { "Regex": "[^֐-׿\u0000-‌-‿₠-₿∀-⋿⅐-↋ff-ﭏ]+" }, "content": "[UNK]" } ] }, "pre_tokenizer": { "type": "Split", "pattern": { "Regex": "(\\[UNK\\]|[\\s\\S])" }, "behavior": "Removed", "invert": true }, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "[CLS]", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 0 } } ], "pair": [ { "SpecialToken": { "id": "[CLS]", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 1 } } ], "special_tokens": { "[CLS]": { "id": "[CLS]", "ids": [ 1 ], "tokens": [ "[CLS]" ] }, "[SEP]": { "id": "[SEP]", "ids": [ 2 ], "tokens": [ "[SEP]" ] } } }, "decoder": null, "model": { "type": "WordPiece", "unk_token": "[UNK]", "continuing_subword_prefix": "##", "max_input_chars_per_word": 100, "vocab": { "[UNK]": 0, "[CLS]": 1, "[SEP]": 2, "[PAD]": 3, "[MASK]": 4, "[BLANK]": 5, "\u0000": 6, "\u0001": 7, "\u0002": 8, "\u0003": 9, "\u0004": 10, "\u0005": 11, "\u0006": 12, "\u0007": 13, "\b": 14, "\t": 15, "\n": 16, "\u000b": 17, "\u000e": 18, "\u000f": 19, "\u0010": 20, "\u0011": 21, "\u0012": 22, "\u0013": 23, "\u0014": 24, "\u0015": 25, "\u0016": 26, "\u0017": 27, "\u0018": 28, "\u0019": 29, "\u001a": 30, "\u001b": 31, "\u001c": 32, "\u001d": 33, "\u001e": 34, "\u001f": 35, " ": 36, "!": 37, "\"": 38, "#": 39, "$": 40, "%": 41, "&": 42, "'": 43, "(": 44, ")": 45, "*": 46, "+": 47, ",": 48, "-": 49, ".": 50, "/": 51, "0": 52, "1": 53, "2": 54, "3": 55, "4": 56, "5": 57, "6": 58, "7": 59, "8": 60, "9": 61, ":": 62, ";": 63, "<": 64, "=": 65, ">": 66, "?": 67, "@": 68, "K": 69, "N": 70, "U": 71, "[": 72, "\\": 73, "]": 74, "^": 75, "_": 76, "`": 77, "a": 78, "b": 79, "c": 80, "d": 81, "e": 82, "f": 83, "g": 84, "h": 85, "i": 86, "j": 87, "k": 88, "l": 89, "m": 90, "n": 91, "o": 92, "p": 93, "q": 94, "r": 95, "s": 96, "t": 97, "u": 98, "v": 99, "w": 100, "x": 101, "y": 102, "z": 103, "{": 104, "|": 105, "}": 106, "~": 107, "": 108, "€": 109, "": 110, "‚": 111, "ƒ": 112, "„": 113, "†": 114, "ˆ": 115, "‰": 116, "Œ": 117, "": 118, "Ž": 119, "": 120, "": 121, "‘": 122, "’": 123, "“": 124, "”": 125, "•": 126, "–": 127, "—": 128, "˜": 129, "™": 130, "š": 131, "›": 132, "œ": 133, "": 134, "ž": 135, "Ÿ": 136, "¡": 137, "¢": 138, "£": 139, "¤": 140, "¥": 141, "¦": 142, "§": 143, "©": 144, "«": 145, "¬": 146, "­": 147, "®": 148, "°": 149, "±": 150, "¶": 151, "·": 152, "»": 153, "¿": 154, "×": 155, "ß": 156, "à": 157, "á": 158, "â": 159, "ã": 160, "ä": 161, "å": 162, "æ": 163, "ç": 164, "è": 165, "é": 166, "ê": 167, "ë": 168, "ì": 169, "í": 170, "î": 171, "ï": 172, "ð": 173, "ñ": 174, "ò": 175, "ó": 176, "ô": 177, "õ": 178, "ö": 179, "÷": 180, "ø": 181, "ù": 182, "ú": 183, "û": 184, "ü": 185, "ý": 186, "þ": 187, "ÿ": 188, "ȼ": 189, "˖": 190, "˗": 191, "ͱ": 192, "ͳ": 193, "͵": 194, "ӏ": 195, "ԝ": 196, "֎": 197, "־": 198, "׀": 199, "׃": 200, "׆": 201, "׈": 202, "׉": 203, "׊": 204, "׋": 205, "׍": 206, "׎": 207, "׏": 208, "א": 209, "ב": 210, "ג": 211, "ד": 212, "ה": 213, "ו": 214, "ז": 215, "ח": 216, "ט": 217, "י": 218, "ך": 219, "כ": 220, "ל": 221, "ם": 222, "מ": 223, "ן": 224, "נ": 225, "ס": 226, "ע": 227, "ף": 228, "פ": 229, "ץ": 230, "צ": 231, "ק": 232, "ר": 233, "ש": 234, "ת": 235, "׫": 236, "װ": 237, "ױ": 238, "ײ": 239, "׳": 240, "״": 241, "׸": 242, "׹": 243, "׺": 244, "׿": 245, "،": 246, "؛": 247, "؟": 248, "٪": 249, "٭": 250, "۔": 251, "۝": 252, "۞": 253, "۩": 254, "ߋ": 255, "ߐ": 256, "ߕ": 257, "ߗ": 258, "ߜ": 259, "ߝ": 260, "ߞ": 261, "ߟ": 262, "ߠ": 263, "ߡ": 264, "ߢ": 265, "ߨ": 266, "ߩ": 267, "ߪ": 268, "।": 269, "฿": 270, "๏": 271, "፡": 272, "ᤞ": 273, "᧐": 274, "ᨁ": 275, "ᨅ": 276, "ᨔ": 277, "ᨕ": 278, "‌": 279, "‍": 280, "‎": 281, "‏": 282, "‐": 283, "‒": 284, "–": 285, "—": 286, "―": 287, "‖": 288, "‘": 289, "’": 290, "‚": 291, "‛": 292, "“": 293, "”": 294, "„": 295, "‟": 296, "†": 297, "‡": 298, "•": 299, "‣": 300, "‧": 301, "
": 302, "
": 303, "‪": 304, "‫": 305, "‬": 306, "‭": 307, "‮": 308, "‰": 309, "′": 310, "‹": 311, "›": 312, "※": 313, "‽": 314, "‿": 315, "⁃": 316, "⁄": 317, "⁎": 318, "⁠": 319, "⁣": 320, "⁦": 321, "⁧": 322, "⁨": 323, "⁩": 324, "₡": 325, "₣": 326, "₤": 327, "₦": 328, "₩": 329, "₪": 330, "₫": 331, "€": 332, "₭": 333, "₮": 334, "₱": 335, "₴": 336, "₵": 337, "₸": 338, "₹": 339, "₺": 340, "₼": 341, "₽": 342, "₾": 343, "₿": 344, "ↄ": 345, "←": 346, "↑": 347, "→": 348, "↓": 349, "↔": 350, "↗": 351, "↘": 352, "↙": 353, "↩": 354, "↳": 355, "↵": 356, "⇌": 357, "⇐": 358, "⇒": 359, "⇓": 360, "⇔": 361, "⇦": 362, "⇧": 363, "⇨": 364, "⇱": 365, "∀": 366, "∂": 367, "∃": 368, "∅": 369, "∆": 370, "∇": 371, "∈": 372, "∉": 373, "∍": 374, "∎": 375, "∏": 376, "∐": 377, "∑": 378, "−": 379, "∕": 380, "∗": 381, "∘": 382, "∙": 383, "√": 384, "∛": 385, "∝": 386, "∞": 387, "∟": 388, "∠": 389, "∢": 390, "∧": 391, "∨": 392, "∩": 393, "∪": 394, "∫": 395, "∴": 396, "∼": 397, "≅": 398, "≈": 399, "≋": 400, "≟": 401, "≠": 402, "≡": 403, "≤": 404, "≥": 405, "≦": 406, "≧": 407, "≪": 408, "≫": 409, "⊂": 410, "⊃": 411, "⊆": 412, "⊇": 413, "⊕": 414, "⊗": 415, "⊙": 416, "⊞": 417, "⊠": 418, "⊢": 419, "⊤": 420, "⊦": 421, "⋃": 422, "⋄": 423, "⋅": 424, "⋆": 425, "⋇": 426, "⋧": 427, "⋮": 428, "⋯": 429, "⌀": 430, "⌂": 431, "⌘": 432, "⌚": 433, "⌛": 434, "⌥": 435, "⎙": 436, "⏎": 437, "⏪": 438, "⏮": 439, "⏰": 440, "⏱": 441, "⏳": 442, "⏺": 443, "─": 444, "│": 445, "┐": 446, "└": 447, "┴": 448, "╋": 449, "║": 450, "╬": 451, "█": 452, "▌": 453, "░": 454, "■": 455, "□": 456, "▪": 457, "▫": 458, "▲": 459, "△": 460, "▶": 461, "▷": 462, "▸": 463, "►": 464, "▼": 465, "▽": 466, "▾": 467, "◀": 468, "◁": 469, "◂": 470, "◃": 471, "◄": 472, "◆": 473, "◇": 474, "◈": 475, "◉": 476, "◊": 477, "○": 478, "◌": 479, "◎": 480, "●": 481, "◕": 482, "◘": 483, "◙": 484, "◡": 485, "◥": 486, "◦": 487, "◴": 488, "◻": 489, "◼": 490, "◽": 491, "◾": 492, "☀": 493, "☁": 494, "☂": 495, "☃": 496, "☄": 497, "★": 498, "☆": 499, "☉": 500, "☎": 501, "☏": 502, "☐": 503, "☑": 504, "☒": 505, "☔": 506, "☕": 507, "☘": 508, "☚": 509, "☜": 510, "☝": 511, "☠": 512, "☢": 513, "☯": 514, "☰": 515, "☹": 516, "☺": 517, "☻": 518, "☼": 519, "♀": 520, "♂": 521, "♔": 522, "♕": 523, "♚": 524, "♛": 525, "♟": 526, "♠": 527, "♡": 528, "♢": 529, "♣": 530, "♥": 531, "♦": 532, "♧": 533, "♨": 534, "♪": 535, "♫": 536, "♬": 537, "♭": 538, "♯": 539, "♰": 540, "♻": 541, "♿": 542, "⚇": 543, "⚒": 544, "⚓": 545, "⚔": 546, "⚖": 547, "⚘": 548, "⚛": 549, "⚜": 550, "⚠": 551, "⚡": 552, "⚧": 553, "⚪": 554, "⚫": 555, "⚽": 556, "⛔": 557, "⛰": 558, "✂": 559, "✅": 560, "✆": 561, "✈": 562, "✉": 563, "✊": 564, "✋": 565, "✌": 566, "✍": 567, "✎": 568, "✏": 569, "✓": 570, "✔": 571, "✖": 572, "✗": 573, "✙": 574, "✛": 575, "✡": 576, "✦": 577, "✧": 578, "✨": 579, "✩": 580, "✪": 581, "✫": 582, "✭": 583, "✮": 584, "✯": 585, "✰": 586, "✱": 587, "✲": 588, "✳": 589, "✴": 590, "✶": 591, "✸": 592, "✺": 593, "✻": 594, "✽": 595, "✾": 596, "✿": 597, "❀": 598, "❁": 599, "❂": 600, "❃": 601, "❄": 602, "❇": 603, "❈": 604, "❋": 605, "❌": 606, "❎": 607, "❏": 608, "❑": 609, "❒": 610, "❓": 611, "❔": 612, "❕": 613, "❖": 614, "❗": 615, "❝": 616, "❞": 617, "❣": 618, "❤": 619, "❥": 620, "❦": 621, "❭": 622, "❯": 623, "❶": 624, "❷": 625, "❸": 626, "➊": 627, "➋": 628, "➌": 629, "➍": 630, "➎": 631, "➔": 632, "➕": 633, "➖": 634, "➡": 635, "➢": 636, "➤": 637, "➦": 638, "⟨": 639, "⟩": 640, "⠀": 641, "⤵": 642, "⤶": 643, "⦁": 644, "⦿": 645, "⧼": 646, "⧽": 647, "⬅": 648, "⬆": 649, "⬇": 650, "⬛": 651, "⬜": 652, "⭐": 653, "⭕": 654, "ⰲ": 655, "ⰽ": 656, "ⰾ": 657, "ⱀ": 658, "ⱁ": 659, "ⱄ": 660, "ⱏ": 661, "ⱐ": 662, "ⱑ": 663, "ⱥ": 664, "ⲟ": 665, "ⴰ": 666, "ⴻ": 667, "ⵍ": 668, "ⵏ": 669, "ⵔ": 670, "ⵢ": 671, "ⵣ": 672, "、": 673, "。": 674, "〈": 675, "〉": 676, "《": 677, "》": 678, "「": 679, "」": 680, "【": 681, "】": 682, "ꙭ": 683, "": 684, "": 685, "": 686, "": 687, "": 688, "": 689, "": 690, "": 691, "": 692, "": 693, "": 694, "": 695, "": 696, "": 697, "": 698, "": 699, "": 700, "": 701, "": 702, "": 703, "": 704, "": 705, "": 706, "": 707, "": 708, "": 709, "": 710, "": 711, "": 712, "": 713, "": 714, "": 715, "": 716, "": 717, "": 718, "": 719, "": 720, "": 721, "": 722, "": 723, "": 724, "": 725, "": 726, "": 727, "": 728, "": 729, "": 730, "": 731, "": 732, "": 733, "": 734, "": 735, "": 736, "": 737, "": 738, "": 739, "": 740, "": 741, "": 742, "": 743, "": 744, "": 745, "": 746, "": 747, "": 748, "": 749, "": 750, "": 751, "": 752, "": 753, "": 754, "": 755, "": 756, "": 757, "": 758, "": 759, "": 760, "": 761, "": 762, "": 763, "": 764, "": 765, "": 766, "": 767, "": 768, "": 769, "": 770, "": 771, "": 772, "": 773, "": 774, "": 775, "": 776, "": 777, "": 778, "": 779, "": 780, "": 781, "": 782, "": 783, "": 784, "": 785, "": 786, "": 787, "": 788, "": 789, "": 790, "": 791, "": 792, "": 793, "": 794, "": 795, "": 796, "": 797, "": 798, "": 799, "": 800, "": 801, "": 802, "": 803, "": 804, "": 805, "": 806, "": 807, "": 808, "": 809, "": 810, "": 811, "": 812, "": 813, "": 814, "": 815, "": 816, "": 817, "": 818, "": 819, "": 820, "": 821, "": 822, "": 823, "": 824, "": 825, "": 826, "": 827, "": 828, "": 829, "": 830, "": 831, "": 832, "": 833, "": 834, "": 835, "": 836, "": 837, "": 838, "": 839, "": 840 } } }