Shaltiel's picture
Upload folder using huggingface_hub
b288887 verified
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "[UNK]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "[CLS]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "[SEP]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "[PAD]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 4,
"content": "[MASK]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 5,
"content": "[BLANK]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": {
"type": "Sequence",
"normalizers": [
{
"type": "NFKC"
},
{
"type": "Lowercase"
},
{
"type": "StripAccents"
},
{
"type": "Replace",
"pattern": {
"String": "<foreign>"
},
"content": "[UNK]"
},
{
"type": "Replace",
"pattern": {
"Regex": "[^֐-׿\u0000-‌-‿₠-₿∀-⋿⅐-↋ff-ﭏ]+"
},
"content": "[UNK]"
}
]
},
"pre_tokenizer": {
"type": "Split",
"pattern": {
"Regex": "(\\[UNK\\]|[\\s\\S])"
},
"behavior": "Removed",
"invert": true
},
"post_processor": {
"type": "TemplateProcessing",
"single": [
{
"SpecialToken": {
"id": "[CLS]",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "[SEP]",
"type_id": 0
}
}
],
"pair": [
{
"SpecialToken": {
"id": "[CLS]",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "[SEP]",
"type_id": 0
}
},
{
"Sequence": {
"id": "B",
"type_id": 1
}
},
{
"SpecialToken": {
"id": "[SEP]",
"type_id": 1
}
}
],
"special_tokens": {
"[CLS]": {
"id": "[CLS]",
"ids": [
1
],
"tokens": [
"[CLS]"
]
},
"[SEP]": {
"id": "[SEP]",
"ids": [
2
],
"tokens": [
"[SEP]"
]
}
}
},
"decoder": null,
"model": {
"type": "WordPiece",
"unk_token": "[UNK]",
"continuing_subword_prefix": "##",
"max_input_chars_per_word": 100,
"vocab": {
"[UNK]": 0,
"[CLS]": 1,
"[SEP]": 2,
"[PAD]": 3,
"[MASK]": 4,
"[BLANK]": 5,
"\u0000": 6,
"\u0001": 7,
"\u0002": 8,
"\u0003": 9,
"\u0004": 10,
"\u0005": 11,
"\u0006": 12,
"\u0007": 13,
"\b": 14,
"\t": 15,
"\n": 16,
"\u000b": 17,
"\u000e": 18,
"\u000f": 19,
"\u0010": 20,
"\u0011": 21,
"\u0012": 22,
"\u0013": 23,
"\u0014": 24,
"\u0015": 25,
"\u0016": 26,
"\u0017": 27,
"\u0018": 28,
"\u0019": 29,
"\u001a": 30,
"\u001b": 31,
"\u001c": 32,
"\u001d": 33,
"\u001e": 34,
"\u001f": 35,
" ": 36,
"!": 37,
"\"": 38,
"#": 39,
"$": 40,
"%": 41,
"&": 42,
"'": 43,
"(": 44,
")": 45,
"*": 46,
"+": 47,
",": 48,
"-": 49,
".": 50,
"/": 51,
"0": 52,
"1": 53,
"2": 54,
"3": 55,
"4": 56,
"5": 57,
"6": 58,
"7": 59,
"8": 60,
"9": 61,
":": 62,
";": 63,
"<": 64,
"=": 65,
">": 66,
"?": 67,
"@": 68,
"K": 69,
"N": 70,
"U": 71,
"[": 72,
"\\": 73,
"]": 74,
"^": 75,
"_": 76,
"`": 77,
"a": 78,
"b": 79,
"c": 80,
"d": 81,
"e": 82,
"f": 83,
"g": 84,
"h": 85,
"i": 86,
"j": 87,
"k": 88,
"l": 89,
"m": 90,
"n": 91,
"o": 92,
"p": 93,
"q": 94,
"r": 95,
"s": 96,
"t": 97,
"u": 98,
"v": 99,
"w": 100,
"x": 101,
"y": 102,
"z": 103,
"{": 104,
"|": 105,
"}": 106,
"~": 107,
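"\u007f": 108,
"\u0080": 109,
"\u0081": 110,
"\u0082": 111,
"\u0083": 112,
"\u0084": 113,
"\u0086": 114,
"\u0088": 115,
"\u0089": 116,
"\u008c": 117,
"\u008d": 118,
"\u008e": 119,
"\u008f": 120,
"\u0090": 121,
"\u0091": 122,
"\u0092": 123,
"\u0093": 124,
"\u0094": 125,
"\u0095": 126,
"\u0096": 127,
"\u0097": 128,
"\u0098": 129,
"\u0099": 130,
"\u009a": 131,
"\u009b": 132,
"\u009c": 133,
"\u009d": 134,
"\u009e": 135,
"\u009f": 136,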
"¡": 137,
"¢": 138,
"£": 139,
"¤": 140,
"¥": 141,
"¦": 142,
"§": 143,
"©": 144,
"«": 145,
"¬": 146,
"­": 147,
"®": 148,
"°": 149,
"±": 150,
"¶": 151,
"·": 152,
"»": 153,
"¿": 154,
"×": 155,
"ß": 156,
"à": 157,
"á": 158,
"â": 159,
"ã": 160,
"ä": 161,
"å": 162,
"æ": 163,
"ç": 164,
"è": 165,
"é": 166,
"ê": 167,
"ë": 168,
"ì": 169,
"í": 170,
"î": 171,
"ï": 172,
"ð": 173,
"ñ": 174,
"ò": 175,
"ó": 176,
"ô": 177,
"õ": 178,
"ö": 179,
"÷": 180,
"ø": 181,
"ù": 182,
"ú": 183,
"û": 184,
"ü": 185,
"ý": 186,
"þ": 187,
"ÿ": 188,
"ȼ": 189,
"˖": 190,
"˗": 191,
"ͱ": 192,
"ͳ": 193,
"͵": 194,
"ӏ": 195,
"ԝ": 196,
"֎": 197,
"־": 198,
"׀": 199,
"׃": 200,
"׆": 201,
"׈": 202,
"׉": 203,
"׊": 204,
"׋": 205,
"׍": 206,
"׎": 207,
"׏": 208,
"א": 209,
"ב": 210,
"ג": 211,
"ד": 212,
"ה": 213,
"ו": 214,
"ז": 215,
"ח": 216,
"ט": 217,
"י": 218,
"ך": 219,
"כ": 220,
"ל": 221,
"ם": 222,
"מ": 223,
"ן": 224,
"נ": 225,
"ס": 226,
"ע": 227,
"ף": 228,
"פ": 229,
"ץ": 230,
"צ": 231,
"ק": 232,
"ר": 233,
"ש": 234,
"ת": 235,
"׫": 236,
"װ": 237,
"ױ": 238,
"ײ": 239,
"׳": 240,
"״": 241,
"׸": 242,
"׹": 243,
"׺": 244,
"׿": 245,
"،": 246,
"؛": 247,
"؟": 248,
"٪": 249,
"٭": 250,
"۔": 251,
"۝": 252,
"۞": 253,
"۩": 254,
"ߋ": 255,
"ߐ": 256,
"ߕ": 257,
"ߗ": 258,
"ߜ": 259,
"ߝ": 260,
"ߞ": 261,
"ߟ": 262,
"ߠ": 263,
"ߡ": 264,
"ߢ": 265,
"ߨ": 266,
"ߩ": 267,
"ߪ": 268,
"।": 269,
"฿": 270,
"๏": 271,
"፡": 272,
"ᤞ": 273,
"᧐": 274,
"ᨁ": 275,
"ᨅ": 276,
"ᨔ": 277,
"ᨕ": 278,
"‌": 279,
"‍": 280,
"‎": 281,
"‏": 282,
"‐": 283,
"‒": 284,
"–": 285,
"—": 286,
"―": 287,
"‖": 288,
"‘": 289,
"’": 290,
"‚": 291,
"‛": 292,
"“": 293,
"”": 294,
"„": 295,
"‟": 296,
"†": 297,
"‡": 298,
"•": 299,
"‣": 300,
"‧": 301,
"\u2028": 302,
"\u2029": 303,
"‪": 304,
"‫": 305,
"‬": 306,
"‭": 307,
"‮": 308,
"‰": 309,
"′": 310,
"‹": 311,
"›": 312,
"※": 313,
"‽": 314,
"‿": 315,
"⁃": 316,
"⁄": 317,
"⁎": 318,
"⁠": 319,
"⁣": 320,
"⁦": 321,
"⁧": 322,
"⁨": 323,
"⁩": 324,
"₡": 325,
"₣": 326,
"₤": 327,
"₦": 328,
"₩": 329,
"₪": 330,
"₫": 331,
"€": 332,
"₭": 333,
"₮": 334,
"₱": 335,
"₴": 336,
"₵": 337,
"₸": 338,
"₹": 339,
"₺": 340,
"₼": 341,
"₽": 342,
"₾": 343,
"₿": 344,
"ↄ": 345,
"←": 346,
"↑": 347,
"→": 348,
"↓": 349,
"↔": 350,
"↗": 351,
"↘": 352,
"↙": 353,
"↩": 354,
"↳": 355,
"↵": 356,
"⇌": 357,
"⇐": 358,
"⇒": 359,
"⇓": 360,
"⇔": 361,
"⇦": 362,
"⇧": 363,
"⇨": 364,
"⇱": 365,
"∀": 366,
"∂": 367,
"∃": 368,
"∅": 369,
"∆": 370,
"∇": 371,
"∈": 372,
"∉": 373,
"∍": 374,
"∎": 375,
"∏": 376,
"∐": 377,
"∑": 378,
"−": 379,
"∕": 380,
"∗": 381,
"∘": 382,
"∙": 383,
"√": 384,
"∛": 385,
"∝": 386,
"∞": 387,
"∟": 388,
"∠": 389,
"∢": 390,
"∧": 391,
"∨": 392,
"∩": 393,
"∪": 394,
"∫": 395,
"∴": 396,
"∼": 397,
"≅": 398,
"≈": 399,
"≋": 400,
"≟": 401,
"≠": 402,
"≡": 403,
"≤": 404,
"≥": 405,
"≦": 406,
"≧": 407,
"≪": 408,
"≫": 409,
"⊂": 410,
"⊃": 411,
"⊆": 412,
"⊇": 413,
"⊕": 414,
"⊗": 415,
"⊙": 416,
"⊞": 417,
"⊠": 418,
"⊢": 419,
"⊤": 420,
"⊦": 421,
"⋃": 422,
"⋄": 423,
"⋅": 424,
"⋆": 425,
"⋇": 426,
"⋧": 427,
"⋮": 428,
"⋯": 429,
"⌀": 430,
"⌂": 431,
"⌘": 432,
"⌚": 433,
"⌛": 434,
"⌥": 435,
"⎙": 436,
"⏎": 437,
"⏪": 438,
"⏮": 439,
"⏰": 440,
"⏱": 441,
"⏳": 442,
"⏺": 443,
"─": 444,
"│": 445,
"┐": 446,
"└": 447,
"┴": 448,
"╋": 449,
"║": 450,
"╬": 451,
"█": 452,
"▌": 453,
"░": 454,
"■": 455,
"□": 456,
"▪": 457,
"▫": 458,
"▲": 459,
"△": 460,
"▶": 461,
"▷": 462,
"▸": 463,
"►": 464,
"▼": 465,
"▽": 466,
"▾": 467,
"◀": 468,
"◁": 469,
"◂": 470,
"◃": 471,
"◄": 472,
"◆": 473,
"◇": 474,
"◈": 475,
"◉": 476,
"◊": 477,
"○": 478,
"◌": 479,
"◎": 480,
"●": 481,
"◕": 482,
"◘": 483,
"◙": 484,
"◡": 485,
"◥": 486,
"◦": 487,
"◴": 488,
"◻": 489,
"◼": 490,
"◽": 491,
"◾": 492,
"☀": 493,
"☁": 494,
"☂": 495,
"☃": 496,
"☄": 497,
"★": 498,
"☆": 499,
"☉": 500,
"☎": 501,
"☏": 502,
"☐": 503,
"☑": 504,
"☒": 505,
"☔": 506,
"☕": 507,
"☘": 508,
"☚": 509,
"☜": 510,
"☝": 511,
"☠": 512,
"☢": 513,
"☯": 514,
"☰": 515,
"☹": 516,
"☺": 517,
"☻": 518,
"☼": 519,
"♀": 520,
"♂": 521,
"♔": 522,
"♕": 523,
"♚": 524,
"♛": 525,
"♟": 526,
"♠": 527,
"♡": 528,
"♢": 529,
"♣": 530,
"♥": 531,
"♦": 532,
"♧": 533,
"♨": 534,
"♪": 535,
"♫": 536,
"♬": 537,
"♭": 538,
"♯": 539,
"♰": 540,
"♻": 541,
"♿": 542,
"⚇": 543,
"⚒": 544,
"⚓": 545,
"⚔": 546,
"⚖": 547,
"⚘": 548,
"⚛": 549,
"⚜": 550,
"⚠": 551,
"⚡": 552,
"⚧": 553,
"⚪": 554,
"⚫": 555,
"⚽": 556,
"⛔": 557,
"⛰": 558,
"✂": 559,
"✅": 560,
"✆": 561,
"✈": 562,
"✉": 563,
"✊": 564,
"✋": 565,
"✌": 566,
"✍": 567,
"✎": 568,
"✏": 569,
"✓": 570,
"✔": 571,
"✖": 572,
"✗": 573,
"✙": 574,
"✛": 575,
"✡": 576,
"✦": 577,
"✧": 578,
"✨": 579,
"✩": 580,
"✪": 581,
"✫": 582,
"✭": 583,
"✮": 584,
"✯": 585,
"✰": 586,
"✱": 587,
"✲": 588,
"✳": 589,
"✴": 590,
"✶": 591,
"✸": 592,
"✺": 593,
"✻": 594,
"✽": 595,
"✾": 596,
"✿": 597,
"❀": 598,
"❁": 599,
"❂": 600,
"❃": 601,
"❄": 602,
"❇": 603,
"❈": 604,
"❋": 605,
"❌": 606,
"❎": 607,
"❏": 608,
"❑": 609,
"❒": 610,
"❓": 611,
"❔": 612,
"❕": 613,
"❖": 614,
"❗": 615,
"❝": 616,
"❞": 617,
"❣": 618,
"❤": 619,
"❥": 620,
"❦": 621,
"❭": 622,
"❯": 623,
"❶": 624,
"❷": 625,
"❸": 626,
"➊": 627,
"➋": 628,
"➌": 629,
"➍": 630,
"➎": 631,
"➔": 632,
"➕": 633,
"➖": 634,
"➡": 635,
"➢": 636,
"➤": 637,
"➦": 638,
"⟨": 639,
"⟩": 640,
"⠀": 641,
"⤵": 642,
"⤶": 643,
"⦁": 644,
"⦿": 645,
"⧼": 646,
"⧽": 647,
"⬅": 648,
"⬆": 649,
"⬇": 650,
"⬛": 651,
"⬜": 652,
"⭐": 653,
"⭕": 654,
"ⰲ": 655,
"ⰽ": 656,
"ⰾ": 657,
"ⱀ": 658,
"ⱁ": 659,
"ⱄ": 660,
"ⱏ": 661,
"ⱐ": 662,
"ⱑ": 663,
"ⱥ": 664,
"ⲟ": 665,
"ⴰ": 666,
"ⴻ": 667,
"ⵍ": 668,
"ⵏ": 669,
"ⵔ": 670,
"ⵢ": 671,
"ⵣ": 672,
"、": 673,
"。": 674,
"〈": 675,
"〉": 676,
"《": 677,
"》": 678,
"「": 679,
"」": 680,
"【": 681,
"】": 682,
"ꙭ": 683,
"": 684,
"": 685,
"": 686,
"": 687,
"": 688,
"": 689,
"": 690,
"": 691,
"": 692,
"": 693,
"": 694,
"": 695,
"": 696,
"": 697,
"": 698,
"": 699,
"": 700,
"": 701,
"": 702,
"": 703,
"": 704,
"": 705,
"": 706,
"": 707,
"": 708,
"": 709,
"": 710,
"": 711,
"": 712,
"": 713,
"": 714,
"": 715,
"": 716,
"": 717,
"": 718,
"": 719,
"": 720,
"": 721,
"": 722,
"": 723,
"": 724,
"": 725,
"": 726,
"": 727,
"": 728,
"": 729,
"": 730,
"": 731,
"": 732,
"": 733,
"": 734,
"": 735,
"": 736,
"": 737,
"": 738,
"": 739,
"": 740,
"": 741,
"": 742,
"": 743,
"": 744,
"": 745,
"": 746,
"": 747,
"": 748,
"": 749,
"": 750,
"": 751,
"": 752,
"": 753,
"": 754,
"": 755,
"": 756,
"": 757,
"": 758,
"": 759,
"": 760,
"": 761,
"": 762,
"": 763,
"": 764,
"": 765,
"": 766,
"": 767,
"": 768,
"": 769,
"": 770,
"": 771,
"": 772,
"": 773,
"": 774,
"": 775,
"": 776,
"": 777,
"": 778,
"": 779,
"": 780,
"": 781,
"": 782,
"": 783,
"": 784,
"": 785,
"": 786,
"": 787,
"": 788,
"": 789,
"": 790,
"": 791,
"": 792,
"": 793,
"": 794,
"": 795,
"": 796,
"": 797,
"": 798,
"": 799,
"": 800,
"": 801,
"": 802,
"": 803,
"": 804,
"": 805,
"": 806,
"": 807,
"": 808,
"": 809,
"": 810,
"": 811,
"": 812,
"": 813,
"": 814,
"": 815,
"": 816,
"": 817,
"": 818,
"": 819,
"": 820,
"": 821,
"": 822,
"": 823,
"": 824,
"": 825,
"": 826,
"": 827,
"": 828,
"": 829,
"": 830,
"": 831,
"": 832,
"": 833,
"": 834,
"": 835,
"": 836,
"": 837,
"": 838,
"": 839,
"": 840
}
}
}