Upload tokenizer
Browse files- tokenizer.json +102 -1
tokenizer.json
CHANGED
@@ -987,7 +987,108 @@
|
|
987 |
}
|
988 |
]
|
989 |
},
|
990 |
-
"post_processor":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
991 |
"decoder": {
|
992 |
"type": "ByteLevel",
|
993 |
"add_prefix_space": true,
|
|
|
987 |
}
|
988 |
]
|
989 |
},
|
990 |
+
"post_processor": {
|
991 |
+
"type": "TemplateProcessing",
|
992 |
+
"single": [
|
993 |
+
{
|
994 |
+
"SpecialToken": {
|
995 |
+
"id": "[CLS]",
|
996 |
+
"type_id": 0
|
997 |
+
}
|
998 |
+
},
|
999 |
+
{
|
1000 |
+
"Sequence": {
|
1001 |
+
"id": "A",
|
1002 |
+
"type_id": 0
|
1003 |
+
}
|
1004 |
+
},
|
1005 |
+
{
|
1006 |
+
"SpecialToken": {
|
1007 |
+
"id": "[SEP]",
|
1008 |
+
"type_id": 0
|
1009 |
+
}
|
1010 |
+
}
|
1011 |
+
],
|
1012 |
+
"pair": [
|
1013 |
+
{
|
1014 |
+
"SpecialToken": {
|
1015 |
+
"id": "[CLS]",
|
1016 |
+
"type_id": 0
|
1017 |
+
}
|
1018 |
+
},
|
1019 |
+
{
|
1020 |
+
"Sequence": {
|
1021 |
+
"id": "A",
|
1022 |
+
"type_id": 0
|
1023 |
+
}
|
1024 |
+
},
|
1025 |
+
{
|
1026 |
+
"SpecialToken": {
|
1027 |
+
"id": "[SEP]",
|
1028 |
+
"type_id": 0
|
1029 |
+
}
|
1030 |
+
},
|
1031 |
+
{
|
1032 |
+
"Sequence": {
|
1033 |
+
"id": "B",
|
1034 |
+
"type_id": 0
|
1035 |
+
}
|
1036 |
+
},
|
1037 |
+
{
|
1038 |
+
"SpecialToken": {
|
1039 |
+
"id": "[SEP]",
|
1040 |
+
"type_id": 0
|
1041 |
+
}
|
1042 |
+
}
|
1043 |
+
],
|
1044 |
+
"special_tokens": {
|
1045 |
+
"[CLS]": {
|
1046 |
+
"id": "[CLS]",
|
1047 |
+
"ids": [
|
1048 |
+
65003
|
1049 |
+
],
|
1050 |
+
"tokens": [
|
1051 |
+
"[CLS]"
|
1052 |
+
]
|
1053 |
+
},
|
1054 |
+
"[MASK]": {
|
1055 |
+
"id": "[MASK]",
|
1056 |
+
"ids": [
|
1057 |
+
65004
|
1058 |
+
],
|
1059 |
+
"tokens": [
|
1060 |
+
"[MASK]"
|
1061 |
+
]
|
1062 |
+
},
|
1063 |
+
"[PAD]": {
|
1064 |
+
"id": "[PAD]",
|
1065 |
+
"ids": [
|
1066 |
+
65002
|
1067 |
+
],
|
1068 |
+
"tokens": [
|
1069 |
+
"[PAD]"
|
1070 |
+
]
|
1071 |
+
},
|
1072 |
+
"[SEP]": {
|
1073 |
+
"id": "[SEP]",
|
1074 |
+
"ids": [
|
1075 |
+
65001
|
1076 |
+
],
|
1077 |
+
"tokens": [
|
1078 |
+
"[SEP]"
|
1079 |
+
]
|
1080 |
+
},
|
1081 |
+
"[UNK]": {
|
1082 |
+
"id": "[UNK]",
|
1083 |
+
"ids": [
|
1084 |
+
65000
|
1085 |
+
],
|
1086 |
+
"tokens": [
|
1087 |
+
"[UNK]"
|
1088 |
+
]
|
1089 |
+
}
|
1090 |
+
}
|
1091 |
+
},
|
1092 |
"decoder": {
|
1093 |
"type": "ByteLevel",
|
1094 |
"add_prefix_space": true,
|