cahya committed
Commit 69e852e (verified) · Parent(s): 065eb75

Upload tokenizer

Files changed (1):
  1. tokenizer.json +102 -1
tokenizer.json CHANGED
@@ -987,7 +987,108 @@
       }
     ]
   },
-  "post_processor": null,
+  "post_processor": {
+    "type": "TemplateProcessing",
+    "single": [
+      {
+        "SpecialToken": {
+          "id": "[CLS]",
+          "type_id": 0
+        }
+      },
+      {
+        "Sequence": {
+          "id": "A",
+          "type_id": 0
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "[SEP]",
+          "type_id": 0
+        }
+      }
+    ],
+    "pair": [
+      {
+        "SpecialToken": {
+          "id": "[CLS]",
+          "type_id": 0
+        }
+      },
+      {
+        "Sequence": {
+          "id": "A",
+          "type_id": 0
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "[SEP]",
+          "type_id": 0
+        }
+      },
+      {
+        "Sequence": {
+          "id": "B",
+          "type_id": 0
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "[SEP]",
+          "type_id": 0
+        }
+      }
+    ],
+    "special_tokens": {
+      "[CLS]": {
+        "id": "[CLS]",
+        "ids": [
+          65003
+        ],
+        "tokens": [
+          "[CLS]"
+        ]
+      },
+      "[MASK]": {
+        "id": "[MASK]",
+        "ids": [
+          65004
+        ],
+        "tokens": [
+          "[MASK]"
+        ]
+      },
+      "[PAD]": {
+        "id": "[PAD]",
+        "ids": [
+          65002
+        ],
+        "tokens": [
+          "[PAD]"
+        ]
+      },
+      "[SEP]": {
+        "id": "[SEP]",
+        "ids": [
+          65001
+        ],
+        "tokens": [
+          "[SEP]"
+        ]
+      },
+      "[UNK]": {
+        "id": "[UNK]",
+        "ids": [
+          65000
+        ],
+        "tokens": [
+          "[UNK]"
+        ]
+      }
+    }
+  },
   "decoder": {
     "type": "ByteLevel",
     "add_prefix_space": true,