CrabInHoney's picture
Upload 6 files
9232dc6 verified
{
"version": "1.0",
"truncation": {
"direction": "Right",
"max_length": 34,
"strategy": "LongestFirst",
"stride": 0
},
"padding": {
"strategy": {
"Fixed": 34
},
"direction": "Right",
"pad_to_multiple_of": null,
"pad_id": 0,
"pad_type_id": 0,
"pad_token": "[PAD]"
},
"added_tokens": [
{
"id": 0,
"content": "[PAD]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "[UNK]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "[CLS]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "[SEP]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 4,
"content": "[MASK]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": {
"type": "BertNormalizer",
"clean_text": true,
"handle_chinese_chars": true,
"strip_accents": false,
"lowercase": true
},
"pre_tokenizer": {
"type": "BertPreTokenizer"
},
"post_processor": {
"type": "TemplateProcessing",
"single": [
{
"SpecialToken": {
"id": "[CLS]",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "[SEP]",
"type_id": 0
}
}
],
"pair": [
{
"SpecialToken": {
"id": "[CLS]",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "[SEP]",
"type_id": 0
}
},
{
"Sequence": {
"id": "B",
"type_id": 1
}
},
{
"SpecialToken": {
"id": "[SEP]",
"type_id": 1
}
}
],
"special_tokens": {
"[CLS]": {
"id": "[CLS]",
"ids": [
2
],
"tokens": [
"[CLS]"
]
},
"[SEP]": {
"id": "[SEP]",
"ids": [
3
],
"tokens": [
"[SEP]"
]
}
}
},
"decoder": {
"type": "WordPiece",
"prefix": "##",
"cleanup": true
},
"model": {
"type": "WordPiece",
"unk_token": "[UNK]",
"continuing_subword_prefix": "##",
"max_input_chars_per_word": 100,
"vocab": {
"[PAD]": 0,
"[UNK]": 1,
"[CLS]": 2,
"[SEP]": 3,
"[MASK]": 4,
"а": 5,
"б": 6,
"в": 7,
"г": 8,
"д": 9,
"е": 10,
"ё": 11,
"ж": 12,
"з": 13,
"и": 14,
"й": 15,
"к": 16,
"л": 17,
"м": 18,
"н": 19,
"о": 20,
"п": 21,
"р": 22,
"с": 23,
"т": 24,
"у": 25,
"ф": 26,
"х": 27,
"ц": 28,
"ч": 29,
"ш": 30,
"щ": 31,
"ъ": 32,
"ы": 33,
"ь": 34,
"э": 35,
"ю": 36,
"я": 37,
"##а": 38,
"##б": 39,
"##в": 40,
"##г": 41,
"##д": 42,
"##е": 43,
"##ё": 44,
"##ж": 45,
"##з": 46,
"##и": 47,
"##й": 48,
"##к": 49,
"##л": 50,
"##м": 51,
"##н": 52,
"##о": 53,
"##п": 54,
"##р": 55,
"##с": 56,
"##т": 57,
"##у": 58,
"##ф": 59,
"##х": 60,
"##ц": 61,
"##ч": 62,
"##ш": 63,
"##щ": 64,
"##ъ": 65,
"##ы": 66,
"##ь": 67,
"##э": 68,
"##ю": 69,
"##я": 70,
"_": 71,
"-": 72,
"##_": 73,
"##-": 74
}
}
}