nusa-tokenizer / tokenizer_config.json
libeanim
set max sec length to 8192
1b125c4
{
"added_tokens_decoder": {
"65000": {
"content": "[UNK]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"65001": {
"content": "[SEP]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"65002": {
"content": "[PAD]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"65003": {
"content": "[CLS]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"65004": {
"content": "[MASK]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"65005": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65006": {
"content": "<|padding|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65007": {
"content": "[unused000]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65008": {
"content": "[unused001]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65009": {
"content": "[unused002]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65010": {
"content": "[unused003]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65011": {
"content": "[unused004]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65012": {
"content": "[unused005]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65013": {
"content": "[unused006]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65014": {
"content": "[unused007]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65015": {
"content": "[unused008]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65016": {
"content": "[unused009]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65017": {
"content": "[unused010]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65018": {
"content": "[unused011]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65019": {
"content": "[unused012]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65020": {
"content": "[unused013]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65021": {
"content": "[unused014]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65022": {
"content": "[unused015]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65023": {
"content": "[unused016]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65024": {
"content": "[unused017]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65025": {
"content": "[unused018]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65026": {
"content": "[unused019]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65027": {
"content": "[unused020]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65028": {
"content": "[unused021]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65029": {
"content": "[unused022]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65030": {
"content": "[unused023]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65031": {
"content": "[unused024]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65032": {
"content": "[unused025]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65033": {
"content": "[unused026]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65034": {
"content": "[unused027]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65035": {
"content": "[unused028]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65036": {
"content": "[unused029]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65037": {
"content": "[unused030]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65038": {
"content": "[unused031]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65039": {
"content": "[unused032]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65040": {
"content": "[unused033]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65041": {
"content": "[unused034]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65042": {
"content": "[unused035]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65043": {
"content": "[unused036]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65044": {
"content": "[unused037]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65045": {
"content": "[unused038]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65046": {
"content": "[unused039]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65047": {
"content": "[unused040]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65048": {
"content": "[unused041]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65049": {
"content": "[unused042]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65050": {
"content": "[unused043]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65051": {
"content": "[unused044]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65052": {
"content": "[unused045]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65053": {
"content": "[unused046]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65054": {
"content": "[unused047]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65055": {
"content": "[unused048]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65056": {
"content": "[unused049]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65057": {
"content": "[unused050]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65058": {
"content": "[unused051]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65059": {
"content": "[unused052]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65060": {
"content": "[unused053]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65061": {
"content": "[unused054]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65062": {
"content": "[unused055]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65063": {
"content": "[unused056]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65064": {
"content": "[unused057]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65065": {
"content": "[unused058]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65066": {
"content": "[unused059]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65067": {
"content": "[unused060]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65068": {
"content": "[unused061]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65069": {
"content": "[unused062]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65070": {
"content": "[unused063]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65071": {
"content": "[unused064]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65072": {
"content": "[unused065]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65073": {
"content": "[unused066]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65074": {
"content": "[unused067]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65075": {
"content": "[unused068]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65076": {
"content": "[unused069]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65077": {
"content": "[unused070]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65078": {
"content": "[unused071]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65079": {
"content": "[unused072]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65080": {
"content": "[unused073]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65081": {
"content": "[unused074]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65082": {
"content": "[unused075]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65083": {
"content": "[unused076]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65084": {
"content": "[unused077]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65085": {
"content": "[unused078]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65086": {
"content": "[unused079]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65087": {
"content": "[unused080]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65088": {
"content": "[unused081]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65089": {
"content": "[unused082]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65090": {
"content": "[unused083]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65091": {
"content": "[unused084]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65092": {
"content": "[unused085]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65093": {
"content": "[unused086]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65094": {
"content": "[unused087]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65095": {
"content": "[unused088]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65096": {
"content": "[unused089]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65097": {
"content": "[unused090]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65098": {
"content": "[unused091]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65099": {
"content": "[unused092]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65100": {
"content": "[unused093]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65101": {
"content": "[unused094]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65102": {
"content": "[unused095]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65103": {
"content": "[unused096]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65104": {
"content": "[unused097]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65105": {
"content": "[unused098]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"65106": {
"content": "[unused099]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
}
},
"clean_up_tokenization_spaces": false,
"cls_token": "[CLS]",
"extra_special_tokens": {},
"mask_token": "[MASK]",
"model_max_length": 8192,
"pad_token": "[PAD]",
"sep_token": "[SEP]",
"tokenizer_class": "PreTrainedTokenizerFast",
"unk_token": "[UNK]"
}