Abdulrahman Al-Ghamdi
committed on
Upload tokenizer
Browse files- tokenizer_config.json +7 -0
tokenizer_config.json
CHANGED
@@ -296,6 +296,7 @@
|
|
296 |
"extra_special_tokens": {},
|
297 |
"mask_token": "[MASK]",
|
298 |
"max_len": 512,
|
|
|
299 |
"model_max_length": 512,
|
300 |
"never_split": [
|
301 |
"+ك",
|
@@ -330,10 +331,16 @@
|
|
330 |
"+ات",
|
331 |
"[رابط]"
|
332 |
],
|
|
|
333 |
"pad_token": "[PAD]",
|
|
|
|
|
334 |
"sep_token": "[SEP]",
|
|
|
335 |
"strip_accents": null,
|
336 |
"tokenize_chinese_chars": true,
|
337 |
"tokenizer_class": "BertTokenizer",
|
|
|
|
|
338 |
"unk_token": "[UNK]"
|
339 |
}
|
|
|
296 |
"extra_special_tokens": {},
|
297 |
"mask_token": "[MASK]",
|
298 |
"max_len": 512,
|
299 |
+
"max_length": 512,
|
300 |
"model_max_length": 512,
|
301 |
"never_split": [
|
302 |
"+ك",
|
|
|
331 |
"+ات",
|
332 |
"[رابط]"
|
333 |
],
|
334 |
+
"pad_to_multiple_of": null,
|
335 |
"pad_token": "[PAD]",
|
336 |
+
"pad_token_type_id": 0,
|
337 |
+
"padding_side": "right",
|
338 |
"sep_token": "[SEP]",
|
339 |
+
"stride": 0,
|
340 |
"strip_accents": null,
|
341 |
"tokenize_chinese_chars": true,
|
342 |
"tokenizer_class": "BertTokenizer",
|
343 |
+
"truncation_side": "right",
|
344 |
+
"truncation_strategy": "longest_first",
|
345 |
"unk_token": "[UNK]"
|
346 |
}
|