limcheekin
committed on
Commit
•
c7fe87b
1
Parent(s):
5c35d4b
bug: fixed tokenizer
Browse files
- added_tokens.json +0 -12
- special_tokens_map.json +1 -1
- spiece.model +0 -0
- tokenizer.json +0 -0
- tokenizer_config.json +2 -4
added_tokens.json
DELETED
@@ -1,12 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"\t": 32109,
|
3 |
-
"\n": 32103,
|
4 |
-
" ": 32106,
|
5 |
-
"<": 32101,
|
6 |
-
"[PAD]": 32100,
|
7 |
-
"\\": 32107,
|
8 |
-
"^": 32108,
|
9 |
-
"`": 32105,
|
10 |
-
"{": 32102,
|
11 |
-
"}": 32104
|
12 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
special_tokens_map.json
CHANGED
@@ -102,6 +102,6 @@
|
|
102 |
"<extra_id_99>"
|
103 |
],
|
104 |
"eos_token": "</s>",
|
105 |
-
"pad_token": "[PAD]",
|
106 |
"unk_token": "<unk>"
|
107 |
}
|
|
|
102 |
"<extra_id_99>"
|
103 |
],
|
104 |
"eos_token": "</s>",
|
105 |
+
"pad_token": "<pad>",
|
106 |
"unk_token": "<unk>"
|
107 |
}
|
spiece.model
CHANGED
File without changes
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
CHANGED
@@ -104,11 +104,9 @@
|
|
104 |
"clean_up_tokenization_spaces": true,
|
105 |
"eos_token": "</s>",
|
106 |
"extra_ids": 100,
|
107 |
-
"model_max_length":
|
108 |
"pad_token": "<pad>",
|
109 |
-
"padding_side": "right",
|
110 |
"sp_model_kwargs": {},
|
111 |
"tokenizer_class": "T5Tokenizer",
|
112 |
-
"unk_token": "<unk>",
|
113 |
-
"use_fast": false
|
114 |
}
|
|
|
104 |
"clean_up_tokenization_spaces": true,
|
105 |
"eos_token": "</s>",
|
106 |
"extra_ids": 100,
|
107 |
+
"model_max_length": 512,
|
108 |
"pad_token": "<pad>",
|
|
|
109 |
"sp_model_kwargs": {},
|
110 |
"tokenizer_class": "T5Tokenizer",
|
111 |
+
"unk_token": "<unk>"
|
|
|
112 |
}
|