PereLluis13 committed on
Commit
0886c75
·
verified ·
1 Parent(s): 7d9090e

Upload tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "[MASK]": 128000,
3
+ "[R-0]": 128001,
4
+ "[R-10]": 128011,
5
+ "[R-11]": 128012,
6
+ "[R-12]": 128013,
7
+ "[R-13]": 128014,
8
+ "[R-14]": 128015,
9
+ "[R-15]": 128016,
10
+ "[R-16]": 128017,
11
+ "[R-17]": 128018,
12
+ "[R-18]": 128019,
13
+ "[R-19]": 128020,
14
+ "[R-1]": 128002,
15
+ "[R-20]": 128021,
16
+ "[R-21]": 128022,
17
+ "[R-22]": 128023,
18
+ "[R-23]": 128024,
19
+ "[R-2]": 128003,
20
+ "[R-3]": 128004,
21
+ "[R-4]": 128005,
22
+ "[R-5]": 128006,
23
+ "[R-6]": 128007,
24
+ "[R-7]": 128008,
25
+ "[R-8]": 128009,
26
+ "[R-9]": 128010
27
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "[R-0]",
4
+ "[R-1]",
5
+ "[R-2]",
6
+ "[R-3]",
7
+ "[R-4]",
8
+ "[R-5]",
9
+ "[R-6]",
10
+ "[R-7]",
11
+ "[R-8]",
12
+ "[R-9]",
13
+ "[R-10]",
14
+ "[R-11]",
15
+ "[R-12]",
16
+ "[R-13]",
17
+ "[R-14]",
18
+ "[R-15]",
19
+ "[R-16]",
20
+ "[R-17]",
21
+ "[R-18]",
22
+ "[R-19]",
23
+ "[R-20]",
24
+ "[R-21]",
25
+ "[R-22]",
26
+ "[R-23]"
27
+ ],
28
+ "bos_token": "[CLS]",
29
+ "cls_token": "[CLS]",
30
+ "eos_token": "[SEP]",
31
+ "mask_token": "[MASK]",
32
+ "pad_token": "[PAD]",
33
+ "sep_token": "[SEP]",
34
+ "unk_token": "[UNK]"
35
+ }
spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
3
+ size 2464616
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "additional_special_tokens": [
4
+ "[R-0]",
5
+ "[R-1]",
6
+ "[R-2]",
7
+ "[R-3]",
8
+ "[R-4]",
9
+ "[R-5]",
10
+ "[R-6]",
11
+ "[R-7]",
12
+ "[R-8]",
13
+ "[R-9]",
14
+ "[R-10]",
15
+ "[R-11]",
16
+ "[R-12]",
17
+ "[R-13]",
18
+ "[R-14]",
19
+ "[R-15]",
20
+ "[R-16]",
21
+ "[R-17]",
22
+ "[R-18]",
23
+ "[R-19]",
24
+ "[R-20]",
25
+ "[R-21]",
26
+ "[R-22]",
27
+ "[R-23]"
28
+ ],
29
+ "bos_token": "[CLS]",
30
+ "clean_up_tokenization_spaces": true,
31
+ "cls_token": "[CLS]",
32
+ "do_lower_case": false,
33
+ "eos_token": "[SEP]",
34
+ "mask_token": "[MASK]",
35
+ "model_max_length": 1000000000000000019884624838656,
36
+ "pad_token": "[PAD]",
37
+ "sep_token": "[SEP]",
38
+ "sp_model_kwargs": {},
39
+ "split_by_punct": false,
40
+ "tokenizer_class": "DebertaV2Tokenizer",
41
+ "unk_token": "[UNK]",
42
+ "vocab_type": "spm"
43
+ }