Takvmi committed on
Commit
02c86bb
·
verified ·
1 Parent(s): e2eaf18

Upload tokenizer

Browse files
Files changed (3) hide show
  1. merges.txt +1 -1
  2. special_tokens_map.json +21 -3
  3. tokenizer.json +3 -1
merges.txt CHANGED
@@ -1,4 +1,4 @@
1
- #version: 0.2 - Trained by `huggingface/tokenizers`
2
  Ġ t
3
  Ġ a
4
  h e
 
1
+ #version: 0.2
2
  Ġ t
3
  Ġ a
4
  h e
special_tokens_map.json CHANGED
@@ -1,6 +1,18 @@
1
  {
2
- "bos_token": "<|endoftext|>",
3
- "eos_token": "<|endoftext|>",
 
 
 
 
 
 
 
 
 
 
 
 
4
  "pad_token": {
5
  "content": "[PAD]",
6
  "lstrip": false,
@@ -8,5 +20,11 @@
8
  "rstrip": false,
9
  "single_word": false
10
  },
11
- "unk_token": "<|endoftext|>"
 
 
 
 
 
 
12
  }
 
1
  {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
  "pad_token": {
17
  "content": "[PAD]",
18
  "lstrip": false,
 
20
  "rstrip": false,
21
  "single_word": false
22
  },
23
+ "unk_token": {
24
+ "content": "<|endoftext|>",
25
+ "lstrip": false,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
  }
tokenizer.json CHANGED
@@ -9,7 +9,7 @@
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
12
- "normalized": false,
13
  "special": true
14
  },
15
  {
@@ -48,6 +48,8 @@
48
  "continuing_subword_prefix": "",
49
  "end_of_word_suffix": "",
50
  "fuse_unk": false,
 
 
51
  "vocab": {
52
  "!": 0,
53
  "\"": 1,
 
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
12
+ "normalized": true,
13
  "special": true
14
  },
15
  {
 
48
  "continuing_subword_prefix": "",
49
  "end_of_word_suffix": "",
50
  "fuse_unk": false,
51
+ "byte_fallback": false,
52
+ "ignore_merges": false,
53
  "vocab": {
54
  "!": 0,
55
  "\"": 1,