Konstantinos committed on
Commit
9a0ef1d
·
verified ·
1 Parent(s): d22a110

Upload 4 files

Browse files
Files changed (3) hide show
  1. tokenizer.json +9 -44
  2. tokenizer.model +2 -2
  3. tokenizer_config.json +5 -36
tokenizer.json CHANGED
@@ -5,7 +5,7 @@
5
  "added_tokens": [
6
  {
7
  "id": 0,
8
- "content": "<unk>",
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
@@ -14,7 +14,7 @@
14
  },
15
  {
16
  "id": 1,
17
- "content": "<s>",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
@@ -22,49 +22,13 @@
22
  "special": true
23
  },
24
  {
25
- "id": 3,
26
- "content": "</s>",
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
30
  "normalized": false,
31
  "special": true
32
- },
33
- {
34
- "id": 4,
35
- "content": "[INST]",
36
- "single_word": false,
37
- "lstrip": false,
38
- "rstrip": false,
39
- "normalized": true,
40
- "special": false
41
- },
42
- {
43
- "id": 5,
44
- "content": "[/INST]",
45
- "single_word": false,
46
- "lstrip": false,
47
- "rstrip": false,
48
- "normalized": true,
49
- "special": false
50
- },
51
- {
52
- "id": 6,
53
- "content": "<<SYS>>",
54
- "single_word": false,
55
- "lstrip": false,
56
- "rstrip": false,
57
- "normalized": true,
58
- "special": false
59
- },
60
- {
61
- "id": 7,
62
- "content": "<</SYS>>",
63
- "single_word": false,
64
- "lstrip": false,
65
- "rstrip": false,
66
- "normalized": true,
67
- "special": false
68
  }
69
  ],
70
  "normalizer": {
@@ -130,7 +94,7 @@
130
  "<s>": {
131
  "id": "<s>",
132
  "ids": [
133
- 1
134
  ],
135
  "tokens": [
136
  "<s>"
@@ -171,9 +135,10 @@
171
  "fuse_unk": true,
172
  "byte_fallback": true,
173
  "vocab": {
174
- "<unk>": 0,
175
- "<s>": 1,
176
- "</s>": 3,
 
177
  "[INST]": 4,
178
  "[/INST]": 5,
179
  "<<SYS>>": 6,
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
8
+ "content": "<s>",
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
 
14
  },
15
  {
16
  "id": 1,
17
+ "content": "</s>",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
 
22
  "special": true
23
  },
24
  {
25
+ "id": 2,
26
+ "content": "<unk>",
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
30
  "normalized": false,
31
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  }
33
  ],
34
  "normalizer": {
 
94
  "<s>": {
95
  "id": "<s>",
96
  "ids": [
97
+ 0
98
  ],
99
  "tokens": [
100
  "<s>"
 
135
  "fuse_unk": true,
136
  "byte_fallback": true,
137
  "vocab": {
138
+ "<s>": 0,
139
+ "</s>": 1,
140
+ "<unk>": 2,
141
+ "<pad>": 3,
142
  "[INST]": 4,
143
  "[/INST]": 5,
144
  "<<SYS>>": 6,
tokenizer.model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86b8450efded5ac1c41777f9b28f8f5f22cfc9f40cda4dc99b6c4f7f70b1400b
3
- size 483422
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29d04049f54d79e18d6554117a2d41ef630239e608222b54d6434acbb41494b2
3
+ size 483425
tokenizer_config.json CHANGED
@@ -1,9 +1,10 @@
1
  {
2
  "add_bos_token": true,
3
  "add_eos_token": false,
 
4
  "added_tokens_decoder": {
5
  "0": {
6
- "content": "<unk>",
7
  "lstrip": false,
8
  "normalized": false,
9
  "rstrip": false,
@@ -11,52 +12,20 @@
11
  "special": true
12
  },
13
  "1": {
14
- "content": "<s>",
15
  "lstrip": false,
16
  "normalized": false,
17
  "rstrip": false,
18
  "single_word": false,
19
  "special": true
20
  },
21
- "3": {
22
- "content": "</s>",
23
  "lstrip": false,
24
  "normalized": false,
25
  "rstrip": false,
26
  "single_word": false,
27
  "special": true
28
- },
29
- "4": {
30
- "content": "[INST]",
31
- "lstrip": false,
32
- "normalized": true,
33
- "rstrip": false,
34
- "single_word": false,
35
- "special": false
36
- },
37
- "5": {
38
- "content": "[/INST]",
39
- "lstrip": false,
40
- "normalized": true,
41
- "rstrip": false,
42
- "single_word": false,
43
- "special": false
44
- },
45
- "6": {
46
- "content": "<<SYS>>",
47
- "lstrip": false,
48
- "normalized": true,
49
- "rstrip": false,
50
- "single_word": false,
51
- "special": false
52
- },
53
- "7": {
54
- "content": "<</SYS>>",
55
- "lstrip": false,
56
- "normalized": true,
57
- "rstrip": false,
58
- "single_word": false,
59
- "special": false
60
  }
61
  },
62
  "bos_token": "<s>",
 
1
  {
2
  "add_bos_token": true,
3
  "add_eos_token": false,
4
+ "add_prefix_space": true,
5
  "added_tokens_decoder": {
6
  "0": {
7
+ "content": "<s>",
8
  "lstrip": false,
9
  "normalized": false,
10
  "rstrip": false,
 
12
  "special": true
13
  },
14
  "1": {
15
+ "content": "</s>",
16
  "lstrip": false,
17
  "normalized": false,
18
  "rstrip": false,
19
  "single_word": false,
20
  "special": true
21
  },
22
+ "2": {
23
+ "content": "<unk>",
24
  "lstrip": false,
25
  "normalized": false,
26
  "rstrip": false,
27
  "single_word": false,
28
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  }
30
  },
31
  "bos_token": "<s>",