Jmica committed on
Commit
4659030
·
verified ·
1 Parent(s): d7f2421

Upload 2 files

Browse files
Files changed (2) hide show
  1. t3_cfg.safetensors +3 -0
  2. tokenizer_jp.json +418 -0
t3_cfg.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fe5fe2a96e64dfb5ce54e2e31d371f8bb41ae4e1382b80b3c1836c3df7dcab8
3
+ size 2129654648
tokenizer_jp.json ADDED
@@ -0,0 +1,418 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "content": "[STOP]",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 1,
17
+ "content": "[UNK]",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ },
24
+ {
25
+ "id": 2,
26
+ "content": "[SPACE]",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
+ },
33
+ {
34
+ "id": 256,
35
+ "content": "[START]",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
+ }
42
+ ],
43
+ "normalizer": null,
44
+ "pre_tokenizer": {
45
+ "type": "Whitespace"
46
+ },
47
+ "post_processor": null,
48
+ "decoder": null,
49
+ "model": {
50
+ "type": "BPE",
51
+ "dropout": null,
52
+ "unk_token": "[UNK]",
53
+ "continuing_subword_prefix": null,
54
+ "end_of_word_suffix": null,
55
+ "fuse_unk": false,
56
+ "byte_fallback": false,
57
+ "vocab": {
58
+ "[STOP]": 0,
59
+ "[UNK]": 1,
60
+ "[SPACE]": 2,
61
+ "!": 3,
62
+ "%": 4,
63
+ "&": 5,
64
+ "'": 6,
65
+ ",": 7,
66
+ "-": 8,
67
+ ".": 9,
68
+ "0": 10,
69
+ "1": 11,
70
+ "2": 12,
71
+ "3": 13,
72
+ "4": 14,
73
+ "5": 15,
74
+ "6": 16,
75
+ "7": 17,
76
+ "8": 18,
77
+ "9": 19,
78
+ "?": 20,
79
+ "a": 21,
80
+ "b": 22,
81
+ "c": 23,
82
+ "d": 24,
83
+ "e": 25,
84
+ "f": 26,
85
+ "g": 27,
86
+ "h": 28,
87
+ "i": 29,
88
+ "j": 30,
89
+ "k": 31,
90
+ "l": 32,
91
+ "m": 33,
92
+ "n": 34,
93
+ "o": 35,
94
+ "p": 36,
95
+ "q": 37,
96
+ "r": 38,
97
+ "s": 39,
98
+ "t": 40,
99
+ "u": 41,
100
+ "v": 42,
101
+ "w": 43,
102
+ "x": 44,
103
+ "y": 45,
104
+ "z": 46,
105
+ "ร—": 47,
106
+ "ฮฒ": 48,
107
+ "ะฐ": 49,
108
+ "ะฑ": 50,
109
+ "ะธ": 51,
110
+ "ะบ": 52,
111
+ "ะป": 53,
112
+ "ะพ": 54,
113
+ "ะฟ": 55,
114
+ "ั": 56,
115
+ "ั‹": 57,
116
+ "ุŸ": 58,
117
+ "โ€ฆ": 59,
118
+ "โ„ƒ": 60,
119
+ "โ…ฑ": 61,
120
+ "โ‘ ": 62,
121
+ "โ‘ก": 63,
122
+ "โ‘ฃ": 64,
123
+ "โ‘ค": 65,
124
+ "โ—‹": 66,
125
+ "ใ€": 67,
126
+ "ใ€‚": 68,
127
+ "ใ€…": 69,
128
+ "ใ€‡": 70,
129
+ "ใ€Œ": 71,
130
+ "ใ€": 72,
131
+ "ใ€œ": 73,
132
+ "ใ": 74,
133
+ "ใ‚": 75,
134
+ "ใƒ": 76,
135
+ "ใ„": 77,
136
+ "ใ…": 78,
137
+ "ใ†": 79,
138
+ "ใ‡": 80,
139
+ "ใˆ": 81,
140
+ "ใ‰": 82,
141
+ "ใŠ": 83,
142
+ "ใ‹": 84,
143
+ "ใŒ": 85,
144
+ "ใ": 86,
145
+ "ใŽ": 87,
146
+ "ใ": 88,
147
+ "ใ": 89,
148
+ "ใ‘": 90,
149
+ "ใ’": 91,
150
+ "ใ“": 92,
151
+ "ใ”": 93,
152
+ "ใ•": 94,
153
+ "ใ–": 95,
154
+ "ใ—": 96,
155
+ "ใ˜": 97,
156
+ "ใ™": 98,
157
+ "ใš": 99,
158
+ "ใ›": 100,
159
+ "ใœ": 101,
160
+ "ใ": 102,
161
+ "ใž": 103,
162
+ "ใŸ": 104,
163
+ "ใ ": 105,
164
+ "ใก": 106,
165
+ "ใข": 107,
166
+ "ใฃ": 108,
167
+ "ใค": 109,
168
+ "ใฅ": 110,
169
+ "ใฆ": 111,
170
+ "ใง": 112,
171
+ "ใจ": 113,
172
+ "ใฉ": 114,
173
+ "ใช": 115,
174
+ "ใซ": 116,
175
+ "ใฌ": 117,
176
+ "ใญ": 118,
177
+ "ใฎ": 119,
178
+ "ใฏ": 120,
179
+ "ใฐ": 121,
180
+ "ใฑ": 122,
181
+ "ใฒ": 123,
182
+ "ใณ": 124,
183
+ "ใด": 125,
184
+ "ใต": 126,
185
+ "ใถ": 127,
186
+ "ใท": 128,
187
+ "ใธ": 129,
188
+ "ใน": 130,
189
+ "ใบ": 131,
190
+ "ใป": 132,
191
+ "ใผ": 133,
192
+ "ใฝ": 134,
193
+ "ใพ": 135,
194
+ "ใฟ": 136,
195
+ "ใ‚€": 137,
196
+ "ใ‚": 138,
197
+ "ใ‚‚": 139,
198
+ "ใ‚ƒ": 140,
199
+ "ใ‚„": 141,
200
+ "ใ‚…": 142,
201
+ "ใ‚†": 143,
202
+ "ใ‚‡": 144,
203
+ "ใ‚ˆ": 145,
204
+ "ใ‚‰": 146,
205
+ "ใ‚Š": 147,
206
+ "ใ‚‹": 148,
207
+ "ใ‚Œ": 149,
208
+ "ใ‚": 150,
209
+ "ใ‚": 151,
210
+ "ใ‚’": 152,
211
+ "ใ‚“": 153,
212
+ "ใ‚”": 154,
213
+ "ใ‚–": 155,
214
+ "ใƒป": 156,
215
+ "ใƒผ": 157,
216
+ "ใฃใฆ": 158,
217
+ "ใฃใŸ": 159,
218
+ "ใ‚‡ใ†": 160,
219
+ "ใ—ใŸ": 161,
220
+ "ใ•ใ‚“": 162,
221
+ "ใ‹ใ‚‰": 163,
222
+ "ใ—ใฆ": 164,
223
+ "ใชใ„": 165,
224
+ "ใงใ™": 166,
225
+ "ใชใ‚“": 167,
226
+ "ใ„ใ†": 168,
227
+ "ใพใ™": 169,
228
+ "ใจใ†": 170,
229
+ "ใŸใ„": 171,
230
+ "ใใ†": 172,
231
+ "ใŸใ—": 173,
232
+ "ใฃใจ": 174,
233
+ "ใ“ใจ": 175,
234
+ "ใ‹ใ‚“": 176,
235
+ "ใ‹ใ„": 177,
236
+ "ใ„ใŸ": 178,
237
+ "ใŠใ‚‚": 179,
238
+ "ใ“ใ†": 180,
239
+ "ใ‚ใŸใ—": 181,
240
+ "ใกใ‚ƒ": 182,
241
+ "ใ‚“ใ ": 183,
242
+ "ใ‚ใ‚Š": 184,
243
+ "ใ‚ˆใ†": 185,
244
+ "ใ‚“ใช": 186,
245
+ "ใพใ—ใŸ": 187,
246
+ "ใ„ใฎ": 188,
247
+ "ใ‚…ใ†": 189,
248
+ "ใกใ‚‡": 190,
249
+ "ใ‘ใฉ": 191,
250
+ "ใ‚ใ‚‹": 192,
251
+ "ใ•ใ„": 193,
252
+ "ใ˜ใ‚ƒ": 194,
253
+ "ใ‚‰ใ„": 195,
254
+ "ใ‚“ใฎ": 196,
255
+ "ใกใ‚‡ใฃใจ": 197,
256
+ "ใ„ใ‚‹": 198,
257
+ "ใ‚“ใง": 199,
258
+ "ใจใ‹": 200,
259
+ "ใ“ใฎ": 201,
260
+ "ใซใช": 202,
261
+ "ใใฎ": 203,
262
+ "ใ„ใฆ": 204,
263
+ "ใชใ‹": 205,
264
+ "ใ—ใ‚‡ใ†": 206,
265
+ "ใ—ใ‚‡": 207,
266
+ "ใจใŠใ‚‚": 208,
267
+ "ใ‚ใ‚ŠใŒ": 209,
268
+ "ใใ‚Œ": 210,
269
+ "ใจใ„ใ†": 211,
270
+ "ใชใ‚“ใ‹": 212,
271
+ "ใฟใช": 213,
272
+ "ใฏใ„": 214,
273
+ "ใ‚‚ใ†": 215,
274
+ "ใ—ใ‚“": 216,
275
+ "ใฎใง": 217,
276
+ "ใ‚ใ‚ŠใŒใจใ†": 218,
277
+ "ใปใ†": 219,
278
+ "ใ‹ใช": 220,
279
+ "ใ„ใพใ™": 221,
280
+ "ใ“ใ‚Œ": 222,
281
+ "ใ“ใ‚“": 223,
282
+ "ใ˜ใ‚‡ใ†": 224,
283
+ "ใŒใ„": 225,
284
+ "ใ—ใ„": 226,
285
+ "ใฉใ†": 227,
286
+ "ใงใ‚‚": 228,
287
+ "ใฟใŸใ„": 229,
288
+ "ใ›ใ‚“": 230,
289
+ "ใฏใช": 231,
290
+ "ใ™ใ‚‹": 232,
291
+ "ใ‚Œใฆ": 233,
292
+ "ใ›ใ„": 234,
293
+ "ใจใ": 235,
294
+ "ใ‚ใฎ": 236,
295
+ "ใ—ใ‚ƒ": 237,
296
+ "ใ‚‚ใฎ": 238,
297
+ "ใ ใ„": 239,
298
+ "ใ‚“ใงใ™": 240,
299
+ "ใ‚“ใฏ": 241,
300
+ "ใ‚ใ†": 242,
301
+ "ใปใ‚“": 243,
302
+ "ใŸใฎ": 244,
303
+ "ใ„ใ„": 245,
304
+ "ใฟใŸใ„ใช": 246,
305
+ "ใ ใฃใŸ": 247,
306
+ "ใซใ‚“": 248,
307
+ "ใฃใฆใ„ใ†": 249,
308
+ "ใ„ใฎใ‚Š": 250,
309
+ "ใœใ‚“": 251,
310
+ "ใ„ใŸใ ": 252,
311
+ "ใซใก": 253,
312
+ "ใงใ": 254,
313
+ "ใงใฏ": 255,
314
+ "[START]": 256
315
+ },
316
+ "merges": [
317
+ "ใฃ ใฆ",
318
+ "ใฃ ใŸ",
319
+ "ใ‚‡ ใ†",
320
+ "ใ— ใŸ",
321
+ "ใ• ใ‚“",
322
+ "ใ‹ ใ‚‰",
323
+ "ใ— ใฆ",
324
+ "ใช ใ„",
325
+ "ใง ใ™",
326
+ "ใช ใ‚“",
327
+ "ใ„ ใ†",
328
+ "ใพ ใ™",
329
+ "ใจ ใ†",
330
+ "ใŸ ใ„",
331
+ "ใ ใ†",
332
+ "ใŸ ใ—",
333
+ "ใฃ ใจ",
334
+ "ใ“ ใจ",
335
+ "ใ‹ ใ‚“",
336
+ "ใ‹ ใ„",
337
+ "ใ„ ใŸ",
338
+ "ใŠ ใ‚‚",
339
+ "ใ“ ใ†",
340
+ "ใ‚ ใŸใ—",
341
+ "ใก ใ‚ƒ",
342
+ "ใ‚“ ใ ",
343
+ "ใ‚ ใ‚Š",
344
+ "ใ‚ˆ ใ†",
345
+ "ใ‚“ ใช",
346
+ "ใพ ใ—ใŸ",
347
+ "ใ„ ใฎ",
348
+ "ใ‚… ใ†",
349
+ "ใก ใ‚‡",
350
+ "ใ‘ ใฉ",
351
+ "ใ‚ ใ‚‹",
352
+ "ใ• ใ„",
353
+ "ใ˜ ใ‚ƒ",
354
+ "ใ‚‰ ใ„",
355
+ "ใ‚“ ใฎ",
356
+ "ใกใ‚‡ ใฃใจ",
357
+ "ใ„ ใ‚‹",
358
+ "ใ‚“ ใง",
359
+ "ใจ ใ‹",
360
+ "ใ“ ใฎ",
361
+ "ใซ ใช",
362
+ "ใ ใฎ",
363
+ "ใ„ ใฆ",
364
+ "ใช ใ‹",
365
+ "ใ— ใ‚‡ใ†",
366
+ "ใ— ใ‚‡",
367
+ "ใจ ใŠใ‚‚",
368
+ "ใ‚ใ‚Š ใŒ",
369
+ "ใ ใ‚Œ",
370
+ "ใจ ใ„ใ†",
371
+ "ใชใ‚“ ใ‹",
372
+ "ใฟ ใช",
373
+ "ใฏ ใ„",
374
+ "ใ‚‚ ใ†",
375
+ "ใ— ใ‚“",
376
+ "ใฎ ใง",
377
+ "ใ‚ใ‚ŠใŒ ใจใ†",
378
+ "ใป ใ†",
379
+ "ใ‹ ใช",
380
+ "ใ„ ใพใ™",
381
+ "ใ“ ใ‚Œ",
382
+ "ใ“ ใ‚“",
383
+ "ใ˜ ใ‚‡ใ†",
384
+ "ใŒ ใ„",
385
+ "ใ— ใ„",
386
+ "ใฉ ใ†",
387
+ "ใง ใ‚‚",
388
+ "ใฟ ใŸใ„",
389
+ "ใ› ใ‚“",
390
+ "ใฏ ใช",
391
+ "ใ™ ใ‚‹",
392
+ "ใ‚Œ ใฆ",
393
+ "ใ› ใ„",
394
+ "ใจ ใ",
395
+ "ใ‚ ใฎ",
396
+ "ใ— ใ‚ƒ",
397
+ "ใ‚‚ ใฎ",
398
+ "ใ  ใ„",
399
+ "ใ‚“ ใงใ™",
400
+ "ใ‚“ ใฏ",
401
+ "ใ‚ ใ†",
402
+ "ใป ใ‚“",
403
+ "ใŸ ใฎ",
404
+ "ใ„ ใ„",
405
+ "ใฟใŸใ„ ใช",
406
+ "ใ  ใฃใŸ",
407
+ "ใซ ใ‚“",
408
+ "ใฃใฆ ใ„ใ†",
409
+ "ใ„ใฎ ใ‚Š",
410
+ "ใœ ใ‚“",
411
+ "ใ„ใŸ ใ ",
412
+ "ใซ ใก",
413
+ "ใง ใ",
414
+ "ใง ใฏ"
415
+ ],
416
+ "language": "multi"
417
+ }
418
+ }