Safetensors
wav2vec2-bert
indiejoseph commited on
Commit
a17e9dc
·
verified ·
1 Parent(s): 05a0a95

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. added_tokens.json +2 -2
  2. config.json +6 -5
  3. model.safetensors +2 -2
  4. tokenizer_config.json +76 -1796
  5. vocab.json +72 -287
added_tokens.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
- "</s>": 291,
3
- "<s>": 290
4
  }
 
1
  {
2
+ "</s>": 76,
3
+ "<s>": 75
4
  }
config.json CHANGED
@@ -7,9 +7,9 @@
7
  "add_adapter": true,
8
  "apply_spec_augment": false,
9
  "architectures": [
10
- "Wav2Vec2BertForCTC"
11
  ],
12
- "attention_dropout": 0.0,
13
  "bos_token_id": 1,
14
  "classifier_proj_size": 768,
15
  "codevector_dim": 768,
@@ -25,7 +25,7 @@
25
  "feature_projection_input_dim": 160,
26
  "final_dropout": 0.1,
27
  "hidden_act": "swish",
28
- "hidden_dropout": 0.0,
29
  "hidden_size": 1024,
30
  "initializer_range": 0.02,
31
  "intermediate_size": 4096,
@@ -73,10 +73,11 @@
73
  1,
74
  1
75
  ],
 
76
  "torch_dtype": "float32",
77
- "transformers_version": "4.45.0.dev0",
78
  "use_intermediate_ffn_before_adapter": false,
79
  "use_weighted_layer_sum": false,
80
- "vocab_size": 292,
81
  "xvector_output_dim": 512
82
  }
 
7
  "add_adapter": true,
8
  "apply_spec_augment": false,
9
  "architectures": [
10
+ "Wav2Vec2BertForCantonese"
11
  ],
12
+ "attention_dropout": 0.2,
13
  "bos_token_id": 1,
14
  "classifier_proj_size": 768,
15
  "codevector_dim": 768,
 
25
  "feature_projection_input_dim": 160,
26
  "final_dropout": 0.1,
27
  "hidden_act": "swish",
28
+ "hidden_dropout": 0.2,
29
  "hidden_size": 1024,
30
  "initializer_range": 0.02,
31
  "intermediate_size": 4096,
 
73
  1,
74
  1
75
  ],
76
+ "tone_vocab_size": 11,
77
  "torch_dtype": "float32",
78
+ "transformers_version": "4.46.0",
79
  "use_intermediate_ffn_before_adapter": false,
80
  "use_weighted_layer_sum": false,
81
+ "vocab_size": 77,
82
  "xvector_output_dim": 512
83
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cbce4d6dfcd28928a3c2fc3b1bee90a6c879ac5927be8dd26d4875fef62e7052
3
- size 2424011768
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3644f073720a83f1f6d62e8f540c41143590e160d6e76b60bbef55cbc9ab9527
3
+ size 2423167328
tokenizer_config.json CHANGED
@@ -25,7 +25,7 @@
25
  "special": false
26
  },
27
  "3": {
28
- "content": "aa1",
29
  "lstrip": false,
30
  "normalized": true,
31
  "rstrip": false,
@@ -33,7 +33,7 @@
33
  "special": false
34
  },
35
  "4": {
36
- "content": "aa2",
37
  "lstrip": false,
38
  "normalized": true,
39
  "rstrip": false,
@@ -41,7 +41,7 @@
41
  "special": false
42
  },
43
  "5": {
44
- "content": "aa3",
45
  "lstrip": false,
46
  "normalized": true,
47
  "rstrip": false,
@@ -49,7 +49,7 @@
49
  "special": false
50
  },
51
  "6": {
52
- "content": "aa4",
53
  "lstrip": false,
54
  "normalized": true,
55
  "rstrip": false,
@@ -57,7 +57,7 @@
57
  "special": false
58
  },
59
  "7": {
60
- "content": "aa5",
61
  "lstrip": false,
62
  "normalized": true,
63
  "rstrip": false,
@@ -65,7 +65,7 @@
65
  "special": false
66
  },
67
  "8": {
68
- "content": "aa6",
69
  "lstrip": false,
70
  "normalized": true,
71
  "rstrip": false,
@@ -73,7 +73,7 @@
73
  "special": false
74
  },
75
  "9": {
76
- "content": "aai1",
77
  "lstrip": false,
78
  "normalized": true,
79
  "rstrip": false,
@@ -81,7 +81,7 @@
81
  "special": false
82
  },
83
  "10": {
84
- "content": "aai2",
85
  "lstrip": false,
86
  "normalized": true,
87
  "rstrip": false,
@@ -89,7 +89,7 @@
89
  "special": false
90
  },
91
  "11": {
92
- "content": "aai3",
93
  "lstrip": false,
94
  "normalized": true,
95
  "rstrip": false,
@@ -97,7 +97,7 @@
97
  "special": false
98
  },
99
  "12": {
100
- "content": "aai4",
101
  "lstrip": false,
102
  "normalized": true,
103
  "rstrip": false,
@@ -105,7 +105,7 @@
105
  "special": false
106
  },
107
  "13": {
108
- "content": "aai5",
109
  "lstrip": false,
110
  "normalized": true,
111
  "rstrip": false,
@@ -113,7 +113,7 @@
113
  "special": false
114
  },
115
  "14": {
116
- "content": "aai6",
117
  "lstrip": false,
118
  "normalized": true,
119
  "rstrip": false,
@@ -121,7 +121,7 @@
121
  "special": false
122
  },
123
  "15": {
124
- "content": "aak1",
125
  "lstrip": false,
126
  "normalized": true,
127
  "rstrip": false,
@@ -129,7 +129,7 @@
129
  "special": false
130
  },
131
  "16": {
132
- "content": "aak2",
133
  "lstrip": false,
134
  "normalized": true,
135
  "rstrip": false,
@@ -137,7 +137,7 @@
137
  "special": false
138
  },
139
  "17": {
140
- "content": "aak3",
141
  "lstrip": false,
142
  "normalized": true,
143
  "rstrip": false,
@@ -145,7 +145,7 @@
145
  "special": false
146
  },
147
  "18": {
148
- "content": "aak6",
149
  "lstrip": false,
150
  "normalized": true,
151
  "rstrip": false,
@@ -153,7 +153,7 @@
153
  "special": false
154
  },
155
  "19": {
156
- "content": "aam1",
157
  "lstrip": false,
158
  "normalized": true,
159
  "rstrip": false,
@@ -161,7 +161,7 @@
161
  "special": false
162
  },
163
  "20": {
164
- "content": "aam2",
165
  "lstrip": false,
166
  "normalized": true,
167
  "rstrip": false,
@@ -169,7 +169,7 @@
169
  "special": false
170
  },
171
  "21": {
172
- "content": "aam3",
173
  "lstrip": false,
174
  "normalized": true,
175
  "rstrip": false,
@@ -177,7 +177,7 @@
177
  "special": false
178
  },
179
  "22": {
180
- "content": "aam4",
181
  "lstrip": false,
182
  "normalized": true,
183
  "rstrip": false,
@@ -185,7 +185,7 @@
185
  "special": false
186
  },
187
  "23": {
188
- "content": "aam5",
189
  "lstrip": false,
190
  "normalized": true,
191
  "rstrip": false,
@@ -193,7 +193,7 @@
193
  "special": false
194
  },
195
  "24": {
196
- "content": "aam6",
197
  "lstrip": false,
198
  "normalized": true,
199
  "rstrip": false,
@@ -201,7 +201,7 @@
201
  "special": false
202
  },
203
  "25": {
204
- "content": "aan1",
205
  "lstrip": false,
206
  "normalized": true,
207
  "rstrip": false,
@@ -209,7 +209,7 @@
209
  "special": false
210
  },
211
  "26": {
212
- "content": "aan2",
213
  "lstrip": false,
214
  "normalized": true,
215
  "rstrip": false,
@@ -217,7 +217,7 @@
217
  "special": false
218
  },
219
  "27": {
220
- "content": "aan3",
221
  "lstrip": false,
222
  "normalized": true,
223
  "rstrip": false,
@@ -225,7 +225,7 @@
225
  "special": false
226
  },
227
  "28": {
228
- "content": "aan4",
229
  "lstrip": false,
230
  "normalized": true,
231
  "rstrip": false,
@@ -233,7 +233,7 @@
233
  "special": false
234
  },
235
  "29": {
236
- "content": "aan5",
237
  "lstrip": false,
238
  "normalized": true,
239
  "rstrip": false,
@@ -241,7 +241,7 @@
241
  "special": false
242
  },
243
  "30": {
244
- "content": "aan6",
245
  "lstrip": false,
246
  "normalized": true,
247
  "rstrip": false,
@@ -249,7 +249,7 @@
249
  "special": false
250
  },
251
  "31": {
252
- "content": "aang1",
253
  "lstrip": false,
254
  "normalized": true,
255
  "rstrip": false,
@@ -257,7 +257,7 @@
257
  "special": false
258
  },
259
  "32": {
260
- "content": "aang2",
261
  "lstrip": false,
262
  "normalized": true,
263
  "rstrip": false,
@@ -265,7 +265,7 @@
265
  "special": false
266
  },
267
  "33": {
268
- "content": "aang3",
269
  "lstrip": false,
270
  "normalized": true,
271
  "rstrip": false,
@@ -273,7 +273,7 @@
273
  "special": false
274
  },
275
  "34": {
276
- "content": "aang4",
277
  "lstrip": false,
278
  "normalized": true,
279
  "rstrip": false,
@@ -281,7 +281,7 @@
281
  "special": false
282
  },
283
  "35": {
284
- "content": "aang5",
285
  "lstrip": false,
286
  "normalized": true,
287
  "rstrip": false,
@@ -289,7 +289,7 @@
289
  "special": false
290
  },
291
  "36": {
292
- "content": "aang6",
293
  "lstrip": false,
294
  "normalized": true,
295
  "rstrip": false,
@@ -297,7 +297,7 @@
297
  "special": false
298
  },
299
  "37": {
300
- "content": "aap1",
301
  "lstrip": false,
302
  "normalized": true,
303
  "rstrip": false,
@@ -305,7 +305,7 @@
305
  "special": false
306
  },
307
  "38": {
308
- "content": "aap2",
309
  "lstrip": false,
310
  "normalized": true,
311
  "rstrip": false,
@@ -313,7 +313,7 @@
313
  "special": false
314
  },
315
  "39": {
316
- "content": "aap3",
317
  "lstrip": false,
318
  "normalized": true,
319
  "rstrip": false,
@@ -321,7 +321,7 @@
321
  "special": false
322
  },
323
  "40": {
324
- "content": "aap6",
325
  "lstrip": false,
326
  "normalized": true,
327
  "rstrip": false,
@@ -329,7 +329,7 @@
329
  "special": false
330
  },
331
  "41": {
332
- "content": "aat1",
333
  "lstrip": false,
334
  "normalized": true,
335
  "rstrip": false,
@@ -337,7 +337,7 @@
337
  "special": false
338
  },
339
  "42": {
340
- "content": "aat2",
341
  "lstrip": false,
342
  "normalized": true,
343
  "rstrip": false,
@@ -345,7 +345,7 @@
345
  "special": false
346
  },
347
  "43": {
348
- "content": "aat3",
349
  "lstrip": false,
350
  "normalized": true,
351
  "rstrip": false,
@@ -353,7 +353,7 @@
353
  "special": false
354
  },
355
  "44": {
356
- "content": "aat6",
357
  "lstrip": false,
358
  "normalized": true,
359
  "rstrip": false,
@@ -361,7 +361,7 @@
361
  "special": false
362
  },
363
  "45": {
364
- "content": "aau1",
365
  "lstrip": false,
366
  "normalized": true,
367
  "rstrip": false,
@@ -369,7 +369,7 @@
369
  "special": false
370
  },
371
  "46": {
372
- "content": "aau2",
373
  "lstrip": false,
374
  "normalized": true,
375
  "rstrip": false,
@@ -377,7 +377,7 @@
377
  "special": false
378
  },
379
  "47": {
380
- "content": "aau3",
381
  "lstrip": false,
382
  "normalized": true,
383
  "rstrip": false,
@@ -385,7 +385,7 @@
385
  "special": false
386
  },
387
  "48": {
388
- "content": "aau4",
389
  "lstrip": false,
390
  "normalized": true,
391
  "rstrip": false,
@@ -393,7 +393,7 @@
393
  "special": false
394
  },
395
  "49": {
396
- "content": "aau5",
397
  "lstrip": false,
398
  "normalized": true,
399
  "rstrip": false,
@@ -401,7 +401,7 @@
401
  "special": false
402
  },
403
  "50": {
404
- "content": "aau6",
405
  "lstrip": false,
406
  "normalized": true,
407
  "rstrip": false,
@@ -409,7 +409,7 @@
409
  "special": false
410
  },
411
  "51": {
412
- "content": "ai1",
413
  "lstrip": false,
414
  "normalized": true,
415
  "rstrip": false,
@@ -417,7 +417,7 @@
417
  "special": false
418
  },
419
  "52": {
420
- "content": "ai2",
421
  "lstrip": false,
422
  "normalized": true,
423
  "rstrip": false,
@@ -425,7 +425,7 @@
425
  "special": false
426
  },
427
  "53": {
428
- "content": "ai3",
429
  "lstrip": false,
430
  "normalized": true,
431
  "rstrip": false,
@@ -433,7 +433,7 @@
433
  "special": false
434
  },
435
  "54": {
436
- "content": "ai4",
437
  "lstrip": false,
438
  "normalized": true,
439
  "rstrip": false,
@@ -441,7 +441,7 @@
441
  "special": false
442
  },
443
  "55": {
444
- "content": "ai5",
445
  "lstrip": false,
446
  "normalized": true,
447
  "rstrip": false,
@@ -449,7 +449,7 @@
449
  "special": false
450
  },
451
  "56": {
452
- "content": "ai6",
453
  "lstrip": false,
454
  "normalized": true,
455
  "rstrip": false,
@@ -457,7 +457,7 @@
457
  "special": false
458
  },
459
  "57": {
460
- "content": "ak1",
461
  "lstrip": false,
462
  "normalized": true,
463
  "rstrip": false,
@@ -465,7 +465,7 @@
465
  "special": false
466
  },
467
  "58": {
468
- "content": "ak2",
469
  "lstrip": false,
470
  "normalized": true,
471
  "rstrip": false,
@@ -473,7 +473,7 @@
473
  "special": false
474
  },
475
  "59": {
476
- "content": "ak6",
477
  "lstrip": false,
478
  "normalized": true,
479
  "rstrip": false,
@@ -481,7 +481,7 @@
481
  "special": false
482
  },
483
  "60": {
484
- "content": "am1",
485
  "lstrip": false,
486
  "normalized": true,
487
  "rstrip": false,
@@ -489,7 +489,7 @@
489
  "special": false
490
  },
491
  "61": {
492
- "content": "am2",
493
  "lstrip": false,
494
  "normalized": true,
495
  "rstrip": false,
@@ -497,7 +497,7 @@
497
  "special": false
498
  },
499
  "62": {
500
- "content": "am3",
501
  "lstrip": false,
502
  "normalized": true,
503
  "rstrip": false,
@@ -505,7 +505,7 @@
505
  "special": false
506
  },
507
  "63": {
508
- "content": "am4",
509
  "lstrip": false,
510
  "normalized": true,
511
  "rstrip": false,
@@ -513,7 +513,7 @@
513
  "special": false
514
  },
515
  "64": {
516
- "content": "am5",
517
  "lstrip": false,
518
  "normalized": true,
519
  "rstrip": false,
@@ -521,7 +521,7 @@
521
  "special": false
522
  },
523
  "65": {
524
- "content": "am6",
525
  "lstrip": false,
526
  "normalized": true,
527
  "rstrip": false,
@@ -529,7 +529,7 @@
529
  "special": false
530
  },
531
  "66": {
532
- "content": "an1",
533
  "lstrip": false,
534
  "normalized": true,
535
  "rstrip": false,
@@ -537,7 +537,7 @@
537
  "special": false
538
  },
539
  "67": {
540
- "content": "an2",
541
  "lstrip": false,
542
  "normalized": true,
543
  "rstrip": false,
@@ -545,7 +545,7 @@
545
  "special": false
546
  },
547
  "68": {
548
- "content": "an3",
549
  "lstrip": false,
550
  "normalized": true,
551
  "rstrip": false,
@@ -553,7 +553,7 @@
553
  "special": false
554
  },
555
  "69": {
556
- "content": "an4",
557
  "lstrip": false,
558
  "normalized": true,
559
  "rstrip": false,
@@ -561,7 +561,7 @@
561
  "special": false
562
  },
563
  "70": {
564
- "content": "an5",
565
  "lstrip": false,
566
  "normalized": true,
567
  "rstrip": false,
@@ -569,7 +569,7 @@
569
  "special": false
570
  },
571
  "71": {
572
- "content": "an6",
573
  "lstrip": false,
574
  "normalized": true,
575
  "rstrip": false,
@@ -577,7 +577,7 @@
577
  "special": false
578
  },
579
  "72": {
580
- "content": "ang1",
581
  "lstrip": false,
582
  "normalized": true,
583
  "rstrip": false,
@@ -585,7 +585,7 @@
585
  "special": false
586
  },
587
  "73": {
588
- "content": "ang2",
589
  "lstrip": false,
590
  "normalized": true,
591
  "rstrip": false,
@@ -593,7 +593,7 @@
593
  "special": false
594
  },
595
  "74": {
596
- "content": "ang3",
597
  "lstrip": false,
598
  "normalized": true,
599
  "rstrip": false,
@@ -601,1734 +601,14 @@
601
  "special": false
602
  },
603
  "75": {
604
- "content": "ang4",
605
  "lstrip": false,
606
- "normalized": true,
607
  "rstrip": false,
608
  "single_word": false,
609
- "special": false
610
  },
611
  "76": {
612
- "content": "ang6",
613
- "lstrip": false,
614
- "normalized": true,
615
- "rstrip": false,
616
- "single_word": false,
617
- "special": false
618
- },
619
- "77": {
620
- "content": "ap1",
621
- "lstrip": false,
622
- "normalized": true,
623
- "rstrip": false,
624
- "single_word": false,
625
- "special": false
626
- },
627
- "78": {
628
- "content": "ap2",
629
- "lstrip": false,
630
- "normalized": true,
631
- "rstrip": false,
632
- "single_word": false,
633
- "special": false
634
- },
635
- "79": {
636
- "content": "ap6",
637
- "lstrip": false,
638
- "normalized": true,
639
- "rstrip": false,
640
- "single_word": false,
641
- "special": false
642
- },
643
- "80": {
644
- "content": "at1",
645
- "lstrip": false,
646
- "normalized": true,
647
- "rstrip": false,
648
- "single_word": false,
649
- "special": false
650
- },
651
- "81": {
652
- "content": "at2",
653
- "lstrip": false,
654
- "normalized": true,
655
- "rstrip": false,
656
- "single_word": false,
657
- "special": false
658
- },
659
- "82": {
660
- "content": "at3",
661
- "lstrip": false,
662
- "normalized": true,
663
- "rstrip": false,
664
- "single_word": false,
665
- "special": false
666
- },
667
- "83": {
668
- "content": "at6",
669
- "lstrip": false,
670
- "normalized": true,
671
- "rstrip": false,
672
- "single_word": false,
673
- "special": false
674
- },
675
- "84": {
676
- "content": "au1",
677
- "lstrip": false,
678
- "normalized": true,
679
- "rstrip": false,
680
- "single_word": false,
681
- "special": false
682
- },
683
- "85": {
684
- "content": "au2",
685
- "lstrip": false,
686
- "normalized": true,
687
- "rstrip": false,
688
- "single_word": false,
689
- "special": false
690
- },
691
- "86": {
692
- "content": "au3",
693
- "lstrip": false,
694
- "normalized": true,
695
- "rstrip": false,
696
- "single_word": false,
697
- "special": false
698
- },
699
- "87": {
700
- "content": "au4",
701
- "lstrip": false,
702
- "normalized": true,
703
- "rstrip": false,
704
- "single_word": false,
705
- "special": false
706
- },
707
- "88": {
708
- "content": "au5",
709
- "lstrip": false,
710
- "normalized": true,
711
- "rstrip": false,
712
- "single_word": false,
713
- "special": false
714
- },
715
- "89": {
716
- "content": "au6",
717
- "lstrip": false,
718
- "normalized": true,
719
- "rstrip": false,
720
- "single_word": false,
721
- "special": false
722
- },
723
- "90": {
724
- "content": "b",
725
- "lstrip": false,
726
- "normalized": true,
727
- "rstrip": false,
728
- "single_word": false,
729
- "special": false
730
- },
731
- "91": {
732
- "content": "c",
733
- "lstrip": false,
734
- "normalized": true,
735
- "rstrip": false,
736
- "single_word": false,
737
- "special": false
738
- },
739
- "92": {
740
- "content": "d",
741
- "lstrip": false,
742
- "normalized": true,
743
- "rstrip": false,
744
- "single_word": false,
745
- "special": false
746
- },
747
- "93": {
748
- "content": "e1",
749
- "lstrip": false,
750
- "normalized": true,
751
- "rstrip": false,
752
- "single_word": false,
753
- "special": false
754
- },
755
- "94": {
756
- "content": "e2",
757
- "lstrip": false,
758
- "normalized": true,
759
- "rstrip": false,
760
- "single_word": false,
761
- "special": false
762
- },
763
- "95": {
764
- "content": "e3",
765
- "lstrip": false,
766
- "normalized": true,
767
- "rstrip": false,
768
- "single_word": false,
769
- "special": false
770
- },
771
- "96": {
772
- "content": "e4",
773
- "lstrip": false,
774
- "normalized": true,
775
- "rstrip": false,
776
- "single_word": false,
777
- "special": false
778
- },
779
- "97": {
780
- "content": "e5",
781
- "lstrip": false,
782
- "normalized": true,
783
- "rstrip": false,
784
- "single_word": false,
785
- "special": false
786
- },
787
- "98": {
788
- "content": "e6",
789
- "lstrip": false,
790
- "normalized": true,
791
- "rstrip": false,
792
- "single_word": false,
793
- "special": false
794
- },
795
- "99": {
796
- "content": "ei1",
797
- "lstrip": false,
798
- "normalized": true,
799
- "rstrip": false,
800
- "single_word": false,
801
- "special": false
802
- },
803
- "100": {
804
- "content": "ei2",
805
- "lstrip": false,
806
- "normalized": true,
807
- "rstrip": false,
808
- "single_word": false,
809
- "special": false
810
- },
811
- "101": {
812
- "content": "ei3",
813
- "lstrip": false,
814
- "normalized": true,
815
- "rstrip": false,
816
- "single_word": false,
817
- "special": false
818
- },
819
- "102": {
820
- "content": "ei4",
821
- "lstrip": false,
822
- "normalized": true,
823
- "rstrip": false,
824
- "single_word": false,
825
- "special": false
826
- },
827
- "103": {
828
- "content": "ei5",
829
- "lstrip": false,
830
- "normalized": true,
831
- "rstrip": false,
832
- "single_word": false,
833
- "special": false
834
- },
835
- "104": {
836
- "content": "ei6",
837
- "lstrip": false,
838
- "normalized": true,
839
- "rstrip": false,
840
- "single_word": false,
841
- "special": false
842
- },
843
- "105": {
844
- "content": "ek1",
845
- "lstrip": false,
846
- "normalized": true,
847
- "rstrip": false,
848
- "single_word": false,
849
- "special": false
850
- },
851
- "106": {
852
- "content": "ek2",
853
- "lstrip": false,
854
- "normalized": true,
855
- "rstrip": false,
856
- "single_word": false,
857
- "special": false
858
- },
859
- "107": {
860
- "content": "ek3",
861
- "lstrip": false,
862
- "normalized": true,
863
- "rstrip": false,
864
- "single_word": false,
865
- "special": false
866
- },
867
- "108": {
868
- "content": "ek6",
869
- "lstrip": false,
870
- "normalized": true,
871
- "rstrip": false,
872
- "single_word": false,
873
- "special": false
874
- },
875
- "109": {
876
- "content": "eng1",
877
- "lstrip": false,
878
- "normalized": true,
879
- "rstrip": false,
880
- "single_word": false,
881
- "special": false
882
- },
883
- "110": {
884
- "content": "eng2",
885
- "lstrip": false,
886
- "normalized": true,
887
- "rstrip": false,
888
- "single_word": false,
889
- "special": false
890
- },
891
- "111": {
892
- "content": "eng3",
893
- "lstrip": false,
894
- "normalized": true,
895
- "rstrip": false,
896
- "single_word": false,
897
- "special": false
898
- },
899
- "112": {
900
- "content": "eng4",
901
- "lstrip": false,
902
- "normalized": true,
903
- "rstrip": false,
904
- "single_word": false,
905
- "special": false
906
- },
907
- "113": {
908
- "content": "eng5",
909
- "lstrip": false,
910
- "normalized": true,
911
- "rstrip": false,
912
- "single_word": false,
913
- "special": false
914
- },
915
- "114": {
916
- "content": "eng6",
917
- "lstrip": false,
918
- "normalized": true,
919
- "rstrip": false,
920
- "single_word": false,
921
- "special": false
922
- },
923
- "115": {
924
- "content": "eoi1",
925
- "lstrip": false,
926
- "normalized": true,
927
- "rstrip": false,
928
- "single_word": false,
929
- "special": false
930
- },
931
- "116": {
932
- "content": "eoi2",
933
- "lstrip": false,
934
- "normalized": true,
935
- "rstrip": false,
936
- "single_word": false,
937
- "special": false
938
- },
939
- "117": {
940
- "content": "eoi3",
941
- "lstrip": false,
942
- "normalized": true,
943
- "rstrip": false,
944
- "single_word": false,
945
- "special": false
946
- },
947
- "118": {
948
- "content": "eoi4",
949
- "lstrip": false,
950
- "normalized": true,
951
- "rstrip": false,
952
- "single_word": false,
953
- "special": false
954
- },
955
- "119": {
956
- "content": "eoi5",
957
- "lstrip": false,
958
- "normalized": true,
959
- "rstrip": false,
960
- "single_word": false,
961
- "special": false
962
- },
963
- "120": {
964
- "content": "eoi6",
965
- "lstrip": false,
966
- "normalized": true,
967
- "rstrip": false,
968
- "single_word": false,
969
- "special": false
970
- },
971
- "121": {
972
- "content": "eon1",
973
- "lstrip": false,
974
- "normalized": true,
975
- "rstrip": false,
976
- "single_word": false,
977
- "special": false
978
- },
979
- "122": {
980
- "content": "eon2",
981
- "lstrip": false,
982
- "normalized": true,
983
- "rstrip": false,
984
- "single_word": false,
985
- "special": false
986
- },
987
- "123": {
988
- "content": "eon3",
989
- "lstrip": false,
990
- "normalized": true,
991
- "rstrip": false,
992
- "single_word": false,
993
- "special": false
994
- },
995
- "124": {
996
- "content": "eon4",
997
- "lstrip": false,
998
- "normalized": true,
999
- "rstrip": false,
1000
- "single_word": false,
1001
- "special": false
1002
- },
1003
- "125": {
1004
- "content": "eon5",
1005
- "lstrip": false,
1006
- "normalized": true,
1007
- "rstrip": false,
1008
- "single_word": false,
1009
- "special": false
1010
- },
1011
- "126": {
1012
- "content": "eon6",
1013
- "lstrip": false,
1014
- "normalized": true,
1015
- "rstrip": false,
1016
- "single_word": false,
1017
- "special": false
1018
- },
1019
- "127": {
1020
- "content": "eot1",
1021
- "lstrip": false,
1022
- "normalized": true,
1023
- "rstrip": false,
1024
- "single_word": false,
1025
- "special": false
1026
- },
1027
- "128": {
1028
- "content": "eot2",
1029
- "lstrip": false,
1030
- "normalized": true,
1031
- "rstrip": false,
1032
- "single_word": false,
1033
- "special": false
1034
- },
1035
- "129": {
1036
- "content": "eot6",
1037
- "lstrip": false,
1038
- "normalized": true,
1039
- "rstrip": false,
1040
- "single_word": false,
1041
- "special": false
1042
- },
1043
- "130": {
1044
- "content": "ep6",
1045
- "lstrip": false,
1046
- "normalized": true,
1047
- "rstrip": false,
1048
- "single_word": false,
1049
- "special": false
1050
- },
1051
- "131": {
1052
- "content": "eu6",
1053
- "lstrip": false,
1054
- "normalized": true,
1055
- "rstrip": false,
1056
- "single_word": false,
1057
- "special": false
1058
- },
1059
- "132": {
1060
- "content": "f",
1061
- "lstrip": false,
1062
- "normalized": true,
1063
- "rstrip": false,
1064
- "single_word": false,
1065
- "special": false
1066
- },
1067
- "133": {
1068
- "content": "g",
1069
- "lstrip": false,
1070
- "normalized": true,
1071
- "rstrip": false,
1072
- "single_word": false,
1073
- "special": false
1074
- },
1075
- "134": {
1076
- "content": "gw",
1077
- "lstrip": false,
1078
- "normalized": true,
1079
- "rstrip": false,
1080
- "single_word": false,
1081
- "special": false
1082
- },
1083
- "135": {
1084
- "content": "h",
1085
- "lstrip": false,
1086
- "normalized": true,
1087
- "rstrip": false,
1088
- "single_word": false,
1089
- "special": false
1090
- },
1091
- "136": {
1092
- "content": "i1",
1093
- "lstrip": false,
1094
- "normalized": true,
1095
- "rstrip": false,
1096
- "single_word": false,
1097
- "special": false
1098
- },
1099
- "137": {
1100
- "content": "i2",
1101
- "lstrip": false,
1102
- "normalized": true,
1103
- "rstrip": false,
1104
- "single_word": false,
1105
- "special": false
1106
- },
1107
- "138": {
1108
- "content": "i3",
1109
- "lstrip": false,
1110
- "normalized": true,
1111
- "rstrip": false,
1112
- "single_word": false,
1113
- "special": false
1114
- },
1115
- "139": {
1116
- "content": "i4",
1117
- "lstrip": false,
1118
- "normalized": true,
1119
- "rstrip": false,
1120
- "single_word": false,
1121
- "special": false
1122
- },
1123
- "140": {
1124
- "content": "i5",
1125
- "lstrip": false,
1126
- "normalized": true,
1127
- "rstrip": false,
1128
- "single_word": false,
1129
- "special": false
1130
- },
1131
- "141": {
1132
- "content": "i6",
1133
- "lstrip": false,
1134
- "normalized": true,
1135
- "rstrip": false,
1136
- "single_word": false,
1137
- "special": false
1138
- },
1139
- "142": {
1140
- "content": "ik1",
1141
- "lstrip": false,
1142
- "normalized": true,
1143
- "rstrip": false,
1144
- "single_word": false,
1145
- "special": false
1146
- },
1147
- "143": {
1148
- "content": "ik3",
1149
- "lstrip": false,
1150
- "normalized": true,
1151
- "rstrip": false,
1152
- "single_word": false,
1153
- "special": false
1154
- },
1155
- "144": {
1156
- "content": "ik4",
1157
- "lstrip": false,
1158
- "normalized": true,
1159
- "rstrip": false,
1160
- "single_word": false,
1161
- "special": false
1162
- },
1163
- "145": {
1164
- "content": "ik6",
1165
- "lstrip": false,
1166
- "normalized": true,
1167
- "rstrip": false,
1168
- "single_word": false,
1169
- "special": false
1170
- },
1171
- "146": {
1172
- "content": "im1",
1173
- "lstrip": false,
1174
- "normalized": true,
1175
- "rstrip": false,
1176
- "single_word": false,
1177
- "special": false
1178
- },
1179
- "147": {
1180
- "content": "im2",
1181
- "lstrip": false,
1182
- "normalized": true,
1183
- "rstrip": false,
1184
- "single_word": false,
1185
- "special": false
1186
- },
1187
- "148": {
1188
- "content": "im3",
1189
- "lstrip": false,
1190
- "normalized": true,
1191
- "rstrip": false,
1192
- "single_word": false,
1193
- "special": false
1194
- },
1195
- "149": {
1196
- "content": "im4",
1197
- "lstrip": false,
1198
- "normalized": true,
1199
- "rstrip": false,
1200
- "single_word": false,
1201
- "special": false
1202
- },
1203
- "150": {
1204
- "content": "im5",
1205
- "lstrip": false,
1206
- "normalized": true,
1207
- "rstrip": false,
1208
- "single_word": false,
1209
- "special": false
1210
- },
1211
- "151": {
1212
- "content": "im6",
1213
- "lstrip": false,
1214
- "normalized": true,
1215
- "rstrip": false,
1216
- "single_word": false,
1217
- "special": false
1218
- },
1219
- "152": {
1220
- "content": "in1",
1221
- "lstrip": false,
1222
- "normalized": true,
1223
- "rstrip": false,
1224
- "single_word": false,
1225
- "special": false
1226
- },
1227
- "153": {
1228
- "content": "in2",
1229
- "lstrip": false,
1230
- "normalized": true,
1231
- "rstrip": false,
1232
- "single_word": false,
1233
- "special": false
1234
- },
1235
- "154": {
1236
- "content": "in3",
1237
- "lstrip": false,
1238
- "normalized": true,
1239
- "rstrip": false,
1240
- "single_word": false,
1241
- "special": false
1242
- },
1243
- "155": {
1244
- "content": "in4",
1245
- "lstrip": false,
1246
- "normalized": true,
1247
- "rstrip": false,
1248
- "single_word": false,
1249
- "special": false
1250
- },
1251
- "156": {
1252
- "content": "in5",
1253
- "lstrip": false,
1254
- "normalized": true,
1255
- "rstrip": false,
1256
- "single_word": false,
1257
- "special": false
1258
- },
1259
- "157": {
1260
- "content": "in6",
1261
- "lstrip": false,
1262
- "normalized": true,
1263
- "rstrip": false,
1264
- "single_word": false,
1265
- "special": false
1266
- },
1267
- "158": {
1268
- "content": "ing1",
1269
- "lstrip": false,
1270
- "normalized": true,
1271
- "rstrip": false,
1272
- "single_word": false,
1273
- "special": false
1274
- },
1275
- "159": {
1276
- "content": "ing2",
1277
- "lstrip": false,
1278
- "normalized": true,
1279
- "rstrip": false,
1280
- "single_word": false,
1281
- "special": false
1282
- },
1283
- "160": {
1284
- "content": "ing3",
1285
- "lstrip": false,
1286
- "normalized": true,
1287
- "rstrip": false,
1288
- "single_word": false,
1289
- "special": false
1290
- },
1291
- "161": {
1292
- "content": "ing4",
1293
- "lstrip": false,
1294
- "normalized": true,
1295
- "rstrip": false,
1296
- "single_word": false,
1297
- "special": false
1298
- },
1299
- "162": {
1300
- "content": "ing5",
1301
- "lstrip": false,
1302
- "normalized": true,
1303
- "rstrip": false,
1304
- "single_word": false,
1305
- "special": false
1306
- },
1307
- "163": {
1308
- "content": "ing6",
1309
- "lstrip": false,
1310
- "normalized": true,
1311
- "rstrip": false,
1312
- "single_word": false,
1313
- "special": false
1314
- },
1315
- "164": {
1316
- "content": "ip1",
1317
- "lstrip": false,
1318
- "normalized": true,
1319
- "rstrip": false,
1320
- "single_word": false,
1321
- "special": false
1322
- },
1323
- "165": {
1324
- "content": "ip2",
1325
- "lstrip": false,
1326
- "normalized": true,
1327
- "rstrip": false,
1328
- "single_word": false,
1329
- "special": false
1330
- },
1331
- "166": {
1332
- "content": "ip3",
1333
- "lstrip": false,
1334
- "normalized": true,
1335
- "rstrip": false,
1336
- "single_word": false,
1337
- "special": false
1338
- },
1339
- "167": {
1340
- "content": "ip6",
1341
- "lstrip": false,
1342
- "normalized": true,
1343
- "rstrip": false,
1344
- "single_word": false,
1345
- "special": false
1346
- },
1347
- "168": {
1348
- "content": "it1",
1349
- "lstrip": false,
1350
- "normalized": true,
1351
- "rstrip": false,
1352
- "single_word": false,
1353
- "special": false
1354
- },
1355
- "169": {
1356
- "content": "it3",
1357
- "lstrip": false,
1358
- "normalized": true,
1359
- "rstrip": false,
1360
- "single_word": false,
1361
- "special": false
1362
- },
1363
- "170": {
1364
- "content": "it6",
1365
- "lstrip": false,
1366
- "normalized": true,
1367
- "rstrip": false,
1368
- "single_word": false,
1369
- "special": false
1370
- },
1371
- "171": {
1372
- "content": "iu1",
1373
- "lstrip": false,
1374
- "normalized": true,
1375
- "rstrip": false,
1376
- "single_word": false,
1377
- "special": false
1378
- },
1379
- "172": {
1380
- "content": "iu2",
1381
- "lstrip": false,
1382
- "normalized": true,
1383
- "rstrip": false,
1384
- "single_word": false,
1385
- "special": false
1386
- },
1387
- "173": {
1388
- "content": "iu3",
1389
- "lstrip": false,
1390
- "normalized": true,
1391
- "rstrip": false,
1392
- "single_word": false,
1393
- "special": false
1394
- },
1395
- "174": {
1396
- "content": "iu4",
1397
- "lstrip": false,
1398
- "normalized": true,
1399
- "rstrip": false,
1400
- "single_word": false,
1401
- "special": false
1402
- },
1403
- "175": {
1404
- "content": "iu5",
1405
- "lstrip": false,
1406
- "normalized": true,
1407
- "rstrip": false,
1408
- "single_word": false,
1409
- "special": false
1410
- },
1411
- "176": {
1412
- "content": "iu6",
1413
- "lstrip": false,
1414
- "normalized": true,
1415
- "rstrip": false,
1416
- "single_word": false,
1417
- "special": false
1418
- },
1419
- "177": {
1420
- "content": "j",
1421
- "lstrip": false,
1422
- "normalized": true,
1423
- "rstrip": false,
1424
- "single_word": false,
1425
- "special": false
1426
- },
1427
- "178": {
1428
- "content": "k",
1429
- "lstrip": false,
1430
- "normalized": true,
1431
- "rstrip": false,
1432
- "single_word": false,
1433
- "special": false
1434
- },
1435
- "179": {
1436
- "content": "kw",
1437
- "lstrip": false,
1438
- "normalized": true,
1439
- "rstrip": false,
1440
- "single_word": false,
1441
- "special": false
1442
- },
1443
- "180": {
1444
- "content": "l",
1445
- "lstrip": false,
1446
- "normalized": true,
1447
- "rstrip": false,
1448
- "single_word": false,
1449
- "special": false
1450
- },
1451
- "181": {
1452
- "content": "m",
1453
- "lstrip": false,
1454
- "normalized": true,
1455
- "rstrip": false,
1456
- "single_word": false,
1457
- "special": false
1458
- },
1459
- "182": {
1460
- "content": "m2",
1461
- "lstrip": false,
1462
- "normalized": true,
1463
- "rstrip": false,
1464
- "single_word": false,
1465
- "special": false
1466
- },
1467
- "183": {
1468
- "content": "m4",
1469
- "lstrip": false,
1470
- "normalized": true,
1471
- "rstrip": false,
1472
- "single_word": false,
1473
- "special": false
1474
- },
1475
- "184": {
1476
- "content": "m6",
1477
- "lstrip": false,
1478
- "normalized": true,
1479
- "rstrip": false,
1480
- "single_word": false,
1481
- "special": false
1482
- },
1483
- "185": {
1484
- "content": "n",
1485
- "lstrip": false,
1486
- "normalized": true,
1487
- "rstrip": false,
1488
- "single_word": false,
1489
- "special": false
1490
- },
1491
- "186": {
1492
- "content": "ng",
1493
- "lstrip": false,
1494
- "normalized": true,
1495
- "rstrip": false,
1496
- "single_word": false,
1497
- "special": false
1498
- },
1499
- "187": {
1500
- "content": "ng4",
1501
- "lstrip": false,
1502
- "normalized": true,
1503
- "rstrip": false,
1504
- "single_word": false,
1505
- "special": false
1506
- },
1507
- "188": {
1508
- "content": "ng5",
1509
- "lstrip": false,
1510
- "normalized": true,
1511
- "rstrip": false,
1512
- "single_word": false,
1513
- "special": false
1514
- },
1515
- "189": {
1516
- "content": "ng6",
1517
- "lstrip": false,
1518
- "normalized": true,
1519
- "rstrip": false,
1520
- "single_word": false,
1521
- "special": false
1522
- },
1523
- "190": {
1524
- "content": "o1",
1525
- "lstrip": false,
1526
- "normalized": true,
1527
- "rstrip": false,
1528
- "single_word": false,
1529
- "special": false
1530
- },
1531
- "191": {
1532
- "content": "o2",
1533
- "lstrip": false,
1534
- "normalized": true,
1535
- "rstrip": false,
1536
- "single_word": false,
1537
- "special": false
1538
- },
1539
- "192": {
1540
- "content": "o3",
1541
- "lstrip": false,
1542
- "normalized": true,
1543
- "rstrip": false,
1544
- "single_word": false,
1545
- "special": false
1546
- },
1547
- "193": {
1548
- "content": "o4",
1549
- "lstrip": false,
1550
- "normalized": true,
1551
- "rstrip": false,
1552
- "single_word": false,
1553
- "special": false
1554
- },
1555
- "194": {
1556
- "content": "o5",
1557
- "lstrip": false,
1558
- "normalized": true,
1559
- "rstrip": false,
1560
- "single_word": false,
1561
- "special": false
1562
- },
1563
- "195": {
1564
- "content": "o6",
1565
- "lstrip": false,
1566
- "normalized": true,
1567
- "rstrip": false,
1568
- "single_word": false,
1569
- "special": false
1570
- },
1571
- "196": {
1572
- "content": "oe1",
1573
- "lstrip": false,
1574
- "normalized": true,
1575
- "rstrip": false,
1576
- "single_word": false,
1577
- "special": false
1578
- },
1579
- "197": {
1580
- "content": "oe2",
1581
- "lstrip": false,
1582
- "normalized": true,
1583
- "rstrip": false,
1584
- "single_word": false,
1585
- "special": false
1586
- },
1587
- "198": {
1588
- "content": "oe3",
1589
- "lstrip": false,
1590
- "normalized": true,
1591
- "rstrip": false,
1592
- "single_word": false,
1593
- "special": false
1594
- },
1595
- "199": {
1596
- "content": "oe4",
1597
- "lstrip": false,
1598
- "normalized": true,
1599
- "rstrip": false,
1600
- "single_word": false,
1601
- "special": false
1602
- },
1603
- "200": {
1604
- "content": "oek2",
1605
- "lstrip": false,
1606
- "normalized": true,
1607
- "rstrip": false,
1608
- "single_word": false,
1609
- "special": false
1610
- },
1611
- "201": {
1612
- "content": "oek3",
1613
- "lstrip": false,
1614
- "normalized": true,
1615
- "rstrip": false,
1616
- "single_word": false,
1617
- "special": false
1618
- },
1619
- "202": {
1620
- "content": "oek6",
1621
- "lstrip": false,
1622
- "normalized": true,
1623
- "rstrip": false,
1624
- "single_word": false,
1625
- "special": false
1626
- },
1627
- "203": {
1628
- "content": "oeng1",
1629
- "lstrip": false,
1630
- "normalized": true,
1631
- "rstrip": false,
1632
- "single_word": false,
1633
- "special": false
1634
- },
1635
- "204": {
1636
- "content": "oeng2",
1637
- "lstrip": false,
1638
- "normalized": true,
1639
- "rstrip": false,
1640
- "single_word": false,
1641
- "special": false
1642
- },
1643
- "205": {
1644
- "content": "oeng3",
1645
- "lstrip": false,
1646
- "normalized": true,
1647
- "rstrip": false,
1648
- "single_word": false,
1649
- "special": false
1650
- },
1651
- "206": {
1652
- "content": "oeng4",
1653
- "lstrip": false,
1654
- "normalized": true,
1655
- "rstrip": false,
1656
- "single_word": false,
1657
- "special": false
1658
- },
1659
- "207": {
1660
- "content": "oeng5",
1661
- "lstrip": false,
1662
- "normalized": true,
1663
- "rstrip": false,
1664
- "single_word": false,
1665
- "special": false
1666
- },
1667
- "208": {
1668
- "content": "oeng6",
1669
- "lstrip": false,
1670
- "normalized": true,
1671
- "rstrip": false,
1672
- "single_word": false,
1673
- "special": false
1674
- },
1675
- "209": {
1676
- "content": "oi1",
1677
- "lstrip": false,
1678
- "normalized": true,
1679
- "rstrip": false,
1680
- "single_word": false,
1681
- "special": false
1682
- },
1683
- "210": {
1684
- "content": "oi2",
1685
- "lstrip": false,
1686
- "normalized": true,
1687
- "rstrip": false,
1688
- "single_word": false,
1689
- "special": false
1690
- },
1691
- "211": {
1692
- "content": "oi3",
1693
- "lstrip": false,
1694
- "normalized": true,
1695
- "rstrip": false,
1696
- "single_word": false,
1697
- "special": false
1698
- },
1699
- "212": {
1700
- "content": "oi4",
1701
- "lstrip": false,
1702
- "normalized": true,
1703
- "rstrip": false,
1704
- "single_word": false,
1705
- "special": false
1706
- },
1707
- "213": {
1708
- "content": "oi6",
1709
- "lstrip": false,
1710
- "normalized": true,
1711
- "rstrip": false,
1712
- "single_word": false,
1713
- "special": false
1714
- },
1715
- "214": {
1716
- "content": "ok1",
1717
- "lstrip": false,
1718
- "normalized": true,
1719
- "rstrip": false,
1720
- "single_word": false,
1721
- "special": false
1722
- },
1723
- "215": {
1724
- "content": "ok2",
1725
- "lstrip": false,
1726
- "normalized": true,
1727
- "rstrip": false,
1728
- "single_word": false,
1729
- "special": false
1730
- },
1731
- "216": {
1732
- "content": "ok3",
1733
- "lstrip": false,
1734
- "normalized": true,
1735
- "rstrip": false,
1736
- "single_word": false,
1737
- "special": false
1738
- },
1739
- "217": {
1740
- "content": "ok6",
1741
- "lstrip": false,
1742
- "normalized": true,
1743
- "rstrip": false,
1744
- "single_word": false,
1745
- "special": false
1746
- },
1747
- "218": {
1748
- "content": "on1",
1749
- "lstrip": false,
1750
- "normalized": true,
1751
- "rstrip": false,
1752
- "single_word": false,
1753
- "special": false
1754
- },
1755
- "219": {
1756
- "content": "on2",
1757
- "lstrip": false,
1758
- "normalized": true,
1759
- "rstrip": false,
1760
- "single_word": false,
1761
- "special": false
1762
- },
1763
- "220": {
1764
- "content": "on3",
1765
- "lstrip": false,
1766
- "normalized": true,
1767
- "rstrip": false,
1768
- "single_word": false,
1769
- "special": false
1770
- },
1771
- "221": {
1772
- "content": "on4",
1773
- "lstrip": false,
1774
- "normalized": true,
1775
- "rstrip": false,
1776
- "single_word": false,
1777
- "special": false
1778
- },
1779
- "222": {
1780
- "content": "on5",
1781
- "lstrip": false,
1782
- "normalized": true,
1783
- "rstrip": false,
1784
- "single_word": false,
1785
- "special": false
1786
- },
1787
- "223": {
1788
- "content": "on6",
1789
- "lstrip": false,
1790
- "normalized": true,
1791
- "rstrip": false,
1792
- "single_word": false,
1793
- "special": false
1794
- },
1795
- "224": {
1796
- "content": "ong1",
1797
- "lstrip": false,
1798
- "normalized": true,
1799
- "rstrip": false,
1800
- "single_word": false,
1801
- "special": false
1802
- },
1803
- "225": {
1804
- "content": "ong2",
1805
- "lstrip": false,
1806
- "normalized": true,
1807
- "rstrip": false,
1808
- "single_word": false,
1809
- "special": false
1810
- },
1811
- "226": {
1812
- "content": "ong3",
1813
- "lstrip": false,
1814
- "normalized": true,
1815
- "rstrip": false,
1816
- "single_word": false,
1817
- "special": false
1818
- },
1819
- "227": {
1820
- "content": "ong4",
1821
- "lstrip": false,
1822
- "normalized": true,
1823
- "rstrip": false,
1824
- "single_word": false,
1825
- "special": false
1826
- },
1827
- "228": {
1828
- "content": "ong5",
1829
- "lstrip": false,
1830
- "normalized": true,
1831
- "rstrip": false,
1832
- "single_word": false,
1833
- "special": false
1834
- },
1835
- "229": {
1836
- "content": "ong6",
1837
- "lstrip": false,
1838
- "normalized": true,
1839
- "rstrip": false,
1840
- "single_word": false,
1841
- "special": false
1842
- },
1843
- "230": {
1844
- "content": "ot3",
1845
- "lstrip": false,
1846
- "normalized": true,
1847
- "rstrip": false,
1848
- "single_word": false,
1849
- "special": false
1850
- },
1851
- "231": {
1852
- "content": "ou1",
1853
- "lstrip": false,
1854
- "normalized": true,
1855
- "rstrip": false,
1856
- "single_word": false,
1857
- "special": false
1858
- },
1859
- "232": {
1860
- "content": "ou2",
1861
- "lstrip": false,
1862
- "normalized": true,
1863
- "rstrip": false,
1864
- "single_word": false,
1865
- "special": false
1866
- },
1867
- "233": {
1868
- "content": "ou3",
1869
- "lstrip": false,
1870
- "normalized": true,
1871
- "rstrip": false,
1872
- "single_word": false,
1873
- "special": false
1874
- },
1875
- "234": {
1876
- "content": "ou4",
1877
- "lstrip": false,
1878
- "normalized": true,
1879
- "rstrip": false,
1880
- "single_word": false,
1881
- "special": false
1882
- },
1883
- "235": {
1884
- "content": "ou5",
1885
- "lstrip": false,
1886
- "normalized": true,
1887
- "rstrip": false,
1888
- "single_word": false,
1889
- "special": false
1890
- },
1891
- "236": {
1892
- "content": "ou6",
1893
- "lstrip": false,
1894
- "normalized": true,
1895
- "rstrip": false,
1896
- "single_word": false,
1897
- "special": false
1898
- },
1899
- "237": {
1900
- "content": "p",
1901
- "lstrip": false,
1902
- "normalized": true,
1903
- "rstrip": false,
1904
- "single_word": false,
1905
- "special": false
1906
- },
1907
- "238": {
1908
- "content": "s",
1909
- "lstrip": false,
1910
- "normalized": true,
1911
- "rstrip": false,
1912
- "single_word": false,
1913
- "special": false
1914
- },
1915
- "239": {
1916
- "content": "t",
1917
- "lstrip": false,
1918
- "normalized": true,
1919
- "rstrip": false,
1920
- "single_word": false,
1921
- "special": false
1922
- },
1923
- "240": {
1924
- "content": "u1",
1925
- "lstrip": false,
1926
- "normalized": true,
1927
- "rstrip": false,
1928
- "single_word": false,
1929
- "special": false
1930
- },
1931
- "241": {
1932
- "content": "u2",
1933
- "lstrip": false,
1934
- "normalized": true,
1935
- "rstrip": false,
1936
- "single_word": false,
1937
- "special": false
1938
- },
1939
- "242": {
1940
- "content": "u3",
1941
- "lstrip": false,
1942
- "normalized": true,
1943
- "rstrip": false,
1944
- "single_word": false,
1945
- "special": false
1946
- },
1947
- "243": {
1948
- "content": "u4",
1949
- "lstrip": false,
1950
- "normalized": true,
1951
- "rstrip": false,
1952
- "single_word": false,
1953
- "special": false
1954
- },
1955
- "244": {
1956
- "content": "u5",
1957
- "lstrip": false,
1958
- "normalized": true,
1959
- "rstrip": false,
1960
- "single_word": false,
1961
- "special": false
1962
- },
1963
- "245": {
1964
- "content": "u6",
1965
- "lstrip": false,
1966
- "normalized": true,
1967
- "rstrip": false,
1968
- "single_word": false,
1969
- "special": false
1970
- },
1971
- "246": {
1972
- "content": "ui1",
1973
- "lstrip": false,
1974
- "normalized": true,
1975
- "rstrip": false,
1976
- "single_word": false,
1977
- "special": false
1978
- },
1979
- "247": {
1980
- "content": "ui2",
1981
- "lstrip": false,
1982
- "normalized": true,
1983
- "rstrip": false,
1984
- "single_word": false,
1985
- "special": false
1986
- },
1987
- "248": {
1988
- "content": "ui3",
1989
- "lstrip": false,
1990
- "normalized": true,
1991
- "rstrip": false,
1992
- "single_word": false,
1993
- "special": false
1994
- },
1995
- "249": {
1996
- "content": "ui4",
1997
- "lstrip": false,
1998
- "normalized": true,
1999
- "rstrip": false,
2000
- "single_word": false,
2001
- "special": false
2002
- },
2003
- "250": {
2004
- "content": "ui5",
2005
- "lstrip": false,
2006
- "normalized": true,
2007
- "rstrip": false,
2008
- "single_word": false,
2009
- "special": false
2010
- },
2011
- "251": {
2012
- "content": "ui6",
2013
- "lstrip": false,
2014
- "normalized": true,
2015
- "rstrip": false,
2016
- "single_word": false,
2017
- "special": false
2018
- },
2019
- "252": {
2020
- "content": "uk1",
2021
- "lstrip": false,
2022
- "normalized": true,
2023
- "rstrip": false,
2024
- "single_word": false,
2025
- "special": false
2026
- },
2027
- "253": {
2028
- "content": "uk2",
2029
- "lstrip": false,
2030
- "normalized": true,
2031
- "rstrip": false,
2032
- "single_word": false,
2033
- "special": false
2034
- },
2035
- "254": {
2036
- "content": "uk4",
2037
- "lstrip": false,
2038
- "normalized": true,
2039
- "rstrip": false,
2040
- "single_word": false,
2041
- "special": false
2042
- },
2043
- "255": {
2044
- "content": "uk6",
2045
- "lstrip": false,
2046
- "normalized": true,
2047
- "rstrip": false,
2048
- "single_word": false,
2049
- "special": false
2050
- },
2051
- "256": {
2052
- "content": "un1",
2053
- "lstrip": false,
2054
- "normalized": true,
2055
- "rstrip": false,
2056
- "single_word": false,
2057
- "special": false
2058
- },
2059
- "257": {
2060
- "content": "un2",
2061
- "lstrip": false,
2062
- "normalized": true,
2063
- "rstrip": false,
2064
- "single_word": false,
2065
- "special": false
2066
- },
2067
- "258": {
2068
- "content": "un3",
2069
- "lstrip": false,
2070
- "normalized": true,
2071
- "rstrip": false,
2072
- "single_word": false,
2073
- "special": false
2074
- },
2075
- "259": {
2076
- "content": "un4",
2077
- "lstrip": false,
2078
- "normalized": true,
2079
- "rstrip": false,
2080
- "single_word": false,
2081
- "special": false
2082
- },
2083
- "260": {
2084
- "content": "un5",
2085
- "lstrip": false,
2086
- "normalized": true,
2087
- "rstrip": false,
2088
- "single_word": false,
2089
- "special": false
2090
- },
2091
- "261": {
2092
- "content": "un6",
2093
- "lstrip": false,
2094
- "normalized": true,
2095
- "rstrip": false,
2096
- "single_word": false,
2097
- "special": false
2098
- },
2099
- "262": {
2100
- "content": "ung1",
2101
- "lstrip": false,
2102
- "normalized": true,
2103
- "rstrip": false,
2104
- "single_word": false,
2105
- "special": false
2106
- },
2107
- "263": {
2108
- "content": "ung2",
2109
- "lstrip": false,
2110
- "normalized": true,
2111
- "rstrip": false,
2112
- "single_word": false,
2113
- "special": false
2114
- },
2115
- "264": {
2116
- "content": "ung3",
2117
- "lstrip": false,
2118
- "normalized": true,
2119
- "rstrip": false,
2120
- "single_word": false,
2121
- "special": false
2122
- },
2123
- "265": {
2124
- "content": "ung4",
2125
- "lstrip": false,
2126
- "normalized": true,
2127
- "rstrip": false,
2128
- "single_word": false,
2129
- "special": false
2130
- },
2131
- "266": {
2132
- "content": "ung5",
2133
- "lstrip": false,
2134
- "normalized": true,
2135
- "rstrip": false,
2136
- "single_word": false,
2137
- "special": false
2138
- },
2139
- "267": {
2140
- "content": "ung6",
2141
- "lstrip": false,
2142
- "normalized": true,
2143
- "rstrip": false,
2144
- "single_word": false,
2145
- "special": false
2146
- },
2147
- "268": {
2148
- "content": "ut1",
2149
- "lstrip": false,
2150
- "normalized": true,
2151
- "rstrip": false,
2152
- "single_word": false,
2153
- "special": false
2154
- },
2155
- "269": {
2156
- "content": "ut3",
2157
- "lstrip": false,
2158
- "normalized": true,
2159
- "rstrip": false,
2160
- "single_word": false,
2161
- "special": false
2162
- },
2163
- "270": {
2164
- "content": "ut6",
2165
- "lstrip": false,
2166
- "normalized": true,
2167
- "rstrip": false,
2168
- "single_word": false,
2169
- "special": false
2170
- },
2171
- "271": {
2172
- "content": "w",
2173
- "lstrip": false,
2174
- "normalized": true,
2175
- "rstrip": false,
2176
- "single_word": false,
2177
- "special": false
2178
- },
2179
- "272": {
2180
- "content": "yu1",
2181
- "lstrip": false,
2182
- "normalized": true,
2183
- "rstrip": false,
2184
- "single_word": false,
2185
- "special": false
2186
- },
2187
- "273": {
2188
- "content": "yu2",
2189
- "lstrip": false,
2190
- "normalized": true,
2191
- "rstrip": false,
2192
- "single_word": false,
2193
- "special": false
2194
- },
2195
- "274": {
2196
- "content": "yu3",
2197
- "lstrip": false,
2198
- "normalized": true,
2199
- "rstrip": false,
2200
- "single_word": false,
2201
- "special": false
2202
- },
2203
- "275": {
2204
- "content": "yu4",
2205
- "lstrip": false,
2206
- "normalized": true,
2207
- "rstrip": false,
2208
- "single_word": false,
2209
- "special": false
2210
- },
2211
- "276": {
2212
- "content": "yu5",
2213
- "lstrip": false,
2214
- "normalized": true,
2215
- "rstrip": false,
2216
- "single_word": false,
2217
- "special": false
2218
- },
2219
- "277": {
2220
- "content": "yu6",
2221
- "lstrip": false,
2222
- "normalized": true,
2223
- "rstrip": false,
2224
- "single_word": false,
2225
- "special": false
2226
- },
2227
- "278": {
2228
- "content": "yun1",
2229
- "lstrip": false,
2230
- "normalized": true,
2231
- "rstrip": false,
2232
- "single_word": false,
2233
- "special": false
2234
- },
2235
- "279": {
2236
- "content": "yun2",
2237
- "lstrip": false,
2238
- "normalized": true,
2239
- "rstrip": false,
2240
- "single_word": false,
2241
- "special": false
2242
- },
2243
- "280": {
2244
- "content": "yun3",
2245
- "lstrip": false,
2246
- "normalized": true,
2247
- "rstrip": false,
2248
- "single_word": false,
2249
- "special": false
2250
- },
2251
- "281": {
2252
- "content": "yun4",
2253
- "lstrip": false,
2254
- "normalized": true,
2255
- "rstrip": false,
2256
- "single_word": false,
2257
- "special": false
2258
- },
2259
- "282": {
2260
- "content": "yun5",
2261
- "lstrip": false,
2262
- "normalized": true,
2263
- "rstrip": false,
2264
- "single_word": false,
2265
- "special": false
2266
- },
2267
- "283": {
2268
- "content": "yun6",
2269
- "lstrip": false,
2270
- "normalized": true,
2271
- "rstrip": false,
2272
- "single_word": false,
2273
- "special": false
2274
- },
2275
- "284": {
2276
- "content": "yut1",
2277
- "lstrip": false,
2278
- "normalized": true,
2279
- "rstrip": false,
2280
- "single_word": false,
2281
- "special": false
2282
- },
2283
- "285": {
2284
- "content": "yut2",
2285
- "lstrip": false,
2286
- "normalized": true,
2287
- "rstrip": false,
2288
- "single_word": false,
2289
- "special": false
2290
- },
2291
- "286": {
2292
- "content": "yut3",
2293
- "lstrip": false,
2294
- "normalized": true,
2295
- "rstrip": false,
2296
- "single_word": false,
2297
- "special": false
2298
- },
2299
- "287": {
2300
- "content": "yut4",
2301
- "lstrip": false,
2302
- "normalized": true,
2303
- "rstrip": false,
2304
- "single_word": false,
2305
- "special": false
2306
- },
2307
- "288": {
2308
- "content": "yut6",
2309
- "lstrip": false,
2310
- "normalized": true,
2311
- "rstrip": false,
2312
- "single_word": false,
2313
- "special": false
2314
- },
2315
- "289": {
2316
- "content": "z",
2317
- "lstrip": false,
2318
- "normalized": true,
2319
- "rstrip": false,
2320
- "single_word": false,
2321
- "special": false
2322
- },
2323
- "290": {
2324
- "content": "<s>",
2325
- "lstrip": false,
2326
- "normalized": false,
2327
- "rstrip": false,
2328
- "single_word": false,
2329
- "special": true
2330
- },
2331
- "291": {
2332
  "content": "</s>",
2333
  "lstrip": false,
2334
  "normalized": false,
@@ -2338,7 +618,7 @@
2338
  }
2339
  },
2340
  "bos_token": "<s>",
2341
- "clean_up_tokenization_spaces": true,
2342
  "do_lower_case": false,
2343
  "eos_token": "</s>",
2344
  "model_max_length": 1000000000000000019884624838656,
 
25
  "special": false
26
  },
27
  "3": {
28
+ "content": "aa",
29
  "lstrip": false,
30
  "normalized": true,
31
  "rstrip": false,
 
33
  "special": false
34
  },
35
  "4": {
36
+ "content": "aai",
37
  "lstrip": false,
38
  "normalized": true,
39
  "rstrip": false,
 
41
  "special": false
42
  },
43
  "5": {
44
+ "content": "aak",
45
  "lstrip": false,
46
  "normalized": true,
47
  "rstrip": false,
 
49
  "special": false
50
  },
51
  "6": {
52
+ "content": "aam",
53
  "lstrip": false,
54
  "normalized": true,
55
  "rstrip": false,
 
57
  "special": false
58
  },
59
  "7": {
60
+ "content": "aan",
61
  "lstrip": false,
62
  "normalized": true,
63
  "rstrip": false,
 
65
  "special": false
66
  },
67
  "8": {
68
+ "content": "aang",
69
  "lstrip": false,
70
  "normalized": true,
71
  "rstrip": false,
 
73
  "special": false
74
  },
75
  "9": {
76
+ "content": "aap",
77
  "lstrip": false,
78
  "normalized": true,
79
  "rstrip": false,
 
81
  "special": false
82
  },
83
  "10": {
84
+ "content": "aat",
85
  "lstrip": false,
86
  "normalized": true,
87
  "rstrip": false,
 
89
  "special": false
90
  },
91
  "11": {
92
+ "content": "aau",
93
  "lstrip": false,
94
  "normalized": true,
95
  "rstrip": false,
 
97
  "special": false
98
  },
99
  "12": {
100
+ "content": "ai",
101
  "lstrip": false,
102
  "normalized": true,
103
  "rstrip": false,
 
105
  "special": false
106
  },
107
  "13": {
108
+ "content": "ak",
109
  "lstrip": false,
110
  "normalized": true,
111
  "rstrip": false,
 
113
  "special": false
114
  },
115
  "14": {
116
+ "content": "am",
117
  "lstrip": false,
118
  "normalized": true,
119
  "rstrip": false,
 
121
  "special": false
122
  },
123
  "15": {
124
+ "content": "an",
125
  "lstrip": false,
126
  "normalized": true,
127
  "rstrip": false,
 
129
  "special": false
130
  },
131
  "16": {
132
+ "content": "ang",
133
  "lstrip": false,
134
  "normalized": true,
135
  "rstrip": false,
 
137
  "special": false
138
  },
139
  "17": {
140
+ "content": "ap",
141
  "lstrip": false,
142
  "normalized": true,
143
  "rstrip": false,
 
145
  "special": false
146
  },
147
  "18": {
148
+ "content": "at",
149
  "lstrip": false,
150
  "normalized": true,
151
  "rstrip": false,
 
153
  "special": false
154
  },
155
  "19": {
156
+ "content": "au",
157
  "lstrip": false,
158
  "normalized": true,
159
  "rstrip": false,
 
161
  "special": false
162
  },
163
  "20": {
164
+ "content": "b",
165
  "lstrip": false,
166
  "normalized": true,
167
  "rstrip": false,
 
169
  "special": false
170
  },
171
  "21": {
172
+ "content": "c",
173
  "lstrip": false,
174
  "normalized": true,
175
  "rstrip": false,
 
177
  "special": false
178
  },
179
  "22": {
180
+ "content": "d",
181
  "lstrip": false,
182
  "normalized": true,
183
  "rstrip": false,
 
185
  "special": false
186
  },
187
  "23": {
188
+ "content": "e",
189
  "lstrip": false,
190
  "normalized": true,
191
  "rstrip": false,
 
193
  "special": false
194
  },
195
  "24": {
196
+ "content": "ei",
197
  "lstrip": false,
198
  "normalized": true,
199
  "rstrip": false,
 
201
  "special": false
202
  },
203
  "25": {
204
+ "content": "ek",
205
  "lstrip": false,
206
  "normalized": true,
207
  "rstrip": false,
 
209
  "special": false
210
  },
211
  "26": {
212
+ "content": "eng",
213
  "lstrip": false,
214
  "normalized": true,
215
  "rstrip": false,
 
217
  "special": false
218
  },
219
  "27": {
220
+ "content": "eoi",
221
  "lstrip": false,
222
  "normalized": true,
223
  "rstrip": false,
 
225
  "special": false
226
  },
227
  "28": {
228
+ "content": "eon",
229
  "lstrip": false,
230
  "normalized": true,
231
  "rstrip": false,
 
233
  "special": false
234
  },
235
  "29": {
236
+ "content": "eot",
237
  "lstrip": false,
238
  "normalized": true,
239
  "rstrip": false,
 
241
  "special": false
242
  },
243
  "30": {
244
+ "content": "ep",
245
  "lstrip": false,
246
  "normalized": true,
247
  "rstrip": false,
 
249
  "special": false
250
  },
251
  "31": {
252
+ "content": "eu",
253
  "lstrip": false,
254
  "normalized": true,
255
  "rstrip": false,
 
257
  "special": false
258
  },
259
  "32": {
260
+ "content": "f",
261
  "lstrip": false,
262
  "normalized": true,
263
  "rstrip": false,
 
265
  "special": false
266
  },
267
  "33": {
268
+ "content": "g",
269
  "lstrip": false,
270
  "normalized": true,
271
  "rstrip": false,
 
273
  "special": false
274
  },
275
  "34": {
276
+ "content": "gw",
277
  "lstrip": false,
278
  "normalized": true,
279
  "rstrip": false,
 
281
  "special": false
282
  },
283
  "35": {
284
+ "content": "h",
285
  "lstrip": false,
286
  "normalized": true,
287
  "rstrip": false,
 
289
  "special": false
290
  },
291
  "36": {
292
+ "content": "i",
293
  "lstrip": false,
294
  "normalized": true,
295
  "rstrip": false,
 
297
  "special": false
298
  },
299
  "37": {
300
+ "content": "ik",
301
  "lstrip": false,
302
  "normalized": true,
303
  "rstrip": false,
 
305
  "special": false
306
  },
307
  "38": {
308
+ "content": "im",
309
  "lstrip": false,
310
  "normalized": true,
311
  "rstrip": false,
 
313
  "special": false
314
  },
315
  "39": {
316
+ "content": "in",
317
  "lstrip": false,
318
  "normalized": true,
319
  "rstrip": false,
 
321
  "special": false
322
  },
323
  "40": {
324
+ "content": "ing",
325
  "lstrip": false,
326
  "normalized": true,
327
  "rstrip": false,
 
329
  "special": false
330
  },
331
  "41": {
332
+ "content": "ip",
333
  "lstrip": false,
334
  "normalized": true,
335
  "rstrip": false,
 
337
  "special": false
338
  },
339
  "42": {
340
+ "content": "it",
341
  "lstrip": false,
342
  "normalized": true,
343
  "rstrip": false,
 
345
  "special": false
346
  },
347
  "43": {
348
+ "content": "iu",
349
  "lstrip": false,
350
  "normalized": true,
351
  "rstrip": false,
 
353
  "special": false
354
  },
355
  "44": {
356
+ "content": "j",
357
  "lstrip": false,
358
  "normalized": true,
359
  "rstrip": false,
 
361
  "special": false
362
  },
363
  "45": {
364
+ "content": "k",
365
  "lstrip": false,
366
  "normalized": true,
367
  "rstrip": false,
 
369
  "special": false
370
  },
371
  "46": {
372
+ "content": "kw",
373
  "lstrip": false,
374
  "normalized": true,
375
  "rstrip": false,
 
377
  "special": false
378
  },
379
  "47": {
380
+ "content": "l",
381
  "lstrip": false,
382
  "normalized": true,
383
  "rstrip": false,
 
385
  "special": false
386
  },
387
  "48": {
388
+ "content": "m",
389
  "lstrip": false,
390
  "normalized": true,
391
  "rstrip": false,
 
393
  "special": false
394
  },
395
  "49": {
396
+ "content": "n",
397
  "lstrip": false,
398
  "normalized": true,
399
  "rstrip": false,
 
401
  "special": false
402
  },
403
  "50": {
404
+ "content": "ng",
405
  "lstrip": false,
406
  "normalized": true,
407
  "rstrip": false,
 
409
  "special": false
410
  },
411
  "51": {
412
+ "content": "o",
413
  "lstrip": false,
414
  "normalized": true,
415
  "rstrip": false,
 
417
  "special": false
418
  },
419
  "52": {
420
+ "content": "oe",
421
  "lstrip": false,
422
  "normalized": true,
423
  "rstrip": false,
 
425
  "special": false
426
  },
427
  "53": {
428
+ "content": "oek",
429
  "lstrip": false,
430
  "normalized": true,
431
  "rstrip": false,
 
433
  "special": false
434
  },
435
  "54": {
436
+ "content": "oeng",
437
  "lstrip": false,
438
  "normalized": true,
439
  "rstrip": false,
 
441
  "special": false
442
  },
443
  "55": {
444
+ "content": "oi",
445
  "lstrip": false,
446
  "normalized": true,
447
  "rstrip": false,
 
449
  "special": false
450
  },
451
  "56": {
452
+ "content": "ok",
453
  "lstrip": false,
454
  "normalized": true,
455
  "rstrip": false,
 
457
  "special": false
458
  },
459
  "57": {
460
+ "content": "on",
461
  "lstrip": false,
462
  "normalized": true,
463
  "rstrip": false,
 
465
  "special": false
466
  },
467
  "58": {
468
+ "content": "ong",
469
  "lstrip": false,
470
  "normalized": true,
471
  "rstrip": false,
 
473
  "special": false
474
  },
475
  "59": {
476
+ "content": "ot",
477
  "lstrip": false,
478
  "normalized": true,
479
  "rstrip": false,
 
481
  "special": false
482
  },
483
  "60": {
484
+ "content": "ou",
485
  "lstrip": false,
486
  "normalized": true,
487
  "rstrip": false,
 
489
  "special": false
490
  },
491
  "61": {
492
+ "content": "p",
493
  "lstrip": false,
494
  "normalized": true,
495
  "rstrip": false,
 
497
  "special": false
498
  },
499
  "62": {
500
+ "content": "s",
501
  "lstrip": false,
502
  "normalized": true,
503
  "rstrip": false,
 
505
  "special": false
506
  },
507
  "63": {
508
+ "content": "t",
509
  "lstrip": false,
510
  "normalized": true,
511
  "rstrip": false,
 
513
  "special": false
514
  },
515
  "64": {
516
+ "content": "u",
517
  "lstrip": false,
518
  "normalized": true,
519
  "rstrip": false,
 
521
  "special": false
522
  },
523
  "65": {
524
+ "content": "ui",
525
  "lstrip": false,
526
  "normalized": true,
527
  "rstrip": false,
 
529
  "special": false
530
  },
531
  "66": {
532
+ "content": "uk",
533
  "lstrip": false,
534
  "normalized": true,
535
  "rstrip": false,
 
537
  "special": false
538
  },
539
  "67": {
540
+ "content": "un",
541
  "lstrip": false,
542
  "normalized": true,
543
  "rstrip": false,
 
545
  "special": false
546
  },
547
  "68": {
548
+ "content": "ung",
549
  "lstrip": false,
550
  "normalized": true,
551
  "rstrip": false,
 
553
  "special": false
554
  },
555
  "69": {
556
+ "content": "ut",
557
  "lstrip": false,
558
  "normalized": true,
559
  "rstrip": false,
 
561
  "special": false
562
  },
563
  "70": {
564
+ "content": "w",
565
  "lstrip": false,
566
  "normalized": true,
567
  "rstrip": false,
 
569
  "special": false
570
  },
571
  "71": {
572
+ "content": "yu",
573
  "lstrip": false,
574
  "normalized": true,
575
  "rstrip": false,
 
577
  "special": false
578
  },
579
  "72": {
580
+ "content": "yun",
581
  "lstrip": false,
582
  "normalized": true,
583
  "rstrip": false,
 
585
  "special": false
586
  },
587
  "73": {
588
+ "content": "yut",
589
  "lstrip": false,
590
  "normalized": true,
591
  "rstrip": false,
 
593
  "special": false
594
  },
595
  "74": {
596
+ "content": "z",
597
  "lstrip": false,
598
  "normalized": true,
599
  "rstrip": false,
 
601
  "special": false
602
  },
603
  "75": {
604
+ "content": "<s>",
605
  "lstrip": false,
606
+ "normalized": false,
607
  "rstrip": false,
608
  "single_word": false,
609
+ "special": true
610
  },
611
  "76": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
612
  "content": "</s>",
613
  "lstrip": false,
614
  "normalized": false,
 
618
  }
619
  },
620
  "bos_token": "<s>",
621
+ "clean_up_tokenization_spaces": false,
622
  "do_lower_case": false,
623
  "eos_token": "</s>",
624
  "model_max_length": 1000000000000000019884624838656,
vocab.json CHANGED
@@ -1,292 +1,77 @@
1
  {
2
  "[PAD]": 0,
3
  "[UNK]": 1,
4
- "aa1": 3,
5
- "aa2": 4,
6
- "aa3": 5,
7
- "aa4": 6,
8
- "aa5": 7,
9
- "aa6": 8,
10
- "aai1": 9,
11
- "aai2": 10,
12
- "aai3": 11,
13
- "aai4": 12,
14
- "aai5": 13,
15
- "aai6": 14,
16
- "aak1": 15,
17
- "aak2": 16,
18
- "aak3": 17,
19
- "aak6": 18,
20
- "aam1": 19,
21
- "aam2": 20,
22
- "aam3": 21,
23
- "aam4": 22,
24
- "aam5": 23,
25
- "aam6": 24,
26
- "aan1": 25,
27
- "aan2": 26,
28
- "aan3": 27,
29
- "aan4": 28,
30
- "aan5": 29,
31
- "aan6": 30,
32
- "aang1": 31,
33
- "aang2": 32,
34
- "aang3": 33,
35
- "aang4": 34,
36
- "aang5": 35,
37
- "aang6": 36,
38
- "aap1": 37,
39
- "aap2": 38,
40
- "aap3": 39,
41
- "aap6": 40,
42
- "aat1": 41,
43
- "aat2": 42,
44
- "aat3": 43,
45
- "aat6": 44,
46
- "aau1": 45,
47
- "aau2": 46,
48
- "aau3": 47,
49
- "aau4": 48,
50
- "aau5": 49,
51
- "aau6": 50,
52
- "ai1": 51,
53
- "ai2": 52,
54
- "ai3": 53,
55
- "ai4": 54,
56
- "ai5": 55,
57
- "ai6": 56,
58
- "ak1": 57,
59
- "ak2": 58,
60
- "ak6": 59,
61
- "am1": 60,
62
- "am2": 61,
63
- "am3": 62,
64
- "am4": 63,
65
- "am5": 64,
66
- "am6": 65,
67
- "an1": 66,
68
- "an2": 67,
69
- "an3": 68,
70
- "an4": 69,
71
- "an5": 70,
72
- "an6": 71,
73
- "ang1": 72,
74
- "ang2": 73,
75
- "ang3": 74,
76
- "ang4": 75,
77
- "ang6": 76,
78
- "ap1": 77,
79
- "ap2": 78,
80
- "ap6": 79,
81
- "at1": 80,
82
- "at2": 81,
83
- "at3": 82,
84
- "at6": 83,
85
- "au1": 84,
86
- "au2": 85,
87
- "au3": 86,
88
- "au4": 87,
89
- "au5": 88,
90
- "au6": 89,
91
- "b": 90,
92
- "c": 91,
93
- "d": 92,
94
- "e1": 93,
95
- "e2": 94,
96
- "e3": 95,
97
- "e4": 96,
98
- "e5": 97,
99
- "e6": 98,
100
- "ei1": 99,
101
- "ei2": 100,
102
- "ei3": 101,
103
- "ei4": 102,
104
- "ei5": 103,
105
- "ei6": 104,
106
- "ek1": 105,
107
- "ek2": 106,
108
- "ek3": 107,
109
- "ek6": 108,
110
- "eng1": 109,
111
- "eng2": 110,
112
- "eng3": 111,
113
- "eng4": 112,
114
- "eng5": 113,
115
- "eng6": 114,
116
- "eoi1": 115,
117
- "eoi2": 116,
118
- "eoi3": 117,
119
- "eoi4": 118,
120
- "eoi5": 119,
121
- "eoi6": 120,
122
- "eon1": 121,
123
- "eon2": 122,
124
- "eon3": 123,
125
- "eon4": 124,
126
- "eon5": 125,
127
- "eon6": 126,
128
- "eot1": 127,
129
- "eot2": 128,
130
- "eot6": 129,
131
- "ep6": 130,
132
- "eu6": 131,
133
- "f": 132,
134
- "g": 133,
135
- "gw": 134,
136
- "h": 135,
137
- "i1": 136,
138
- "i2": 137,
139
- "i3": 138,
140
- "i4": 139,
141
- "i5": 140,
142
- "i6": 141,
143
- "ik1": 142,
144
- "ik3": 143,
145
- "ik4": 144,
146
- "ik6": 145,
147
- "im1": 146,
148
- "im2": 147,
149
- "im3": 148,
150
- "im4": 149,
151
- "im5": 150,
152
- "im6": 151,
153
- "in1": 152,
154
- "in2": 153,
155
- "in3": 154,
156
- "in4": 155,
157
- "in5": 156,
158
- "in6": 157,
159
- "ing1": 158,
160
- "ing2": 159,
161
- "ing3": 160,
162
- "ing4": 161,
163
- "ing5": 162,
164
- "ing6": 163,
165
- "ip1": 164,
166
- "ip2": 165,
167
- "ip3": 166,
168
- "ip6": 167,
169
- "it1": 168,
170
- "it3": 169,
171
- "it6": 170,
172
- "iu1": 171,
173
- "iu2": 172,
174
- "iu3": 173,
175
- "iu4": 174,
176
- "iu5": 175,
177
- "iu6": 176,
178
- "j": 177,
179
- "k": 178,
180
- "kw": 179,
181
- "l": 180,
182
- "m": 181,
183
- "m2": 182,
184
- "m4": 183,
185
- "m6": 184,
186
- "n": 185,
187
- "ng": 186,
188
- "ng4": 187,
189
- "ng5": 188,
190
- "ng6": 189,
191
- "o1": 190,
192
- "o2": 191,
193
- "o3": 192,
194
- "o4": 193,
195
- "o5": 194,
196
- "o6": 195,
197
- "oe1": 196,
198
- "oe2": 197,
199
- "oe3": 198,
200
- "oe4": 199,
201
- "oek2": 200,
202
- "oek3": 201,
203
- "oek6": 202,
204
- "oeng1": 203,
205
- "oeng2": 204,
206
- "oeng3": 205,
207
- "oeng4": 206,
208
- "oeng5": 207,
209
- "oeng6": 208,
210
- "oi1": 209,
211
- "oi2": 210,
212
- "oi3": 211,
213
- "oi4": 212,
214
- "oi6": 213,
215
- "ok1": 214,
216
- "ok2": 215,
217
- "ok3": 216,
218
- "ok6": 217,
219
- "on1": 218,
220
- "on2": 219,
221
- "on3": 220,
222
- "on4": 221,
223
- "on5": 222,
224
- "on6": 223,
225
- "ong1": 224,
226
- "ong2": 225,
227
- "ong3": 226,
228
- "ong4": 227,
229
- "ong5": 228,
230
- "ong6": 229,
231
- "ot3": 230,
232
- "ou1": 231,
233
- "ou2": 232,
234
- "ou3": 233,
235
- "ou4": 234,
236
- "ou5": 235,
237
- "ou6": 236,
238
- "p": 237,
239
- "s": 238,
240
- "t": 239,
241
- "u1": 240,
242
- "u2": 241,
243
- "u3": 242,
244
- "u4": 243,
245
- "u5": 244,
246
- "u6": 245,
247
- "ui1": 246,
248
- "ui2": 247,
249
- "ui3": 248,
250
- "ui4": 249,
251
- "ui5": 250,
252
- "ui6": 251,
253
- "uk1": 252,
254
- "uk2": 253,
255
- "uk4": 254,
256
- "uk6": 255,
257
- "un1": 256,
258
- "un2": 257,
259
- "un3": 258,
260
- "un4": 259,
261
- "un5": 260,
262
- "un6": 261,
263
- "ung1": 262,
264
- "ung2": 263,
265
- "ung3": 264,
266
- "ung4": 265,
267
- "ung5": 266,
268
- "ung6": 267,
269
- "ut1": 268,
270
- "ut3": 269,
271
- "ut6": 270,
272
- "w": 271,
273
- "yu1": 272,
274
- "yu2": 273,
275
- "yu3": 274,
276
- "yu4": 275,
277
- "yu5": 276,
278
- "yu6": 277,
279
- "yun1": 278,
280
- "yun2": 279,
281
- "yun3": 280,
282
- "yun4": 281,
283
- "yun5": 282,
284
- "yun6": 283,
285
- "yut1": 284,
286
- "yut2": 285,
287
- "yut3": 286,
288
- "yut4": 287,
289
- "yut6": 288,
290
- "z": 289,
291
  "|": 2
292
  }
 
1
  {
2
  "[PAD]": 0,
3
  "[UNK]": 1,
4
+ "aa": 3,
5
+ "aai": 4,
6
+ "aak": 5,
7
+ "aam": 6,
8
+ "aan": 7,
9
+ "aang": 8,
10
+ "aap": 9,
11
+ "aat": 10,
12
+ "aau": 11,
13
+ "ai": 12,
14
+ "ak": 13,
15
+ "am": 14,
16
+ "an": 15,
17
+ "ang": 16,
18
+ "ap": 17,
19
+ "at": 18,
20
+ "au": 19,
21
+ "b": 20,
22
+ "c": 21,
23
+ "d": 22,
24
+ "e": 23,
25
+ "ei": 24,
26
+ "ek": 25,
27
+ "eng": 26,
28
+ "eoi": 27,
29
+ "eon": 28,
30
+ "eot": 29,
31
+ "ep": 30,
32
+ "eu": 31,
33
+ "f": 32,
34
+ "g": 33,
35
+ "gw": 34,
36
+ "h": 35,
37
+ "i": 36,
38
+ "ik": 37,
39
+ "im": 38,
40
+ "in": 39,
41
+ "ing": 40,
42
+ "ip": 41,
43
+ "it": 42,
44
+ "iu": 43,
45
+ "j": 44,
46
+ "k": 45,
47
+ "kw": 46,
48
+ "l": 47,
49
+ "m": 48,
50
+ "n": 49,
51
+ "ng": 50,
52
+ "o": 51,
53
+ "oe": 52,
54
+ "oek": 53,
55
+ "oeng": 54,
56
+ "oi": 55,
57
+ "ok": 56,
58
+ "on": 57,
59
+ "ong": 58,
60
+ "ot": 59,
61
+ "ou": 60,
62
+ "p": 61,
63
+ "s": 62,
64
+ "t": 63,
65
+ "u": 64,
66
+ "ui": 65,
67
+ "uk": 66,
68
+ "un": 67,
69
+ "ung": 68,
70
+ "ut": 69,
71
+ "w": 70,
72
+ "yu": 71,
73
+ "yun": 72,
74
+ "yut": 73,
75
+ "z": 74,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  "|": 2
77
  }