JonusNattapong committed on
Commit
a408e1e
·
verified ·
1 Parent(s): 7784060

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +20 -0
tokenizer_config.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "unk_token": {
3
+ "content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "__type": "AddedToken"
4
+ },
5
+ "model_max_length": 2048,
6
+ "tokenizer_class": "PreTrainedTokenizerFast",
7
+ "auto_map": {
8
+ "AutoTokenizer": [
9
+ "tokenizers.Tokenizer",
10
+ null
11
+ ]
12
+ },
13
+ "model_type": "unigram",
14
+ "vocab_size": 35590,
15
+ "language": ["th", "thai"],
16
+ "license": "apache-2.0",
17
+ "library_name": "tokenizers",
18
+ "tags": ["thai", "tokenizer", "nlp", "subword"],
19
+ "creation_date": "2025-07-02"
20
+ }