anuragshas
committed on
Commit
•
8de2057
1
Parent(s):
5a853fc
add tokenizer
Browse files
- added_tokens.json +1 -0
- special_tokens_map.json +1 -0
- tokenizer_config.json +1 -0
- vocab.json +1 -0
added_tokens.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"<s>": 62, "</s>": 63}
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "./", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
|
vocab.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"ଁ": 1, "ଂ": 2, "ଃ": 3, "ଅ": 4, "ଆ": 5, "ଇ": 6, "ଈ": 7, "ଉ": 8, "ଊ": 9, "ଏ": 10, "ଓ": 11, "କ": 12, "ଖ": 13, "ଗ": 14, "ଘ": 15, "ଙ": 16, "ଚ": 17, "ଛ": 18, "ଜ": 19, "ଝ": 20, "ଞ": 21, "ଟ": 22, "ଠ": 23, "ଡ": 24, "ଢ": 25, "ଣ": 26, "ତ": 27, "ଥ": 28, "ଦ": 29, "ଧ": 30, "ନ": 31, "ପ": 32, "ଫ": 33, "ବ": 34, "ଭ": 35, "ମ": 36, "ଯ": 37, "ର": 38, "ଲ": 39, "ଳ": 40, "ଵ": 41, "ଶ": 42, "ଷ": 43, "ସ": 44, "ହ": 45, "଼": 46, "ା": 47, "ି": 48, "ୀ": 49, "ୁ": 50, "ୂ": 51, "ୃ": 52, "େ": 53, "ୈ": 54, "ୋ": 55, "ୌ": 56, "୍": 57, "ୟ": 58, "ୱ": 59, "|": 0, "[UNK]": 60, "[PAD]": 61}
|