anuragshas committed on
Commit
8de2057
1 Parent(s): 5a853fc

add tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"<s>": 62, "</s>": 63}
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "./", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"ଁ": 1, "ଂ": 2, "ଃ": 3, "ଅ": 4, "ଆ": 5, "ଇ": 6, "ଈ": 7, "ଉ": 8, "ଊ": 9, "ଏ": 10, "ଓ": 11, "କ": 12, "ଖ": 13, "ଗ": 14, "ଘ": 15, "ଙ": 16, "ଚ": 17, "ଛ": 18, "ଜ": 19, "ଝ": 20, "ଞ": 21, "ଟ": 22, "ଠ": 23, "ଡ": 24, "ଢ": 25, "ଣ": 26, "ତ": 27, "ଥ": 28, "ଦ": 29, "ଧ": 30, "ନ": 31, "ପ": 32, "ଫ": 33, "ବ": 34, "ଭ": 35, "ମ": 36, "ଯ": 37, "ର": 38, "ଲ": 39, "ଳ": 40, "ଵ": 41, "ଶ": 42, "ଷ": 43, "ସ": 44, "ହ": 45, "଼": 46, "ା": 47, "ି": 48, "ୀ": 49, "ୁ": 50, "ୂ": 51, "ୃ": 52, "େ": 53, "ୈ": 54, "ୋ": 55, "ୌ": 56, "୍": 57, "ୟ": 58, "ୱ": 59, "|": 0, "[UNK]": 60, "[PAD]": 61}