Spaces:
Sleeping
Sleeping
from transformers import PreTrainedTokenizerFast
from typing import Dict, List


class NigerianLanguageTokenizer:
    """Thin wrapper around a fast Hugging Face tokenizer for batch encoding."""

    def __init__(self, base_tokenizer: PreTrainedTokenizerFast):
        # Underlying tokenizer. NOTE(review): padding=True in tokenize_batch
        # requires a pad token to be set on this tokenizer — confirm at call site.
        self.tokenizer = base_tokenizer

    def tokenize_batch(self, texts: List[str]) -> Dict:
        """Tokenize a batch of strings into padded PyTorch tensors.

        Args:
            texts: Batch of raw input strings.

        Returns:
            A dict-like ``BatchEncoding`` (``input_ids``, ``attention_mask``, ...)
            with each sequence padded to the longest in the batch and truncated
            to the tokenizer's model max length.
        """
        return self.tokenizer(
            texts,
            padding=True,
            truncation=True,
            return_tensors="pt",
        )