from transformers import DistilBertTokenizer

# Loaded once at import time and shared by every get_tokens() call.
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")


def get_tokens(string, *, max_length=256):
    """Tokenize *string* with the module-level DistilBERT tokenizer.

    The input is padded up to, and truncated down to, exactly
    ``max_length`` tokens, and the result is requested as PyTorch
    tensors (``return_tensors="pt"``).

    Args:
        string: Text to tokenize; forwarded unchanged to the tokenizer.
            NOTE(review): the tokenizer presumably also accepts a batch
            (list of strings) -- confirm before relying on it.
        max_length: Fixed sequence length used for both padding and
            truncation. Defaults to 256, the value previously hard-coded.

    Returns:
        A plain ``dict`` with exactly two keys, ``"input_ids"`` and
        ``"attention_mask"``, taken from the tokenizer output. Any other
        fields the tokenizer produces are dropped.
    """
    encoded = tokenizer(
        string,
        return_tensors="pt",
        padding="max_length",
        max_length=max_length,
        truncation=True,
    )
    return {
        "input_ids": encoded["input_ids"],
        "attention_mask": encoded["attention_mask"],
    }