from transformers import DistilBertTokenizer

# Load the pretrained DistilBERT tokenizer (lowercases input to match the uncased checkpoint).
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")

def get_tokens(string):
    # Tokenize to PyTorch tensors, padding/truncating every input to a fixed length of 256 tokens.
    text = tokenizer(string, return_tensors="pt", padding="max_length", max_length=256, truncation=True)
    return {
        "input_ids": text["input_ids"],
        "attention_mask": text["attention_mask"],
    }