Spaces:
Sleeping
Sleeping
File size: 370 Bytes
4736ae1 |
1 2 3 4 5 6 7 8 9 10 |
from transformers import DistilBertTokenizer
# Load the pretrained uncased DistilBERT tokenizer once at import time so it
# is shared by all calls to get_tokens() (avoids re-downloading/re-building it).
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
def get_tokens(string, max_length=256):
    """Tokenize ``string`` with the module-level DistilBERT tokenizer.

    Args:
        string: Input text (or batch of texts) to tokenize.
        max_length: Target length to pad/truncate to. Defaults to 256,
            preserving the original hard-coded behavior.

    Returns:
        dict with keys ``"input_ids"`` and ``"attention_mask"``, each a
        PyTorch tensor (``return_tensors="pt"``) padded/truncated to
        ``max_length``.
    """
    encoded = tokenizer(
        string,
        return_tensors="pt",
        padding="max_length",
        max_length=max_length,
        truncation=True,
    )
    # Expose only the two fields downstream models need.
    return {
        "input_ids": encoded["input_ids"],
        "attention_mask": encoded["attention_mask"],
    }