gradio
python-docx
transformers
torch
flax
tiktoken
sentencepiece
pdfminer.six
datasets
nltk>=3.8.2
spacy
scikit-learn