---
# Tokenizer registry: one entry per sub-tokenizer.
# Each entry carries a name, a numeric tokenizer_id, the path(s) to its
# tokenizer JSON file, and the start/end delimiter tokens that use that name.
# In every entry below, json_path and modular_json_path reference the same
# file; paths are written relative (./...).
tokenizers_info:
  - name: AA
    tokenizer_id: 0
    json_path: ./t5_tokenizer_AA_special.json
    modular_json_path: ./t5_tokenizer_AA_special.json
    start_delimiter: <start_AA>
    end_delimiter: <end_AA>

  - name: SMILES
    tokenizer_id: 1
    json_path: ./bpe_tokenizer_trained_on_chembl_zinc_with_aug_4272372_samples_balanced_1_1.json
    modular_json_path: ./bpe_tokenizer_trained_on_chembl_zinc_with_aug_4272372_samples_balanced_1_1.json
    start_delimiter: <start_SMILES>
    end_delimiter: <end_SMILES>

  - name: CELL_ATTRIBUTES
    tokenizer_id: 2
    json_path: ./cell_attributes_tokenizer.json
    modular_json_path: ./cell_attributes_tokenizer.json
    start_delimiter: <start_CELL_ATTRIBUTES>
    end_delimiter: <end_CELL_ATTRIBUTES>

  - name: GENE
    tokenizer_id: 3
    json_path: ./gene_tokenizer.json
    modular_json_path: ./gene_tokenizer.json
    start_delimiter: <start_GENE>
    end_delimiter: <end_GENE>

# Token-id limits.
# NOTE(review): the original text was collapsed onto a single line, so the
# nesting of these three keys could not be read from indentation. They are
# placed at the top level (siblings of tokenizers_info) because no other
# tokenizer entry carries them — confirm against the consuming loader.
# NOTE(review): exact semantics of each limit (inclusive/exclusive, which id
# ranges they bound) are not visible here — verify with the consumer.
minimal_token_id: 5000
max_possible_token_id: 100000
max_special_token_id: 500