{ | |
"added_tokens_decoder": { | |
"0": { | |
"content": "<s>", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"1": { | |
"content": "<pad>", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"2": { | |
"content": "</s>", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"3": { | |
"content": "<unk>", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32001": { | |
"content": "<mask>", | |
"lstrip": true, | |
"normalized": true, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32002": { | |
"content": "ace_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32003": { | |
"content": "ace_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32004": { | |
"content": "acm_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32005": { | |
"content": "acq_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32006": { | |
"content": "aeb_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32007": { | |
"content": "afr_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32008": { | |
"content": "ajp_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32009": { | |
"content": "aka_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32010": { | |
"content": "amh_Ethi", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32011": { | |
"content": "apc_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32012": { | |
"content": "arb_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32013": { | |
"content": "ars_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32014": { | |
"content": "ary_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32015": { | |
"content": "arz_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32016": { | |
"content": "asm_Beng", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32017": { | |
"content": "ast_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32018": { | |
"content": "awa_Deva", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32019": { | |
"content": "ayr_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32020": { | |
"content": "azb_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32021": { | |
"content": "azj_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32022": { | |
"content": "bak_Cyrl", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32023": { | |
"content": "bam_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32024": { | |
"content": "ban_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32025": { | |
"content": "bel_Cyrl", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32026": { | |
"content": "bem_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32027": { | |
"content": "ben_Beng", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32028": { | |
"content": "bho_Deva", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32029": { | |
"content": "bjn_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32030": { | |
"content": "bjn_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32031": { | |
"content": "bod_Tibt", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32032": { | |
"content": "bos_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32033": { | |
"content": "bug_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32034": { | |
"content": "bul_Cyrl", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32035": { | |
"content": "cat_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32036": { | |
"content": "ceb_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32037": { | |
"content": "ces_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32038": { | |
"content": "cjk_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32039": { | |
"content": "ckb_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32040": { | |
"content": "crh_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32041": { | |
"content": "cym_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32042": { | |
"content": "dan_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32043": { | |
"content": "deu_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32044": { | |
"content": "dik_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32045": { | |
"content": "dyu_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32046": { | |
"content": "dzo_Tibt", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32047": { | |
"content": "ell_Grek", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32048": { | |
"content": "eng_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32049": { | |
"content": "epo_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32050": { | |
"content": "est_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32051": { | |
"content": "eus_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32052": { | |
"content": "ewe_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32053": { | |
"content": "fao_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32054": { | |
"content": "pes_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32055": { | |
"content": "fij_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32056": { | |
"content": "fin_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32057": { | |
"content": "fon_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32058": { | |
"content": "fra_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32059": { | |
"content": "fur_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32060": { | |
"content": "fuv_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32061": { | |
"content": "gla_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32062": { | |
"content": "gle_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32063": { | |
"content": "glg_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32064": { | |
"content": "grn_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32065": { | |
"content": "guj_Gujr", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32066": { | |
"content": "hat_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32067": { | |
"content": "hau_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32068": { | |
"content": "heb_Hebr", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32069": { | |
"content": "hin_Deva", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32070": { | |
"content": "hne_Deva", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32071": { | |
"content": "hrv_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32072": { | |
"content": "hun_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32073": { | |
"content": "hye_Armn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32074": { | |
"content": "ibo_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32075": { | |
"content": "ilo_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32076": { | |
"content": "ind_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32077": { | |
"content": "isl_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32078": { | |
"content": "ita_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32079": { | |
"content": "jav_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32080": { | |
"content": "jpn_Jpan", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32081": { | |
"content": "kab_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32082": { | |
"content": "kac_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32083": { | |
"content": "kam_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32084": { | |
"content": "kan_Knda", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32085": { | |
"content": "kas_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32086": { | |
"content": "kas_Deva", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32087": { | |
"content": "kat_Geor", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32088": { | |
"content": "knc_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32089": { | |
"content": "knc_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32090": { | |
"content": "kaz_Cyrl", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32091": { | |
"content": "kbp_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32092": { | |
"content": "kea_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32093": { | |
"content": "khm_Khmr", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32094": { | |
"content": "kik_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32095": { | |
"content": "kin_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32096": { | |
"content": "kir_Cyrl", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32097": { | |
"content": "kmb_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32098": { | |
"content": "kon_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32099": { | |
"content": "kor_Hang", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32100": { | |
"content": "kmr_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32101": { | |
"content": "lao_Laoo", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32102": { | |
"content": "lvs_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32103": { | |
"content": "lij_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32104": { | |
"content": "lim_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32105": { | |
"content": "lin_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32106": { | |
"content": "lit_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32107": { | |
"content": "lmo_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32108": { | |
"content": "ltg_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32109": { | |
"content": "ltz_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32110": { | |
"content": "lua_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32111": { | |
"content": "lug_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32112": { | |
"content": "luo_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32113": { | |
"content": "lus_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32114": { | |
"content": "mag_Deva", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32115": { | |
"content": "mai_Deva", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32116": { | |
"content": "mal_Mlym", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32117": { | |
"content": "mar_Deva", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32118": { | |
"content": "min_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32119": { | |
"content": "mkd_Cyrl", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32120": { | |
"content": "plt_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32121": { | |
"content": "mlt_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32122": { | |
"content": "mni_Beng", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32123": { | |
"content": "khk_Cyrl", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32124": { | |
"content": "mos_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32125": { | |
"content": "mri_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32126": { | |
"content": "zsm_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32127": { | |
"content": "mya_Mymr", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32128": { | |
"content": "nld_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32129": { | |
"content": "nno_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32130": { | |
"content": "nob_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32131": { | |
"content": "npi_Deva", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32132": { | |
"content": "nso_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32133": { | |
"content": "nus_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32134": { | |
"content": "nya_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32135": { | |
"content": "oci_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32136": { | |
"content": "gaz_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32137": { | |
"content": "ory_Orya", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32138": { | |
"content": "pag_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32139": { | |
"content": "pan_Guru", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32140": { | |
"content": "pap_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32141": { | |
"content": "pol_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32142": { | |
"content": "por_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32143": { | |
"content": "prs_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32144": { | |
"content": "pbt_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32145": { | |
"content": "quy_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32146": { | |
"content": "ron_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32147": { | |
"content": "run_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32148": { | |
"content": "rus_Cyrl", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32149": { | |
"content": "sag_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32150": { | |
"content": "san_Deva", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32151": { | |
"content": "sat_Beng", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32152": { | |
"content": "scn_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32153": { | |
"content": "shn_Mymr", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32154": { | |
"content": "sin_Sinh", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32155": { | |
"content": "slk_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32156": { | |
"content": "slv_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32157": { | |
"content": "smo_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32158": { | |
"content": "sna_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32159": { | |
"content": "snd_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32160": { | |
"content": "som_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32161": { | |
"content": "sot_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32162": { | |
"content": "spa_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32163": { | |
"content": "als_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32164": { | |
"content": "srd_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32165": { | |
"content": "srp_Cyrl", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32166": { | |
"content": "ssw_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32167": { | |
"content": "sun_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32168": { | |
"content": "swe_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32169": { | |
"content": "swh_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32170": { | |
"content": "szl_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32171": { | |
"content": "tam_Taml", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32172": { | |
"content": "tat_Cyrl", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32173": { | |
"content": "tel_Telu", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32174": { | |
"content": "tgk_Cyrl", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32175": { | |
"content": "tgl_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32176": { | |
"content": "tha_Thai", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32177": { | |
"content": "tir_Ethi", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32178": { | |
"content": "taq_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32179": { | |
"content": "taq_Tfng", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32180": { | |
"content": "tpi_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32181": { | |
"content": "tsn_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32182": { | |
"content": "tso_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32183": { | |
"content": "tuk_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32184": { | |
"content": "tum_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32185": { | |
"content": "tur_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32186": { | |
"content": "twi_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32187": { | |
"content": "tzm_Tfng", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32188": { | |
"content": "uig_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32189": { | |
"content": "ukr_Cyrl", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32190": { | |
"content": "umb_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32191": { | |
"content": "urd_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32192": { | |
"content": "uzn_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32193": { | |
"content": "vec_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32194": { | |
"content": "vie_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32195": { | |
"content": "war_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32196": { | |
"content": "wol_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32197": { | |
"content": "xho_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32198": { | |
"content": "ydd_Hebr", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32199": { | |
"content": "yor_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32200": { | |
"content": "yue_Hant", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32201": { | |
"content": "zho_Hans", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32202": { | |
"content": "zho_Hant", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"32203": { | |
"content": "zul_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
} | |
}, | |
"additional_special_tokens": [ | |
"<s>", | |
"</s>", | |
"<unk>", | |
"</s>", | |
"<pad>", | |
"<s>", | |
"<mask>", | |
"ace_Arab", | |
"ace_Latn", | |
"acm_Arab", | |
"acq_Arab", | |
"aeb_Arab", | |
"afr_Latn", | |
"ajp_Arab", | |
"aka_Latn", | |
"amh_Ethi", | |
"apc_Arab", | |
"arb_Arab", | |
"ars_Arab", | |
"ary_Arab", | |
"arz_Arab", | |
"asm_Beng", | |
"ast_Latn", | |
"awa_Deva", | |
"ayr_Latn", | |
"azb_Arab", | |
"azj_Latn", | |
"bak_Cyrl", | |
"bam_Latn", | |
"ban_Latn", | |
"bel_Cyrl", | |
"bem_Latn", | |
"ben_Beng", | |
"bho_Deva", | |
"bjn_Arab", | |
"bjn_Latn", | |
"bod_Tibt", | |
"bos_Latn", | |
"bug_Latn", | |
"bul_Cyrl", | |
"cat_Latn", | |
"ceb_Latn", | |
"ces_Latn", | |
"cjk_Latn", | |
"ckb_Arab", | |
"crh_Latn", | |
"cym_Latn", | |
"dan_Latn", | |
"deu_Latn", | |
"dik_Latn", | |
"dyu_Latn", | |
"dzo_Tibt", | |
"ell_Grek", | |
"eng_Latn", | |
"epo_Latn", | |
"est_Latn", | |
"eus_Latn", | |
"ewe_Latn", | |
"fao_Latn", | |
"pes_Arab", | |
"fij_Latn", | |
"fin_Latn", | |
"fon_Latn", | |
"fra_Latn", | |
"fur_Latn", | |
"fuv_Latn", | |
"gla_Latn", | |
"gle_Latn", | |
"glg_Latn", | |
"grn_Latn", | |
"guj_Gujr", | |
"hat_Latn", | |
"hau_Latn", | |
"heb_Hebr", | |
"hin_Deva", | |
"hne_Deva", | |
"hrv_Latn", | |
"hun_Latn", | |
"hye_Armn", | |
"ibo_Latn", | |
"ilo_Latn", | |
"ind_Latn", | |
"isl_Latn", | |
"ita_Latn", | |
"jav_Latn", | |
"jpn_Jpan", | |
"kab_Latn", | |
"kac_Latn", | |
"kam_Latn", | |
"kan_Knda", | |
"kas_Arab", | |
"kas_Deva", | |
"kat_Geor", | |
"knc_Arab", | |
"knc_Latn", | |
"kaz_Cyrl", | |
"kbp_Latn", | |
"kea_Latn", | |
"khm_Khmr", | |
"kik_Latn", | |
"kin_Latn", | |
"kir_Cyrl", | |
"kmb_Latn", | |
"kon_Latn", | |
"kor_Hang", | |
"kmr_Latn", | |
"lao_Laoo", | |
"lvs_Latn", | |
"lij_Latn", | |
"lim_Latn", | |
"lin_Latn", | |
"lit_Latn", | |
"lmo_Latn", | |
"ltg_Latn", | |
"ltz_Latn", | |
"lua_Latn", | |
"lug_Latn", | |
"luo_Latn", | |
"lus_Latn", | |
"mag_Deva", | |
"mai_Deva", | |
"mal_Mlym", | |
"mar_Deva", | |
"min_Latn", | |
"mkd_Cyrl", | |
"plt_Latn", | |
"mlt_Latn", | |
"mni_Beng", | |
"khk_Cyrl", | |
"mos_Latn", | |
"mri_Latn", | |
"zsm_Latn", | |
"mya_Mymr", | |
"nld_Latn", | |
"nno_Latn", | |
"nob_Latn", | |
"npi_Deva", | |
"nso_Latn", | |
"nus_Latn", | |
"nya_Latn", | |
"oci_Latn", | |
"gaz_Latn", | |
"ory_Orya", | |
"pag_Latn", | |
"pan_Guru", | |
"pap_Latn", | |
"pol_Latn", | |
"por_Latn", | |
"prs_Arab", | |
"pbt_Arab", | |
"quy_Latn", | |
"ron_Latn", | |
"run_Latn", | |
"rus_Cyrl", | |
"sag_Latn", | |
"san_Deva", | |
"sat_Beng", | |
"scn_Latn", | |
"shn_Mymr", | |
"sin_Sinh", | |
"slk_Latn", | |
"slv_Latn", | |
"smo_Latn", | |
"sna_Latn", | |
"snd_Arab", | |
"som_Latn", | |
"sot_Latn", | |
"spa_Latn", | |
"als_Latn", | |
"srd_Latn", | |
"srp_Cyrl", | |
"ssw_Latn", | |
"sun_Latn", | |
"swe_Latn", | |
"swh_Latn", | |
"szl_Latn", | |
"tam_Taml", | |
"tat_Cyrl", | |
"tel_Telu", | |
"tgk_Cyrl", | |
"tgl_Latn", | |
"tha_Thai", | |
"tir_Ethi", | |
"taq_Latn", | |
"taq_Tfng", | |
"tpi_Latn", | |
"tsn_Latn", | |
"tso_Latn", | |
"tuk_Latn", | |
"tum_Latn", | |
"tur_Latn", | |
"twi_Latn", | |
"tzm_Tfng", | |
"uig_Arab", | |
"ukr_Cyrl", | |
"umb_Latn", | |
"urd_Arab", | |
"uzn_Latn", | |
"vec_Latn", | |
"vie_Latn", | |
"war_Latn", | |
"wol_Latn", | |
"xho_Latn", | |
"ydd_Hebr", | |
"yor_Latn", | |
"yue_Hant", | |
"zho_Hans", | |
"zho_Hant", | |
"zul_Latn" | |
], | |
"bos_token": "<s>", | |
"clean_up_tokenization_spaces": false, | |
"cls_token": "<s>", | |
"eos_token": "</s>", | |
"extra_special_tokens": {}, | |
"legacy_behaviour": false, | |
"mask_token": "<mask>", | |
"model_max_length": 1000000000000000019884624838656, | |
"pad_token": "<pad>", | |
"sep_token": "</s>", | |
"sp_model_kwargs": {}, | |
"src_lang": "fra", | |
"tgt_lang": "shr", | |
"tokenizer_class": "NllbTokenizer", | |
"unk_token": "<unk>" | |
} | |