Spaces:
Runtime error
Runtime error
import streamlit as st | |
import epitran | |
import langcodes | |
from langcodes import LanguageTagError | |
from pathlib import Path | |
from operator import itemgetter | |
# TODO: reverse transliterate? | |
def get_lang_description_from_mapping_name(string_to_check):
    """Return a human-readable description for an Epitran mapping name.

    The name is parsed as a language tag with ``langcodes``. When parsing
    fails because of a trailing subtag that ``langcodes`` does not accept
    (for example the ``red`` in ``amh-Ethi-red``), that trailing subtag is
    dropped and the remainder is described instead. A dropped ``red``
    subtag is reported by appending ``" (reduced)"`` to the description.

    Args:
        string_to_check: Mapping name such as ``"swa-Latn"``. ``None`` and
            strings shorter than two characters are treated as undescribable.

    Returns:
        A string of comma-separated ``"key: value"`` pairs taken from
        ``lang.describe()``, the fixed text ``"Generic Latin Script"`` for
        ``"generic-Latn"``, or ``None`` when no description can be produced.
    """
    if string_to_check == "generic-Latn":
        return "Generic Latin Script"
    if not string_to_check or len(string_to_check) < 2:
        return None

    try:
        lang = langcodes.get(string_to_check)
        if not lang:
            return None
        return ", ".join(
            f"{key}: {value}" for key, value in lang.describe().items()
        )
    except LanguageTagError as e:
        message = str(e)
        recoverable = (
            "out of place" in message
            or "must be followed by something" in message
        )
        if not recoverable:
            print("*****")
            print(e)
            return None

        # Messages seen for mapping names with a trailing, non-standard subtag:
        #   This extlang subtag, 'red', is out of place. Expected territory, ...
        #   This script subtag, 'east', is out of place. Expected territory, ...
        #   The subtag 'p' must be followed by something
        # Drop the trailing subtag and describe what is left. Remember the
        # dropped subtag so that "red" can be checked on the piece that was
        # actually removed, not on the new last subtag.
        remainder, _, dropped_subtag = string_to_check.rpartition("-")
        desc = get_lang_description_from_mapping_name(remainder)
        if desc is not None and dropped_subtag == "red":
            desc += " (reduced)"
        return desc
def get_valid_epitran_mappings_list(map_dir=None):
    """List the Epitran mapping names that are safe to offer to the user.

    Mapping names are the file stems found in Epitran's ``data/map``
    directory. Names listed as problematic in
    https://github.com/dmort27/epitran/issues/98 are excluded.

    Args:
        map_dir: Optional directory to scan instead of the ``data/map``
            directory inside the installed ``epitran`` package.

    Returns:
        A sorted list of mapping names (file stems) with the problematic
        mappings removed.
    """
    # https://github.com/dmort27/epitran/issues/98
    problem_mappings = frozenset(
        (
            "generic-Latn",
            "tur-Latn-bab",
            "ood-Latn-sax",
            "vie-Latn-so",
            "vie-Latn-ce",
            "vie-Latn-no",
            "kaz-Cyrl-bab",
        )
    )

    if map_dir is None:
        map_path = Path(epitran.__path__[0]) / "data" / "map"
    else:
        map_path = Path(map_dir)

    # Return the filtered names (the unfiltered list used to be returned by
    # mistake) and sort them so the result does not depend on the order in
    # which the filesystem happens to list the files.
    return sorted(
        map_file.stem
        for map_file in map_path.glob("*.*")
        if map_file.stem not in problem_mappings
    )
if __name__ == "__main__":
    # Page header and short introduction.
    st.write("# Phonemize your text with [Epitran](https://github.com/dmort27/epitran)!")
    st.write("Epitran is a grapheme-to-phoneme tool. It supports converting many writing sytems to IPA symbols, including the following languages/scripts:")

    # Show every available mapping, then let the user choose one of them.
    mapping_names = get_valid_epitran_mappings_list()
    st.write(mapping_names)
    selected_mapping = st.selectbox("Select input language/script:", mapping_names)

    # Echo the choice back in a human-readable form.
    description = get_lang_description_from_mapping_name(selected_mapping)
    st.write(f"Selected input language/script: {description}")

    # Alternative input (disabled): two separate text inputs, one for the
    # three-letter ISO 639-3 language code (https://iso639-3.sil.org/) and
    # one for the ISO 15924 script code
    # (https://unicode.org/iso15924/iso15924-codes.html), joined with "-"
    # into a combined code such as "swa-Latn".

    input_text = st.text_area(
        label="Whatever you type here will be transliterated!",
        value="Gari langu linaloangama limejaa na mikunga",
    )

    # Build the transliterator for the chosen mapping and show the result.
    st.info("attempting to instantiate epitran transliterator for your language/script")
    transliterator = epitran.Epitran(selected_mapping)
    st.info(f"transliterating `{input_text}`\n\tusing {selected_mapping}...")
    st.success(transliterator.transliterate(input_text))