Spaces:
Runtime error
Runtime error
File size: 3,390 Bytes
a013c5c c95a8ea db6d318 0db47ae 145304e c95a8ea dea6246 c95a8ea 4b5050f c95a8ea 7c012d1 c95a8ea 0db47ae c95a8ea a013c5c c95a8ea b33e08e c95a8ea b33e08e c95a8ea 383b08c c95a8ea a013c5c 62383b9 44f705d c95a8ea 44f705d 383b08c 2bd2657 b33e08e 44f705d 2bd2657 c95a8ea 2bd2657 44f705d a013c5c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
import streamlit as st
import epitran
import langcodes
from langcodes import LanguageTagError
from pathlib import Path
from operator import itemgetter
# TODO: reverse transliterate?
def get_lang_description_from_mapping_name(string_to_check):
    """Build a human-readable description for an epitran mapping name.

    The name is parsed with ``langcodes.get``. When parsing fails with one of
    the known "trailing subtag" errors, the last ``-``-separated subtag is
    dropped and the shortened name is tried again.

    Args:
        string_to_check: Mapping name, e.g. ``"swa-Latn"``.

    Returns:
        ``"Generic Latin Script"`` for the special name ``"generic-Latn"``;
        otherwise the entries of ``lang.describe()`` rendered as
        ``"key: value"`` and joined with ``", "``. ``None`` is returned when
        the name is shorter than two characters, when ``langcodes.get`` gives
        a falsy result, or when parsing fails with any other
        ``LanguageTagError`` (which is printed to stdout).
    """
    candidate = string_to_check
    while True:
        if candidate == "generic-Latn":
            return "Generic Latin Script"
        if len(candidate) < 2:
            return None

        try:
            lang = langcodes.get(candidate)
            if not lang:
                return None
            return ", ".join(
                f"{key}: {value}" for key, value in lang.describe().items()
            )
        except LanguageTagError as e:
            message = str(e)
            trailing_subtag_rejected = (
                "out of place" in message
                or "must be followed by something" in message
            )
            if not trailing_subtag_rejected:
                print("*****")
                print(e)
                return None

            # Messages observed for mapping names with extra trailing subtags:
            #   This extlang subtag, 'red', is out of place. Expected
            #     territory, variant, extension, or end of string.
            #   This script subtag, 'east', is out of place. Expected
            #     territory, variant, extension, or end of string.
            #   The subtag 'p' must be followed by something
            # Drop the last subtag and retry with what is left.
            candidate = "-".join(candidate.split("-")[:-1])
def get_valid_epitran_mappings_list(map_dir=None):
    """List the usable epitran mapping names found in a map directory.

    Every file matching ``*.*`` in the directory contributes its stem (file
    name without the final extension) as a mapping name. Names listed in
    https://github.com/dmort27/epitran/issues/98 as problematic are removed.

    Args:
        map_dir: Directory to scan. Defaults to the ``data/map`` directory
            inside the installed ``epitran`` package.

    Returns:
        Sorted list of unique mapping names with the problem mappings
        excluded. Sorting keeps the order stable, since ``glob`` yields
        entries in arbitrary order.
    """
    if map_dir is None:
        map_dir = Path(epitran.__path__[0]) / "data" / "map"

    # https://github.com/dmort27/epitran/issues/98
    problem_mappings = frozenset(
        {
            "generic-Latn",
            "tur-Latn-bab",
            "ood-Latn-sax",
            "vie-Latn-so",
            "vie-Latn-ce",
            "vie-Latn-no",
            "kaz-Cyrl-bab",
        }
    )

    # Filter the names discovered here (the original filtered an undefined
    # name and then returned the unfiltered list).
    discovered = {map_file.stem for map_file in Path(map_dir).glob("*.*")}
    return sorted(discovered - problem_mappings)
if __name__ == "__main__":
    # Streamlit page: pick an epitran mapping, type text, see its transliteration.
    st.write("# Phonemize your text with epitran!")
    st.write("Epitran supports the following languages/scripts:")
    valid_epitran_mappings = get_valid_epitran_mappings_list()
    st.write(valid_epitran_mappings)

    selected_mapping = st.selectbox("Select input language/script:", valid_epitran_mappings)
    description = get_lang_description_from_mapping_name(selected_mapping)
    if description is None:
        # The lookup returns None when it cannot describe the code; show the
        # raw mapping name instead of the literal text "None".
        description = selected_mapping
    st.write(f"Selected input language/script: {description}")

    # Alternative input (currently disabled): two free-text fields for an
    # ISO 639-3 language code (https://iso639-3.sil.org/) and an ISO 15924
    # script code (https://unicode.org/iso15924/iso15924-codes.html), joined
    # as "<lang>-<script>" in place of the selectbox choice.

    input_text = st.text_area(
        label="Whatever you type here will be transliterated!",
        value="Gari langu linaloangama limejaa na mikunga",
    )

    st.info("attempting to instantiate epitran transliterator for your language/script")
    epi = epitran.Epitran(selected_mapping)

    st.info(f"transliterating `{input_text}`\n\tusing {selected_mapping}...")
    transliteration = epi.transliterate(input_text)
    st.success(transliteration)
|