File size: 2,875 Bytes
a013c5c
 
c95a8ea
 
145304e
 
c95a8ea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a013c5c
c95a8ea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a013c5c
62383b9
44f705d
c95a8ea
 
44f705d
c95a8ea
2bd2657
145304e
44f705d
2bd2657
 
c95a8ea
2bd2657
44f705d
a013c5c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import streamlit as st
import epitran
import langcodes
from langcodes import LanguageTagError
# TODO: reverse transliterate? 


def get_lang_description_from_mapping_name(string_to_check):
  """Build a human-readable description for an Epitran mapping name.

  The mapping name is handed to ``langcodes.get``; the resulting language
  object's ``describe()`` dictionary is flattened into a single
  ``"key: value, key: value"`` string.

  When ``langcodes`` rejects the tag because a trailing subtag is "out of
  place" or incomplete, the last hyphen-separated piece is dropped and the
  lookup is retried on the shorter name.

  Args:
    string_to_check: Mapping name to describe, e.g. a hyphen-separated
      language/script tag.

  Returns:
    The description string, or ``None`` when the name is shorter than two
    characters, yields a falsy language object, or fails to parse for a
    reason other than the ones handled by the retry.
  """
  # Special-cased before any parsing is attempted.
  if string_to_check == "generic-Latn":
    return "Generic Latin Script"

  # Also terminates the retry recursion below once every subtag is stripped.
  if len(string_to_check) < 2:
    return None

  try:
    lang = langcodes.get(string_to_check)
    if not lang:
      return None
    return ", ".join(
      f"{key}: {value}" for key, value in lang.describe().items()
    )
  except LanguageTagError as e:
    message = str(e)
    if "out of place" in message or "must be followed by something" in message:
      # Messages observed for mapping names with extra trailing pieces:
      # LanguageTagError: This extlang subtag, 'red', is out of place. Expected territory, variant, extension, or end of string.
      # LanguageTagError: This script subtag, 'east', is out of place. Expected territory, variant, extension, or end of string.
      # LanguageTagError: The subtag 'p' must be followed by something
      # Drop the last subtag and retry; yields "" when no hyphen is left.
      shortened = string_to_check.rpartition("-")[0]
      return get_lang_description_from_mapping_name(shortened)

    # Any other parse failure: report it on stdout and give up.
    print("*****")
    print(e)
    return None


def get_valid_epitran_mappings_list():
  """List the mapping names found in the installed Epitran package.

  Looks in the ``data/map`` directory under the first entry of
  ``epitran.__path__`` and collects the stem (file name without its final
  suffix) of every entry whose name contains a dot.

  Returns:
    The mapping names as a list of strings, sorted so the order does not
    depend on the arbitrary order in which ``glob`` yields entries.
  """
  # Imported locally because the module does not import pathlib at the top.
  from pathlib import Path

  map_path = Path(epitran.__path__[0]) / "data" / "map"
  return sorted(map_file.stem for map_file in map_path.glob("*.*"))


# Script entry point: lets the user pick an Epitran mapping, type some text,
# and see the transliteration produced by that mapping.
if __name__ == "__main__":

  valid_epitran_mappings = get_valid_epitran_mappings_list()
  selected_mapping = st.selectbox("Which language/script pair would you like to use?", valid_epitran_mappings)
  description = get_lang_description_from_mapping_name(selected_mapping)
  st.write(f"You selected {selected_mapping}")
  # The description lookup can return None; only show it when there is one.
  if description:
    st.write(f"Language details: {description}")


  # Earlier input approach (separate language and script code fields), kept
  # for reference; the selectbox above replaces it.
#  iso_lang_code = st.text_input(
#    label="Three-letter ISO-639-3 (https://iso639-3.sil.org/) language code", 
#    value="swa"
#    )
#  st.write(f"iso code is {iso_lang_code}")
#  
#  iso_script_code = st.text_input(
#    label="ISO 15924 (https://unicode.org/iso15924/iso15924-codes.html) script code, e.g. 'Latn' for Latin script, 'Hans' for Chinese script, etc.", 
#    value="Latn"
#    )
#  st.write(f'iso code is {iso_script_code}')

  input_text = st.text_area(label="Whatever you type here will be transliterated!", value="Gari langu linaloangama limejaa na mikunga")

#  combined_code = "-".join([iso_lang_code, iso_script_code])
#  st.write(f"Combined code: {combined_code}")
  st.info("attempting to instantiate epitran transliterator for your language/script")
  epi = epitran.Epitran(selected_mapping)

  st.info(f"transliterating `{input_text}`\n\tusing {epi}...")
  transliteration = epi.transliterate(input_text)

  st.success(transliteration)