Spaces:
Sleeping
Sleeping
| from OldHangeul import text_to_jamo | |
| import torch | |
| import string | |
# Romanization tables for conjoining Hangul jamo.
#
# Keys are written as \u escapes rather than literal jamo so the tables stay
# intact if this file is ever re-encoded or copied through a lossy channel.

# Leading consonants (choseong), U+1100-U+1112.
initials = {
    '\u1100': 'K', '\u1101': 'KK', '\u1102': 'N', '\u1103': 'T',
    '\u1104': 'TT', '\u1105': 'R', '\u1106': 'M', '\u1107': 'P',
    '\u1108': 'PP', '\u1109': 'S', '\u110a': 'SS', '\u110b': 'NG',
    '\u110c': 'C', '\u110d': 'CC', '\u110e': 'CH', '\u110f': 'KH',
    '\u1110': 'TH', '\u1111': 'PH', '\u1112': 'H',
}

# Vowels (jungseong), U+1161-U+1175.
medials = {
    '\u1161': 'a', '\u1162': 'ae', '\u1163': 'ya', '\u1164': 'yae',
    '\u1165': 'eo', '\u1166': 'e', '\u1167': 'yeo', '\u1168': 'ye',
    '\u1169': 'o', '\u116a': 'wa', '\u116b': 'wae', '\u116c': 'oe',
    '\u116d': 'yo', '\u116e': 'u', '\u116f': 'wo', '\u1170': 'we',
    '\u1171': 'wi', '\u1172': 'yu', '\u1173': 'eu', '\u1174': 'ui',
    '\u1175': 'i',
}

# Trailing consonants (jongseong), U+11A8-U+11C2; the empty key stands for
# "no final consonant".
finals = {
    '': '',
    '\u11a8': 'k', '\u11a9': 'kk', '\u11aa': 'ks', '\u11ab': 'n',
    '\u11ac': 'nj', '\u11ad': 'nh', '\u11ae': 't', '\u11af': 'r',
    '\u11b0': 'rk', '\u11b1': 'rm', '\u11b2': 'rb', '\u11b3': 'rs',
    '\u11b4': 'rt', '\u11b5': 'rp', '\u11b6': 'rh', '\u11b7': 'm',
    '\u11b8': 'p', '\u11b9': 'ps', '\u11ba': 's', '\u11bb': 'ss',
    '\u11bc': 'ng', '\u11bd': 'c', '\u11be': 'ch', '\u11bf': 'kh',
    '\u11c0': 'th', '\u11c1': 'ph', '\u11c2': 'h',
}

# Inverse tables (romanization -> jamo) for mapping in the reverse direction.
rev_initials = {v: k for k, v in initials.items()}
rev_medials = {v: k for k, v in medials.items()}
rev_finals = {v: k for k, v in finals.items()}
def hangul_to_roman(hangul):
    """Romanize the precomposed Hangul syllables found in ``hangul``.

    Every character inside the Hangul Syllables range (U+AC00-U+D7A3) is
    decomposed with ``text_to_jamo`` and its jamo are looked up in the
    ``initials``, ``medials`` and ``finals`` tables. Any other character is
    copied to the output unchanged.

    Args:
        hangul: Text to convert.

    Returns:
        The converted text as a single string.

    Raises:
        KeyError: If a decomposed jamo has no entry in the lookup tables.
    """
    result = []
    for char in hangul:
        # Bounds are spelled as escapes (first and last precomposed syllable)
        # so the range check cannot be corrupted by a re-encoding of the file.
        if '\uac00' <= char <= '\ud7a3':
            jamos = text_to_jamo(char, compatibility=False, spacing=False)
            # NOTE(review): the jamo are read from positions 0, 2 and 4, and a
            # final is only looked up when the result has length 5 -
            # presumably text_to_jamo puts a separator between jamo; confirm
            # against the OldHangeul package.
            initial = initials[jamos[0]]
            medial = medials[jamos[2]]
            final = finals[jamos[4]] if len(jamos) == 5 else ''
            result.append(initial + medial + final)
        else:
            result.append(char)
    return ''.join(result)
# Alphabet used for one-hot encoding: ASCII letters plus a few punctuation
# marks. A character's position in this string is its one-hot index.
all_letters = string.ascii_letters + " .,;'"
n_letters = len(all_letters)


def letterToIndex(letter):
    """Return the position of ``letter`` in ``all_letters``, e.g. "a" -> 0.

    Returns -1 when ``letter`` does not occur in ``all_letters``.
    """
    return all_letters.find(letter)


def lineToTensor(line):
    """Encode ``line`` as a <len(line) x 1 x n_letters> one-hot tensor.

    Row ``i`` holds a single 1 at the index of ``line[i]`` in
    ``all_letters``. Characters that are not in ``all_letters`` produce an
    all-zero row.

    Args:
        line: The string to encode.

    Returns:
        A ``torch`` tensor of shape ``(len(line), 1, n_letters)``.
    """
    tensor = torch.zeros(len(line), 1, n_letters)
    for li, letter in enumerate(line):
        index = letterToIndex(letter)
        # letterToIndex reports "not found" as -1; used as a tensor index that
        # would silently set the last column, so unknown characters are
        # skipped and their row stays zero.
        if index >= 0:
            tensor[li, 0, index] = 1
    return tensor