|
import ctranslate2 |
|
import gradio as gr |
|
from huggingface_hub import snapshot_download |
|
from sentencepiece import SentencePieceProcessor |
|
|
|
# Heading shown at the top of the demo page.
title = "Mesolitica t5-base-standard-bahasa Translation Demo"

# HTML blurb rendered under the title. It links to the upstream Mesolitica
# demo and to the CTranslate2 conversion that this script downloads.
description = """
<p>
Translator using <a href='https://huggingface.co/spaces/mesolitica/malaysian-translation/' target='_blank'>Mesolitica Malaysian Translation model</a>. This demo application uses
a CTranslate2-optimized version of it: <a href="https://huggingface.co/santhosh/mesolitica-t5-base-standard-bahasa-cased-ct2">santhosh/mesolitica-t5-base-standard-bahasa-cased-ct2</a>.
</p>
"""
|
|
|
|
|
# Hugging Face repository id of the converted model used by this demo.
model_name = "santhosh/mesolitica-t5-base-standard-bahasa-cased-ct2"

# Download the repository snapshot; the returned value is used below as the
# local directory for both the translator and the tokenizer model file.
model_path = snapshot_download(model_name)

# Translation engine built from the snapshot directory.
translator = ctranslate2.Translator(model_path)

# SentencePiece tokenizer loaded from the model file in the same snapshot.
tokenizer = SentencePieceProcessor()
tokenizer.load(f"{model_path}/sentencepiece.model")

# Language code -> language name interpolated into the prompt that
# translate() builds ("terjemah ke <name>: ...").
map_lang = {
    "en": "Inggeris",
    "jv": "Jawa",
    "bjn": "Banjarese",
    "ms": "Melayu",
    "id": "Indonesia",
}
|
|
|
|
|
def translate(input_text: str, target_language: str) -> str:
    """Translate ``input_text`` into the language named by ``target_language``.

    Args:
        input_text: Text to translate.
        target_language: Language code; must be a key of ``map_lang``
            (for example ``"en"`` or ``"ms"``).

    Returns:
        The decoded first hypothesis produced by the translator, or an empty
        string when ``input_text`` is empty or contains only whitespace.

    Raises:
        KeyError: If ``target_language`` is not a key of ``map_lang`` (only
            reached for non-empty input).
    """
    # Nothing to translate: skip inference instead of feeding the model a
    # prompt that consists of the task prefix alone.
    if not input_text or not input_text.strip():
        return ""

    # The prompt must begin directly with the task prefix
    # "terjemah ke <language>: "; any extra leading characters would be
    # tokenized and sent to the model as part of the input.
    prompt = f"terjemah ke {map_lang[target_language]}: {input_text}"
    input_tokens = tokenizer.encode(prompt, out_type=str)

    results = translator.translate_batch(
        [input_tokens],
        batch_type="tokens",
        max_input_length=6144,
        max_decoding_length=6144,
        max_batch_size=1024,
        beam_size=1,
    )

    # Single input, so only the first result and its first hypothesis are used.
    return tokenizer.decode(results[0].hypotheses[0])
|
|
|
|
|
def translate_interface(input_text, target_language):
    """Callback wired into the Gradio interface.

    Forwards both arguments unchanged to ``translate`` and returns whatever
    it returns.
    """
    return translate(input_text, target_language)
|
|
|
|
|
# Dropdown entries as (display name, code) pairs; each code is a key of
# map_lang. NOTE(review): relies on Gradio treating tuple choices as
# (name, value) -- confirm against the installed Gradio version.
languages = [
    ("English", "en"),
    ("Bahasa Melayu", "ms"),
    ("Indonesian", "id"),
    ("Banjarese", "bjn"),
    ("Jawa", "jv"),
]

# Source text box, pre-filled with a sample sentence.
input_text = gr.Textbox(
    value="Imagine a world in which every single person on the planet is given free access to the sum of all human knowledge.",
    label="Input Text",
)

# Target-language selector, defaulting to English.
target_language = gr.Dropdown(
    choices=languages,
    label="Target Language",
    value="en",
)

# Box that receives the value returned by translate_interface.
output_text = gr.Textbox(label="Translated Text")

# Assemble the interface and start serving it.
demo = gr.Interface(
    fn=translate_interface,
    inputs=[input_text, target_language],
    outputs=output_text,
    title=title,
    description=description,
)
demo.launch()
|
|