Spaces:
Runtime error
Runtime error
| import src.constants as constants_utils | |
| import requests | |
| from transformers import AutoModelForSeq2SeqLM, AutoTokenizer | |
| from mosestokenizer import * | |
| from indicnlp.tokenize import sentence_tokenize | |
| from googletrans import Translator, constants | |
class TRANSLATOR:
    """Translate English text into Indic languages.

    Two back ends are exposed: the NLLB-200 distilled checkpoint loaded through
    ``transformers`` (``get_in_hindi`` / ``get_in_indic``) and the Google
    Translate web API through ``googletrans`` (``get_indic_google_translate``).
    """

    # Hugging Face checkpoint shared by every NLLB-backed method.
    NLLB_MODEL_NAME = "facebook/nllb-200-distilled-600M"

    # Language display name -> NLLB target-language token.
    NLLB_LANGUAGE_CODES = {
        "Hindi": "hin_Deva",
        "Marathi": "mar_Deva",
    }

    # Target used when the requested language is not in NLLB_LANGUAGE_CODES.
    NLLB_DEFAULT_CODE = "eng_Latn"

    # Lazily populated by _load_nllb() and then shared by all instances, so the
    # checkpoint is read once per process rather than once per call.
    _tokenizer = None
    _model = None

    def __init__(self):
        # Retained from the original implementation: emits one blank line.
        print()

    def _load_nllb(self):
        """Return the ``(tokenizer, model)`` pair, loading each on first use."""
        cls = type(self)
        if self._tokenizer is None:
            cls._tokenizer = AutoTokenizer.from_pretrained(cls.NLLB_MODEL_NAME)
        if self._model is None:
            cls._model = AutoModelForSeq2SeqLM.from_pretrained(cls.NLLB_MODEL_NAME)
        return self._tokenizer, self._model

    def _translate_nllb(self, text, target_code, max_length):
        """Translate ``text`` with NLLB into the language named by ``target_code``.

        Args:
            text: Source text handed to the tokenizer as a single input.
            target_code: NLLB language token, e.g. ``"hin_Deva"``.
            max_length: Upper bound passed to ``model.generate``.

        Returns:
            The first decoded sequence, with special tokens stripped.
        """
        tokenizer, model = self._load_nllb()
        inputs = tokenizer(text, return_tensors="pt")
        translated_tokens = model.generate(
            **inputs,
            # convert_tokens_to_ids is used instead of the lang_code_to_id
            # mapping, which newer transformers releases no longer provide.
            forced_bos_token_id=tokenizer.convert_tokens_to_ids(target_code),
            max_length=max_length,
        )
        return tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]

    def split_sentences(self, paragraph, language):
        """Split ``paragraph`` into a list of sentences.

        Args:
            paragraph: Text to split.
            language: ``"en"`` selects the Moses sentence splitter; any value
                contained in ``constants_utils.INDIC_LANGUAGE`` selects the
                Indic NLP sentence tokenizer.

        Returns:
            A list of sentences, or ``None`` when ``language`` is neither
            ``"en"`` nor a member of ``constants_utils.INDIC_LANGUAGE``.
        """
        if language == "en":
            # Nothing to split; skip starting the external splitter at all.
            if not paragraph or not paragraph.strip():
                return []
            with MosesSentenceSplitter(language) as splitter:
                return splitter([paragraph])
        # NOTE(review): get_indic_google_translate looks INDIC_LANGUAGE up by
        # display name (e.g. 'Hindi'), while this branch forwards `language`
        # unchanged as the tokenizer's `lang` - confirm which form callers pass.
        if language in constants_utils.INDIC_LANGUAGE:
            return sentence_tokenize.sentence_split(paragraph, lang=language)
        return None

    def get_in_hindi(self, payload):
        """Translate the English text in ``payload['inputs']`` to Hindi via NLLB.

        The text is split into sentences, each sentence is translated on its
        own (generation capped at 100 tokens), and the translated sentences are
        joined with single spaces.

        Args:
            payload: Mapping whose ``'inputs'`` entry holds the English text.

        Returns:
            The Hindi translation; an empty string when there are no sentences.
        """
        sentences = self.split_sentences(payload['inputs'], 'en')
        translated = [
            self._translate_nllb(sentence, "hin_Deva", max_length=100)
            for sentence in sentences
        ]
        # Join once at the end. Calling str.join on the running result would
        # use that result as a separator between the next sentence's characters.
        return " ".join(translated)

    def get_in_indic(self, text, language='Hindi'):
        """Translate ``text`` into ``language`` via NLLB in a single pass.

        Args:
            text: Source text.
            language: ``'Hindi'`` or ``'Marathi'``. Any other value selects the
                ``eng_Latn`` target.

        Returns:
            The translated text (generation capped at 1000 tokens).
        """
        code = self.NLLB_LANGUAGE_CODES.get(language, self.NLLB_DEFAULT_CODE)
        return self._translate_nllb(text, code, max_length=1000)

    def get_indic_google_translate(self, text, language='Hindi'):
        """Translate ``text`` into ``language`` with ``googletrans``.

        Args:
            text: Source text.
            language: Key looked up in ``constants_utils.INDIC_LANGUAGE`` to
                obtain the destination code; ``'en'`` is used when it is absent.

        Returns:
            The translated text as a ``str``.
        """
        translator = Translator()
        destination = constants_utils.INDIC_LANGUAGE.get(language, 'en')
        translation = translator.translate(text, dest=destination)
        return str(translation.text)