import os
import random
import tempfile
import logging

import torch
from faster_whisper import WhisperModel
from googletrans import Translator

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def get_random_proxy():
    """Pick a random proxy from proxies.txt, or return None if the list is empty."""
    proxies_path = os.path.join(os.path.dirname(__file__), "proxies.txt")
    with open(proxies_path, "r") as f:
        proxies = [line.strip() for line in f if line.strip()]
    if not proxies:
        return None
    return random.choice(proxies)


def build_translator_with_proxy():
    # NOTE: despite the name, this currently builds a plain Translator and does
    # not apply the proxy returned by get_random_proxy().
    return Translator(service_urls=['translate.googleapis.com'])
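
# Hedged sketch (not wired in): if the installed googletrans build accepts a
# `proxies` mapping in Translator(...) -- some releases do, others do not --
# the random proxy could be applied roughly like this. Verify the constructor
# signature of your googletrans version before enabling.
#
#   def build_translator_with_random_proxy():
#       proxy = get_random_proxy()
#       kwargs = {"service_urls": ["translate.googleapis.com"]}
#       if proxy:
#           kwargs["proxies"] = {"http": proxy, "https": proxy}  # assumed kwarg
#       return Translator(**kwargs)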

SUPPORTED_LANGUAGES = {
    'hi': {'name': 'Hindi'},
    'en': {'name': 'English'},
    'es': {'name': 'Spanish'},
    'fr': {'name': 'French'},
    'de': {'name': 'German'},
    'ja': {'name': 'Japanese'},
    'ko': {'name': 'Korean'},
    'zh-cn': {'name': 'Chinese (Simplified)'},
    'ar': {'name': 'Arabic'},
    'ru': {'name': 'Russian'},
    'gu': {'name': 'Gujarati'},
    'mr': {'name': 'Marathi'},
    'kn': {'name': 'Kannada'}
}

if 'zh-cn' in SUPPORTED_LANGUAGES:
    SUPPORTED_LANGUAGES['zh'] = SUPPORTED_LANGUAGES['zh-cn']


# Load the shared Faster-Whisper model once at import time.
try:
    device = "cuda" if torch.cuda.is_available() else "cpu"
    logger.info(f"Using device: {device}")
    model_size = "medium"
    # float16 is only supported on GPU; fall back to int8 quantization on CPU.
    model = WhisperModel(model_size, device=device,
                         compute_type="float16" if device == "cuda" else "int8",
                         num_workers=8)
    logger.info("Faster-Whisper model loaded successfully.")
except Exception as e:
    logger.error(f"Error loading Faster-Whisper model: {e}")
    model = None


translator = build_translator_with_proxy()


async def process_audio(audio_file_content: bytes, lang1: str, lang2: str):
    if not model:
        return {"error": "Faster-Whisper model not available."}

    if lang1 not in SUPPORTED_LANGUAGES or lang2 not in SUPPORTED_LANGUAGES or lang1 == lang2:
        return {"error": "Invalid or duplicate input languages."}

    temp_audio_path = None
    try:
        # Write the uploaded bytes to a temporary WAV file for transcription.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
            temp_audio.write(audio_file_content)
            temp_audio_path = temp_audio.name
            logger.info(f"Temporary audio file saved at: {temp_audio_path}")

        # Transcribe with automatic language detection (language=None).
        segments, info = model.transcribe(temp_audio_path, beam_size=5, language=None)
        detected_lang_code = info.language
        logger.info(f"Detected language: {detected_lang_code}")

        # faster-whisper yields segments lazily; joining here runs the decode.
        transcribed_text = " ".join([segment.text for segment in segments])
        logger.info(f"Transcription: {transcribed_text}")

        # Faster-Whisper sometimes mislabels closely related languages/scripts.
        # Remap a few known confusions toward the languages the caller expects.
        if detected_lang_code in ["bn", "sa"] and ("gu" in [lang1, lang2]):
            logger.info(f"Mapping detected language '{detected_lang_code}' to 'gu' for Gujarati support.")
            detected_lang_code = "gu"

        if detected_lang_code == "pt" and ("mr" in [lang1, lang2]):
            logger.info("Mapping detected language 'pt' to 'mr' for Marathi support.")
            detected_lang_code = "mr"

        if detected_lang_code == "ur" and ("hi" in [lang1, lang2]):
            logger.info("Mapping detected language 'ur' to 'hi' for Hindi support.")
            detected_lang_code = "hi"

        if detected_lang_code == "hi" and ("mr" in [lang1, lang2]):
            devanagari_count = sum('\u0900' <= c <= '\u097F' for c in transcribed_text)
            if devanagari_count > 0:
                logger.info("Mapping detected language 'hi' to 'mr' for Marathi support (Devanagari script detected).")
                detected_lang_code = "mr"

        if detected_lang_code not in SUPPORTED_LANGUAGES:
            return {"error": f"Detected language '{detected_lang_code}' is not supported."}
        if detected_lang_code not in [lang1, lang2]:
            return {"error": f"Detected language '{detected_lang_code}' was not one of the expected languages: {lang1} or {lang2}."}

if detected_lang_code == "hi": |
|
latin_count = sum('a' <= c.lower() <= 'z' for c in transcribed_text) |
|
devanagari_count = sum('\u0900' <= c <= '\u097F' for c in transcribed_text) |
|
if latin_count > devanagari_count: |
|
logger.info("Transcription appears to be in Latin script, retrying with forced Hindi decoding.") |
|
segments, _ = model.transcribe(temp_audio_path, language="hi", beam_size=5) |
|
transcribed_text = " ".join([segment.text for segment in segments]) |
|
logger.info(f"Retried Hindi transcription: {transcribed_text}") |
|
|
|
|
|
|
|
if (('mr' in [lang1, lang2]) and detected_lang_code == 'hi'): |
|
devanagari_count = sum('\u0900' <= c <= '\u097F' for c in transcribed_text) |
|
latin_count = sum('a' <= c.lower() <= 'z' for c in transcribed_text) |
|
|
|
if devanagari_count > 0: |
|
logger.info("Detected Devanagari script and Marathi is expected, retrying with forced Marathi decoding.") |
|
segments, _ = model.transcribe(temp_audio_path, language="mr", beam_size=5) |
|
transcribed_text = " ".join([segment.text for segment in segments]) |
|
detected_lang_code = 'mr' |
|
logger.info(f"Retried Marathi transcription: {transcribed_text}") |
|
|
|
|
|
if (('gu' in [lang1, lang2]) and detected_lang_code != 'gu'): |
|
gujarati_count = sum('\u0A80' <= c <= '\u0AFF' for c in transcribed_text) |
|
if gujarati_count > 0: |
|
logger.info("Detected Gujarati script and Gujarati is expected, retrying with forced Gujarati decoding.") |
|
segments, _ = model.transcribe(temp_audio_path, language="gu", beam_size=5) |
|
transcribed_text = " ".join([segment.text for segment in segments]) |
|
detected_lang_code = 'gu' |
|
logger.info(f"Retried Gujarati transcription: {transcribed_text}") |
|
|
|
|
|
        # Translate into whichever of the two expected languages was not spoken.
        target_lang = lang2 if detected_lang_code == lang1 else lang1
        translated_text = "Translation not applicable."
        if transcribed_text:
            try:
                translation = await translator.translate(transcribed_text, src=detected_lang_code, dest=target_lang)
                if translation and hasattr(translation, 'text'):
                    translated_text = translation.text
                    logger.info(f"Translation to {target_lang}: {translated_text}")
                else:
                    translated_text = "Translation failed (invalid result)."
            except Exception as e:
                logger.error(f"Translation error: {e}", exc_info=True)
                translated_text = f"Translation failed: {e}"
        else:
            translated_text = "Transcription was empty."

        return {
            "detected_language": detected_lang_code,
            "transcribed_text": transcribed_text,
            "translated_text": translated_text
        }

    except Exception as e:
        logger.error(f"Audio processing error: {e}", exc_info=True)
        return {"error": f"An unexpected error occurred during audio processing: {e}"}
    finally:
        if temp_audio_path and os.path.exists(temp_audio_path):
            try:
                os.unlink(temp_audio_path)
                logger.info(f"Temporary audio file deleted: {temp_audio_path}")
            except Exception as e:
                logger.error(f"Failed to delete temp file: {e}")