"""Gradio front end for the Persian Speech Suite (GE2PE G2P and TTS)."""

import logging

import gradio as gr

from config import custom_css
from synthesis import generate_speech
from GE2PE import GE2PE

logger = logging.getLogger(__name__)

# UI display name -> directory passed to ``GE2PE(model_path=...)``.
MODEL_PATHS = {
    "Homo-GE2PE": "./homo-ge2pe",
    "Homo-T5": "./homo-t5",
}

# GE2PE instances that have already been constructed, keyed by display name,
# so each model is built at most once per process.
_g2p_cache = {}


def _get_g2p(model_name: str) -> GE2PE:
    """Return the GE2PE instance for *model_name*, building it on first use.

    Args:
        model_name: A key of ``MODEL_PATHS``.

    Returns:
        The cached ``GE2PE`` object for that model (constructed with
        ``GPU=False``).

    Raises:
        ValueError: If *model_name* is not a key of ``MODEL_PATHS``.
    """
    if model_name not in _g2p_cache:
        path = MODEL_PATHS.get(model_name)
        if path is None:
            raise ValueError(f"Unknown model: {model_name}")
        _g2p_cache[model_name] = GE2PE(model_path=path, GPU=False)
    return _g2p_cache[model_name]


def ge2pe_infer(model_name: str, text: str, use_rules: bool, use_dict: bool):
    """Run grapheme-to-phoneme conversion for the "Convert" button.

    Args:
        model_name: A key of ``MODEL_PATHS`` selecting the model.
        text: Input text; passed to the model unmodified.
        use_rules: Forwarded to ``GE2PE.generate`` as ``use_rules``.
        use_dict: Forwarded to ``GE2PE.generate`` as ``use_dict``.

    Returns:
        ``""`` when *text* is empty or whitespace-only or the model returns
        nothing; otherwise the first element of the model output. On any
        exception, a ``"⚠️ Error: ..."`` string is returned instead of
        raising, so the message lands in the output textbox.
    """
    if not text or not text.strip():
        return ""
    try:
        model = _get_g2p(model_name)
        result = model.generate([text], use_rules=use_rules, use_dict=use_dict)
        return result[0] if result else ""
    except Exception as e:
        # UI boundary: keep the app responsive and show the message to the
        # user, but record the traceback so the failure can be diagnosed.
        logger.exception("G2P inference failed for model %r", model_name)
        return f"⚠️ Error: {e}"


def create_interface() -> gr.Blocks:
    """Build the two-tab Gradio app (G2P and TTS) and return it unlaunched.

    Returns:
        The ``gr.Blocks`` instance; the caller is responsible for launching it.
    """
    # NOTE(review): the exact nesting of widgets inside each ``gr.Row()`` and
    # the placement of the closing Markdown were reconstructed from a
    # whitespace-collapsed source -- confirm against the intended layout.
    with gr.Blocks(title="Persian Speech Suite", css=custom_css) as demo:
        gr.Markdown(
            "# Persian Speech Suite: GE2PE & TTS\n"
            "A unified playground for Persian grapheme‑to‑phoneme conversion (GE2PE) **and** text‑to‑speech synthesis (Mana TTS)."
        )

        with gr.Tabs():
            with gr.TabItem("Grapheme → Phoneme (GE2PE)"):
                gr.Markdown(
                    "Convert Persian text to its phonemic transcription. "
                    "Choose between **Homo‑GE2PE** and **Homo‑T5**, optionally applying short‑vowel rules and/or a custom dictionary."
                )

                with gr.Row():
                    model_selector = gr.Radio(
                        choices=list(MODEL_PATHS.keys()),
                        value="Homo-GE2PE",
                        label="G2P Model",
                    )

                g2p_input = gr.Textbox(
                    label="Persian Text",
                    placeholder="مثال: این کتابِ علی است",
                    lines=4,
                )

                with gr.Row():
                    g2p_use_rules = gr.Checkbox(value=True, label="Apply short‑vowel rules (optional)")
                    g2p_use_dict = gr.Checkbox(value=False, label="Use custom dictionary (optional)")

                g2p_button = gr.Button("Convert", variant="primary")
                g2p_output = gr.Textbox(label="Phoneme Output", interactive=False)

                g2p_button.click(
                    fn=ge2pe_infer,
                    inputs=[model_selector, g2p_input, g2p_use_rules, g2p_use_dict],
                    outputs=[g2p_output],
                )

                # Clicking an example only fills the text box; the user still
                # presses "Convert" to run the model.
                gr.Examples(
                    examples=[
                        ["او مرد خوبی است."],
                        ["او مرد."],
                        ["این کتابِ علی است."],
                        ["به خانه آمد."],
                    ],
                    inputs=[g2p_input],
                )

            with gr.TabItem("Text‑to‑Speech"):
                gr.Markdown("Generate natural‑sounding Persian speech from your text using Tacotron2 + HiFiGAN.")

                tts_input = gr.Textbox(
                    label="Persian Text",
                    placeholder="مدل تولید گفتار با دادگان نسل مانا",
                    lines=5,
                )
                tts_button = gr.Button("Generate Speech", variant="primary")
                tts_output = gr.Audio(label="Generated Speech")

                tts_button.click(
                    fn=generate_speech,
                    inputs=[tts_input],
                    outputs=[tts_output],
                )

                gr.Examples(
                    examples=[
                        ["سلام، چطور هستید؟"],
                        ["ایران سرزمین زیبایی‌ها و افتخارات است."],
                        ["فناوری هوش مصنوعی به سرعت در حال پیشرفت است."],
                        ["مدل تولید گفتار با دادگان نسل مانا"],
                    ],
                    inputs=[tts_input],
                )

        # Markdown needs real line breaks for the headings, bullet list and
        # fenced code block to render.
        # NOTE(review): relies on gr.Markdown dedenting its input -- confirm
        # for the pinned Gradio version.
        gr.Markdown(
            """
            ### Acknowledgments

            - [**Nasl‑e‑Mana**](https://naslemana.com/), the monthly magazine of the blind community of Iran
            - [ManaTTS Dataset](https://huggingface.co/datasets/MahtaFetrat/Mana-TTS)
            - [Persian‑MultiSpeaker‑Tacotron2](https://github.com/MahtaFetrat/Persian-MultiSpeaker-Tacotron2/)
            - [Homo-GE2PE (Github)](https://github.com/MahtaFetrat/Homo-GE2PE-Persian/)
            - [Base GE2PE Paper](https://aclanthology.org/2024.findings-emnlp.196/)
            - [Base GE2PE Model](https://github.com/Sharif-SLPL/GE2PE)
            - [HomoRich Dataset (Huggingface)](https://huggingface.co/datasets/MahtaFetrat/HomoRich-G2P-Persian)
            - [HomoRich Dataset (Github)](https://github.com/MahtaFetrat/HomoRich-G2P-Persian)
            - [SentenceBench Persian G2P Benchmark](https://huggingface.co/datasets/MahtaFetrat/SentenceBench)

            ### Citation

            ```bibtex
            @misc{qharabagh2025fastfancyrethinkingg2p,
                  title={Fast, Not Fancy: Rethinking G2P with Rich Data and Rule-Based Models},
                  author={Mahta Fetrat Qharabagh and Zahra Dehghanian and Hamid R. Rabiee},
                  year={2025},
                  eprint={2505.12973},
                  archivePrefix={arXiv},
                  primaryClass={cs.CL},
            }

            @article{fetrat2024manatts,
                  title={ManaTTS Persian: A Recipe for Creating TTS Datasets for Lower-Resource Languages},
                  author={Mahta Fetrat Qharabagh and Zahra Dehghanian and Hamid R. Rabiee},
                  journal={arXiv preprint arXiv:2409.07259},
                  year={2024},
            }
            ```
            """
        )

    return demo