Update app.py
app.py CHANGED
@@ -1,174 +1,72 @@
 # ruff: noqa: E402
-
+
+# 1. Force spaces to initialize first
 try:
-    import spaces  # Must be first
+    import spaces  # Must be first!
     USING_SPACES = True
 except ImportError:
     USING_SPACES = False

-# Suppress warnings
+# 2. Suppress all non-critical warnings
 import warnings
-warnings.filterwarnings("ignore",
-warnings.filterwarnings("ignore",
+warnings.filterwarnings("ignore", category=UserWarning)
+warnings.filterwarnings("ignore", message=".*GLIBCXX.*")
+warnings.filterwarnings("ignore", module="thinc")

-# Configure device
-
-
-    from torch.cuda import is_available as cuda_is_available
-    device = "cuda" if cuda_is_available() else "cpu"
-else:
-    device = "cpu"
+# 3. Configure device before any other imports
+import os
+os.environ["CUDA_VISIBLE_DEVICES"] = ""  # Force CPU-only

-#
+# 4. Import remaining packages
 import json
 import re
 import tempfile
 import hashlib
 import spacy
-from collections import defaultdict
 from pathlib import Path
-from importlib.resources import files
 from pydub import AudioSegment, silence
 from spacy.matcher import DependencyMatcher
-from typing import List, Dict

-import click
 import gradio as gr
 import numpy as np
 import soundfile as sf
 import torchaudio
-from cached_path import cached_path
-from transformers import AutoModelForCausalLM, AutoTokenizer

-#
+# 5. Load spaCy model
 nlp = spacy.load("en_core_web_trf")

+# 6. Simplified Dialogue Parser
 class DialogueParser:
     def __init__(self):
         self.matcher = DependencyMatcher(nlp.vocab)
-        self._add_patterns()
-
-    def _add_patterns(self):
-        said_pattern = [
-            {"RIGHT_ID": "verb", "RIGHT_ATTRS": {"POS": "VERB", "LEMMA": {"IN": ["say", "ask", "reply"]}}},
+        pattern = [
+            {"RIGHT_ID": "verb", "RIGHT_ATTRS": {"POS": "VERB", "LEMMA": {"IN": ["say", "ask", "reply"]}}},
             {"LEFT_ID": "verb", "REL_OP": ">", "RIGHT_ID": "speaker", "RIGHT_ATTRS": {"DEP": "nsubj"}}
         ]
-        self.matcher.add("SAID_CONSTRUCTION", [said_pattern])
-
-    def parse(self, text: str) -> List[Dict]:
-        doc = nlp(text)
-        matches = self.matcher(doc)
-        dialogues = []
-
-        for match_id, token_ids in matches:
-            if nlp.vocab.strings[match_id] == "SAID_CONSTRUCTION":
-                speaker = doc[token_ids[1]].text
-                quote = self._extract_quote(doc[token_ids[0]])
-                dialogues.append({"speaker": speaker, "text": quote})
-
-        return dialogues
-
-    def _extract_quote(self, verb_token):
-        return " ".join([t.text for t in verb_token.children if t.dep_ == "ccomp"])
-
-# ========== FIXED GRADIO UI ========== #
-class VoiceBank:
-    def __init__(self):
-        self.voices = {}
-        self.speaker_map = {}
-
-    def add_voice(self, speaker_name: str, audio_path: str):
-        voice_id = hashlib.md5(Path(audio_path).read_bytes()).hexdigest()
-        self.voices[voice_id] = self._preprocess_audio(audio_path)
-
-    def _preprocess_audio(self, audio_path: str):
-        audio, sr = torchaudio.load(audio_path)
-        if sr != 24000:
-            resampler = torchaudio.transforms.Resample(sr, 24000)
-            audio = resampler(audio)
-        return audio.numpy()
-
-class AudiobookGenerator:
-    def __init__(self, voice_bank: VoiceBank):
-        self.voice_bank = voice_bank
-        self.sample_rate = 24000
-
-    def generate_audiobook(self, text: str, progress=gr.Progress()):
-        parser = DialogueParser()
-        dialogues = parser.parse(text)
-        audio_segments = []
-
-        for dialogue in progress.tqdm(dialogues):
-            audio = self._generate_segment(dialogue["text"])
-            audio_segments.append(audio)
-
-        return (self.sample_rate, np.concatenate(audio_segments))
-
-    def _generate_segment(self, text: str):
-        # Implement your TTS logic here
-        return np.random.rand(16000)  # Placeholder
+        self.matcher.add("DIALOGUE", [pattern])

-
-
+    def parse(self, text):
+        return [{"speaker": "Narrator", "text": text}]  # Simplified for demo

-#
-with gr.Blocks() as app_audiobook:
+# 7. Gradio UI
+with gr.Blocks() as app:
     gr.Markdown("# Audiobook Generator")

     with gr.Row():
-        txt_input = gr.File(label="Upload Text")
-
-
-    detected_speakers = gr.State()
-    with gr.Column(visible=False) as speaker_ui:
-        gr.Markdown("## Assign Voices to Speakers")
-        speaker_container = gr.Row()
+        txt_input = gr.File(label="Upload Text", file_types=[".txt"])
+        generate_btn = gr.Button("Generate", variant="primary")

-
-    audio_output = gr.Audio(label="Generated Audiobook", autoplay=True)
+    audio_output = gr.Audio(label="Result")

-    def analyze_text(file):
-        with open(file.name) as f:
-            text = f.read()
-
-        parser = DialogueParser()
-        dialogues = parser.parse(text)
-        speakers = list({d["speaker"] for d in dialogues if d["speaker"] != "Narrator"})
-
-        components = []
-        for speaker in speakers:
-            components.append(gr.Audio(label=f"Voice for {speaker}", type="filepath"))
-
-        return [
-            gr.update(visible=True),  # speaker_ui visibility
-            gr.Row.update(components=components),  # Update speaker_container
-            gr.update(visible=True),  # generate_btn visibility
-            speakers  # Store in detected_speakers state
-        ]
+    def generate(file):
+        return (24000, np.random.rand(16000))  # Replace with TTS

-    analyze_btn.click(
-        analyze_text,
-        inputs=txt_input,
-        outputs=[speaker_ui, speaker_container, generate_btn, detected_speakers]
-    )
-
     generate_btn.click(
-
+        generate,
         inputs=txt_input,
         outputs=audio_output
     )
-# ========== APP LAUNCH ========== #
-with gr.Blocks() as app:
-    gr.TabbedInterface(
-        [app_audiobook],
-        ["Audiobook"]
-    )
-
-@click.command()
-@click.option("--port", "-p", default=7860)
-@click.option("--host", "-H", default="0.0.0.0")
-def main(port, host):
-    app.queue().launch(server_port=port, server_name=host)

+# 8. Hugging Face Spaces entry point
 if __name__ == "__main__":
-    main()
+    app.launch(server_name="0.0.0.0", server_port=7860)
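
A note on the "# Replace with TTS" placeholder: the sketch below is one hypothetical way the new generate function could be fleshed out inside app.py. It assumes a synthesize(text) -> np.ndarray helper backed by whichever 24 kHz TTS model the Space eventually adopts; the helper name and the 400 ms pause are illustrative, not part of the code above. It reuses the DialogueParser defined earlier in app.py and keeps the (sample_rate, numpy array) return shape that gr.Audio accepts.

import numpy as np
from pathlib import Path

SAMPLE_RATE = 24000  # matches the rate hard-coded in app.py

def synthesize(text: str) -> np.ndarray:
    # Hypothetical TTS hook: a real implementation would return float32 PCM
    # at SAMPLE_RATE for the given text. Silence stands in here.
    return np.zeros(SAMPLE_RATE, dtype=np.float32)

def generate(file):
    # gr.File may hand back a filepath string or a tempfile-like object.
    path = file if isinstance(file, str) else file.name
    text = Path(path).read_text(encoding="utf-8")

    parser = DialogueParser()  # defined earlier in app.py
    pause = np.zeros(int(0.4 * SAMPLE_RATE), dtype=np.float32)  # gap between lines

    segments = []
    for dialogue in parser.parse(text):
        segments.append(synthesize(dialogue["text"]))
        segments.append(pause)

    return (SAMPLE_RATE, np.concatenate(segments))

Swapping the random-noise return for something along these lines keeps the Gradio wiring unchanged; only synthesize needs to change once a real model is plugged in.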