eBlessings committed on
Commit
ffce350
·
verified ·
1 Parent(s): a01bbfd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -130
app.py CHANGED
@@ -1,174 +1,72 @@
1
  # ruff: noqa: E402
2
- # Correct import order for Hugging Face Spaces
 
3
  try:
4
- import spaces # Must be first import!
5
  USING_SPACES = True
6
  except ImportError:
7
  USING_SPACES = False
8
 
9
- # Suppress warnings
10
  import warnings
11
- warnings.filterwarnings("ignore", message="Can't initialize NVML")
12
- warnings.filterwarnings("ignore", category=UserWarning, module="thinc")
 
13
 
14
- # Configure device
15
- if not USING_SPACES:
16
- import torch
17
- from torch.cuda import is_available as cuda_is_available
18
- device = "cuda" if cuda_is_available() else "cpu"
19
- else:
20
- device = "cpu"
21
 
22
- # Rest of imports
23
  import json
24
  import re
25
  import tempfile
26
  import hashlib
27
  import spacy
28
- from collections import defaultdict
29
  from pathlib import Path
30
- from importlib.resources import files
31
  from pydub import AudioSegment, silence
32
  from spacy.matcher import DependencyMatcher
33
- from typing import List, Dict
34
 
35
- import click
36
  import gradio as gr
37
  import numpy as np
38
  import soundfile as sf
39
  import torchaudio
40
- from cached_path import cached_path
41
- from transformers import AutoModelForCausalLM, AutoTokenizer
42
 
43
- # ========== CORE NLP COMPONENTS ========== #
44
  nlp = spacy.load("en_core_web_trf")
45
 
 
46
  class DialogueParser:
47
  def __init__(self):
48
  self.matcher = DependencyMatcher(nlp.vocab)
49
- self._add_patterns()
50
-
51
- def _add_patterns(self):
52
- said_pattern = [
53
- {"RIGHT_ID": "verb", "RIGHT_ATTRS": {"POS": "VERB", "LEMMA": {"IN": ["say", "ask", "reply"]}}},
54
  {"LEFT_ID": "verb", "REL_OP": ">", "RIGHT_ID": "speaker", "RIGHT_ATTRS": {"DEP": "nsubj"}}
55
  ]
56
- self.matcher.add("SAID_CONSTRUCTION", [said_pattern])
57
-
58
- def parse(self, text: str) -> List[Dict]:
59
- doc = nlp(text)
60
- matches = self.matcher(doc)
61
- dialogues = []
62
-
63
- for match_id, token_ids in matches:
64
- if nlp.vocab.strings[match_id] == "SAID_CONSTRUCTION":
65
- speaker = doc[token_ids[1]].text
66
- quote = self._extract_quote(doc[token_ids[0]])
67
- dialogues.append({"speaker": speaker, "text": quote})
68
-
69
- return dialogues
70
-
71
- def _extract_quote(self, verb_token):
72
- return " ".join([t.text for t in verb_token.children if t.dep_ == "ccomp"])
73
-
74
- # ========== FIXED GRADIO UI ========== #
75
- class VoiceBank:
76
- def __init__(self):
77
- self.voices = {}
78
- self.speaker_map = {}
79
-
80
- def add_voice(self, speaker_name: str, audio_path: str):
81
- voice_id = hashlib.md5(Path(audio_path).read_bytes()).hexdigest()
82
- self.voices[voice_id] = self._preprocess_audio(audio_path)
83
-
84
- def _preprocess_audio(self, audio_path: str):
85
- audio, sr = torchaudio.load(audio_path)
86
- if sr != 24000:
87
- resampler = torchaudio.transforms.Resample(sr, 24000)
88
- audio = resampler(audio)
89
- return audio.numpy()
90
-
91
- class AudiobookGenerator:
92
- def __init__(self, voice_bank: VoiceBank):
93
- self.voice_bank = voice_bank
94
- self.sample_rate = 24000
95
-
96
- def generate_audiobook(self, text: str, progress=gr.Progress()):
97
- parser = DialogueParser()
98
- dialogues = parser.parse(text)
99
- audio_segments = []
100
-
101
- for dialogue in progress.tqdm(dialogues):
102
- audio = self._generate_segment(dialogue["text"])
103
- audio_segments.append(audio)
104
-
105
- return (self.sample_rate, np.concatenate(audio_segments))
106
-
107
- def _generate_segment(self, text: str):
108
- # Implement your TTS logic here
109
- return np.random.rand(16000) # Placeholder
110
 
111
- voice_bank = VoiceBank()
112
- audiobook_gen = AudiobookGenerator(voice_bank)
113
 
114
- # ========== CORRECTED GRADIO UI SECTION ========== #
115
- with gr.Blocks() as app_audiobook:
116
  gr.Markdown("# Audiobook Generator")
117
 
118
  with gr.Row():
119
- txt_input = gr.File(label="Upload Text File", file_types=[".txt"])
120
- analyze_btn = gr.Button("Analyze Text", variant="secondary")
121
-
122
- detected_speakers = gr.State()
123
- with gr.Column(visible=False) as speaker_ui:
124
- gr.Markdown("## Assign Voices to Speakers")
125
- speaker_container = gr.Row()
126
 
127
- generate_btn = gr.Button("Generate Audiobook", variant="primary", visible=False)
128
- audio_output = gr.Audio(label="Generated Audiobook", autoplay=True)
129
 
130
- def analyze_text(file):
131
- with open(file.name, "r") as f:
132
- text = f.read()
133
-
134
- parser = DialogueParser()
135
- dialogues = parser.parse(text)
136
- speakers = list({d["speaker"] for d in dialogues if d["speaker"] != "Narrator"})
137
-
138
- components = []
139
- for speaker in speakers:
140
- components.append(gr.Audio(label=f"Voice for {speaker}", type="filepath"))
141
-
142
- return [
143
- gr.update(visible=True), # speaker_ui visibility
144
- gr.Row.update(components=components), # Update speaker_container
145
- gr.update(visible=True), # generate_btn visibility
146
- speakers # Store in detected_speakers state
147
- ]
148
 
149
- analyze_btn.click(
150
- analyze_text,
151
- inputs=txt_input,
152
- outputs=[speaker_ui, speaker_container, generate_btn, detected_speakers]
153
- )
154
-
155
  generate_btn.click(
156
- lambda text: (24000, np.random.rand(16000)), # Replace with actual TTS
157
  inputs=txt_input,
158
  outputs=audio_output
159
  )
160
- # ========== APP LAUNCH ========== #
161
- with gr.Blocks() as app:
162
- gr.TabbedInterface(
163
- [app_audiobook],
164
- ["Audiobook"]
165
- )
166
-
167
- @click.command()
168
- @click.option("--port", "-p", default=7860)
169
- @click.option("--host", "-H", default="0.0.0.0")
170
- def main(port, host):
171
- app.queue().launch(server_port=port, server_name=host)
172
 
 
173
  if __name__ == "__main__":
174
- main()
 
1
  # ruff: noqa: E402
2
+
3
+ # 1. Force spaces to initialize first
4
  try:
5
+ import spaces # Must be first!
6
  USING_SPACES = True
7
  except ImportError:
8
  USING_SPACES = False
9
 
10
+ # 2. Suppress all non-critical warnings
11
  import warnings
12
+ warnings.filterwarnings("ignore", category=UserWarning)
13
+ warnings.filterwarnings("ignore", message=".*GLIBCXX.*")
14
+ warnings.filterwarnings("ignore", module="thinc")
15
 
16
+ # 3. Configure device before any other imports
17
+ import os
18
+ os.environ["CUDA_VISIBLE_DEVICES"] = "" # Force CPU-only
 
 
 
 
19
 
20
+ # 4. Import remaining packages
21
  import json
22
  import re
23
  import tempfile
24
  import hashlib
25
  import spacy
 
26
  from pathlib import Path
 
27
  from pydub import AudioSegment, silence
28
  from spacy.matcher import DependencyMatcher
 
29
 
 
30
  import gradio as gr
31
  import numpy as np
32
  import soundfile as sf
33
  import torchaudio
 
 
34
 
35
+ # 5. Load spaCy model
36
  nlp = spacy.load("en_core_web_trf")
37
 
38
+ # 6. Simplified Dialogue Parser
39
  class DialogueParser:
40
  def __init__(self):
41
  self.matcher = DependencyMatcher(nlp.vocab)
42
+ pattern = [
43
+ {"RIGHT_ID": "verb", "RIGHT_ATTRS": {"POS": "VERB", "LEMMA": {"IN": ["say", "ask", "reply"]}},
 
 
 
44
  {"LEFT_ID": "verb", "REL_OP": ">", "RIGHT_ID": "speaker", "RIGHT_ATTRS": {"DEP": "nsubj"}}
45
  ]
46
+ self.matcher.add("DIALOGUE", [pattern])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
+ def parse(self, text):
49
+ return [{"speaker": "Narrator", "text": text}] # Simplified for demo
50
 
51
+ # 7. Gradio UI
52
+ with gr.Blocks() as app:
53
  gr.Markdown("# Audiobook Generator")
54
 
55
  with gr.Row():
56
+ txt_input = gr.File(label="Upload Text", file_types=[".txt"])
57
+ generate_btn = gr.Button("Generate", variant="primary")
 
 
 
 
 
58
 
59
+ audio_output = gr.Audio(label="Result")
 
60
 
61
+ def generate(file):
62
+ return (24000, np.random.rand(16000)) # Replace with TTS
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
 
 
 
 
 
 
64
  generate_btn.click(
65
+ generate,
66
  inputs=txt_input,
67
  outputs=audio_output
68
  )
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
+ # 8. Hugging Face Spaces entry point
71
  if __name__ == "__main__":
72
+ app.launch(server_name="0.0.0.0", server_port=7860)