yasserrmd committed on
Commit
03e7073
·
verified ·
1 Parent(s): 3ad533a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -7
app.py CHANGED
@@ -1,4 +1,3 @@
1
- import argparse
2
  import os
3
  import time
4
  import numpy as np
@@ -8,8 +7,8 @@ import soundfile as sf
8
  import torch
9
  import traceback
10
  from spaces import GPU
 
11
 
12
- from vibevoice.modular.configuration_vibevoice import VibeVoiceConfig
13
  from vibevoice.modular.modeling_vibevoice_inference import VibeVoiceForConditionalGenerationInference
14
  from vibevoice.processor.vibevoice_processor import VibeVoiceProcessor
15
  from transformers.utils import logging
@@ -120,11 +119,13 @@ class VibeVoiceDemo:
120
  verbose=False
121
  )
122
 
123
- # Extract audio
124
- if isinstance(outputs, dict) and "audio" in outputs:
125
- audio = outputs["audio"]
 
 
126
  else:
127
- audio = outputs
128
 
129
  if torch.is_tensor(audio):
130
  audio = audio.float().cpu().numpy()
@@ -134,8 +135,15 @@ class VibeVoiceDemo:
134
  sample_rate = 24000
135
  audio16 = convert_to_16_bit_wav(audio)
136
 
 
 
 
 
 
 
 
137
  total_dur = len(audio16) / sample_rate
138
- log = f"✅ Generation complete in {time.time()-start:.1f}s, {total_dur:.1f}s audio"
139
 
140
  self.is_generating = False
141
  return (sample_rate, audio16), log
 
 
1
  import os
2
  import time
3
  import numpy as np
 
7
  import torch
8
  import traceback
9
  from spaces import GPU
10
+ from datetime import datetime
11
 
 
12
  from vibevoice.modular.modeling_vibevoice_inference import VibeVoiceForConditionalGenerationInference
13
  from vibevoice.processor.vibevoice_processor import VibeVoiceProcessor
14
  from transformers.utils import logging
 
119
  verbose=False
120
  )
121
 
122
+ # --- handle model output ---
123
+ if hasattr(outputs, "audio"):
124
+ audio = outputs.audio
125
+ elif hasattr(outputs, "audios"):
126
+ audio = outputs.audios[0]
127
  else:
128
+ raise gr.Error("Model did not return audio in expected format.")
129
 
130
  if torch.is_tensor(audio):
131
  audio = audio.float().cpu().numpy()
 
135
  sample_rate = 24000
136
  audio16 = convert_to_16_bit_wav(audio)
137
 
138
+ # --- save automatically to disk ---
139
+ os.makedirs("outputs", exist_ok=True)
140
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
141
+ file_path = os.path.join("outputs", f"podcast_{timestamp}.wav")
142
+ sf.write(file_path, audio16, sample_rate)
143
+ print(f"💾 Saved podcast to {file_path}")
144
+
145
  total_dur = len(audio16) / sample_rate
146
+ log = f"✅ Generation complete in {time.time()-start:.1f}s, {total_dur:.1f}s audio\nSaved to {file_path}"
147
 
148
  self.is_generating = False
149
  return (sample_rate, audio16), log