Spaces:
Running
on
Zero
Running
on
Zero
Upload 5 files
Browse files
- ACLlama_el_s2s.py +1 -1
- app.py +9 -7
ACLlama_el_s2s.py
CHANGED
@@ -23,7 +23,7 @@ class ACLlamaConfig(LlamaConfig):
|
|
23 |
|
24 |
def load_whisper(audio_tower_name, device="cuda"):
|
25 |
model = WhisperModel.from_pretrained(
|
26 |
-
|
27 |
model.config.forced_decoder_ids = None
|
28 |
return model
|
29 |
|
|
|
23 |
|
24 |
def load_whisper(audio_tower_name, device="cuda"):
|
25 |
model = WhisperModel.from_pretrained(
|
26 |
+
audio_tower_name,torch_dtype=torch.float16,low_cpu_mem_usage=True).to(device)
|
27 |
model.config.forced_decoder_ids = None
|
28 |
return model
|
29 |
|
app.py
CHANGED
@@ -58,7 +58,7 @@ def process_audio_input(audio):
|
|
58 |
return None
|
59 |
|
60 |
@spaces.GPU(duration=180) # 使用ZeroGPU,3分钟超时
|
61 |
-
def process_audio_text(audio):
|
62 |
"""主要处理函数"""
|
63 |
global _MODEL_ON_CUDA, inference_model
|
64 |
|
@@ -123,23 +123,25 @@ init_model()
|
|
123 |
|
124 |
if __name__ == "__main__":
|
125 |
examples = [
|
126 |
-
["./show_case/1.wav"],
|
127 |
-
["./show_case/2.wav"],
|
128 |
]
|
129 |
|
130 |
iface = gr.Interface(
|
131 |
fn=process_audio_text,
|
132 |
inputs=[
|
133 |
-
|
134 |
gr.Audio(type="filepath", label="Upload Audio")
|
135 |
],
|
136 |
outputs=[
|
137 |
-
gr.
|
138 |
-
gr.
|
139 |
],
|
140 |
examples=examples,
|
|
|
|
|
141 |
live=False,
|
142 |
allow_flagging="never"
|
143 |
)
|
144 |
|
145 |
-
iface.launch(server_name="0.0.0.0", server_port=7860, share=
|
|
|
58 |
return None
|
59 |
|
60 |
@spaces.GPU(duration=180) # 使用ZeroGPU,3分钟超时
|
61 |
+
def process_audio_text(text, audio):
|
62 |
"""主要处理函数"""
|
63 |
global _MODEL_ON_CUDA, inference_model
|
64 |
|
|
|
123 |
|
124 |
if __name__ == "__main__":
|
125 |
examples = [
|
126 |
+
["", "./show_case/1.wav"],
|
127 |
+
["", "./show_case/2.wav"],
|
128 |
]
|
129 |
|
130 |
iface = gr.Interface(
|
131 |
fn=process_audio_text,
|
132 |
inputs=[
|
133 |
+
gr.Textbox(label="Enter text instruction", value=""),
|
134 |
gr.Audio(type="filepath", label="Upload Audio")
|
135 |
],
|
136 |
outputs=[
|
137 |
+
gr.Textbox(label="Model output"),
|
138 |
+
gr.Audio(label="Streamed Audio", streaming=True, autoplay=True)
|
139 |
],
|
140 |
examples=examples,
|
141 |
+
title="🔊 EchoX Assistant",
|
142 |
+
description="A multimodal AI assistant that understands speech and responds with both text and audio",
|
143 |
live=False,
|
144 |
allow_flagging="never"
|
145 |
)
|
146 |
|
147 |
+
iface.launch(server_name="0.0.0.0", server_port=7860, share=True)
|