tzzte committed on
Commit
9eb296a
·
verified ·
1 Parent(s): 930b040

Upload 5 files

Browse files
Files changed (2) hide show
  1. ACLlama_el_s2s.py +1 -1
  2. app.py +9 -7
ACLlama_el_s2s.py CHANGED
@@ -23,7 +23,7 @@ class ACLlamaConfig(LlamaConfig):
23
 
24
  def load_whisper(audio_tower_name, device="cuda"):
25
  model = WhisperModel.from_pretrained(
26
- "openai/whisper-large-v3",torch_dtype=torch.float16,low_cpu_mem_usage=True).to(device)
27
  model.config.forced_decoder_ids = None
28
  return model
29
 
 
23
 
24
  def load_whisper(audio_tower_name, device="cuda"):
25
  model = WhisperModel.from_pretrained(
26
+ audio_tower_name,torch_dtype=torch.float16,low_cpu_mem_usage=True).to(device)
27
  model.config.forced_decoder_ids = None
28
  return model
29
 
app.py CHANGED
@@ -58,7 +58,7 @@ def process_audio_input(audio):
58
  return None
59
 
60
  @spaces.GPU(duration=180) # 使用ZeroGPU,3分钟超时
61
- def process_audio_text(audio):
62
  """主要处理函数"""
63
  global _MODEL_ON_CUDA, inference_model
64
 
@@ -123,23 +123,25 @@ init_model()
123
 
124
  if __name__ == "__main__":
125
  examples = [
126
- ["./show_case/1.wav"],
127
- ["./show_case/2.wav"],
128
  ]
129
 
130
  iface = gr.Interface(
131
  fn=process_audio_text,
132
  inputs=[
133
- # gr.Textbox(label="Enter text instruction", value=""),
134
  gr.Audio(type="filepath", label="Upload Audio")
135
  ],
136
  outputs=[
137
- gr.Audio(label="Streamed Audio", streaming=True, autoplay=True),
138
- gr.Textbox(label="Model output")
139
  ],
140
  examples=examples,
 
 
141
  live=False,
142
  allow_flagging="never"
143
  )
144
 
145
- iface.launch(server_name="0.0.0.0", server_port=7860, share=False)
 
58
  return None
59
 
60
  @spaces.GPU(duration=180) # 使用ZeroGPU,3分钟超时
61
+ def process_audio_text(text, audio):
62
  """主要处理函数"""
63
  global _MODEL_ON_CUDA, inference_model
64
 
 
123
 
124
  if __name__ == "__main__":
125
  examples = [
126
+ ["", "./show_case/1.wav"],
127
+ ["", "./show_case/2.wav"],
128
  ]
129
 
130
  iface = gr.Interface(
131
  fn=process_audio_text,
132
  inputs=[
133
+ gr.Textbox(label="Enter text instruction", value=""),
134
  gr.Audio(type="filepath", label="Upload Audio")
135
  ],
136
  outputs=[
137
+ gr.Textbox(label="Model output"),
138
+ gr.Audio(label="Streamed Audio", streaming=True, autoplay=True)
139
  ],
140
  examples=examples,
141
+ title="🔊 EchoX Assistant",
142
+ description="A multimodal AI assistant that understands speech and responds with both text and audio",
143
  live=False,
144
  allow_flagging="never"
145
  )
146
 
147
+ iface.launch(server_name="0.0.0.0", server_port=7860, share=True)