Update app.py
Browse files
app.py
CHANGED
@@ -39,7 +39,7 @@ def infer_img2img(prompt, audio_path, progress=gr.Progress(track_tqdm=True)):
|
|
39 |
pipe = pipe.to(device)
|
40 |
|
41 |
width_start, width = 0, 160
|
42 |
-
strength_list
|
43 |
prompt = "ambulance siren"
|
44 |
seed = 42
|
45 |
|
@@ -76,35 +76,35 @@ def infer_img2img(prompt, audio_path, progress=gr.Progress(track_tqdm=True)):
|
|
76 |
denorm_spec_audio = vocoder.inference(denorm_spec)
|
77 |
audio_list.append(denorm_spec_audio)
|
78 |
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
|
109 |
css="""
|
110 |
div#col-container{
|
|
|
39 |
pipe = pipe.to(device)
|
40 |
|
41 |
width_start, width = 0, 160
|
42 |
+
strength_list = [0.0, 0.1, 0.2, 0.3, 0.5, 0.6, 0.7]
|
43 |
prompt = "ambulance siren"
|
44 |
seed = 42
|
45 |
|
|
|
76 |
denorm_spec_audio = vocoder.inference(denorm_spec)
|
77 |
audio_list.append(denorm_spec_audio)
|
78 |
|
79 |
+
# Display
|
80 |
+
|
81 |
+
# Concatenate the images generated at each strength side by side, inserting a 20-pixel-wide black spacer between adjacent images
|
82 |
+
concat_image_list = []
|
83 |
+
for i in range(len(image_list)):
|
84 |
+
if i == len(image_list) - 1:
|
85 |
+
concat_image_list.append(np.array(image_list[i]))
|
86 |
+
else:
|
87 |
+
concat_image_list.append(np.concatenate([np.array(image_list[i]), np.ones((256, 20, 3))*0], axis=1))
|
88 |
+
|
89 |
+
concat_image = np.concatenate(concat_image_list, axis=1)
|
90 |
+
concat_image = Image.fromarray(np.uint8(concat_image))
|
91 |
+
|
92 |
+
### Concatenate the audio clips, appending 16000 zero-valued samples (silence) after each clip
|
93 |
+
concat_audio_list = [np.concatenate([audio, np.zeros((1, 16000))], axis=1) for audio in audio_list]
|
94 |
+
concat_audio = np.concatenate(concat_audio_list, axis=1)
|
95 |
+
|
96 |
+
print("audio_path:", audio_path)
|
97 |
+
print("width_start:", width_start, "width:", width)
|
98 |
+
print("text prompt:", prompt)
|
99 |
+
print("strength_list:", strength_list)
|
100 |
+
|
101 |
+
# Flatten the concatenated audio to a 1-D array before writing it to disk
|
102 |
+
concat_audio = concat_audio.flatten() # Converts (1, N) → (N,)
|
103 |
+
|
104 |
+
# Save as WAV
|
105 |
+
sf.write("output.wav", concat_audio, 16000)
|
106 |
+
|
107 |
+
return "output.wav"
|
108 |
|
109 |
css="""
|
110 |
div#col-container{
|