Spaces:
Running
Running
Update
Browse files- .gitattributes +1 -0
- .pre-commit-config.yaml +33 -0
- .python-version +1 -0
- .vscode/extensions.json +8 -0
- .vscode/settings.json +17 -0
- README.md +2 -2
- app.py +75 -290
- assets/sample.wav +3 -0
- packages.txt +1 -1
- pyproject.toml +52 -0
- requirements.txt +182 -6
- style.css +4 -0
- utils.py +230 -0
- uv.lock +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*.wav filter=lfs diff=lfs merge=lfs -text
|
.pre-commit-config.yaml
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
repos:
|
2 |
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
3 |
+
rev: v5.0.0
|
4 |
+
hooks:
|
5 |
+
- id: check-executables-have-shebangs
|
6 |
+
- id: check-json
|
7 |
+
- id: check-merge-conflict
|
8 |
+
- id: check-shebang-scripts-are-executable
|
9 |
+
- id: check-toml
|
10 |
+
- id: check-yaml
|
11 |
+
- id: end-of-file-fixer
|
12 |
+
- id: mixed-line-ending
|
13 |
+
args: ["--fix=lf"]
|
14 |
+
- id: requirements-txt-fixer
|
15 |
+
- id: trailing-whitespace
|
16 |
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
17 |
+
rev: v0.11.11
|
18 |
+
hooks:
|
19 |
+
- id: ruff-check
|
20 |
+
args: ["--fix"]
|
21 |
+
- id: ruff-format
|
22 |
+
- repo: https://github.com/pre-commit/mirrors-mypy
|
23 |
+
rev: v1.15.0
|
24 |
+
hooks:
|
25 |
+
- id: mypy
|
26 |
+
args: ["--ignore-missing-imports"]
|
27 |
+
additional_dependencies:
|
28 |
+
[
|
29 |
+
"types-python-slugify",
|
30 |
+
"types-pytz",
|
31 |
+
"types-PyYAML",
|
32 |
+
"types-requests",
|
33 |
+
]
|
.python-version
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
3.10
|
.vscode/extensions.json
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"recommendations": [
|
3 |
+
"ms-python.python",
|
4 |
+
"charliermarsh.ruff",
|
5 |
+
"streetsidesoftware.code-spell-checker",
|
6 |
+
"tamasfe.even-better-toml"
|
7 |
+
]
|
8 |
+
}
|
.vscode/settings.json
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"editor.formatOnSave": true,
|
3 |
+
"files.insertFinalNewline": false,
|
4 |
+
"[python]": {
|
5 |
+
"editor.defaultFormatter": "charliermarsh.ruff",
|
6 |
+
"editor.formatOnType": true,
|
7 |
+
"editor.codeActionsOnSave": {
|
8 |
+
"source.fixAll.ruff": "explicit",
|
9 |
+
"source.organizeImports": "explicit"
|
10 |
+
}
|
11 |
+
},
|
12 |
+
"[jupyter]": {
|
13 |
+
"files.insertFinalNewline": false
|
14 |
+
},
|
15 |
+
"notebook.output.scrolling": true,
|
16 |
+
"notebook.formatOnSave.enabled": true
|
17 |
+
}
|
README.md
CHANGED
@@ -4,9 +4,9 @@ emoji: 🔊
|
|
4 |
colorFrom: indigo
|
5 |
colorTo: indigo
|
6 |
sdk: gradio
|
7 |
-
sdk_version:
|
8 |
app_file: app.py
|
9 |
license: cc0-1.0
|
10 |
---
|
11 |
|
12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
4 |
colorFrom: indigo
|
5 |
colorTo: indigo
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 5.31.0
|
8 |
app_file: app.py
|
9 |
license: cc0-1.0
|
10 |
---
|
11 |
|
12 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
CHANGED
@@ -1,314 +1,99 @@
|
|
1 |
-
|
2 |
-
# This program is also dedicated into the public domain.
|
3 |
-
# You may use it, at your choice, under the Unlicense, CC0, or WTFPL license.
|
4 |
-
# Enjoy!
|
5 |
-
|
6 |
-
# Mostly from: https://github.com/adefossez/seewav
|
7 |
-
# Original author: adefossez
|
8 |
-
|
9 |
-
|
10 |
-
import math
|
11 |
import tempfile
|
12 |
-
from pathlib import Path
|
13 |
-
import subprocess
|
14 |
-
import cairo
|
15 |
-
import numpy as np
|
16 |
-
import gradio as gr
|
17 |
-
from pydub import AudioSegment
|
18 |
-
|
19 |
-
|
20 |
-
def read_audio(audio, seek=None, duration=None):
|
21 |
-
"""
|
22 |
-
Read the `audio` file, starting at `seek` (or 0) seconds for `duration` (or all) seconds.
|
23 |
-
Returns `float[channels, samples]`.
|
24 |
-
"""
|
25 |
-
|
26 |
-
audio_segment = AudioSegment.from_file(audio)
|
27 |
-
channels = audio_segment.channels
|
28 |
-
samplerate = audio_segment.frame_rate
|
29 |
-
|
30 |
-
if seek is not None:
|
31 |
-
seek_ms = int(seek * 1000)
|
32 |
-
audio_segment = audio_segment[seek_ms:]
|
33 |
-
|
34 |
-
if duration is not None:
|
35 |
-
duration_ms = int(duration * 1000)
|
36 |
-
audio_segment = audio_segment[:duration_ms]
|
37 |
-
|
38 |
-
samples = audio_segment.get_array_of_samples()
|
39 |
-
wav = np.array(samples, dtype=np.float32)
|
40 |
-
return wav.reshape(channels, -1), samplerate
|
41 |
-
|
42 |
-
|
43 |
-
def sigmoid(x):
|
44 |
-
return 1 / (1 + np.exp(-x))
|
45 |
-
|
46 |
-
|
47 |
-
def envelope(wav, window, stride):
|
48 |
-
"""
|
49 |
-
Extract the envelope of the waveform `wav` (float[samples]), using average pooling
|
50 |
-
with `window` samples and the given `stride`.
|
51 |
-
"""
|
52 |
-
# pos = np.pad(np.maximum(wav, 0), window // 2)
|
53 |
-
wav = np.pad(wav, window // 2)
|
54 |
-
out = []
|
55 |
-
for off in range(0, len(wav) - window, stride):
|
56 |
-
frame = wav[off : off + window]
|
57 |
-
out.append(np.maximum(frame, 0).mean())
|
58 |
-
out = np.array(out)
|
59 |
-
# Some form of audio compressor based on the sigmoid.
|
60 |
-
out = 1.9 * (sigmoid(2.5 * out) - 0.5)
|
61 |
-
return out
|
62 |
-
|
63 |
-
|
64 |
-
def draw_env(envs, out, fg_colors, bg_color, size):
|
65 |
-
"""
|
66 |
-
Internal function, draw a single frame (two frames for stereo) using cairo and save
|
67 |
-
it to the `out` file as png. envs is a list of envelopes over channels, each env
|
68 |
-
is a float[bars] representing the height of the envelope to draw. Each entry will
|
69 |
-
be represented by a bar.
|
70 |
-
"""
|
71 |
-
surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, *size)
|
72 |
-
ctx = cairo.Context(surface)
|
73 |
-
ctx.scale(*size)
|
74 |
-
|
75 |
-
ctx.set_source_rgb(*bg_color)
|
76 |
-
ctx.rectangle(0, 0, 1, 1)
|
77 |
-
ctx.fill()
|
78 |
-
|
79 |
-
K = len(envs) # Number of waves to draw (waves are stacked vertically)
|
80 |
-
T = len(envs[0]) # Numbert of time steps
|
81 |
-
pad_ratio = 0.1 # spacing ratio between 2 bars
|
82 |
-
width = 1.0 / (T * (1 + 2 * pad_ratio))
|
83 |
-
pad = pad_ratio * width
|
84 |
-
delta = 2 * pad + width
|
85 |
-
|
86 |
-
ctx.set_line_width(width)
|
87 |
-
for step in range(T):
|
88 |
-
for i in range(K):
|
89 |
-
half = 0.5 * envs[i][step] # (semi-)height of the bar
|
90 |
-
half /= K # as we stack K waves vertically
|
91 |
-
midrule = (1 + 2 * i) / (2 * K) # midrule of i-th wave
|
92 |
-
ctx.set_source_rgb(*fg_colors[i])
|
93 |
-
ctx.move_to(pad + step * delta, midrule - half)
|
94 |
-
ctx.line_to(pad + step * delta, midrule)
|
95 |
-
ctx.stroke()
|
96 |
-
ctx.set_source_rgba(*fg_colors[i], 0.8)
|
97 |
-
ctx.move_to(pad + step * delta, midrule)
|
98 |
-
ctx.line_to(pad + step * delta, midrule + 0.9 * half)
|
99 |
-
ctx.stroke()
|
100 |
-
|
101 |
-
surface.write_to_png(out)
|
102 |
-
|
103 |
-
|
104 |
-
def interpole(x1, y1, x2, y2, x):
|
105 |
-
return y1 + (y2 - y1) * (x - x1) / (x2 - x1)
|
106 |
-
|
107 |
-
|
108 |
-
def visualize(
|
109 |
-
progress,
|
110 |
-
audio,
|
111 |
-
tmp,
|
112 |
-
out,
|
113 |
-
seek=None,
|
114 |
-
duration=None,
|
115 |
-
rate=60,
|
116 |
-
bars=50,
|
117 |
-
speed=4,
|
118 |
-
time=0.4,
|
119 |
-
oversample=3,
|
120 |
-
fg_color=(0.2, 0.2, 0.2),
|
121 |
-
fg_color2=(0.5, 0.3, 0.6),
|
122 |
-
bg_color=(1, 1, 1),
|
123 |
-
size=(400, 400),
|
124 |
-
stereo=False,
|
125 |
-
):
|
126 |
-
"""
|
127 |
-
Generate the visualisation for the `audio` file, using a `tmp` folder and saving the final
|
128 |
-
video in `out`.
|
129 |
-
`seek` and `durations` gives the extract location if any.
|
130 |
-
`rate` is the framerate of the output video.
|
131 |
-
|
132 |
-
`bars` is the number of bars in the animation.
|
133 |
-
`speed` is the base speed of transition. Depending on volume, actual speed will vary
|
134 |
-
between 0.5 and 2 times it.
|
135 |
-
`time` amount of audio shown at once on a frame.
|
136 |
-
`oversample` higher values will lead to more frequent changes.
|
137 |
-
`fg_color` is the rgb color to use for the foreground.
|
138 |
-
`fg_color2` is the rgb color to use for the second wav if stereo is set.
|
139 |
-
`bg_color` is the rgb color to use for the background.
|
140 |
-
`size` is the `(width, height)` in pixels to generate.
|
141 |
-
`stereo` is whether to create 2 waves.
|
142 |
-
"""
|
143 |
-
try:
|
144 |
-
wav, sr = read_audio(audio, seek=seek, duration=duration)
|
145 |
-
except (IOError, ValueError) as err:
|
146 |
-
raise gr.Error(err)
|
147 |
-
# wavs is a list of wav over channels
|
148 |
-
wavs = []
|
149 |
-
if stereo:
|
150 |
-
assert wav.shape[0] == 2, "stereo requires stereo audio file"
|
151 |
-
wavs.append(wav[0])
|
152 |
-
wavs.append(wav[1])
|
153 |
-
else:
|
154 |
-
wav = wav.mean(0)
|
155 |
-
wavs.append(wav)
|
156 |
-
|
157 |
-
for i, wav in enumerate(wavs):
|
158 |
-
wavs[i] = wav / wav.std()
|
159 |
-
|
160 |
-
window = int(sr * time / bars)
|
161 |
-
stride = int(window / oversample)
|
162 |
-
# envs is a list of env over channels
|
163 |
-
envs = []
|
164 |
-
for wav in wavs:
|
165 |
-
env = envelope(wav, window, stride)
|
166 |
-
env = np.pad(env, (bars // 2, 2 * bars))
|
167 |
-
envs.append(env)
|
168 |
-
|
169 |
-
duration = len(wavs[0]) / sr
|
170 |
-
frames = int(rate * duration)
|
171 |
-
smooth = np.hanning(bars)
|
172 |
-
|
173 |
-
gr.Info("Generating the frames...")
|
174 |
-
for idx in progress(range(frames)):
|
175 |
-
pos = (((idx / rate)) * sr) / stride / bars
|
176 |
-
off = int(pos)
|
177 |
-
loc = pos - off
|
178 |
-
denvs = []
|
179 |
-
for env in envs:
|
180 |
-
env1 = env[off * bars : (off + 1) * bars]
|
181 |
-
env2 = env[(off + 1) * bars : (off + 2) * bars]
|
182 |
-
|
183 |
-
# we want loud parts to be updated faster
|
184 |
-
maxvol = math.log10(1e-4 + env2.max()) * 10
|
185 |
-
speedup = np.clip(interpole(-6, 0.5, 0, 2, maxvol), 0.5, 2)
|
186 |
-
w = sigmoid(speed * speedup * (loc - 0.5))
|
187 |
-
denv = (1 - w) * env1 + w * env2
|
188 |
-
denv *= smooth
|
189 |
-
denvs.append(denv)
|
190 |
-
draw_env(denvs, tmp / f"{idx:06d}.png", (fg_color, fg_color2), bg_color, size)
|
191 |
-
gr.Info("Encoding the animation video...")
|
192 |
-
subprocess.run([
|
193 |
-
"ffmpeg", "-y", "-loglevel", "panic", "-r",
|
194 |
-
str(rate), "-f", "image2", "-s", f"{size[0]}x{size[1]}", "-i", "%06d.png", "-i", audio, "-c:a", "aac", "-vcodec", "libx264", "-crf", "10", "-pix_fmt", "yuv420p",
|
195 |
-
out.resolve()
|
196 |
-
], check=True, cwd=tmp)
|
197 |
-
return out
|
198 |
-
|
199 |
|
|
|
200 |
|
201 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
202 |
"""
|
203 |
-
|
204 |
-
"""
|
205 |
-
try:
|
206 |
-
r, g, b = [float(i) for i in colorstr.split(",")]
|
207 |
-
return r, g, b
|
208 |
-
except ValueError:
|
209 |
-
raise gr.Error(
|
210 |
-
"Format for color is 3 floats separated by commas 0.xx,0.xx,0.xx, rgb order"
|
211 |
-
)
|
212 |
-
|
213 |
-
|
214 |
-
def hex_to_rgb(hex_color):
|
215 |
-
hex_color = hex_color.lstrip('#')
|
216 |
-
r = int(hex_color[0:2], 16) / 255.0
|
217 |
-
g = int(hex_color[2:4], 16) / 255.0
|
218 |
-
b = int(hex_color[4:6], 16) / 255.0
|
219 |
-
return (r, g, b)
|
220 |
-
|
221 |
-
def do_viz(
|
222 |
-
inp_aud,
|
223 |
-
inp_bgcolor,
|
224 |
-
inp_color1,
|
225 |
-
inp_nbars,
|
226 |
-
inp_vidw,
|
227 |
-
inp_vidh,
|
228 |
-
progress=gr.Progress(),
|
229 |
-
):
|
230 |
-
with tempfile.TemporaryDirectory() as tmp, tempfile.NamedTemporaryFile(
|
231 |
-
suffix=".mp4",
|
232 |
-
delete=False
|
233 |
-
) as out:
|
234 |
return visualize(
|
235 |
-
|
236 |
-
|
237 |
-
Path(
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
size=(inp_vidw, inp_vidh),
|
243 |
)
|
244 |
|
245 |
|
246 |
-
|
247 |
-
|
248 |
-
ABOUT = """
|
249 |
-
# seewav GUI
|
250 |
-
|
251 |
-
> Have an audio clip but need a video (e.g. for X/Twitter)?
|
252 |
-
|
253 |
-
**Convert audio into a nice video!**
|
254 |
-
|
255 |
-
An online graphical user interface for [seewav](https://github.com/adefossez/seewav).
|
256 |
-
|
257 |
-
Enjoy!
|
258 |
-
"""
|
259 |
-
with gr.Blocks() as demo:
|
260 |
gr.Markdown(ABOUT)
|
261 |
with gr.Row():
|
262 |
with gr.Column():
|
263 |
-
|
264 |
-
with gr.Group():
|
265 |
-
inp_color1 = gr.ColorPicker(
|
266 |
-
label="Color",
|
267 |
-
info="Color of the top waveform",
|
268 |
-
value="#00237E",
|
269 |
-
interactive=True,
|
270 |
-
)
|
271 |
-
inp_bgcolor = gr.ColorPicker(
|
272 |
-
label="Background Color",
|
273 |
-
info="Color of the background",
|
274 |
-
value="#000000",
|
275 |
-
interactive=True,
|
276 |
-
)
|
277 |
with gr.Accordion("Advanced Configuration", open=False):
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
minimum=5,
|
283 |
maximum=1500,
|
|
|
|
|
284 |
)
|
285 |
-
|
286 |
label="Video Width",
|
287 |
-
value=400,
|
288 |
-
interactive=True,
|
289 |
minimum=100,
|
290 |
maximum=3000,
|
|
|
|
|
291 |
)
|
292 |
-
|
293 |
label="Video Height",
|
294 |
-
value=400,
|
295 |
-
interactive=True,
|
296 |
minimum=100,
|
297 |
maximum=3000,
|
|
|
|
|
298 |
)
|
299 |
-
|
300 |
with gr.Column():
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pathlib
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
import tempfile
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
+
import gradio as gr
|
5 |
|
6 |
+
from utils import hex_to_rgb, visualize
|
7 |
+
|
8 |
+
ABOUT = "# [seewav](https://github.com/adefossez/seewav)"
|
9 |
+
|
10 |
+
|
11 |
+
def run(
    audio_file: str,
    wave_color: str = "#00237E",
    background_color: str = "#000000",
    num_bars: int = 50,
    video_width: int = 400,
    video_height: int = 300,
    progress: gr.Progress = gr.Progress(track_tqdm=True),  # noqa: ARG001, B008
) -> str:
    """Generates a waveform video from an audio file using the seewav tool.

    This function processes the input audio file and creates a video visualizing its waveform.
    The waveform and background colors, number of waveform bars, and video resolution can be customized.

    Args:
        audio_file (str): Path to the input audio file (e.g., WAV or MP3).
        wave_color (str, optional): Hex color code for the waveform. Defaults to "#00237E".
        background_color (str, optional): Hex color code for the background. Defaults to "#000000".
        num_bars (int, optional): Number of bars to display in the waveform visualization. Defaults to 50.
        video_width (int, optional): Width of the output video in pixels. Odd values are rounded
            down to the nearest even number. Defaults to 400.
        video_height (int, optional): Height of the output video in pixels. Odd values are rounded
            down to the nearest even number. Defaults to 300.
        progress (gr.Progress, optional): Internal parameter for displaying progress in a Gradio interface.
            Not intended to be set manually by the user.

    Returns:
        str: Path to the generated waveform video file.

    Raises:
        gr.Error: If no audio file was provided.
    """
    if not audio_file:
        raise gr.Error("Please provide an audio file.")

    # UI sliders and API/MCP clients may deliver floats; the renderer slices
    # arrays and allocates an image surface with these values, so force ints.
    bars = max(1, int(num_bars))
    # The video is encoded as yuv420p, which needs even frame dimensions.
    width = max(2, int(video_width) // 2 * 2)
    height = max(2, int(video_height) // 2 * 2)

    # Reserve the output path, then close the handle so ffmpeg can overwrite
    # the file without a second open handle lingering on it.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as out:
        out_path = pathlib.Path(out.name)

    # The frame directory is only needed while rendering and encoding.
    with tempfile.TemporaryDirectory() as tmp:
        video_path = visualize(
            audio_file,
            pathlib.Path(tmp),
            out_path,
            bars=bars,
            fg_color=hex_to_rgb(wave_color),
            bg_color=hex_to_rgb(background_color),
            size=(width, height),
        )
    # Honour the declared `-> str` return type (visualize hands back a Path).
    return str(video_path)
|
48 |
|
49 |
|
50 |
+
with gr.Blocks(css_paths="style.css") as demo:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
gr.Markdown(ABOUT)
|
52 |
with gr.Row():
|
53 |
with gr.Column():
|
54 |
+
audio_file = gr.Audio(type="filepath")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
with gr.Accordion("Advanced Configuration", open=False):
|
56 |
+
wave_color = gr.ColorPicker(label="Waveform Color", value="#00237E")
|
57 |
+
background_color = gr.ColorPicker(label="Background Color", value="#000000")
|
58 |
+
num_bars = gr.Slider(
|
59 |
+
label="Number of Bars",
|
60 |
minimum=5,
|
61 |
maximum=1500,
|
62 |
+
step=5,
|
63 |
+
value=50,
|
64 |
)
|
65 |
+
video_width = gr.Slider(
|
66 |
label="Video Width",
|
|
|
|
|
67 |
minimum=100,
|
68 |
maximum=3000,
|
69 |
+
step=10,
|
70 |
+
value=400,
|
71 |
)
|
72 |
+
video_height = gr.Slider(
|
73 |
label="Video Height",
|
|
|
|
|
74 |
minimum=100,
|
75 |
maximum=3000,
|
76 |
+
step=10,
|
77 |
+
value=300,
|
78 |
)
|
79 |
+
run_button = gr.Button(variant="primary")
|
80 |
with gr.Column():
|
81 |
+
video = gr.Video(interactive=False)
|
82 |
+
|
83 |
+
gr.Examples(examples=["assets/sample.wav"], fn=run, inputs=audio_file, outputs=video)
|
84 |
+
|
85 |
+
run_button.click(
|
86 |
+
fn=run,
|
87 |
+
inputs=[
|
88 |
+
audio_file,
|
89 |
+
wave_color,
|
90 |
+
background_color,
|
91 |
+
num_bars,
|
92 |
+
video_width,
|
93 |
+
video_height,
|
94 |
+
],
|
95 |
+
outputs=video,
|
96 |
+
)
|
97 |
+
|
98 |
+
if __name__ == "__main__":
|
99 |
+
demo.launch(mcp_server=True)
|
assets/sample.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fd3186d0ca643fa0742dc349829c17edf101b4ffe8410ea871d8f0c2768a237f
|
3 |
+
size 452444
|
packages.txt
CHANGED
@@ -1 +1 @@
|
|
1 |
-
ffmpeg
|
|
|
1 |
+
ffmpeg
|
pyproject.toml
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[project]
|
2 |
+
name = "seewav-gui"
|
3 |
+
version = "0.1.0"
|
4 |
+
description = "Add your description here"
|
5 |
+
readme = "README.md"
|
6 |
+
requires-python = ">=3.10"
|
7 |
+
dependencies = [
|
8 |
+
"gradio[mcp]>=5.31.0",
|
9 |
+
"pycairo>=1.28.0",
|
10 |
+
]
|
11 |
+
|
12 |
+
[tool.ruff]
|
13 |
+
line-length = 119
|
14 |
+
exclude = ["utils.py"]
|
15 |
+
|
16 |
+
[tool.ruff.lint]
|
17 |
+
select = ["ALL"]
|
18 |
+
ignore = [
|
19 |
+
"COM812", # missing-trailing-comma
|
20 |
+
"D203", # one-blank-line-before-class
|
21 |
+
"D213", # multi-line-summary-second-line
|
22 |
+
"E501", # line-too-long
|
23 |
+
"SIM117", # multiple-with-statements
|
24 |
+
#
|
25 |
+
"D100", # undocumented-public-module
|
26 |
+
"D101", # undocumented-public-class
|
27 |
+
"D102", # undocumented-public-method
|
28 |
+
"D103", # undocumented-public-function
|
29 |
+
"D104", # undocumented-public-package
|
30 |
+
"D105", # undocumented-magic-method
|
31 |
+
"D107", # undocumented-public-init
|
32 |
+
"EM101", # raw-string-in-exception
|
33 |
+
"FBT001", # boolean-type-hint-positional-argument
|
34 |
+
"FBT002", # boolean-default-value-positional-argument
|
35 |
+
"PD901", # pandas-df-variable-name
|
36 |
+
"PGH003", # blanket-type-ignore
|
37 |
+
"PLR0913", # too-many-arguments
|
38 |
+
"PLR0915", # too-many-statements
|
39 |
+
"TRY003", # raise-vanilla-args
|
40 |
+
]
|
41 |
+
unfixable = [
|
42 |
+
"F401", # unused-import
|
43 |
+
]
|
44 |
+
|
45 |
+
[tool.ruff.lint.pydocstyle]
|
46 |
+
convention = "google"
|
47 |
+
|
48 |
+
[tool.ruff.lint.per-file-ignores]
|
49 |
+
"*.ipynb" = ["T201", "T203"]
|
50 |
+
|
51 |
+
[tool.ruff.format]
|
52 |
+
docstring-code-format = true
|
requirements.txt
CHANGED
@@ -1,6 +1,182 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# This file was autogenerated by uv via the following command:
|
2 |
+
# uv pip compile pyproject.toml -o requirements.txt
|
3 |
+
aiofiles==24.1.0
|
4 |
+
# via gradio
|
5 |
+
annotated-types==0.7.0
|
6 |
+
# via pydantic
|
7 |
+
anyio==4.9.0
|
8 |
+
# via
|
9 |
+
# gradio
|
10 |
+
# httpx
|
11 |
+
# mcp
|
12 |
+
# sse-starlette
|
13 |
+
# starlette
|
14 |
+
certifi==2025.4.26
|
15 |
+
# via
|
16 |
+
# httpcore
|
17 |
+
# httpx
|
18 |
+
# requests
|
19 |
+
charset-normalizer==3.4.2
|
20 |
+
# via requests
|
21 |
+
click==8.2.1
|
22 |
+
# via
|
23 |
+
# typer
|
24 |
+
# uvicorn
|
25 |
+
exceptiongroup==1.3.0
|
26 |
+
# via anyio
|
27 |
+
fastapi==0.115.12
|
28 |
+
# via gradio
|
29 |
+
ffmpy==0.5.0
|
30 |
+
# via gradio
|
31 |
+
filelock==3.18.0
|
32 |
+
# via huggingface-hub
|
33 |
+
fsspec==2025.5.1
|
34 |
+
# via
|
35 |
+
# gradio-client
|
36 |
+
# huggingface-hub
|
37 |
+
gradio==5.31.0
|
38 |
+
# via seewav-gui (pyproject.toml)
|
39 |
+
gradio-client==1.10.1
|
40 |
+
# via gradio
|
41 |
+
groovy==0.1.2
|
42 |
+
# via gradio
|
43 |
+
h11==0.16.0
|
44 |
+
# via
|
45 |
+
# httpcore
|
46 |
+
# uvicorn
|
47 |
+
hf-xet==1.1.2
|
48 |
+
# via huggingface-hub
|
49 |
+
httpcore==1.0.9
|
50 |
+
# via httpx
|
51 |
+
httpx==0.28.1
|
52 |
+
# via
|
53 |
+
# gradio
|
54 |
+
# gradio-client
|
55 |
+
# mcp
|
56 |
+
# safehttpx
|
57 |
+
httpx-sse==0.4.0
|
58 |
+
# via mcp
|
59 |
+
huggingface-hub==0.32.3
|
60 |
+
# via
|
61 |
+
# gradio
|
62 |
+
# gradio-client
|
63 |
+
idna==3.10
|
64 |
+
# via
|
65 |
+
# anyio
|
66 |
+
# httpx
|
67 |
+
# requests
|
68 |
+
jinja2==3.1.6
|
69 |
+
# via gradio
|
70 |
+
markdown-it-py==3.0.0
|
71 |
+
# via rich
|
72 |
+
markupsafe==3.0.2
|
73 |
+
# via
|
74 |
+
# gradio
|
75 |
+
# jinja2
|
76 |
+
mcp==1.9.0
|
77 |
+
# via gradio
|
78 |
+
mdurl==0.1.2
|
79 |
+
# via markdown-it-py
|
80 |
+
numpy==2.2.6
|
81 |
+
# via
|
82 |
+
# gradio
|
83 |
+
# pandas
|
84 |
+
orjson==3.10.18
|
85 |
+
# via gradio
|
86 |
+
packaging==25.0
|
87 |
+
# via
|
88 |
+
# gradio
|
89 |
+
# gradio-client
|
90 |
+
# huggingface-hub
|
91 |
+
pandas==2.2.3
|
92 |
+
# via gradio
|
93 |
+
pillow==11.2.1
|
94 |
+
# via gradio
|
95 |
+
pycairo==1.28.0
|
96 |
+
# via seewav-gui (pyproject.toml)
|
97 |
+
pydantic==2.11.5
|
98 |
+
# via
|
99 |
+
# fastapi
|
100 |
+
# gradio
|
101 |
+
# mcp
|
102 |
+
# pydantic-settings
|
103 |
+
pydantic-core==2.33.2
|
104 |
+
# via pydantic
|
105 |
+
pydantic-settings==2.9.1
|
106 |
+
# via mcp
|
107 |
+
pydub==0.25.1
|
108 |
+
# via gradio
|
109 |
+
pygments==2.19.1
|
110 |
+
# via rich
|
111 |
+
python-dateutil==2.9.0.post0
|
112 |
+
# via pandas
|
113 |
+
python-dotenv==1.1.0
|
114 |
+
# via pydantic-settings
|
115 |
+
python-multipart==0.0.20
|
116 |
+
# via
|
117 |
+
# gradio
|
118 |
+
# mcp
|
119 |
+
pytz==2025.2
|
120 |
+
# via pandas
|
121 |
+
pyyaml==6.0.2
|
122 |
+
# via
|
123 |
+
# gradio
|
124 |
+
# huggingface-hub
|
125 |
+
requests==2.32.3
|
126 |
+
# via huggingface-hub
|
127 |
+
rich==14.0.0
|
128 |
+
# via typer
|
129 |
+
ruff==0.11.12
|
130 |
+
# via gradio
|
131 |
+
safehttpx==0.1.6
|
132 |
+
# via gradio
|
133 |
+
semantic-version==2.10.0
|
134 |
+
# via gradio
|
135 |
+
shellingham==1.5.4
|
136 |
+
# via typer
|
137 |
+
six==1.17.0
|
138 |
+
# via python-dateutil
|
139 |
+
sniffio==1.3.1
|
140 |
+
# via anyio
|
141 |
+
sse-starlette==2.3.5
|
142 |
+
# via mcp
|
143 |
+
starlette==0.46.2
|
144 |
+
# via
|
145 |
+
# fastapi
|
146 |
+
# gradio
|
147 |
+
# mcp
|
148 |
+
# sse-starlette
|
149 |
+
tomlkit==0.13.2
|
150 |
+
# via gradio
|
151 |
+
tqdm==4.67.1
|
152 |
+
# via huggingface-hub
|
153 |
+
typer==0.16.0
|
154 |
+
# via gradio
|
155 |
+
typing-extensions==4.13.2
|
156 |
+
# via
|
157 |
+
# anyio
|
158 |
+
# exceptiongroup
|
159 |
+
# fastapi
|
160 |
+
# gradio
|
161 |
+
# gradio-client
|
162 |
+
# huggingface-hub
|
163 |
+
# pydantic
|
164 |
+
# pydantic-core
|
165 |
+
# rich
|
166 |
+
# typer
|
167 |
+
# typing-inspection
|
168 |
+
# uvicorn
|
169 |
+
typing-inspection==0.4.1
|
170 |
+
# via
|
171 |
+
# pydantic
|
172 |
+
# pydantic-settings
|
173 |
+
tzdata==2025.2
|
174 |
+
# via pandas
|
175 |
+
urllib3==2.4.0
|
176 |
+
# via requests
|
177 |
+
uvicorn==0.34.2
|
178 |
+
# via
|
179 |
+
# gradio
|
180 |
+
# mcp
|
181 |
+
websockets==15.0.1
|
182 |
+
# via gradio-client
|
style.css
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
h1 {
|
2 |
+
text-align: center;
|
3 |
+
display: block;
|
4 |
+
}
|
utils.py
ADDED
@@ -0,0 +1,230 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Thank you to the authors of seewav for dedicating it into the public domain.
|
2 |
+
# This program is also dedicated into the public domain.
|
3 |
+
# You may use it, at your choice, under the Unlicense, CC0, or WTFPL license.
|
4 |
+
# Enjoy!
|
5 |
+
|
6 |
+
# Mostly from: https://github.com/adefossez/seewav
|
7 |
+
# Original author: adefossez
|
8 |
+
|
9 |
+
|
10 |
+
import math
|
11 |
+
import subprocess
|
12 |
+
|
13 |
+
import cairo
|
14 |
+
import gradio as gr
|
15 |
+
import numpy as np
|
16 |
+
import tqdm
|
17 |
+
from pydub import AudioSegment
|
18 |
+
|
19 |
+
|
20 |
+
def read_audio(audio, seek=None, duration=None):
    """Read the `audio` file, starting at `seek` (or 0) seconds for `duration` (or all) seconds.

    Returns a tuple `(wav, samplerate)` where `wav` is `float32[channels, samples]`
    holding the decoded sample values cast to float (no rescaling is applied here)
    and `samplerate` is the frame rate reported by pydub.
    """
    audio_segment = AudioSegment.from_file(audio)
    channels = audio_segment.channels
    samplerate = audio_segment.frame_rate

    # pydub segments are sliced in milliseconds.
    if seek is not None:
        seek_ms = int(seek * 1000)
        audio_segment = audio_segment[seek_ms:]

    if duration is not None:
        duration_ms = int(duration * 1000)
        audio_segment = audio_segment[:duration_ms]

    samples = audio_segment.get_array_of_samples()
    wav = np.array(samples, dtype=np.float32)
    # Samples are interleaved frame by frame (L, R, L, R, ...), so rows must be
    # frames first and then transposed; reshaping straight to (channels, -1)
    # would instead cut the stream into `channels` consecutive chunks.
    return wav.reshape(-1, channels).T, samplerate
|
39 |
+
|
40 |
+
|
41 |
+
def sigmoid(x):
    """Logistic function `1 / (1 + exp(-x))`, evaluated with NumPy (scalars or arrays)."""
    decay = np.exp(-x)
    return 1 / (decay + 1)
|
43 |
+
|
44 |
+
|
45 |
+
def envelope(wav, window, stride):
    """Compute the envelope of the 1-D waveform `wav` by average pooling.

    The signal is zero-padded by `window // 2` on both sides, then for every
    offset `0, stride, 2 * stride, ...` below `len(padded) - window` the mean of
    the positive part of the `window`-sample frame is taken. The pooled values
    are finally squashed by a sigmoid-based compressor.
    """
    padded = np.pad(wav, window // 2)
    starts = range(0, len(padded) - window, stride)
    # Negative samples are clamped to zero before averaging each frame.
    pooled = np.array([np.maximum(padded[start : start + window], 0).mean() for start in starts])
    # Some form of audio compressor based on the sigmoid.
    return 1.9 * (sigmoid(2.5 * pooled) - 0.5)
|
59 |
+
|
60 |
+
|
61 |
+
def draw_env(envs, out, fg_colors, bg_color, size):
    """Render one video frame with cairo and write it to `out` as a PNG.

    `envs` holds one envelope per wave (waves are stacked vertically); each
    envelope is a sequence of bar heights, one bar per entry. `fg_colors[i]` is
    the rgb color of the i-th wave, `bg_color` the rgb background and `size`
    the `(width, height)` of the image in pixels.
    """
    surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, *size)
    ctx = cairo.Context(surface)
    # After scaling, all drawing below happens in the unit square.
    ctx.scale(*size)

    # Paint the background.
    ctx.set_source_rgb(*bg_color)
    ctx.rectangle(0, 0, 1, 1)
    ctx.fill()

    n_waves = len(envs)  # number of waves to draw
    n_steps = len(envs[0])  # number of time steps (bars per wave)
    pad_ratio = 0.1  # spacing ratio between 2 bars
    bar_width = 1.0 / (n_steps * (1 + 2 * pad_ratio))
    pad = pad_ratio * bar_width
    pitch = 2 * pad + bar_width  # horizontal distance between two bars

    ctx.set_line_width(bar_width)
    for step in range(n_steps):
        x = pad + step * pitch
        for wave in range(n_waves):
            # (semi-)height of the bar, shrunk because the waves share the frame
            half = 0.5 * envs[wave][step] / n_waves
            midrule = (1 + 2 * wave) / (2 * n_waves)  # vertical centre of this wave
            color = fg_colors[wave]

            # Upper half of the bar, fully opaque.
            ctx.set_source_rgb(*color)
            ctx.move_to(x, midrule - half)
            ctx.line_to(x, midrule)
            ctx.stroke()

            # Lower half: slightly shorter and slightly transparent.
            ctx.set_source_rgba(*color, 0.8)
            ctx.move_to(x, midrule)
            ctx.line_to(x, midrule + 0.9 * half)
            ctx.stroke()

    surface.write_to_png(out)
|
98 |
+
|
99 |
+
|
100 |
+
def interpole(x1, y1, x2, y2, x):
    """Evaluate at `x` the straight line passing through `(x1, y1)` and `(x2, y2)`."""
    rise = y2 - y1
    run = x2 - x1
    offset = x - x1
    return y1 + rise * offset / run
|
102 |
+
|
103 |
+
|
104 |
+
def visualize(
    audio,
    tmp,
    out,
    seek=None,
    duration=None,
    rate=60,
    bars=50,
    speed=4,
    time=0.4,
    oversample=3,
    fg_color=(0.2, 0.2, 0.2),
    fg_color2=(0.5, 0.3, 0.6),
    bg_color=(1, 1, 1),
    size=(400, 400),
    stereo=False,
):
    """Generate the visualisation for the `audio` file, using a `tmp` folder and saving the final
    video in `out`.

    `audio` is the path of the audio file, `tmp` and `out` are `pathlib.Path` objects.
    `seek` and `duration` give the extract location (in seconds) if any.
    `rate` is the framerate of the output video.

    `bars` is the number of bars in the animation.
    `speed` is the base speed of transition. Depending on volume, actual speed will vary
        between 0.5 and 2 times it.
    `time` amount of audio shown at once on a frame.
    `oversample` higher values will lead to more frequent changes.
    `fg_color` is the rgb color to use for the foreground.
    `fg_color2` is the rgb color to use for the second wav if stereo is set.
    `bg_color` is the rgb color to use for the background.
    `size` is the `(width, height)` in pixels to generate.
    `stereo` is whether to create 2 waves.

    Returns `out`. Raises `gr.Error` when the audio cannot be read, is empty, or is not
    stereo while `stereo` is set; `subprocess.CalledProcessError` when ffmpeg fails.
    """
    import os

    try:
        wav, sr = read_audio(audio, seek=seek, duration=duration)
    except (OSError, ValueError) as err:
        raise gr.Error(str(err)) from err
    if wav.shape[-1] == 0:
        raise gr.Error("The audio file contains no samples.")

    # wavs is a list of wav over channels
    if stereo:
        # Explicit check instead of `assert`, which is stripped under `python -O`.
        if wav.shape[0] != 2:
            raise gr.Error("stereo requires stereo audio file")
        wavs = [wav[0], wav[1]]
    else:
        wavs = [wav.mean(0)]

    for i, channel in enumerate(wavs):
        std = channel.std()
        # A silent channel has zero deviation; leave it untouched rather than
        # dividing by zero and filling the envelope with NaN.
        wavs[i] = channel / std if std > 0 else channel

    # With many bars or a low sample rate the integer truncation can reach 0,
    # which would produce empty frames and a zero step in the pooling loop.
    window = max(1, int(sr * time / bars))
    stride = max(1, int(window / oversample))
    # envs is a list of env over channels
    envs = []
    for channel in wavs:
        env = envelope(channel, window, stride)
        # Padding lets the first and last frames slice full `bars`-wide windows.
        env = np.pad(env, (bars // 2, 2 * bars))
        envs.append(env)

    total_seconds = len(wavs[0]) / sr
    frames = int(rate * total_seconds)
    smooth = np.hanning(bars)

    for idx in tqdm.tqdm(range(frames)):
        pos = ((idx / rate) * sr) / stride / bars
        off = int(pos)
        loc = pos - off
        denvs = []
        for env in envs:
            env1 = env[off * bars : (off + 1) * bars]
            env2 = env[(off + 1) * bars : (off + 2) * bars]

            # we want loud parts to be updated faster
            maxvol = math.log10(1e-4 + env2.max()) * 10
            speedup = np.clip(interpole(-6, 0.5, 0, 2, maxvol), 0.5, 2)
            w = sigmoid(speed * speedup * (loc - 0.5))
            denv = (1 - w) * env1 + w * env2
            denv *= smooth
            denvs.append(denv)
        draw_env(denvs, tmp / f"{idx:06d}.png", (fg_color, fg_color2), bg_color, size)

    command = [
        "ffmpeg",
        "-y",
        "-loglevel",
        "panic",
        "-r",
        str(rate),
        "-f",
        "image2",
        "-s",
        f"{size[0]}x{size[1]}",
        "-i",
        "%06d.png",
    ]
    # Keep the audio track aligned with the rendered extract.
    if seek is not None:
        command += ["-ss", str(seek)]
    # ffmpeg runs with cwd=tmp, so a relative audio path must be made absolute first.
    command += ["-i", os.path.abspath(audio)]
    if duration is not None:
        command += ["-t", str(duration)]
    command += [
        "-c:a",
        "aac",
        "-vcodec",
        "libx264",
        "-crf",
        "10",
        "-pix_fmt",
        "yuv420p",
        str(out.resolve()),
    ]
    subprocess.run(command, check=True, cwd=tmp)
    return out
|
214 |
+
|
215 |
+
|
216 |
+
def parse_color(colorstr):
    """Parse a comma separated `r,g,b` string into a 3-tuple of floats.

    Raises `gr.Error` when the string does not consist of exactly three
    comma separated values that parse as floats.
    """
    try:
        r, g, b = (float(part) for part in colorstr.split(","))
    except ValueError as err:
        # Covers both a non-numeric component and a wrong number of components.
        raise gr.Error("Format for color is 3 floats separated by commas 0.xx,0.xx,0.xx, rgb order") from err
    return r, g, b
|
223 |
+
|
224 |
+
|
225 |
+
def hex_to_rgb(hex_color):
    """Convert a color string into an `(r, g, b)` tuple of floats in `[0, 1]`.

    Accepted forms:
      * `#RRGGBB` / `RRGGBB` (extra trailing hex digits, e.g. an alpha pair, are ignored),
      * the `#RGB` shorthand, where each digit is doubled,
      * CSS-style `rgb(r, g, b)` / `rgba(r, g, b, a)` with 0-255 components, a form
        some color pickers emit; the alpha component is ignored.

    Raises `ValueError` for anything else.
    """
    import re

    text = hex_color.strip()

    css = re.fullmatch(r"rgba?\((.*)\)", text, flags=re.IGNORECASE)
    if css:
        parts = css.group(1).split(",")
        if len(parts) not in (3, 4):
            raise ValueError(f"Invalid color: {hex_color!r}")
        # Clamp to the 0-255 range before normalising; alpha (if any) is dropped.
        r, g, b = (min(max(float(part), 0.0), 255.0) / 255.0 for part in parts[:3])
        return (r, g, b)

    digits = text.lstrip("#")
    if re.fullmatch(r"[0-9a-fA-F]{3}", digits):
        # Expand the shorthand: "f0a" -> "ff00aa".
        digits = "".join(ch * 2 for ch in digits)
    elif not re.fullmatch(r"[0-9a-fA-F]{6,}", digits):
        raise ValueError(f"Invalid color: {hex_color!r}")

    r = int(digits[0:2], 16) / 255.0
    g = int(digits[2:4], 16) / 255.0
    b = int(digits[4:6], 16) / 255.0
    return (r, g, b)
|
uv.lock
ADDED
The diff for this file is too large to render.
See raw diff
|
|