change to static
- README.md +4 -3
- app.py +0 -190
- front/package-lock.json +19 -0
- front/package.json +2 -1
- front/src/App.tsx +3 -0
- front/src/components/AuthCard.tsx +17 -0
- front/src/components/PodcastGenerator.tsx +1 -1
- front/src/utils/prompts.ts +1 -0
- front/src/utils/utils.ts +14 -7
- packages.txt +0 -2
- requirements.txt +0 -1
README.md
CHANGED
@@ -3,10 +3,11 @@ title: Kokoro Podcast Generator
 emoji: 🦀
 colorFrom: indigo
 colorTo: pink
-sdk: gradio
-sdk_version: 5.16.0
-app_file: app.py
+sdk: static
 pinned: false
+hf_oauth: true
+hf_oauth_scopes:
+  - inference-api
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py
DELETED
@@ -1,190 +0,0 @@
-import spaces
-from kokoro import KModel, KPipeline
-import gradio as gr
-import os
-import random
-import torch
-from urllib.parse import quote
-
-print(os.system("""
-cd front;
-npm ci;
-npm run build;
-cd ..;
-"""))
-
-CHAR_LIMIT = 5000 # test
-
-SPACE_ID = os.environ.get('SPACE_ID')
-LLM_ENDPOINT = os.environ.get('LLM_ENDPOINT', 'null')
-
-CUDA_AVAILABLE = torch.cuda.is_available()
-models = {gpu: KModel().to('cuda' if gpu else 'cpu').eval() for gpu in [False] + ([True] if CUDA_AVAILABLE else [])}
-pipelines = {lang_code: KPipeline(lang_code=lang_code, model=False) for lang_code in 'ab'}
-pipelines['a'].g2p.lexicon.golds['kokoro'] = 'kˈOkəɹO'
-pipelines['b'].g2p.lexicon.golds['kokoro'] = 'kˈQkəɹQ'
-
-gr.set_static_paths(paths=["./front/dist"])
-
-@spaces.GPU(duration=30)
-def forward_gpu(ps, ref_s, speed):
-    return models[True](ps, ref_s, speed)
-
-def generate_first(text, voice='af_heart', speed=1, use_gpu=CUDA_AVAILABLE):
-    text = text if CHAR_LIMIT is None else text.strip()[:CHAR_LIMIT]
-    pipeline = pipelines[voice[0]]
-    pack = pipeline.load_voice(voice)
-    use_gpu = use_gpu and CUDA_AVAILABLE
-    for _, ps, _ in pipeline(text, voice, speed):
-        ref_s = pack[len(ps)-1]
-        try:
-            if use_gpu:
-                audio = forward_gpu(ps, ref_s, speed)
-            else:
-                audio = models[False](ps, ref_s, speed)
-        except gr.exceptions.Error as e:
-            if use_gpu:
-                gr.Warning(str(e))
-                gr.Info('Retrying with CPU. To avoid this error, change Hardware to CPU.')
-                audio = models[False](ps, ref_s, speed)
-            else:
-                raise gr.Error(e)
-        return (24000, audio.numpy()), ps
-    return None, ''
-
-# Arena API
-def predict(text, voice='af_heart', speed=1):
-    return generate_first(text, voice, speed, use_gpu=False)[0]
-
-def tokenize_first(text, voice='af_heart'):
-    pipeline = pipelines[voice[0]]
-    for _, ps, _ in pipeline(text, voice):
-        return ps
-    return ''
-
-def generate_all(text, voice='af_heart', speed=1, use_gpu=CUDA_AVAILABLE):
-    text = text if CHAR_LIMIT is None else text.strip()[:CHAR_LIMIT]
-    pipeline = pipelines[voice[0]]
-    pack = pipeline.load_voice(voice)
-    use_gpu = use_gpu and CUDA_AVAILABLE
-    first = True
-    for _, ps, _ in pipeline(text, voice, speed):
-        ref_s = pack[len(ps)-1]
-        try:
-            if use_gpu:
-                audio = forward_gpu(ps, ref_s, speed)
-            else:
-                audio = models[False](ps, ref_s, speed)
-        except gr.exceptions.Error as e:
-            if use_gpu:
-                gr.Warning(str(e))
-                gr.Info('Switching to CPU')
-                audio = models[False](ps, ref_s, speed)
-            else:
-                raise gr.Error(e)
-        yield 24000, audio.numpy()
-        if first:
-            first = False
-            yield 24000, torch.zeros(1).numpy()
-
-CHOICES = {
-    '🇺🇸 🚺 Heart ❤️': 'af_heart',
-    '🇺🇸 🚺 Bella 🔥': 'af_bella',
-    '🇺🇸 🚺 Nicole 🎧': 'af_nicole',
-    '🇺🇸 🚺 Aoede': 'af_aoede',
-    '🇺🇸 🚺 Kore': 'af_kore',
-    '🇺🇸 🚺 Sarah': 'af_sarah',
-    '🇺🇸 🚺 Nova': 'af_nova',
-    '🇺🇸 🚺 Sky': 'af_sky',
-    '🇺🇸 🚺 Alloy': 'af_alloy',
-    '🇺🇸 🚺 Jessica': 'af_jessica',
-    '🇺🇸 🚺 River': 'af_river',
-    '🇺🇸 🚹 Michael': 'am_michael',
-    '🇺🇸 🚹 Fenrir': 'am_fenrir',
-    '🇺🇸 🚹 Puck': 'am_puck',
-    '🇺🇸 🚹 Echo': 'am_echo',
-    '🇺🇸 🚹 Eric': 'am_eric',
-    '🇺🇸 🚹 Liam': 'am_liam',
-    '🇺🇸 🚹 Onyx': 'am_onyx',
-    '🇺🇸 🚹 Santa': 'am_santa',
-    '🇺🇸 🚹 Adam': 'am_adam',
-    '🇬🇧 🚺 Emma': 'bf_emma',
-    '🇬🇧 🚺 Isabella': 'bf_isabella',
-    '🇬🇧 🚺 Alice': 'bf_alice',
-    '🇬🇧 🚺 Lily': 'bf_lily',
-    '🇬🇧 🚹 George': 'bm_george',
-    '🇬🇧 🚹 Fable': 'bm_fable',
-    '🇬🇧 🚹 Lewis': 'bm_lewis',
-    '🇬🇧 🚹 Daniel': 'bm_daniel',
-}
-for v in CHOICES.values():
-    pipelines[v[0]].load_voice(v)
-
-TOKEN_NOTE = '''
-💡 Customize pronunciation with Markdown link syntax and /slashes/ like `[Kokoro](/kˈOkəɹO/)`
-
-💬 To adjust intonation, try punctuation `;:,.!?—…"()“”` or stress `ˈ` and `ˌ`
-
-⬇️ Lower stress `[1 level](-1)` or `[2 levels](-2)`
-
-⬆️ Raise stress 1 level `[or](+2)` 2 levels (only works on less stressed, usually short words)
-'''
-
-with gr.Blocks() as generate_tab:
-    out_audio = gr.Audio(label='Output Audio', interactive=False, streaming=False, autoplay=True)
-    generate_btn = gr.Button('Generate', variant='primary')
-    with gr.Accordion('Output Tokens', open=True):
-        out_ps = gr.Textbox(interactive=False, show_label=False, info='Tokens used to generate the audio, up to 510 context length.')
-        tokenize_btn = gr.Button('Tokenize', variant='secondary')
-        gr.Markdown(TOKEN_NOTE)
-    predict_btn = gr.Button('Predict', variant='secondary', visible=False)
-
-STREAM_NOTE = ['⚠️ There is an unknown Gradio bug that might yield no audio the first time you click `Stream`.']
-if CHAR_LIMIT is not None:
-    STREAM_NOTE.append(f'✂️ Each stream is capped at {CHAR_LIMIT} characters.')
-    STREAM_NOTE.append('🚀 Want more characters? You can [use Kokoro directly](https://huggingface.co/hexgrad/Kokoro-82M#usage) or duplicate this space:')
-STREAM_NOTE = '\n\n'.join(STREAM_NOTE)
-
-with gr.Blocks() as stream_tab:
-    out_stream = gr.Audio(label='Output Audio Stream', interactive=False, streaming=True, autoplay=True)
-    with gr.Row():
-        stream_btn = gr.Button('Stream', variant='primary')
-        stop_btn = gr.Button('Stop', variant='stop')
-    with gr.Accordion('Note', open=True):
-        gr.Markdown(STREAM_NOTE)
-        gr.DuplicateButton()
-
-API_NAME = 'tts'
-
-
-head = f'''
-<script>
-document.addEventListener('DOMContentLoaded', () => {{
-    console.log('DOM content loaded');
-    if (!localStorage.getItem('debug') && !window.location.href.match(/debug=1/)) {{
-        console.log('Attaching frontend app');
-        const frontendApp = document.createElement('iframe');
-        frontendApp.src = '/gradio_api/file=./front/dist/index.html?SPACE_ID={quote(SPACE_ID)}&LLM_ENDPOINT={quote(LLM_ENDPOINT)}';
-        frontendApp.style = 'position: fixed; top: 0; left: 0; width: 100%; height: 100%; border: none; z-index: 999999;';
-        document.body.appendChild(frontendApp);
-    }}
-}});
-</script>
-'''
-
-with gr.Blocks(head=head) as app:
-    with gr.Row():
-        with gr.Column():
-            text = gr.Textbox(label='Input Text', info=f"Up to ~500 characters per Generate, or {'∞' if CHAR_LIMIT is None else CHAR_LIMIT} characters per Stream")
-            voice = gr.Dropdown(list(CHOICES.items()), value='af_heart', label='Voice', info='Quality and availability vary by language')
-            speed = gr.Slider(minimum=0.5, maximum=2, value=1, step=0.1, label='Speed')
-        with gr.Column():
-            gr.TabbedInterface([generate_tab, stream_tab], ['Generate', 'Stream'])
-    generate_btn.click(fn=generate_first, inputs=[text, voice, speed], outputs=[out_audio, out_ps], api_name=API_NAME)
-    tokenize_btn.click(fn=tokenize_first, inputs=[text, voice], outputs=[out_ps], api_name=API_NAME)
-    stream_event = stream_btn.click(fn=generate_all, inputs=[text, voice, speed], outputs=[out_stream], api_name=API_NAME)
-    stop_btn.click(fn=None, cancels=stream_event)
-    predict_btn.click(fn=predict, inputs=[text, voice, speed], outputs=[out_audio], api_name=API_NAME)
-
-if __name__ == '__main__':
-    app.queue(api_open=True).launch(show_api=True, ssr_mode=True)
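The deleted app exposed its button handlers under api_name='tts', so the static frontend has to reach a Kokoro TTS backend over the network instead. A minimal, hypothetical sketch of such a call from the surviving TypeScript frontend using @gradio/client (already in front/package.json); the Space name, endpoint, and payload shape mirror the deleted app.py but are assumptions, not part of this commit:

// Hypothetical sketch, not from this commit: call a remote Kokoro TTS Space.
import { Client } from '@gradio/client';

async function tts(text: string, voice = 'af_heart', speed = 1) {
  // 'hexgrad/Kokoro-TTS' is an assumed Space name; the endpoint '/tts'
  // matches API_NAME = 'tts' in the deleted app.py.
  const client = await Client.connect('hexgrad/Kokoro-TTS');
  const result = await client.predict('/tts', { text, voice, speed });
  return result.data; // audio output as serialized by the Gradio API
}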
front/package-lock.json
CHANGED
@@ -9,6 +9,7 @@
       "version": "0.0.0",
       "dependencies": {
         "@gradio/client": "^1.12.0",
+        "@huggingface/hub": "^1.0.1",
         "@sec-ant/readable-stream": "^0.6.0",
         "autoprefixer": "^10.4.20",
         "base64-arraybuffer": "^1.0.2",
@@ -954,6 +955,24 @@
         "node": ">=18.0.0"
       }
     },
+    "node_modules/@huggingface/hub": {
+      "version": "1.0.1",
+      "resolved": "https://registry.npmjs.org/@huggingface/hub/-/hub-1.0.1.tgz",
+      "integrity": "sha512-wogGVETaNUV/wYBkny0uQD48L0rK9cttVtbaA1Rw/pGCuSYoZ8YlvTV6zymsGJfXaxQU8zup0aOR2XLIf6HVfg==",
+      "license": "MIT",
+      "dependencies": {
+        "@huggingface/tasks": "^0.15.9"
+      },
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@huggingface/tasks": {
+      "version": "0.15.9",
+      "resolved": "https://registry.npmjs.org/@huggingface/tasks/-/tasks-0.15.9.tgz",
+      "integrity": "sha512-cbnZcpMHKdhURWIplVP4obHxAZcxjyRm0zI7peTPksZN4CtIOMmJC4ZqGEymo0lk+0VNkXD7ULwFJ3JjT/VpkQ==",
+      "license": "MIT"
+    },
     "node_modules/@humanfs/core": {
       "version": "0.19.1",
       "resolved": "https://registry.npmjs.org/@humanfs/core/-/core-0.19.1.tgz",
front/package.json
CHANGED
@@ -5,13 +5,14 @@
   "type": "module",
   "scripts": {
     "dev": "vite",
-    "build": "tsc -b && vite build",
+    "build": "tsc -b && vite build && cp ./dist/index.html ../index.html",
     "lint": "eslint .",
    "format": "npm run lint && prettier --write .",
     "preview": "vite preview"
   },
   "dependencies": {
     "@gradio/client": "^1.12.0",
+    "@huggingface/hub": "^1.0.1",
     "@sec-ant/readable-stream": "^0.6.0",
     "autoprefixer": "^10.4.20",
     "base64-arraybuffer": "^1.0.2",
front/src/App.tsx
CHANGED
@@ -2,6 +2,7 @@ import { OpenInNewTab } from './utils/common';
 import { PodcastGenerator } from './components/PodcastGenerator';
 import { useState } from 'react';
 import { ScriptMaker } from './components/ScriptMaker';
+import { AuthCard } from './components/AuthCard';
 
 function App() {
   const [genratedScript, setGeneratedScript] = useState<string>('');
@@ -20,6 +21,8 @@ function App() {
         </p>
       </div>
 
+      <AuthCard />
+
       <ScriptMaker
         setScript={setGeneratedScript}
         setBusy={setBusy}
front/src/components/AuthCard.tsx
ADDED
@@ -0,0 +1,17 @@
+import { oauthLoginUrl, oauthHandleRedirectIfPresent } from "@huggingface/hub";
+
+const login = async () => {
+  const url = await oauthLoginUrl();
+  window.location.href = url;
+}
+
+export const AuthCard = () => {
+  return <div className="card bg-base-100 w-full shadow-xl">
+    <div className="card-body">
+      <h2 className="card-title">Step 0: Sign in to use Inference Providers</h2>
+      <div>
+        <button className="btn btn-primary" onClick={login}>🤗 Sign in with Hugging Face</button>
+      </div>
+    </div>
+  </div>
+}
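AuthCard imports oauthHandleRedirectIfPresent but never calls it, so the redirect leg of the OAuth flow presumably lives elsewhere in the app. A minimal sketch of what that handler looks like with @huggingface/hub; the initAuth name is illustrative, not from this commit:

// Hypothetical sketch: complete the OAuth flow that AuthCard's button starts.
import { oauthHandleRedirectIfPresent } from '@huggingface/hub';

async function initAuth(): Promise<string | null> {
  // Resolves to false when the page was not opened via an OAuth redirect.
  const oauth = await oauthHandleRedirectIfPresent();
  if (oauth) {
    // accessToken can be sent as a Bearer token for the inference-api
    // scope requested in the README's hf_oauth_scopes.
    return oauth.accessToken;
  }
  return null; // not signed in yet
}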
front/src/components/PodcastGenerator.tsx
CHANGED
@@ -132,7 +132,7 @@ export const PodcastGenerator = ({
         if (i === 0) {
           outputWav = step.audioBuffer;
           const openingSound = await loadWavAndDecode(openingSoundSrc);
-          outputWav = joinAudio(openingSound, outputWav!, -
+          outputWav = joinAudio(openingSound, outputWav!, -2);
         } else {
           const lastStep = steps[i - 1];
           outputWav = joinAudio(
front/src/utils/prompts.ts
CHANGED
@@ -16,6 +16,7 @@ Some rules:
 - First turns should be the introduction for the theme and speakers.
 - The script will be passed to TTS engine, make sure to write plain pronunciation, for example the www. must pronounced like "www dot". Do NOT add anything strange, do NOT add facial expression in the text.
 - Only use base ASCII, do NOT use ALL CAPS, strings are wrapped inside "..."
+- In the first turn, you must introduce the subject and speakers. Make up a story about the speakers, how they know each other, and why they are talking about the subject.
 
 There is an example (it is truncated):
 
front/src/utils/utils.ts
CHANGED
@@ -112,7 +112,8 @@ export const trimSilence = (audioBuffer: AudioBuffer): AudioBuffer => {
 export const joinAudio = (
   audio1: AudioBuffer,
   audio2: AudioBuffer,
-  gapSeconds: number
+  gapSeconds: number,
+  overlap: 'none' | 'cross-fade' = 'none'
 ): AudioBuffer => {
   const sampleRate = audio1.sampleRate;
   const numChannels = audio1.numberOfChannels;
@@ -175,12 +176,18 @@
     offset += nonOverlapLength;
 
     // Blend overlapping region.
-    [six removed lines: previous blending code, not preserved in this view]
+    if (overlap === 'cross-fade') {
+      for (let i = 0; i < effectiveOverlap; i++) {
+        // Linear crossfade:
+        const fadeOut = 1 - i / effectiveOverlap;
+        const fadeIn = i / effectiveOverlap;
+        outputData[offset + i] =
+          data1[nonOverlapLength + i] * fadeOut + data2[i] * fadeIn;
+      }
+    } else {
+      for (let i = 0; i < effectiveOverlap; i++) {
+        outputData[offset + i] = data1[nonOverlapLength + i] + data2[i];
+      }
+    }
   }
   offset += effectiveOverlap;
 
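A negative gapSeconds overlaps the tail of audio1 with the head of audio2, and the new overlap parameter chooses how that region is combined: 'cross-fade' ramps one signal down while the other ramps up, while the default 'none' simply sums the overlapping samples, which is what the three-argument call in PodcastGenerator.tsx now gets. A usage sketch (the buffer names are illustrative):

// Overlap the last 2 seconds of the opening sound with the first speech
// segment and blend them with a linear crossfade. Omitting the fourth
// argument would sum the overlapping samples instead ('none').
const joined: AudioBuffer = joinAudio(openingSound, firstSegment, -2, 'cross-fade');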
packages.txt
DELETED
@@ -1,2 +0,0 @@
-espeak-ng
-nodejs
requirements.txt
DELETED
@@ -1 +0,0 @@
-kokoro>=0.7.16