add comments on pipeline
Browse files
front/src/utils/pipeline.ts
CHANGED
|
@@ -38,9 +38,10 @@ export const pipelineGeneratePodcast = async (
|
|
| 38 |
let outputWav: AudioBuffer;
|
| 39 |
const { speakerNames, turns } = podcast;
|
| 40 |
for (const turn of turns) {
|
| 41 |
-
// normalize it
|
| 42 |
turn.nextGapMilisecs =
|
| 43 |
Math.max(-600, Math.min(300, turn.nextGapMilisecs)) - 100;
|
|
|
|
| 44 |
turn.text = turn.text
|
| 45 |
.trim()
|
| 46 |
.replace(/’/g, "'")
|
|
@@ -49,6 +50,7 @@ export const pipelineGeneratePodcast = async (
|
|
| 49 |
}
|
| 50 |
const steps: GenerationStep[] = turns.map((turn) => ({ turn }));
|
| 51 |
onUpdate(0, steps.length);
|
|
|
|
| 52 |
for (let i = 0; i < steps.length; i++) {
|
| 53 |
const step = steps[i];
|
| 54 |
const speakerIdx = speakerNames.indexOf(step.turn.speakerName as string) as
|
|
@@ -60,9 +62,11 @@ export const pipelineGeneratePodcast = async (
|
|
| 60 |
if (i === 0) {
|
| 61 |
outputWav = step.audioBuffer;
|
| 62 |
if (isAddIntroMusic) {
|
|
|
|
| 63 |
const openingSound = await loadWavAndDecode(openingSoundSrc);
|
| 64 |
outputWav = joinAudio(openingSound, outputWav!, -2000);
|
| 65 |
} else {
|
|
|
|
| 66 |
outputWav = addSilence(outputWav!, true, 200);
|
| 67 |
}
|
| 68 |
} else {
|
|
@@ -76,6 +80,7 @@ export const pipelineGeneratePodcast = async (
|
|
| 76 |
onUpdate(i + 1, steps.length);
|
| 77 |
}
|
| 78 |
if (isAddNoise) {
|
|
|
|
| 79 |
outputWav = addNoise(outputWav!, 0.002);
|
| 80 |
}
|
| 81 |
// @ts-expect-error this is fine
|
|
|
|
| 38 |
let outputWav: AudioBuffer;
|
| 39 |
const { speakerNames, turns } = podcast;
|
| 40 |
for (const turn of turns) {
|
| 41 |
+
// normalize the gap so that it is neither too long nor too short
|
| 42 |
turn.nextGapMilisecs =
|
| 43 |
Math.max(-600, Math.min(300, turn.nextGapMilisecs)) - 100;
|
| 44 |
+
// normalize text input for TTS
|
| 45 |
turn.text = turn.text
|
| 46 |
.trim()
|
| 47 |
.replace(/’/g, "'")
|
|
|
|
| 50 |
}
|
| 51 |
const steps: GenerationStep[] = turns.map((turn) => ({ turn }));
|
| 52 |
onUpdate(0, steps.length);
|
| 53 |
+
// generate audio for each step (aka each turn)
|
| 54 |
for (let i = 0; i < steps.length; i++) {
|
| 55 |
const step = steps[i];
|
| 56 |
const speakerIdx = speakerNames.indexOf(step.turn.speakerName as string) as
|
|
|
|
| 62 |
if (i === 0) {
|
| 63 |
outputWav = step.audioBuffer;
|
| 64 |
if (isAddIntroMusic) {
|
| 65 |
+
// add intro music at the beginning to make it feel like a radio station
|
| 66 |
const openingSound = await loadWavAndDecode(openingSoundSrc);
|
| 67 |
outputWav = joinAudio(openingSound, outputWav!, -2000);
|
| 68 |
} else {
|
| 69 |
+
// if there is no intro music, add a little silence at the beginning
|
| 70 |
outputWav = addSilence(outputWav!, true, 200);
|
| 71 |
}
|
| 72 |
} else {
|
|
|
|
| 80 |
onUpdate(i + 1, steps.length);
|
| 81 |
}
|
| 82 |
if (isAddNoise) {
|
| 83 |
+
// small nit: adding a little background noise to the whole audio makes it sound more natural
|
| 84 |
outputWav = addNoise(outputWav!, 0.002);
|
| 85 |
}
|
| 86 |
// @ts-expect-error this is fine
|