File size: 2,651 Bytes
64db5cc 983ba85 64db5cc 983ba85 64db5cc 983ba85 64db5cc 983ba85 64db5cc 983ba85 64db5cc 983ba85 64db5cc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
import { Podcast, PodcastTurn } from './types';
import {
addNoise,
addSilence,
generateAudio,
joinAudio,
loadWavAndDecode,
} from './utils';
// taken from https://freesound.org/people/artxmp1/sounds/660540
import openingSoundSrc from '../opening-sound.wav';
/**
 * One unit of work in the podcast generation pipeline: a single podcast turn
 * together with the audio produced for it.
 */
export interface GenerationStep {
/** The podcast turn this step generates audio for. */
turn: PodcastTurn;
/** Decoded audio for the turn; unset until the turn has been generated and decoded. */
audioBuffer?: AudioBuffer;
}
/**
 * Options accepted by `pipelineGeneratePodcast`.
 */
export interface PodcastGenerationOptions {
/** The podcast to render; its `speakerNames` and `turns` drive the generation. */
podcast: Podcast;
/** Voice passed to the audio generator for turns spoken by the first entry of `podcast.speakerNames`. */
speaker1: string;
/** Voice passed to the audio generator for every other turn. */
speaker2: string;
/** Forwarded unchanged to `generateAudio`. NOTE(review): presumably a playback-rate multiplier — confirm against `./utils`. */
speed: number;
/** When true, the opening sound is joined before the first turn; otherwise a short silence is prepended instead. */
isAddIntroMusic: boolean;
/** When true, `addNoise` is applied to the complete audio as a final step. */
isAddNoise: boolean;
}
/**
 * Renders a whole podcast to a single audio buffer.
 *
 * Each turn is synthesized one after another with `generateAudio`, decoded with
 * `loadWavAndDecode`, and joined onto the audio accumulated so far. The first
 * turn is preceded either by the opening sound or by a short silence.
 *
 * The `podcast` passed in is never modified: gap and text normalization is
 * applied to private copies of the turns, so calling this function repeatedly
 * with the same `podcast` object always yields the same timing.
 *
 * @param options - Podcast, voices, speed and post-processing flags.
 * @param onUpdate - Progress callback. Called once with `(0, total)` before any
 *   audio is generated, then with `(done, total)` after each turn is joined.
 * @returns The complete podcast audio.
 * @throws Error if the podcast has no turns, so there is no audio to return.
 */
export const pipelineGeneratePodcast = async (
  {
    podcast,
    speaker1,
    speaker2,
    speed,
    isAddIntroMusic,
    isAddNoise,
  }: PodcastGenerationOptions,
  onUpdate: (done: number, total: number) => void
): Promise<AudioBuffer> => {
  const { speakerNames, turns } = podcast;

  // Normalize copies rather than the caller's turns: the gap adjustment is not
  // idempotent (clamp, then subtract 100), so mutating the input would shorten
  // every gap a little more on each re-generation of the same podcast.
  const steps: GenerationStep[] = turns.map((turn) => ({
    turn: {
      ...turn,
      // normalize the gap, make it not too long or too short
      nextGapMilisecs:
        Math.max(-600, Math.min(300, turn.nextGapMilisecs)) - 100,
      // normalize text input for TTS: typographic quotes become plain ASCII quotes
      text: turn.text
        .trim()
        .replace(/’/g, "'")
        .replace(/[“”]/g, '"'),
    },
  }));

  onUpdate(0, steps.length);

  let outputWav: AudioBuffer | undefined;
  // Gap (already normalized) declared by the previously processed turn.
  let previousGapMilisecs = 0;

  // generate audio for each step (aka each turn)
  for (const [i, step] of steps.entries()) {
    // Any speaker that is not the first entry of speakerNames (including an
    // unknown name, for which indexOf returns -1) uses the second voice.
    const speakerIdx = speakerNames.indexOf(step.turn.speakerName as string);
    const speakerVoice = speakerIdx === 0 ? speaker1 : speaker2;

    const url = await generateAudio(step.turn.text, speakerVoice, speed);
    const audioBuffer = await loadWavAndDecode(url);
    step.audioBuffer = audioBuffer;

    if (outputWav === undefined) {
      if (isAddIntroMusic) {
        // add intro music at the beginning to make it feel like a radio station
        // NOTE(review): -2000 is presumably an overlap in milliseconds — confirm in ./utils
        const openingSound = await loadWavAndDecode(openingSoundSrc);
        outputWav = joinAudio(openingSound, audioBuffer, -2000);
      } else {
        // if there is no intro music, add a little silence at the beginning
        outputWav = addSilence(audioBuffer, true, 200);
      }
    } else {
      outputWav = joinAudio(outputWav, audioBuffer, previousGapMilisecs);
    }

    previousGapMilisecs = step.turn.nextGapMilisecs;
    onUpdate(i + 1, steps.length);
  }

  // Guard before any post-processing so an empty podcast fails with a clear
  // error instead of handing an undefined buffer to addNoise.
  if (outputWav === undefined) {
    throw new Error('outputWav is undefined');
  }

  if (isAddNoise) {
    // small nit: adding a little background noise to the whole audio makes it sound more natural
    outputWav = addNoise(outputWav, 0.002);
  }

  return outputWav;
};
|