import { Podcast, PodcastTurn } from './types';
import {
  addNoise,
  addSilence,
  generateAudio,
  joinAudio,
  loadWavAndDecode,
} from './utils';

// taken from https://freesound.org/people/artxmp1/sounds/660540
import openingSoundSrc from '../opening-sound.wav';

/** One unit of work in the pipeline: a turn plus, once synthesized, its audio. */
export interface GenerationStep {
  turn: PodcastTurn;
  /** Set after the turn's audio has been generated and decoded. */
  audioBuffer?: AudioBuffer;
}

/** Options accepted by {@link pipelineGeneratePodcast}. */
export interface PodcastGenerationOptions {
  podcast: Podcast;
  /** Voice used for the turn whose speaker is the first entry of `podcast.speakerNames`. */
  speaker1: string;
  /** Voice used for every other turn. */
  speaker2: string;
  /** Forwarded unchanged to `generateAudio`. */
  speed: number;
  /** When true, the bundled opening sound is joined in front of the first turn. */
  isAddIntroMusic: boolean;
  /** When true, `addNoise` is applied to the final mix. */
  isAddNoise: boolean;
}

/**
 * Synthesizes every turn of a podcast, one after another, and joins the
 * results into a single audio buffer.
 *
 * The caller's `podcast` is not modified: gaps and text are normalized on
 * private copies of each turn, so calling this repeatedly with the same
 * `podcast` produces the same timing every time.
 *
 * @param options - Podcast content, voices and post-processing flags.
 * @param onUpdate - Progress callback; called once with `(0, total)` before
 *   generation starts and then with `(done, total)` after each turn.
 * @returns The joined audio of all turns.
 * @throws Error if the podcast has no turns, so there is nothing to return.
 */
export const pipelineGeneratePodcast = async (
  {
    podcast,
    speaker1,
    speaker2,
    speed,
    isAddIntroMusic,
    isAddNoise,
  }: PodcastGenerationOptions,
  onUpdate: (done: number, total: number) => void
): Promise<AudioBuffer> => {
  const { speakerNames, turns } = podcast;

  // Work on copies so the normalization below never leaks into the caller's
  // data (the "- 100" would otherwise compound on every regeneration).
  const steps: GenerationStep[] = turns.map((turn) => ({
    turn: {
      ...turn,
      // normalize the gap, make it not too long or too short
      nextGapMilisecs:
        Math.max(-600, Math.min(300, turn.nextGapMilisecs)) - 100,
      // normalize text input for TTS: typographic quotes become ASCII quotes
      text: turn.text
        .trim()
        .replace(/’/g, "'")
        .replace(/“/g, '"')
        .replace(/”/g, '"'),
    },
  }));

  onUpdate(0, steps.length);

  let outputWav: AudioBuffer | undefined;

  // generate audio for each step (aka each turn)
  for (let i = 0; i < steps.length; i++) {
    const step = steps[i];

    // Only the first listed speaker maps to speaker1; anything else
    // (including a name that is not in speakerNames) falls back to speaker2.
    const speakerIdx = speakerNames.indexOf(step.turn.speakerName as string);
    const speakerVoice = speakerIdx === 0 ? speaker1 : speaker2;

    const url = await generateAudio(step.turn.text, speakerVoice, speed);
    const audioBuffer = await loadWavAndDecode(url);
    step.audioBuffer = audioBuffer;

    if (outputWav === undefined) {
      if (isAddIntroMusic) {
        // add intro music at the beginning to make it feel like a radio station
        // NOTE(review): the -2000 gap presumably overlaps the intro's tail
        // with the first turn; confirm against joinAudio.
        const openingSound = await loadWavAndDecode(openingSoundSrc);
        outputWav = joinAudio(openingSound, audioBuffer, -2000);
      } else {
        // if there is no intro music, add a little silence at the beginning
        outputWav = addSilence(audioBuffer, true, 200);
      }
    } else {
      // The gap before this turn is the one recorded on the previous turn.
      const previousStep = steps[i - 1];
      outputWav = joinAudio(
        outputWav,
        audioBuffer,
        previousStep.turn.nextGapMilisecs
      );
    }

    onUpdate(i + 1, steps.length);
  }

  // Checked before any post-processing so an empty podcast fails with this
  // message instead of an error raised from inside addNoise.
  if (outputWav === undefined) {
    throw new Error('outputWav is undefined');
  }

  // small nit: a little background noise over the whole audio makes it
  // sound more natural
  return isAddNoise ? addNoise(outputWav, 0.002) : outputWav;
};