Spaces:

ngxson
/

kokoro-podcast-generator

Running

App Files Files Community

kokoro-podcast-generator / front /src /utils /pipeline.ts

ngxson HF staff

add comments on pipeline

983ba85 22 days ago

raw

history blame contribute delete

2.65 kB

	import { Podcast, PodcastTurn } from './types';
	import {
	addNoise,
	addSilence,
	generateAudio,
	joinAudio,
	loadWavAndDecode,
	} from './utils';

	// taken from https://freesound.org/people/artxmp1/sounds/660540
	import openingSoundSrc from '../opening-sound.wav';

	/**
	 * One unit of work in the podcast pipeline: a single turn plus the audio
	 * produced for it.
	 */
	export interface GenerationStep {
	// The podcast turn this step synthesizes.
	turn: PodcastTurn;
	// Decoded audio for the turn; unset until the pipeline has generated and
	// decoded the turn's audio.
	audioBuffer?: AudioBuffer;
	}

	/**
	 * Options accepted by `pipelineGeneratePodcast`.
	 */
	export interface PodcastGenerationOptions {
	// The podcast to render; the pipeline reads its `speakerNames` and `turns`.
	podcast: Podcast;
	// Voice used for turns spoken by the first entry of `podcast.speakerNames`.
	speaker1: string;
	// Voice used for every other turn.
	speaker2: string;
	// Forwarded unchanged to `generateAudio` for every turn
	// (presumably a playback-rate multiplier — confirm against `generateAudio`).
	speed: number;
	// When true, the opening sound is joined in front of the first turn;
	// when false, a short silence is added at the beginning instead.
	isAddIntroMusic: boolean;
	// When true, a small amount of background noise is added to the final audio.
	isAddNoise: boolean;
	}

	// Bounds (in milliseconds) applied to each turn's gap before joining.
	const MIN_TURN_GAP_MS = -600;
	const MAX_TURN_GAP_MS = 300;
	// Constant subtracted from every clamped gap.
	const TURN_GAP_OFFSET_MS = 100;
	// Gap passed to `joinAudio` between the opening sound and the first turn
	// (negative; presumably makes the speech overlap the end of the intro).
	const INTRO_GAP_MS = -2000;
	// Silence added at the beginning when there is no intro music.
	const LEADING_SILENCE_MS = 200;
	// Amount of background noise passed to `addNoise`.
	const NOISE_LEVEL = 0.002;

	/**
	 * Renders a whole podcast into a single audio buffer.
	 *
	 * Each turn is synthesized in order with the voice of its speaker, decoded,
	 * and appended to the running output using the (normalized) gap of the
	 * previous turn. Optionally an opening sound is placed before the first turn
	 * and background noise is added to the final result.
	 *
	 * The `podcast` passed in is not modified: gap and text normalization are
	 * applied to copies of the turns, so the same podcast can be rendered
	 * repeatedly with identical results.
	 *
	 * @param options - Podcast, voices, speed and post-processing flags.
	 * @param onUpdate - Progress callback; called once with `(0, total)` before
	 *   generation starts and then with `(done, total)` after each turn.
	 * @returns The joined audio for the whole podcast.
	 * @throws Error with message `outputWav is undefined` when the podcast has
	 *   no turns (nothing was generated).
	 */
	export const pipelineGeneratePodcast = async (
	  {
	    podcast,
	    speaker1,
	    speaker2,
	    speed,
	    isAddIntroMusic,
	    isAddNoise,
	  }: PodcastGenerationOptions,
	  onUpdate: (done: number, total: number) => void
	): Promise<AudioBuffer> => {
	  const { speakerNames, turns } = podcast;

	  // Normalize every turn on a shallow copy so the caller's data stays intact
	  // (assumes turns are plain data objects — TODO confirm against ./types).
	  const steps: GenerationStep[] = turns.map((turn) => ({
	    turn: {
	      ...turn,
	      // normalize the gap, make it not too long or too short
	      nextGapMilisecs:
	        Math.max(
	          MIN_TURN_GAP_MS,
	          Math.min(MAX_TURN_GAP_MS, turn.nextGapMilisecs)
	        ) - TURN_GAP_OFFSET_MS,
	      // normalize text input for TTS: trim and replace curly quotes
	      text: turn.text.trim().replace(/’/g, "'").replace(/[“”]/g, '"'),
	    },
	  }));
	  const total = steps.length;
	  onUpdate(0, total);

	  let outputWav: AudioBuffer | undefined;
	  // Normalized gap of the previously processed turn.
	  let previousGapMs = 0;
	  let done = 0;

	  // generate audio for each step (aka each turn)
	  for (const step of steps) {
	    // `indexOf` yields -1 for a speaker that is not listed; such turns fall
	    // back to the second voice, exactly like turns of the second speaker.
	    const speakerIdx = speakerNames.indexOf(step.turn.speakerName as string);
	    const speakerVoice = speakerIdx === 0 ? speaker1 : speaker2;
	    const url = await generateAudio(step.turn.text, speakerVoice, speed);
	    const turnAudio = await loadWavAndDecode(url);
	    step.audioBuffer = turnAudio;

	    if (outputWav === undefined) {
	      // First turn: start the output.
	      if (isAddIntroMusic) {
	        // add intro music at the beginning to make it feel like a radio station
	        const openingSound = await loadWavAndDecode(openingSoundSrc);
	        outputWav = joinAudio(openingSound, turnAudio, INTRO_GAP_MS);
	      } else {
	        // if there is no intro music, add a little silence at the beginning
	        outputWav = addSilence(turnAudio, true, LEADING_SILENCE_MS);
	      }
	    } else {
	      outputWav = joinAudio(outputWav, turnAudio, previousGapMs);
	    }

	    previousGapMs = step.turn.nextGapMilisecs;
	    done += 1;
	    onUpdate(done, total);
	  }

	  // Check before any post-processing so an empty podcast fails with a clear
	  // error instead of handing `undefined` to `addNoise`.
	  if (outputWav === undefined) {
	    throw new Error('outputWav is undefined');
	  }

	  // small nit: adding a little background noise to the whole audio makes it
	  // sound more natural
	  return isAddNoise ? addNoise(outputWav, NOISE_LEVEL) : outputWav;
	};