ngxson HF staff committed on
Commit
983ba85
·
1 Parent(s): 9eb519e

add comments on pipeline

Browse files
Files changed (1) hide show
  1. front/src/utils/pipeline.ts +6 -1
front/src/utils/pipeline.ts CHANGED
@@ -38,9 +38,10 @@ export const pipelineGeneratePodcast = async (
38
  let outputWav: AudioBuffer;
39
  const { speakerNames, turns } = podcast;
40
  for (const turn of turns) {
41
- // normalize it
42
  turn.nextGapMilisecs =
43
  Math.max(-600, Math.min(300, turn.nextGapMilisecs)) - 100;
 
44
  turn.text = turn.text
45
  .trim()
46
  .replace(/’/g, "'")
@@ -49,6 +50,7 @@ export const pipelineGeneratePodcast = async (
49
  }
50
  const steps: GenerationStep[] = turns.map((turn) => ({ turn }));
51
  onUpdate(0, steps.length);
 
52
  for (let i = 0; i < steps.length; i++) {
53
  const step = steps[i];
54
  const speakerIdx = speakerNames.indexOf(step.turn.speakerName as string) as
@@ -60,9 +62,11 @@ export const pipelineGeneratePodcast = async (
60
  if (i === 0) {
61
  outputWav = step.audioBuffer;
62
  if (isAddIntroMusic) {
 
63
  const openingSound = await loadWavAndDecode(openingSoundSrc);
64
  outputWav = joinAudio(openingSound, outputWav!, -2000);
65
  } else {
 
66
  outputWav = addSilence(outputWav!, true, 200);
67
  }
68
  } else {
@@ -76,6 +80,7 @@ export const pipelineGeneratePodcast = async (
76
  onUpdate(i + 1, steps.length);
77
  }
78
  if (isAddNoise) {
 
79
  outputWav = addNoise(outputWav!, 0.002);
80
  }
81
  // @ts-expect-error this is fine
 
38
  let outputWav: AudioBuffer;
39
  const { speakerNames, turns } = podcast;
40
  for (const turn of turns) {
41
+ // normalize the gap, make it not too long or too short
42
  turn.nextGapMilisecs =
43
  Math.max(-600, Math.min(300, turn.nextGapMilisecs)) - 100;
44
+ // normalize text input for TTS
45
  turn.text = turn.text
46
  .trim()
47
  .replace(/’/g, "'")
 
50
  }
51
  const steps: GenerationStep[] = turns.map((turn) => ({ turn }));
52
  onUpdate(0, steps.length);
53
+ // generate audio for each step (aka each turn)
54
  for (let i = 0; i < steps.length; i++) {
55
  const step = steps[i];
56
  const speakerIdx = speakerNames.indexOf(step.turn.speakerName as string) as
 
62
  if (i === 0) {
63
  outputWav = step.audioBuffer;
64
  if (isAddIntroMusic) {
65
+ // add intro music at the beginning to make it feel like a radio station
66
  const openingSound = await loadWavAndDecode(openingSoundSrc);
67
  outputWav = joinAudio(openingSound, outputWav!, -2000);
68
  } else {
69
+ // if there is no intro music, add a little silence at the beginning
70
  outputWav = addSilence(outputWav!, true, 200);
71
  }
72
  } else {
 
80
  onUpdate(i + 1, steps.length);
81
  }
82
  if (isAddNoise) {
83
+ // small nit: adding a little background noise to the whole audio makes it sound more natural
84
  outputWav = addNoise(outputWav!, 0.002);
85
  }
86
  // @ts-expect-error this is fine