Spaces:

yuvabe-ai
/

viseme3d_backend

Sleeping

App Files Files Community

Thamaraikannan committed on Jan 20

Commit

c0d0e84

verified ·

1 Parent(s): 60c771d

Update helpers/tts.js

Browse files

Files changed (1) hide show

helpers/tts.js +69 -50

helpers/tts.js CHANGED Viewed

@@ -12,8 +12,8 @@ let SSML = `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml
 </speak>`;
-const key = process.env['AZURE_KEY'];
-const region = process.env['AZURE_REGION'];
 // Check if variables are loaded
 if (!key || !region) {
@@ -33,53 +33,72 @@ if (!key || !region) {
  * @param {*} text text to convert to audio/speech
  * @param {*} voice optional - presumably the voice to synthesize with; not read by the function body (the output file name is generated internally)
  */
-const textToSpeech = async (text, voice) => { // synthesizes text through the SSML template; voice is not referenced anywhere in this body
-  return new Promise((resolve, reject) => { // wraps the callback-style speakSsmlAsync call in a Promise
-    let ssml = SSML.replace("__TEXT__", text); // string pattern: only the first "__TEXT__" is replaced; text is inserted without XML escaping
-    const speechConfig = sdk.SpeechConfig.fromSubscription(key, region);
-    speechConfig.speechSynthesisOutputFormat = 5; // mp3 (numeric format value as chosen by the author - TODO confirm against the SDK enum)
-    let audioConfig = null;
-    let randomString = Math.random().toString(36).slice(2, 7); // up to 5 base-36 characters used as a per-call file suffix
-    let filename = `/tmp/speech-${randomString}.mp3`;  // Write to /tmp directory
-    audioConfig = sdk.AudioConfig.fromAudioFileOutput(filename); // audio output is directed to the file path built above
-    let blendData = []; // accumulates one { time, blendshapes } entry per frame
-    let timeStep = 1/60; // fixed increment added to timeStamp per frame; presumably seconds at 60 fps - confirm
-    let timeStamp = 0; // running time assigned to the next frame; never reset inside this call
-    const synthesizer = new sdk.SpeechSynthesizer(speechConfig, audioConfig);
-    synthesizer.visemeReceived = function (s, e) { // handler assigned for viseme events; s is unused
-      var animation = JSON.parse(e.animation); // throws if e.animation is empty or not valid JSON
-      _.each(animation.BlendShapes, blendArray => { // one iteration per frame in the event payload
-        let blend = {};
-        _.each(blendShapeNames, (shapeName, i) => { // pairs each name with the value at the same index
-          blend[shapeName] = blendArray[i];
-        });
-        blendData.push({
-          time: timeStamp,
-          blendshapes: blend
-        });
-        timeStamp += timeStep;
-      });
-    };
-    synthesizer.speakSsmlAsync(
-      ssml,
-      result => { // result is not inspected; the promise resolves whatever the synthesis outcome was
-        synthesizer.close();
-        resolve({ blendData, filename: `/speech-${randomString}.mp3` }); // NOTE(review): returned path omits the /tmp prefix used when writing the file - confirm how callers resolve it
-      },
-      error => {
-        synthesizer.close(); // the synthesizer is closed on the failure path as well
-        reject(error);
-      }
-    );
-  });
 };

 </speak>`;
+const key = process.env.AZURE_KEY;
+const region = process.env.AZURE_REGION;
 // Check if variables are loaded
 if (!key || !region) {
  * @param {*} text text to convert to audio/speech
  * @param {*} voice optional - presumably the voice to synthesize with; not read by the function body (the output file name is generated internally)
  */
+const textToSpeech = async (text, voice)=> {
+    // convert callback function to promise
+    return new Promise((resolve, reject) => {
+        let ssml = SSML.replace("__TEXT__", text);
+        const speechConfig = sdk.SpeechConfig.fromSubscription(key, region);
+        speechConfig.speechSynthesisOutputFormat = 5; // mp3
+        let audioConfig = null;
+        // if (filename) {
+        let randomString = Math.random().toString(36).slice(2, 7);
+        let filename = `./public/speech-${randomString}.mp3`;
+        audioConfig = sdk.AudioConfig.fromAudioFileOutput(filename);
+        // }
+        let blendData = [];
+        let timeStep = 1/60;
+        let timeStamp = 0;
+        const synthesizer = new sdk.SpeechSynthesizer(speechConfig, audioConfig);
+        // Subscribes to viseme received event
+        synthesizer.visemeReceived = function (s, e) {
+            // `Animation` is an xml string for SVG or a json string for blend shapes
+            var animation = JSON.parse(e.animation);
+            _.each(animation.BlendShapes, blendArray => {
+                let blend = {};
+                _.each(blendShapeNames, (shapeName, i) => {
+                    blend[shapeName] = blendArray[i];
+                });
+                blendData.push({
+                    time: timeStamp,
+                    blendshapes: blend
+                });
+                console.log(`Timestamp: ${timeStamp.toFixed(3)}s`);
+                console.log(JSON.stringify(blend, null, 2));
+                timeStamp += timeStep;
+            });
+        }
+        synthesizer.speakSsmlAsync(
+            ssml,
+            result => {
+                synthesizer.close();
+                resolve({blendData, filename: `/speech-${randomString}.mp3`});
+            },
+            error => {
+                synthesizer.close();
+                reject(error);
+            });
+    });
 };
+module.exports = textToSpeech;