Spaces:

0xrushi
/

Priyanka-Chopra-TTS

Build error

App Files Files Community

0xrushi committed on May 21, 2022

Commit

c852ee2

1 Parent(s): 4204f70

some api changes for tts

Browse files

Files changed (6) hide show

.gitignore +1 -1
api_app.py +38 -12
app.py +1 -2
requirements.txt +2 -2
templates/details.html +131 -0
templates/index.html +143 -0

.gitignore CHANGED Viewed

	@@ -1,2 +1,2 @@
1	- audio.wav
2	*.png


1	+ *.wav
2	*.png

api_app.py CHANGED Viewed

@@ -1,4 +1,5 @@
-from flask import Flask, redirect, url_for, request
 import gradio as gr
 from synthesize import synthesize, load_model
 from synthesis.vocoders import Hifigan
@@ -13,22 +14,47 @@ def inference(text: str):
         model=model,
         text=text,
         graph_path="graph.png",
-        audio_path="audio.wav",
         vocoder=vocoder,
     )
-    return "audio.wav"
 app = Flask(__name__)
-@app.route('/process',methods = ['POST'])
-def login():
-   if request.method == 'POST':
-        text = request.json['text']
         inference(text)
-        data, fs = sf.read("audio.wav", dtype='float32')
-        sd.play(data, fs)
-        status = sd.wait()  # Wait until file is done playing
-        return {'success': True}
 if __name__ == '__main__':
-   app.run(debug = True)

+from flask import Flask, redirect, url_for, request, send_from_directory
+from flask import Flask, render_template, request, send_file
 import gradio as gr
 from synthesize import synthesize, load_model
 from synthesis.vocoders import Hifigan
         model=model,
         text=text,
         graph_path="graph.png",
+        audio_path="tts.wav",
         vocoder=vocoder,
     )
+    return True
 app = Flask(__name__)
+@app.route("/")
+def index():
+    """Render the landing page with every optional UI feature switched off."""
+    # These flags gate the style-wav input, the speaker selector and the
+    # "Model Details" button in templates/index.html.
+    template_flags = {
+        "show_details": False,
+        "use_multi_speaker": False,
+        "speaker_ids": None,
+        "use_gst": False,
+    }
+    return render_template("index.html", **template_flags)
+@app.route("/details")
+def details():
+    """Render the model-details page.
+
+    ``show_details`` is passed as False and no configs or CLI args are
+    supplied, so details.html takes its ``else`` branch and shows only the
+    "start server with --show_details=true" hint.
+    """
+    return render_template(
+        "details.html",
+        show_details=False,
+        model_config=None,
+        vocoder_config=None,
+        args=None,
+    )
+# This format is similar to the coqui-ai TTS API.
+@app.route('/api/tts', methods = ['GET']) #?text=this%20is%20a%20test&speaker_id=&style_wav=
+def get_file():
+    """Synthesize the ``text`` query parameter and return the audio as a WAV download.
+
+    Only ``text`` is read; any other query parameters are ignored by this handler.
+
+    Returns:
+        The generated ``tts.wav`` as an attachment, or a JSON message with
+        HTTP 400 when ``text`` is missing or empty, so that clients checking
+        the status (index.html does via ``res.ok``) do not treat the JSON
+        body as audio.
+    """
+    text = request.args.get('text')
+    print(request.args)
+    if not text:
+        print('empty text')
+        # A bare dict would be sent as 200 OK; mark it as a client error explicitly.
+        return {'message':'textempty'}, 400
+    else:
         inference(text)
+        # NOTE(review): every request writes the same tts.wav, so concurrent
+        # requests can overwrite each other's audio - confirm whether
+        # per-request file names are needed.
+        return send_from_directory('./', 'tts.wav', as_attachment=True)
 if __name__ == '__main__':
+   # The Werkzeug debugger lets anyone who can reach it execute Python code,
+   # so while listening on all interfaces it is opt-in (FLASK_DEBUG=1)
+   # instead of always enabled.
+   import os
+   app.run(debug = os.environ.get('FLASK_DEBUG') == '1', host='0.0.0.0', port=5002)

app.py CHANGED Viewed

@@ -15,8 +15,7 @@ article = """<p style='text-align: center'>
                 class='footer'>Blog</a> |
                 <a href='https://github.com/eugenesiow/practical-ml' target='_blank'
                 class='footer'>Github Repo</a></p>"""
-examples = ["Generate english speech from text using a Tacotron2 model.",
-            ""]
 def inference(text: str):
     synthesize(

                 class='footer'>Blog</a> |
                 <a href='https://github.com/eugenesiow/practical-ml' target='_blank'
                 class='footer'>Github Repo</a></p>"""
+examples = ["Generate english speech from text using a Tacotron2 model."]
 def inference(text: str):
     synthesize(

requirements.txt CHANGED Viewed

@@ -9,8 +9,8 @@ omegaconf==2.0.6
 numba==0.47
 webrtcvad==2.0.10
 requests==2.25.1
-torch==1.9.0+cu111
-torchvision==0.10.0+cu111
 torchaudio===0.9.0
 deepspeech==0.9.3
 llvmlite==0.32.1

 numba==0.47
 webrtcvad==2.0.10
 requests==2.25.1
+torch==1.9.0
+torchvision==0.10.0
 torchaudio===0.9.0
 deepspeech==0.9.3
 llvmlite==0.32.1

templates/details.html ADDED Viewed

	@@ -0,0 +1,131 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="utf-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
+  <meta name="description" content="">
+  <meta name="author" content="">
+  <title>TTS engine</title>
+  <!-- Bootstrap core CSS -->
+  <link href="https://stackpath.bootstrapcdn.com/bootstrap/4.1.1/css/bootstrap.min.css"
+    integrity="sha384-WskhaSGFgHYWDcbwN70/dfYBj47jz9qbsMId/iRN3ewGhXQFZCSftd1LZCfmhktB" crossorigin="anonymous"
+    rel="stylesheet">
+  <!-- Custom styles for this template -->
+  <style>
+    body {
+      padding-top: 54px;
+    }
+    @media (min-width: 992px) {
+      body {
+        padding-top: 56px;
+      }
+    }
+  </style>
+</head>
+<body>
+  <a href="https://github.com/mozilla/TTS"><img style="position: absolute; z-index:1000; top: 0; left: 0; border: 0;"
+      src="https://s3.amazonaws.com/github/ribbons/forkme_left_darkblue_121621.png" alt="Fork me on GitHub"></a>
+  {% if show_details == true %}
+  <div class="container">
+    <b>Model details</b>
+  </div>
+  <div class="container">
+    <details>
+      <summary>CLI arguments:</summary>
+      <table border="1" align="center" width="75%">
+        <tr>
+          <td> CLI key </td>
+          <td> Value </td>
+        </tr>
+        {% for key, value in args.items() %}
+        <tr>
+          <td>{{ key }}</td>
+          <td>{{ value }}</td>
+        </tr>
+        {% endfor %}
+      </table>
+    </details>
+  </div><br>
+  <div class="container">
+    {% if model_config != None %}
+    <details>
+      <summary>Model config:</summary>
+      <table border="1" align="center" width="75%">
+        <tr>
+          <td> Key </td>
+          <td> Value </td>
+        </tr>
+        {% for key, value in model_config.items() %}
+        <tr>
+          <td>{{ key }}</td>
+          <td>{{ value }}</td>
+        </tr>
+        {% endfor %}
+      </table>
+    </details>
+    {% endif %}
+  </div><br>
+  <div class="container">
+    {% if vocoder_config != None %}
+    <details>
+      <summary>Vocoder model config:</summary>
+      <table border="1" align="center" width="75%">
+        <tr>
+          <td> Key </td>
+          <td> Value </td>
+        </tr>
+        {% for key, value in vocoder_config.items() %}
+        <tr>
+          <td>{{ key }}</td>
+          <td>{{ value }}</td>
+        </tr>
+        {% endfor %}
+      </table>
+    </details>
+    {% endif %}
+  </div><br>
+  {% else %}
+  <div class="container">
+    <b>Please start server with --show_details=true to see details.</b>
+  </div>
+  {% endif %}
+</body>
+</html>

templates/index.html ADDED Viewed

	@@ -0,0 +1,143 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="utf-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
+    <meta name="description" content="🐸Coqui AI TTS demo server.">
+    <meta name="author" content="🐸Coqui AI TTS">
+    <title>TTS engine</title>
+    <!-- Bootstrap core CSS -->
+    <link href="https://stackpath.bootstrapcdn.com/bootstrap/4.1.1/css/bootstrap.min.css"
+        integrity="sha384-WskhaSGFgHYWDcbwN70/dfYBj47jz9qbsMId/iRN3ewGhXQFZCSftd1LZCfmhktB" crossorigin="anonymous"
+        rel="stylesheet">
+    <!-- Custom styles for this template -->
+    <style>
+        body {
+            padding-top: 54px;
+        }
+        @media (min-width: 992px) {
+            body {
+                padding-top: 56px;
+            }
+        }
+    </style>
+</head>
+<body>
+    <a href="https://github.com/coqui-ai/TTS"><img style="position: absolute; z-index:1000; top: 0; left: 0; border: 0;"
+            src="https://s3.amazonaws.com/github/ribbons/forkme_left_darkblue_121621.png" alt="Fork me on GitHub"></a>
+    <!-- Navigation -->
+    <!--
+    <nav class="navbar navbar-expand-lg navbar-dark bg-dark fixed-top">
+      <div class="container">
+        <a class="navbar-brand" href="#">Coqui TTS</a>
+        <button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarResponsive" aria-controls="navbarResponsive" aria-expanded="false" aria-label="Toggle navigation">
+          <span class="navbar-toggler-icon"></span>
+        </button>
+        <div class="collapse navbar-collapse" id="navbarResponsive">
+          <ul class="navbar-nav ml-auto">
+            <li class="nav-item active">
+              <a class="nav-link" href="#">Home
+                <span class="sr-only">(current)</span>
+              </a>
+            </li>
+          </ul>
+        </div>
+      </div>
+    </nav>
+    -->
+    <!-- Page Content -->
+    <div class="container">
+        <div class="row">
+            <div class="col-lg-12 text-center">
+                <img class="mt-5" src="{{url_for('static', filename='coqui-log-green-TTS.png')}}" align="middle"
+                    width="512" />
+                <ul class="list-unstyled">
+                </ul>
+                {%if use_gst%}
+                <input value='{"0": 0.1}' id="style_wav" placeholder="style wav (dict or path to wav).." size=45
+                    type="text" name="style_wav">
+                {%endif%}
+                <input id="text" placeholder="Type here..." size=45 type="text" name="text">
+                <button id="speak-button" name="speak">Speak</button><br /><br />
+                {%if use_multi_speaker%}
+                Choose a speaker:
+                <select id="speaker_id" name=speaker_id method="GET" action="/">
+                    {% for speaker_id in speaker_ids %}
+                    <option value="{{speaker_id}}" SELECTED>{{speaker_id}}</option>
+                    {% endfor %}
+                </select><br /><br />
+                {%endif%}
+                {%if show_details%}
+                <button id="details-button" onclick="location.href = 'details'" name="model-details">Model
+                    Details</button><br /><br />
+                {%endif%}
+                <audio id="audio" controls autoplay hidden></audio>
+                <p id="message"></p>
+            </div>
+        </div>
+    </div>
+    <!-- Bootstrap core JavaScript -->
+    <script>
+        // Shorthand for document.querySelector.
+        function q(selector) { return document.querySelector(selector) }
+        // Value of the control matching textId, or "" when no such element exists
+        // (the speaker and style inputs are only rendered conditionally).
+        function getTextValue(textId) {
+            const container = q(textId)
+            return container ? container.value : ""
+        }
+        // Object URL of the clip currently loaded in the player, kept so it can be released.
+        let currentAudioUrl = null
+        q('#text').focus()
+        // Start synthesis for the typed text; shared by the button click and the Enter key.
+        function do_tts(e) {
+            const text = q('#text').value
+            const speaker_id = getTextValue('#speaker_id')
+            const style_wav = getTextValue('#style_wav')
+            if (text) {
+                q('#message').textContent = 'Synthesizing...'
+                q('#speak-button').disabled = true
+                q('#audio').hidden = true
+                synthesize(text, speaker_id, style_wav)
+            }
+            e.preventDefault()
+            return false
+        }
+        q('#speak-button').addEventListener('click', do_tts)
+        q('#text').addEventListener('keyup', function (e) {
+            // KeyboardEvent.keyCode is deprecated; compare the key name instead.
+            if (e.key === 'Enter') {
+                do_tts(e)
+            }
+        })
+        // Request audio from /api/tts and load the response into the audio player.
+        function synthesize(text, speaker_id = "", style_wav = "") {
+            fetch(`/api/tts?text=${encodeURIComponent(text)}&speaker_id=${encodeURIComponent(speaker_id)}&style_wav=${encodeURIComponent(style_wav)}`, { cache: 'no-cache' })
+                .then(function (res) {
+                    if (!res.ok) throw Error(res.statusText)
+                    return res.blob()
+                }).then(function (blob) {
+                    // Release the previous clip's blob before replacing it;
+                    // otherwise every request leaks one object URL.
+                    if (currentAudioUrl) URL.revokeObjectURL(currentAudioUrl)
+                    currentAudioUrl = URL.createObjectURL(blob)
+                    q('#message').textContent = ''
+                    q('#speak-button').disabled = false
+                    q('#audio').src = currentAudioUrl
+                    q('#audio').hidden = false
+                }).catch(function (err) {
+                    q('#message').textContent = 'Error: ' + err.message
+                    q('#speak-button').disabled = false
+                })
+        }
+    </script>
+</body>
+</html>