usuario101 committed on
Commit
1a0d463
·
verified ·
1 Parent(s): b47a51b

Upload 15 files

Browse files
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Probando No Funciona20
3
+ emoji: 👁
4
+ colorFrom: yellow
5
+ colorTo: yellow
6
+ sdk: gradio
7
+ sdk_version: 5.20.0
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
import os

# SECURITY NOTE(review): this script executes arbitrary Python source taken
# from the APP environment variable. Anyone able to set that variable controls
# what runs here; confirm this is intended before deploying.
_app_source = os.environ.get("APP")
if _app_source is None:
    # exec(None) would fail with an opaque TypeError; fail with a clear message.
    raise RuntimeError("The APP environment variable is not set; nothing to execute.")
exec(_app_source)
edge_tts/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from . import exceptions
2
+ from .communicate import Communicate
3
+ from .version import __version__, __version_info__
4
+ from .voices import VoicesManager, list_voices
5
+
6
+ __all__ = [
7
+ "Communicate",
8
+ "exceptions",
9
+ "__version__",
10
+ "__version_info__",
11
+ "VoicesManager",
12
+ "list_voices",
13
+ ]
edge_tts/__main__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ """Main entrypoint for the edge-tts package."""
2
+
3
+ from .util import main
4
+
5
+ if __name__ == "__main__":
6
+ main()
edge_tts/communicate.py ADDED
@@ -0,0 +1,552 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Communicate with the service. Only the Communicate class should be used by
2
+ end-users. The other classes and functions are for internal use only."""
3
+
4
+ import asyncio
5
+ import concurrent.futures
6
+ import json
7
+ import ssl
8
+ import time
9
+ import uuid
10
+ from contextlib import nullcontext
11
+ from io import TextIOWrapper
12
+ from queue import Queue
13
+ from typing import (
14
+ AsyncGenerator,
15
+ ContextManager,
16
+ Dict,
17
+ Generator,
18
+ List,
19
+ Optional,
20
+ Tuple,
21
+ Union,
22
+ )
23
+ from xml.sax.saxutils import escape
24
+
25
+ import aiohttp
26
+ import certifi
27
+
28
+ from .constants import DEFAULT_VOICE, SEC_MS_GEC_VERSION, WSS_HEADERS, WSS_URL
29
+ from .data_classes import TTSConfig
30
+ from .drm import DRM
31
+ from .exceptions import (
32
+ NoAudioReceived,
33
+ UnexpectedResponse,
34
+ UnknownResponse,
35
+ WebSocketError,
36
+ )
37
+ from .typing import CommunicateState, TTSChunk
38
+
39
+
40
def get_headers_and_data(
    data: bytes, header_length: int
) -> Tuple[Dict[bytes, bytes], bytes]:
    """
    Split a raw service message into its header mapping and its payload.

    The first ``header_length`` bytes are treated as CRLF-separated
    ``key:value`` lines; everything after ``header_length + 2`` is returned
    untouched as the payload.

    Args:
        data (bytes): The raw message.
        header_length (int): Number of leading bytes that hold the headers.

    Returns:
        tuple: ``(headers, payload)`` where headers maps bytes keys to bytes values.

    Raises:
        TypeError: If ``data`` is not bytes.
        ValueError: If a header line does not contain a ``:`` separator.
    """
    if not isinstance(data, bytes):
        raise TypeError("data must be bytes")

    header_block = data[:header_length]
    payload = data[header_length + 2 :]

    # Each line splits on the first colon only, so values may contain colons.
    # dict() raises ValueError for a line that yields fewer than two parts.
    headers = dict(line.split(b":", 1) for line in header_block.split(b"\r\n"))

    return headers, payload
62
+
63
+
64
# Translation table mapping every unsupported control code point to a space.
# Covered ranges: 0-8, 11-12 and 14-31 (tab, LF and CR are left alone).
_INCOMPATIBLE_TO_SPACE = {
    code: " " for code in (*range(0, 9), *range(11, 13), *range(14, 32))
}


def remove_incompatible_characters(string: Union[str, bytes]) -> str:
    """
    Replace control characters the service rejects with spaces.

    The vertical tab is the most common offender because it shows up in
    OCR-ed PDFs; leaving such characters in results in an error from the
    service.

    Args:
        string (str or bytes): The text to clean. Bytes are decoded as UTF-8.

    Returns:
        str: The cleaned text, same length as the decoded input.

    Raises:
        TypeError: If ``string`` is neither str nor bytes.
    """
    if isinstance(string, bytes):
        string = string.decode("utf-8")
    if not isinstance(string, str):
        raise TypeError("string must be str or bytes")

    return string.translate(_INCOMPATIBLE_TO_SPACE)
90
+
91
+
92
def connect_id() -> str:
    """
    Generate a fresh random identifier.

    Returns:
        str: A version-4 UUID rendered as 32 lowercase hex digits (no dashes).
    """
    # UUID.hex is the canonical string form with the dashes already removed.
    return uuid.uuid4().hex
100
+
101
+
102
def split_text_by_byte_length(
    text: Union[str, bytes], byte_length: int
) -> Generator[bytes, None, None]:
    """
    Split text into chunks of at most ``byte_length`` bytes while attempting
    to keep words together. This function assumes the text will be placed
    inside of an XML tag, so it never splits in the middle of an XML entity
    (``&...;``). It also never splits in the middle of a multi-byte UTF-8
    character, so every yielded chunk can be decoded on its own.

    Args:
        text (str or bytes): The text to split. If bytes, it must be UTF-8 encoded.
        byte_length (int): The maximum byte length of each chunk.

    Yields:
        bytes: The next non-empty chunk, stripped of surrounding whitespace.

    Raises:
        TypeError: If ``text`` is neither str nor bytes.
        ValueError: If ``byte_length`` is not positive, or if it is too small
            to hold the next character or XML entity (or the text is invalid).
    """
    if isinstance(text, str):
        text = text.encode("utf-8")
    if not isinstance(text, bytes):
        raise TypeError("text must be str or bytes")

    if byte_length <= 0:
        raise ValueError("byte_length must be greater than 0")

    while len(text) > byte_length:
        # Prefer to split at the last space inside the window.
        split_at = text.rfind(b" ", 0, byte_length)

        if split_at == 0:
            # The only space in the window is the leading one; drop it and
            # look again instead of producing an empty chunk.
            text = text[1:]
            continue

        if split_at == -1:
            # No space: fall back to a hard split at byte_length, but back up
            # past UTF-8 continuation bytes (0b10xxxxxx) so a multi-byte
            # character is never cut in half. text[split_at] always exists
            # here because len(text) > byte_length.
            split_at = byte_length
            while split_at > 0 and (text[split_at] & 0xC0) == 0x80:
                split_at -= 1

        # Verify all & are terminated with a ; inside the chunk. If the last
        # entity is cut off, move the split point to just before its "&".
        while b"&" in text[:split_at]:
            ampersand_index = text.rindex(b"&", 0, split_at)
            if text.find(b";", ampersand_index, split_at) != -1:
                break
            split_at = ampersand_index

        if split_at <= 0:
            # Not even one character/entity fits: no progress is possible.
            raise ValueError("Maximum byte length is too small or invalid text")

        new_text = text[:split_at].strip()
        if new_text:
            yield new_text
        text = text[split_at:]

    new_text = text.strip()
    if new_text:
        yield new_text
155
+
156
+
157
def mkssml(tc: TTSConfig, escaped_text: Union[str, bytes]) -> str:
    """
    Build the SSML document for one synthesis request.

    The text is inserted verbatim, so it must already be XML-escaped. The
    ``xml:lang`` attribute of the root element is always ``en-US``.

    Args:
        tc (TTSConfig): Supplies the voice, pitch, rate and volume attributes.
        escaped_text (str or bytes): The escaped text. If bytes, it must be UTF-8 encoded.

    Returns:
        str: The SSML string.
    """
    body = (
        escaped_text.decode("utf-8")
        if isinstance(escaped_text, bytes)
        else escaped_text
    )

    fragments = (
        "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'>",
        f"<voice name='{tc.voice}'>",
        f"<prosody pitch='{tc.pitch}' rate='{tc.rate}' volume='{tc.volume}'>",
        f"{body}",
        "</prosody>",
        "</voice>",
        "</speak>",
    )
    return "".join(fragments)
180
+
181
+
182
def date_to_string() -> str:
    """
    Format the current UTC time the way JavaScript's ``Date`` prints it.

    Returns:
        str: Javascript-style date string.
    """
    # %Z would give an abbreviation such as "EEST" where the long name
    # ("Eastern European Summer Time") is needed, and there is no stdlib way
    # to obtain the long name. So the time is always reported in UTC with a
    # hard-coded zone suffix.
    utc_now = time.gmtime()
    js_date_format = "%a %b %d %Y %H:%M:%S GMT+0000 (Coordinated Universal Time)"
    return time.strftime(js_date_format, utc_now)
196
+
197
+
198
def ssml_headers_plus_data(request_id: str, timestamp: str, ssml: str) -> str:
    """
    Assemble the text frame that carries an SSML request.

    Args:
        request_id (str): Value for the ``X-RequestId`` header.
        timestamp (str): Value for the ``X-Timestamp`` header (a ``Z`` is appended).
        ssml (str): The SSML document placed after the blank line.

    Returns:
        str: The headers and data to be used in the request.
    """
    header_lines = (
        f"X-RequestId:{request_id}",
        "Content-Type:application/ssml+xml",
        # The trailing Z is not a mistake, it mirrors a Microsoft Edge bug.
        f"X-Timestamp:{timestamp}Z",
        "Path:ssml",
    )
    # Headers are CRLF-separated and terminated by an empty line.
    return "\r\n".join(header_lines) + "\r\n\r\n" + f"{ssml}"
213
+
214
+
215
def calc_max_mesg_size(tts_config: TTSConfig) -> int:
    """Calculates how many bytes of text fit in one message for the given config.

    The size of a complete request with empty text, plus a safety margin, is
    subtracted from the websocket message limit.

    Returns:
        int: The maximum message size.
    """
    websocket_max_size: int = 2**16
    margin_of_error: int = 50

    # A request with no text at all measures the fixed per-message overhead.
    empty_request = ssml_headers_plus_data(
        connect_id(),
        date_to_string(),
        mkssml(tts_config, ""),
    )
    overhead_per_message: int = len(empty_request) + margin_of_error

    return websocket_max_size - overhead_per_message
233
+
234
+
235
class Communicate:
    """
    Communicate with the service.

    An instance represents one text-to-speech request. The text is cleaned,
    XML-escaped and split into chunks up front; each chunk is then sent over
    its own websocket connection when the result is streamed. The stream may
    only be consumed once per instance.
    """

    def __init__(
        self,
        text: str,
        voice: str = DEFAULT_VOICE,
        *,
        rate: str = "+0%",
        volume: str = "+0%",
        pitch: str = "+0Hz",
        connector: Optional[aiohttp.BaseConnector] = None,
        proxy: Optional[str] = None,
        connect_timeout: Optional[int] = 10,
        receive_timeout: Optional[int] = 60,
    ):
        """
        Validate the parameters and prepare the request.

        Args:
            text (str): The text to synthesize.
            voice (str): The voice to use; validated by TTSConfig.
            rate (str): Speaking rate; validated by TTSConfig.
            volume (str): Volume; validated by TTSConfig.
            pitch (str): Pitch; validated by TTSConfig.
            connector: Optional aiohttp connector passed to the client session.
            proxy (str): Optional proxy passed to the websocket connection.
            connect_timeout (int): Used as the session's ``sock_connect`` timeout.
            receive_timeout (int): Used as the session's ``sock_read`` timeout.

        Raises:
            TypeError: If an argument has the wrong type.
        """
        # Validate TTS settings and store the TTSConfig object.
        self.tts_config = TTSConfig(voice, rate, volume, pitch)

        # Validate the text parameter.
        if not isinstance(text, str):
            raise TypeError("text must be str")

        # Split the text into multiple strings and store them. This is a
        # generator, so it is consumed lazily (and only once) by stream().
        self.texts = split_text_by_byte_length(
            escape(remove_incompatible_characters(text)),
            calc_max_mesg_size(self.tts_config),
        )

        # Validate the proxy parameter.
        if proxy is not None and not isinstance(proxy, str):
            raise TypeError("proxy must be str")
        self.proxy: Optional[str] = proxy

        # Validate the timeout parameters.
        # NOTE(review): the annotations say Optional[int], but None is
        # rejected by these checks; confirm which of the two is intended.
        if not isinstance(connect_timeout, int):
            raise TypeError("connect_timeout must be int")
        if not isinstance(receive_timeout, int):
            raise TypeError("receive_timeout must be int")
        self.session_timeout = aiohttp.ClientTimeout(
            total=None,
            connect=None,
            sock_connect=connect_timeout,
            sock_read=receive_timeout,
        )

        # Validate the connector parameter.
        if connector is not None and not isinstance(connector, aiohttp.BaseConnector):
            raise TypeError("connector must be aiohttp.BaseConnector")
        self.connector: Optional[aiohttp.BaseConnector] = connector

        # Store current state of TTS.
        self.state: CommunicateState = {
            "partial_text": b"",
            "offset_compensation": 0,
            "last_duration_offset": 0,
            "stream_was_called": False,
        }

    def __parse_metadata(self, data: bytes) -> TTSChunk:
        """
        Extract the first WordBoundary entry from an ``audio.metadata`` payload.

        The reported offset is shifted by the current offset compensation.

        Raises:
            UnknownResponse: If an entry has a type other than WordBoundary
                or SessionEnd.
            UnexpectedResponse: If no WordBoundary entry is present.
        """
        for meta_obj in json.loads(data)["Metadata"]:
            meta_type = meta_obj["Type"]
            if meta_type == "WordBoundary":
                current_offset = (
                    meta_obj["Data"]["Offset"] + self.state["offset_compensation"]
                )
                current_duration = meta_obj["Data"]["Duration"]
                return {
                    "type": meta_type,
                    "offset": current_offset,
                    "duration": current_duration,
                    "text": meta_obj["Data"]["text"]["Text"],
                }
            if meta_type in ("SessionEnd",):
                continue
            raise UnknownResponse(f"Unknown metadata type: {meta_type}")
        raise UnexpectedResponse("No WordBoundary metadata found")

    async def __stream(self) -> AsyncGenerator[TTSChunk, None]:
        """
        Synthesize the chunk stored in ``self.state["partial_text"]``.

        Opens a websocket, sends the speech configuration followed by the SSML
        request, then yields audio and WordBoundary chunks until the service
        signals ``turn.end``.

        Raises:
            NoAudioReceived: If the connection yielded no audio at all.
            UnexpectedResponse: If a message is malformed.
            UnknownResponse: If a text message has an unknown path.
            WebSocketError: If the websocket reports an error.
        """

        async def send_command_request() -> None:
            """Sends the command request to the service."""
            await websocket.send_str(
                f"X-Timestamp:{date_to_string()}\r\n"
                "Content-Type:application/json; charset=utf-8\r\n"
                "Path:speech.config\r\n\r\n"
                '{"context":{"synthesis":{"audio":{"metadataoptions":{'
                '"sentenceBoundaryEnabled":"false","wordBoundaryEnabled":"true"},'
                '"outputFormat":"audio-24khz-48kbitrate-mono-mp3"'
                "}}}}\r\n"
            )

        async def send_ssml_request() -> None:
            """Sends the SSML request to the service."""
            await websocket.send_str(
                ssml_headers_plus_data(
                    connect_id(),
                    date_to_string(),
                    mkssml(
                        self.tts_config,
                        self.state["partial_text"],
                    ),
                )
            )

        # audio_was_received indicates whether we have received audio data
        # from the websocket. This is so we can raise an exception if we
        # don't receive any audio data.
        audio_was_received = False

        # Create a new connection to the service.
        ssl_ctx = ssl.create_default_context(cafile=certifi.where())
        async with aiohttp.ClientSession(
            connector=self.connector,
            trust_env=True,
            timeout=self.session_timeout,
        ) as session, session.ws_connect(
            f"{WSS_URL}&Sec-MS-GEC={DRM.generate_sec_ms_gec()}"
            f"&Sec-MS-GEC-Version={SEC_MS_GEC_VERSION}"
            f"&ConnectionId={connect_id()}",
            compress=15,
            proxy=self.proxy,
            headers=WSS_HEADERS,
            ssl=ssl_ctx,
        ) as websocket:
            await send_command_request()

            await send_ssml_request()

            async for received in websocket:
                if received.type == aiohttp.WSMsgType.TEXT:
                    encoded_data: bytes = received.data.encode("utf-8")
                    parameters, data = get_headers_and_data(
                        encoded_data, encoded_data.find(b"\r\n\r\n")
                    )

                    path = parameters.get(b"Path", None)
                    if path == b"audio.metadata":
                        # Parse the metadata and yield it.
                        parsed_metadata = self.__parse_metadata(data)
                        yield parsed_metadata

                        # Update the last duration offset for use by the next SSML request.
                        self.state["last_duration_offset"] = (
                            parsed_metadata["offset"] + parsed_metadata["duration"]
                        )
                    elif path == b"turn.end":
                        # Update the offset compensation for the next SSML request.
                        self.state["offset_compensation"] = self.state[
                            "last_duration_offset"
                        ]

                        # Use average padding typically added by the service
                        # to the end of the audio data. This seems to work pretty
                        # well for now, but we might ultimately need to use a
                        # more sophisticated method like using ffmpeg to get
                        # the actual duration of the audio data.
                        self.state["offset_compensation"] += 8_750_000

                        # Exit the loop so we can send the next SSML request.
                        break
                    elif path not in (b"response", b"turn.start"):
                        raise UnknownResponse("Unknown path received")
                elif received.type == aiohttp.WSMsgType.BINARY:
                    # Message is too short to contain header length.
                    if len(received.data) < 2:
                        raise UnexpectedResponse(
                            "We received a binary message, but it is missing the header length."
                        )

                    # The first two bytes of the binary message contain the header length.
                    header_length = int.from_bytes(received.data[:2], "big")
                    if header_length > len(received.data):
                        raise UnexpectedResponse(
                            "The header length is greater than the length of the data."
                        )

                    # Parse the headers and data from the binary message.
                    parameters, data = get_headers_and_data(
                        received.data, header_length
                    )

                    # Check if the path is audio.
                    if parameters.get(b"Path") != b"audio":
                        raise UnexpectedResponse(
                            "Received binary message, but the path is not audio."
                        )

                    # At termination of the stream, the service sends a binary message
                    # with no Content-Type; this is expected. What is not expected is for
                    # an MPEG audio stream to be sent with no data.
                    content_type = parameters.get(b"Content-Type", None)
                    if content_type not in [b"audio/mpeg", None]:
                        raise UnexpectedResponse(
                            "Received binary message, but with an unexpected Content-Type."
                        )

                    # We only allow no Content-Type if there is no data.
                    if content_type is None:
                        if len(data) == 0:
                            continue

                        # If the data is not empty, then we need to raise an exception.
                        raise UnexpectedResponse(
                            "Received binary message with no Content-Type, but with data."
                        )

                    # If the data is empty now, then we need to raise an exception.
                    if len(data) == 0:
                        raise UnexpectedResponse(
                            "Received binary message, but it is missing the audio data."
                        )

                    # Yield the audio data.
                    audio_was_received = True
                    yield {"type": "audio", "data": data}
                elif received.type == aiohttp.WSMsgType.ERROR:
                    raise WebSocketError(
                        received.data if received.data else "Unknown error"
                    )

            if not audio_was_received:
                raise NoAudioReceived(
                    "No audio was received. Please verify that your parameters are correct."
                )

    async def stream(
        self,
    ) -> AsyncGenerator[TTSChunk, None]:
        """
        Streams audio and metadata from the service.

        Each text chunk is synthesized in turn. If a connection attempt fails
        with HTTP 403, the clock skew is adjusted from the response and the
        chunk is retried once.

        Raises:
            RuntimeError: If stream was already called on this instance.
            NoAudioReceived: If no audio is received from the service.
            UnexpectedResponse: If the response from the service is unexpected.
            UnknownResponse: If the response from the service is unknown.
            WebSocketError: If there is an error with the websocket.
        """

        # Check if stream was called before.
        if self.state["stream_was_called"]:
            raise RuntimeError("stream can only be called once.")
        self.state["stream_was_called"] = True

        # Stream the audio and metadata from the service. The loop target
        # stores each chunk directly in the state dict, where __stream reads it.
        for self.state["partial_text"] in self.texts:
            try:
                async for message in self.__stream():
                    yield message
            except aiohttp.ClientResponseError as e:
                if e.status != 403:
                    raise

                DRM.handle_client_response_error(e)
                async for message in self.__stream():
                    yield message

    async def save(
        self,
        audio_fname: Union[str, bytes],
        metadata_fname: Optional[Union[str, bytes]] = None,
    ) -> None:
        """
        Save the audio and metadata to the specified files.

        Audio chunks are written to ``audio_fname`` in binary mode. If
        ``metadata_fname`` is given, each WordBoundary chunk is written to it
        as one JSON object per line.
        """
        metadata: Union[TextIOWrapper, ContextManager[None]] = (
            open(metadata_fname, "w", encoding="utf-8")
            if metadata_fname is not None
            else nullcontext()
        )
        with metadata, open(audio_fname, "wb") as audio:
            async for message in self.stream():
                if message["type"] == "audio":
                    audio.write(message["data"])
                elif (
                    isinstance(metadata, TextIOWrapper)
                    and message["type"] == "WordBoundary"
                ):
                    json.dump(message, metadata)
                    metadata.write("\n")

    def stream_sync(self) -> Generator[TTSChunk, None, None]:
        """
        Synchronous interface for async stream method.

        The async stream runs on its own event loop in a worker thread and
        hands items over through a queue. Any exception raised by the stream
        is re-raised here after the items produced so far have been yielded.
        """

        def fetch_async_items(queue: Queue) -> None:  # type: ignore
            async def get_items() -> None:
                try:
                    async for item in self.stream():
                        queue.put(item)
                finally:
                    # Always post the end-of-stream sentinel, even on failure;
                    # otherwise the consumer would block on queue.get() forever.
                    queue.put(None)

            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            try:
                loop.run_until_complete(get_items())
            finally:
                loop.close()

        queue: Queue = Queue()  # type: ignore

        with concurrent.futures.ThreadPoolExecutor() as executor:
            future = executor.submit(fetch_async_items, queue)

            while True:
                item = queue.get()
                if item is None:
                    break
                yield item

            # Surface any exception raised in the worker thread instead of
            # silently ending the stream.
            future.result()

    def save_sync(
        self,
        audio_fname: Union[str, bytes],
        metadata_fname: Optional[Union[str, bytes]] = None,
    ) -> None:
        """Synchronous interface for async save method."""
        with concurrent.futures.ThreadPoolExecutor() as executor:
            future = executor.submit(
                asyncio.run, self.save(audio_fname, metadata_fname)
            )
            future.result()
edge_tts/constants.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Constants for the edge_tts package."""
2
+
3
# Host and path shared by the websocket endpoint and the voice-list endpoint.
BASE_URL = "speech.platform.bing.com/consumer/speech/synthesize/readaloud"
TRUSTED_CLIENT_TOKEN = "6A5AA1D4EAFF4E9FB37E23D68491D6F4"

WSS_URL = "wss://" + BASE_URL + "/edge/v1?TrustedClientToken=" + TRUSTED_CLIENT_TOKEN
VOICE_LIST = (
    "https://" + BASE_URL + "/voices/list?trustedclienttoken=" + TRUSTED_CLIENT_TOKEN
)

DEFAULT_VOICE = "en-US-EmmaMultilingualNeural"

# Browser version advertised in the headers and in the Sec-MS-GEC version.
CHROMIUM_FULL_VERSION = "130.0.2849.68"
CHROMIUM_MAJOR_VERSION = CHROMIUM_FULL_VERSION.partition(".")[0]
SEC_MS_GEC_VERSION = "1-" + CHROMIUM_FULL_VERSION

# Headers common to every request.
BASE_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
        " (KHTML, like Gecko)"
        f" Chrome/{CHROMIUM_MAJOR_VERSION}.0.0.0 Safari/537.36"
        f" Edg/{CHROMIUM_MAJOR_VERSION}.0.0.0"
    ),
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.9",
}

# Headers for the websocket connection: specific entries first, then the base set.
WSS_HEADERS = {
    "Pragma": "no-cache",
    "Cache-Control": "no-cache",
    "Origin": "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold",
    **BASE_HEADERS,
}

# Headers for the voice-list request: specific entries first, then the base set.
VOICE_HEADERS = {
    "Authority": "speech.platform.bing.com",
    "Sec-CH-UA": ", ".join(
        f'{brand};v="{version}"'
        for brand, version in (
            ('" Not;A Brand"', "99"),
            ('"Microsoft Edge"', CHROMIUM_MAJOR_VERSION),
            ('"Chromium"', CHROMIUM_MAJOR_VERSION),
        )
    ),
    "Sec-CH-UA-Mobile": "?0",
    "Accept": "*/*",
    "Sec-Fetch-Site": "none",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Dest": "empty",
    **BASE_HEADERS,
}
edge_tts/data_classes.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Data models for edge-tts."""
2
+
3
+ # pylint: disable=too-few-public-methods
4
+
5
+ import argparse
6
+ import re
7
+ from dataclasses import dataclass
8
+
9
+
10
@dataclass
class TTSConfig:
    """
    Represents the internal TTS configuration for edge-tts's Communicate class.

    All four fields are validated on construction. A short voice name such as
    ``cy-GB-NiaNeural`` is rewritten to the long form
    ``Microsoft Server Speech Text to Speech Voice (cy-GB, NiaNeural)``.
    """

    voice: str
    rate: str
    volume: str
    pitch: str

    @staticmethod
    def validate_string_param(param_name: str, param_value: str, pattern: str) -> str:
        """
        Validates the given string parameter based on type and pattern.

        Args:
            param_name (str): The name of the parameter (used in error messages).
            param_value (str): The value of the parameter.
            pattern (str): The pattern to validate the parameter against; it
                is applied with ``re.match``, i.e. anchored at the start only.

        Returns:
            str: The validated parameter.

        Raises:
            TypeError: If ``param_value`` is not a str.
            ValueError: If ``param_value`` does not match ``pattern``.
        """
        if not isinstance(param_value, str):
            raise TypeError(f"{param_name} must be str")
        if re.match(pattern, param_value) is None:
            raise ValueError(f"Invalid {param_name} '{param_value}'.")
        return param_value

    def __post_init__(self) -> None:
        """
        Validates the TTSConfig object after initialization.

        Raises:
            TypeError: If a field is not a str.
            ValueError: If a field does not have the expected format.
        """

        # Possible values for voice are:
        # - Microsoft Server Speech Text to Speech Voice (cy-GB, NiaNeural)
        # - cy-GB-NiaNeural
        # - fil-PH-AngeloNeural
        # Always send the first variant as that is what Microsoft Edge does.
        if not isinstance(self.voice, str):
            raise TypeError("voice must be str")
        match = re.match(r"^([a-z]{2,})-([A-Z]{2,})-(.+Neural)$", self.voice)
        if match is not None:
            lang = match.group(1)
            region = match.group(2)
            name = match.group(3)
            # A dash in the name means its first part belongs to the region,
            # e.g. "CN-liaoning" + "XiaobeiNeural".
            if name.find("-") != -1:
                region = region + "-" + name[: name.find("-")]
                name = name[name.find("-") + 1 :]
            self.voice = (
                "Microsoft Server Speech Text to Speech Voice"
                + f" ({lang}-{region}, {name})"
            )

        # Validate the voice, rate, volume, and pitch parameters. The patterns
        # end with \Z rather than $ because $ also matches just before a
        # trailing newline, which would let e.g. "+0%\n" through and into the
        # SSML attributes.
        self.validate_string_param(
            "voice",
            self.voice,
            r"^Microsoft Server Speech Text to Speech Voice \(.+,.+\)\Z",
        )
        self.validate_string_param("rate", self.rate, r"^[+-]\d+%\Z")
        self.validate_string_param("volume", self.volume, r"^[+-]\d+%\Z")
        self.validate_string_param("pitch", self.pitch, r"^[+-]\d+Hz\Z")
74
+
75
+
76
class UtilArgs(argparse.Namespace):
    """CLI arguments.

    A typed view of the parsed argument namespace: the class only declares
    attribute types and adds no behaviour of its own.
    """

    # NOTE(review): the per-field meanings below are inferred from the names;
    # confirm against the argument parser that fills this namespace.
    text: str  # presumably the text to synthesize
    file: str  # presumably a file to read the text from
    voice: str
    list_voices: bool
    rate: str
    volume: str
    pitch: str
    words_in_cue: int
    write_media: str  # presumably the output path for audio
    write_subtitles: str  # presumably the output path for subtitles
    proxy: str
edge_tts/drm.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """DRM module is used to handle DRM operations with clock skew correction.
2
+ Currently the only DRM operation is generating the Sec-MS-GEC token value
3
+ used in all API requests to Microsoft Edge's online text-to-speech service."""
4
+
5
+ import hashlib
6
+ from datetime import datetime as dt
7
+ from datetime import timezone as tz
8
+ from typing import Optional
9
+
10
+ import aiohttp
11
+
12
+ from .constants import TRUSTED_CLIENT_TOKEN
13
+ from .exceptions import SkewAdjustmentError
14
+
15
# Offset in seconds added to a Unix timestamp to switch to the Windows file
# time epoch (1601-01-01 00:00:00 UTC).
WIN_EPOCH = 11644473600
# Nanoseconds per second; divided by 100 to convert seconds into
# 100-nanosecond intervals (Windows file time format).
S_TO_NS = 1e9
17
+
18
+
19
class DRM:
    """
    Class to handle DRM operations with clock skew correction.

    All state lives on the class itself (``clock_skew_seconds``), so a
    correction applies to every later token generated in this process.
    """

    clock_skew_seconds: float = 0.0

    @staticmethod
    def adj_clock_skew_seconds(skew_seconds: float) -> None:
        """
        Adjust the clock skew in case the system clock is off.

        The given amount is added to the accumulated ``clock_skew_seconds``
        of the DRM class; it does not replace the previous value.

        Args:
            skew_seconds (float): The number of seconds to add to the clock skew.

        Returns:
            None
        """
        DRM.clock_skew_seconds = DRM.clock_skew_seconds + skew_seconds

    @staticmethod
    def get_unix_timestamp() -> float:
        """
        Gets the current timestamp in Unix format with clock skew correction.

        Returns:
            float: The current UTC Unix timestamp plus the accumulated skew.
        """
        utc_now = dt.now(tz.utc)
        return utc_now.timestamp() + DRM.clock_skew_seconds

    @staticmethod
    def parse_rfc2616_date(date: str) -> Optional[float]:
        """
        Parses an RFC 2616 date string into a Unix timestamp.

        The parsed time is always interpreted as UTC.

        Args:
            date (str): RFC 2616 date string to parse.

        Returns:
            Optional[float]: Unix timestamp of the parsed date string, or None if parsing failed.
        """
        try:
            parsed = dt.strptime(date, "%a, %d %b %Y %H:%M:%S %Z")
            as_utc = parsed.replace(tzinfo=tz.utc)
            return as_utc.timestamp()
        except ValueError:
            return None

    @staticmethod
    def handle_client_response_error(e: aiohttp.ClientResponseError) -> None:
        """
        Handle a client response error.

        Reads the ``Date`` header of the failed response and shifts the clock
        skew by the difference between the server time and the (already
        skew-corrected) local time.

        Args:
            e (Exception): The client response error to handle.

        Returns:
            None

        Raises:
            SkewAdjustmentError: If the server date is missing or cannot be parsed.
        """
        if e.headers is None:
            raise SkewAdjustmentError("No server date in headers.") from e

        server_date: Optional[str] = e.headers.get("Date", None)
        # isinstance is False for None too, so this covers a missing header.
        if not isinstance(server_date, str):
            raise SkewAdjustmentError("No server date in headers.") from e

        server_timestamp: Optional[float] = DRM.parse_rfc2616_date(server_date)
        if not isinstance(server_timestamp, float):
            raise SkewAdjustmentError(
                f"Failed to parse server date: {server_date}"
            ) from e

        local_timestamp = DRM.get_unix_timestamp()
        DRM.adj_clock_skew_seconds(server_timestamp - local_timestamp)

    @staticmethod
    def generate_sec_ms_gec() -> str:
        """
        Generates the Sec-MS-GEC token value.

        The token is the uppercased SHA256 hex digest of the current time
        (skew-corrected, in Windows file time format, rounded down to the
        nearest 5 minutes) concatenated with the trusted client token.

        Returns:
            str: The generated Sec-MS-GEC token value.

        See Also:
            https://github.com/rany2/edge-tts/issues/290#issuecomment-2464956570
        """
        # Skew-corrected Unix time moved to the Windows file time epoch
        # (1601-01-01 00:00:00 UTC).
        ticks = DRM.get_unix_timestamp() + WIN_EPOCH

        # Round down to the nearest 5 minutes (300 seconds).
        ticks -= ticks % 300

        # Convert seconds to 100-nanosecond intervals (Windows file time format).
        ticks *= S_TO_NS / 100

        # Hash the tick count followed by the trusted client token.
        payload = f"{ticks:.0f}{TRUSTED_CLIENT_TOKEN}"
        digest = hashlib.sha256(payload.encode("ascii")).hexdigest()
        return digest.upper()
edge_tts/exceptions.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Custom exceptions for the edge-tts package."""
2
+
3
+
4
class EdgeTTSException(Exception):
    """Base exception for the edge-tts package.

    Catch this to handle every error type defined in this module."""
6
+
7
+
8
class UnknownResponse(EdgeTTSException):
    """Raised when an unknown response is received from the server,
    such as an unrecognized message path or metadata type."""
10
+
11
+
12
class UnexpectedResponse(EdgeTTSException):
    """Raised when an unexpected response is received from the server,
    i.e. a message of a known kind whose contents are malformed.

    This hasn't happened yet, but it's possible that the server will
    change its response format in the future."""
17
+
18
+
19
class NoAudioReceived(EdgeTTSException):
    """Raised when no audio is received from the server
    for a synthesis request."""
21
+
22
+
23
class WebSocketError(EdgeTTSException):
    """Raised when a WebSocket error occurs, i.e. when the websocket
    delivers a message of type ERROR."""
25
+
26
+
27
class SkewAdjustmentError(EdgeTTSException):
    """Raised when an error occurs while adjusting the clock skew,
    e.g. the server's Date header is missing or cannot be parsed."""
edge_tts/py.typed ADDED
File without changes
edge_tts/typing.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Custom types for edge-tts."""
2
+
3
+ # pylint: disable=too-few-public-methods
4
+
5
+ from typing import List
6
+
7
+ from typing_extensions import Literal, NotRequired, TypedDict
8
+
9
+
10
class TTSChunk(TypedDict):
    """TTS chunk data.

    The ``type`` key tells which of the optional keys are present: ``data``
    accompanies "audio" chunks, while ``duration``, ``offset`` and ``text``
    accompany "WordBoundary" chunks.
    """

    type: Literal["audio", "WordBoundary"]
    data: NotRequired[bytes]  # only for audio
    duration: NotRequired[float]  # only for WordBoundary
    offset: NotRequired[float]  # only for WordBoundary
    text: NotRequired[str]  # only for WordBoundary
18
+
19
+
20
class VoiceTag(TypedDict):
    """VoiceTag data.

    Holds the two tag lists attached to a voice; both are restricted to the
    literal values enumerated below.
    """

    # Content categories a voice can be tagged with.
    ContentCategories: List[
        Literal[
            "Cartoon",
            "Conversation",
            "Copilot",
            "Dialect",
            "General",
            "News",
            "Novel",
            "Sports",
        ]
    ]
    # Personality traits a voice can be tagged with.
    VoicePersonalities: List[
        Literal[
            "Approachable",
            "Authentic",
            "Authority",
            "Bright",
            "Caring",
            "Casual",
            "Cheerful",
            "Clear",
            "Comfort",
            "Confident",
            "Considerate",
            "Conversational",
            "Cute",
            "Expressive",
            "Friendly",
            "Honest",
            "Humorous",
            "Lively",
            "Passion",
            "Pleasant",
            "Positive",
            "Professional",
            "Rational",
            "Reliable",
            "Sincere",
            "Sunshine",
            "Warm",
        ]
    ]
66
+
67
+
68
class Voice(TypedDict):
    """Voice data.

    Describes a single voice; all keys are required.
    """

    Name: str
    ShortName: str
    Gender: Literal["Female", "Male"]
    Locale: str
    SuggestedCodec: Literal["audio-24khz-48kbitrate-mono-mp3"]
    FriendlyName: str
    Status: Literal["GA"]
    # The annotation refers to the VoiceTag TypedDict of the same name.
    VoiceTag: VoiceTag
79
+
80
+
81
class VoicesManagerVoice(Voice):
    """Voice data for VoicesManager.

    Extends ``Voice`` with one additional required key, ``Language``.
    """

    Language: str
85
+
86
+
87
class VoicesManagerFind(TypedDict):
    """Voice data for VoicesManager.find().

    Every key is optional, so any subset of them may be supplied.
    """

    Gender: NotRequired[Literal["Female", "Male"]]
    Locale: NotRequired[str]
    Language: NotRequired[str]
93
+
94
+
95
class CommunicateState(TypedDict):
    """Communicate state data.

    NOTE(review): presumably the bookkeeping kept by ``Communicate`` while
    streaming; its usage is not visible from this module - confirm there.
    """

    partial_text: bytes
    offset_compensation: float
    last_duration_offset: float
    stream_was_called: bool
edge_tts/util.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Utility functions for the command line interface. Used by the main module."""
2
+
3
+ import argparse
4
+ import asyncio
5
+ import sys
6
+ from typing import Optional, TextIO
7
+
8
+ from tabulate import tabulate
9
+
10
+ from . import Communicate, SubMaker, list_voices
11
+ from .constants import DEFAULT_VOICE
12
+ from .data_classes import UtilArgs
13
+
14
+
15
async def _print_voices(*, proxy: Optional[str]) -> None:
    """Fetch the available voices and print them as a table.

    Rows are ordered by the voice's ``ShortName``.

    Args:
        proxy (Optional[str]): The proxy to use for the voice list request.
    """
    available = await list_voices(proxy=proxy)

    rows = []
    for entry in sorted(available, key=lambda item: item["ShortName"]):
        rows.append(
            [
                entry["ShortName"],
                entry["Gender"],
                ", ".join(entry["VoiceTag"]["ContentCategories"]),
                ", ".join(entry["VoiceTag"]["VoicePersonalities"]),
            ]
        )

    column_names = ["Name", "Gender", "ContentCategories", "VoicePersonalities"]
    print(tabulate(rows, column_names))
30
+
31
+
32
async def _run_tts(args: UtilArgs) -> None:
    """Run TTS after parsing arguments from command line.

    Audio chunks are written to ``args.write_media``, or to stdout when that
    option is unset or "-". Subtitles are written as SRT to
    ``args.write_subtitles``, to stderr when that option is "-", and are not
    written at all when it is unset.

    Args:
        args (UtilArgs): The parsed command line arguments.
    """

    # Audio bytes would be dumped on an interactive terminal: ask for
    # confirmation first and let Ctrl+C abort cleanly.
    try:
        if sys.stdin.isatty() and sys.stdout.isatty() and not args.write_media:
            print(
                "Warning: TTS output will be written to the terminal. "
                "Use --write-media to write to a file.\n"
                "Press Ctrl+C to cancel the operation. "
                "Press Enter to continue.",
                file=sys.stderr,
            )
            input()
    except KeyboardInterrupt:
        print("\nOperation canceled.", file=sys.stderr)
        return

    communicate = Communicate(
        args.text,
        args.voice,
        rate=args.rate,
        volume=args.volume,
        pitch=args.pitch,
        proxy=args.proxy,
    )
    submaker = SubMaker()

    # Bind both handles before entering the try block. If one of the open()
    # calls below raises, the finally clause must still be able to inspect
    # them; otherwise an UnboundLocalError would mask the real I/O error.
    audio_file = None
    sub_file: Optional[TextIO] = None
    try:
        audio_file = (
            open(args.write_media, "wb")
            if args.write_media is not None and args.write_media != "-"
            else sys.stdout.buffer
        )
        sub_file = (
            open(args.write_subtitles, "w", encoding="utf-8")
            if args.write_subtitles is not None and args.write_subtitles != "-"
            else None
        )
        if sub_file is None and args.write_subtitles == "-":
            sub_file = sys.stderr

        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                audio_file.write(chunk["data"])
            elif chunk["type"] == "WordBoundary":
                submaker.feed(chunk)

        if args.words_in_cue > 0:
            submaker.merge_cues(args.words_in_cue)

        if sub_file is not None:
            sub_file.write(submaker.get_srt())
    finally:
        # Only close what this function opened; the standard streams belong
        # to the interpreter.
        if audio_file is not None and audio_file is not sys.stdout.buffer:
            audio_file.close()
        if sub_file is not None and sub_file is not sys.stderr:
            sub_file.close()
88
+
89
+
90
async def amain() -> None:
    """Parse the command line, then either list the voices or run TTS."""
    parser = argparse.ArgumentParser(
        description="Text-to-speech using Microsoft Edge's online TTS service."
    )

    # Exactly one of --text, --file or --list-voices has to be given.
    source = parser.add_mutually_exclusive_group(required=True)
    source.add_argument("-t", "--text", help="what TTS will say")
    source.add_argument("-f", "--file", help="same as --text but read from file")
    parser.add_argument(
        "-v",
        "--voice",
        default=DEFAULT_VOICE,
        help=f"voice for TTS. Default: {DEFAULT_VOICE}",
    )
    source.add_argument(
        "-l",
        "--list-voices",
        action="store_true",
        help="lists available voices and exits",
    )

    parser.add_argument("--rate", default="+0%", help="set TTS rate. Default +0%%.")
    parser.add_argument("--volume", default="+0%", help="set TTS volume. Default +0%%.")
    parser.add_argument("--pitch", default="+0Hz", help="set TTS pitch. Default +0Hz.")
    parser.add_argument(
        "--words-in-cue",
        type=int,
        default=10,
        help="number of words in a subtitle cue. Default: 10.",
    )
    parser.add_argument(
        "--write-media", help="send media output to file instead of stdout"
    )
    parser.add_argument(
        "--write-subtitles",
        help="send subtitle output to provided file instead of stderr",
    )
    parser.add_argument("--proxy", help="use a proxy for TTS and voice list.")

    args = parser.parse_args(namespace=UtilArgs())

    if args.list_voices:
        await _print_voices(proxy=args.proxy)
        sys.exit(0)

    # --file: load the text to speak from stdin or from the named file.
    text_source = args.file
    if text_source is not None:
        if text_source == "-" or text_source == "/dev/stdin":
            args.text = sys.stdin.read()
        else:
            with open(text_source, "r", encoding="utf-8") as handle:
                args.text = handle.read()

    if args.text is None:
        return
    await _run_tts(args)
142
+
143
+
144
def main() -> None:
    """Run the main function using asyncio.

    Synchronous entry point: drives the ``amain()`` coroutine to completion
    with ``asyncio.run()``.
    """
    asyncio.run(amain())
147
+
148
+
149
# Run the command line interface when this module is executed as a script.
if __name__ == "__main__":
    main()
edge_tts/version.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ """Version information for the edge_tts package."""
2
+
3
# Release number as a dotted string.
__version__ = "7.0.0"
# The same release number as a tuple of ints, one per dotted component.
__version_info__ = tuple(map(int, __version__.split(".")))
edge_tts/voices.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """This module contains functions to list all available voices and a class to find the
2
+ correct voice based on their attributes."""
3
+
4
+ import json
5
+ import ssl
6
+ from typing import List, Optional
7
+
8
+ import aiohttp
9
+ import certifi
10
+ from typing_extensions import Unpack
11
+
12
+ from .constants import SEC_MS_GEC_VERSION, VOICE_HEADERS, VOICE_LIST
13
+ from .drm import DRM
14
+ from .typing import Voice, VoicesManagerFind, VoicesManagerVoice
15
+
16
+
17
async def __list_voices(
    session: aiohttp.ClientSession, ssl_ctx: ssl.SSLContext, proxy: Optional[str]
) -> List[Voice]:
    """
    Request the voice list URL and parse its JSON response.

    Kept separate from list_voices() so that the caller can simply repeat the
    request after handling a client response error related to clock skew.

    Args:
        session (aiohttp.ClientSession): The aiohttp session to use for the request.
        ssl_ctx (ssl.SSLContext): The SSL context to use for the request.
        proxy (Optional[str]): The proxy to use for the request.

    Returns:
        List[Voice]: A list of voices and their attributes.
    """
    request_url = (
        f"{VOICE_LIST}&Sec-MS-GEC={DRM.generate_sec_ms_gec()}"
        f"&Sec-MS-GEC-Version={SEC_MS_GEC_VERSION}"
    )
    async with session.get(
        request_url,
        headers=VOICE_HEADERS,
        proxy=proxy,
        ssl=ssl_ctx,
        raise_for_status=True,
    ) as response:
        voices: List[Voice] = json.loads(await response.text())

    # Strip stray whitespace from the tag values. So far only the
    # zh-CN-YunjianNeural voice needed it: one of its categories had a
    # leading space.
    for entry in voices:
        tag = entry["VoiceTag"]
        tag["ContentCategories"] = [
            item.strip() for item in tag["ContentCategories"]  # type: ignore
        ]
        tag["VoicePersonalities"] = [
            item.strip() for item in tag["VoicePersonalities"]  # type: ignore
        ]

    return voices
57
+
58
+
59
async def list_voices(
    *, connector: Optional[aiohttp.BaseConnector] = None, proxy: Optional[str] = None
) -> List[Voice]:
    """
    Return every available voice together with its attributes.

    The data comes from the URL that Microsoft Edge itself uses to obtain
    its list of voices. When the request is rejected with HTTP 403, the error
    is passed to DRM.handle_client_response_error() and the request is made
    one more time; any other response error propagates unchanged.

    Args:
        connector (Optional[aiohttp.BaseConnector]): The connector to use for the request.
        proxy (Optional[str]): The proxy to use for the request.

    Returns:
        List[Voice]: A list of voices and their attributes.
    """
    ssl_ctx = ssl.create_default_context(cafile=certifi.where())
    async with aiohttp.ClientSession(connector=connector, trust_env=True) as http:
        try:
            return await __list_voices(http, ssl_ctx, proxy)
        except aiohttp.ClientResponseError as error:
            if error.status == 403:
                DRM.handle_client_response_error(error)
                # Single retry, still inside the handler.
                return await __list_voices(http, ssl_ctx, proxy)
            raise
86
+
87
+
88
class VoicesManager:
    """
    A class to find the correct voice based on their attributes.

    Instances are meant to be built with the create() coroutine; find()
    raises RuntimeError on an instance that was not populated that way.
    """

    def __init__(self) -> None:
        # Voices known to this manager, each extended with a "Language" key.
        self.voices: List[VoicesManagerVoice] = []
        # Set by create(); find() refuses to run until it is True.
        self.called_create: bool = False

    @classmethod
    async def create(
        cls, custom_voices: Optional[List[Voice]] = None
    ) -> "VoicesManager":
        """
        Creates a VoicesManager object and populates it with all available voices.

        Args:
            custom_voices (Optional[List[Voice]]): Voices to use instead of
                fetching them with list_voices().

        Returns:
            VoicesManager: A populated instance of the class create() was
            called on.
        """
        # Instantiate through cls rather than the hard-coded class name so
        # that subclasses get an instance of their own type back.
        self = cls()
        voices = await list_voices() if custom_voices is None else custom_voices
        # "Language" is the part of the locale before the first dash,
        # e.g. "en" for "en-US".
        self.voices = [
            {**voice, "Language": voice["Locale"].split("-")[0]} for voice in voices
        ]
        self.called_create = True
        return self

    def find(self, **kwargs: Unpack[VoicesManagerFind]) -> List[VoicesManagerVoice]:
        """
        Finds all matching voices based on the provided attributes.

        Args:
            **kwargs: Attribute values (Gender, Locale, Language) a voice must
                have. With no arguments every voice matches.

        Returns:
            List[VoicesManagerVoice]: The voices matching every given attribute.

        Raises:
            RuntimeError: If called before create().
        """
        if not self.called_create:
            raise RuntimeError(
                "VoicesManager.find() called before VoicesManager.create()"
            )

        # A voice matches when every requested key/value pair is among its items.
        matching_voices = [
            voice for voice in self.voices if kwargs.items() <= voice.items()
        ]
        return matching_voices