Spaces:
Runtime error
Runtime error
import re | |
from .translator import translate_text | |
# Matches an SRT timing line, e.g. "00:00:20,130 --> 00:00:23,419".
# The eight groups are start h/m/s/ms followed by end h/m/s/ms.
_SRT_TIME_LINE = re.compile(
    r'(\d\d):(\d\d):(\d\d),(\d\d\d) --> (\d\d):(\d\d):(\d\d),(\d\d\d)'
)

# Single-pass replacement table for the five XML special characters.
_XML_ESCAPES = str.maketrans({
    "&": "&amp;",
    '"': "&quot;",
    "'": "&apos;",
    "<": "&lt;",
    ">": "&gt;",
})


def _to_ms(hours, minutes, seconds, millis):
    """Convert the four numeric fields of an SRT timestamp to milliseconds."""
    return ((int(hours) * 60 + int(minutes)) * 60 + int(seconds)) * 1000 + int(millis)


def _parse_srt(lines):
    """Parse SRT lines into a list of cue dicts, in file order.

    Each cue has integer ``start_ms``, ``end_ms``, ``duration_ms`` and
    ``break_until_next`` (gap to the following cue, 0 for the last cue) plus
    ``text`` (all text lines of the cue joined with single spaces).
    """
    cues = []
    total = len(lines)
    for idx, raw in enumerate(lines):
        # A cue starts with a line holding only its index number, immediately
        # followed by a timing line. Guard the look-ahead so a trailing
        # number at the end of the file cannot raise IndexError.
        if not raw.strip().isdigit() or idx + 1 >= total:
            continue
        timing = _SRT_TIME_LINE.match(lines[idx + 1])
        if timing is None:
            continue

        # Collect every text line up to the first blank line (or end of file).
        # Stopping at the *first* blank line keeps a cue with empty text from
        # swallowing the next cue's index/timing/text.
        text_parts = []
        for follower in lines[idx + 2:]:
            follower = follower.strip()
            if not follower:
                break
            text_parts.append(follower)

        start_ms = _to_ms(*timing.group(1, 2, 3, 4))
        end_ms = _to_ms(*timing.group(5, 6, 7, 8))

        # The pause after the previous cue is only known once this cue's start
        # time is. Link to the previously *parsed* cue rather than to
        # "index - 1" so gaps in numbering or leading blank lines cannot
        # cause a KeyError.
        if cues:
            cues[-1]['break_until_next'] = start_ms - cues[-1]['end_ms']

        cues.append({
            'start_ms': start_ms,
            'end_ms': end_ms,
            'duration_ms': end_ms - start_ms,
            'text': ' '.join(text_parts),
            'break_until_next': 0,
        })
    return cues


def sub_to_ssml(input, output, lang, voice, gender, from_lang, to_lang):
    """Convert an SRT subtitle file into an SSML document and write it to disk.

    Every subtitle becomes a ``<prosody duration="...ms">`` element holding the
    text returned by ``translate_text``; a ``<break time="...ms"/>`` follows
    whenever there is a positive gap before the next subtitle.

    Args:
        input: Path of the SRT file to read (decoded as UTF-8, BOM tolerated).
        output: Path of the SSML file to write (UTF-8 with BOM).
        lang: Value for the ``xml:lang`` attributes, e.g. ``"en-US"``.
        voice: Voice name for the ``<voice>`` wrapper. ``None``, ``""`` or the
            string ``"none"`` (any case) omits the wrapper.
        gender: Value for the ``xml:gender`` attribute of ``<voice>``.
        from_lang: Passed through to ``translate_text`` as its second argument.
        to_lang: Passed through to ``translate_text`` as its third argument.

    Returns:
        The SSML document as a string (same content that is written to
        ``output``).

    Raises:
        OSError: If ``input`` cannot be read or ``output`` cannot be written.
    """
    # ------- SSML options -------
    duration_attribute = "duration"
    escape_special_chars = True
    ssml_version = "1.0"
    # utf_8_sig writes a BOM; the original author notes Azure requires it.
    output_encoding = "utf_8_sig"

    # ------- Parse SRT file -------
    with open(input, 'r', encoding='utf-8-sig') as f:
        cues = _parse_srt(f.readlines())

    # ------- Build SSML -------
    if not voice or voice.lower() == 'none':
        voice_open = ''
        voice_close = ''
    else:
        voice_open = f'<voice xml:lang="{lang}" xml:gender="{gender}" name="' + voice + '">'
        voice_close = '</voice>'

    pieces = [f'<speak version="{ssml_version}" xml:lang="{lang}">{voice_open}\n']
    for cue in cues:
        gap_ms = cue['break_until_next']
        # Only positive gaps become a break; overlapping cues would otherwise
        # produce a negative time value, which is not valid SSML.
        break_tag = f'<break time="{gap_ms}ms"/>' if gap_ms > 0 else ''

        text = cue['text']
        if escape_special_chars:
            text = text.translate(_XML_ESCAPES)
        # NOTE(review): escaping happens before translation, as in the original
        # code. Whether translate_text preserves XML entities (or can return
        # raw special characters) is not visible here -- confirm.
        translated_text = translate_text(text, from_lang, to_lang)

        pieces.append(
            f'\t<prosody {duration_attribute}="{cue["duration_ms"]}ms">'
            f'{translated_text}</prosody>{break_tag}\n'
        )
    pieces.append(f'{voice_close}</speak>')
    output_string = ''.join(pieces)

    with open(output, 'w', encoding=output_encoding) as f:
        f.write(output_string)
    return output_string