Spaces:

badalsahani
/

srt

Runtime error

App Files Files Community

srt / app /srt_to_ssml.py

badalsahani

Update app/srt_to_ssml.py

9e20a4b almost 2 years ago

raw

history blame contribute delete

5.61 kB

	import re
	from .translator import translate_text


	# Matches an SRT timing line such as "00:00:20,130 --> 00:00:23,419" and
	# captures hours, minutes, seconds and milliseconds of both timestamps.
	# Anything after the second timestamp (e.g. position hints) is ignored.
	_SUBTITLE_TIME_LINE_RE = re.compile(
	    r'(\d\d):(\d\d):(\d\d),(\d\d\d) --> (\d\d):(\d\d):(\d\d),(\d\d\d)'
	)

	# The five predefined XML entities, applied in a single pass so that the
	# "&" introduced by one replacement can never be escaped a second time.
	_XML_ESCAPE_TABLE = str.maketrans({
	    '&': '&amp;',
	    '"': '&quot;',
	    "'": '&apos;',
	    '<': '&lt;',
	    '>': '&gt;',
	})


	def _escape_chars(enable_character_escape, text):
	    """Return text with XML special characters replaced by entities.

	    When enable_character_escape is false the text is returned unchanged.
	    """
	    if enable_character_escape:
	        return text.translate(_XML_ESCAPE_TABLE)
	    return text


	def _timestamp_to_ms(hours, minutes, seconds, millis):
	    """Convert the four string fields of an SRT timestamp to milliseconds."""
	    return (
	        int(hours) * 3600000
	        + int(minutes) * 60000
	        + int(seconds) * 1000
	        + int(millis)
	    )


	def _parse_srt_lines(lines):
	    """Parse raw SRT lines into a list of cue dictionaries, in file order.

	    A cue is recognised as a digit-only line immediately followed by a
	    timing line. Every following non-blank line is joined (with single
	    spaces) into the cue text. Each cue dictionary holds integer
	    'start_ms', 'end_ms', 'duration_ms' and 'break_until_next' values plus
	    the 'text' string. 'break_until_next' is the gap between this cue's end
	    and the next cue's start; it stays 0 for the last cue.
	    """
	    cues = []
	    total = len(lines)

	    for index, raw_line in enumerate(lines):
	        # A counter line needs at least a timing line after it; checking the
	        # bound avoids an IndexError on a digit-only last line.
	        if not raw_line.strip().isdigit() or index + 1 >= total:
	            continue

	        timing = _SUBTITLE_TIME_LINE_RE.match(lines[index + 1].strip())
	        if timing is None:
	            continue

	        # Collect text lines up to the first blank line or end of file.
	        # Stopping at the first blank line keeps a cue with empty text from
	        # swallowing the lines of the cue that follows it.
	        text_parts = []
	        cursor = index + 2
	        while cursor < total:
	            candidate = lines[cursor].strip()
	            if not candidate:
	                break
	            text_parts.append(candidate)
	            cursor += 1

	        fields = timing.groups()
	        start_ms = _timestamp_to_ms(*fields[:4])
	        end_ms = _timestamp_to_ms(*fields[4:])

	        # The pause after the previous cue is only known once this cue's
	        # start time has been read. Using the previously parsed cue (rather
	        # than "cue number minus one") works for files that begin with blank
	        # lines or whose numbering is not sequential.
	        if cues:
	            cues[-1]['break_until_next'] = start_ms - cues[-1]['end_ms']

	        cues.append({
	            'start_ms': start_ms,
	            'end_ms': end_ms,
	            'duration_ms': end_ms - start_ms,
	            'text': ' '.join(text_parts),
	            'break_until_next': 0,
	        })

	    return cues


	def sub_to_ssml(input, output, lang, voice, gender, from_lang, to_lang):
	    """Convert an SRT subtitle file into an SSML document.

	    Each subtitle becomes a <prosody> element whose duration attribute is
	    the subtitle's on-screen time, followed by a <break> element covering
	    the silence until the next subtitle. The subtitle text is passed
	    through translate_text(text, from_lang, to_lang) before being written.

	    Args:
	        input: Path of the SRT file to read (decoded as UTF-8, BOM allowed).
	        output: Path of the SSML file to write (UTF-8 with BOM).
	        lang: Value for the xml:lang attributes, e.g. "en-US".
	        voice: Voice name, e.g. "en-US-DavisNeural". None, an empty string
	            or "none" (any case) omits the <voice> element entirely.
	        gender: Value for the xml:gender attribute of the <voice> element.
	        from_lang: Source language handed to translate_text.
	        to_lang: Target language handed to translate_text.

	    Returns:
	        The SSML document as a string (the same text written to output).
	    """
	    # ------- SSML options -------
	    duration_attribute_name = "duration"
	    # Whether to escape XML special characters in the subtitle text.
	    enable_character_escape = True
	    ssml_version = "1.0"
	    # utf_8_sig writes a BOM, utf_8 would not.
	    chosen_file_encoding = "utf_8_sig"

	    # ------- Parse the SRT file -------
	    with open(input, 'r', encoding='utf-8-sig') as f:
	        lines = f.readlines()
	    cues = _parse_srt_lines(lines)

	    # ------- Build the SSML document -------
	    if not voice or voice.lower() == 'none':
	        voice_tag = ''
	        voice_tag_end = ''
	    else:
	        voice_tag = f'<voice xml:lang="{lang}" xml:gender="{gender}" name="' + voice + '">'
	        voice_tag_end = '</voice>'

	    parts = [f'<speak version="{ssml_version}" xml:lang="{lang}">{voice_tag}\n']

	    for cue in cues:
	        # Overlapping subtitles give a negative gap; a negative break time
	        # is not valid, so a break is only emitted for a positive gap.
	        gap_ms = cue['break_until_next']
	        break_tag = f'<break time="{gap_ms}ms"/>' if gap_ms > 0 else ''

	        # NOTE(review): the text is escaped before translation, as in the
	        # previous implementation. If translate_text can alter entities or
	        # return raw "&"/"<" characters, escaping should move after it.
	        # Confirm against the translator's behaviour.
	        text = _escape_chars(enable_character_escape, cue['text'])
	        translated_text = translate_text(text, from_lang, to_lang)

	        duration_ms = cue['duration_ms']
	        parts.append(
	            f'\t<prosody {duration_attribute_name}="{duration_ms}ms">'
	            f'{translated_text}</prosody>{break_tag}\n'
	        )

	    parts.append(f'{voice_tag_end}</speak>')
	    output_string = ''.join(parts)

	    with open(output, 'w', encoding=chosen_file_encoding) as f:
	        f.write(output_string)

	    return output_string