Spaces:

SentiLab01
/

asltranslator

Sleeping

App Files Files Community

asltranslator / app.py

SentiLab01

Update app.py

4c61d44 verified 5 months ago

raw

history blame contribute delete

6.79 kB

	import gradio as gr
	import whisper
	import torch
	import string

	# Load Whisper model
	# Loaded once at import time; transcribe_audio reads this module-level
	# `model` for language detection, decoding and its device.
	model = whisper.load_model("base")

	# Default images
	# Image inserted between words (shown with the "␣" caption in the gallery).
	DEFAULT_SPACE_IMAGE = 'https://asl-hands.s3.amazonaws.com/gifs/png-smiling-face-smiley-png-3896.png'
	# Fallback image for any character that has no entry in asl_images.
	PLACEHOLDER_IMAGE = 'https://asl-hands.s3.amazonaws.com/placeholder.png'

	# ASL dictionary mapping letters and numbers to corresponding S3 images
	# NOTE: text_to_asl_images looks characters up one at a time, so the
	# two-character key '10' is never matched by that path, and '0' has no
	# entry (it falls back to PLACEHOLDER_IMAGE).
	asl_images = {
	    'A': 'https://asl-hands.s3.amazonaws.com/gifs/A-Sign-Language-Alphabet.gif',
	    'B': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-B-in-Sign-Language-ASL.gif',
	    'C': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-say-letter-C-in-ASL-sign-Language.gif',
	    'D': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-D-in-Sign-Language-ASL.gif',
	    'E': 'https://asl-hands.s3.amazonaws.com/gifs/The-Letter-E-in-Sign-Language.gif',
	    'F': 'https://asl-hands.s3.amazonaws.com/gifs/What-is-F-in-Sign-Language-ASL.gif',
	    'G': 'https://asl-hands.s3.amazonaws.com/gifs/What-is-G-in-Sign-Language-ASL.gif',
	    'H': 'https://asl-hands.s3.amazonaws.com/gifs/H-in-Sign-Language-Alphabet.gif',
	    'I': 'https://asl-hands.s3.amazonaws.com/gifs/What-is-I-in-Sign-Language-ASL.gif',
	    'J': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-J-in-ASL-Alphabets.gif',
	    # Previously pointed at the letter-J GIF, so K was displayed with J's sign.
	    # NOTE(review): URL follows the naming used for J and M-Z; confirm the
	    # object exists in the bucket.
	    'K': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-K-in-ASL-Alphabets.gif',
	    'L': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-L-in-ASL-Alphabets.gif',
	    'M': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-M-in-ASL-Alphabets.gif',
	    'N': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-N-in-ASL-Alphabets.gif',
	    'O': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-O-in-ASL-Alphabets.gif',
	    'P': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-P-in-ASL-Alphabets.gif',
	    'Q': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-Q-in-ASL-Alphabets.gif',
	    'R': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-R-in-ASL-Alphabets.gif',
	    'S': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-S-in-ASL-Alphabets.gif',
	    'T': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-T-in-ASL-Alphabets.gif',
	    'U': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-U-in-ASL-Alphabets.gif',
	    'V': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-V-in-ASL-Alphabets.gif',
	    'W': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-W-in-ASL-Alphabets.gif',
	    'X': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-X-in-ASL-Alphabets.gif',
	    'Y': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-Y-in-ASL-Alphabets.gif',
	    'Z': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-Z-in-ASL-Alphabets.gif',
	    '1': 'https://asl-hands.s3.amazonaws.com/LSQ_1.jpg',
	    '2': 'https://asl-hands.s3.amazonaws.com/LSQ_2.jpg',
	    '3': 'https://asl-hands.s3.amazonaws.com/LSQ_3.jpg',
	    '4': 'https://asl-hands.s3.amazonaws.com/LSQ_4.jpg',
	    '5': 'https://asl-hands.s3.amazonaws.com/LSQ_5.jpg',
	    '6': 'https://asl-hands.s3.amazonaws.com/LSQ_6.jpg',
	    '7': 'https://asl-hands.s3.amazonaws.com/LSQ_7.jpg',
	    '8': 'https://asl-hands.s3.amazonaws.com/LSQ_8.jpg',
	    '9': 'https://asl-hands.s3.amazonaws.com/LSQ_9.jpg',
	    '10': 'https://asl-hands.s3.amazonaws.com/LSQ_10.jpg',
	    # Ensure 'SPACE' is in the dictionary
	    'SPACE': DEFAULT_SPACE_IMAGE,
	}

	# Transcribe the audio file using Whisper
	def transcribe_audio(audio):
	    """Transcribe an audio file with the module-level Whisper ``model``.

	    Args:
	        audio: Path of the audio file to transcribe. May be ``None`` or
	            empty when the caller has no recording to offer.

	    Returns:
	        The decoded text, or an empty string when no audio was supplied.
	    """
	    # Nothing to load: return early instead of failing inside load_audio.
	    if not audio:
	        return ""

	    waveform = whisper.load_audio(audio)
	    # NOTE(review): pad_or_trim fits the waveform to a single Whisper input
	    # window (30 s per the openai-whisper README), so longer recordings are
	    # cut off here -- confirm that is acceptable for this app.
	    waveform = whisper.pad_or_trim(waveform)
	    mel = whisper.log_mel_spectrogram(waveform).to(model.device)

	    # Language detection is only logged; it does not influence decoding options.
	    _, probs = model.detect_language(mel)
	    print(f"Detected language: {max(probs, key=probs.get)}")

	    # fp16=False requests full-precision decoding.
	    options = whisper.DecodingOptions(fp16=False)
	    result = whisper.decode(model, mel, options)
	    return result.text

	# Convert text to ASL images with corresponding letters, adding spaces between words
	def text_to_asl_images(text):
	    """Build the gallery items that spell out ``text``.

	    ASCII punctuation is stripped, the text is upper-cased and split on
	    whitespace. Each remaining character becomes an ``(image_url, caption)``
	    pair, using PLACEHOLDER_IMAGE when the character has no entry in
	    ``asl_images``. A ``(DEFAULT_SPACE_IMAGE, "␣")`` pair separates
	    consecutive words; none is added after the last word.

	    Args:
	        text: The string to convert.

	    Returns:
	        A list of ``(image_url, caption)`` tuples in reading order.
	    """
	    strip_punctuation = str.maketrans('', '', string.punctuation)
	    tokens = text.translate(strip_punctuation).upper().split()
	    final_index = len(tokens) - 1

	    gallery_items = []
	    for position, token in enumerate(tokens):
	        gallery_items.extend(
	            (asl_images.get(letter, PLACEHOLDER_IMAGE), f"{letter}")
	            for letter in token
	        )
	        # Separator goes between words only, never after the final one.
	        if position != final_index:
	            gallery_items.append((DEFAULT_SPACE_IMAGE, "␣"))
	    return gallery_items

	# Gradio interface for audio input
	def interface_audio(audio):
	    """Transcribe ``audio`` and convert the transcription to ASL images.

	    Returns:
	        A ``(transcription, gallery_items)`` tuple, where ``gallery_items``
	        is whatever ``text_to_asl_images`` produces for the transcription.
	    """
	    spoken_text = transcribe_audio(audio)
	    return spoken_text, text_to_asl_images(spoken_text)

	# Gradio interface for text input
	def interface_text(text):
	    """Return the ASL gallery items for the typed ``text``."""
	    return text_to_asl_images(text)

	# Custom CSS for layout and scrolling with smaller images
	# Passed to gr.Blocks(css=...). The #asl-output and #asl-output-text
	# selectors match the elem_id values given to the two gr.Gallery components:
	# they cap the height at 400px and scroll vertically. The .gallery rules lay
	# items out as a wrapping flex row of fixed 60x80px cells with a small caption.
	# NOTE(review): the .gallery / .caption class names presumably target
	# Gradio's generated gallery markup -- verify against the installed version.
	custom_css = """
	#asl-output, #asl-output-text {
	overflow-x: hidden;
	overflow-y: auto;
	max-height: 400px;
	padding: 10px;
	}
	.gallery {
	display: flex;
	flex-wrap: wrap;
	justify-content: flex-start;
	gap: 5px;
	}
	.gallery > div {
	flex: 0 0 auto;
	width: 60px;
	height: 80px;
	display: flex;
	flex-direction: column;
	align-items: center;
	justify-content: center;
	border: 1px solid #ddd;
	border-radius: 4px;
	padding: 2px;
	}
	.gallery img {
	max-width: 100%;
	max-height: 60px;
	object-fit: contain;
	}
	.gallery .caption {
	font-size: 12px;
	margin-top: 2px;
	}
	"""

	# Gradio Blocks Interface
	# Two tabs: one transcribes audio before translating, the other translates
	# typed text directly. Both render their result in a 10-column gallery.
	with gr.Blocks(css=custom_css) as demo:
	    gr.Markdown("# Whisper & ASL Translation App")

	    # --- Tab 1: audio -> transcription -> ASL gallery ---
	    with gr.Tab("Audio Input"):
	        with gr.Row():
	            audio_input = gr.Audio(
	                type="filepath",
	                label="Record or Upload Audio",
	            )

	        with gr.Row():
	            submit_btn_audio = gr.Button("Transcribe and Translate")

	        with gr.Row():
	            transcription_output = gr.Textbox(label="Transcription")

	        with gr.Row():
	            asl_output = gr.Gallery(
	                label="ASL Translation",
	                elem_id="asl-output",
	                columns=10,
	            )

	        # interface_audio returns (transcription, gallery items), in that order.
	        submit_btn_audio.click(
	            interface_audio,
	            inputs=audio_input,
	            outputs=[transcription_output, asl_output],
	        )

	    # --- Tab 2: typed text -> ASL gallery ---
	    with gr.Tab("Text Input"):
	        with gr.Row():
	            text_input = gr.Textbox(label="Enter text for ASL translation")

	        with gr.Row():
	            submit_btn_text = gr.Button("Translate to ASL")

	        with gr.Row():
	            asl_output_text = gr.Gallery(
	                label="ASL Translation",
	                elem_id="asl-output-text",
	                columns=10,
	            )

	        submit_btn_text.click(
	            interface_text,
	            inputs=text_input,
	            outputs=asl_output_text,
	        )

	# Run the Gradio app
	demo.launch()