Spaces:
Sleeping
Sleeping
import gradio as gr | |
import whisper | |
import torch | |
import string | |
# Whisper checkpoint used for all transcriptions; loaded once at import time
# so every request reuses the same model instance.
WHISPER_MODEL_NAME = "base"
model = whisper.load_model(WHISPER_MODEL_NAME)
# Fallback images.
# DEFAULT_SPACE_IMAGE is shown between words; PLACEHOLDER_IMAGE is shown for
# any character that has no entry in asl_images.
DEFAULT_SPACE_IMAGE = 'https://asl-hands.s3.amazonaws.com/gifs/png-smiling-face-smiley-png-3896.png'
PLACEHOLDER_IMAGE = 'https://asl-hands.s3.amazonaws.com/placeholder.png'

# Lookup table from an upper-case letter, a number string, or the special key
# 'SPACE' to the S3 image that illustrates its sign.
#
# NOTE(review): 'K' currently points at the letter-J GIF, which looks like a
# copy-paste slip -- confirm the correct object name in the bucket and fix.
# NOTE(review): text_to_asl_images looks characters up one at a time, so the
# '10' key is never reached from there, and '0' has no entry (it falls back to
# PLACEHOLDER_IMAGE).
asl_images = {
    # Letters: file names in the bucket are irregular, so each is spelled out.
    'A': 'https://asl-hands.s3.amazonaws.com/gifs/A-Sign-Language-Alphabet.gif',
    'B': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-B-in-Sign-Language-ASL.gif',
    'C': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-say-letter-C-in-ASL-sign-Language.gif',
    'D': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-D-in-Sign-Language-ASL.gif',
    'E': 'https://asl-hands.s3.amazonaws.com/gifs/The-Letter-E-in-Sign-Language.gif',
    'F': 'https://asl-hands.s3.amazonaws.com/gifs/What-is-F-in-Sign-Language-ASL.gif',
    'G': 'https://asl-hands.s3.amazonaws.com/gifs/What-is-G-in-Sign-Language-ASL.gif',
    'H': 'https://asl-hands.s3.amazonaws.com/gifs/H-in-Sign-Language-Alphabet.gif',
    'I': 'https://asl-hands.s3.amazonaws.com/gifs/What-is-I-in-Sign-Language-ASL.gif',
    'J': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-J-in-ASL-Alphabets.gif',
    'K': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-J-in-ASL-Alphabets.gif',
    'L': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-L-in-ASL-Alphabets.gif',
    'M': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-M-in-ASL-Alphabets.gif',
    'N': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-N-in-ASL-Alphabets.gif',
    'O': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-O-in-ASL-Alphabets.gif',
    'P': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-P-in-ASL-Alphabets.gif',
    'Q': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-Q-in-ASL-Alphabets.gif',
    'R': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-R-in-ASL-Alphabets.gif',
    'S': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-S-in-ASL-Alphabets.gif',
    'T': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-T-in-ASL-Alphabets.gif',
    'U': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-U-in-ASL-Alphabets.gif',
    'V': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-V-in-ASL-Alphabets.gif',
    'W': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-W-in-ASL-Alphabets.gif',
    'X': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-X-in-ASL-Alphabets.gif',
    'Y': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-Y-in-ASL-Alphabets.gif',
    'Z': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-Z-in-ASL-Alphabets.gif',
    # Numbers '1'..'10': these follow a regular LSQ_<n>.jpg naming scheme.
    **{
        str(number): f'https://asl-hands.s3.amazonaws.com/LSQ_{number}.jpg'
        for number in range(1, 11)
    },
    # Word separator.
    'SPACE': DEFAULT_SPACE_IMAGE,
}
def transcribe_audio(audio):
    """Transcribe an audio file to text with the module-level Whisper model.

    Args:
        audio: Path to the audio file to transcribe. May be None or empty
            when the caller has nothing recorded or uploaded.

    Returns:
        The decoded text, or an empty string when no audio was supplied.

    Note:
        The waveform is passed through ``whisper.pad_or_trim`` before
        decoding, so only a single fixed-length window is transcribed
        (30 seconds per the Whisper README); longer recordings are cut off.
    """
    # Gradio hands over None when the button is pressed without any audio;
    # return an empty transcription instead of crashing inside load_audio.
    if not audio:
        return ""

    waveform = whisper.load_audio(audio)
    waveform = whisper.pad_or_trim(waveform)
    mel = whisper.log_mel_spectrogram(waveform).to(model.device)

    # Language detection is only used for the log line below; the decoding
    # options do not pin a language.
    _, probs = model.detect_language(mel)
    print(f"Detected language: {max(probs, key=probs.get)}")

    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)
    return result.text
def text_to_asl_images(text):
    """Convert text into a list of (image URL, caption) pairs for a gallery.

    ASCII punctuation is removed, the text is upper-cased and split on
    whitespace. Each remaining character becomes one pair whose image is
    looked up in ``asl_images`` (falling back to ``PLACEHOLDER_IMAGE``) and
    whose caption is the character itself. A ``DEFAULT_SPACE_IMAGE`` entry
    captioned "␣" is inserted between consecutive words, but not after the
    last one.

    Args:
        text: The text to translate. None or an empty string is accepted.

    Returns:
        A list of ``(image_url, caption)`` tuples; empty when there is
        nothing to translate.
    """
    # A cleared Gradio textbox may deliver None; treat it like empty text
    # rather than failing on ``None.translate``.
    if not text:
        return []

    # Only ASCII punctuation is stripped; other symbols fall through to the
    # placeholder image.
    cleaned = text.translate(str.maketrans('', '', string.punctuation))
    words = cleaned.upper().split()
    last_index = len(words) - 1

    images_with_text = []
    for index, word in enumerate(words):
        images_with_text.extend(
            (asl_images.get(char, PLACEHOLDER_IMAGE), char) for char in word
        )
        if index < last_index:  # no separator after the final word
            images_with_text.append((DEFAULT_SPACE_IMAGE, "␣"))
    return images_with_text
def interface_audio(audio):
    """Gradio callback for the audio tab.

    Transcribes ``audio`` and translates the resulting text, returning the
    pair ``(transcription, asl_translation)`` in the order the UI outputs
    expect.
    """
    spoken_text = transcribe_audio(audio)
    return spoken_text, text_to_asl_images(spoken_text)
def interface_text(text):
    """Gradio callback for the text tab: translate ``text`` to ASL images."""
    return text_to_asl_images(text)
# Custom CSS passed to gr.Blocks: makes both gallery containers vertically
# scrollable (capped at 400px) and lays the sign images out as small
# fixed-size tiles with a caption underneath.
# NOTE(review): the .gallery selectors assume the Gallery component renders
# elements with that class -- confirm against the installed Gradio version.
custom_css = """
#asl-output, #asl-output-text {
    overflow-x: hidden;
    overflow-y: auto;
    max-height: 400px;
    padding: 10px;
}
.gallery {
    display: flex;
    flex-wrap: wrap;
    justify-content: flex-start;
    gap: 5px;
}
.gallery > div {
    flex: 0 0 auto;
    width: 60px;
    height: 80px;
    display: flex;
    flex-direction: column;
    align-items: center;
    justify-content: center;
    border: 1px solid #ddd;
    border-radius: 4px;
    padding: 2px;
}
.gallery img {
    max-width: 100%;
    max-height: 60px;
    object-fit: contain;
}
.gallery .caption {
    font-size: 12px;
    margin-top: 2px;
}
"""
# Gradio Blocks UI: one tab that transcribes audio and translates the
# transcription, and one tab that translates typed text directly.
with gr.Blocks(css=custom_css) as demo:
    gr.Markdown("# Whisper & ASL Translation App")

    # Tab 1: audio -> transcription -> ASL gallery
    with gr.Tab("Audio Input"):
        with gr.Row():
            audio_input = gr.Audio(type="filepath", label="Record or Upload Audio")
        with gr.Row():
            submit_btn_audio = gr.Button("Transcribe and Translate")
        with gr.Row():
            transcription_output = gr.Textbox(label="Transcription")
        with gr.Row():
            asl_output = gr.Gallery(
                label="ASL Translation",
                elem_id="asl-output",
                columns=10,
            )
        submit_btn_audio.click(
            fn=interface_audio,
            inputs=[audio_input],
            outputs=[transcription_output, asl_output],
        )

    # Tab 2: typed text -> ASL gallery
    with gr.Tab("Text Input"):
        with gr.Row():
            text_input = gr.Textbox(label="Enter text for ASL translation")
        with gr.Row():
            submit_btn_text = gr.Button("Translate to ASL")
        with gr.Row():
            asl_output_text = gr.Gallery(
                label="ASL Translation",
                elem_id="asl-output-text",
                columns=10,
            )
        submit_btn_text.click(
            fn=interface_text,
            inputs=[text_input],
            outputs=[asl_output_text],
        )

# Start the web server for the app.
demo.launch()