"""Gradio app: transcribe speech with Whisper and fingerspell text as ASL images.

The app exposes two tabs. "Audio Input" transcribes a recorded or uploaded
audio file with Whisper and renders the transcription as a gallery of ASL
hand-sign images, one image per character. "Text Input" renders typed text
the same way without the transcription step.
"""

import string

import gradio as gr
import torch  # not referenced directly in this file; kept from the original imports
import whisper

# Load Whisper model once at import time so every request reuses it.
model = whisper.load_model("base")

# Image shown between words in the gallery.
DEFAULT_SPACE_IMAGE = 'https://asl-hands.s3.amazonaws.com/gifs/png-smiling-face-smiley-png-3896.png'
# Image shown for any character that has no entry in `asl_images`.
PLACEHOLDER_IMAGE = 'https://asl-hands.s3.amazonaws.com/placeholder.png'

# ASL dictionary mapping letters and numbers to corresponding S3 images.
asl_images = {
    'A': 'https://asl-hands.s3.amazonaws.com/gifs/A-Sign-Language-Alphabet.gif',
    'B': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-B-in-Sign-Language-ASL.gif',
    'C': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-say-letter-C-in-ASL-sign-Language.gif',
    'D': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-D-in-Sign-Language-ASL.gif',
    'E': 'https://asl-hands.s3.amazonaws.com/gifs/The-Letter-E-in-Sign-Language.gif',
    'F': 'https://asl-hands.s3.amazonaws.com/gifs/What-is-F-in-Sign-Language-ASL.gif',
    'G': 'https://asl-hands.s3.amazonaws.com/gifs/What-is-G-in-Sign-Language-ASL.gif',
    'H': 'https://asl-hands.s3.amazonaws.com/gifs/H-in-Sign-Language-Alphabet.gif',
    'I': 'https://asl-hands.s3.amazonaws.com/gifs/What-is-I-in-Sign-Language-ASL.gif',
    'J': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-J-in-ASL-Alphabets.gif',
    # NOTE(review): 'K' points at the letter-J GIF (same URL as 'J'), which looks
    # like a copy-paste slip. Left unchanged because the correct object name in
    # the bucket cannot be confirmed from this file -- TODO verify and fix.
    'K': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-J-in-ASL-Alphabets.gif',
    'L': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-L-in-ASL-Alphabets.gif',
    'M': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-M-in-ASL-Alphabets.gif',
    'N': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-N-in-ASL-Alphabets.gif',
    'O': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-O-in-ASL-Alphabets.gif',
    'P': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-P-in-ASL-Alphabets.gif',
    'Q': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-Q-in-ASL-Alphabets.gif',
    'R': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-R-in-ASL-Alphabets.gif',
    'S': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-S-in-ASL-Alphabets.gif',
    'T': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-T-in-ASL-Alphabets.gif',
    'U': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-U-in-ASL-Alphabets.gif',
    'V': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-V-in-ASL-Alphabets.gif',
    'W': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-W-in-ASL-Alphabets.gif',
    'X': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-X-in-ASL-Alphabets.gif',
    'Y': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-Y-in-ASL-Alphabets.gif',
    'Z': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-Z-in-ASL-Alphabets.gif',
    '1': 'https://asl-hands.s3.amazonaws.com/LSQ_1.jpg',
    '2': 'https://asl-hands.s3.amazonaws.com/LSQ_2.jpg',
    '3': 'https://asl-hands.s3.amazonaws.com/LSQ_3.jpg',
    '4': 'https://asl-hands.s3.amazonaws.com/LSQ_4.jpg',
    '5': 'https://asl-hands.s3.amazonaws.com/LSQ_5.jpg',
    '6': 'https://asl-hands.s3.amazonaws.com/LSQ_6.jpg',
    '7': 'https://asl-hands.s3.amazonaws.com/LSQ_7.jpg',
    '8': 'https://asl-hands.s3.amazonaws.com/LSQ_8.jpg',
    '9': 'https://asl-hands.s3.amazonaws.com/LSQ_9.jpg',
    # NOTE(review): lookups in text_to_asl_images are per character, so this
    # two-character key is never matched, and '0' has no entry of its own
    # (it falls back to PLACEHOLDER_IMAGE).
    '10': 'https://asl-hands.s3.amazonaws.com/LSQ_10.jpg',
}

# Ensure 'SPACE' is in the dictionary. Nothing in this file reads this key
# (word gaps use DEFAULT_SPACE_IMAGE directly); kept for compatibility.
asl_images['SPACE'] = DEFAULT_SPACE_IMAGE

# Translation table that deletes every ASCII punctuation character; built once
# instead of on every call.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def transcribe_audio(audio):
    """Transcribe an audio file with the module-level Whisper model.

    Args:
        audio: Path to the audio file (the Gradio audio input is configured
            with ``type="filepath"``).

    Returns:
        The decoded text of the recording.

    Note:
        ``whisper.pad_or_trim`` fits the waveform to Whisper's fixed input
        window (30 seconds according to the Whisper documentation), so only
        the beginning of a longer recording is transcribed.
    """
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # Language detection is only logged; its result is not passed to the decoder.
    _, probs = model.detect_language(mel)
    print(f"Detected language: {max(probs, key=probs.get)}")

    # fp16 disabled -- presumably so decoding also works on CPU; confirm.
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)
    return result.text


def text_to_asl_images(text):
    """Convert text into ``(image_url, caption)`` pairs for a Gradio gallery.

    ASCII punctuation is removed, the text is upper-cased and split on
    whitespace. Each remaining character becomes one gallery entry whose
    caption is the character itself; characters without an entry in
    ``asl_images`` use ``PLACEHOLDER_IMAGE``. A separator entry
    (``DEFAULT_SPACE_IMAGE`` captioned "␣") is inserted between words, but not
    after the last one.

    Args:
        text: The text to fingerspell. ``None`` or an empty string yields an
            empty list.

    Returns:
        A list of ``(image_url, caption)`` tuples in display order.
    """
    if not text:
        return []

    words = text.translate(_PUNCTUATION_TABLE).upper().split()
    last_index = len(words) - 1
    images_with_text = []
    for i, word in enumerate(words):
        images_with_text.extend(
            (asl_images.get(char, PLACEHOLDER_IMAGE), char) for char in word
        )
        if i < last_index:  # Don't add space after the last word
            images_with_text.append((DEFAULT_SPACE_IMAGE, "␣"))
    return images_with_text


def interface_audio(audio):
    """Gradio callback for the audio tab.

    Args:
        audio: Path of the recorded/uploaded file, or ``None`` when the button
            is clicked before any audio was provided.

    Returns:
        A ``(transcription, gallery_items)`` tuple matching the two outputs
        wired to the button.
    """
    if audio is None:
        # Nothing to transcribe; clear both outputs instead of crashing inside
        # whisper.load_audio.
        return "", []
    transcription = transcribe_audio(audio)
    asl_translation = text_to_asl_images(transcription)
    return transcription, asl_translation


def interface_text(text):
    """Gradio callback for the text tab: return gallery items for ``text``."""
    return text_to_asl_images(text)


# Custom CSS for layout and scrolling with smaller images. Written as adjacent
# string literals so the resulting value is identical to the original
# single-line stylesheet.
custom_css = (
    " #asl-output, #asl-output-text {"
    " overflow-x: hidden; overflow-y: auto; max-height: 400px; padding: 10px; }"
    " .gallery {"
    " display: flex; flex-wrap: wrap; justify-content: flex-start; gap: 5px; }"
    " .gallery > div {"
    " flex: 0 0 auto; width: 60px; height: 80px;"
    " display: flex; flex-direction: column;"
    " align-items: center; justify-content: center;"
    " border: 1px solid #ddd; border-radius: 4px; padding: 2px; }"
    " .gallery img {"
    " max-width: 100%; max-height: 60px; object-fit: contain; }"
    " .gallery .caption {"
    " font-size: 12px; margin-top: 2px; }"
    " "
)

# Gradio Blocks Interface
with gr.Blocks(css=custom_css) as demo:
    gr.Markdown("# Whisper & ASL Translation App")

    with gr.Tab("Audio Input"):
        with gr.Row():
            audio_input = gr.Audio(type="filepath", label="Record or Upload Audio")
        with gr.Row():
            submit_btn_audio = gr.Button("Transcribe and Translate")
        with gr.Row():
            transcription_output = gr.Textbox(label="Transcription")
        with gr.Row():
            asl_output = gr.Gallery(label="ASL Translation", elem_id="asl-output", columns=10)
        submit_btn_audio.click(
            interface_audio,
            inputs=audio_input,
            outputs=[transcription_output, asl_output],
        )

    with gr.Tab("Text Input"):
        with gr.Row():
            text_input = gr.Textbox(label="Enter text for ASL translation")
        with gr.Row():
            submit_btn_text = gr.Button("Translate to ASL")
        with gr.Row():
            asl_output_text = gr.Gallery(label="ASL Translation", elem_id="asl-output-text", columns=10)
        submit_btn_text.click(interface_text, inputs=text_input, outputs=asl_output_text)

# Run the Gradio app only when executed as a script, so importing this module
# (e.g. for reuse of the helpers) does not start a server.
if __name__ == "__main__":
    demo.launch()