import gradio as gr
import whisper
import torch
import string

# Load the Whisper "base" checkpoint once, at import time. transcribe_audio()
# reuses this module-level instance for language detection and decoding.
model = whisper.load_model("base")

# Default images used by text_to_asl_images():
#   DEFAULT_SPACE_IMAGE - tile inserted between words.
#   PLACEHOLDER_IMAGE   - fallback tile for characters with no entry in asl_images.
DEFAULT_SPACE_IMAGE = 'https://asl-hands.s3.amazonaws.com/gifs/png-smiling-face-smiley-png-3896.png'  # Word separator; NOTE(review): file name suggests a smiley face rather than a blank image - confirm this is intended
PLACEHOLDER_IMAGE = 'https://asl-hands.s3.amazonaws.com/placeholder.png'  # Shown for characters that have no sign image

# ASL dictionary mapping letters and numbers to corresponding S3 images.
# Keys are uppercase letters 'A'-'Z' and the digit strings '1'-'10'; values are
# image URLs. text_to_asl_images() upper-cases its input and looks up one
# character at a time, so there is no entry matched for '0' (it falls back to
# PLACEHOLDER_IMAGE).
# NOTE(review): the number images are named LSQ_*.jpg, which may indicate they
# come from a different sign-language set than the letter GIFs - confirm.
asl_images = {
    'A': 'https://asl-hands.s3.amazonaws.com/gifs/A-Sign-Language-Alphabet.gif',
    'B': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-B-in-Sign-Language-ASL.gif',
    'C': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-say-letter-C-in-ASL-sign-Language.gif',
    'D': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-D-in-Sign-Language-ASL.gif',
    'E': 'https://asl-hands.s3.amazonaws.com/gifs/The-Letter-E-in-Sign-Language.gif',
    'F': 'https://asl-hands.s3.amazonaws.com/gifs/What-is-F-in-Sign-Language-ASL.gif',
    'G': 'https://asl-hands.s3.amazonaws.com/gifs/What-is-G-in-Sign-Language-ASL.gif',
    'H': 'https://asl-hands.s3.amazonaws.com/gifs/H-in-Sign-Language-Alphabet.gif',
    'I': 'https://asl-hands.s3.amazonaws.com/gifs/What-is-I-in-Sign-Language-ASL.gif',
    'J': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-J-in-ASL-Alphabets.gif',
    'K': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-J-in-ASL-Alphabets.gif',  # NOTE(review): identical to the 'J' URL - likely a copy-paste error; confirm the correct GIF for K
    'L': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-L-in-ASL-Alphabets.gif',
    'M': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-M-in-ASL-Alphabets.gif',
    'N': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-N-in-ASL-Alphabets.gif',
    'O': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-O-in-ASL-Alphabets.gif',
    'P': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-P-in-ASL-Alphabets.gif',
    'Q': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-Q-in-ASL-Alphabets.gif',
    'R': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-R-in-ASL-Alphabets.gif',
    'S': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-S-in-ASL-Alphabets.gif',
    'T': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-T-in-ASL-Alphabets.gif',
    'U': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-U-in-ASL-Alphabets.gif',
    'V': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-V-in-ASL-Alphabets.gif',
    'W': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-W-in-ASL-Alphabets.gif',
    'X': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-X-in-ASL-Alphabets.gif',
    'Y': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-Y-in-ASL-Alphabets.gif',
    'Z': 'https://asl-hands.s3.amazonaws.com/gifs/How-to-Say-Letter-Z-in-ASL-Alphabets.gif',
    '1': 'https://asl-hands.s3.amazonaws.com/LSQ_1.jpg',
    '2': 'https://asl-hands.s3.amazonaws.com/LSQ_2.jpg',
    '3': 'https://asl-hands.s3.amazonaws.com/LSQ_3.jpg',
    '4': 'https://asl-hands.s3.amazonaws.com/LSQ_4.jpg',
    '5': 'https://asl-hands.s3.amazonaws.com/LSQ_5.jpg',
    '6': 'https://asl-hands.s3.amazonaws.com/LSQ_6.jpg',
    '7': 'https://asl-hands.s3.amazonaws.com/LSQ_7.jpg',
    '8': 'https://asl-hands.s3.amazonaws.com/LSQ_8.jpg',
    '9': 'https://asl-hands.s3.amazonaws.com/LSQ_9.jpg',
    '10': 'https://asl-hands.s3.amazonaws.com/LSQ_10.jpg'  # NOTE: text_to_asl_images() looks up single characters, so this two-character key is never matched there
}

# Register the word-separator image under the 'SPACE' key.
# NOTE: text_to_asl_images() uses DEFAULT_SPACE_IMAGE directly and does not
# read this entry.
asl_images['SPACE'] = DEFAULT_SPACE_IMAGE

# Transcribe the audio file using Whisper
def transcribe_audio(audio):
    """Transcribe an audio file to text with the module-level Whisper model.

    Args:
        audio: Path to an audio file (the Gradio Audio component in this file
            is created with ``type="filepath"``). ``None`` or an empty string
            means no audio was recorded or uploaded.

    Returns:
        The decoded text, or an empty string when no audio was supplied.
    """
    # The callback receives None when the button is clicked without any audio;
    # return an empty transcription instead of failing inside whisper.load_audio.
    if not audio:
        return ""

    waveform = whisper.load_audio(audio)
    # pad_or_trim fits the waveform to Whisper's fixed input window (30 seconds
    # per the Whisper README), so longer recordings are truncated to that window.
    waveform = whisper.pad_or_trim(waveform)
    mel = whisper.log_mel_spectrogram(waveform).to(model.device)

    # Language detection is only logged; its result is not passed to the decoder.
    _, probs = model.detect_language(mel)
    print(f"Detected language: {max(probs, key=probs.get)}")

    # fp16 is disabled so decoding runs in full precision.
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)
    return result.text

# Convert text to ASL images with corresponding letters, adding spaces between words
def text_to_asl_images(text):
    """Convert text into a list of ``(image_url, caption)`` gallery items.

    ASCII punctuation (``string.punctuation``) is removed, the text is
    upper-cased and split on whitespace. Each remaining character becomes one
    item whose image comes from ``asl_images`` (falling back to
    ``PLACEHOLDER_IMAGE`` for characters without an entry) and whose caption
    is the character itself. A ``DEFAULT_SPACE_IMAGE`` item captioned "␣" is
    placed between consecutive words, never after the last one.

    Args:
        text: The text to translate. ``None`` or an empty string yields an
            empty list.

    Returns:
        A list of ``(image_url, caption)`` tuples in display order.
    """
    # Guard against None/empty input so the callback never fails on .translate.
    if not text:
        return []

    # Remove punctuation
    cleaned = text.translate(str.maketrans('', '', string.punctuation))

    images_with_text = []
    for position, word in enumerate(cleaned.upper().split()):
        # Emit the separator before every word except the first, which is
        # equivalent to "between words" without needing the word count.
        if position:
            images_with_text.append((DEFAULT_SPACE_IMAGE, "␣"))
        images_with_text.extend(
            (asl_images.get(char, PLACEHOLDER_IMAGE), char) for char in word
        )
    return images_with_text

# Gradio interface for audio input
def interface_audio(audio):
    """Gradio callback for the audio tab.

    Transcribes ``audio`` with transcribe_audio() and returns a
    ``(transcription, gallery_items)`` pair, where the gallery items are
    produced by text_to_asl_images() from that transcription.
    """
    spoken_text = transcribe_audio(audio)
    return spoken_text, text_to_asl_images(spoken_text)

# Gradio interface for text input
def interface_text(text):
    """Gradio callback for the text tab: return the gallery items for ``text``."""
    return text_to_asl_images(text)

# Custom CSS for layout and scrolling with smaller images.
# The #asl-output and #asl-output-text selectors match the elem_id values given
# to the two gr.Gallery components below: each gets vertical scrolling capped
# at 400px. The .gallery rules size each tile to 60x80px with a small caption.
# NOTE(review): the .gallery / .caption class names presumably target Gradio's
# generated gallery markup - verify against the Gradio version in use.
custom_css = """
#asl-output, #asl-output-text {
    overflow-x: hidden;
    overflow-y: auto;
    max-height: 400px;
    padding: 10px;
}
.gallery {
    display: flex;
    flex-wrap: wrap;
    justify-content: flex-start;
    gap: 5px;
}
.gallery > div {
    flex: 0 0 auto;
    width: 60px;
    height: 80px;
    display: flex;
    flex-direction: column;
    align-items: center;
    justify-content: center;
    border: 1px solid #ddd;
    border-radius: 4px;
    padding: 2px;
}
.gallery img {
    max-width: 100%;
    max-height: 60px;
    object-fit: contain;
}
.gallery .caption {
    font-size: 12px;
    margin-top: 2px;
}
"""

# Gradio Blocks Interface: two tabs, one for audio input and one for typed text.
# Components are declared in display order inside each tab.
with gr.Blocks(css=custom_css) as demo:
    gr.Markdown("# Whisper & ASL Translation App")
    
    # Audio tab: audio file -> transcription textbox + image gallery.
    with gr.Tab("Audio Input"):
        with gr.Row():
            # type="filepath" means the callback receives a path to the audio file.
            audio_input = gr.Audio(type="filepath", label="Record or Upload Audio")
        
        with gr.Row():
            submit_btn_audio = gr.Button("Transcribe and Translate")
        
        with gr.Row():
            transcription_output = gr.Textbox(label="Transcription")
        
        with gr.Row():
            # elem_id ties this gallery to the #asl-output rule in custom_css.
            asl_output = gr.Gallery(label="ASL Translation", elem_id="asl-output", columns=10)

        # interface_audio returns (transcription, gallery items), matching the two outputs.
        submit_btn_audio.click(interface_audio, inputs=audio_input, outputs=[transcription_output, asl_output])

    # Text tab: typed text -> image gallery (no transcription step).
    with gr.Tab("Text Input"):
        with gr.Row():
            text_input = gr.Textbox(label="Enter text for ASL translation")
        
        with gr.Row():
            submit_btn_text = gr.Button("Translate to ASL")
        
        with gr.Row():
            # elem_id ties this gallery to the #asl-output-text rule in custom_css.
            asl_output_text = gr.Gallery(label="ASL Translation", elem_id="asl-output-text", columns=10)

        submit_btn_text.click(interface_text, inputs=text_input, outputs=asl_output_text)

# Run the Gradio app.
# NOTE(review): launch() is called at module level with no
# `if __name__ == "__main__":` guard, so importing this module starts the app.
demo.launch()