"""Gradio app that transcribes audio/video files via the ElevenLabs speech-to-text API."""

import json
import os
import tempfile

import gradio as gr
import requests
import spaces
from dotenv import load_dotenv

# Load environment variables (expects ELEVENLABS_API_KEY in a .env file).
load_dotenv()

# Length, in seconds, of each timestamped segment in the formatted transcript.
TIMESTAMP_INTERVAL = 2


def check_gpu_availability():
    """Return a status banner describing the runtime environment."""
    return "🚀 Running on Hugging Face Spaces with GPU acceleration"


def create_transcript_file(transcript: str, original_filename: str = None):
    """Write *transcript* to a temp .txt file and return its path.

    Args:
        transcript: The transcript text to save.
        original_filename: Optional source media filename; its base name is
            embedded in the transcript filename.

    Returns:
        Path of the written file, or None when the transcript is empty or
        the file could not be written.
    """
    if not transcript or not transcript.strip():
        return None
    try:
        if original_filename:
            # Extract the base name without extension.
            base_name = os.path.splitext(os.path.basename(original_filename))[0]
            filename = f"Transcript_{base_name}.txt"
        else:
            filename = "Transcript.txt"
        filepath = os.path.join(tempfile.gettempdir(), filename)
        with open(filepath, "w", encoding="utf-8") as f:
            f.write(transcript)
        print(f"Transcript file created: {filepath}")
        return filepath
    except Exception as e:
        # Best-effort: a failed download file must not break transcript display.
        print(f"Error creating transcript file: {e}")
        return None


@spaces.GPU
def transcribe_audio(audio_file):
    """Transcribe an audio/video file using the ElevenLabs API.

    Args:
        audio_file: Filesystem path of the uploaded audio/video file.

    Returns:
        A (display_text, filepath) tuple: the timestamped transcript (or an
        error message) and the path of a downloadable .txt copy (None on
        error).  Always a 2-tuple so both Gradio outputs are populated.
    """
    try:
        api_key = os.getenv("ELEVENLABS_API_KEY")
        if not api_key:
            # BUG FIX: error branches previously returned a single string,
            # starving the second Gradio output component.
            return (
                "❌ Error: No API key found. Please set ELEVENLABS_API_KEY in your .env file.",
                None,
            )
        if not audio_file:
            return "❌ Error: Please upload an audio or video file.", None

        url = "https://api.elevenlabs.io/v1/speech-to-text"
        headers = {"xi-api-key": api_key}

        with open(audio_file, "rb") as audio_data:
            # NOTE(review): language_code "no" pins Norwegian — confirm intended.
            files = {
                "file": (os.path.basename(audio_file), audio_data),
                "model_id": (None, "scribe_v1"),
                "language_code": (None, "no"),
                "tag_audio_events": (None, "true"),
                "timestamps_granularity": (None, "word"),
                "file_format": (None, "other"),
                "webhook_metadata": (None, json.dumps({"start": "0", "end": "2"})),
            }
            # Timeout added: long uploads are fine, but never hang forever.
            response = requests.post(url, headers=headers, files=files, timeout=300)

        if response.status_code != 200:
            return f"❌ API Error: {response.status_code} - {response.text}", None

        result = response.json()

        def _segment(bucket, words_text):
            """Format one TIMESTAMP_INTERVAL-second bucket as '[MM:SS - MM:SS]\\ntext'."""
            lower = bucket * TIMESTAMP_INTERVAL
            upper = (bucket + 1) * TIMESTAMP_INTERVAL
            return (
                f"[{lower // 60:02d}:{lower % 60:02d} - "
                f"{upper // 60:02d}:{upper % 60:02d}]\n{words_text.strip()}\n\n"
            )

        # Group the per-word results into TIMESTAMP_INTERVAL-second buckets.
        timestamped_text = ""
        i = 0
        cur_text = ""
        for word in result.get("words", []):
            # Renamed from `text` to avoid shadowing the top-level transcript.
            word_text, start, end = (
                word.get("text"),
                word.get("start"),
                word.get("end"),
            )
            print(i, ". ", f"<{word_text}>", start, end)
            cur_text += f"{word_text} "
            if start >= (i + 1) * TIMESTAMP_INTERVAL:
                timestamped_text += _segment(i, cur_text)
                i += 1
                cur_text = ""
        # BUG FIX: the original dropped any words after the last interval boundary.
        if cur_text.strip():
            timestamped_text += _segment(i, cur_text)

        display_text = timestamped_text
        # Create transcript file for download.
        filepath = create_transcript_file(display_text, audio_file)
        return display_text, filepath
    except Exception as e:
        return f"❌ Error during transcription: {str(e)}", None


def create_interface():
    """Create and configure the Gradio Blocks interface.

    Note: the @spaces.GPU decorator was removed here and on
    create_transcript_file — neither function performs GPU work, and the
    decorator needlessly requests a ZeroGPU allocation.
    """
    with gr.Blocks(
        title="Audio/Video Transcriber",
        theme=gr.themes.Soft(),
        css="""
        .gradio-container { max-width: 800px !important; margin: 0 auto !important; }
        .main-header { text-align: center; margin-bottom: 2rem; }
        """,
    ) as interface:
        gr.HTML(
            f"""
            <div class="main-header">
                <h1>đŸŽĩ Audio/Video Transcriber</h1>
                <p>Upload audio or video files and transcribe them using ElevenLabs API</p>
                <p>{check_gpu_availability()}</p>
            </div>
            """
        )
        with gr.Row():
            with gr.Column(scale=2):
                # File upload component.
                audio_input = gr.Audio(
                    label="Upload Audio/Video File",
                    type="filepath",
                )
                transcribe_btn = gr.Button(
                    "🎤 Transcribe Audio/Video", variant="primary", size="lg"
                )
            with gr.Column(scale=2):
                # Output area.
                output_text = gr.Textbox(
                    label="Transcription Result",
                    placeholder="Transcribed text will appear here...",
                    lines=15,
                    max_lines=20,
                    interactive=False,
                )
                download_btn = gr.DownloadButton(
                    label="đŸ“Ĩ Download Transcript",
                    variant="secondary",
                    visible=True,
                )

        # Instructions and info.
        with gr.Accordion("â„šī¸ Instructions & Information", open=False):
            gr.HTML(
                """
                <h3>How to use:</h3>
                <ol>
                    <li>Upload an audio or video file (supported formats: MP3, WAV, MP4, MOV, etc.)</li>
                    <li>Click "Transcribe Audio/Video"</li>
                </ol>
                <h3>Transcription Model:</h3>
                <p>This application uses the scribe_v1 model for high-quality transcription.</p>
                <h3>API Key Setup:</h3>
                <ul>
                    <li>Get your API key from ElevenLabs</li>
                    <li>Create a .env file in the project root with: ELEVENLABS_API_KEY=your_key_here</li>
                </ul>
                """
            )

        # Wire the transcribe button to the two output components.
        transcribe_btn.click(
            fn=transcribe_audio,
            inputs=[audio_input],
            outputs=[output_text, download_btn],
        )
        gr.Examples(
            examples=[],
            inputs=[audio_input],
            label="Example Files (upload your own files to test)",
        )
    return interface


# For Hugging Face Spaces, launch the interface directly.
print("🚀 Starting Audio/Video Transcriber...")
print(f"📊 {check_gpu_availability()}")
print("📝 Make sure you have set your ELEVENLABS_API_KEY in the .env file")
print("🌐 Opening Gradio interface...")

interface = create_interface()
interface.launch(server_name="0.0.0.0", server_port=7860, share=False, show_error=True)