<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Computer Agent Evaluation Viewer</title>
    <style>
        /* Page-wide defaults: light grey backdrop with padding around the content card */
        body {
            font-family: Arial, sans-serif;
            margin: 0;
            padding: 20px;
            background-color: #f5f5f5;
        }
        /* Centered white card (max 1200px wide) that wraps the whole viewer UI */
        .container {
            max-width: 1200px;
            margin: 0 auto;
            background-color: #fff;
            padding: 20px;
            border-radius: 8px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
        }
        h1, h2, h3 {
            color: #333;
        }
        /* Shared box model for all form controls; buttons override the border below */
        select, input, button {
            padding: 8px 12px;
            margin: 5px 0;
            border: 1px solid #ddd;
            border-radius: 4px;
        }
        /* Primary button look; must come after the shared rule so `border: none` wins */
        button {
            background-color: #4a6cf7;
            color: white;
            cursor: pointer;
            border: none;
        }
        button:hover {
            background-color: #3a5ce5;
        }
        /* Greyed-out state, e.g. Previous/Next at either end of the screenshot list */
        button:disabled {
            background-color: #cccccc;
            cursor: not-allowed;
        }
        /* Simple flex grid: a .row of equally wide .col children */
        .row {
            display: flex;
            margin-bottom: 20px;
        }
        .col {
            flex: 1;
            padding: 0 10px;
        }
        /* Screenshot frame: clips anything taller than 500px (image + caption) */
        .image-viewer {
            width: 100%;
            max-height: 500px;
            border: 1px solid #ddd;
            border-radius: 4px;
            overflow: hidden;
            margin-bottom: 10px;
            position: relative;
        }
        /* Scale the screenshot down to fit the frame and center it horizontally */
        .image-viewer img {
            max-width: 100%;
            max-height: 450px;
            display: block;
            margin: 0 auto;
        }
        /* Utility used by the screenshot caption (#image-caption carries this class
           in the markup, but the rule was missing, so the caption was left-aligned
           under a centered image) */
        .text-center {
            text-align: center;
        }
        /* Row holding the Previous/Next buttons and the position slider */
        .image-controls {
            display: flex;
            justify-content: space-between;
            align-items: center;
            margin-top: 10px;
        }
        .nav-buttons {
            display: flex;
            gap: 10px;
        }
        /* One collapsible card per agent step in the Agent Trace tab */
        .step {
            border: 1px solid #ddd;
            border-radius: 4px;
            margin-bottom: 10px;
            overflow: hidden;
        }
        /* Clickable header: title on the left, expand/collapse arrow on the right */
        .step-header {
            background-color: #f0f0f0;
            padding: 10px;
            font-weight: bold;
            cursor: pointer;
            display: flex;
            justify-content: space-between;
        }
        /* Step body: pre-wrap keeps the newlines the script writes into the text;
           long content scrolls inside a 300px box */
        .step-content {
            padding: 15px;
            white-space: pre-wrap;
            font-family: monospace;
            background-color: #f9f9f9;
            max-height: 300px;
            overflow-y: auto;
        }
        /* Toggled from script to show/hide sections. Declared after .image-controls,
           so at equal specificity this display value wins over its display: flex */
        .hidden {
            display: none;
        }
        /* Colors for the run status line (green = completed, red = failed) */
        .status-success {
            color: #22c55e;
            font-weight: bold;
        }
        .status-failure {
            color: #ef4444;
            font-weight: bold;
        }
        /* Tab strip above the Screenshots / Agent Trace / Raw JSON panels */
        .tabs {
            display: flex;
            border-bottom: 1px solid #ddd;
            margin-bottom: 20px;
        }
        /* Transparent bottom border reserves space so activating a tab does not shift layout */
        .tab {
            padding: 10px 20px;
            cursor: pointer;
            border-bottom: 2px solid transparent;
        }
        .tab.active {
            border-bottom-color: #4a6cf7;
            font-weight: bold;
        }
        /* Panels are hidden unless they also carry the "active" class */
        .tab-content {
            display: none;
        }
        .tab-content.active {
            display: block;
        }
        /* Used for the task text and the raw JSON dump; wraps long lines instead of overflowing */
        pre {
            background-color: #f0f0f0;
            padding: 10px;
            border-radius: 4px;
            overflow-x: auto;
            white-space: pre-wrap;
        }
        /* Red banner used by #json-error */
        .error-message {
            background-color: #fee2e2;
            color: #b91c1c;
            padding: 10px;
            border-radius: 4px;
            margin: 10px 0;
        }
        /* Small spinner: a ring with one colored edge, rotated by the `spin` animation */
        .loading {
            display: inline-block;
            width: 20px;
            height: 20px;
            border: 2px solid #f3f3f3;
            border-top: 2px solid #3498db;
            border-radius: 50%;
            animation: spin 1s linear infinite;
            margin-left: 10px;
        }
        /* One full clockwise turn per animation cycle */
        @keyframes spin {
            0% { transform: rotate(0deg); }
            100% { transform: rotate(360deg); }
        }
    </style>
</head>
<body>
    <div class="container">
        <h1>Computer Agent Evaluation Viewer</h1>
        
        <!-- Path and Eval Selection: base directory input, Refresh button,
             evaluation dropdown and a status line updated by the script -->
        <div style="margin-bottom: 20px; padding: 15px; background-color: #f0f0f0; border-radius: 8px;">
            <h2>Load Evaluation Data</h2>
            <div style="display: flex; gap: 10px; margin-top: 10px;">
                <!-- aria-label gives the field an accessible name; the placeholder alone is not a label -->
                <input type="text" id="base-path" placeholder="Base directory path (leave empty for default)"
                       aria-label="Base directory path"
                       style="flex-grow: 1; padding: 8px; border: 1px solid #ddd; border-radius: 4px;">
                <button type="button" id="refresh-evals-btn">Refresh</button>
            </div>
            <div style="margin-top: 10px;">
                <label for="eval-select">Select Evaluation:</label>
                <select id="eval-select" style="min-width: 300px;"></select>
            </div>
            <!-- role="status" makes loading/error messages written here announced politely -->
            <div id="load-status" role="status" style="margin-top: 10px; font-style: italic;"></div>
        </div>
        
        <!-- Example and Run Selectors: both start disabled and are enabled by the
             script once their options have been loaded -->
        <div class="row">
            <div class="col">
                <label for="example-select">Select Example:</label>
                <select id="example-select" disabled>
                    <option value="">-- Select Example --</option>
                </select>
            </div>
            <div class="col">
                <label for="run-select">Select Run:</label>
                <select id="run-select" disabled>
                    <option value="">-- Select Run --</option>
                </select>
            </div>
        </div>
        
        <!-- Task and status display: hidden until a run is selected (the script
             toggles the "hidden" class on #run-details) -->
        <div id="run-details" class="hidden">
            <div>
                <h2>Task</h2>
                <pre id="task-text"></pre>
            </div>

            <div>
                <h2>Run Status</h2>
                <div id="status-display"></div>
            </div>

            <!-- Tabs: data-tab="X" activates the panel whose id is "X-tab" -->
            <div class="tabs">
                <div class="tab active" data-tab="screenshots">Screenshots</div>
                <div class="tab" data-tab="agent-trace">Agent Trace</div>
                <div class="tab" data-tab="raw-json">Raw JSON</div>
            </div>

            <!-- Screenshots Tab -->
            <div id="screenshots-tab" class="tab-content active">
                <div id="no-images" class="hidden">
                    <p>No screenshots available for this run.</p>
                </div>
                <div id="image-container" class="image-viewer hidden">
                    <img id="current-image" src="" alt="Screenshot">
                    <p id="image-caption" class="text-center"></p>
                </div>
                <div class="image-controls hidden" id="image-controls">
                    <div class="nav-buttons">
                        <button type="button" id="prev-image">Previous</button>
                        <span id="image-counter">0 / 0</span>
                        <button type="button" id="next-image">Next</button>
                    </div>
                    <!-- min/max/value are rewritten by the script for each run;
                         aria-label gives the slider an accessible name -->
                    <input type="range" id="image-slider" min="0" max="0" value="0"
                           aria-label="Screenshot position" style="width: 100%">
                </div>
            </div>

            <!-- Agent Trace Tab: step cards are generated by the script -->
            <div id="agent-trace-tab" class="tab-content">
                <div id="agent-steps"></div>
            </div>

            <!-- Raw JSON Tab -->
            <div id="raw-json-tab" class="tab-content">
                <div id="json-loading-indicator" class="hidden">
                    <p>Loading metadata... <span class="loading"></span></p>
                </div>
                <div id="json-error" class="error-message hidden"></div>
                <pre id="raw-json"></pre>
            </div>
        </div>
    </div>

    <script>
        // Application state: single mutable object shared by every handler below.
        const appState = {
            // Base directory typed by the user; sent as the `path` query parameter
            basePath: '',
            // Currently selected evaluation / example / run (values of the three <select>s)
            evalId: null,
            currentExampleId: null,
            currentRunId: null,
            // Screenshots of the current run; each entry is read as { path, name }
            currentImages: [],
            // Index into currentImages of the screenshot being shown
            currentImageIndex: 0,
            // Server responses kept after loading
            loadedData: {
                // exampleId -> task text, as returned by the examples endpoint
                examples: {},
                // exampleId -> array of runs (each read as { id, status })
                runs: {},
                // exampleId -> runId -> metadata object (null when it failed to load)
                metadata: {},
                // exampleId -> runId -> screenshots response
                screenshots: {}
            }
        };
        
        // DOM elements: looked up once by id and reused by all handlers.
        // -- Load panel
        const basePathInput = document.getElementById('base-path');
        const refreshEvalsBtn = document.getElementById('refresh-evals-btn');
        const evalSelect = document.getElementById('eval-select');
        const loadStatusDisplay = document.getElementById('load-status');
        // -- Example / run selectors
        const exampleSelect = document.getElementById('example-select');
        const runSelect = document.getElementById('run-select');
        // -- Run details (task text and status line)
        const runDetails = document.getElementById('run-details');
        const taskText = document.getElementById('task-text');
        const statusDisplay = document.getElementById('status-display');
        // -- Screenshots tab
        const imageContainer = document.getElementById('image-container');
        const noImages = document.getElementById('no-images');
        const imageControls = document.getElementById('image-controls');
        const currentImage = document.getElementById('current-image');
        const imageCaption = document.getElementById('image-caption');
        const imageCounter = document.getElementById('image-counter');
        const imageSlider = document.getElementById('image-slider');
        const prevImage = document.getElementById('prev-image');
        const nextImage = document.getElementById('next-image');
        // -- Agent trace and raw JSON tabs
        const agentSteps = document.getElementById('agent-steps');
        const rawJson = document.getElementById('raw-json');
        const jsonLoadingIndicator = document.getElementById('json-loading-indicator');
        const jsonError = document.getElementById('json-error');
        
        // Reload the evaluation list whenever the Refresh button is clicked
        // (the initial load on page open is registered separately via DOMContentLoaded).
        refreshEvalsBtn.addEventListener('click', loadEvaluations);
        
        /**
         * Fetch the list of evaluation IDs for the base path in the text field,
         * rebuild the evaluation dropdown and auto-select the latest entry.
         *
         * "Latest" is the last ID in default (string) sort order. A change event
         * is always dispatched afterwards, so the example/run selectors are
         * reloaded, or cleared when the list is empty. Errors are logged and shown
         * in the status line; the Refresh button is disabled while the request runs.
         */
        async function loadEvaluations() {
            appState.basePath = basePathInput.value.trim();
            loadStatusDisplay.textContent = 'Loading evaluations...';
            refreshEvalsBtn.disabled = true;

            try {
                const response = await fetch(`/api/evals?path=${encodeURIComponent(appState.basePath)}`);
                if (!response.ok) {
                    // The error body is not guaranteed to be JSON (e.g. an HTML error
                    // page); fall back to the generic message instead of a SyntaxError.
                    const errorData = await response.json().catch(() => null);
                    throw new Error(errorData?.error || 'Failed to load evaluations');
                }

                const evals = await response.json();
                if (!Array.isArray(evals)) {
                    throw new Error('Unexpected response while loading evaluations');
                }

                // Clear existing options
                evalSelect.innerHTML = '<option value="">-- Select Evaluation --</option>';

                // Add new options in the order the server returned them
                evals.forEach(evalId => {
                    const option = document.createElement('option');
                    option.value = evalId;
                    option.textContent = evalId;
                    evalSelect.appendChild(option);
                });

                loadStatusDisplay.textContent = `Loaded ${evals.length} evaluations`;

                // Auto-select the latest evaluation; sort a copy so the response
                // array itself is left untouched.
                if (evals.length > 0) {
                    const latest = evals.slice().sort().reverse()[0];
                    evalSelect.value = latest;
                }

                // Always notify the change handler: with an empty list the select
                // holds the placeholder, which clears stale examples and runs.
                evalSelect.dispatchEvent(new Event('change'));
            } catch (err) {
                console.error('Error loading evaluations:', err);
                loadStatusDisplay.textContent = `Error: ${err.message}`;
            } finally {
                refreshEvalsBtn.disabled = false;
            }
        }
        
        // Handle evaluation selection: load the examples of the chosen evaluation,
        // fill the example dropdown and auto-select the first example.
        evalSelect.addEventListener('change', async () => {
            const selectedEvalId = evalSelect.value;
            appState.evalId = selectedEvalId;

            // Reset everything that depends on the evaluation up front, so a failed
            // request cannot leave the previous evaluation's examples selectable.
            appState.currentExampleId = null;
            appState.currentRunId = null;
            exampleSelect.innerHTML = '<option value="">-- Select Example --</option>';
            exampleSelect.disabled = true;
            runSelect.innerHTML = '<option value="">-- Select Run --</option>';
            runSelect.disabled = true;
            runDetails.classList.add('hidden');

            if (!selectedEvalId) {
                return;
            }

            try {
                loadStatusDisplay.textContent = 'Loading examples...';
                evalSelect.disabled = true;

                // The ID is a path segment, so it must be encoded like the query value
                const response = await fetch(`/api/eval/${encodeURIComponent(selectedEvalId)}/examples?path=${encodeURIComponent(appState.basePath)}`);
                if (!response.ok) {
                    const errorData = await response.json().catch(() => null);
                    throw new Error(errorData?.error || 'Failed to load examples');
                }

                const examples = await response.json();

                // Another selection was made while this request was in flight
                // (e.g. via Refresh); drop the outdated response.
                if (appState.evalId !== selectedEvalId) {
                    return;
                }

                appState.loadedData.examples = examples;

                // Update example dropdown; `examples` maps exampleId -> task text
                const exampleIds = Object.keys(examples);
                for (const [exampleId, task] of Object.entries(examples)) {
                    const option = document.createElement('option');
                    option.value = exampleId;
                    option.textContent = exampleId;
                    option.title = task; // Show task as tooltip
                    exampleSelect.appendChild(option);
                }

                exampleSelect.disabled = false;
                loadStatusDisplay.textContent = `Loaded ${exampleIds.length} examples`;

                // Auto-select the first example and let its handler load the runs
                if (exampleIds.length > 0) {
                    exampleSelect.value = exampleIds[0];
                    exampleSelect.dispatchEvent(new Event('change'));
                }
            } catch (err) {
                console.error('Error loading examples:', err);
                loadStatusDisplay.textContent = `Error: ${err.message}`;
            } finally {
                evalSelect.disabled = false;
            }
        });
        
        // Example selection: load the runs of the chosen example, fill the run
        // dropdown (sorted by run ID) and auto-select the first run.
        exampleSelect.addEventListener('change', async () => {
            const evalId = appState.evalId;
            const exampleId = exampleSelect.value;
            appState.currentExampleId = exampleId;
            appState.currentRunId = null;

            // Reset the run selection and hide the old run's details immediately,
            // so a failed request does not leave stale data on screen.
            runSelect.innerHTML = '<option value="">-- Select Run --</option>';
            runSelect.disabled = true;
            runDetails.classList.add('hidden');

            if (!exampleId) {
                return;
            }

            try {
                loadStatusDisplay.textContent = 'Loading runs...';
                exampleSelect.disabled = true;

                // IDs are path segments, so encode them like the query value
                const response = await fetch(`/api/eval/${encodeURIComponent(evalId)}/example/${encodeURIComponent(exampleId)}/runs?path=${encodeURIComponent(appState.basePath)}`);
                if (!response.ok) {
                    const errorData = await response.json().catch(() => null);
                    throw new Error(errorData?.error || 'Failed to load runs');
                }

                const runs = await response.json();
                if (!Array.isArray(runs)) {
                    throw new Error('Unexpected response while loading runs');
                }

                // The evaluation or example changed while this request was in
                // flight; drop the outdated response.
                if (appState.evalId !== evalId || appState.currentExampleId !== exampleId) {
                    return;
                }

                // Sort runs by ID with numeric collation ("run2" before "run10").
                // String() keeps this working when the server sends numeric IDs.
                runs.sort((a, b) => String(a.id).localeCompare(String(b.id), undefined, {numeric: true}));
                appState.loadedData.runs[exampleId] = runs;

                // Update run dropdown with sorted runs
                runs.forEach(run => {
                    const option = document.createElement('option');
                    option.value = run.id;
                    option.textContent = `${run.id} (${run.status})`;
                    option.dataset.status = run.status;
                    runSelect.appendChild(option);
                });

                runSelect.disabled = false;
                loadStatusDisplay.textContent = `Loaded ${runs.length} runs`;

                // Auto-select the first run and let its handler load the run data
                if (runs.length > 0) {
                    runSelect.value = runs[0].id;
                    runSelect.dispatchEvent(new Event('change'));
                }
            } catch (err) {
                console.error('Error loading runs:', err);
                loadStatusDisplay.textContent = `Error: ${err.message}`;
            } finally {
                // Keep the select disabled when the evaluation was cleared meanwhile
                exampleSelect.disabled = !appState.evalId;
            }
        });
        
        // Run selection: remember the chosen run, then either start loading its
        // data and reveal the details panel, or hide the panel when the run or
        // example is unset.
        runSelect.addEventListener('change', () => {
            const chosenRunId = runSelect.value;
            appState.currentRunId = chosenRunId;

            const exampleId = appState.currentExampleId;
            if (!chosenRunId || !exampleId) {
                runDetails.classList.add('hidden');
                return;
            }

            // Not awaited on purpose: loadRunData reports its own errors
            loadRunData(exampleId, chosenRunId);
            runDetails.classList.remove('hidden');
        });
        
        /**
         * Load and render everything for one run: metadata (task, status, agent
         * trace, raw JSON) and screenshots.
         *
         * Metadata errors are shown in the Raw JSON tab and do not stop screenshot
         * loading. When several calls overlap, only the most recent one may update
         * the page; older responses are discarded.
         *
         * @param {string} exampleId - ID of the example the run belongs to.
         * @param {string} runId - ID of the run to load.
         * @returns {Promise<void>} Resolves when rendering finished; never rejects.
         */
        async function loadRunData(exampleId, runId) {
            // Latest-call-wins token, stored on the function itself
            const requestToken = (loadRunData.latestRequest || 0) + 1;
            loadRunData.latestRequest = requestToken;
            const isStale = () => loadRunData.latestRequest !== requestToken;

            loadStatusDisplay.textContent = 'Loading run data...';
            runSelect.disabled = true;
            jsonLoadingIndicator.classList.remove('hidden');
            jsonError.classList.add('hidden');

            // IDs are path segments, so they are encoded like the query value
            const runUrl = `/api/eval/${encodeURIComponent(appState.evalId)}` +
                `/example/${encodeURIComponent(exampleId)}/run/${encodeURIComponent(runId)}`;
            const query = `?path=${encodeURIComponent(appState.basePath)}`;

            // Append one <p> to the status display. Text is set via textContent so
            // server-provided strings (error messages) are never parsed as HTML.
            const addStatusLine = (text, className) => {
                const line = document.createElement('p');
                if (className) {
                    const badge = document.createElement('span');
                    badge.className = className;
                    badge.textContent = text;
                    line.appendChild(badge);
                } else {
                    line.textContent = text;
                }
                statusDisplay.appendChild(line);
            };

            try {
                // Get metadata
                const metadataResponse = await fetch(`${runUrl}/metadata${query}`);
                let metadata = null;
                let metadataError = null;

                if (metadataResponse.ok) {
                    metadata = await metadataResponse.json();
                } else {
                    // The error body may not be JSON; do not let that abort the load
                    const errorData = await metadataResponse.json().catch(() => null);
                    console.error('Error loading metadata:', errorData);
                    metadataError = `Error loading metadata: ${errorData?.error || 'Unknown error'}`;
                }

                if (isStale()) return;

                if (metadataError) {
                    jsonError.textContent = metadataError;
                    jsonError.classList.remove('hidden');
                }

                appState.loadedData.metadata[exampleId] = appState.loadedData.metadata[exampleId] || {};
                appState.loadedData.metadata[exampleId][runId] = metadata;

                // Display task
                const task = appState.loadedData.examples[exampleId];
                taskText.textContent = task || "No task available";

                // Display status
                statusDisplay.textContent = '';
                if (!metadata) {
                    addStatusLine('Status information not available');
                } else if (metadata.status === 'completed') {
                    addStatusLine('✓ Completed successfully', 'status-success');
                } else {
                    addStatusLine('✗ Failed', 'status-failure');
                    if (metadata.error_message) {
                        addStatusLine(`Error: ${metadata.error_message}`);
                    }
                }

                // Get screenshots; an error response or unexpected payload is
                // treated as "no screenshots" instead of crashing the viewer.
                const screenshotsResponse = await fetch(`${runUrl}/screenshots${query}`);
                let screenshots = [];
                let screenshotsOk = false;
                if (screenshotsResponse.ok) {
                    const payload = await screenshotsResponse.json();
                    if (Array.isArray(payload)) {
                        screenshots = payload;
                        screenshotsOk = true;
                    }
                }
                if (!screenshotsOk) {
                    console.error('Error loading screenshots: status', screenshotsResponse.status);
                }

                if (isStale()) return;

                appState.loadedData.screenshots[exampleId] = appState.loadedData.screenshots[exampleId] || {};
                appState.loadedData.screenshots[exampleId][runId] = screenshots;

                // Load screenshots
                loadScreenshots(exampleId, runId);

                // Load agent trace
                renderAgentTrace(metadata);

                // Display raw JSON
                rawJson.textContent = metadata
                    ? JSON.stringify(metadata, null, 2)
                    : "No metadata available";

                // Show screenshots tab by default
                document.querySelector('.tab[data-tab="screenshots"]').click();

                loadStatusDisplay.textContent = screenshotsOk
                    ? 'Run data loaded successfully'
                    : 'Run data loaded (screenshots could not be loaded)';
            } catch (err) {
                console.error('Error loading run data:', err);
                if (!isStale()) {
                    loadStatusDisplay.textContent = `Error: ${err.message}`;
                    jsonError.textContent = `Error loading data: ${err.message}`;
                    jsonError.classList.remove('hidden');
                }
            } finally {
                // A newer call owns the spinner and the select; leave them alone
                if (!isStale()) {
                    jsonLoadingIndicator.classList.add('hidden');
                    runSelect.disabled = false;
                }
            }
        }
        
        /**
         * Point the image viewer at the cached screenshots of a run and show the
         * first one, or show the "no screenshots" notice when there are none.
         *
         * @param {string} exampleId - Example whose cached screenshots to use.
         * @param {string} runId - Run whose cached screenshots to use.
         */
        function loadScreenshots(exampleId, runId) {
            // Anything other than an array (missing entry, error object from the
            // server) counts as "no screenshots"; it would otherwise get past the
            // length check and fail when the first image is read.
            const cached = appState.loadedData.screenshots[exampleId]?.[runId];
            appState.currentImages = Array.isArray(cached) ? cached : [];
            appState.currentImageIndex = 0;

            if (appState.currentImages.length === 0) {
                imageContainer.classList.add('hidden');
                imageControls.classList.add('hidden');
                noImages.classList.remove('hidden');
                return;
            }

            // Setup image viewer
            noImages.classList.add('hidden');
            imageContainer.classList.remove('hidden');
            imageControls.classList.remove('hidden');

            // Configure slider to cover every screenshot index
            imageSlider.min = 0;
            imageSlider.max = appState.currentImages.length - 1;
            imageSlider.value = 0;

            // Show the first image
            updateImageDisplay();
        }
        
        /**
         * Sync the viewer with appState.currentImageIndex: image source, caption,
         * "n / total" counter, slider position and the Previous/Next disabled
         * state. Does nothing when no screenshots are loaded.
         */
        function updateImageDisplay() {
            const frames = appState.currentImages;
            const total = frames.length;
            if (total === 0) return;

            const position = appState.currentImageIndex;
            const shot = frames[position];

            currentImage.src = shot.path;
            imageCaption.textContent = shot.name;
            imageCounter.textContent = `${position + 1} / ${total}`;
            imageSlider.value = position;

            // Previous is unavailable on the first image, Next on the last one
            const lastPosition = total - 1;
            prevImage.disabled = position === 0;
            nextImage.disabled = position === lastPosition;
        }
        
        // Image navigation: the buttons step one screenshot back or forward
        // (staying inside the list), the slider jumps straight to its position.
        prevImage.addEventListener('click', () => {
            const hasPrevious = appState.currentImageIndex > 0;
            if (!hasPrevious) return;

            appState.currentImageIndex -= 1;
            updateImageDisplay();
        });

        nextImage.addEventListener('click', () => {
            const lastIndex = appState.currentImages.length - 1;
            const hasNext = appState.currentImageIndex < lastIndex;
            if (!hasNext) return;

            appState.currentImageIndex += 1;
            updateImageDisplay();
        });

        imageSlider.addEventListener('input', () => {
            const sliderPosition = Number.parseInt(imageSlider.value);
            appState.currentImageIndex = sliderPosition;
            updateImageDisplay();
        });
        
        // Tab handling: activating a tab marks it active and shows the panel whose
        // id is "<data-tab>-tab". The tabs are plain <div>s, so they are also made
        // focusable and operable with Enter/Space for keyboard users.
        document.querySelectorAll('.tab').forEach(tab => {
            const activate = () => {
                // Set active tab and keep the pressed state in sync for assistive tech
                document.querySelectorAll('.tab').forEach(other => {
                    const isCurrent = other === tab;
                    other.classList.toggle('active', isCurrent);
                    other.setAttribute('aria-pressed', String(isCurrent));
                });

                // Show active content
                const tabId = tab.getAttribute('data-tab');
                document.querySelectorAll('.tab-content').forEach(content => {
                    content.classList.remove('active');
                });
                const panel = document.getElementById(`${tabId}-tab`);
                if (panel) {
                    panel.classList.add('active');
                }
            };

            // Expose each tab as a toggle button: focusable, with a pressed state
            tab.setAttribute('role', 'button');
            tab.tabIndex = 0;
            tab.setAttribute('aria-pressed', String(tab.classList.contains('active')));

            tab.addEventListener('click', activate);
            tab.addEventListener('keydown', (e) => {
                if (e.key === 'Enter' || e.key === ' ') {
                    e.preventDefault(); // Space would otherwise scroll the page
                    activate();
                }
            });
        });
        
        /**
         * Render the agent trace (metadata.summary) as a list of collapsible step
         * cards inside #agent-steps. All cards start expanded; clicking a header
         * toggles its body.
         *
         * Labels are real <strong> elements and all step data is inserted as text
         * nodes, so trace content is never parsed as HTML. Object values such as
         * structured tool arguments are shown as pretty-printed JSON.
         *
         * @param {object|null} metadata - Run metadata; when it is missing or has
         *     no non-empty `summary` array, a placeholder message is shown.
         */
        function renderAgentTrace(metadata) {
            agentSteps.innerHTML = '';

            const steps = metadata && metadata.summary;
            if (!Array.isArray(steps) || steps.length === 0) {
                agentSteps.innerHTML = '<p>No agent trace data available</p>';
                return;
            }

            // Strings pass through; objects become indented JSON instead of
            // "[object Object]"; everything else uses default string conversion.
            const toText = (value) => {
                if (typeof value === 'string') return value;
                if (value !== null && typeof value === 'object') {
                    try {
                        return JSON.stringify(value, null, 2);
                    } catch (err) {
                        return String(value);
                    }
                }
                return String(value);
            };

            // Process each step
            steps.forEach((rawStep, index) => {
                const step = rawStep || {}; // tolerate null entries in the summary

                const stepDiv = document.createElement('div');
                stepDiv.className = 'step';

                // Create step header: title on the left, toggle arrow on the right
                const headerDiv = document.createElement('div');
                headerDiv.className = 'step-header';

                let headerText = `Step ${index}`;
                if (index === 0 && step.task) {
                    headerText = 'Task';
                } else if (step.model_output_message) {
                    headerText = 'Planning';
                } else if (step.tool_calls) {
                    headerText = `Action ${index}`;
                } else if (step.error) {
                    headerText = 'Error';
                }

                const titleSpan = document.createElement('span');
                titleSpan.textContent = headerText;
                const arrowSpan = document.createElement('span');
                arrowSpan.textContent = '▲';
                headerDiv.appendChild(titleSpan);
                headerDiv.appendChild(arrowSpan);
                stepDiv.appendChild(headerDiv);

                // Create step content; all sections are visible by default
                const contentDiv = document.createElement('div');
                contentDiv.className = 'step-content';
                contentDiv.style.display = 'block';

                const addText = (text) => {
                    contentDiv.appendChild(document.createTextNode(text));
                };
                // Bold label followed by the value. `separator` is "\n" for block
                // values and " " for inline ones; `trailer` is the spacing after
                // the value. .step-content uses pre-wrap, so newlines are kept.
                const addSection = (label, value, separator, trailer) => {
                    const strong = document.createElement('strong');
                    strong.textContent = label;
                    contentDiv.appendChild(strong);
                    addText(`${separator}${toText(value)}${trailer}`);
                };

                // Task information: the header already says "Task", so no label
                if (index === 0 && step.task) {
                    addText(`${toText(step.task)}\n\n`);
                }

                // Model output and planning
                if (step.model_output_message && step.model_output_message.content) {
                    addSection('Model Output:', step.model_output_message.content, '\n', '\n\n');

                    if (step.plan) {
                        addSection('Plan:', step.plan, '\n', '\n\n');
                    }
                }

                // Tool calls
                if (step.tool_calls && step.tool_calls.length > 0) {
                    step.tool_calls.forEach(toolCall => {
                        if (toolCall && toolCall.function) {
                            addSection('Tool Call:', toolCall.function.name, ' ', '\n');
                            if (toolCall.function.arguments) {
                                addSection('Arguments:', toolCall.function.arguments, '\n', '\n\n');
                            }
                        }
                    });
                }

                // Model reasoning
                if (step.model_output) {
                    addSection('Model Reasoning:', step.model_output, '\n', '\n\n');
                }

                // Observations
                if (step.observations) {
                    addSection('Observations:', step.observations, '\n', '\n\n');
                }

                // Action output
                if (step.action_output) {
                    addSection('Action Output:', step.action_output, '\n', '\n\n');
                }

                // Errors
                if (step.error) {
                    addSection('Error Type:', step.error.type || 'Unknown', ' ', '\n');
                    if (step.error.message) {
                        addSection('Error Message:', step.error.message, ' ', '\n');
                    }
                }

                if (!contentDiv.hasChildNodes()) {
                    contentDiv.textContent = "No content available for this step";
                }
                stepDiv.appendChild(contentDiv);

                // Toggle the body on header click; ▲ = expanded, ▼ = collapsed
                headerDiv.addEventListener('click', () => {
                    const isHidden = contentDiv.style.display === 'none';
                    contentDiv.style.display = isHidden ? 'block' : 'none';
                    arrowSpan.textContent = isHidden ? '▲' : '▼';
                });

                agentSteps.appendChild(stepDiv);
            });
        }
        
        // Handle keyboard navigation for images: Left/Right arrows step through the
        // screenshots while the details panel and the screenshots tab are showing.
        document.addEventListener('keydown', (e) => {
            if (!appState.currentImages || appState.currentImages.length === 0) return;

            // Leave browser/OS shortcuts such as Alt+Left (history back) alone
            if (e.altKey || e.ctrlKey || e.metaKey) return;

            // Ignore keys aimed at form controls: arrows there move the text caret
            // or change the control itself. On the slider, handling the key here as
            // well advanced two images per key press (this handler plus the native
            // step, which fires the slider's own input handler).
            const target = e.target;
            if (target instanceof HTMLElement &&
                (target.isContentEditable || /^(INPUT|SELECT|TEXTAREA)$/.test(target.tagName))) {
                return;
            }

            // Nothing to navigate while the run details panel is hidden
            if (runDetails.classList.contains('hidden')) return;

            // Check if the screenshots tab is active
            const screenshotsTab = document.getElementById('screenshots-tab');
            if (!screenshotsTab.classList.contains('active')) return;

            if (e.key === 'ArrowLeft' && appState.currentImageIndex > 0) {
                appState.currentImageIndex--;
                updateImageDisplay();
            } else if (e.key === 'ArrowRight' && appState.currentImageIndex < appState.currentImages.length - 1) {
                appState.currentImageIndex++;
                updateImageDisplay();
            }
        });
        
        // Load evaluations on page load: fetch the evaluation list once the
        // document has been parsed, using whatever the base path field contains
        // at that moment.
        document.addEventListener('DOMContentLoaded', loadEvaluations);
    </script>
</body>
</html>