Spaces:

akhaliq
/

image-captioner-ai-newer

Running

App Files Files Community

akhaliq HF Staff committed 14 days ago

Commit

c4d95e0

verified ·

1 Parent(s): 5950436

Upload index.js with huggingface_hub

Browse files

Files changed (1) hide show

index.js +256 -56

index.js CHANGED Viewed

@@ -1,76 +1,276 @@
-import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/[email protected]';
-// Reference the elements that we will need
-const status = document.getElementById('status');
-const fileUpload = document.getElementById('upload');
-const imageContainer = document.getElementById('container');
-const example = document.getElementById('example');
-const EXAMPLE_URL = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/city-streets.jpg';
-// Create a new object detection pipeline
-status.textContent = 'Loading model...';
-const detector = await pipeline('object-detection', 'Xenova/detr-resnet-50');
-status.textContent = 'Ready';
-example.addEventListener('click', (e) => {
-    e.preventDefault();
-    detect(EXAMPLE_URL);
 });
-fileUpload.addEventListener('change', function (e) {
     const file = e.target.files[0];
-    if (!file) {
         return;
     }
     const reader = new FileReader();
-    // Set up a callback when the file is loaded
-    reader.onload = e2 => detect(e2.target.result);
-    reader.readAsDataURL(file);
 });
-// Detect objects in the image
-async function detect(img) {
-    imageContainer.innerHTML = '';
-    imageContainer.style.backgroundImage = `url(${img})`;
-    status.textContent = 'Analysing...';
-    const output = await detector(img, {
-        threshold: 0.5,
-        percentage: true,
     });
-    status.textContent = '';
-    output.forEach(renderBox);
 }
-// Render a bounding box and label on the image
-function renderBox({ box, label }) {
-    const { xmax, xmin, ymax, ymin } = box;
-    // Generate a random color for the box
-    const color = '#' + Math.floor(Math.random() * 0xFFFFFF).toString(16).padStart(6, 0);
-    // Draw the box
-    const boxElement = document.createElement('div');
-    boxElement.className = 'bounding-box';
-    Object.assign(boxElement.style, {
-        borderColor: color,
-        left: 100 * xmin + '%',
-        top: 100 * ymin + '%',
-        width: 100 * (xmax - xmin) + '%',
-        height: 100 * (ymax - ymin) + '%',
-    })
-    // Draw label
-    const labelElement = document.createElement('span');
-    labelElement.textContent = label;
-    labelElement.className = 'bounding-box-label';
-    labelElement.style.backgroundColor = color;
-    boxElement.appendChild(labelElement);
-    imageContainer.appendChild(boxElement);
 }

+import {
+  AutoProcessor,
+  AutoModelForImageTextToText,
+  load_image,
+  TextStreamer,
+} from 'https://cdn.jsdelivr.net/npm/@huggingface/[email protected]';
+// Shared module state. The processor and model start out null and are
+// assigned by initializeModel().
+let processor = null;
+let model = null;
+// URL (or data URL) of the image most recently loaded into the preview.
+let currentImageUrl = null;
+// True while initializeModel() is loading the processor and model.
+let isModelLoading = false;
+// DOM elements, looked up once when the module is evaluated.
+const byId = (id) => document.getElementById(id);
+const imageUrlInput = byId('image-url');
+const loadUrlBtn = byId('load-url-btn');
+const fileUpload = byId('file-upload');
+const imagePreview = byId('image-preview');
+const generateBtn = byId('generate-btn');
+const outputContainer = byId('output-container');
+const copyBtn = byId('copy-btn');
+const customPrompt = byId('custom-prompt');
+const maxTokensInput = byId('max-tokens');
+const deviceSelect = byId('device-select');
+const loadingOverlay = byId('loading-overlay');
+const loadingText = byId('loading-text');
+// Tab buttons and the panels they switch between.
+const tabBtns = document.querySelectorAll('.tab-btn');
+const tabContents = document.querySelectorAll('.tab-content');
+// Tab switching: a click makes the clicked button, and the panel whose id is
+// "<data-tab>-tab", the only ones carrying the "active" class.
+for (const tabBtn of tabBtns) {
+    tabBtn.addEventListener('click', () => {
+        const { tab } = tabBtn.dataset;
+        for (const otherBtn of tabBtns) {
+            otherBtn.classList.remove('active');
+        }
+        for (const panel of tabContents) {
+            panel.classList.remove('active');
+        }
+        tabBtn.classList.add('active');
+        const activePanel = document.getElementById(`${tab}-tab`);
+        activePanel.classList.add('active');
+    });
+}
+// Check WebGPU support.
+// Disables the "webgpu" device option and selects "wasm" when WebGPU cannot be
+// used. Having navigator.gpu is not sufficient on its own: requestAdapter()
+// can still resolve to null when no suitable adapter is available, so an
+// adapter is requested as well.
+// Returns a promise that resolves once the check has finished.
+async function checkWebGPUSupport() {
+    let adapter = null;
+    try {
+        adapter = navigator.gpu ? await navigator.gpu.requestAdapter() : null;
+    } catch (error) {
+        // Treat a failed adapter request the same as "not supported".
+        console.warn('WebGPU adapter request failed:', error);
+    }
+    if (adapter) return;
+    const webgpuOption = deviceSelect.querySelector('option[value="webgpu"]');
+    // The option may be absent from the markup, and the suffix must not be
+    // appended twice if this check runs again.
+    if (webgpuOption && !webgpuOption.disabled) {
+        webgpuOption.disabled = true;
+        webgpuOption.text += ' (Not Supported)';
+    }
+    deviceSelect.value = 'wasm';
+}
+// Load that is currently in progress, shared so that concurrent callers wait
+// for the same attempt instead of returning before the model is ready.
+let pendingModelLoad = null;
+// Initialize model.
+// Loads the processor and model on first use and does nothing when both are
+// already present. Load failures are logged and shown as a notification, not
+// thrown, so callers should check `processor` and `model` after awaiting.
+async function initializeModel() {
+    if (processor && model) return;
+    if (pendingModelLoad) {
+        await pendingModelLoad;
+        return;
+    }
+    isModelLoading = true;
+    showLoading('Initializing AI model...');
+    pendingModelLoad = loadModelAssets();
+    try {
+        await pendingModelLoad;
+    } finally {
+        // Single cleanup path for both success and failure.
+        pendingModelLoad = null;
+        isModelLoading = false;
+        hideLoading();
+    }
+}
+// Fetches the processor and the model weights and stores them in the module
+// state. Errors are handled here, so the returned promise does not reject.
+async function loadModelAssets() {
+    try {
+        const model_id = 'onnx-community/FastVLM-0.5B-ONNX';
+        const device = deviceSelect.value;
+        loadingText.textContent = 'Loading processor...';
+        const loadedProcessor = await AutoProcessor.from_pretrained(model_id);
+        loadingText.textContent = 'Loading model weights...';
+        const modelOptions = {
+            dtype: {
+                embed_tokens: 'fp16',
+                vision_encoder: 'q4',
+                decoder_model_merged: 'q4',
+            },
+        };
+        // The device is only passed on when WebGPU was selected.
+        if (device === 'webgpu') {
+            modelOptions.device = 'webgpu';
+        }
+        const loadedModel = await AutoModelForImageTextToText.from_pretrained(model_id, modelOptions);
+        // Assign both together so a failed load never leaves only one of them set.
+        processor = loadedProcessor;
+        model = loadedModel;
+        showNotification('Model loaded successfully!', 'success');
+    } catch (error) {
+        console.error('Model initialization error:', error);
+        showNotification('Failed to load model. Please try again.', 'error');
+    }
+}
+// Load image from URL: preview it and, only if that succeeds, remember it as
+// the current image and enable the generate button.
+loadUrlBtn.addEventListener('click', async () => {
+    const url = imageUrlInput.value.trim();
+    if (!url) {
+        showNotification('Please enter a valid image URL', 'error');
+        return;
+    }
+    showLoading('Loading image...');
+    try {
+        await loadImagePreview(url);
+        currentImageUrl = url;
+        generateBtn.disabled = false;
+    } catch (error) {
+        // Keep the underlying failure visible in the console for debugging.
+        console.error('Image load error:', error);
+        showNotification('Failed to load image. Please check the URL.', 'error');
+    } finally {
+        // The overlay is hidden on both the success and the failure path.
+        hideLoading();
+    }
 });
+// Handle file upload: read the chosen file (at most 10MB) as a data URL,
+// preview it and, only if the preview loads, make it the current image.
+fileUpload.addEventListener('change', async (e) => {
     const file = e.target.files[0];
+    if (!file) return;
+    if (file.size > 10 * 1024 * 1024) {
+        showNotification('File size must be less than 10MB', 'error');
         return;
     }
     const reader = new FileReader();
+    reader.onload = async (event) => {
+        const dataUrl = event.target.result;
+        try {
+            await loadImagePreview(dataUrl);
+            // Commit the new image only after it has actually loaded.
+            currentImageUrl = dataUrl;
+            generateBtn.disabled = false;
+        } catch (error) {
+            // Without this the rejection would go unhandled and the user
+            // would get no feedback.
+            console.error('Image preview error:', error);
+            showNotification('Failed to load image. Please try another file.', 'error');
+        }
+    };
+    reader.onerror = () => {
+        console.error('File read error:', reader.error);
+        showNotification('Failed to read file. Please try again.', 'error');
+    };
+    reader.readAsDataURL(file);
+});
+// Drag and drop on the file label: mark it with "drag-over" while something is
+// dragged across it, and pass a dropped image on to the file input.
+const fileLabel = document.querySelector('.file-label');
+fileLabel.addEventListener('dragover', (event) => {
+    // preventDefault is required for the element to accept a drop.
+    event.preventDefault();
+    fileLabel.classList.toggle('drag-over', true);
+});
+fileLabel.addEventListener('dragleave', () => {
+    fileLabel.classList.toggle('drag-over', false);
+});
+fileLabel.addEventListener('drop', (event) => {
+    event.preventDefault();
+    fileLabel.classList.toggle('drag-over', false);
+    const droppedFiles = event.dataTransfer.files;
+    const firstFile = droppedFiles[0];
+    if (!firstFile || !firstFile.type.startsWith('image/')) {
+        return;
+    }
+    // Reuse the upload path by handing the files to the input and firing
+    // its change event.
+    fileUpload.files = droppedFiles;
+    fileUpload.dispatchEvent(new Event('change'));
 });
+// Load image preview.
+// Loads `url` (an image URL or data URL) into a new image element and, once it
+// has loaded, shows it as the only content of the preview area.
+// Returns a promise that resolves after the image is in the preview and
+// rejects with an Error when the image fails to load.
+async function loadImagePreview(url) {
+    const img = new Image();
+    await new Promise((resolve, reject) => {
+        img.onload = resolve;
+        // The error event carries no message, so reject with a real Error.
+        img.onerror = () => reject(new Error('Image failed to load'));
+        // Assign src last so both handlers are in place before loading starts.
+        img.src = url;
+    });
+    imagePreview.innerHTML = '';
+    Object.assign(img.style, {
+        maxWidth: '100%',
+        maxHeight: '100%',
+        objectFit: 'contain',
+    });
+    imagePreview.appendChild(img);
+}
+// Used for max_new_tokens when the max-tokens input is empty or does not hold
+// a positive integer.
+const FALLBACK_MAX_NEW_TOKENS = 512;
+// Generate caption: make sure the model is loaded, run it on the current image
+// with the custom (or default) prompt, stream the text into the output
+// container, then replace it with the final decoded caption.
+generateBtn.addEventListener('click', async () => {
+    if (!currentImageUrl) {
+        showNotification('Please load an image first', 'error');
+        return;
+    }
+    // Lock the button before the model loads as well, so repeated clicks
+    // cannot start overlapping runs.
+    generateBtn.disabled = true;
+    generateBtn.classList.add('loading');
+    copyBtn.style.display = 'none';
+    try {
+        if (!processor || !model) {
+            await initializeModel();
+        }
+        if (!processor || !model) {
+            // Loading did not complete. initializeModel() reports load
+            // failures to the user itself, so just stop here.
+            return;
+        }
+        outputContainer.innerHTML = '<div class="typing-indicator">Generating caption...</div>';
+        const promptText = customPrompt.value.trim() || 'Describe this image in detail.';
+        const messages = [
+            {
+                role: 'user',
+                content: `<image>${promptText}`,
+            },
+        ];
+        const prompt = processor.apply_chat_template(messages, {
+            add_generation_prompt: true,
+        });
+        const image = await load_image(currentImageUrl);
+        const inputs = await processor(image, prompt, {
+            add_special_tokens: false,
+        });
+        // Always parse as base 10, and never pass NaN or a non-positive
+        // limit on to generate().
+        const requestedTokens = Number.parseInt(maxTokensInput.value, 10);
+        const maxNewTokens = Number.isInteger(requestedTokens) && requestedTokens > 0
+            ? requestedTokens
+            : FALLBACK_MAX_NEW_TOKENS;
+        let streamedText = '';
+        outputContainer.innerHTML = '';
+        const outputs = await model.generate({
+            ...inputs,
+            max_new_tokens: maxNewTokens,
+            do_sample: false,
+            streamer: new TextStreamer(processor.tokenizer, {
+                skip_prompt: true,
+                skip_special_tokens: false,
+                // Show the text as it arrives and keep the newest part in view.
+                callback_function: (text) => {
+                    streamedText += text;
+                    outputContainer.textContent = streamedText;
+                    outputContainer.scrollTop = outputContainer.scrollHeight;
+                },
+            }),
+        });
+        // Decode only what follows the prompt tokens; this final text replaces
+        // whatever was streamed.
+        const decoded = processor.batch_decode(
+            outputs.slice(null, [inputs.input_ids.dims.at(-1), null]),
+            { skip_special_tokens: true }
+        );
+        outputContainer.textContent = decoded[0];
+        copyBtn.style.display = 'inline-flex';
+        showNotification('Caption generated successfully!', 'success');
+    } catch (error) {
+        console.error('Generation error:', error);
+        outputContainer.innerHTML = '<div class="error">Failed to generate caption. Please try again.</div>';
+        showNotification('Generation failed. Please try again.', 'error');
+    } finally {
+        generateBtn.disabled = false;
+        generateBtn.classList.remove('loading');
+    }
+});
+// Copy caption: write the text of the output container to the clipboard and
+// tell the user whether that worked.
+copyBtn.addEventListener('click', async () => {
+    const text = outputContainer.textContent;
+    try {
+        // navigator.clipboard is undefined outside secure contexts and the
+        // write itself can be rejected; both cases end up in the catch below.
+        await navigator.clipboard.writeText(text);
+        showNotification('Caption copied to clipboard!', 'success');
+    } catch (error) {
+        console.error('Clipboard error:', error);
+        showNotification('Failed to copy caption.', 'error');
+    }
+});
+// Helper functions
+// Shows the loading overlay with `text` (default 'Loading...') as its message.
+function showLoading(text = 'Loading...') {
+    loadingOverlay.classList.toggle('active', true);
+    loadingText.textContent = text;
+}
+// Hides the loading overlay by removing its "active" class.
+function hideLoading() {
+    loadingOverlay.classList.toggle('active', false);
 }
+// Shows a temporary notification with `message`. `type` (default 'info') is
+// added as a class name next to "notification". The element gets the "show"
+// class after 10ms, loses it after 3000ms and is removed 300ms later.
+function showNotification(message, type = 'info') {
+    const toast = Object.assign(document.createElement('div'), {
+        className: `notification ${type}`,
+        textContent: message,
+    });
+    document.body.appendChild(toast);
+    const reveal = () => {
+        toast.classList.add('show');
+    };
+    const dismiss = () => {
+        toast.classList.remove('show');
+        setTimeout(() => toast.remove(), 300);
+    };
+    setTimeout(reveal, 10);
+    setTimeout(dismiss, 3000);
 }
+// Initialize: run the WebGPU check once at startup. The call returns a
+// promise, so a failure is logged here instead of becoming an unhandled
+// rejection.
+checkWebGPUSupport().catch((error) => {
+    console.error('WebGPU support check failed:', error);
+});