vid / index.js
akhaliq's picture
akhaliq HF Staff
Upload index.js with huggingface_hub
aefc326 verified
// NOTE(review): the package specifier below looks mangled by e-mail obfuscation
// ("[email protected]"); it presumably was "@huggingface/transformers@<version>" — TODO confirm and restore.
import {
  AutoProcessor,
  AutoModelForImageTextToText,
  RawImage,
  TextStreamer,
} from 'https://cdn.jsdelivr.net/npm/@huggingface/[email protected]';
// Shared, reassignable application state.

// Selected video and the data derived from it (reset whenever a new file is chosen).
let videoFile = null;
let frames = [];
let captions = [];

// Loaded on demand by initializeModel(); null until then.
let processor = null;
let model = null;
// DOM Elements
// Short alias so each lookup below reads as a single id.
const byId = (id) => document.getElementById(id);

// Upload target and the hidden file input it forwards clicks to.
const uploadArea = byId('uploadArea');
const videoInput = byId('videoInput');

// Video preview, plus the canvas used to grab still frames from it.
const videoSection = byId('videoSection');
const videoPlayer = byId('videoPlayer');
const frameCanvas = byId('frameCanvas');

// Processing trigger and progress indicators.
const processBtn = byId('processBtn');
const progressSection = byId('progressSection');
const progressFill = byId('progressFill');
const progressText = byId('progressText');

// Results list and the inference-device selector.
const resultsSection = byId('resultsSection');
const framesList = byId('framesList');
const deviceSelect = byId('deviceSelect');
// Check WebGPU support
/**
 * Disables the "webgpu" choice in the device selector and falls back to
 * "wasm" when WebGPU cannot be used.
 *
 * Checking for `navigator.gpu` alone is not enough: a browser can expose the
 * API without having a usable adapter, in which case `requestAdapter()`
 * resolves to `null`. Both situations are treated as "not supported".
 *
 * @returns {Promise<void>}
 */
async function checkWebGPU() {
  let isSupported = false;
  if (navigator.gpu) {
    try {
      const adapter = await navigator.gpu.requestAdapter();
      isSupported = Boolean(adapter);
    } catch (error) {
      // Treat a failing adapter request the same as "no WebGPU".
      console.warn('WebGPU adapter request failed:', error);
    }
  }
  if (isSupported) {
    return;
  }

  const webgpuOption = deviceSelect.querySelector('option[value="webgpu"]');
  if (webgpuOption) {
    webgpuOption.disabled = true;
  }
  deviceSelect.value = 'wasm';
}
// Initialize model
/**
 * Loads the processor and the model into the module-level `processor` and
 * `model` variables, reporting progress through `progressText` and
 * `progressFill`. The device passed to the model loader is "webgpu" when the
 * device selector says so, otherwise "wasm".
 *
 * @returns {Promise<boolean>} `true` once both are loaded; `false` if loading
 *   threw (the error is logged and a message is shown in `progressText`).
 */
async function initializeModel() {
  const modelId = 'onnx-community/FastVLM-0.5B-ONNX';
  // Per-component dtype settings handed to the model loader.
  const dtype = {
    embed_tokens: 'fp16',
    vision_encoder: 'q4',
    decoder_model_merged: 'q4',
  };

  try {
    progressText.textContent = 'Loading processor...';
    progressFill.style.width = '30%';
    processor = await AutoProcessor.from_pretrained(modelId);

    progressText.textContent = 'Loading model (this may take a moment)...';
    progressFill.style.width = '60%';
    const wantsWebGPU = deviceSelect.value === 'webgpu';
    model = await AutoModelForImageTextToText.from_pretrained(modelId, {
      device: wantsWebGPU ? 'webgpu' : 'wasm',
      dtype,
    });

    progressFill.style.width = '100%';
    progressText.textContent = 'Model loaded successfully!';
  } catch (error) {
    console.error('Error initializing model:', error);
    progressText.textContent = 'Error loading model. Please refresh and try again.';
    return false;
  }
  return true;
}
// Upload handling
/**
 * Tells whether `file` is reported by the browser as a video.
 *
 * @param {File | undefined} file - Candidate file; may be undefined when a list is empty.
 * @returns {boolean} `true` when the file exists and its MIME type starts with "video/".
 */
function isVideoFile(file) {
  return Boolean(file) && file.type.startsWith('video/');
}

// Clicking the upload area opens the native file picker.
uploadArea.addEventListener('click', () => videoInput.click());

// preventDefault on dragover is what allows the element to receive a drop.
uploadArea.addEventListener('dragover', (event) => {
  event.preventDefault();
  uploadArea.classList.add('dragover');
});

uploadArea.addEventListener('dragleave', () => {
  uploadArea.classList.remove('dragover');
});

uploadArea.addEventListener('drop', (event) => {
  event.preventDefault();
  uploadArea.classList.remove('dragover');
  const file = event.dataTransfer.files[0];
  if (isVideoFile(file)) {
    handleVideoFile(file);
  }
});

videoInput.addEventListener('change', (event) => {
  const file = event.target.files[0];
  // Any `accept` filter on the input is only a hint to the picker, so the
  // same MIME check as the drop path is applied here.
  if (isVideoFile(file)) {
    handleVideoFile(file);
  }
});
// Object URL currently assigned to the player, kept so it can be revoked
// once a newer file replaces it.
let currentVideoUrl = null;

/**
 * Accepts a newly chosen video file: loads it into the player, shows the
 * video section, hides old results and clears previously extracted
 * frames/captions. Files larger than 100 MB are rejected with an alert and
 * leave the current state untouched.
 *
 * @param {File} file - The file picked or dropped by the user.
 * @returns {void}
 */
function handleVideoFile(file) {
  const maxVideoBytes = 100 * 1024 * 1024;
  if (file.size > maxVideoBytes) {
    alert('File size exceeds 100MB limit');
    return;
  }

  videoFile = file;

  const previousUrl = currentVideoUrl;
  currentVideoUrl = URL.createObjectURL(file);
  videoPlayer.src = currentVideoUrl;
  // Object URLs keep their blob alive until revoked; release the old one now
  // that the player points at the new file.
  if (previousUrl) {
    URL.revokeObjectURL(previousUrl);
  }

  videoSection.classList.remove('hidden');
  resultsSection.classList.add('hidden');
  frames = [];
  captions = [];
}
// Extract frames from video
/**
 * Seeks `videoPlayer` to `time` and resolves once the "seeked" event fires.
 * Listeners are attached before the seek starts and removed afterwards, so
 * no event can be missed and no handler is left behind.
 *
 * @param {number} time - Target position in seconds.
 * @returns {Promise<void>} Rejects if the player emits "error" while seeking.
 */
function seekVideoTo(time) {
  return new Promise((resolve, reject) => {
    const cleanup = () => {
      videoPlayer.removeEventListener('seeked', onSeeked);
      videoPlayer.removeEventListener('error', onError);
    };
    const onSeeked = () => {
      cleanup();
      resolve();
    };
    const onError = () => {
      cleanup();
      reject(new Error(`Failed to seek video to ${time}s`));
    };
    videoPlayer.addEventListener('seeked', onSeeked);
    videoPlayer.addEventListener('error', onError);
    videoPlayer.currentTime = time;
  });
}

/**
 * Captures one JPEG frame every N seconds of the loaded video, where N is the
 * whole-number value of the `#frameInterval` control. Each frame is drawn on
 * `frameCanvas`, encoded as JPEG (quality 0.9) and stored in the module-level
 * `frames` array as `{ time, blob, dataUrl }`.
 *
 * @returns {Promise<Array<{time: number, blob: Blob, dataUrl: string}>>} The `frames` array.
 * @throws {Error} If the interval is not a positive number, the video
 *   duration is not finite, or a frame cannot be encoded.
 */
async function extractFrames() {
  const interval = Number.parseInt(document.getElementById('frameInterval').value, 10);
  // A zero, negative or NaN step would make the loop below spin forever or misbehave.
  if (!Number.isFinite(interval) || interval <= 0) {
    throw new Error('Frame interval must be a positive whole number of seconds');
  }

  const { duration } = videoPlayer;
  // NaN means metadata is not loaded; Infinity would never terminate the loop.
  if (!Number.isFinite(duration)) {
    throw new Error('Video duration is unavailable; the video may not be fully loaded');
  }

  const ctx = frameCanvas.getContext('2d');
  // The frame size does not change while seeking, so size the canvas once.
  frameCanvas.width = videoPlayer.videoWidth;
  frameCanvas.height = videoPlayer.videoHeight;

  frames = [];
  for (let time = 0; time < duration; time += interval) {
    await seekVideoTo(time);
    ctx.drawImage(videoPlayer, 0, 0);

    const blob = await new Promise((resolve) => {
      frameCanvas.toBlob(resolve, 'image/jpeg', 0.9);
    });
    // toBlob hands back null when the image cannot be created.
    if (!blob) {
      throw new Error(`Could not encode the frame at ${time}s`);
    }

    frames.push({
      time,
      blob,
      dataUrl: await blobToDataUrl(blob),
    });
  }
  return frames;
}
/**
 * Reads a Blob into a `data:` URL using FileReader.
 *
 * Success and failure are handled separately ("load" vs "error") so a failed
 * read rejects instead of resolving with a null result.
 *
 * @param {Blob} blob - The blob to encode.
 * @returns {Promise<string>} Resolves with the data URL; rejects with the
 *   reader's error if the read fails.
 */
function blobToDataUrl(blob) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => resolve(reader.result);
    reader.onerror = () => {
      reject(reader.error ?? new Error('Failed to read blob as data URL'));
    };
    reader.readAsDataURL(blob);
  });
}
// Generate caption for a frame
/**
 * Produces a text description for one extracted frame and updates the
 * progress indicators.
 *
 * The frame is decoded with `RawImage.read`, the image type Transformers.js
 * processors work with, rather than being passed as an HTMLImageElement.
 * Generation is greedy (`do_sample: false`) and capped at 256 new tokens;
 * only the tokens after the prompt are decoded.
 *
 * @param {string} imageDataUrl - Data URL of the frame image.
 * @param {number} frameIndex - Zero-based index of the frame, used for progress.
 * @param {number} totalFrames - Total number of frames, used for progress.
 * @returns {Promise<string>} The caption, or a fixed error message if any
 *   step throws (the error is logged, not rethrown, so one bad frame does
 *   not abort the whole run).
 */
async function generateCaption(imageDataUrl, frameIndex, totalFrames) {
  try {
    progressText.textContent = `Processing frame ${frameIndex + 1} of ${totalFrames}...`;
    progressFill.style.width = `${((frameIndex + 1) / totalFrames) * 100}%`;

    const messages = [
      {
        role: 'user',
        content: '<image>Describe this video frame in detail. What is happening in this scene?',
      },
    ];
    const prompt = processor.apply_chat_template(messages, {
      add_generation_prompt: true,
    });

    const image = await RawImage.read(imageDataUrl);
    const inputs = await processor(image, prompt, {
      add_special_tokens: false,
    });

    const outputs = await model.generate({
      ...inputs,
      max_new_tokens: 256,
      do_sample: false,
    });

    // The output starts with the prompt tokens; skip them so only the newly
    // generated text is decoded.
    const promptLength = inputs.input_ids.dims.at(-1);
    const [caption] = processor.batch_decode(
      outputs.slice(null, [promptLength, null]),
      { skip_special_tokens: true },
    );
    return caption;
  } catch (error) {
    console.error('Error generating caption:', error);
    return 'Error generating caption for this frame';
  }
}
// Process video
// Handle of the delayed "hide progress" timer from the last successful run,
// kept so that a new run can cancel it before it hides the active progress bar.
let hideProgressTimer = null;

/**
 * Click handler for the process button: loads the model if needed, extracts
 * frames, captions them one by one (refreshing the results list after each),
 * and hides the progress section two seconds after finishing. The button is
 * disabled for the duration of the run and always re-enabled afterwards.
 */
processBtn.addEventListener('click', async () => {
  if (!videoFile) {
    return;
  }

  processBtn.disabled = true;
  // A previous run may still have its hide timer pending; without this the
  // progress bar could disappear in the middle of the new run.
  clearTimeout(hideProgressTimer);
  progressSection.classList.remove('hidden');
  resultsSection.classList.add('hidden');

  try {
    // Initialize model if not already loaded
    if (!model || !processor) {
      const isLoaded = await initializeModel();
      if (!isLoaded) {
        return;
      }
    }

    // Extract frames
    progressText.textContent = 'Extracting frames...';
    progressFill.style.width = '20%';
    frames = await extractFrames();

    // Generate captions
    captions = [];
    for (let i = 0; i < frames.length; i++) {
      const frame = frames[i];
      const caption = await generateCaption(frame.dataUrl, i, frames.length);
      captions.push({
        time: frame.time,
        caption,
        thumbnail: frame.dataUrl,
      });
      // Update results in real-time
      displayResults();
      resultsSection.classList.remove('hidden');
    }

    progressText.textContent = 'Processing complete!';
    hideProgressTimer = setTimeout(() => {
      progressSection.classList.add('hidden');
    }, 2000);
  } catch (error) {
    console.error('Processing error:', error);
    progressText.textContent = 'Error processing video. Please try again.';
  } finally {
    // Runs on success, on error and on the early return above.
    processBtn.disabled = false;
  }
});
// Display results
/**
 * Re-renders `framesList` from the module-level `captions` array: one
 * "frame-card" per entry containing the thumbnail, its timestamp and the
 * caption.
 *
 * Captions are model-generated text, so nodes are built with `textContent`
 * and property assignment instead of an HTML string; markup characters in a
 * caption are shown literally rather than parsed.
 *
 * @returns {void}
 */
function displayResults() {
  const cards = captions.map((item) => {
    const time = formatTime(item.time);

    const thumbnailImage = document.createElement('img');
    thumbnailImage.src = item.thumbnail;
    thumbnailImage.alt = `Frame at ${time}`;

    const timeLabel = document.createElement('span');
    timeLabel.className = 'frame-time';
    timeLabel.textContent = time;

    const thumbnail = document.createElement('div');
    thumbnail.className = 'frame-thumbnail';
    thumbnail.append(thumbnailImage, timeLabel);

    const captionText = document.createElement('p');
    captionText.textContent = item.caption;

    const captionBox = document.createElement('div');
    captionBox.className = 'frame-caption';
    captionBox.append(captionText);

    const card = document.createElement('div');
    card.className = 'frame-card';
    card.append(thumbnail, captionBox);
    return card;
  });

  // Swap the whole list in one operation.
  framesList.replaceChildren(...cards);
}
/**
 * Formats a position in seconds as "MM:SS". Both fields are padded to at
 * least two digits; minutes are not wrapped into hours, and fractional
 * seconds are dropped.
 *
 * @param {number} seconds - Position in seconds.
 * @returns {string} The formatted timestamp.
 */
function formatTime(seconds) {
  const twoDigits = (value) => String(value).padStart(2, '0');
  const wholeMinutes = Math.floor(seconds / 60);
  const leftoverSeconds = Math.floor(seconds % 60);
  return `${twoDigits(wholeMinutes)}:${twoDigits(leftoverSeconds)}`;
}
// Export functions
/**
 * Serialises caption entries as SubRip (SRT) text.
 *
 * Each cue lasts up to five seconds but is cut short at the start of the
 * next cue so that cues never overlap when frames are closer than five
 * seconds apart. Blank lines inside a caption are collapsed because a blank
 * line ends a cue in SRT.
 *
 * @param {Array<{time: number, caption: string}>} items - Caption entries in time order.
 * @returns {string} The SRT document.
 */
function buildSrt(items) {
  const maxCueSeconds = 5;
  return items
    .map((item, index) => {
      const next = items[index + 1];
      let end = item.time + maxCueSeconds;
      if (next && next.time > item.time) {
        end = Math.min(end, next.time);
      }
      const text = String(item.caption).trim().replace(/\n\s*\n/g, '\n');
      return `${index + 1}\n${formatSrtTime(item.time)} --> ${formatSrtTime(end)}\n${text}\n\n`;
    })
    .join('');
}

/**
 * Serialises caption entries as plain text, one "[MM:SS] caption" paragraph each.
 *
 * @param {Array<{time: number, caption: string}>} items - Caption entries.
 * @returns {string} The text document.
 */
function buildTxt(items) {
  return items.map((item) => `[${formatTime(item.time)}] ${item.caption}\n\n`).join('');
}

document.getElementById('exportJson').addEventListener('click', () => {
  const data = JSON.stringify(captions, null, 2);
  downloadFile(data, 'captions.json', 'application/json');
});

document.getElementById('exportSrt').addEventListener('click', () => {
  downloadFile(buildSrt(captions), 'captions.srt', 'text/plain');
});

document.getElementById('exportTxt').addEventListener('click', () => {
  downloadFile(buildTxt(captions), 'captions.txt', 'text/plain');
});
/**
 * Formats a position in seconds as an SRT timestamp, "HH:MM:SS,mmm".
 *
 * The value is first rounded to whole milliseconds and then split with
 * integer arithmetic. Taking the fractional part of the float directly
 * truncates values such as 2.3 to ",299" because of binary rounding error.
 * Negative input is clamped to zero.
 *
 * @param {number} seconds - Position in seconds.
 * @returns {string} The SRT timestamp.
 */
function formatSrtTime(seconds) {
  const totalMs = Math.max(0, Math.round(seconds * 1000));
  const ms = totalMs % 1000;
  const totalSeconds = Math.floor(totalMs / 1000);
  const secs = totalSeconds % 60;
  const mins = Math.floor(totalSeconds / 60) % 60;
  const hours = Math.floor(totalSeconds / 3600);

  const pad = (value, width) => String(value).padStart(width, '0');
  return `${pad(hours, 2)}:${pad(mins, 2)}:${pad(secs, 2)},${pad(ms, 3)}`;
}
/**
 * Offers `content` to the user as a file download by clicking a temporary
 * anchor that points at an object URL; the URL is revoked right after the click.
 *
 * @param {string} content - File contents.
 * @param {string} filename - Suggested file name for the download.
 * @param {string} type - MIME type given to the Blob.
 * @returns {void}
 */
function downloadFile(content, filename, type) {
  const objectUrl = URL.createObjectURL(new Blob([content], { type }));
  const link = Object.assign(document.createElement('a'), {
    href: objectUrl,
    download: filename,
  });
  link.click();
  URL.revokeObjectURL(objectUrl);
}
// Initialize
// checkWebGPU() is async, so a failure inside it would otherwise surface only
// as an unhandled promise rejection; log it instead.
checkWebGPU().catch((error) => {
  console.error('WebGPU check failed:', error);
});