Upload index.js with huggingface_hub
Browse files
index.js
CHANGED
@@ -1,76 +1,276 @@
|
|
1 |
-
import {
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
const example = document.getElementById('example');
|
8 |
|
9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
-
//
|
12 |
-
|
13 |
-
|
14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
-
|
17 |
-
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
});
|
20 |
|
21 |
-
|
|
|
22 |
const file = e.target.files[0];
|
23 |
-
if (!file)
|
|
|
|
|
|
|
24 |
return;
|
25 |
}
|
26 |
-
|
27 |
const reader = new FileReader();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
-
|
30 |
-
|
|
|
31 |
|
32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
});
|
34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
-
//
|
37 |
-
async
|
38 |
-
|
39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
|
|
45 |
});
|
46 |
-
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
}
|
49 |
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
width: 100 * (xmax - xmin) + '%',
|
65 |
-
height: 100 * (ymax - ymin) + '%',
|
66 |
-
})
|
67 |
-
|
68 |
-
// Draw label
|
69 |
-
const labelElement = document.createElement('span');
|
70 |
-
labelElement.textContent = label;
|
71 |
-
labelElement.className = 'bounding-box-label';
|
72 |
-
labelElement.style.backgroundColor = color;
|
73 |
-
|
74 |
-
boxElement.appendChild(labelElement);
|
75 |
-
imageContainer.appendChild(boxElement);
|
76 |
}
|
|
|
|
|
|
|
|
1 |
+
import {
|
2 |
+
AutoProcessor,
|
3 |
+
AutoModelForImageTextToText,
|
4 |
+
load_image,
|
5 |
+
TextStreamer,
|
6 |
+
} from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3';
|
7 |
|
8 |
+
// Model state. Both stay null until initializeModel() has loaded them.
let processor = null;
let model = null;
// URL (or data URL) of the image most recently loaded into the preview.
let currentImageUrl = null;
// True while initializeModel() is running.
let isModelLoading = false;

// DOM Elements
const byId = (id) => document.getElementById(id);

const imageUrlInput = byId('image-url');
const loadUrlBtn = byId('load-url-btn');
const fileUpload = byId('file-upload');
const imagePreview = byId('image-preview');
const generateBtn = byId('generate-btn');
const outputContainer = byId('output-container');
const copyBtn = byId('copy-btn');
const customPrompt = byId('custom-prompt');
const maxTokensInput = byId('max-tokens');
const deviceSelect = byId('device-select');
const loadingOverlay = byId('loading-overlay');
const loadingText = byId('loading-text');

const tabBtns = document.querySelectorAll('.tab-btn');
const tabContents = document.querySelectorAll('.tab-content');
|
28 |
|
29 |
+
// Tab switching: a click on a tab button makes that button and the panel
// whose id is `<data-tab>-tab` the only ones carrying the `active` class.
for (const tabButton of tabBtns) {
  tabButton.addEventListener('click', () => {
    const { tab } = tabButton.dataset;

    for (const otherButton of tabBtns) {
      otherButton.classList.remove('active');
    }
    for (const panel of tabContents) {
      panel.classList.remove('active');
    }

    tabButton.classList.add('active');
    document.getElementById(`${tab}-tab`).classList.add('active');
  });
}
|
41 |
|
42 |
+
// Check WebGPU support
/**
 * Probes for a usable WebGPU adapter and, when none is available, disables
 * the "webgpu" entry of the device selector and switches it to "wasm".
 *
 * `navigator.gpu` being defined is not enough on its own: `requestAdapter()`
 * may still resolve to `null` or reject, so the adapter is requested too.
 *
 * @returns {Promise<boolean>} `true` when an adapter was obtained, otherwise `false`.
 */
async function checkWebGPUSupport() {
  let isSupported = false;
  try {
    const adapter = await navigator.gpu?.requestAdapter();
    isSupported = adapter != null;
  } catch (error) {
    console.warn('WebGPU adapter request failed:', error);
  }

  if (isSupported) {
    return true;
  }

  const webgpuOption = deviceSelect.querySelector('option[value="webgpu"]');
  // Skip when the option is missing or already marked, so repeated calls do
  // not keep appending the suffix.
  if (webgpuOption && !webgpuOption.disabled) {
    webgpuOption.disabled = true;
    webgpuOption.text += ' (Not Supported)';
  }
  deviceSelect.value = 'wasm';
  return false;
}
|
50 |
+
|
51 |
+
// Initialize model
// Promise of the load currently in flight, shared by concurrent callers.
let modelLoadPromise = null;

/**
 * Ensures the processor and model are loaded.
 *
 * Callers that arrive while a load is already running wait for that same
 * load instead of returning immediately with `processor`/`model` still null.
 * Failures are logged and reported via a notification; they do not throw.
 *
 * @returns {Promise<boolean>} `true` when both processor and model are ready.
 */
async function initializeModel() {
  if (processor && model) {
    return true;
  }
  if (modelLoadPromise) {
    return modelLoadPromise;
  }

  modelLoadPromise = loadProcessorAndModel();
  try {
    return await modelLoadPromise;
  } finally {
    modelLoadPromise = null;
  }
}

/**
 * Performs one load attempt using the device chosen in the device selector.
 *
 * @returns {Promise<boolean>} `true` on success, `false` when loading failed.
 */
async function loadProcessorAndModel() {
  isModelLoading = true;
  showLoading('Initializing AI model...');

  try {
    const model_id = 'onnx-community/FastVLM-0.5B-ONNX';
    const device = deviceSelect.value;

    loadingText.textContent = 'Loading processor...';
    const loadedProcessor = await AutoProcessor.from_pretrained(model_id);

    loadingText.textContent = 'Loading model weights...';
    // Per-component dtype selection handed to from_pretrained().
    const modelOptions = {
      dtype: {
        embed_tokens: 'fp16',
        vision_encoder: 'q4',
        decoder_model_merged: 'q4',
      },
    };

    // Only WebGPU is requested explicitly; any other choice leaves the
    // library's default device in place.
    if (device === 'webgpu') {
      modelOptions.device = 'webgpu';
    }

    const loadedModel = await AutoModelForImageTextToText.from_pretrained(model_id, modelOptions);

    // Publish both together so a failed model load never leaves a processor
    // without a matching model.
    processor = loadedProcessor;
    model = loadedModel;

    showNotification('Model loaded successfully!', 'success');
    return true;
  } catch (error) {
    console.error('Model initialization error:', error);
    showNotification('Failed to load model. Please try again.', 'error');
    return false;
  } finally {
    isModelLoading = false;
    hideLoading();
  }
}
|
90 |
+
|
91 |
+
// Load image from URL
// Reads the URL field, previews the image and, on success, records the URL
// as the current image and enables the generate button.
loadUrlBtn.addEventListener('click', async () => {
  const url = imageUrlInput.value.trim();
  if (!url) {
    showNotification('Please enter a valid image URL', 'error');
    return;
  }

  showLoading('Loading image...');
  try {
    await loadImagePreview(url);
    currentImageUrl = url;
    generateBtn.disabled = false;
  } catch (error) {
    // Keep the cause visible in the console; the notification is generic.
    console.error('Image load error:', error);
    showNotification('Failed to load image. Please check the URL.', 'error');
  } finally {
    hideLoading();
  }
});
|
110 |
|
111 |
+
// Handle file upload
// Reads the chosen file as a data URL, previews it and, on success, records
// it as the current image and enables the generate button.
fileUpload.addEventListener('change', (e) => {
  const file = e.target.files[0];
  if (!file) return;

  if (file.size > 10 * 1024 * 1024) {
    showNotification('File size must be less than 10MB', 'error');
    return;
  }

  const reader = new FileReader();

  reader.onload = async () => {
    const dataUrl = reader.result;
    try {
      await loadImagePreview(dataUrl);
      // Commit only after the preview succeeded, so a file that cannot be
      // displayed never becomes the image used for generation.
      currentImageUrl = dataUrl;
      generateBtn.disabled = false;
    } catch (error) {
      console.error('Image load error:', error);
      showNotification('Failed to load image. Please try another file.', 'error');
    }
  };

  reader.onerror = () => {
    console.error('File read error:', reader.error);
    showNotification('Failed to read file. Please try again.', 'error');
  };

  reader.readAsDataURL(file);
});
|
129 |
+
|
130 |
+
// Drag and drop
// Lets an image be dropped on the file label; the drop is forwarded to the
// file input so the regular upload handler processes it.
const fileLabel = document.querySelector('.file-label');

if (fileLabel) {
  fileLabel.addEventListener('dragover', (e) => {
    // Required so the browser allows a drop on this element.
    e.preventDefault();
    fileLabel.classList.add('drag-over');
  });

  fileLabel.addEventListener('dragleave', () => {
    fileLabel.classList.remove('drag-over');
  });

  fileLabel.addEventListener('drop', (e) => {
    e.preventDefault();
    fileLabel.classList.remove('drag-over');

    const file = e.dataTransfer.files[0];
    if (!file) return;

    if (!file.type.startsWith('image/')) {
      // Tell the user why nothing happened instead of ignoring the drop.
      showNotification('Please drop an image file', 'error');
      return;
    }

    fileUpload.files = e.dataTransfer.files;
    fileUpload.dispatchEvent(new Event('change'));
  });
}
|
151 |
|
152 |
+
// Load image preview
/**
 * Loads `url` into a new image element and, once it has loaded, makes that
 * element the sole content of the preview container.
 *
 * @param {string} url - Image URL or data URL.
 * @returns {Promise<void>} Resolves after the image has been inserted.
 *   Rejects with an `Error` when the image fails to load; the preview is
 *   left untouched in that case.
 */
function loadImagePreview(url) {
  return new Promise((resolve, reject) => {
    const img = new Image();

    img.onload = () => {
      Object.assign(img.style, {
        maxWidth: '100%',
        maxHeight: '100%',
        objectFit: 'contain',
      });
      imagePreview.replaceChildren(img);
      resolve();
    };

    img.onerror = () => {
      // Data URLs can be megabytes long; keep the message readable.
      const source = String(url).slice(0, 100);
      reject(new Error(`Failed to load image: ${source}`));
    };

    // Assign the source last so both handlers are in place before loading starts.
    img.src = url;
  });
}
|
169 |
|
170 |
+
// Generate caption
// Fallback used when the max-tokens field is empty or not a positive integer.
const DEFAULT_MAX_NEW_TOKENS = 256;

// Runs the model on the current image with the custom (or default) prompt,
// streaming text into the output container as it is produced and replacing
// it with the fully decoded result at the end.
generateBtn.addEventListener('click', async () => {
  if (!currentImageUrl) {
    showNotification('Please load an image first', 'error');
    return;
  }

  // Disable before any await so a second click during model loading cannot
  // start a parallel run.
  generateBtn.disabled = true;
  generateBtn.classList.add('loading');
  copyBtn.style.display = 'none';

  try {
    if (!processor || !model) {
      await initializeModel();
      // initializeModel() does not throw on failure; stop here rather than
      // dereferencing a null processor below.
      if (!processor || !model) {
        outputContainer.innerHTML = '<div class="error">Model is not ready. Please try again.</div>';
        return;
      }
    }

    outputContainer.innerHTML = '<div class="typing-indicator">Generating caption...</div>';

    const promptText = customPrompt.value.trim() || 'Describe this image in detail.';
    const messages = [
      {
        role: 'user',
        content: `<image>${promptText}`,
      },
    ];

    const prompt = processor.apply_chat_template(messages, {
      add_generation_prompt: true,
    });

    const image = await load_image(currentImageUrl);
    const inputs = await processor(image, prompt, {
      add_special_tokens: false,
    });

    const requestedTokens = Number.parseInt(maxTokensInput.value, 10);
    const maxNewTokens =
      Number.isInteger(requestedTokens) && requestedTokens > 0
        ? requestedTokens
        : DEFAULT_MAX_NEW_TOKENS;

    let streamedText = '';
    outputContainer.innerHTML = '';

    const outputs = await model.generate({
      ...inputs,
      max_new_tokens: maxNewTokens,
      do_sample: false,
      streamer: new TextStreamer(processor.tokenizer, {
        skip_prompt: true,
        // Match the final decode below so special tokens never show up in
        // the streamed text.
        skip_special_tokens: true,
        callback_function: (text) => {
          streamedText += text;
          outputContainer.textContent = streamedText;
          outputContainer.scrollTop = outputContainer.scrollHeight;
        },
      }),
    });

    // Drop the prompt tokens: keep only what follows the input length along
    // the last dimension.
    const decoded = processor.batch_decode(
      outputs.slice(null, [inputs.input_ids.dims.at(-1), null]),
      { skip_special_tokens: true },
    );

    outputContainer.textContent = decoded[0];
    copyBtn.style.display = 'inline-flex';
    showNotification('Caption generated successfully!', 'success');
  } catch (error) {
    console.error('Generation error:', error);
    outputContainer.innerHTML = '<div class="error">Failed to generate caption. Please try again.</div>';
    showNotification('Generation failed. Please try again.', 'error');
  } finally {
    generateBtn.disabled = false;
    generateBtn.classList.remove('loading');
  }
});
|
240 |
|
241 |
+
// Copy caption
// Copies the output text to the clipboard and reports success or failure.
copyBtn.addEventListener('click', async () => {
  const text = outputContainer.textContent;
  try {
    // The try block also covers `navigator.clipboard` being undefined.
    await navigator.clipboard.writeText(text);
    showNotification('Caption copied to clipboard!', 'success');
  } catch (error) {
    console.error('Clipboard write failed:', error);
    showNotification('Failed to copy caption.', 'error');
  }
});
|
248 |
+
|
249 |
+
// Helper functions
|
250 |
+
/**
 * Sets the loading message and marks the loading overlay as active.
 *
 * @param {string} [text='Loading...'] - Message written to the loading text element.
 */
function showLoading(text = 'Loading...') {
  loadingText.textContent = text;
  loadingOverlay.classList.toggle('active', true);
}
|
254 |
+
|
255 |
+
/** Removes the `active` class from the loading overlay. */
function hideLoading() {
  loadingOverlay.classList.toggle('active', false);
}
|
258 |
|
259 |
+
/**
 * Appends a notification element to the page, adds its `show` class after
 * 10 ms, removes that class after 3 s and deletes the element 300 ms later.
 *
 * @param {string} message - Text placed in the notification.
 * @param {string} [type='info'] - Extra class name added next to `notification`.
 */
function showNotification(message, type = 'info') {
  const toast = Object.assign(document.createElement('div'), {
    className: `notification ${type}`,
    textContent: message,
  });
  document.body.appendChild(toast);

  const reveal = () => {
    toast.classList.add('show');
  };
  const dismiss = () => {
    toast.classList.remove('show');
    setTimeout(() => toast.remove(), 300);
  };

  setTimeout(reveal, 10);
  setTimeout(dismiss, 3000);
}
|
274 |
+
|
275 |
+
// Initialize
// The check is async; attach a handler so a failure is logged rather than
// surfacing as an unhandled rejection.
checkWebGPUSupport().catch((error) => {
  console.error('WebGPU support check failed:', error);
});
|