akhaliq HF Staff committed on
Commit
c4d95e0
·
verified ·
1 Parent(s): 5950436

Upload index.js with huggingface_hub

Browse files
Files changed (1) hide show
  1. index.js +256 -56
index.js CHANGED
@@ -1,76 +1,276 @@
1
- import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/[email protected]';
 
 
 
 
 
2
 
3
- // Reference the elements that we will need
4
- const status = document.getElementById('status');
5
- const fileUpload = document.getElementById('upload');
6
- const imageContainer = document.getElementById('container');
7
- const example = document.getElementById('example');
8
 
9
- const EXAMPLE_URL = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/city-streets.jpg';
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
- // Create a new object detection pipeline
12
- status.textContent = 'Loading model...';
13
- const detector = await pipeline('object-detection', 'Xenova/detr-resnet-50');
14
- status.textContent = 'Ready';
 
 
 
 
 
 
 
 
15
 
16
- example.addEventListener('click', (e) => {
17
- e.preventDefault();
18
- detect(EXAMPLE_URL);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  });
20
 
21
- fileUpload.addEventListener('change', function (e) {
 
22
  const file = e.target.files[0];
23
- if (!file) {
 
 
 
24
  return;
25
  }
26
-
27
  const reader = new FileReader();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
- // Set up a callback when the file is loaded
30
- reader.onload = e2 => detect(e2.target.result);
 
31
 
32
- reader.readAsDataURL(file);
 
 
 
 
 
 
 
 
33
  });
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
- // Detect objects in the image
37
- async function detect(img) {
38
- imageContainer.innerHTML = '';
39
- imageContainer.style.backgroundImage = `url(${img})`;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
- status.textContent = 'Analysing...';
42
- const output = await detector(img, {
43
- threshold: 0.5,
44
- percentage: true,
 
45
  });
46
- status.textContent = '';
47
- output.forEach(renderBox);
 
 
 
 
 
 
 
 
48
  }
49
 
50
- // Render a bounding box and label on the image
51
- function renderBox({ box, label }) {
52
- const { xmax, xmin, ymax, ymin } = box;
53
-
54
- // Generate a random color for the box
55
- const color = '#' + Math.floor(Math.random() * 0xFFFFFF).toString(16).padStart(6, 0);
56
-
57
- // Draw the box
58
- const boxElement = document.createElement('div');
59
- boxElement.className = 'bounding-box';
60
- Object.assign(boxElement.style, {
61
- borderColor: color,
62
- left: 100 * xmin + '%',
63
- top: 100 * ymin + '%',
64
- width: 100 * (xmax - xmin) + '%',
65
- height: 100 * (ymax - ymin) + '%',
66
- })
67
-
68
- // Draw label
69
- const labelElement = document.createElement('span');
70
- labelElement.textContent = label;
71
- labelElement.className = 'bounding-box-label';
72
- labelElement.style.backgroundColor = color;
73
-
74
- boxElement.appendChild(labelElement);
75
- imageContainer.appendChild(boxElement);
76
  }
 
 
 
 
1
+ import {
2
+ AutoProcessor,
3
+ AutoModelForImageTextToText,
4
+ load_image,
5
+ TextStreamer,
6
+ } from 'https://cdn.jsdelivr.net/npm/@huggingface/[email protected]';
7
 
8
// Model state. Both handles start out null and are assigned by initializeModel().
let processor = null;
let model = null;
// Source of the image currently selected for captioning (URL or data URL).
let currentImageUrl = null;
// True while initializeModel() is running.
let isModelLoading = false;

// Shorthand for the id lookups below.
const byId = (id) => document.getElementById(id);

// DOM Elements
const imageUrlInput = byId('image-url');
const loadUrlBtn = byId('load-url-btn');
const fileUpload = byId('file-upload');
const imagePreview = byId('image-preview');
const generateBtn = byId('generate-btn');
const outputContainer = byId('output-container');
const copyBtn = byId('copy-btn');
const customPrompt = byId('custom-prompt');
const maxTokensInput = byId('max-tokens');
const deviceSelect = byId('device-select');
const loadingOverlay = byId('loading-overlay');
const loadingText = byId('loading-text');
const tabBtns = document.querySelectorAll('.tab-btn');
const tabContents = document.querySelectorAll('.tab-content');
28
 
29
+ // Tab switching
30
// Clicking a tab button makes that button and its `<data-tab>-tab` panel the
// only elements carrying the `active` class.
for (const tabButton of tabBtns) {
  tabButton.addEventListener('click', () => {
    const panelId = `${tabButton.dataset.tab}-tab`;

    for (const otherButton of tabBtns) {
      otherButton.classList.remove('active');
    }
    for (const panel of tabContents) {
      panel.classList.remove('active');
    }

    tabButton.classList.add('active');
    document.getElementById(panelId).classList.add('active');
  });
}
41
 
42
+ // Check WebGPU support
43
/**
 * Disables the "webgpu" entry of the device selector and falls back to
 * "wasm" when WebGPU cannot be used.
 *
 * The presence of `navigator.gpu` alone is not sufficient: `requestAdapter()`
 * may resolve to `null` (or reject) when no suitable adapter is available,
 * so an adapter is actually requested before WebGPU is treated as usable.
 *
 * @returns {Promise<void>} Resolves once the selector has been updated.
 */
async function checkWebGPUSupport() {
  let isSupported = false;
  if (navigator.gpu) {
    try {
      const adapter = await navigator.gpu.requestAdapter();
      isSupported = Boolean(adapter);
    } catch (error) {
      console.warn('WebGPU adapter request failed:', error);
    }
  }

  if (isSupported) {
    return;
  }

  // The option may be missing from the markup; still force the wasm fallback.
  const webgpuOption = deviceSelect.querySelector('option[value="webgpu"]');
  if (webgpuOption) {
    webgpuOption.disabled = true;
    webgpuOption.text += ' (Not Supported)';
  }
  deviceSelect.value = 'wasm';
}
50
+
51
+ // Initialize model
52
// In-flight load shared by concurrent initializeModel() callers; null when idle.
let modelLoadPromise = null;

/**
 * Loads the processor and model once.
 *
 * Callers that arrive while a load is already running wait for that same
 * load instead of returning immediately with nothing loaded. Failures are
 * reported through a notification and logged; they do not reject, so callers
 * must check `processor` / `model` afterwards.
 *
 * @returns {Promise<void>} Resolves when the load attempt has finished.
 */
async function initializeModel() {
  if (processor && model) {
    return;
  }
  if (modelLoadPromise) {
    return modelLoadPromise;
  }

  modelLoadPromise = loadModelAndProcessor();
  try {
    await modelLoadPromise;
  } finally {
    // Clear the handle so a failed attempt can be retried later.
    modelLoadPromise = null;
  }
}

// Performs one load attempt and publishes processor/model only if both loaded.
async function loadModelAndProcessor() {
  isModelLoading = true;
  showLoading('Initializing AI model...');

  try {
    const modelId = 'onnx-community/FastVLM-0.5B-ONNX';
    const device = deviceSelect.value;

    loadingText.textContent = 'Loading processor...';
    const loadedProcessor = await AutoProcessor.from_pretrained(modelId);

    loadingText.textContent = 'Loading model weights...';
    const modelOptions = {
      dtype: {
        embed_tokens: 'fp16',
        vision_encoder: 'q4',
        decoder_model_merged: 'q4',
      },
    };
    // Only an explicit "webgpu" choice is forwarded; any other selection
    // leaves the device unset.
    if (device === 'webgpu') {
      modelOptions.device = 'webgpu';
    }

    const loadedModel = await AutoModelForImageTextToText.from_pretrained(modelId, modelOptions);

    // Assign both together so a partial failure never leaves a processor
    // without a matching model.
    processor = loadedProcessor;
    model = loadedModel;
    showNotification('Model loaded successfully!', 'success');
  } catch (error) {
    console.error('Model initialization error:', error);
    showNotification('Failed to load model. Please try again.', 'error');
  } finally {
    hideLoading();
    isModelLoading = false;
  }
}
90
+
91
+ // Load image from URL
92
// Load the image named in the URL field into the preview. The URL only
// becomes the current image (and the generate button is only enabled) once
// the preview has loaded successfully.
loadUrlBtn.addEventListener('click', async () => {
  const url = imageUrlInput.value.trim();
  if (!url) {
    showNotification('Please enter a valid image URL', 'error');
    return;
  }

  showLoading('Loading image...');
  try {
    await loadImagePreview(url);
    currentImageUrl = url;
    generateBtn.disabled = false;
  } catch (error) {
    // Keep the cause visible to developers; the user gets the generic message.
    console.error('Image load error:', error);
    showNotification('Failed to load image. Please check the URL.', 'error');
  } finally {
    // Single exit point for the overlay regardless of outcome.
    hideLoading();
  }
});
110
 
111
+ // Handle file upload
112
// Read the chosen file as a data URL and show it in the preview. Files over
// 10 MiB are rejected up front. The data URL only becomes the current image
// once the preview has decoded it, and read/decode failures are reported
// instead of surfacing as unhandled promise rejections.
fileUpload.addEventListener('change', (e) => {
  const file = e.target.files[0];
  if (!file) {
    return;
  }

  if (file.size > 10 * 1024 * 1024) {
    showNotification('File size must be less than 10MB', 'error');
    return;
  }

  const reader = new FileReader();
  reader.onload = async (event) => {
    const dataUrl = event.target.result;
    try {
      await loadImagePreview(dataUrl);
      currentImageUrl = dataUrl;
      generateBtn.disabled = false;
    } catch (error) {
      console.error('Image preview error:', error);
      showNotification('Failed to load image. Please try another file.', 'error');
    }
  };
  reader.onerror = () => {
    console.error('File read error:', reader.error);
    showNotification('Failed to read file. Please try again.', 'error');
  };
  reader.readAsDataURL(file);
});
129
+
130
+ // Drag and drop
131
// Drag-and-drop onto the file label: the label carries `drag-over` while a
// drag hovers it, and a dropped image is handed to the file input's own
// `change` handler.
const fileLabel = document.querySelector('.file-label');

fileLabel.addEventListener('dragover', (event) => {
  // Cancelling dragover is what marks the label as a valid drop target.
  event.preventDefault();
  fileLabel.classList.toggle('drag-over', true);
});

fileLabel.addEventListener('dragleave', () => {
  fileLabel.classList.toggle('drag-over', false);
});

fileLabel.addEventListener('drop', (event) => {
  event.preventDefault();
  fileLabel.classList.toggle('drag-over', false);

  const firstFile = event.dataTransfer.files[0];
  if (!firstFile || !firstFile.type.startsWith('image/')) {
    return;
  }

  fileUpload.files = event.dataTransfer.files;
  fileUpload.dispatchEvent(new Event('change'));
});
151
 
152
+ // Load image preview
153
/**
 * Loads `url` into a new image element and, once it has loaded, makes that
 * element the sole content of the preview container.
 *
 * @param {string} url - Image URL or data URL.
 * @returns {Promise<void>} Resolves after the image has been added to the
 *   preview; rejects with an Error if the image fails to load.
 */
function loadImagePreview(url) {
  return new Promise((resolve, reject) => {
    const img = new Image();

    img.onload = () => {
      imagePreview.innerHTML = '';
      Object.assign(img.style, {
        maxWidth: '100%',
        maxHeight: '100%',
        objectFit: 'contain',
      });
      imagePreview.appendChild(img);
      resolve();
    };
    // Reject with a real Error (not the raw DOM event) so callers get a
    // message and a stack trace.
    img.onerror = () => reject(new Error('Failed to load image'));

    // Start loading only after both handlers are attached.
    img.src = url;
  });
}
169
 
170
+ // Generate caption
171
// Generate a caption for the current image, streaming partial text into the
// output container and replacing it with the decoded result at the end.
generateBtn.addEventListener('click', async () => {
  if (!currentImageUrl) {
    showNotification('Please load an image first', 'error');
    return;
  }

  // Used when the max-tokens field does not hold a positive integer.
  const DEFAULT_MAX_NEW_TOKENS = 512;

  // Disable the button before the first await so a second click cannot start
  // an overlapping run while the model is still loading.
  generateBtn.disabled = true;
  generateBtn.classList.add('loading');

  try {
    if (!processor || !model) {
      await initializeModel();
    }
    // initializeModel() reports its own failure to the user and does not
    // throw, so re-check before touching the model.
    if (!processor || !model) {
      return;
    }

    outputContainer.innerHTML = '<div class="typing-indicator">Generating caption...</div>';
    copyBtn.style.display = 'none';

    const promptText = customPrompt.value.trim() || 'Describe this image in detail.';
    const messages = [
      {
        role: 'user',
        content: `<image>${promptText}`,
      },
    ];

    const prompt = processor.apply_chat_template(messages, {
      add_generation_prompt: true,
    });

    const image = await load_image(currentImageUrl);
    const inputs = await processor(image, prompt, {
      add_special_tokens: false,
    });

    // An empty or non-numeric field would otherwise pass NaN to generate().
    const requestedTokens = Number.parseInt(maxTokensInput.value, 10);
    const maxNewTokens =
      Number.isInteger(requestedTokens) && requestedTokens > 0
        ? requestedTokens
        : DEFAULT_MAX_NEW_TOKENS;

    let streamedText = '';
    outputContainer.innerHTML = '';

    const outputs = await model.generate({
      ...inputs,
      max_new_tokens: maxNewTokens,
      do_sample: false,
      streamer: new TextStreamer(processor.tokenizer, {
        skip_prompt: true,
        skip_special_tokens: false,
        callback_function: (text) => {
          streamedText += text;
          outputContainer.textContent = streamedText;
          // Keep the newest text in view while streaming.
          outputContainer.scrollTop = outputContainer.scrollHeight;
        },
      }),
    });

    // Slice from the prompt's length onward before decoding.
    const decoded = processor.batch_decode(
      outputs.slice(null, [inputs.input_ids.dims.at(-1), null]),
      { skip_special_tokens: true },
    );

    outputContainer.textContent = decoded[0];
    copyBtn.style.display = 'inline-flex';
    showNotification('Caption generated successfully!', 'success');
  } catch (error) {
    console.error('Generation error:', error);
    outputContainer.innerHTML = '<div class="error">Failed to generate caption. Please try again.</div>';
    showNotification('Generation failed. Please try again.', 'error');
  } finally {
    generateBtn.disabled = false;
    generateBtn.classList.remove('loading');
  }
});
240
 
241
+ // Copy caption
242
// Copy the caption text to the clipboard and report the outcome. The try
// block also covers `navigator.clipboard` being unavailable, so a failed
// copy is reported instead of becoming an unhandled rejection.
copyBtn.addEventListener('click', async () => {
  try {
    await navigator.clipboard.writeText(outputContainer.textContent);
    showNotification('Caption copied to clipboard!', 'success');
  } catch (error) {
    console.error('Clipboard error:', error);
    showNotification('Failed to copy caption.', 'error');
  }
});
248
+
249
+ // Helper functions
250
/**
 * Shows the loading overlay with the given status message.
 *
 * @param {string} [text='Loading...'] - Message written into the overlay's
 *   text element.
 */
function showLoading(text = 'Loading...') {
  loadingOverlay.classList.add('active');
  loadingText.textContent = text;
}
254
+
255
/** Hides the loading overlay by removing its `active` class. */
function hideLoading() {
  loadingOverlay.classList.toggle('active', false);
}
258
 
259
/**
 * Appends a transient notification element to the page body.
 *
 * The element gets the `show` class 10 ms after insertion, loses it after
 * 3000 ms, and is removed from the DOM 300 ms after that.
 *
 * @param {string} message - Text displayed in the notification.
 * @param {string} [type='info'] - Extra class name added next to
 *   `notification` (callers in this file pass 'success' or 'error').
 */
function showNotification(message, type = 'info') {
  const toast = Object.assign(document.createElement('div'), {
    className: `notification ${type}`,
    textContent: message,
  });
  document.body.appendChild(toast);

  const reveal = () => {
    toast.classList.add('show');
  };
  const dismiss = () => {
    toast.classList.remove('show');
    setTimeout(() => toast.remove(), 300);
  };

  setTimeout(reveal, 10);
  setTimeout(dismiss, 3000);
}
274
+
275
+ // Initialize
276
// The check is async; attach a handler so a failure is logged instead of
// becoming an unhandled promise rejection.
checkWebGPUSupport().catch((error) => {
  console.error('WebGPU support check failed:', error);
});