akhaliq (HF Staff) committed on
Commit e949992 · verified · 1 Parent(s): 8a8466b

Upload index.js with huggingface_hub
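
The commit message indicates the file was pushed with the huggingface_hub Python client. Below is a minimal, non-authoritative sketch of how such an upload is typically done; the repo id and repo type are placeholder assumptions for illustration, not values read from this commit.

# Hypothetical upload sketch using the huggingface_hub Python client.
# repo_id and repo_type are placeholders; authentication is assumed to come
# from a saved token (e.g. `huggingface-cli login` or the HF_TOKEN variable).
from huggingface_hub import HfApi

api = HfApi()
api.upload_file(
    path_or_fileobj="index.js",           # local file to upload
    path_in_repo="index.js",              # destination path inside the repo
    repo_id="your-username/your-space",   # placeholder repo id
    repo_type="space",                    # assumed target is a Space
)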

Files changed (1)
  1. index.js +268 -60
index.js CHANGED
@@ -1,76 +1,284 @@
-import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/[email protected]';

-// Reference the elements that we will need
-const status = document.getElementById('status');
-const fileUpload = document.getElementById('upload');
-const imageContainer = document.getElementById('container');
-const example = document.getElementById('example');

-const EXAMPLE_URL = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/city-streets.jpg';

-// Create a new object detection pipeline
-status.textContent = 'Loading model...';
-const detector = await pipeline('object-detection', 'Xenova/detr-resnet-50');
-status.textContent = 'Ready';

-example.addEventListener('click', (e) => {
-    e.preventDefault();
-    detect(EXAMPLE_URL);
-});

-fileUpload.addEventListener('change', function (e) {
-    const file = e.target.files[0];
-    if (!file) {
         return;
     }

-    const reader = new FileReader();

-    // Set up a callback when the file is loaded
-    reader.onload = e2 => detect(e2.target.result);

-    reader.readAsDataURL(file);
 });


-// Detect objects in the image
-async function detect(img) {
-    imageContainer.innerHTML = '';
-    imageContainer.style.backgroundImage = `url(${img})`;

-    status.textContent = 'Analysing...';
-    const output = await detector(img, {
-        threshold: 0.5,
-        percentage: true,
-    });
-    status.textContent = '';
-    output.forEach(renderBox);
-}

-// Render a bounding box and label on the image
-function renderBox({ box, label }) {
-    const { xmax, xmin, ymax, ymin } = box;
-
-    // Generate a random color for the box
-    const color = '#' + Math.floor(Math.random() * 0xFFFFFF).toString(16).padStart(6, 0);
-
-    // Draw the box
-    const boxElement = document.createElement('div');
-    boxElement.className = 'bounding-box';
-    Object.assign(boxElement.style, {
-        borderColor: color,
-        left: 100 * xmin + '%',
-        top: 100 * ymin + '%',
-        width: 100 * (xmax - xmin) + '%',
-        height: 100 * (ymax - ymin) + '%',
-    })
-
-    // Draw label
-    const labelElement = document.createElement('span');
-    labelElement.textContent = label;
-    labelElement.className = 'bounding-box-label';
-    labelElement.style.backgroundColor = color;
-
-    boxElement.appendChild(labelElement);
-    imageContainer.appendChild(boxElement);
 }
+import {
+    AutoProcessor,
+    AutoModelForImageTextToText,
+    load_image,
+    TextStreamer,
+} from 'https://cdn.jsdelivr.net/npm/@huggingface/[email protected]';

+// Global variables
+let processor = null;
+let model = null;
+let isModelLoaded = false;
+let currentDevice = 'cpu';

+// DOM elements
+const elements = {
+    imageUrl: document.getElementById('image-url'),
+    loadUrlBtn: document.getElementById('load-url-btn'),
+    fileInput: document.getElementById('file-input'),
+    uploadArea: document.getElementById('upload-area'),
+    uploadBtn: document.getElementById('upload-btn'),
+    previewSection: document.getElementById('preview-section'),
+    previewImage: document.getElementById('preview-image'),
+    customPrompt: document.getElementById('custom-prompt'),
+    loadingSection: document.getElementById('loading-section'),
+    loadingText: document.getElementById('loading-text'),
+    progressFill: document.getElementById('progress-fill'),
+    outputSection: document.getElementById('output-section'),
+    outputContent: document.getElementById('output-content'),
+    copyBtn: document.getElementById('copy-btn'),
+    errorSection: document.getElementById('error-section'),
+    errorMessage: document.getElementById('error-message'),
+    tabBtns: document.querySelectorAll('.tab-btn'),
+    tabContents: document.querySelectorAll('.tab-content'),
+    deviceRadios: document.querySelectorAll('input[name="device"]')
+};

+// Initialize model
+async function initializeModel() {
+    if (isModelLoaded && currentDevice === getSelectedDevice()) {
+        return;
+    }

+    try {
+        showLoading('Loading model...');
+        currentDevice = getSelectedDevice();
+
+        const model_id = "onnx-community/FastVLM-0.5B-ONNX";
+
+        const modelOptions = {
+            dtype: {
+                embed_tokens: "fp16",
+                vision_encoder: "q4",
+                decoder_model_merged: "q4",
+            }
+        };
+
+        if (currentDevice === 'webgpu') {
+            modelOptions.device = 'webgpu';
+        }
+
+        updateLoadingText('Loading processor...');
+        processor = await AutoProcessor.from_pretrained(model_id);
+
+        updateLoadingText('Loading model...');
+        model = await AutoModelForImageTextToText.from_pretrained(model_id, modelOptions);
+
+        isModelLoaded = true;
+        hideLoading();
+    } catch (error) {
+        console.error('Model initialization error:', error);
+        showError('Failed to load model. Please try again.');
+        hideLoading();
+        throw error;
+    }
+}
+
+// Generate caption for image
+async function generateCaption(imageUrl) {
+    try {
+        hideError();
+        showLoading('Processing image...');
+
+        if (!isModelLoaded) {
+            await initializeModel();
+        }
+
+        // Prepare prompt
+        const customPromptText = elements.customPrompt.value.trim();
+        const promptContent = customPromptText || "Describe this image in detail.";
+
+        const messages = [
+            {
+                role: "user",
+                content: `<image>${promptContent}`,
+            },
+        ];
+
+        const prompt = processor.apply_chat_template(messages, {
+            add_generation_prompt: true,
+        });
+
+        updateLoadingText('Loading image...');
+        const image = await load_image(imageUrl);
+
+        updateLoadingText('Processing inputs...');
+        const inputs = await processor(image, prompt, {
+            add_special_tokens: false,
+        });
+
+        updateLoadingText('Generating caption...');
+        elements.outputContent.textContent = '';
+        showOutput();
+
+        const outputs = await model.generate({
+            ...inputs,
+            max_new_tokens: 512,
+            do_sample: false,
+            streamer: new TextStreamer(processor.tokenizer, {
+                skip_prompt: true,
+                skip_special_tokens: false,
+                callback_function: (text) => {
+                    elements.outputContent.textContent += text;
+                },
+            }),
+        });
+
+        const decoded = processor.batch_decode(
+            outputs.slice(null, [inputs.input_ids.dims.at(-1), null]),
+            { skip_special_tokens: true },
+        );
+
+        elements.outputContent.textContent = decoded[0];
+        hideLoading();
+    } catch (error) {
+        console.error('Caption generation error:', error);
+        showError('Failed to generate caption. Please check your image URL and try again.');
+        hideLoading();
+    }
+}
+
+// Helper functions
+function getSelectedDevice() {
+    const selected = document.querySelector('input[name="device"]:checked');
+    return selected ? selected.value : 'cpu';
+}
+
+function showLoading(text) {
+    elements.loadingSection.style.display = 'block';
+    elements.loadingText.textContent = text;
+    elements.progressFill.style.width = '50%';
+}
+
+function updateLoadingText(text) {
+    elements.loadingText.textContent = text;
+    const progress = {
+        'Loading processor...': '30%',
+        'Loading model...': '60%',
+        'Loading image...': '70%',
+        'Processing inputs...': '80%',
+        'Generating caption...': '90%'
+    };
+    elements.progressFill.style.width = progress[text] || '50%';
+}
+
+function hideLoading() {
+    elements.loadingSection.style.display = 'none';
+    elements.progressFill.style.width = '0%';
+}
+
+function showOutput() {
+    elements.outputSection.style.display = 'block';
+}
+
+function hideOutput() {
+    elements.outputSection.style.display = 'none';
+}

+function showError(message) {
+    elements.errorSection.style.display = 'block';
+    elements.errorMessage.textContent = message;
+}
+
+function hideError() {
+    elements.errorSection.style.display = 'none';
+}
+
+function showPreview(url) {
+    elements.previewImage.src = url;
+    elements.previewSection.style.display = 'block';
+}
+
+// Event listeners
+elements.loadUrlBtn.addEventListener('click', async () => {
+    const url = elements.imageUrl.value.trim();
+    if (!url) {
+        showError('Please enter a valid image URL');
         return;
     }
+    showPreview(url);
+    await generateCaption(url);
+});

+elements.uploadArea.addEventListener('click', () => {
+    elements.fileInput.click();
+});

+elements.uploadArea.addEventListener('dragover', (e) => {
+    e.preventDefault();
+    elements.uploadArea.classList.add('dragover');
+});

+elements.uploadArea.addEventListener('dragleave', () => {
+    elements.uploadArea.classList.remove('dragover');
 });

+elements.uploadArea.addEventListener('drop', (e) => {
+    e.preventDefault();
+    elements.uploadArea.classList.remove('dragover');
+    handleFiles(e.dataTransfer.files);
+});

+elements.fileInput.addEventListener('change', (e) => {
+    handleFiles(e.target.files);
+});

+async function handleFiles(files) {
+    if (files.length === 0) return;
+
+    const file = files[0];
+    if (!file.type.startsWith('image/')) {
+        showError('Please select a valid image file');
+        return;
+    }

+    const url = URL.createObjectURL(file);
+    showPreview(url);
+    elements.uploadBtn.disabled = false;
+    elements.uploadBtn.dataset.imageUrl = url;
 }
+
+elements.uploadBtn.addEventListener('click', async () => {
+    const url = elements.uploadBtn.dataset.imageUrl;
+    if (url) {
+        await generateCaption(url);
+    }
+});
+
+elements.copyBtn.addEventListener('click', () => {
+    const text = elements.outputContent.textContent;
+    navigator.clipboard.writeText(text).then(() => {
+        elements.copyBtn.textContent = 'Copied!';
+        setTimeout(() => {
+            elements.copyBtn.innerHTML = `
+                <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor">
+                    <rect x="9" y="9" width="13" height="13" rx="2" ry="2"></rect>
+                    <path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"></path>
+                </svg>
+                Copy Caption
+            `;
+        }, 2000);
+    });
+});
+
+// Tab switching
+elements.tabBtns.forEach(btn => {
+    btn.addEventListener('click', () => {
+        const targetTab = btn.dataset.tab;
+
+        elements.tabBtns.forEach(b => b.classList.remove('active'));
+        elements.tabContents.forEach(c => c.classList.remove('active'));
+
+        btn.classList.add('active');
+        document.getElementById(`${targetTab}-tab`).classList.add('active');
+    });
+});
+
+// Device selection
+elements.deviceRadios.forEach(radio => {
+    radio.addEventListener('change', () => {
+        if (isModelLoaded && currentDevice !== getSelectedDevice()) {
+            isModelLoaded = false;
+        }
+    });
+});