Spaces:
Running
Running
Upload index.js with huggingface_hub
Browse files
index.js
CHANGED
@@ -1,76 +1,284 @@
|
|
1 |
-
import {
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
-
//
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
|
9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
-
//
|
12 |
-
|
13 |
-
|
14 |
-
|
|
|
15 |
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
|
21 |
-
|
22 |
-
|
23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
return;
|
25 |
}
|
|
|
|
|
|
|
26 |
|
27 |
-
|
|
|
|
|
28 |
|
29 |
-
|
30 |
-
|
|
|
|
|
31 |
|
32 |
-
|
|
|
33 |
});
|
34 |
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
imageContainer.style.backgroundImage = `url(${img})`;
|
40 |
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
}
|
49 |
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
// Generate a random color for the box
|
55 |
-
const color = '#' + Math.floor(Math.random() * 0xFFFFFF).toString(16).padStart(6, 0);
|
56 |
-
|
57 |
-
// Draw the box
|
58 |
-
const boxElement = document.createElement('div');
|
59 |
-
boxElement.className = 'bounding-box';
|
60 |
-
Object.assign(boxElement.style, {
|
61 |
-
borderColor: color,
|
62 |
-
left: 100 * xmin + '%',
|
63 |
-
top: 100 * ymin + '%',
|
64 |
-
width: 100 * (xmax - xmin) + '%',
|
65 |
-
height: 100 * (ymax - ymin) + '%',
|
66 |
-
})
|
67 |
-
|
68 |
-
// Draw label
|
69 |
-
const labelElement = document.createElement('span');
|
70 |
-
labelElement.textContent = label;
|
71 |
-
labelElement.className = 'bounding-box-label';
|
72 |
-
labelElement.style.backgroundColor = color;
|
73 |
-
|
74 |
-
boxElement.appendChild(labelElement);
|
75 |
-
imageContainer.appendChild(boxElement);
|
76 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import {
|
2 |
+
AutoProcessor,
|
3 |
+
AutoModelForImageTextToText,
|
4 |
+
load_image,
|
5 |
+
TextStreamer,
|
6 |
+
} from 'https://cdn.jsdelivr.net/npm/@huggingface/[email protected]';
|
7 |
|
8 |
+
// Shared model state for this module.

/** Processor returned by AutoProcessor.from_pretrained(); null until a load assigns it. */
let processor = null;

/** Model returned by AutoModelForImageTextToText.from_pretrained(); null until a load assigns it. */
let model = null;

/** Set to true once both the processor and the model have finished loading. */
let isModelLoaded = false;

/** Device value the model was last (re)loaded for; compared against the selected device radio. */
let currentDevice = 'cpu';
|
13 |
|
14 |
+
// Cached DOM references, looked up once when the module is evaluated.
const elements = (() => {
  const byId = (id) => document.getElementById(id);
  const all = (selector) => document.querySelectorAll(selector);

  return {
    // URL tab
    imageUrl: byId('image-url'),
    loadUrlBtn: byId('load-url-btn'),

    // Upload tab
    fileInput: byId('file-input'),
    uploadArea: byId('upload-area'),
    uploadBtn: byId('upload-btn'),

    // Preview and prompt
    previewSection: byId('preview-section'),
    previewImage: byId('preview-image'),
    customPrompt: byId('custom-prompt'),

    // Loading indicator
    loadingSection: byId('loading-section'),
    loadingText: byId('loading-text'),
    progressFill: byId('progress-fill'),

    // Caption output
    outputSection: byId('output-section'),
    outputContent: byId('output-content'),
    copyBtn: byId('copy-btn'),

    // Error banner
    errorSection: byId('error-section'),
    errorMessage: byId('error-message'),

    // Collections
    tabBtns: all('.tab-btn'),
    tabContents: all('.tab-content'),
    deviceRadios: all('input[name="device"]'),
  };
})();
|
36 |
|
37 |
+
/**
 * Loads the processor and model for the currently selected device.
 *
 * Does nothing when a model is already loaded for that device. Otherwise the
 * previous model (if any) is released, the processor and model are fetched,
 * and the module-level `processor`, `model`, `isModelLoaded` and
 * `currentDevice` variables are updated.
 *
 * @returns {Promise<void>} Resolves once the model is ready to use.
 * @throws Rethrows the underlying load error after logging it and showing
 *   the "Failed to load model" message.
 */
async function initializeModel() {
  // Read the selection once so the check and the load agree on the device.
  const targetDevice = getSelectedDevice();
  if (isModelLoaded && currentDevice === targetDevice) {
    return;
  }

  try {
    showLoading('Loading model...');

    // Invalidate up front: if the reload fails, the flag must not keep
    // claiming that a model for the newly selected device is ready.
    isModelLoaded = false;
    currentDevice = targetDevice;

    // Release the sessions held by the model of the previous device before
    // allocating a new one; a failed cleanup must not block the reload.
    const previousModel = model;
    model = null;
    if (previousModel && typeof previousModel.dispose === 'function') {
      try {
        await previousModel.dispose();
      } catch (disposeError) {
        console.warn('Failed to dispose previous model:', disposeError);
      }
    }

    const model_id = "onnx-community/FastVLM-0.5B-ONNX";

    const modelOptions = {
      dtype: {
        embed_tokens: "fp16",
        vision_encoder: "q4",
        decoder_model_merged: "q4",
      },
    };

    // Any other device value leaves `device` unset (library default).
    if (currentDevice === 'webgpu') {
      modelOptions.device = 'webgpu';
    }

    updateLoadingText('Loading processor...');
    processor = await AutoProcessor.from_pretrained(model_id);

    updateLoadingText('Loading model...');
    model = await AutoModelForImageTextToText.from_pretrained(model_id, modelOptions);

    isModelLoaded = true;
  } catch (error) {
    console.error('Model initialization error:', error);
    showError('Failed to load model. Please try again.');
    throw error;
  } finally {
    hideLoading();
  }
}
|
76 |
+
|
77 |
+
/**
 * Generates a caption for an image and writes it into the output panel.
 *
 * Loads the model first when it is not ready, builds a single-turn chat
 * prompt from the custom prompt field (or a default instruction), streams
 * the generated text into the output element and finally replaces it with
 * the decoded result.
 *
 * @param {string} imageUrl - Location of the image, passed to `load_image`.
 * @returns {Promise<void>} Never rejects; failures are logged and shown in
 *   the error banner.
 */
async function generateCaption(imageUrl) {
  try {
    hideError();
    showLoading('Processing image...');

    if (!isModelLoaded) {
      try {
        await initializeModel();
      } catch {
        // initializeModel() has already logged the error and shown the
        // "Failed to load model" message; do not overwrite it with the
        // unrelated image-URL hint below.
        return;
      }
      // initializeModel() hides the loading panel when it finishes, so bring
      // it back for the remaining steps.
      showLoading('Processing image...');
    }

    // Prepare prompt
    const customPromptText = elements.customPrompt.value.trim();
    const promptContent = customPromptText || "Describe this image in detail.";

    const messages = [
      {
        role: "user",
        content: `<image>${promptContent}`,
      },
    ];

    const prompt = processor.apply_chat_template(messages, {
      add_generation_prompt: true,
    });

    updateLoadingText('Loading image...');
    const image = await load_image(imageUrl);

    updateLoadingText('Processing inputs...');
    const inputs = await processor(image, prompt, {
      add_special_tokens: false,
    });

    updateLoadingText('Generating caption...');
    elements.outputContent.textContent = '';
    showOutput();

    const outputs = await model.generate({
      ...inputs,
      max_new_tokens: 512,
      do_sample: false,
      streamer: new TextStreamer(processor.tokenizer, {
        skip_prompt: true,
        // The streamed text is shown to the user, so keep special tokens
        // out of it, matching the final decode below.
        skip_special_tokens: true,
        callback_function: (text) => {
          elements.outputContent.textContent += text;
        },
      }),
    });

    // Drop the prompt tokens and decode only the newly generated ones.
    const decoded = processor.batch_decode(
      outputs.slice(null, [inputs.input_ids.dims.at(-1), null]),
      { skip_special_tokens: true },
    );

    elements.outputContent.textContent = decoded[0];
  } catch (error) {
    console.error('Caption generation error:', error);
    showError('Failed to generate caption. Please check your image URL and try again.');
  } finally {
    hideLoading();
  }
}
|
140 |
+
|
141 |
+
// Helper functions
|
142 |
+
/**
 * Reads the device chosen via the `device` radio group.
 *
 * @returns {string} Value of the checked radio, or 'cpu' when none is checked.
 */
function getSelectedDevice() {
  const checkedRadio = document.querySelector('input[name="device"]:checked');
  if (!checkedRadio) {
    return 'cpu';
  }
  return checkedRadio.value;
}
|
146 |
+
|
147 |
+
/**
 * Reveals the loading panel with the given status text and a half-filled bar.
 *
 * @param {string} text - Status message to display.
 */
function showLoading(text) {
  const { loadingSection, loadingText, progressFill } = elements;

  loadingSection.style.display = 'block';
  loadingText.textContent = text;
  progressFill.style.width = '50%';
}
|
152 |
+
|
153 |
+
// Progress-bar width for each known status message. Built once, and kept in a
// Map so arbitrary text can never resolve to an inherited object property.
const LOADING_PROGRESS = new Map([
  ['Loading processor...', '30%'],
  ['Loading model...', '60%'],
  ['Loading image...', '70%'],
  ['Processing inputs...', '80%'],
  ['Generating caption...', '90%'],
]);

/**
 * Updates the loading status text and moves the progress bar to the width
 * associated with that message.
 *
 * @param {string} text - Status message; unknown messages use a 50% bar.
 */
function updateLoadingText(text) {
  elements.loadingText.textContent = text;
  elements.progressFill.style.width = LOADING_PROGRESS.get(text) ?? '50%';
}
|
164 |
+
|
165 |
+
/** Hides the loading panel and resets the progress bar to empty. */
function hideLoading() {
  const { loadingSection, progressFill } = elements;

  loadingSection.style.display = 'none';
  progressFill.style.width = '0%';
}
|
169 |
+
|
170 |
+
/** Makes the caption output section visible. */
function showOutput() {
  const { outputSection } = elements;
  outputSection.style.display = 'block';
}
|
173 |
+
|
174 |
+
/** Hides the caption output section. */
function hideOutput() {
  const { outputSection } = elements;
  outputSection.style.display = 'none';
}
|
177 |
|
178 |
+
/**
 * Shows the error banner with the given message.
 *
 * @param {string} message - Text to display; assigned as plain text.
 */
function showError(message) {
  const { errorSection, errorMessage } = elements;

  errorSection.style.display = 'block';
  errorMessage.textContent = message;
}
|
182 |
+
|
183 |
+
/** Hides the error banner. */
function hideError() {
  const { errorSection } = elements;
  errorSection.style.display = 'none';
}
|
186 |
+
|
187 |
+
/**
 * Points the preview image at `url` and reveals the preview section.
 *
 * @param {string} url - Image location assigned to the preview `src`.
 */
function showPreview(url) {
  const { previewImage, previewSection } = elements;

  previewImage.src = url;
  previewSection.style.display = 'block';
}
|
191 |
+
|
192 |
+
// Event listeners
|
193 |
+
// "Load URL" button: preview the entered image and caption it.
elements.loadUrlBtn.addEventListener('click', async function onLoadUrlClick() {
  const enteredUrl = elements.imageUrl.value.trim();

  // Guard: nothing typed (or whitespace only).
  if (enteredUrl === '') {
    showError('Please enter a valid image URL');
    return;
  }

  showPreview(enteredUrl);
  await generateCaption(enteredUrl);
});
|
202 |
|
203 |
+
// Clicking the drop zone opens the native file picker.
elements.uploadArea.addEventListener('click', function openFilePicker() {
  elements.fileInput.click();
});

// preventDefault() on dragover is what allows the zone to receive a drop.
elements.uploadArea.addEventListener('dragover', function onDragOver(event) {
  event.preventDefault();
  elements.uploadArea.classList.toggle('dragover', true);
});

elements.uploadArea.addEventListener('dragleave', function onDragLeave() {
  elements.uploadArea.classList.toggle('dragover', false);
});

// Dropped files take the same path as files chosen through the picker.
elements.uploadArea.addEventListener('drop', function onDrop(event) {
  event.preventDefault();
  elements.uploadArea.classList.toggle('dragover', false);
  handleFiles(event.dataTransfer.files);
});

elements.fileInput.addEventListener('change', function onFileChosen(event) {
  handleFiles(event.target.files);
});
|
|
|
225 |
|
226 |
+
/**
 * Accepts the first file of a selection or drop, previews it and arms the
 * upload button with an object URL for it.
 *
 * @param {FileList|File[]|null|undefined} files - Files from the picker or a
 *   drop; only the first entry is used. Empty or missing lists are ignored.
 * @returns {Promise<void>}
 */
async function handleFiles(files) {
  // A missing list is treated like an empty one instead of throwing.
  if (!files || files.length === 0) return;

  const file = files[0];
  if (!file.type.startsWith('image/')) {
    showError('Please select a valid image file');
    return;
  }

  // A valid file was chosen: clear any error left from a previous attempt.
  hideError();

  // NOTE(review): earlier object URLs are not revoked here; presumably a
  // caption run may still be reading one, so confirm before adding
  // URL.revokeObjectURL().
  const url = URL.createObjectURL(file);
  showPreview(url);
  elements.uploadBtn.disabled = false;
  elements.uploadBtn.dataset.imageUrl = url;
}
|
240 |
+
|
241 |
+
// Upload button: caption the file whose object URL handleFiles() stored on
// the button's dataset.
elements.uploadBtn.addEventListener('click', async function onUploadClick() {
  const { imageUrl } = elements.uploadBtn.dataset;
  if (!imageUrl) {
    return;
  }
  await generateCaption(imageUrl);
});
|
247 |
+
|
248 |
+
// Copy button: put the caption on the clipboard and briefly confirm it.
elements.copyBtn.addEventListener('click', async () => {
  const text = elements.outputContent.textContent;

  try {
    // navigator.clipboard is unavailable in insecure contexts and
    // writeText() can reject (e.g. permission denied); both end up here.
    await navigator.clipboard.writeText(text);
  } catch (error) {
    console.error('Copy to clipboard failed:', error);
    showError('Failed to copy caption to clipboard.');
    return;
  }

  elements.copyBtn.textContent = 'Copied!';

  // Restore the original icon + label after two seconds. The markup is a
  // static literal, so assigning it through innerHTML is safe.
  setTimeout(() => {
    elements.copyBtn.innerHTML = `
      <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor">
        <rect x="9" y="9" width="13" height="13" rx="2" ry="2"></rect>
        <path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"></path>
      </svg>
      Copy Caption
    `;
  }, 2000);
});
|
263 |
+
|
264 |
+
// Tab switching
|
265 |
+
for (const tabButton of elements.tabBtns) {
  tabButton.addEventListener('click', () => {
    // Panels are identified as "<data-tab value>-tab".
    const panelId = `${tabButton.dataset.tab}-tab`;

    // Deactivate every tab button and panel...
    for (const otherButton of elements.tabBtns) {
      otherButton.classList.remove('active');
    }
    for (const panel of elements.tabContents) {
      panel.classList.remove('active');
    }

    // ...then activate the clicked button and its panel.
    tabButton.classList.add('active');
    document.getElementById(panelId).classList.add('active');
  });
}
|
276 |
+
|
277 |
+
// Device selection
|
278 |
+
for (const deviceRadio of elements.deviceRadios) {
  deviceRadio.addEventListener('change', () => {
    // Nothing to invalidate when no model is loaded.
    if (!isModelLoaded) {
      return;
    }
    // Same device as the loaded model: keep it.
    if (currentDevice === getSelectedDevice()) {
      return;
    }
    // Mark the model as not loaded so the next caption run reloads it.
    isModelLoaded = false;
  });
}
|