whitphx (HF Staff) committed
Commit 945bdba · 1 Parent(s): fd2180b

Add batchsize param

.claude/settings.local.json CHANGED
@@ -15,7 +15,12 @@
       "Bash(timeout 120 npm run bench:cli -- Xenova/distilbert-base-uncased feature-extraction --mode warm --repeats 2 --device webgpu --dtype q8)",
       "Bash(timeout 180 npm run bench:cli -- Xenova/bert-base-uncased feature-extraction --mode warm --repeats 2 --device wasm --dtype fp32)",
       "Bash(timeout 180 npm run bench:cli -- Xenova/bert-base-uncased feature-extraction --mode warm --repeats 2 --device webgpu --dtype fp32)",
-      "Bash(timeout 180 npm run bench:cli -- Xenova/bert-base-uncased feature-extraction --mode warm --repeats 2 --device wasm --dtype q8)"
+      "Bash(timeout 180 npm run bench:cli -- Xenova/bert-base-uncased feature-extraction --mode warm --repeats 2 --device wasm --dtype q8)",
+      "Bash(timeout 180 npm run bench:cli -- Xenova/bert-base-uncased feature-extraction --mode warm --repeats 2 --device webgpu --dtype q8 --batch-size 8)",
+      "Bash(timeout 300 npm run bench:cli -- Xenova/gpt2 feature-extraction --mode warm --repeats 2 --device webgpu --dtype q8 --batch-size 1)",
+      "Bash(timeout 300 npm run bench -- Xenova/gpt2 feature-extraction --mode warm --repeats 2 --dtype q8 --batch-size 1 --cache-dir .bench-cache/warm)",
+      "Bash(timeout 300 npm run bench:cli -- Xenova/t5-small feature-extraction --mode warm --repeats 2 --device webgpu --dtype fp32 --batch-size 1)",
+      "Bash(timeout 300 npm run bench:cli -- Xenova/t5-small feature-extraction --mode warm --repeats 2 --device wasm --dtype fp32 --batch-size 1)"
     ],
     "deny": [],
     "ask": []
bench-node/src/index.ts CHANGED
@@ -18,6 +18,7 @@ const mode = (getArg("mode", "warm") as "warm" | "cold");
 const repeats = Math.max(1, parseInt(getArg("repeats", "3") || "3", 10));
 const cacheDir = getArg("cache-dir", path.resolve(".bench-cache/default"))!;
 const dtype = getArg("dtype"); // optional: fp32, fp16, q8, q4, etc.
+const batchSize = Math.max(1, parseInt(getArg("batch-size", "1") || "1", 10));
 
 // Point library cache to a dedicated directory for controllable cold/warm behavior
 env.cacheDir = cacheDir;
@@ -41,15 +42,18 @@ async function benchOnce() {
   const pipe = await pipeline(task, modelId, options);
   const t1 = performance.now();
 
+  // Prepare batch input
+  const inputs = Array(batchSize).fill("The quick brown fox jumps over the lazy dog.");
+
   const t2 = performance.now();
-  await pipe("The quick brown fox jumps over the lazy dog.");
+  await pipe(inputs);
   const t3 = performance.now();
 
   // Run additional inferences to measure subsequent performance
   const subsequentTimes: number[] = [];
   for (let i = 0; i < 3; i++) {
     const t4 = performance.now();
-    await pipe("The quick brown fox jumps over the lazy dog.");
+    await pipe(inputs);
     const t5 = performance.now();
     subsequentTimes.push(+(t5 - t4).toFixed(1));
   }
@@ -62,12 +66,13 @@ async function benchOnce() {
 }
 
 async function main() {
-  console.log(`Model : ${modelId}`);
-  console.log(`Task : ${task}`);
-  console.log(`Mode : ${mode}`);
-  console.log(`Repeats: ${repeats}`);
-  console.log(`DType : ${dtype || 'auto'}`);
-  console.log(`Cache : ${cacheDir}`);
+  console.log(`Model : ${modelId}`);
+  console.log(`Task : ${task}`);
+  console.log(`Mode : ${mode}`);
+  console.log(`Repeats : ${repeats}`);
+  console.log(`DType : ${dtype || 'auto'}`);
+  console.log(`Batch Size: ${batchSize}`);
+  console.log(`Cache : ${cacheDir}`);
 
   const loads: number[] = [];
   const firsts: number[] = [];
@@ -79,7 +84,8 @@ async function main() {
   const warmOptions: any = {};
   if (dtype) warmOptions.dtype = dtype;
   const warm = await pipeline(task, modelId, warmOptions);
-  await warm("warmup");
+  const warmupInputs = Array(batchSize).fill("warmup");
+  await warm(warmupInputs);
 
   for (let i = 0; i < repeats; i++) {
     const r = await benchOnce();
@@ -105,6 +111,7 @@ async function main() {
     task,
     mode,
     repeats,
+    batchSize,
     cacheDir,
     metrics: {
       load_ms: { p50: +percentile(loads, 0.5).toFixed(1), p90: +percentile(loads, 0.9).toFixed(1), raw: loads },
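
The new --batch-size flag goes through the same getArg helper as the existing flags. That helper is not shown in this diff, so the following is only a minimal sketch of what such a parser might look like, not the project's actual implementation:

// Hypothetical sketch of a getArg-style helper compatible with the calls above.
// Assumes flags arrive on process.argv as "--name value" or "--name=value".
function getArg(name: string, fallback?: string): string | undefined {
  const prefix = `--${name}=`;
  for (let i = 2; i < process.argv.length; i++) {
    const arg = process.argv[i];
    if (arg === `--${name}`) return process.argv[i + 1] ?? fallback;
    if (arg.startsWith(prefix)) return arg.slice(prefix.length);
  }
  return fallback;
}

// Mirrors the new line in the diff: default to 1 and clamp to at least 1.
const batchSize = Math.max(1, parseInt(getArg("batch-size", "1") || "1", 10));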
bench-web/src/cli.ts CHANGED
@@ -16,18 +16,20 @@ const mode = getArg("mode", "warm") as "warm" | "cold";
 const repeats = Math.max(1, parseInt(getArg("repeats", "3") || "3", 10));
 const device = getArg("device", "webgpu") as "webgpu" | "wasm";
 const dtype = getArg("dtype"); // optional: fp32, fp16, q8, q4, etc.
+const batchSize = Math.max(1, parseInt(getArg("batch-size", "1") || "1", 10));
 const browserType = getArg("browser", "chromium") as "chromium" | "firefox" | "webkit";
 const headed = getArg("headed") === "true";
 
 async function main() {
-  console.log(`Model : ${modelId}`);
-  console.log(`Task : ${task}`);
-  console.log(`Mode : ${mode}`);
-  console.log(`Repeats : ${repeats}`);
-  console.log(`Device : ${device}`);
-  console.log(`DType : ${dtype || 'auto'}`);
-  console.log(`Browser : ${browserType}`);
-  console.log(`Headed : ${headed}`);
+  console.log(`Model : ${modelId}`);
+  console.log(`Task : ${task}`);
+  console.log(`Mode : ${mode}`);
+  console.log(`Repeats : ${repeats}`);
+  console.log(`Device : ${device}`);
+  console.log(`DType : ${dtype || 'auto'}`);
+  console.log(`Batch Size : ${batchSize}`);
+  console.log(`Browser : ${browserType}`);
+  console.log(`Headed : ${headed}`);
 
   // Start Vite dev server
   const server = await createServer({
@@ -88,23 +90,49 @@ async function main() {
 
   // Check WebGPU availability if using webgpu device
   if (device === "webgpu") {
-    const gpuAvailable = await page.evaluate(() => {
-      return 'gpu' in navigator;
+    const gpuInfo = await page.evaluate(async () => {
+      if (!('gpu' in navigator)) {
+        return { available: false, adapter: null, features: null };
+      }
+      try {
+        const adapter = await (navigator as any).gpu.requestAdapter();
+        if (!adapter) {
+          return { available: false, adapter: null, features: null };
+        }
+        const features = Array.from(adapter.features || []);
+        const limits = adapter.limits ? {
+          maxTextureDimension2D: adapter.limits.maxTextureDimension2D,
+          maxComputeWorkgroupSizeX: adapter.limits.maxComputeWorkgroupSizeX,
+        } : null;
+        return {
+          available: true,
+          adapterInfo: adapter.info ? adapter.info.description : 'Unknown',
+          features,
+          limits
+        };
+      } catch (e) {
+        return { available: false, adapter: null, error: String(e) };
+      }
     });
 
-    if (!gpuAvailable) {
+    if (!gpuInfo.available) {
       console.error("\n❌ WebGPU is not available in this browser!");
       console.error("Make sure to use --enable-unsafe-webgpu flag for Chromium.");
+      if (gpuInfo.error) console.error("Error:", gpuInfo.error);
       throw new Error("WebGPU not available");
     }
 
     console.log("✓ WebGPU is available");
+    console.log(` Adapter: ${gpuInfo.adapterInfo}`);
+    if (gpuInfo.features && gpuInfo.features.length > 0) {
+      console.log(` Features: ${gpuInfo.features.slice(0, 3).join(', ')}${gpuInfo.features.length > 3 ? '...' : ''}`);
+    }
   }
 
   // Use the exposed CLI function from main.ts
-  const result = await page.evaluate(({ modelId, task, mode, repeats, device, dtype }) => {
-    return (window as any).runBenchmarkCLI({ modelId, task, mode, repeats, device, dtype });
-  }, { modelId, task, mode, repeats, device, dtype });
+  const result = await page.evaluate(({ modelId, task, mode, repeats, device, dtype, batchSize }) => {
+    return (window as any).runBenchmarkCLI({ modelId, task, mode, repeats, device, dtype, batchSize });
+  }, { modelId, task, mode, repeats, device, dtype, batchSize });
 
   console.log("\n" + JSON.stringify(result, null, 2));
 
bench-web/src/main.ts CHANGED
@@ -36,21 +36,25 @@ async function clearCaches({ clearSession = false }: { clearSession?: boolean }
     if (clearSession) sessionStorage.clear();
   } catch { }
 }
-async function benchOnce(modelId: string, task: string, device: string, dtype?: string) {
+async function benchOnce(modelId: string, task: string, device: string, dtype?: string, batchSize: number = 1) {
   const t0 = now();
   const options: any = { device };
   if (dtype) options.dtype = dtype;
   const pipe = await pipeline(task, modelId, options);
   const t1 = now();
+
+  // Prepare batch input
+  const inputs = Array(batchSize).fill("The quick brown fox jumps over the lazy dog.");
+
   const t2 = now();
-  await pipe("The quick brown fox jumps over the lazy dog.");
+  await pipe(inputs);
   const t3 = now();
 
   // Run additional inferences to measure subsequent performance
   const subsequentTimes: number[] = [];
   for (let i = 0; i < 3; i++) {
     const t4 = now();
-    await pipe("The quick brown fox jumps over the lazy dog.");
+    await pipe(inputs);
     const t5 = now();
     subsequentTimes.push(+(t5 - t4).toFixed(1));
   }
@@ -61,12 +65,12 @@ async function benchOnce(modelId: string, task: string, device: string, dtype?:
     subsequent_infer_ms: subsequentTimes
   };
 }
-async function runMany(modelId: string, task: string, repeats: number, device: string, dtype?: string) {
+async function runMany(modelId: string, task: string, repeats: number, device: string, dtype?: string, batchSize: number = 1) {
   const loads: number[] = [];
   const firsts: number[] = [];
   const subsequents: number[] = [];
   for (let i = 0; i < repeats; i++) {
-    const r = await benchOnce(modelId, task, device, dtype);
+    const r = await benchOnce(modelId, task, device, dtype, batchSize);
     loads.push(r.load_ms);
     firsts.push(r.first_infer_ms);
     subsequents.push(...r.subsequent_infer_ms);
@@ -77,16 +81,17 @@ async function runMany(modelId: string, task: string, repeats: number, device: s
     subsequent_infer_ms: { p50: +percentile(subsequents, 0.5).toFixed(1), p90: +percentile(subsequents, 0.9).toFixed(1), raw: subsequents },
   };
 }
-async function runCold(modelId: string, task: string, repeats: number, device: string, dtype?: string) {
+async function runCold(modelId: string, task: string, repeats: number, device: string, dtype?: string, batchSize: number = 1) {
   statusEl.textContent = "clearing caches (cold)...";
   await clearCaches();
   statusEl.textContent = "running (cold)...";
-  const metrics = await runMany(modelId, task, repeats, device, dtype);
+  const metrics = await runMany(modelId, task, repeats, device, dtype, batchSize);
   const result: any = {
     platform: "browser",
     runtime: navigator.userAgent,
     mode: "cold",
     repeats,
+    batchSize,
     model: modelId,
     task,
     device,
@@ -96,19 +101,21 @@ async function runCold(modelId: string, task: string, repeats: number, device: s
   if (dtype) result.dtype = dtype;
   return result;
 }
-async function runWarmDirect(modelId: string, task: string, repeats: number, device: string, dtype?: string) {
+async function runWarmDirect(modelId: string, task: string, repeats: number, device: string, dtype?: string, batchSize: number = 1) {
   statusEl.textContent = "prefetching (warmup) ...";
   const options: any = { device };
   if (dtype) options.dtype = dtype;
   const p = await pipeline(task, modelId, options);
-  await p("warmup");
+  const warmupInputs = Array(batchSize).fill("warmup");
+  await p(warmupInputs);
   statusEl.textContent = "running (warm)...";
-  const metrics = await runMany(modelId, task, repeats, device, dtype);
+  const metrics = await runMany(modelId, task, repeats, device, dtype, batchSize);
   const result: any = {
     platform: "browser",
     runtime: navigator.userAgent,
     mode: "warm",
     repeats,
+    batchSize,
     model: modelId,
     task,
     device,
@@ -117,20 +124,21 @@ async function runWarmDirect(modelId: string, task: string, repeats: number, dev
   if (dtype) result.dtype = dtype;
   return result;
 }
-async function runWarm(modelId: string, task: string, repeats: number, device: string, dtype?: string) {
+async function runWarm(modelId: string, task: string, repeats: number, device: string, dtype?: string, batchSize: number = 1) {
   const flag = sessionStorage.getItem("__warm_ready__");
   if (!flag) {
     statusEl.textContent = "prefetching (warmup) ...";
     const options: any = { device };
     if (dtype) options.dtype = dtype;
     const p = await pipeline(task, modelId, options);
-    await p("warmup");
-    sessionStorage.setItem("__warm_ready__", JSON.stringify({ modelId, task, repeats, device, dtype }));
+    const warmupInputs = Array(batchSize).fill("warmup");
+    await p(warmupInputs);
+    sessionStorage.setItem("__warm_ready__", JSON.stringify({ modelId, task, repeats, device, dtype, batchSize }));
     location.reload();
     return null;
   } else {
     sessionStorage.removeItem("__warm_ready__");
-    return await runWarmDirect(modelId, task, repeats, device, dtype);
+    return await runWarmDirect(modelId, task, repeats, device, dtype, batchSize);
   }
 }
 async function run() {
@@ -160,11 +168,12 @@ btn.addEventListener("click", () => {
 });
 
 // Expose for CLI use
-(window as any).runBenchmarkCLI = async function (params: { modelId: string, task: string, mode: string, repeats: number, device: string, dtype?: string }) {
+(window as any).runBenchmarkCLI = async function (params: { modelId: string, task: string, mode: string, repeats: number, device: string, dtype?: string, batchSize?: number }) {
+  const batchSize = params.batchSize || 1;
   if (params.mode === "cold") {
-    return await runCold(params.modelId, params.task, params.repeats, params.device, params.dtype);
+    return await runCold(params.modelId, params.task, params.repeats, params.device, params.dtype, batchSize);
   } else {
     // For warm, use the direct function that skips reload logic
-    return await runWarmDirect(params.modelId, params.task, params.repeats, params.device, params.dtype);
+    return await runWarmDirect(params.modelId, params.task, params.repeats, params.device, params.dtype, batchSize);
   }
 };
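
The reason Array(batchSize).fill(...) works as a drop-in replacement for the single test sentence is that Transformers.js pipelines accept an array of inputs and run them as one batched call. A minimal standalone sketch; the package name, model, and options below are illustrative assumptions, not taken from this commit:

// Assumes Transformers.js v3 (@huggingface/transformers), which supports the
// device/dtype options used throughout this repo.
import { pipeline } from "@huggingface/transformers";

async function demoBatchedInference(batchSize: number) {
  const extractor = await pipeline(
    "feature-extraction",
    "Xenova/all-MiniLM-L6-v2",
    { device: "wasm", dtype: "q8" },
  );
  const inputs = Array(batchSize).fill("The quick brown fox jumps over the lazy dog.");
  // One call over the whole batch; the output Tensor's first dimension is batchSize.
  const output = await extractor(inputs);
  console.log(output.dims); // e.g. [batchSize, sequenceLength, hiddenSize]
}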