victor HF Staff commited on
Commit
15a5288
·
verified ·
1 Parent(s): eb825b2

Upload folder using huggingface_hub

Browse files
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ node_modules
2
+ .claude/
README.md CHANGED
@@ -1,12 +1,73 @@
1
- ---
2
- title: Inference Metrics
3
- emoji: 🏃
4
- colorFrom: gray
5
- colorTo: purple
6
- sdk: gradio
7
- sdk_version: 5.39.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # LLM Pricing
2
+
3
+ A tool to fetch and compare LLM pricing and capabilities across multiple providers.
4
+
5
+ ## Data Sources
6
+
7
+ This tool uses two primary data sources:
8
+ 1. **HuggingFace Router API** (https://router.huggingface.co/v1/models) - Primary source for model pricing, context length, and capability flags
9
+ 2. **Provider-specific APIs** - Fallback source for additional metadata and capabilities
10
+
11
+ The HuggingFace Router API now provides comprehensive data including:
12
+ - Pricing (input/output costs per million tokens)
13
+ - Context length
14
+ - supports_tools flag
15
+ - supports_structured_output flag
16
+ - Provider status
17
+
18
+ When data is available from both sources, the HuggingFace Router data takes priority.
19
+
20
+ ## Installation
21
+
22
+ ```bash
23
+ bun install
24
+ ```
25
+
26
+ ## Usage
27
+
28
+ ```bash
29
+ # Fetch all models and enrich with provider data
30
+ bun run get-metrics.ts
31
+
32
+ # Skip specific providers
33
+ bun run get-metrics.ts --skip-providers novita featherless
34
+
35
+ # Test performance for models (requires HF_TOKEN)
36
+ HF_TOKEN=your_token bun run get-metrics.ts --test-performance
37
+
38
+ # Test specific number of models
39
+ HF_TOKEN=your_token bun run get-metrics.ts --test-performance --test-limit 10
40
+ ```
41
+
42
+ ## Supported Providers
43
+
44
+ - **novita** - Full API support
45
+ - **sambanova** - Full API support
46
+ - **groq** - Full API support
47
+ - **featherless** - Full API support
48
+ - **together** - Full API support
49
+ - **cohere** - Full API support
50
+ - **fireworks** - Full API support
51
+ - **nebius** - HF Router data only
52
+ - **hyperbolic** - HF Router data only
53
+ - **cerebras** - HF Router data only
54
+ - **nscale** - HF Router data only
55
+
56
+ ## Output Files
57
+
58
+ - `enriched_models_enhanced.json` - Complete enriched model data
59
+ - `provider_models_raw.json` - Raw provider API responses for debugging
60
+
61
+ ## Environment Variables
62
+
63
+ Optional API keys for fetching provider-specific data:
64
+ - `NOVITA_API_KEY`
65
+ - `SAMBANOVA_API_KEY`
66
+ - `GROQ_API_KEY`
67
+ - `FEATHERLESS_API_KEY`
68
+ - `TOGETHER_API_KEY`
69
+ - `COHERE_API_KEY`
70
+ - `FIREWORKS_API_KEY`
71
+ - `HF_TOKEN` - Required for performance testing
72
+
73
+ This project was created using `bun init` in bun v1.2.4. [Bun](https://bun.sh) is a fast all-in-one JavaScript runtime.
bun.lock ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "lockfileVersion": 1,
3
+ "workspaces": {
4
+ "": {
5
+ "name": "llm-pricing",
6
+ "devDependencies": {
7
+ "@types/bun": "latest",
8
+ },
9
+ "peerDependencies": {
10
+ "typescript": "^5",
11
+ },
12
+ },
13
+ },
14
+ "packages": {
15
+ "@types/bun": ["@types/[email protected]", "", { "dependencies": { "bun-types": "1.2.19" } }, "sha512-d9ZCmrH3CJ2uYKXQIUuZ/pUnTqIvLDS0SK7pFmbx8ma+ziH/FRMoAq5bYpRG7y+w1gl+HgyNZbtqgMq4W4e2Lg=="],
16
+
17
+ "@types/node": ["@types/[email protected]", "", { "dependencies": { "undici-types": "~7.8.0" } }, "sha512-ut5FthK5moxFKH2T1CUOC6ctR67rQRvvHdFLCD2Ql6KXmMuCrjsSsRI9UsLCm9M18BMwClv4pn327UvB7eeO1w=="],
18
+
19
+ "@types/react": ["@types/[email protected]", "", { "dependencies": { "csstype": "^3.0.2" } }, "sha512-WmdoynAX8Stew/36uTSVMcLJJ1KRh6L3IZRx1PZ7qJtBqT3dYTgyDTx8H1qoRghErydW7xw9mSJ3wS//tCRpFA=="],
20
+
21
+ "bun-types": ["[email protected]", "", { "dependencies": { "@types/node": "*" }, "peerDependencies": { "@types/react": "^19" } }, "sha512-uAOTaZSPuYsWIXRpj7o56Let0g/wjihKCkeRqUBhlLVM/Bt+Fj9xTo+LhC1OV1XDaGkz4hNC80et5xgy+9KTHQ=="],
22
+
23
+ "csstype": ["[email protected]", "", {}, "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw=="],
24
+
25
+ "typescript": ["[email protected]", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ=="],
26
+
27
+ "undici-types": ["[email protected]", "", {}, "sha512-9UJ2xGDvQ43tYyVMpuHlsgApydB8ZKfVYTsLDhXkFL/6gfkp+U8xTGdh8pMJv1SpZna0zxG1DwsKZsreLbXBxw=="],
28
+ }
29
+ }
enriched_models_enhanced.json ADDED
The diff for this file is too large to render. See raw diff
 
get-metrics.ts ADDED
@@ -0,0 +1,611 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // get-metrics-new.ts - Updated version using direct provider APIs
2
+ import * as fs from "node:fs";
3
+ import { parseArgs } from "util";
4
+ import { ProviderAggregator } from "./providers";
5
+ import type { ProviderEntry } from "./providers";
6
+ import { extractHFRouterData } from "./providers/huggingface-router";
7
+
8
+ /* -------------------------------------------------------------------------- */
9
+ /* CONSTANTS */
10
+ /* -------------------------------------------------------------------------- */
11
+
12
// Catalogue endpoint: lists every model the HF router can serve.
const HUGGINGFACE_API = "https://router.huggingface.co/v1/models";
// Chat-completions endpoint, used by testModelProvider for live probes.
const HUGGINGFACE_ROUTER_API =
  "https://router.huggingface.co/v1/chat/completions";
15
+
16
+ /* -------------------------------------------------------------------------- */
17
+ /* TYPE DEFINITIONS */
18
+ /* -------------------------------------------------------------------------- */
19
+
20
// One model record as returned by the HF router /v1/models endpoint.
// Only `id` is guaranteed here; all other fields are carried through
// untouched via the index signature.
interface HFModel {
  id: string;
  [key: string]: any;
  // Per-provider entries; may be absent until enrichment creates it.
  providers?: ProviderEntry[];
}

// Counters accumulated by enrichHuggingfaceModels() and reported by
// printStatistics().
interface Statistics {
  total_models: number;
  // Models that gained pricing from any source.
  models_enriched: number;
  // Individual provider entries that gained pricing.
  providers_enriched: number;
  // `supports_*` fields copied over from provider APIs.
  new_capabilities_added: number;
  // Provider name -> number of models its API returned.
  providers_fetched: Record<string, number>;
}

// Summary of one performance-testing run (see testAllProviders()).
interface PerformanceTestResult {
  total_tested: number;
  successful: number;
  errors: number;
  // Keys: "live" | "offline" | "not_tested".
  status_distribution: Record<string, number>;
}
40
+
41
+ /* -------------------------------------------------------------------------- */
42
+ /* FETCH HELPERS */
43
+ /* -------------------------------------------------------------------------- */
44
+
45
+ async function fetchHuggingfaceModels(): Promise<HFModel[]> {
46
+ const resp = await fetch(HUGGINGFACE_API).then(
47
+ (r) => r.json() as Promise<{ data: HFModel[] }>
48
+ );
49
+ return resp.data;
50
+ }
51
+
52
+ /* -------------------------------------------------------------------------- */
53
+ /* PROVIDER ENRICHMENT */
54
+ /* -------------------------------------------------------------------------- */
55
+
56
+ function normalizeModelId(modelId: string): string {
57
+ // Convert HF model ID to a normalized form for matching
58
+ // Remove organization prefix for common patterns
59
+ const patterns = [
60
+ /^meta-llama\/Meta-Llama-(.+)$/,
61
+ /^meta-llama\/Llama-(.+)$/,
62
+ /^mistralai\/(.+)$/,
63
+ /^google\/(.+)$/,
64
+ /^anthropic\/(.+)$/,
65
+ ];
66
+
67
+ for (const pattern of patterns) {
68
+ const match = modelId.match(pattern);
69
+ if (match) {
70
+ return match[1].toLowerCase();
71
+ }
72
+ }
73
+
74
+ // For other models, just use the part after the last slash
75
+ const parts = modelId.split("/");
76
+ return parts[parts.length - 1].toLowerCase();
77
+ }
78
+
79
+ function matchProviderModel(
80
+ hfModelId: string,
81
+ providerEntries: Map<string, ProviderEntry[]>
82
+ ): Map<string, ProviderEntry[]> {
83
+ const normalizedHfId = normalizeModelId(hfModelId);
84
+ const matches = new Map<string, ProviderEntry[]>();
85
+
86
+ for (const [provider, entries] of providerEntries) {
87
+ const matchingEntries = entries.filter((entry) => {
88
+ // This would need to be enhanced with provider-specific matching logic
89
+ // For now, we'll use simple substring matching
90
+ const entryId = (entry as any).id || (entry as any).model_id || "";
91
+ const normalizedEntryId = normalizeModelId(entryId);
92
+
93
+ return (
94
+ normalizedEntryId.includes(normalizedHfId) ||
95
+ normalizedHfId.includes(normalizedEntryId)
96
+ );
97
+ });
98
+
99
+ if (matchingEntries.length > 0) {
100
+ matches.set(provider, matchingEntries);
101
+ }
102
+ }
103
+
104
+ return matches;
105
+ }
106
+
107
+ async function enrichHuggingfaceModels(
108
+ hfModels: HFModel[],
109
+ aggregator: ProviderAggregator
110
+ ): Promise<{
111
+ enriched: HFModel[];
112
+ stats: Statistics;
113
+ matchedProviderData: any[];
114
+ }> {
115
+ console.log("\nFetching data from all providers...");
116
+ const providerData = await aggregator.fetchAllProviders();
117
+
118
+ const stats: Statistics = {
119
+ total_models: hfModels.length,
120
+ models_enriched: 0,
121
+ providers_enriched: 0,
122
+ new_capabilities_added: 0,
123
+ providers_fetched: {},
124
+ };
125
+
126
+ // Count models per provider
127
+ for (const [provider, entries] of providerData) {
128
+ stats.providers_fetched[provider] = entries.length;
129
+ }
130
+
131
+ const enrichedModels: HFModel[] = [];
132
+ const matchedProviderData: any[] = [];
133
+ const matchedProviderKeys = new Set<string>(); // Track unique model-provider combinations
134
+
135
+ console.log(
136
+ `\nProcessing ${hfModels.length} models from HuggingFace Router API...`
137
+ );
138
+
139
+ for (const hfModel of hfModels) {
140
+ const enrichedModel = structuredClone(hfModel);
141
+
142
+ // Extract HF router data first (this is already in the model)
143
+ const hfRouterData = extractHFRouterData(enrichedModel);
144
+
145
+ // Find matches from provider APIs
146
+ const matches = matchProviderModel(hfModel.id, providerData);
147
+
148
+ // Ensure providers array exists
149
+ if (!enrichedModel.providers) {
150
+ enrichedModel.providers = [];
151
+ }
152
+
153
+ let modelEnriched = false;
154
+
155
+ // Process HF router data first (prioritize it)
156
+ for (const [providerName, hfProviderData] of hfRouterData) {
157
+ const normalizedProvider = normalizeProviderName(providerName);
158
+
159
+ // Check if provider already exists in the model
160
+ let existingProvider = enrichedModel.providers.find(
161
+ (p) => normalizeProviderName(p.provider) === normalizedProvider
162
+ );
163
+
164
+ if (existingProvider) {
165
+ // HF router data is already there, just count it
166
+ if (hfProviderData.pricing) {
167
+ stats.providers_enriched++;
168
+ modelEnriched = true;
169
+ }
170
+ // Track this provider data as matched (avoid duplicates)
171
+ const matchKey = `${hfModel.id}:${providerName}`;
172
+ if (!matchedProviderKeys.has(matchKey)) {
173
+ matchedProviderKeys.add(matchKey);
174
+ matchedProviderData.push({
175
+ ...hfProviderData,
176
+ provider: providerName,
177
+ id: hfModel.id,
178
+ });
179
+ }
180
+ }
181
+ }
182
+
183
+ // Then enrich with provider API data where missing
184
+ if (matches.size > 0) {
185
+ for (const [provider, providerEntries] of matches) {
186
+ for (const providerEntry of providerEntries) {
187
+ // Find existing provider entry
188
+ let existingProvider = enrichedModel.providers.find(
189
+ (p) => normalizeProviderName(p.provider) === provider.toLowerCase()
190
+ );
191
+
192
+ if (!existingProvider) {
193
+ // No HF router data for this provider
194
+ // Skip - we only want providers that are listed in HF Router
195
+ continue;
196
+ } else {
197
+ // Merge data, but prioritize HF router data
198
+ const hadPricing = !!existingProvider.pricing;
199
+ const hadTools = existingProvider.supports_tools !== undefined;
200
+ const hadStructured =
201
+ existingProvider.supports_structured_output !== undefined;
202
+ const hadContext = !!existingProvider.context_length;
203
+
204
+ // Only add provider API data for missing fields
205
+ const mergedData: any = {};
206
+
207
+ // Add provider API data only if HF router doesn't have it
208
+ if (!hadPricing && providerEntry.pricing) {
209
+ mergedData.pricing = providerEntry.pricing;
210
+ stats.providers_enriched++;
211
+ modelEnriched = true;
212
+ }
213
+
214
+ if (!hadContext && providerEntry.context_length) {
215
+ mergedData.context_length = providerEntry.context_length;
216
+ }
217
+
218
+ if (!hadTools && providerEntry.supports_tools !== undefined) {
219
+ mergedData.supports_tools = providerEntry.supports_tools;
220
+ }
221
+
222
+ if (
223
+ !hadStructured &&
224
+ providerEntry.supports_structured_output !== undefined
225
+ ) {
226
+ mergedData.supports_structured_output =
227
+ providerEntry.supports_structured_output;
228
+ }
229
+
230
+ // Add other capabilities from provider API
231
+ for (const key of Object.keys(providerEntry)) {
232
+ if (
233
+ key.startsWith("supports_") &&
234
+ !["supports_tools", "supports_structured_output"].includes(
235
+ key
236
+ ) &&
237
+ !(key in existingProvider)
238
+ ) {
239
+ mergedData[key] = (providerEntry as any)[key];
240
+ stats.new_capabilities_added++;
241
+ }
242
+ }
243
+
244
+ // Apply merged data
245
+ Object.assign(existingProvider, mergedData);
246
+
247
+ // Track the enriched data (avoid duplicates)
248
+ const matchKey = `${hfModel.id}:${provider}`;
249
+ if (!matchedProviderKeys.has(matchKey)) {
250
+ matchedProviderKeys.add(matchKey);
251
+ matchedProviderData.push({
252
+ ...existingProvider,
253
+ provider,
254
+ id: hfModel.id,
255
+ });
256
+ }
257
+ }
258
+ }
259
+ }
260
+ }
261
+
262
+ if (modelEnriched) {
263
+ stats.models_enriched++;
264
+ }
265
+
266
+ enrichedModels.push(enrichedModel);
267
+ }
268
+
269
+ // Log models from provider APIs that weren't matched
270
+ let unmatchedCount = 0;
271
+ for (const [provider, entries] of providerData) {
272
+ for (const entry of entries) {
273
+ const modelId = (entry as any).model_id || (entry as any).id || "";
274
+ if (modelId) {
275
+ const matchKey = `${modelId}:${provider}`;
276
+ if (!matchedProviderKeys.has(matchKey)) {
277
+ unmatchedCount++;
278
+ }
279
+ }
280
+ }
281
+ }
282
+
283
+ if (unmatchedCount > 0) {
284
+ console.log(
285
+ `\nNote: ${unmatchedCount} models from provider APIs were not included (not in HF Router).`
286
+ );
287
+ }
288
+
289
+ return { enriched: enrichedModels, stats, matchedProviderData };
290
+ }
291
+
292
+ // Helper function to normalize provider names for comparison
293
+ function normalizeProviderName(providerName: string): string {
294
+ const providerMap: Record<string, string> = {
295
+ "featherless-ai": "featherless",
296
+ "fireworks-ai": "fireworks",
297
+ "hf-inference": "huggingface",
298
+ };
299
+
300
+ return (providerMap[providerName] || providerName).toLowerCase();
301
+ }
302
+
303
+ /* -------------------------------------------------------------------------- */
304
+ /* PERFORMANCE TESTING */
305
+ /* -------------------------------------------------------------------------- */
306
+
307
+ async function testModelProvider(
308
+ modelId: string,
309
+ providerName: string,
310
+ hfToken: string
311
+ ): Promise<Partial<ProviderEntry>> {
312
+ const nonce = crypto.randomUUID().slice(0, 8);
313
+ const prompt = `What is the capital of France?\n<!-- nonce:${nonce} -->`;
314
+
315
+ const payload = {
316
+ model: `${modelId}:${providerName}`,
317
+ messages: [{ role: "user", content: prompt }],
318
+ stream: false,
319
+ temperature: 0.7,
320
+ };
321
+
322
+ const headers = {
323
+ Authorization: `Bearer ${hfToken}`,
324
+ "Content-Type": "application/json",
325
+ };
326
+
327
+ const start = performance.now();
328
+ try {
329
+ const controller = new AbortController();
330
+ const timeoutId = setTimeout(() => controller.abort(), 30_000);
331
+
332
+ const resp = await fetch(HUGGINGFACE_ROUTER_API, {
333
+ method: "POST",
334
+ headers,
335
+ body: JSON.stringify(payload),
336
+ signal: controller.signal,
337
+ });
338
+
339
+ clearTimeout(timeoutId);
340
+ const latency = (performance.now() - start) / 1000;
341
+
342
+ if (resp.ok) {
343
+ const data = await resp.json();
344
+ const usage = data.usage ?? {};
345
+ const totalTokens =
346
+ usage.total_tokens ??
347
+ (usage.prompt_tokens ?? 0) + (usage.completion_tokens ?? 0);
348
+ const tps = totalTokens ? totalTokens / latency : 0;
349
+
350
+ return {
351
+ latency_s: Number(latency.toFixed(2)),
352
+ throughput_tps: Number(tps.toFixed(2)),
353
+ status: "live",
354
+ };
355
+ }
356
+
357
+ const data = await resp.json().catch(() => ({}));
358
+ const msg =
359
+ data?.error?.message ?? `HTTP ${resp.status} ${resp.statusText}`;
360
+ return { performance_error: msg, status: "offline" };
361
+ } catch (err: any) {
362
+ const msg = err.name === "AbortError" ? "Request timeout" : err.message;
363
+ return { performance_error: msg, status: "offline" };
364
+ }
365
+ }
366
+
367
+ async function testProvidersBatch(
368
+ triplets: [string, string, ProviderEntry][],
369
+ hfToken: string
370
+ ): Promise<void> {
371
+ await Promise.all(
372
+ triplets.map(async ([modelId, providerName, prov]) => {
373
+ const res = await testModelProvider(modelId, providerName, hfToken);
374
+ Object.assign(prov, res, {
375
+ performance_tested_at: new Date().toISOString(),
376
+ });
377
+ })
378
+ );
379
+ }
380
+
381
+ async function testAllProviders(
382
+ models: HFModel[],
383
+ hfToken: string,
384
+ limit: number | undefined,
385
+ batchSize: number,
386
+ filter: string[] | undefined
387
+ ): Promise<PerformanceTestResult> {
388
+ const subset = typeof limit === "number" ? models.slice(0, limit) : models;
389
+
390
+ const allPairs: [string, string, ProviderEntry][] = [];
391
+ for (const m of subset) {
392
+ for (const p of m.providers ?? []) {
393
+ if (filter && !filter.includes(p.provider)) continue;
394
+ allPairs.push([m.id, p.provider, p]);
395
+ }
396
+ }
397
+
398
+ console.log(
399
+ `\nTesting performance for ${allPairs.length} model-provider combinations...`
400
+ );
401
+
402
+ let tested = 0;
403
+ let errors = 0;
404
+ const statusDist: Record<string, number> = {
405
+ live: 0,
406
+ offline: 0,
407
+ not_tested: 0,
408
+ };
409
+
410
+ for (let i = 0; i < allPairs.length; i += batchSize) {
411
+ const batch = allPairs.slice(i, i + batchSize);
412
+ console.log(
413
+ `Testing batch ${i / batchSize + 1}/${Math.ceil(
414
+ allPairs.length / batchSize
415
+ )}...`
416
+ );
417
+ await testProvidersBatch(batch, hfToken);
418
+
419
+ batch.forEach(([_, __, prov]) => {
420
+ tested += 1;
421
+ if (prov.performance_error) errors += 1;
422
+ switch (prov.status) {
423
+ case "live":
424
+ statusDist.live += 1;
425
+ break;
426
+ case "offline":
427
+ statusDist.offline += 1;
428
+ break;
429
+ default:
430
+ statusDist.not_tested += 1;
431
+ }
432
+ });
433
+
434
+ if (i + batchSize < allPairs.length) {
435
+ await new Promise((resolve) => setTimeout(resolve, 1000));
436
+ }
437
+ }
438
+
439
+ return {
440
+ total_tested: tested,
441
+ successful: tested - errors,
442
+ errors,
443
+ status_distribution: statusDist,
444
+ };
445
+ }
446
+
447
+ /* -------------------------------------------------------------------------- */
448
+ /* PRINT HELPERS */
449
+ /* -------------------------------------------------------------------------- */
450
+
451
+ function printStatistics(s: Statistics): void {
452
+ console.log("\n" + "=".repeat(60));
453
+ console.log("ENRICHMENT STATISTICS");
454
+ console.log("=".repeat(60));
455
+ console.log(`Total models processed: ${s.total_models}`);
456
+ console.log(`Models enriched with pricing: ${s.models_enriched}`);
457
+ console.log(`Provider entries enriched: ${s.providers_enriched}`);
458
+ console.log(`New capability fields added: ${s.new_capabilities_added}`);
459
+
460
+ console.log("\nProvider data fetched:");
461
+ Object.entries(s.providers_fetched)
462
+ .sort(([a], [b]) => a.localeCompare(b))
463
+ .forEach(([provider, count]) => {
464
+ console.log(` ${provider}: ${count} models`);
465
+ });
466
+ }
467
+
468
+ /* -------------------------------------------------------------------------- */
469
+ /* CLI PARSER */
470
+ /* -------------------------------------------------------------------------- */
471
+
472
+ const { values: opts } = parseArgs({
473
+ args: Bun.argv.slice(2),
474
+ options: {
475
+ "test-performance": { type: "boolean" },
476
+ "test-limit": { type: "string" },
477
+ "test-providers": { type: "string", multiple: true },
478
+ "batch-size": { type: "string" },
479
+ providers: { type: "string", multiple: true },
480
+ "skip-providers": { type: "string", multiple: true },
481
+ },
482
+ strict: false,
483
+ });
484
+
485
+ const testLimit =
486
+ opts["test-limit"] && typeof opts["test-limit"] === "string"
487
+ ? parseInt(opts["test-limit"], 10)
488
+ : undefined;
489
+ const batchSize =
490
+ opts["batch-size"] && typeof opts["batch-size"] === "string"
491
+ ? parseInt(opts["batch-size"], 10)
492
+ : 20;
493
+
494
+ /* -------------------------------------------------------------------------- */
495
+ /* MAIN */
496
+ /* -------------------------------------------------------------------------- */
497
+
498
// Entry point: fetch catalogue -> enrich with provider APIs ->
// optionally performance-test -> write the two output JSON files.
(async () => {
  console.log("Fetching HuggingFace models...");
  const hfModels = await fetchHuggingfaceModels();
  console.log(`Found ${hfModels.length} HuggingFace models.`);

  // Configure provider aggregator
  const apiKeys: Record<string, string> = {};

  // Only add API keys that are defined
  if (process.env.NOVITA_API_KEY) apiKeys.novita = process.env.NOVITA_API_KEY;
  if (process.env.SAMBANOVA_API_KEY) apiKeys.sambanova = process.env.SAMBANOVA_API_KEY;
  if (process.env.GROQ_API_KEY) apiKeys.groq = process.env.GROQ_API_KEY;
  if (process.env.FEATHERLESS_API_KEY) apiKeys.featherless = process.env.FEATHERLESS_API_KEY;
  if (process.env.TOGETHER_API_KEY) apiKeys.together = process.env.TOGETHER_API_KEY;
  if (process.env.COHERE_API_KEY) apiKeys.cohere = process.env.COHERE_API_KEY;
  if (process.env.FIREWORKS_API_KEY) apiKeys.fireworks = process.env.FIREWORKS_API_KEY;
  if (process.env.NEBIUS_API_KEY) apiKeys.nebius = process.env.NEBIUS_API_KEY;
  if (process.env.HYPERBOLIC_API_KEY) apiKeys.hyperbolic = process.env.HYPERBOLIC_API_KEY;
  if (process.env.CEREBRAS_API_KEY) apiKeys.cerebras = process.env.CEREBRAS_API_KEY;
  if (process.env.NSCALE_API_KEY) apiKeys.nscale = process.env.NSCALE_API_KEY;

  const config = {
    providers: opts["providers"] as string[] | undefined,
    apiKeys,
  };

  // Remove skip-providers if specified
  // NOTE(review): --skip-providers only filters the built-in default
  // list; it is silently ignored when --providers is passed explicitly
  // (the `!config.providers` guard below) — confirm this is intended.
  if (opts["skip-providers"]) {
    const skipProviders = opts["skip-providers"] as string[];
    if (!config.providers) {
      config.providers = [
        "novita",
        "sambanova",
        "groq",
        "featherless",
        "together",
        "cohere",
        "fireworks",
        "nebius",
        "hyperbolic",
        "cerebras",
        "nscale",
      ].filter((p) => !skipProviders.includes(p));
    }
  }

  const aggregator = new ProviderAggregator(config);

  console.log("\nEnriching HuggingFace models with provider data...");
  const { enriched, stats, matchedProviderData } =
    await enrichHuggingfaceModels(hfModels, aggregator);

  // Optional performance tests
  if (opts["test-performance"]) {
    const hfToken = process.env.HF_TOKEN;
    if (!hfToken) {
      // Non-fatal: enrichment output is still written below.
      console.error(
        "ERROR: HF_TOKEN environment variable not set. Skipping performance tests."
      );
    } else {
      console.log("\n" + "=".repeat(60));
      console.log("PERFORMANCE TESTING");
      console.log("=".repeat(60));
      const perfStats = await testAllProviders(
        enriched,
        hfToken,
        testLimit,
        batchSize,
        opts["test-providers"] as string[] | undefined
      );
      console.log("\nPerformance testing complete:");
      console.log(`  Total tested: ${perfStats.total_tested}`);
      console.log(`  Successful: ${perfStats.successful}`);
      console.log(`  Errors: ${perfStats.errors}`);
      console.log("\nProvider status distribution:");
      Object.entries(perfStats.status_distribution)
        .sort()
        .forEach(([k, v]) => console.log(`  ${k}: ${v}`));
    }
  }

  // Save enriched data (this is the file index.html loads).
  const outFile = "enriched_models_enhanced.json";
  fs.writeFileSync(
    outFile,
    JSON.stringify(
      {
        data: enriched,
        generated_at: new Date().toISOString(),
        metadata: {
          total_models: enriched.length,
          models_enriched: stats.models_enriched,
          providers_enriched: stats.providers_enriched,
          performance_tested: !!opts["test-performance"],
          providers_fetched: stats.providers_fetched,
        },
      },
      null,
      2
    )
  );
  console.log(`\nEnriched data saved → ${outFile}`);

  // Save only matched provider data (models that exist in HF Router)
  fs.writeFileSync(
    "provider_models_raw.json",
    JSON.stringify({ data: matchedProviderData }, null, 2)
  );
  console.log(
    `Matched provider models saved → provider_models_raw.json (${matchedProviderData.length} entries)`
  );

  printStatistics(stats);
})();
index.html ADDED
@@ -0,0 +1,332 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <title>HuggingFace Models - Enriched</title>
5
+ <meta charset="UTF-8">
6
+ <style>
7
+ body {
8
+ font-family: monospace;
9
+ margin: 20px;
10
+ }
11
+ input {
12
+ font-family: monospace;
13
+ border: 1px solid #000;
14
+ padding: 4px 8px;
15
+ width: 300px;
16
+ }
17
+ table {
18
+ border-collapse: collapse;
19
+ width: 100%;
20
+ }
21
+ thead {
22
+ position: sticky;
23
+ top: 0;
24
+ z-index: 10;
25
+ }
26
+ th, td {
27
+ border: 1px solid #000;
28
+ padding: 4px 8px;
29
+ text-align: left;
30
+ }
31
+ tr.model-group-start td {
32
+ border-top: 2px solid #000;
33
+ }
34
+ th {
35
+ background: #f0f0f0;
36
+ font-weight: bold;
37
+ cursor: pointer;
38
+ user-select: none;
39
+ position: relative;
40
+ }
41
+ th:hover {
42
+ background: #e0e0e0;
43
+ }
44
+ th::after {
45
+ content: ' ↕';
46
+ color: #999;
47
+ font-size: 0.8em;
48
+ }
49
+ th.sort-asc::after {
50
+ content: ' ↑';
51
+ color: #333;
52
+ }
53
+ th.sort-desc::after {
54
+ content: ' ↓';
55
+ color: #333;
56
+ }
57
+ tr:hover {
58
+ background: #f9f9f9;
59
+ }
60
+ .hidden {
61
+ display: none;
62
+ }
63
+ .highlighted {
64
+ background: #fffacd !important;
65
+ }
66
+ .best-value {
67
+ color: #008000;
68
+ font-weight: bold;
69
+ }
70
+ .header-container {
71
+ display: flex;
72
+ justify-content: space-between;
73
+ align-items: center;
74
+ gap: 20px;
75
+ margin-bottom: 10px;
76
+ }
77
+ .generation-date {
78
+ color: #666;
79
+ font-size: 0.9em;
80
+ }
81
+ </style>
82
+ </head>
83
+ <body>
84
+ <div class="header-container">
85
+ <input type="search" id="filterInput" placeholder="Filter by model or provider...">
86
+ <span class="generation-date" id="generationDate"></span>
87
+ </div>
88
+ <table id="modelsTable">
89
+ <thead>
90
+ <tr>
91
+ <th>Model</th>
92
+ <th>Provider</th>
93
+ <th>Status</th>
94
+ <th>Uptime %</th>
95
+ <th>Input $/1M</th>
96
+ <th>Output $/1M</th>
97
+ <th>Context</th>
98
+ <th>Quant</th>
99
+ <th>Latency (s)</th>
100
+ <th>Throughput (t/s)</th>
101
+ <th>Tools</th>
102
+ <th>Structured</th>
103
+ </tr>
104
+ </thead>
105
+ <tbody id="tableBody">
106
+ <tr><td colspan="12">Loading...</td></tr>
107
+ </tbody>
108
+ </table>
109
+
110
+ <script>
111
+ // Get query parameters
112
+ const urlParams = new URLSearchParams(window.location.search);
113
+ const highlightModelId = urlParams.get('model');
114
+
115
+ fetch('enriched_models_enhanced.json')
116
+ .then(response => response.json())
117
+ .then(data => {
118
+ // Display generation date
119
+ if (data.generated_at) {
120
+ const date = new Date(data.generated_at);
121
+ const dateStr = date.toLocaleString('en-US', {
122
+ year: 'numeric',
123
+ month: 'short',
124
+ day: 'numeric',
125
+ hour: '2-digit',
126
+ minute: '2-digit',
127
+ timeZoneName: 'short'
128
+ });
129
+ document.getElementById('generationDate').textContent = `Last update: ${dateStr}`;
130
+ }
131
+
132
+ const tbody = document.getElementById('tableBody');
133
+ tbody.innerHTML = '';
134
+ let firstHighlightedRow = null;
135
+
136
+ // Handle both old format (direct array) and new format (with metadata)
137
+ const models = Array.isArray(data) ? data : data.data;
138
+ models.forEach((model, modelIndex) => {
139
+ if (model.providers) {
140
+ model.providers.forEach((provider, providerIndex) => {
141
+ const row = document.createElement('tr');
142
+
143
+ // Add class for first provider of each model to create visual separation
144
+ if (providerIndex === 0 && modelIndex > 0) {
145
+ row.classList.add('model-group-start');
146
+ }
147
+
148
+ // Highlight if model matches query parameter
149
+ if (highlightModelId && model.id === highlightModelId) {
150
+ row.classList.add('highlighted');
151
+ if (!firstHighlightedRow) {
152
+ firstHighlightedRow = row;
153
+ }
154
+ }
155
+
156
+ row.innerHTML = `
157
+ <td>${model.id}</td>
158
+ <td>${provider.provider}</td>
159
+ <td>${provider.endpoint_status_name || provider.status || '-'}</td>
160
+ <td>${provider.uptime_30d !== undefined ? provider.uptime_30d : '-'}</td>
161
+ <td>${provider.pricing?.input !== undefined ? provider.pricing.input : '-'}</td>
162
+ <td>${provider.pricing?.output !== undefined ? provider.pricing.output : '-'}</td>
163
+ <td>${provider.context_length || '-'}</td>
164
+ <td>${provider.quantization || '-'}</td>
165
+ <td>${provider.latency_s !== undefined ? provider.latency_s : '-'}</td>
166
+ <td>${provider.throughput_tps !== undefined ? provider.throughput_tps : '-'}</td>
167
+ <td>${provider.supports_tools ? 'Yes' : 'No'}</td>
168
+ <td>${provider.supports_structured_output ? 'Yes' : 'No'}</td>
169
+ `;
170
+
171
+ tbody.appendChild(row);
172
+ });
173
+ }
174
+ });
175
+
176
+ // Store original data for sorting
177
+ window.tableData = models;
178
+
179
+ // Function to find and mark best values
180
+ function markBestValues() {
181
+ const rows = Array.from(tbody.getElementsByTagName('tr'));
182
+ const highlightedRows = rows.filter(row => row.classList.contains('highlighted'));
183
+
184
+ if (highlightedRows.length === 0) return;
185
+
186
+ // Define which columns need min vs max for best value
187
+ const columnConfig = {
188
+ 4: 'min', // Input $/1M - lower is better
189
+ 5: 'min', // Output $/1M - lower is better
190
+ 6: 'max', // Context - higher is better
191
+ 8: 'min', // Latency - lower is better
192
+ 9: 'max', // Throughput - higher is better
193
+ 3: 'max' // Uptime % - higher is better
194
+ };
195
+
196
+ // For each configured column, find the best value among highlighted rows
197
+ Object.entries(columnConfig).forEach(([colIndex, type]) => {
198
+ const values = highlightedRows
199
+ .map(row => {
200
+ const cellText = row.cells[colIndex].textContent.trim();
201
+ const value = cellText === '-' ? null : parseFloat(cellText);
202
+ return { row, value, cell: row.cells[colIndex] };
203
+ })
204
+ .filter(item => item.value !== null && !isNaN(item.value));
205
+
206
+ if (values.length === 0) return;
207
+
208
+ // Find best value
209
+ let bestValue;
210
+ if (type === 'min') {
211
+ bestValue = Math.min(...values.map(v => v.value));
212
+ } else {
213
+ bestValue = Math.max(...values.map(v => v.value));
214
+ }
215
+
216
+ // Mark cells with best value
217
+ values.forEach(item => {
218
+ if (item.value === bestValue) {
219
+ item.cell.classList.add('best-value');
220
+ }
221
+ });
222
+ });
223
+ }
224
+
225
+ // Call markBestValues if model is highlighted
226
+ if (highlightModelId) {
227
+ markBestValues();
228
+ }
229
+
230
+ // Scroll to highlighted model if present
231
+ if (firstHighlightedRow) {
232
+ setTimeout(() => {
233
+ firstHighlightedRow.scrollIntoView({ behavior: 'smooth', block: 'center' });
234
+ }, 100);
235
+ }
236
+
237
+ // Filter functionality
238
+ document.getElementById('filterInput').addEventListener('input', function(e) {
239
+ const filter = e.target.value.toLowerCase();
240
+ const rows = tbody.getElementsByTagName('tr');
241
+
242
+ for (let row of rows) {
243
+ const modelText = row.cells[0].textContent.toLowerCase();
244
+ const providerText = row.cells[1].textContent.toLowerCase();
245
+
246
+ if (modelText.includes(filter) || providerText.includes(filter)) {
247
+ row.classList.remove('hidden');
248
+ } else {
249
+ row.classList.add('hidden');
250
+ }
251
+ }
252
+ });
253
+
254
+ // Sorting functionality
255
+ let sortColumn = -1;
256
+ let sortDirection = 'asc';
257
+
258
+ const headers = document.querySelectorAll('th');
259
+ headers.forEach((header, index) => {
260
+ header.addEventListener('click', () => {
261
+ // Remove sort classes from all headers
262
+ headers.forEach(h => {
263
+ h.classList.remove('sort-asc', 'sort-desc');
264
+ });
265
+
266
+ // Determine sort direction
267
+ if (sortColumn === index) {
268
+ sortDirection = sortDirection === 'asc' ? 'desc' : 'asc';
269
+ } else {
270
+ sortColumn = index;
271
+ sortDirection = 'asc';
272
+ }
273
+
274
+ // Add sort class to current header
275
+ header.classList.add(sortDirection === 'asc' ? 'sort-asc' : 'sort-desc');
276
+
277
+ // Sort the table
278
+ sortTable(index, sortDirection);
279
+ });
280
+ });
281
+
282
+ function sortTable(columnIndex, direction) {
283
+ const rows = Array.from(tbody.getElementsByTagName('tr'));
284
+
285
+ rows.sort((a, b) => {
286
+ const aText = a.cells[columnIndex].textContent.trim();
287
+ const bText = b.cells[columnIndex].textContent.trim();
288
+
289
+ // Handle special cases
290
+ if (aText === '-' && bText !== '-') return direction === 'asc' ? 1 : -1;
291
+ if (aText !== '-' && bText === '-') return direction === 'asc' ? -1 : 1;
292
+ if (aText === '-' && bText === '-') return 0;
293
+
294
+ // Try to parse as number
295
+ const aNum = parseFloat(aText);
296
+ const bNum = parseFloat(bText);
297
+
298
+ let comparison = 0;
299
+ if (!isNaN(aNum) && !isNaN(bNum)) {
300
+ comparison = aNum - bNum;
301
+ } else {
302
+ // Handle Yes/No specially
303
+ if (aText === 'Yes' || aText === 'No') {
304
+ comparison = aText === bText ? 0 : (aText === 'Yes' ? -1 : 1);
305
+ } else {
306
+ comparison = aText.localeCompare(bText);
307
+ }
308
+ }
309
+
310
+ return direction === 'asc' ? comparison : -comparison;
311
+ });
312
+
313
+ // Clear tbody and re-append sorted rows
314
+ tbody.innerHTML = '';
315
+ rows.forEach((row, index) => {
316
+ // Re-apply model-group-start class based on model changes
317
+ if (index > 0 && rows[index].cells[0].textContent !== rows[index-1].cells[0].textContent) {
318
+ row.classList.add('model-group-start');
319
+ } else if (index > 0) {
320
+ row.classList.remove('model-group-start');
321
+ }
322
+ tbody.appendChild(row);
323
+ });
324
+ }
325
+ })
326
+ .catch(error => {
327
+ console.error('Error loading data:', error);
328
+ document.getElementById('tableBody').innerHTML = '<tr><td colspan="12">Error loading data</td></tr>';
329
+ });
330
+ </script>
331
+ </body>
332
+ </html>
package.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "llm-pricing",
3
+ "private": true,
4
+ "devDependencies": {
5
+ "@types/bun": "latest"
6
+ },
7
+ "peerDependencies": {
8
+ "typescript": "^5"
9
+ }
10
+ }
provider_models_raw.json ADDED
@@ -0,0 +1,2417 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data": [
3
+ {
4
+ "provider": "novita",
5
+ "status": "live",
6
+ "id": "zai-org/GLM-4.5"
7
+ },
8
+ {
9
+ "provider": "novita",
10
+ "status": "live",
11
+ "pricing": {
12
+ "input": 0.95,
13
+ "output": 5
14
+ },
15
+ "context_length": 262144,
16
+ "supports_tools": true,
17
+ "supports_structured_output": false,
18
+ "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct"
19
+ },
20
+ {
21
+ "provider": "cerebras",
22
+ "status": "live",
23
+ "supports_tools": true,
24
+ "supports_structured_output": false,
25
+ "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct"
26
+ },
27
+ {
28
+ "provider": "fireworks-ai",
29
+ "status": "live",
30
+ "supports_tools": true,
31
+ "supports_structured_output": false,
32
+ "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct"
33
+ },
34
+ {
35
+ "provider": "together",
36
+ "status": "live",
37
+ "pricing": {
38
+ "input": 2,
39
+ "output": 2
40
+ },
41
+ "context_length": 262144,
42
+ "supports_tools": false,
43
+ "supports_structured_output": true,
44
+ "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct"
45
+ },
46
+ {
47
+ "provider": "hyperbolic",
48
+ "status": "live",
49
+ "supports_tools": true,
50
+ "supports_structured_output": false,
51
+ "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct"
52
+ },
53
+ {
54
+ "provider": "fireworks",
55
+ "status": "live",
56
+ "supports_tools": true,
57
+ "supports_structured_output": false,
58
+ "context_length": 163840,
59
+ "supports_image_input": false,
60
+ "supports_function_calling": false,
61
+ "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct"
62
+ },
63
+ {
64
+ "provider": "novita",
65
+ "status": "live",
66
+ "id": "moonshotai/Kimi-K2-Instruct"
67
+ },
68
+ {
69
+ "provider": "fireworks-ai",
70
+ "status": "live",
71
+ "supports_tools": true,
72
+ "supports_structured_output": false,
73
+ "id": "moonshotai/Kimi-K2-Instruct"
74
+ },
75
+ {
76
+ "provider": "featherless-ai",
77
+ "status": "live",
78
+ "id": "moonshotai/Kimi-K2-Instruct"
79
+ },
80
+ {
81
+ "provider": "groq",
82
+ "status": "live",
83
+ "id": "moonshotai/Kimi-K2-Instruct"
84
+ },
85
+ {
86
+ "provider": "featherless",
87
+ "status": "live",
88
+ "pricing": {
89
+ "input": 0.1,
90
+ "output": 0.1
91
+ },
92
+ "context_length": 16384,
93
+ "id": "moonshotai/Kimi-K2-Instruct"
94
+ },
95
+ {
96
+ "provider": "fireworks",
97
+ "status": "live",
98
+ "supports_tools": true,
99
+ "supports_structured_output": false,
100
+ "context_length": 163840,
101
+ "supports_image_input": false,
102
+ "supports_function_calling": false,
103
+ "id": "moonshotai/Kimi-K2-Instruct"
104
+ },
105
+ {
106
+ "provider": "novita",
107
+ "status": "live",
108
+ "pricing": {
109
+ "input": 0.3,
110
+ "output": 3
111
+ },
112
+ "context_length": 131072,
113
+ "supports_tools": true,
114
+ "supports_structured_output": false,
115
+ "id": "Qwen/Qwen3-235B-A22B-Thinking-2507"
116
+ },
117
+ {
118
+ "provider": "cerebras",
119
+ "status": "live",
120
+ "supports_tools": false,
121
+ "supports_structured_output": false,
122
+ "id": "Qwen/Qwen3-235B-A22B-Thinking-2507"
123
+ },
124
+ {
125
+ "provider": "cohere",
126
+ "status": "live",
127
+ "supports_tools": true,
128
+ "supports_structured_output": false,
129
+ "id": "CohereLabs/command-a-vision-07-2025"
130
+ },
131
+ {
132
+ "provider": "novita",
133
+ "status": "live",
134
+ "pricing": {
135
+ "input": 0.15,
136
+ "output": 0.8
137
+ },
138
+ "context_length": 262144,
139
+ "supports_tools": true,
140
+ "supports_structured_output": true,
141
+ "id": "Qwen/Qwen3-235B-A22B-Instruct-2507"
142
+ },
143
+ {
144
+ "provider": "nebius",
145
+ "status": "live",
146
+ "supports_tools": true,
147
+ "supports_structured_output": true,
148
+ "id": "Qwen/Qwen3-235B-A22B-Instruct-2507"
149
+ },
150
+ {
151
+ "provider": "cerebras",
152
+ "status": "live",
153
+ "supports_tools": true,
154
+ "supports_structured_output": false,
155
+ "id": "Qwen/Qwen3-235B-A22B-Instruct-2507"
156
+ },
157
+ {
158
+ "provider": "fireworks-ai",
159
+ "status": "live",
160
+ "supports_tools": true,
161
+ "supports_structured_output": false,
162
+ "id": "Qwen/Qwen3-235B-A22B-Instruct-2507"
163
+ },
164
+ {
165
+ "provider": "together",
166
+ "status": "live",
167
+ "pricing": {
168
+ "input": 0.2,
169
+ "output": 0.6
170
+ },
171
+ "context_length": 262144,
172
+ "supports_tools": true,
173
+ "supports_structured_output": true,
174
+ "id": "Qwen/Qwen3-235B-A22B-Instruct-2507"
175
+ },
176
+ {
177
+ "provider": "hyperbolic",
178
+ "status": "live",
179
+ "supports_tools": true,
180
+ "supports_structured_output": false,
181
+ "id": "Qwen/Qwen3-235B-A22B-Instruct-2507"
182
+ },
183
+ {
184
+ "provider": "nscale",
185
+ "status": "live",
186
+ "supports_tools": false,
187
+ "supports_structured_output": true,
188
+ "id": "Qwen/Qwen3-235B-A22B-Instruct-2507"
189
+ },
190
+ {
191
+ "provider": "fireworks",
192
+ "status": "live",
193
+ "supports_tools": true,
194
+ "supports_structured_output": false,
195
+ "context_length": 163840,
196
+ "supports_image_input": false,
197
+ "supports_function_calling": false,
198
+ "id": "Qwen/Qwen3-235B-A22B-Instruct-2507"
199
+ },
200
+ {
201
+ "provider": "novita",
202
+ "status": "live",
203
+ "pricing": {
204
+ "input": 0.02,
205
+ "output": 0.05
206
+ },
207
+ "context_length": 16384,
208
+ "supports_tools": false,
209
+ "supports_structured_output": false,
210
+ "id": "meta-llama/Llama-3.1-8B-Instruct"
211
+ },
212
+ {
213
+ "provider": "nebius",
214
+ "status": "live",
215
+ "pricing": {
216
+ "input": 0.03,
217
+ "output": 0.09
218
+ },
219
+ "context_length": 131072,
220
+ "supports_tools": true,
221
+ "supports_structured_output": true,
222
+ "id": "meta-llama/Llama-3.1-8B-Instruct"
223
+ },
224
+ {
225
+ "provider": "cerebras",
226
+ "status": "live",
227
+ "pricing": {
228
+ "input": 0.1,
229
+ "output": 0.1
230
+ },
231
+ "supports_tools": false,
232
+ "supports_structured_output": false,
233
+ "id": "meta-llama/Llama-3.1-8B-Instruct"
234
+ },
235
+ {
236
+ "provider": "fireworks-ai",
237
+ "status": "live",
238
+ "supports_tools": false,
239
+ "supports_structured_output": false,
240
+ "id": "meta-llama/Llama-3.1-8B-Instruct"
241
+ },
242
+ {
243
+ "provider": "featherless-ai",
244
+ "status": "live",
245
+ "id": "meta-llama/Llama-3.1-8B-Instruct"
246
+ },
247
+ {
248
+ "provider": "hyperbolic",
249
+ "status": "offline",
250
+ "supports_tools": false,
251
+ "supports_structured_output": false,
252
+ "id": "meta-llama/Llama-3.1-8B-Instruct"
253
+ },
254
+ {
255
+ "provider": "nscale",
256
+ "status": "live",
257
+ "supports_tools": false,
258
+ "supports_structured_output": true,
259
+ "id": "meta-llama/Llama-3.1-8B-Instruct"
260
+ },
261
+ {
262
+ "provider": "sambanova",
263
+ "status": "live",
264
+ "pricing": {
265
+ "input": 0.1,
266
+ "output": 0.2
267
+ },
268
+ "context_length": 16384,
269
+ "supports_tools": true,
270
+ "supports_structured_output": true,
271
+ "id": "meta-llama/Llama-3.1-8B-Instruct"
272
+ },
273
+ {
274
+ "provider": "featherless",
275
+ "status": "live",
276
+ "pricing": {
277
+ "input": 0.1,
278
+ "output": 0.1
279
+ },
280
+ "context_length": 16384,
281
+ "id": "meta-llama/Llama-3.1-8B-Instruct"
282
+ },
283
+ {
284
+ "provider": "fireworks",
285
+ "status": "live",
286
+ "supports_tools": false,
287
+ "supports_structured_output": false,
288
+ "context_length": 163840,
289
+ "supports_image_input": false,
290
+ "supports_function_calling": false,
291
+ "id": "meta-llama/Llama-3.1-8B-Instruct"
292
+ },
293
+ {
294
+ "provider": "together",
295
+ "status": "live",
296
+ "supports_tools": false,
297
+ "supports_structured_output": true,
298
+ "id": "zai-org/GLM-4.5-Air-FP8"
299
+ },
300
+ {
301
+ "provider": "hf-inference",
302
+ "status": "live",
303
+ "supports_tools": true,
304
+ "supports_structured_output": true,
305
+ "id": "HuggingFaceTB/SmolLM3-3B"
306
+ },
307
+ {
308
+ "provider": "novita",
309
+ "status": "live",
310
+ "pricing": {
311
+ "input": 0.7,
312
+ "output": 2.5
313
+ },
314
+ "context_length": 64000,
315
+ "supports_tools": true,
316
+ "supports_structured_output": false,
317
+ "id": "deepseek-ai/DeepSeek-R1"
318
+ },
319
+ {
320
+ "provider": "nebius",
321
+ "status": "live",
322
+ "pricing": {
323
+ "input": 2,
324
+ "output": 6
325
+ },
326
+ "context_length": 163840,
327
+ "supports_tools": false,
328
+ "supports_structured_output": false,
329
+ "id": "deepseek-ai/DeepSeek-R1"
330
+ },
331
+ {
332
+ "provider": "fireworks-ai",
333
+ "status": "live",
334
+ "supports_tools": false,
335
+ "supports_structured_output": false,
336
+ "id": "deepseek-ai/DeepSeek-R1"
337
+ },
338
+ {
339
+ "provider": "together",
340
+ "status": "live",
341
+ "pricing": {
342
+ "input": 3,
343
+ "output": 7
344
+ },
345
+ "context_length": 163840,
346
+ "supports_tools": false,
347
+ "supports_structured_output": true,
348
+ "id": "deepseek-ai/DeepSeek-R1"
349
+ },
350
+ {
351
+ "provider": "hyperbolic",
352
+ "status": "live",
353
+ "supports_tools": false,
354
+ "supports_structured_output": false,
355
+ "id": "deepseek-ai/DeepSeek-R1"
356
+ },
357
+ {
358
+ "provider": "sambanova",
359
+ "status": "live",
360
+ "supports_tools": false,
361
+ "supports_structured_output": false,
362
+ "id": "deepseek-ai/DeepSeek-R1"
363
+ },
364
+ {
365
+ "provider": "fireworks",
366
+ "status": "live",
367
+ "supports_tools": false,
368
+ "supports_structured_output": false,
369
+ "context_length": 163840,
370
+ "supports_image_input": false,
371
+ "supports_function_calling": false,
372
+ "id": "deepseek-ai/DeepSeek-R1"
373
+ },
374
+ {
375
+ "provider": "novita",
376
+ "status": "live",
377
+ "pricing": {
378
+ "input": 0.1,
379
+ "output": 0.45
380
+ },
381
+ "context_length": 40960,
382
+ "supports_tools": false,
383
+ "supports_structured_output": false,
384
+ "id": "Qwen/Qwen3-30B-A3B"
385
+ },
386
+ {
387
+ "provider": "nebius",
388
+ "status": "live",
389
+ "pricing": {
390
+ "input": 0.3,
391
+ "output": 0.9
392
+ },
393
+ "context_length": 40960,
394
+ "supports_tools": true,
395
+ "supports_structured_output": true,
396
+ "id": "Qwen/Qwen3-30B-A3B"
397
+ },
398
+ {
399
+ "provider": "fireworks-ai",
400
+ "status": "live",
401
+ "supports_tools": true,
402
+ "supports_structured_output": false,
403
+ "id": "Qwen/Qwen3-30B-A3B"
404
+ },
405
+ {
406
+ "provider": "fireworks",
407
+ "status": "live",
408
+ "supports_tools": true,
409
+ "supports_structured_output": false,
410
+ "context_length": 163840,
411
+ "supports_image_input": false,
412
+ "supports_function_calling": false,
413
+ "id": "Qwen/Qwen3-30B-A3B"
414
+ },
415
+ {
416
+ "provider": "featherless-ai",
417
+ "status": "live",
418
+ "id": "Qwen/Qwen3-8B"
419
+ },
420
+ {
421
+ "provider": "nscale",
422
+ "status": "live",
423
+ "supports_tools": false,
424
+ "supports_structured_output": false,
425
+ "id": "Qwen/Qwen3-8B"
426
+ },
427
+ {
428
+ "provider": "featherless",
429
+ "status": "live",
430
+ "pricing": {
431
+ "input": 0.1,
432
+ "output": 0.1
433
+ },
434
+ "context_length": 16384,
435
+ "id": "Qwen/Qwen3-8B"
436
+ },
437
+ {
438
+ "provider": "nebius",
439
+ "status": "live",
440
+ "pricing": {
441
+ "input": 0.2,
442
+ "output": 0.6
443
+ },
444
+ "context_length": 110000,
445
+ "supports_tools": false,
446
+ "supports_structured_output": true,
447
+ "id": "google/gemma-3-27b-it"
448
+ },
449
+ {
450
+ "provider": "featherless-ai",
451
+ "status": "live",
452
+ "id": "google/gemma-3-27b-it"
453
+ },
454
+ {
455
+ "provider": "featherless",
456
+ "status": "live",
457
+ "pricing": {
458
+ "input": 0.1,
459
+ "output": 0.1
460
+ },
461
+ "context_length": 16384,
462
+ "id": "google/gemma-3-27b-it"
463
+ },
464
+ {
465
+ "provider": "novita",
466
+ "status": "live",
467
+ "pricing": {
468
+ "input": 0.035,
469
+ "output": 0.138
470
+ },
471
+ "context_length": 65536,
472
+ "supports_tools": false,
473
+ "supports_structured_output": false,
474
+ "id": "zai-org/GLM-4.1V-9B-Thinking"
475
+ },
476
+ {
477
+ "provider": "together",
478
+ "status": "live",
479
+ "pricing": {
480
+ "input": 2,
481
+ "output": 2
482
+ },
483
+ "context_length": 262144,
484
+ "supports_tools": false,
485
+ "supports_structured_output": true,
486
+ "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8"
487
+ },
488
+ {
489
+ "provider": "novita",
490
+ "status": "live",
491
+ "id": "deepseek-ai/DeepSeek-R1-0528"
492
+ },
493
+ {
494
+ "provider": "nebius",
495
+ "status": "live",
496
+ "pricing": {
497
+ "input": 0.8,
498
+ "output": 2.4
499
+ },
500
+ "context_length": 131072,
501
+ "supports_tools": true,
502
+ "supports_structured_output": true,
503
+ "id": "deepseek-ai/DeepSeek-R1-0528"
504
+ },
505
+ {
506
+ "provider": "fireworks-ai",
507
+ "status": "live",
508
+ "id": "deepseek-ai/DeepSeek-R1-0528"
509
+ },
510
+ {
511
+ "provider": "together",
512
+ "status": "live",
513
+ "id": "deepseek-ai/DeepSeek-R1-0528"
514
+ },
515
+ {
516
+ "provider": "hyperbolic",
517
+ "status": "live",
518
+ "id": "deepseek-ai/DeepSeek-R1-0528"
519
+ },
520
+ {
521
+ "provider": "sambanova",
522
+ "status": "live",
523
+ "id": "deepseek-ai/DeepSeek-R1-0528"
524
+ },
525
+ {
526
+ "provider": "fireworks",
527
+ "status": "live",
528
+ "context_length": 163840,
529
+ "supports_tools": false,
530
+ "supports_image_input": false,
531
+ "supports_function_calling": false,
532
+ "id": "deepseek-ai/DeepSeek-R1-0528"
533
+ },
534
+ {
535
+ "provider": "hyperbolic",
536
+ "status": "live",
537
+ "supports_tools": false,
538
+ "supports_structured_output": false,
539
+ "id": "Qwen/Qwen2.5-VL-7B-Instruct"
540
+ },
541
+ {
542
+ "provider": "novita",
543
+ "status": "live",
544
+ "pricing": {
545
+ "input": 0.04,
546
+ "output": 0.04
547
+ },
548
+ "context_length": 8192,
549
+ "supports_tools": false,
550
+ "supports_structured_output": false,
551
+ "id": "meta-llama/Meta-Llama-3-8B-Instruct"
552
+ },
553
+ {
554
+ "provider": "featherless-ai",
555
+ "status": "live",
556
+ "id": "meta-llama/Meta-Llama-3-8B-Instruct"
557
+ },
558
+ {
559
+ "provider": "groq",
560
+ "status": "live",
561
+ "pricing": {
562
+ "input": 0.05,
563
+ "output": 0.08
564
+ },
565
+ "context_length": 8192,
566
+ "supports_tools": true,
567
+ "supports_structured_output": false,
568
+ "id": "meta-llama/Meta-Llama-3-8B-Instruct"
569
+ },
570
+ {
571
+ "provider": "featherless",
572
+ "status": "live",
573
+ "pricing": {
574
+ "input": 0.1,
575
+ "output": 0.1
576
+ },
577
+ "context_length": 16384,
578
+ "id": "meta-llama/Meta-Llama-3-8B-Instruct"
579
+ },
580
+ {
581
+ "provider": "novita",
582
+ "status": "live",
583
+ "pricing": {
584
+ "input": 0.03,
585
+ "output": 0.05
586
+ },
587
+ "context_length": 32768,
588
+ "supports_tools": true,
589
+ "supports_structured_output": false,
590
+ "id": "meta-llama/Llama-3.2-3B-Instruct"
591
+ },
592
+ {
593
+ "provider": "together",
594
+ "status": "live",
595
+ "pricing": {
596
+ "input": 0.060000000000000005,
597
+ "output": 0.060000000000000005
598
+ },
599
+ "context_length": 131072,
600
+ "supports_tools": false,
601
+ "supports_structured_output": false,
602
+ "id": "meta-llama/Llama-3.2-3B-Instruct"
603
+ },
604
+ {
605
+ "provider": "hyperbolic",
606
+ "status": "live",
607
+ "supports_tools": false,
608
+ "supports_structured_output": false,
609
+ "id": "meta-llama/Llama-3.2-3B-Instruct"
610
+ },
611
+ {
612
+ "provider": "novita",
613
+ "status": "live",
614
+ "pricing": {
615
+ "input": 0.1,
616
+ "output": 0.45
617
+ },
618
+ "context_length": 40960,
619
+ "supports_tools": false,
620
+ "supports_structured_output": false,
621
+ "id": "Qwen/Qwen3-32B"
622
+ },
623
+ {
624
+ "provider": "nebius",
625
+ "status": "live",
626
+ "pricing": {
627
+ "input": 0.1,
628
+ "output": 0.3
629
+ },
630
+ "context_length": 40960,
631
+ "supports_tools": true,
632
+ "supports_structured_output": true,
633
+ "id": "Qwen/Qwen3-32B"
634
+ },
635
+ {
636
+ "provider": "cerebras",
637
+ "status": "live",
638
+ "pricing": {
639
+ "input": 0.4,
640
+ "output": 0.8
641
+ },
642
+ "supports_tools": false,
643
+ "supports_structured_output": false,
644
+ "id": "Qwen/Qwen3-32B"
645
+ },
646
+ {
647
+ "provider": "featherless-ai",
648
+ "status": "live",
649
+ "id": "Qwen/Qwen3-32B"
650
+ },
651
+ {
652
+ "provider": "groq",
653
+ "status": "live",
654
+ "pricing": {
655
+ "input": 0.29,
656
+ "output": 0.59
657
+ },
658
+ "context_length": 131072,
659
+ "supports_tools": true,
660
+ "supports_structured_output": false,
661
+ "id": "Qwen/Qwen3-32B"
662
+ },
663
+ {
664
+ "provider": "nscale",
665
+ "status": "live",
666
+ "supports_tools": false,
667
+ "supports_structured_output": false,
668
+ "id": "Qwen/Qwen3-32B"
669
+ },
670
+ {
671
+ "provider": "sambanova",
672
+ "status": "live",
673
+ "pricing": {
674
+ "input": 0.4,
675
+ "output": 0.8
676
+ },
677
+ "context_length": 32768,
678
+ "supports_tools": true,
679
+ "supports_structured_output": false,
680
+ "id": "Qwen/Qwen3-32B"
681
+ },
682
+ {
683
+ "provider": "featherless",
684
+ "status": "live",
685
+ "pricing": {
686
+ "input": 0.1,
687
+ "output": 0.1
688
+ },
689
+ "context_length": 16384,
690
+ "id": "Qwen/Qwen3-32B"
691
+ },
692
+ {
693
+ "provider": "novita",
694
+ "status": "live",
695
+ "pricing": {
696
+ "input": 0.06,
697
+ "output": 0.09
698
+ },
699
+ "context_length": 128000,
700
+ "supports_tools": false,
701
+ "supports_structured_output": false,
702
+ "id": "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B"
703
+ },
704
+ {
705
+ "provider": "featherless-ai",
706
+ "status": "live",
707
+ "id": "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B"
708
+ },
709
+ {
710
+ "provider": "featherless",
711
+ "status": "live",
712
+ "pricing": {
713
+ "input": 0.1,
714
+ "output": 0.1
715
+ },
716
+ "context_length": 16384,
717
+ "id": "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B"
718
+ },
719
+ {
720
+ "provider": "novita",
721
+ "status": "live",
722
+ "pricing": {
723
+ "input": 0.28,
724
+ "output": 1.14
725
+ },
726
+ "context_length": 163840,
727
+ "supports_tools": true,
728
+ "supports_structured_output": false,
729
+ "id": "deepseek-ai/DeepSeek-V3-0324"
730
+ },
731
+ {
732
+ "provider": "nebius",
733
+ "status": "live",
734
+ "pricing": {
735
+ "input": 2,
736
+ "output": 6
737
+ },
738
+ "context_length": 163840,
739
+ "supports_tools": true,
740
+ "supports_structured_output": true,
741
+ "id": "deepseek-ai/DeepSeek-V3-0324"
742
+ },
743
+ {
744
+ "provider": "fireworks-ai",
745
+ "status": "live",
746
+ "supports_tools": true,
747
+ "supports_structured_output": false,
748
+ "id": "deepseek-ai/DeepSeek-V3-0324"
749
+ },
750
+ {
751
+ "provider": "together",
752
+ "status": "live",
753
+ "pricing": {
754
+ "input": 1.25,
755
+ "output": 1.25
756
+ },
757
+ "context_length": 131072,
758
+ "supports_tools": true,
759
+ "supports_structured_output": true,
760
+ "id": "deepseek-ai/DeepSeek-V3-0324"
761
+ },
762
+ {
763
+ "provider": "hyperbolic",
764
+ "status": "live",
765
+ "supports_tools": true,
766
+ "supports_structured_output": false,
767
+ "id": "deepseek-ai/DeepSeek-V3-0324"
768
+ },
769
+ {
770
+ "provider": "sambanova",
771
+ "status": "live",
772
+ "pricing": {
773
+ "input": 3,
774
+ "output": 4.5
775
+ },
776
+ "context_length": 32768,
777
+ "supports_tools": true,
778
+ "supports_structured_output": true,
779
+ "id": "deepseek-ai/DeepSeek-V3-0324"
780
+ },
781
+ {
782
+ "provider": "fireworks",
783
+ "status": "live",
784
+ "supports_tools": true,
785
+ "supports_structured_output": false,
786
+ "context_length": 163840,
787
+ "supports_image_input": false,
788
+ "supports_function_calling": false,
789
+ "id": "deepseek-ai/DeepSeek-V3-0324"
790
+ },
791
+ {
792
+ "provider": "featherless-ai",
793
+ "status": "live",
794
+ "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
795
+ },
796
+ {
797
+ "provider": "nscale",
798
+ "status": "live",
799
+ "supports_tools": false,
800
+ "supports_structured_output": false,
801
+ "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
802
+ },
803
+ {
804
+ "provider": "featherless",
805
+ "status": "live",
806
+ "pricing": {
807
+ "input": 0.1,
808
+ "output": 0.1
809
+ },
810
+ "context_length": 16384,
811
+ "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
812
+ },
813
+ {
814
+ "provider": "novita",
815
+ "status": "live",
816
+ "pricing": {
817
+ "input": 0.13,
818
+ "output": 0.39
819
+ },
820
+ "context_length": 131072,
821
+ "supports_tools": true,
822
+ "supports_structured_output": false,
823
+ "id": "meta-llama/Llama-3.3-70B-Instruct"
824
+ },
825
+ {
826
+ "provider": "nebius",
827
+ "status": "live",
828
+ "pricing": {
829
+ "input": 0.25,
830
+ "output": 0.75
831
+ },
832
+ "context_length": 131072,
833
+ "supports_tools": true,
834
+ "supports_structured_output": true,
835
+ "id": "meta-llama/Llama-3.3-70B-Instruct"
836
+ },
837
+ {
838
+ "provider": "cerebras",
839
+ "status": "live",
840
+ "pricing": {
841
+ "input": 0.85,
842
+ "output": 1.2
843
+ },
844
+ "supports_tools": true,
845
+ "supports_structured_output": false,
846
+ "id": "meta-llama/Llama-3.3-70B-Instruct"
847
+ },
848
+ {
849
+ "provider": "fireworks-ai",
850
+ "status": "live",
851
+ "supports_tools": false,
852
+ "supports_structured_output": false,
853
+ "id": "meta-llama/Llama-3.3-70B-Instruct"
854
+ },
855
+ {
856
+ "provider": "together",
857
+ "status": "live",
858
+ "pricing": {
859
+ "input": 0.88,
860
+ "output": 0.88
861
+ },
862
+ "context_length": 131072,
863
+ "supports_tools": true,
864
+ "supports_structured_output": true,
865
+ "id": "meta-llama/Llama-3.3-70B-Instruct"
866
+ },
867
+ {
868
+ "provider": "groq",
869
+ "status": "live",
870
+ "pricing": {
871
+ "input": 0.59,
872
+ "output": 0.79
873
+ },
874
+ "context_length": 131072,
875
+ "supports_tools": true,
876
+ "supports_structured_output": false,
877
+ "id": "meta-llama/Llama-3.3-70B-Instruct"
878
+ },
879
+ {
880
+ "provider": "hyperbolic",
881
+ "status": "live",
882
+ "supports_tools": true,
883
+ "supports_structured_output": false,
884
+ "id": "meta-llama/Llama-3.3-70B-Instruct"
885
+ },
886
+ {
887
+ "provider": "nscale",
888
+ "status": "live",
889
+ "supports_tools": false,
890
+ "supports_structured_output": true,
891
+ "id": "meta-llama/Llama-3.3-70B-Instruct"
892
+ },
893
+ {
894
+ "provider": "sambanova",
895
+ "status": "live",
896
+ "pricing": {
897
+ "input": 0.6,
898
+ "output": 1.2
899
+ },
900
+ "context_length": 131072,
901
+ "supports_tools": true,
902
+ "supports_structured_output": true,
903
+ "id": "meta-llama/Llama-3.3-70B-Instruct"
904
+ },
905
+ {
906
+ "provider": "fireworks",
907
+ "status": "live",
908
+ "supports_tools": false,
909
+ "supports_structured_output": false,
910
+ "context_length": 163840,
911
+ "supports_image_input": false,
912
+ "supports_function_calling": false,
913
+ "id": "meta-llama/Llama-3.3-70B-Instruct"
914
+ },
915
+ {
916
+ "provider": "nebius",
917
+ "status": "live",
918
+ "pricing": {
919
+ "input": 0.02,
920
+ "output": 0.06
921
+ },
922
+ "context_length": 8192,
923
+ "supports_tools": false,
924
+ "supports_structured_output": true,
925
+ "id": "google/gemma-2-2b-it"
926
+ },
927
+ {
928
+ "provider": "novita",
929
+ "status": "live",
930
+ "pricing": {
931
+ "input": 0.24,
932
+ "output": 0.24
933
+ },
934
+ "context_length": 32000,
935
+ "supports_tools": false,
936
+ "supports_structured_output": true,
937
+ "id": "zai-org/GLM-4-32B-0414"
938
+ },
939
+ {
940
+ "provider": "featherless-ai",
941
+ "status": "live",
942
+ "id": "zai-org/GLM-4-32B-0414"
943
+ },
944
+ {
945
+ "provider": "featherless",
946
+ "status": "live",
947
+ "pricing": {
948
+ "input": 0.1,
949
+ "output": 0.1
950
+ },
951
+ "context_length": 16384,
952
+ "id": "zai-org/GLM-4-32B-0414"
953
+ },
954
+ {
955
+ "provider": "nebius",
956
+ "status": "live",
957
+ "pricing": {
958
+ "input": 0.08,
959
+ "output": 0.24
960
+ },
961
+ "context_length": 40960,
962
+ "supports_tools": true,
963
+ "supports_structured_output": true,
964
+ "id": "Qwen/Qwen3-4B"
965
+ },
966
+ {
967
+ "provider": "novita",
968
+ "status": "live",
969
+ "pricing": {
970
+ "input": 0.1,
971
+ "output": 0.5
972
+ },
973
+ "context_length": 131072,
974
+ "supports_tools": false,
975
+ "supports_structured_output": false,
976
+ "id": "meta-llama/Llama-4-Scout-17B-16E-Instruct"
977
+ },
978
+ {
979
+ "provider": "cerebras",
980
+ "status": "live",
981
+ "pricing": {
982
+ "input": 0.65,
983
+ "output": 0.85
984
+ },
985
+ "supports_tools": true,
986
+ "supports_structured_output": false,
987
+ "id": "meta-llama/Llama-4-Scout-17B-16E-Instruct"
988
+ },
989
+ {
990
+ "provider": "fireworks-ai",
991
+ "status": "live",
992
+ "supports_tools": true,
993
+ "supports_structured_output": false,
994
+ "id": "meta-llama/Llama-4-Scout-17B-16E-Instruct"
995
+ },
996
+ {
997
+ "provider": "together",
998
+ "status": "live",
999
+ "pricing": {
1000
+ "input": 0.18000000000000002,
1001
+ "output": 0.5900000000000001
1002
+ },
1003
+ "context_length": 1048576,
1004
+ "supports_tools": true,
1005
+ "supports_structured_output": true,
1006
+ "id": "meta-llama/Llama-4-Scout-17B-16E-Instruct"
1007
+ },
1008
+ {
1009
+ "provider": "groq",
1010
+ "status": "live",
1011
+ "pricing": {
1012
+ "input": 0.11,
1013
+ "output": 0.34
1014
+ },
1015
+ "context_length": 131072,
1016
+ "supports_tools": true,
1017
+ "supports_structured_output": false,
1018
+ "id": "meta-llama/Llama-4-Scout-17B-16E-Instruct"
1019
+ },
1020
+ {
1021
+ "provider": "nscale",
1022
+ "status": "live",
1023
+ "supports_tools": false,
1024
+ "supports_structured_output": true,
1025
+ "id": "meta-llama/Llama-4-Scout-17B-16E-Instruct"
1026
+ },
1027
+ {
1028
+ "provider": "fireworks",
1029
+ "status": "live",
1030
+ "supports_tools": true,
1031
+ "supports_structured_output": false,
1032
+ "context_length": 163840,
1033
+ "supports_image_input": false,
1034
+ "supports_function_calling": false,
1035
+ "id": "meta-llama/Llama-4-Scout-17B-16E-Instruct"
1036
+ },
1037
+ {
1038
+ "provider": "novita",
1039
+ "status": "live",
1040
+ "context_length": 131000,
1041
+ "supports_tools": false,
1042
+ "supports_structured_output": false,
1043
+ "id": "meta-llama/Llama-3.2-1B-Instruct"
1044
+ },
1045
+ {
1046
+ "provider": "featherless-ai",
1047
+ "status": "live",
1048
+ "id": "Qwen/Qwen2.5-7B-Instruct"
1049
+ },
1050
+ {
1051
+ "provider": "together",
1052
+ "status": "live",
1053
+ "pricing": {
1054
+ "input": 0.3,
1055
+ "output": 0.3
1056
+ },
1057
+ "context_length": 32768,
1058
+ "supports_tools": true,
1059
+ "supports_structured_output": false,
1060
+ "id": "Qwen/Qwen2.5-7B-Instruct"
1061
+ },
1062
+ {
1063
+ "provider": "featherless",
1064
+ "status": "live",
1065
+ "pricing": {
1066
+ "input": 0.1,
1067
+ "output": 0.1
1068
+ },
1069
+ "context_length": 16384,
1070
+ "id": "Qwen/Qwen2.5-7B-Instruct"
1071
+ },
1072
+ {
1073
+ "provider": "nebius",
1074
+ "status": "live",
1075
+ "pricing": {
1076
+ "input": 0.08,
1077
+ "output": 0.24
1078
+ },
1079
+ "context_length": 40960,
1080
+ "supports_tools": true,
1081
+ "supports_structured_output": true,
1082
+ "id": "Qwen/Qwen3-14B"
1083
+ },
1084
+ {
1085
+ "provider": "featherless-ai",
1086
+ "status": "live",
1087
+ "id": "Qwen/Qwen3-14B"
1088
+ },
1089
+ {
1090
+ "provider": "featherless",
1091
+ "status": "live",
1092
+ "pricing": {
1093
+ "input": 0.1,
1094
+ "output": 0.1
1095
+ },
1096
+ "context_length": 16384,
1097
+ "id": "Qwen/Qwen3-14B"
1098
+ },
1099
+ {
1100
+ "provider": "novita",
1101
+ "status": "live",
1102
+ "pricing": {
1103
+ "input": 0.07,
1104
+ "output": 0.28
1105
+ },
1106
+ "context_length": 120000,
1107
+ "supports_tools": false,
1108
+ "supports_structured_output": false,
1109
+ "id": "baidu/ERNIE-4.5-21B-A3B-PT"
1110
+ },
1111
+ {
1112
+ "provider": "nebius",
1113
+ "status": "live",
1114
+ "pricing": {
1115
+ "input": 0.13,
1116
+ "output": 0.4
1117
+ },
1118
+ "context_length": 131072,
1119
+ "supports_tools": true,
1120
+ "supports_structured_output": true,
1121
+ "id": "meta-llama/Llama-3.1-70B-Instruct"
1122
+ },
1123
+ {
1124
+ "provider": "fireworks-ai",
1125
+ "status": "live",
1126
+ "supports_tools": true,
1127
+ "supports_structured_output": false,
1128
+ "id": "meta-llama/Llama-3.1-70B-Instruct"
1129
+ },
1130
+ {
1131
+ "provider": "hyperbolic",
1132
+ "status": "offline",
1133
+ "supports_tools": false,
1134
+ "supports_structured_output": false,
1135
+ "id": "meta-llama/Llama-3.1-70B-Instruct"
1136
+ },
1137
+ {
1138
+ "provider": "fireworks",
1139
+ "status": "live",
1140
+ "supports_tools": true,
1141
+ "supports_structured_output": false,
1142
+ "context_length": 163840,
1143
+ "supports_image_input": false,
1144
+ "supports_function_calling": false,
1145
+ "id": "meta-llama/Llama-3.1-70B-Instruct"
1146
+ },
1147
+ {
1148
+ "provider": "novita",
1149
+ "status": "live",
1150
+ "pricing": {
1151
+ "input": 0.14,
1152
+ "output": 0.56
1153
+ },
1154
+ "context_length": 30000,
1155
+ "supports_tools": false,
1156
+ "supports_structured_output": false,
1157
+ "id": "baidu/ERNIE-4.5-VL-28B-A3B-PT"
1158
+ },
1159
+ {
1160
+ "provider": "nebius",
1161
+ "status": "live",
1162
+ "pricing": {
1163
+ "input": 0.1,
1164
+ "output": 0.3
1165
+ },
1166
+ "context_length": 131072,
1167
+ "supports_tools": false,
1168
+ "supports_structured_output": false,
1169
+ "id": "Qwen/Qwen2.5-Coder-32B-Instruct"
1170
+ },
1171
+ {
1172
+ "provider": "featherless-ai",
1173
+ "status": "live",
1174
+ "id": "Qwen/Qwen2.5-Coder-32B-Instruct"
1175
+ },
1176
+ {
1177
+ "provider": "together",
1178
+ "status": "live",
1179
+ "pricing": {
1180
+ "input": 0.8,
1181
+ "output": 0.8
1182
+ },
1183
+ "context_length": 16384,
1184
+ "supports_tools": true,
1185
+ "supports_structured_output": true,
1186
+ "id": "Qwen/Qwen2.5-Coder-32B-Instruct"
1187
+ },
1188
+ {
1189
+ "provider": "hyperbolic",
1190
+ "status": "live",
1191
+ "supports_tools": false,
1192
+ "supports_structured_output": false,
1193
+ "id": "Qwen/Qwen2.5-Coder-32B-Instruct"
1194
+ },
1195
+ {
1196
+ "provider": "nscale",
1197
+ "status": "live",
1198
+ "supports_tools": false,
1199
+ "supports_structured_output": true,
1200
+ "id": "Qwen/Qwen2.5-Coder-32B-Instruct"
1201
+ },
1202
+ {
1203
+ "provider": "featherless",
1204
+ "status": "live",
1205
+ "pricing": {
1206
+ "input": 0.1,
1207
+ "output": 0.1
1208
+ },
1209
+ "context_length": 16384,
1210
+ "id": "Qwen/Qwen2.5-Coder-32B-Instruct"
1211
+ },
1212
+ {
1213
+ "provider": "featherless-ai",
1214
+ "status": "live",
1215
+ "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
1216
+ },
1217
+ {
1218
+ "provider": "nscale",
1219
+ "status": "live",
1220
+ "supports_tools": false,
1221
+ "supports_structured_output": false,
1222
+ "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
1223
+ },
1224
+ {
1225
+ "provider": "featherless",
1226
+ "status": "live",
1227
+ "pricing": {
1228
+ "input": 0.1,
1229
+ "output": 0.1
1230
+ },
1231
+ "context_length": 16384,
1232
+ "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
1233
+ },
1234
+ {
1235
+ "provider": "nebius",
1236
+ "status": "live",
1237
+ "pricing": {
1238
+ "input": 0.03,
1239
+ "output": 0.09
1240
+ },
1241
+ "context_length": 8192,
1242
+ "supports_tools": false,
1243
+ "supports_structured_output": true,
1244
+ "id": "google/gemma-2-9b-it"
1245
+ },
1246
+ {
1247
+ "provider": "featherless-ai",
1248
+ "status": "live",
1249
+ "id": "google/gemma-2-9b-it"
1250
+ },
1251
+ {
1252
+ "provider": "groq",
1253
+ "status": "live",
1254
+ "pricing": {
1255
+ "input": 0.2,
1256
+ "output": 0.2
1257
+ },
1258
+ "context_length": 8192,
1259
+ "supports_tools": true,
1260
+ "supports_structured_output": false,
1261
+ "id": "google/gemma-2-9b-it"
1262
+ },
1263
+ {
1264
+ "provider": "featherless",
1265
+ "status": "live",
1266
+ "pricing": {
1267
+ "input": 0.1,
1268
+ "output": 0.1
1269
+ },
1270
+ "context_length": 16384,
1271
+ "id": "google/gemma-2-9b-it"
1272
+ },
1273
+ {
1274
+ "provider": "novita",
1275
+ "status": "live",
1276
+ "pricing": {
1277
+ "input": 0.4,
1278
+ "output": 1.3
1279
+ },
1280
+ "context_length": 64000,
1281
+ "supports_tools": true,
1282
+ "supports_structured_output": false,
1283
+ "id": "deepseek-ai/DeepSeek-V3"
1284
+ },
1285
+ {
1286
+ "provider": "nebius",
1287
+ "status": "live",
1288
+ "pricing": {
1289
+ "input": 0.5,
1290
+ "output": 1.5
1291
+ },
1292
+ "context_length": 163840,
1293
+ "supports_tools": false,
1294
+ "supports_structured_output": true,
1295
+ "id": "deepseek-ai/DeepSeek-V3"
1296
+ },
1297
+ {
1298
+ "provider": "fireworks-ai",
1299
+ "status": "live",
1300
+ "supports_tools": true,
1301
+ "supports_structured_output": false,
1302
+ "id": "deepseek-ai/DeepSeek-V3"
1303
+ },
1304
+ {
1305
+ "provider": "together",
1306
+ "status": "live",
1307
+ "pricing": {
1308
+ "input": 1.25,
1309
+ "output": 1.25
1310
+ },
1311
+ "context_length": 131072,
1312
+ "supports_tools": true,
1313
+ "supports_structured_output": true,
1314
+ "id": "deepseek-ai/DeepSeek-V3"
1315
+ },
1316
+ {
1317
+ "provider": "fireworks",
1318
+ "status": "live",
1319
+ "supports_tools": true,
1320
+ "supports_structured_output": false,
1321
+ "context_length": 163840,
1322
+ "supports_image_input": false,
1323
+ "supports_function_calling": false,
1324
+ "id": "deepseek-ai/DeepSeek-V3"
1325
+ },
1326
+ {
1327
+ "provider": "cohere",
1328
+ "status": "live",
1329
+ "supports_tools": true,
1330
+ "supports_structured_output": false,
1331
+ "id": "CohereLabs/aya-vision-8b"
1332
+ },
1333
+ {
1334
+ "provider": "cerebras",
1335
+ "status": "live",
1336
+ "pricing": {
1337
+ "input": 0.2,
1338
+ "output": 0.6
1339
+ },
1340
+ "supports_tools": true,
1341
+ "supports_structured_output": false,
1342
+ "id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct"
1343
+ },
1344
+ {
1345
+ "provider": "fireworks-ai",
1346
+ "status": "live",
1347
+ "supports_tools": true,
1348
+ "supports_structured_output": false,
1349
+ "id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct"
1350
+ },
1351
+ {
1352
+ "provider": "groq",
1353
+ "status": "live",
1354
+ "pricing": {
1355
+ "input": 0.2,
1356
+ "output": 0.6
1357
+ },
1358
+ "context_length": 131072,
1359
+ "supports_tools": true,
1360
+ "supports_structured_output": false,
1361
+ "id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct"
1362
+ },
1363
+ {
1364
+ "provider": "sambanova",
1365
+ "status": "live",
1366
+ "pricing": {
1367
+ "input": 0.63,
1368
+ "output": 1.8
1369
+ },
1370
+ "context_length": 131072,
1371
+ "supports_tools": true,
1372
+ "supports_structured_output": true,
1373
+ "id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct"
1374
+ },
1375
+ {
1376
+ "provider": "fireworks",
1377
+ "status": "live",
1378
+ "supports_tools": true,
1379
+ "supports_structured_output": false,
1380
+ "context_length": 163840,
1381
+ "supports_image_input": false,
1382
+ "supports_function_calling": false,
1383
+ "id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct"
1384
+ },
1385
+ {
1386
+ "provider": "novita",
1387
+ "status": "live",
1388
+ "pricing": {
1389
+ "input": 0.55,
1390
+ "output": 2.2
1391
+ },
1392
+ "context_length": 1000000,
1393
+ "supports_tools": false,
1394
+ "supports_structured_output": false,
1395
+ "id": "MiniMaxAI/MiniMax-M1-80k"
1396
+ },
1397
+ {
1398
+ "provider": "cohere",
1399
+ "status": "live",
1400
+ "supports_tools": false,
1401
+ "supports_structured_output": false,
1402
+ "id": "CohereLabs/aya-expanse-8b"
1403
+ },
1404
+ {
1405
+ "provider": "featherless-ai",
1406
+ "status": "live",
1407
+ "id": "Qwen/Qwen2.5-Coder-7B-Instruct"
1408
+ },
1409
+ {
1410
+ "provider": "nscale",
1411
+ "status": "live",
1412
+ "supports_tools": false,
1413
+ "supports_structured_output": true,
1414
+ "id": "Qwen/Qwen2.5-Coder-7B-Instruct"
1415
+ },
1416
+ {
1417
+ "provider": "featherless",
1418
+ "status": "live",
1419
+ "pricing": {
1420
+ "input": 0.1,
1421
+ "output": 0.1
1422
+ },
1423
+ "context_length": 16384,
1424
+ "id": "Qwen/Qwen2.5-Coder-7B-Instruct"
1425
+ },
1426
+ {
1427
+ "provider": "cohere",
1428
+ "status": "live",
1429
+ "supports_tools": true,
1430
+ "supports_structured_output": false,
1431
+ "id": "CohereLabs/c4ai-command-a-03-2025"
1432
+ },
1433
+ {
1434
+ "provider": "nscale",
1435
+ "status": "live",
1436
+ "supports_tools": false,
1437
+ "supports_structured_output": true,
1438
+ "id": "Qwen/Qwen2.5-Coder-3B-Instruct"
1439
+ },
1440
+ {
1441
+ "provider": "novita",
1442
+ "status": "live",
1443
+ "pricing": {
1444
+ "input": 0.8,
1445
+ "output": 0.8
1446
+ },
1447
+ "context_length": 32000,
1448
+ "supports_tools": false,
1449
+ "supports_structured_output": false,
1450
+ "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
1451
+ },
1452
+ {
1453
+ "provider": "nebius",
1454
+ "status": "live",
1455
+ "pricing": {
1456
+ "input": 0.25,
1457
+ "output": 0.75
1458
+ },
1459
+ "context_length": 131072,
1460
+ "supports_tools": false,
1461
+ "supports_structured_output": true,
1462
+ "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
1463
+ },
1464
+ {
1465
+ "provider": "cerebras",
1466
+ "status": "live",
1467
+ "supports_tools": false,
1468
+ "supports_structured_output": false,
1469
+ "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
1470
+ },
1471
+ {
1472
+ "provider": "groq",
1473
+ "status": "live",
1474
+ "pricing": {
1475
+ "input": 0.75,
1476
+ "output": 0.99
1477
+ },
1478
+ "context_length": 131072,
1479
+ "supports_tools": true,
1480
+ "supports_structured_output": false,
1481
+ "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
1482
+ },
1483
+ {
1484
+ "provider": "nscale",
1485
+ "status": "live",
1486
+ "supports_tools": false,
1487
+ "supports_structured_output": true,
1488
+ "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
1489
+ },
1490
+ {
1491
+ "provider": "sambanova",
1492
+ "status": "live",
1493
+ "pricing": {
1494
+ "input": 0.7,
1495
+ "output": 1.4
1496
+ },
1497
+ "context_length": 131072,
1498
+ "supports_tools": false,
1499
+ "supports_structured_output": false,
1500
+ "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
1501
+ },
1502
+ {
1503
+ "provider": "novita",
1504
+ "status": "live",
1505
+ "pricing": {
1506
+ "input": 0.2,
1507
+ "output": 0.8
1508
+ },
1509
+ "context_length": 40960,
1510
+ "supports_tools": false,
1511
+ "supports_structured_output": false,
1512
+ "id": "Qwen/Qwen3-235B-A22B"
1513
+ },
1514
+ {
1515
+ "provider": "nebius",
1516
+ "status": "live",
1517
+ "pricing": {
1518
+ "input": 0.2,
1519
+ "output": 0.6
1520
+ },
1521
+ "context_length": 40960,
1522
+ "supports_tools": true,
1523
+ "supports_structured_output": true,
1524
+ "id": "Qwen/Qwen3-235B-A22B"
1525
+ },
1526
+ {
1527
+ "provider": "fireworks-ai",
1528
+ "status": "live",
1529
+ "supports_tools": true,
1530
+ "supports_structured_output": false,
1531
+ "id": "Qwen/Qwen3-235B-A22B"
1532
+ },
1533
+ {
1534
+ "provider": "together",
1535
+ "status": "live",
1536
+ "pricing": {
1537
+ "input": 0.2,
1538
+ "output": 0.6
1539
+ },
1540
+ "context_length": 40960,
1541
+ "supports_tools": true,
1542
+ "supports_structured_output": true,
1543
+ "id": "Qwen/Qwen3-235B-A22B"
1544
+ },
1545
+ {
1546
+ "provider": "nscale",
1547
+ "status": "live",
1548
+ "supports_tools": false,
1549
+ "supports_structured_output": true,
1550
+ "id": "Qwen/Qwen3-235B-A22B"
1551
+ },
1552
+ {
1553
+ "provider": "fireworks",
1554
+ "status": "live",
1555
+ "supports_tools": true,
1556
+ "supports_structured_output": false,
1557
+ "context_length": 163840,
1558
+ "supports_image_input": false,
1559
+ "supports_function_calling": false,
1560
+ "id": "Qwen/Qwen3-235B-A22B"
1561
+ },
1562
+ {
1563
+ "provider": "nebius",
1564
+ "status": "live",
1565
+ "pricing": {
1566
+ "input": 0.13,
1567
+ "output": 0.4
1568
+ },
1569
+ "context_length": 8192,
1570
+ "supports_tools": false,
1571
+ "supports_structured_output": true,
1572
+ "id": "aaditya/Llama3-OpenBioLLM-70B"
1573
+ },
1574
+ {
1575
+ "provider": "novita",
1576
+ "status": "live",
1577
+ "pricing": {
1578
+ "input": 0.04,
1579
+ "output": 0.04
1580
+ },
1581
+ "context_length": 32000,
1582
+ "supports_tools": false,
1583
+ "supports_structured_output": true,
1584
+ "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
1585
+ },
1586
+ {
1587
+ "provider": "featherless-ai",
1588
+ "status": "live",
1589
+ "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
1590
+ },
1591
+ {
1592
+ "provider": "nscale",
1593
+ "status": "live",
1594
+ "supports_tools": false,
1595
+ "supports_structured_output": true,
1596
+ "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
1597
+ },
1598
+ {
1599
+ "provider": "featherless",
1600
+ "status": "live",
1601
+ "pricing": {
1602
+ "input": 0.1,
1603
+ "output": 0.1
1604
+ },
1605
+ "context_length": 16384,
1606
+ "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
1607
+ },
1608
+ {
1609
+ "provider": "nebius",
1610
+ "status": "live",
1611
+ "pricing": {
1612
+ "input": 0.1,
1613
+ "output": 0.3
1614
+ },
1615
+ "context_length": 16384,
1616
+ "supports_tools": false,
1617
+ "supports_structured_output": true,
1618
+ "id": "microsoft/phi-4"
1619
+ },
1620
+ {
1621
+ "provider": "novita",
1622
+ "status": "live",
1623
+ "pricing": {
1624
+ "input": 0.3,
1625
+ "output": 0.3
1626
+ },
1627
+ "context_length": 64000,
1628
+ "supports_tools": false,
1629
+ "supports_structured_output": true,
1630
+ "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
1631
+ },
1632
+ {
1633
+ "provider": "nscale",
1634
+ "status": "live",
1635
+ "supports_tools": false,
1636
+ "supports_structured_output": true,
1637
+ "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
1638
+ },
1639
+ {
1640
+ "provider": "nebius",
1641
+ "status": "live",
1642
+ "pricing": {
1643
+ "input": 0.6,
1644
+ "output": 1.8
1645
+ },
1646
+ "context_length": 131072,
1647
+ "supports_tools": false,
1648
+ "supports_structured_output": true,
1649
+ "id": "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1"
1650
+ },
1651
+ {
1652
+ "provider": "novita",
1653
+ "status": "live",
1654
+ "pricing": {
1655
+ "input": 0.51,
1656
+ "output": 0.74
1657
+ },
1658
+ "context_length": 8192,
1659
+ "supports_tools": false,
1660
+ "supports_structured_output": true,
1661
+ "id": "meta-llama/Meta-Llama-3-70B-Instruct"
1662
+ },
1663
+ {
1664
+ "provider": "featherless-ai",
1665
+ "status": "live",
1666
+ "id": "meta-llama/Meta-Llama-3-70B-Instruct"
1667
+ },
1668
+ {
1669
+ "provider": "together",
1670
+ "status": "live",
1671
+ "pricing": {
1672
+ "input": 0.88,
1673
+ "output": 0.88
1674
+ },
1675
+ "context_length": 8192,
1676
+ "supports_tools": false,
1677
+ "supports_structured_output": true,
1678
+ "id": "meta-llama/Meta-Llama-3-70B-Instruct"
1679
+ },
1680
+ {
1681
+ "provider": "groq",
1682
+ "status": "live",
1683
+ "pricing": {
1684
+ "input": 0.59,
1685
+ "output": 0.79
1686
+ },
1687
+ "context_length": 8192,
1688
+ "supports_tools": true,
1689
+ "supports_structured_output": false,
1690
+ "id": "meta-llama/Meta-Llama-3-70B-Instruct"
1691
+ },
1692
+ {
1693
+ "provider": "hyperbolic",
1694
+ "status": "live",
1695
+ "supports_tools": false,
1696
+ "supports_structured_output": false,
1697
+ "id": "meta-llama/Meta-Llama-3-70B-Instruct"
1698
+ },
1699
+ {
1700
+ "provider": "featherless",
1701
+ "status": "live",
1702
+ "pricing": {
1703
+ "input": 0.1,
1704
+ "output": 0.1
1705
+ },
1706
+ "context_length": 16384,
1707
+ "id": "meta-llama/Meta-Llama-3-70B-Instruct"
1708
+ },
1709
+ {
1710
+ "provider": "novita",
1711
+ "status": "live",
1712
+ "pricing": {
1713
+ "input": 0.05,
1714
+ "output": 0.05
1715
+ },
1716
+ "context_length": 8192,
1717
+ "supports_tools": false,
1718
+ "supports_structured_output": false,
1719
+ "id": "Sao10K/L3-8B-Stheno-v3.2"
1720
+ },
1721
+ {
1722
+ "provider": "featherless-ai",
1723
+ "status": "live",
1724
+ "id": "Sao10K/L3-8B-Stheno-v3.2"
1725
+ },
1726
+ {
1727
+ "provider": "featherless",
1728
+ "status": "live",
1729
+ "pricing": {
1730
+ "input": 0.1,
1731
+ "output": 0.1
1732
+ },
1733
+ "context_length": 16384,
1734
+ "id": "Sao10K/L3-8B-Stheno-v3.2"
1735
+ },
1736
+ {
1737
+ "provider": "cohere",
1738
+ "status": "live",
1739
+ "supports_tools": true,
1740
+ "supports_structured_output": false,
1741
+ "id": "CohereLabs/c4ai-command-r-plus"
1742
+ },
1743
+ {
1744
+ "provider": "novita",
1745
+ "status": "live",
1746
+ "pricing": {
1747
+ "input": 0.28,
1748
+ "output": 1.1
1749
+ },
1750
+ "context_length": 123000,
1751
+ "supports_tools": false,
1752
+ "supports_structured_output": true,
1753
+ "id": "baidu/ERNIE-4.5-300B-A47B-Base-PT"
1754
+ },
1755
+ {
1756
+ "provider": "fireworks-ai",
1757
+ "status": "live",
1758
+ "supports_tools": false,
1759
+ "supports_structured_output": false,
1760
+ "id": "Qwen/Qwen2.5-VL-32B-Instruct"
1761
+ },
1762
+ {
1763
+ "provider": "fireworks",
1764
+ "status": "live",
1765
+ "supports_tools": false,
1766
+ "supports_structured_output": false,
1767
+ "context_length": 163840,
1768
+ "supports_image_input": false,
1769
+ "supports_function_calling": false,
1770
+ "id": "Qwen/Qwen2.5-VL-32B-Instruct"
1771
+ },
1772
+ {
1773
+ "provider": "novita",
1774
+ "status": "live",
1775
+ "pricing": {
1776
+ "input": 0.15,
1777
+ "output": 0.15
1778
+ },
1779
+ "context_length": 64000,
1780
+ "supports_tools": false,
1781
+ "supports_structured_output": true,
1782
+ "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B"
1783
+ },
1784
+ {
1785
+ "provider": "featherless-ai",
1786
+ "status": "live",
1787
+ "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B"
1788
+ },
1789
+ {
1790
+ "provider": "nscale",
1791
+ "status": "live",
1792
+ "supports_tools": false,
1793
+ "supports_structured_output": true,
1794
+ "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B"
1795
+ },
1796
+ {
1797
+ "provider": "featherless",
1798
+ "status": "live",
1799
+ "pricing": {
1800
+ "input": 0.1,
1801
+ "output": 0.1
1802
+ },
1803
+ "context_length": 16384,
1804
+ "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B"
1805
+ },
1806
+ {
1807
+ "provider": "cohere",
1808
+ "status": "live",
1809
+ "supports_tools": false,
1810
+ "supports_structured_output": false,
1811
+ "id": "CohereLabs/aya-expanse-32b"
1812
+ },
1813
+ {
1814
+ "provider": "novita",
1815
+ "status": "live",
1816
+ "context_length": 120000,
1817
+ "supports_tools": false,
1818
+ "supports_structured_output": false,
1819
+ "id": "baidu/ERNIE-4.5-0.3B-PT"
1820
+ },
1821
+ {
1822
+ "provider": "novita",
1823
+ "status": "live",
1824
+ "pricing": {
1825
+ "input": 0.38,
1826
+ "output": 0.4
1827
+ },
1828
+ "context_length": 32000,
1829
+ "supports_tools": true,
1830
+ "supports_structured_output": false,
1831
+ "id": "Qwen/Qwen2.5-72B-Instruct"
1832
+ },
1833
+ {
1834
+ "provider": "nebius",
1835
+ "status": "live",
1836
+ "pricing": {
1837
+ "input": 0.25,
1838
+ "output": 0.75
1839
+ },
1840
+ "context_length": 131072,
1841
+ "supports_tools": true,
1842
+ "supports_structured_output": true,
1843
+ "id": "Qwen/Qwen2.5-72B-Instruct"
1844
+ },
1845
+ {
1846
+ "provider": "featherless-ai",
1847
+ "status": "live",
1848
+ "id": "Qwen/Qwen2.5-72B-Instruct"
1849
+ },
1850
+ {
1851
+ "provider": "together",
1852
+ "status": "live",
1853
+ "pricing": {
1854
+ "input": 1.2,
1855
+ "output": 1.2
1856
+ },
1857
+ "context_length": 131072,
1858
+ "supports_tools": true,
1859
+ "supports_structured_output": true,
1860
+ "id": "Qwen/Qwen2.5-72B-Instruct"
1861
+ },
1862
+ {
1863
+ "provider": "hyperbolic",
1864
+ "status": "live",
1865
+ "supports_tools": false,
1866
+ "supports_structured_output": false,
1867
+ "id": "Qwen/Qwen2.5-72B-Instruct"
1868
+ },
1869
+ {
1870
+ "provider": "featherless",
1871
+ "status": "live",
1872
+ "pricing": {
1873
+ "input": 0.1,
1874
+ "output": 0.1
1875
+ },
1876
+ "context_length": 16384,
1877
+ "id": "Qwen/Qwen2.5-72B-Instruct"
1878
+ },
1879
+ {
1880
+ "provider": "nebius",
1881
+ "status": "live",
1882
+ "pricing": {
1883
+ "input": 1,
1884
+ "output": 3
1885
+ },
1886
+ "context_length": 131072,
1887
+ "supports_tools": true,
1888
+ "supports_structured_output": true,
1889
+ "id": "meta-llama/Llama-3.1-405B-Instruct"
1890
+ },
1891
+ {
1892
+ "provider": "fireworks-ai",
1893
+ "status": "live",
1894
+ "supports_tools": true,
1895
+ "supports_structured_output": false,
1896
+ "id": "meta-llama/Llama-3.1-405B-Instruct"
1897
+ },
1898
+ {
1899
+ "provider": "sambanova",
1900
+ "status": "live",
1901
+ "supports_tools": true,
1902
+ "supports_structured_output": true,
1903
+ "id": "meta-llama/Llama-3.1-405B-Instruct"
1904
+ },
1905
+ {
1906
+ "provider": "fireworks",
1907
+ "status": "live",
1908
+ "supports_tools": true,
1909
+ "supports_structured_output": false,
1910
+ "context_length": 163840,
1911
+ "supports_image_input": false,
1912
+ "supports_function_calling": false,
1913
+ "id": "meta-llama/Llama-3.1-405B-Instruct"
1914
+ },
1915
+ {
1916
+ "provider": "together",
1917
+ "status": "live",
1918
+ "pricing": {
1919
+ "input": 0.2,
1920
+ "output": 0.6
1921
+ },
1922
+ "context_length": 40960,
1923
+ "supports_tools": true,
1924
+ "supports_structured_output": true,
1925
+ "id": "Qwen/Qwen3-235B-A22B-FP8"
1926
+ },
1927
+ {
1928
+ "provider": "novita",
1929
+ "status": "live",
1930
+ "pricing": {
1931
+ "input": 0.42,
1932
+ "output": 1.25
1933
+ },
1934
+ "context_length": 123000,
1935
+ "supports_tools": false,
1936
+ "supports_structured_output": false,
1937
+ "id": "baidu/ERNIE-4.5-VL-424B-A47B-Base-PT"
1938
+ },
1939
+ {
1940
+ "provider": "nebius",
1941
+ "status": "live",
1942
+ "pricing": {
1943
+ "input": 0.13,
1944
+ "output": 0.4
1945
+ },
1946
+ "context_length": 32768,
1947
+ "supports_tools": false,
1948
+ "supports_structured_output": true,
1949
+ "id": "Qwen/Qwen2-VL-72B-Instruct"
1950
+ },
1951
+ {
1952
+ "provider": "nebius",
1953
+ "status": "live",
1954
+ "pricing": {
1955
+ "input": 0.03,
1956
+ "output": 0.09
1957
+ },
1958
+ "context_length": 32768,
1959
+ "supports_tools": false,
1960
+ "supports_structured_output": true,
1961
+ "id": "Qwen/Qwen2.5-Coder-7B"
1962
+ },
1963
+ {
1964
+ "provider": "featherless-ai",
1965
+ "status": "live",
1966
+ "id": "Qwen/Qwen2.5-Coder-7B"
1967
+ },
1968
+ {
1969
+ "provider": "featherless",
1970
+ "status": "live",
1971
+ "pricing": {
1972
+ "input": 0.1,
1973
+ "output": 0.1
1974
+ },
1975
+ "context_length": 16384,
1976
+ "id": "Qwen/Qwen2.5-Coder-7B"
1977
+ },
1978
+ {
1979
+ "provider": "cohere",
1980
+ "status": "live",
1981
+ "supports_tools": false,
1982
+ "supports_structured_output": false,
1983
+ "id": "CohereLabs/aya-vision-32b"
1984
+ },
1985
+ {
1986
+ "provider": "cohere",
1987
+ "status": "live",
1988
+ "supports_tools": true,
1989
+ "supports_structured_output": false,
1990
+ "id": "CohereLabs/c4ai-command-r7b-arabic-02-2025"
1991
+ },
1992
+ {
1993
+ "provider": "nebius",
1994
+ "status": "live",
1995
+ "pricing": {
1996
+ "input": 0.25,
1997
+ "output": 0.75
1998
+ },
1999
+ "context_length": 32000,
2000
+ "supports_tools": false,
2001
+ "supports_structured_output": true,
2002
+ "id": "Qwen/Qwen2.5-VL-72B-Instruct"
2003
+ },
2004
+ {
2005
+ "provider": "hyperbolic",
2006
+ "status": "live",
2007
+ "supports_tools": false,
2008
+ "supports_structured_output": false,
2009
+ "id": "Qwen/Qwen2.5-VL-72B-Instruct"
2010
+ },
2011
+ {
2012
+ "provider": "novita",
2013
+ "status": "live",
2014
+ "pricing": {
2015
+ "input": 0.14,
2016
+ "output": 0.14
2017
+ },
2018
+ "context_length": 8192,
2019
+ "supports_tools": false,
2020
+ "supports_structured_output": false,
2021
+ "id": "NousResearch/Hermes-2-Pro-Llama-3-8B"
2022
+ },
2023
+ {
2024
+ "provider": "nebius",
2025
+ "status": "live",
2026
+ "pricing": {
2027
+ "input": 0.5,
2028
+ "output": 1.5
2029
+ },
2030
+ "context_length": 131072,
2031
+ "supports_tools": false,
2032
+ "supports_structured_output": true,
2033
+ "id": "Qwen/QwQ-32B"
2034
+ },
2035
+ {
2036
+ "provider": "featherless-ai",
2037
+ "status": "live",
2038
+ "id": "Qwen/QwQ-32B"
2039
+ },
2040
+ {
2041
+ "provider": "hyperbolic",
2042
+ "status": "live",
2043
+ "supports_tools": false,
2044
+ "supports_structured_output": false,
2045
+ "id": "Qwen/QwQ-32B"
2046
+ },
2047
+ {
2048
+ "provider": "nscale",
2049
+ "status": "live",
2050
+ "supports_tools": false,
2051
+ "supports_structured_output": false,
2052
+ "id": "Qwen/QwQ-32B"
2053
+ },
2054
+ {
2055
+ "provider": "featherless",
2056
+ "status": "live",
2057
+ "pricing": {
2058
+ "input": 0.1,
2059
+ "output": 0.1
2060
+ },
2061
+ "context_length": 16384,
2062
+ "id": "Qwen/QwQ-32B"
2063
+ },
2064
+ {
2065
+ "provider": "novita",
2066
+ "status": "live",
2067
+ "pricing": {
2068
+ "input": 0.17,
2069
+ "output": 0.85
2070
+ },
2071
+ "context_length": 1048576,
2072
+ "supports_tools": true,
2073
+ "supports_structured_output": false,
2074
+ "id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
2075
+ },
2076
+ {
2077
+ "provider": "together",
2078
+ "status": "live",
2079
+ "pricing": {
2080
+ "input": 0.27,
2081
+ "output": 0.85
2082
+ },
2083
+ "context_length": 1048576,
2084
+ "supports_tools": true,
2085
+ "supports_structured_output": false,
2086
+ "id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
2087
+ },
2088
+ {
2089
+ "provider": "together",
2090
+ "status": "live",
2091
+ "pricing": {
2092
+ "input": 0.6,
2093
+ "output": 0.6
2094
+ },
2095
+ "context_length": 32768,
2096
+ "supports_tools": false,
2097
+ "supports_structured_output": false,
2098
+ "id": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO"
2099
+ },
2100
+ {
2101
+ "provider": "novita",
2102
+ "status": "live",
2103
+ "pricing": {
2104
+ "input": 1.48,
2105
+ "output": 1.48
2106
+ },
2107
+ "context_length": 8192,
2108
+ "supports_tools": false,
2109
+ "supports_structured_output": false,
2110
+ "id": "Sao10K/L3-70B-Euryale-v2.1"
2111
+ },
2112
+ {
2113
+ "provider": "featherless-ai",
2114
+ "status": "live",
2115
+ "id": "Sao10K/L3-70B-Euryale-v2.1"
2116
+ },
2117
+ {
2118
+ "provider": "featherless",
2119
+ "status": "live",
2120
+ "pricing": {
2121
+ "input": 0.1,
2122
+ "output": 0.1
2123
+ },
2124
+ "context_length": 16384,
2125
+ "id": "Sao10K/L3-70B-Euryale-v2.1"
2126
+ },
2127
+ {
2128
+ "provider": "featherless-ai",
2129
+ "status": "live",
2130
+ "id": "Qwen/QwQ-32B-Preview"
2131
+ },
2132
+ {
2133
+ "provider": "together",
2134
+ "status": "live",
2135
+ "supports_tools": true,
2136
+ "supports_structured_output": true,
2137
+ "id": "Qwen/QwQ-32B-Preview"
2138
+ },
2139
+ {
2140
+ "provider": "featherless",
2141
+ "status": "live",
2142
+ "pricing": {
2143
+ "input": 0.1,
2144
+ "output": 0.1
2145
+ },
2146
+ "context_length": 16384,
2147
+ "id": "Qwen/QwQ-32B-Preview"
2148
+ },
2149
+ {
2150
+ "provider": "novita",
2151
+ "status": "live",
2152
+ "pricing": {
2153
+ "input": 0.7,
2154
+ "output": 2.5
2155
+ },
2156
+ "context_length": 160000,
2157
+ "supports_tools": false,
2158
+ "supports_structured_output": false,
2159
+ "id": "deepseek-ai/DeepSeek-Prover-V2-671B"
2160
+ },
2161
+ {
2162
+ "provider": "featherless-ai",
2163
+ "status": "live",
2164
+ "id": "marin-community/marin-8b-instruct"
2165
+ },
2166
+ {
2167
+ "provider": "together",
2168
+ "status": "live",
2169
+ "pricing": {
2170
+ "input": 0.18000000000000002,
2171
+ "output": 0.18000000000000002
2172
+ },
2173
+ "context_length": 4096,
2174
+ "supports_tools": false,
2175
+ "supports_structured_output": true,
2176
+ "id": "marin-community/marin-8b-instruct"
2177
+ },
2178
+ {
2179
+ "provider": "featherless",
2180
+ "status": "live",
2181
+ "pricing": {
2182
+ "input": 0.1,
2183
+ "output": 0.1
2184
+ },
2185
+ "context_length": 16384,
2186
+ "id": "marin-community/marin-8b-instruct"
2187
+ },
2188
+ {
2189
+ "provider": "fireworks-ai",
2190
+ "status": "live",
2191
+ "supports_tools": false,
2192
+ "supports_structured_output": false,
2193
+ "id": "SentientAGI/Dobby-Unhinged-Llama-3.3-70B"
2194
+ },
2195
+ {
2196
+ "provider": "fireworks",
2197
+ "status": "live",
2198
+ "supports_tools": false,
2199
+ "supports_structured_output": false,
2200
+ "context_length": 163840,
2201
+ "supports_image_input": false,
2202
+ "supports_function_calling": false,
2203
+ "id": "SentientAGI/Dobby-Unhinged-Llama-3.3-70B"
2204
+ },
2205
+ {
2206
+ "provider": "nebius",
2207
+ "status": "live",
2208
+ "pricing": {
2209
+ "input": 1,
2210
+ "output": 3
2211
+ },
2212
+ "context_length": 131072,
2213
+ "supports_tools": false,
2214
+ "supports_structured_output": true,
2215
+ "id": "NousResearch/Hermes-3-Llama-3.1-405B"
2216
+ },
2217
+ {
2218
+ "provider": "novita",
2219
+ "status": "live",
2220
+ "pricing": {
2221
+ "input": 0.62,
2222
+ "output": 0.62
2223
+ },
2224
+ "context_length": 65535,
2225
+ "supports_tools": false,
2226
+ "supports_structured_output": false,
2227
+ "id": "alpindale/WizardLM-2-8x22B"
2228
+ },
2229
+ {
2230
+ "provider": "featherless-ai",
2231
+ "status": "live",
2232
+ "id": "alpindale/WizardLM-2-8x22B"
2233
+ },
2234
+ {
2235
+ "provider": "featherless",
2236
+ "status": "live",
2237
+ "pricing": {
2238
+ "input": 0.1,
2239
+ "output": 0.1
2240
+ },
2241
+ "context_length": 16384,
2242
+ "id": "alpindale/WizardLM-2-8x22B"
2243
+ },
2244
+ {
2245
+ "provider": "groq",
2246
+ "status": "live",
2247
+ "pricing": {
2248
+ "input": 0.2,
2249
+ "output": 0.2
2250
+ },
2251
+ "context_length": 131072,
2252
+ "supports_tools": false,
2253
+ "supports_structured_output": false,
2254
+ "id": "meta-llama/Llama-Guard-4-12B"
2255
+ },
2256
+ {
2257
+ "provider": "nebius",
2258
+ "status": "live",
2259
+ "pricing": {
2260
+ "input": 0.13,
2261
+ "output": 0.4
2262
+ },
2263
+ "context_length": 131072,
2264
+ "supports_tools": false,
2265
+ "supports_structured_output": true,
2266
+ "id": "Qwen/Qwen2.5-32B-Instruct"
2267
+ },
2268
+ {
2269
+ "provider": "featherless-ai",
2270
+ "status": "live",
2271
+ "id": "Qwen/Qwen2.5-32B-Instruct"
2272
+ },
2273
+ {
2274
+ "provider": "featherless",
2275
+ "status": "live",
2276
+ "pricing": {
2277
+ "input": 0.1,
2278
+ "output": 0.1
2279
+ },
2280
+ "context_length": 16384,
2281
+ "id": "Qwen/Qwen2.5-32B-Instruct"
2282
+ },
2283
+ {
2284
+ "provider": "novita",
2285
+ "status": "live",
2286
+ "pricing": {
2287
+ "input": 0.9,
2288
+ "output": 0.9
2289
+ },
2290
+ "context_length": 16000,
2291
+ "supports_tools": false,
2292
+ "supports_structured_output": false,
2293
+ "id": "dphn/dolphin-2.9.2-mixtral-8x22b"
2294
+ },
2295
+ {
2296
+ "provider": "featherless-ai",
2297
+ "status": "live",
2298
+ "id": "Qwen/Qwen2-72B-Instruct"
2299
+ },
2300
+ {
2301
+ "provider": "together",
2302
+ "status": "live",
2303
+ "pricing": {
2304
+ "input": 0.9,
2305
+ "output": 0.9
2306
+ },
2307
+ "context_length": 32768,
2308
+ "supports_tools": false,
2309
+ "supports_structured_output": false,
2310
+ "id": "Qwen/Qwen2-72B-Instruct"
2311
+ },
2312
+ {
2313
+ "provider": "featherless",
2314
+ "status": "live",
2315
+ "pricing": {
2316
+ "input": 0.1,
2317
+ "output": 0.1
2318
+ },
2319
+ "context_length": 16384,
2320
+ "id": "Qwen/Qwen2-72B-Instruct"
2321
+ },
2322
+ {
2323
+ "provider": "featherless-ai",
2324
+ "status": "live",
2325
+ "id": "NousResearch/Hermes-3-Llama-3.1-70B"
2326
+ },
2327
+ {
2328
+ "provider": "hyperbolic",
2329
+ "status": "live",
2330
+ "supports_tools": false,
2331
+ "supports_structured_output": false,
2332
+ "id": "NousResearch/Hermes-3-Llama-3.1-70B"
2333
+ },
2334
+ {
2335
+ "provider": "featherless",
2336
+ "status": "live",
2337
+ "pricing": {
2338
+ "input": 0.1,
2339
+ "output": 0.1
2340
+ },
2341
+ "context_length": 16384,
2342
+ "id": "NousResearch/Hermes-3-Llama-3.1-70B"
2343
+ },
2344
+ {
2345
+ "provider": "novita",
2346
+ "status": "live",
2347
+ "pricing": {
2348
+ "input": 0.05,
2349
+ "output": 0.05
2350
+ },
2351
+ "context_length": 8192,
2352
+ "supports_tools": false,
2353
+ "supports_structured_output": false,
2354
+ "id": "Sao10K/L3-8B-Lunaris-v1"
2355
+ },
2356
+ {
2357
+ "provider": "featherless-ai",
2358
+ "status": "live",
2359
+ "id": "Sao10K/L3-8B-Lunaris-v1"
2360
+ },
2361
+ {
2362
+ "provider": "featherless",
2363
+ "status": "live",
2364
+ "pricing": {
2365
+ "input": 0.1,
2366
+ "output": 0.1
2367
+ },
2368
+ "context_length": 16384,
2369
+ "id": "Sao10K/L3-8B-Lunaris-v1"
2370
+ },
2371
+ {
2372
+ "provider": "together",
2373
+ "status": "live",
2374
+ "pricing": {
2375
+ "input": 0.88,
2376
+ "output": 0.88
2377
+ },
2378
+ "context_length": 32768,
2379
+ "supports_tools": true,
2380
+ "supports_structured_output": false,
2381
+ "id": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF"
2382
+ },
2383
+ {
2384
+ "provider": "cohere",
2385
+ "status": "live",
2386
+ "supports_tools": true,
2387
+ "supports_structured_output": false,
2388
+ "id": "CohereLabs/c4ai-command-r-v01"
2389
+ },
2390
+ {
2391
+ "provider": "sambanova",
2392
+ "status": "live",
2393
+ "pricing": {
2394
+ "input": 0.6,
2395
+ "output": 1.2
2396
+ },
2397
+ "context_length": 131072,
2398
+ "supports_tools": false,
2399
+ "supports_structured_output": true,
2400
+ "id": "tokyotech-llm/Llama-3.3-Swallow-70B-Instruct-v0.4"
2401
+ },
2402
+ {
2403
+ "provider": "cohere",
2404
+ "status": "live",
2405
+ "supports_tools": true,
2406
+ "supports_structured_output": false,
2407
+ "id": "CohereLabs/c4ai-command-r-08-2024"
2408
+ },
2409
+ {
2410
+ "provider": "cohere",
2411
+ "status": "live",
2412
+ "supports_tools": true,
2413
+ "supports_structured_output": false,
2414
+ "id": "CohereLabs/c4ai-command-r7b-12-2024"
2415
+ }
2416
+ ]
2417
+ }
providers/aggregator.ts ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { ProviderEntry, ProviderFetcher } from './types';
2
+ import { getStaticPricing } from './static-pricing';
3
+ import { NovitaFetcher } from './novita';
4
+ import { SambaNovaFetcher } from './sambanova';
5
+ import { GroqFetcher } from './groq';
6
+ import { FeatherlessFetcher } from './featherless';
7
+ import { TogetherFetcher } from './together';
8
+ import { CohereFetcher } from './cohere';
9
+ import { FireworksFetcher } from './fireworks';
10
+ import { NebiusFetcher } from './nebius';
11
+ import { HyperbolicFetcher } from './hyperbolic';
12
+ import { CerebrasFetcher } from './cerebras';
13
+ import { NScaleFetcher } from './nscale';
14
+
15
+ export interface AggregatorConfig {
16
+ providers?: string[]; // Specific providers to fetch from
17
+ apiKeys?: {
18
+ [provider: string]: string;
19
+ };
20
+ concurrent?: number; // Number of concurrent fetches
21
+ includeStaticPricing?: boolean;
22
+ }
23
+
24
+ export class ProviderAggregator {
25
+ private fetchers: Map<string, ProviderFetcher>;
26
+ private config: AggregatorConfig;
27
+
28
+ constructor(config: AggregatorConfig = {}) {
29
+ this.config = {
30
+ concurrent: 3,
31
+ includeStaticPricing: true,
32
+ ...config
33
+ };
34
+
35
+ this.fetchers = new Map();
36
+ this.initializeFetchers();
37
+ }
38
+
39
+ private initializeFetchers() {
40
+ const apiKeys = this.config.apiKeys || {};
41
+
42
+ // Initialize all available fetchers
43
+ this.fetchers.set('novita', new NovitaFetcher(apiKeys.novita));
44
+ this.fetchers.set('sambanova', new SambaNovaFetcher(apiKeys.sambanova));
45
+ this.fetchers.set('groq', new GroqFetcher(apiKeys.groq));
46
+ this.fetchers.set('featherless', new FeatherlessFetcher(apiKeys.featherless));
47
+ this.fetchers.set('together', new TogetherFetcher(apiKeys.together));
48
+ this.fetchers.set('cohere', new CohereFetcher(apiKeys.cohere));
49
+ this.fetchers.set('fireworks', new FireworksFetcher(apiKeys.fireworks));
50
+ this.fetchers.set('nebius', new NebiusFetcher(apiKeys.nebius));
51
+ this.fetchers.set('hyperbolic', new HyperbolicFetcher(apiKeys.hyperbolic));
52
+ this.fetchers.set('cerebras', new CerebrasFetcher(apiKeys.cerebras));
53
+ this.fetchers.set('nscale', new NScaleFetcher(apiKeys.nscale));
54
+ }
55
+
56
+ async fetchAllProviders(): Promise<Map<string, ProviderEntry[]>> {
57
+ const results = new Map<string, ProviderEntry[]>();
58
+ const providers = this.config.providers || Array.from(this.fetchers.keys());
59
+
60
+ // Fetch in batches to respect rate limits
61
+ const batches = this.createBatches(providers, this.config.concurrent || 3);
62
+
63
+ for (const batch of batches) {
64
+ const batchPromises = batch.map(async (provider) => {
65
+ const fetcher = this.fetchers.get(provider);
66
+ if (!fetcher) {
67
+ console.warn(`No fetcher found for provider: ${provider}`);
68
+ return { provider, entries: [] };
69
+ }
70
+
71
+ try {
72
+ console.log(`Fetching models from ${provider}...`);
73
+ const entries = await fetcher.fetchModels();
74
+
75
+ // Enrich with static pricing if needed
76
+ const enrichedEntries = this.enrichWithStaticPricing(provider, entries);
77
+
78
+ return { provider, entries: enrichedEntries };
79
+ } catch (error) {
80
+ console.error(`Failed to fetch from ${provider}:`, error);
81
+ return { provider, entries: [] };
82
+ }
83
+ });
84
+
85
+ const batchResults = await Promise.all(batchPromises);
86
+ for (const { provider, entries } of batchResults) {
87
+ results.set(provider, entries);
88
+ }
89
+ }
90
+
91
+ return results;
92
+ }
93
+
94
+ async fetchProvider(provider: string): Promise<ProviderEntry[]> {
95
+ const fetcher = this.fetchers.get(provider);
96
+ if (!fetcher) {
97
+ throw new Error(`No fetcher found for provider: ${provider}`);
98
+ }
99
+
100
+ const entries = await fetcher.fetchModels();
101
+ return this.enrichWithStaticPricing(provider, entries);
102
+ }
103
+
104
+ private enrichWithStaticPricing(provider: string, entries: ProviderEntry[]): ProviderEntry[] {
105
+ if (!this.config.includeStaticPricing) {
106
+ return entries;
107
+ }
108
+
109
+ return entries.map(entry => {
110
+ // Only add static pricing if the entry doesn't already have pricing
111
+ if (!entry.pricing) {
112
+ const modelId = this.extractModelId(entry);
113
+ const staticPrice = getStaticPricing(provider, modelId);
114
+ if (staticPrice) {
115
+ return {
116
+ ...entry,
117
+ pricing: staticPrice
118
+ };
119
+ }
120
+ }
121
+ return entry;
122
+ });
123
+ }
124
+
125
+ private extractModelId(entry: ProviderEntry): string {
126
+ // Extract model ID from various possible fields
127
+ // This is a simplified version - in production you'd need provider-specific logic
128
+ return (entry as any).id || (entry as any).model_id || 'unknown';
129
+ }
130
+
131
+ private createBatches<T>(items: T[], batchSize: number): T[][] {
132
+ const batches: T[][] = [];
133
+ for (let i = 0; i < items.length; i += batchSize) {
134
+ batches.push(items.slice(i, i + batchSize));
135
+ }
136
+ return batches;
137
+ }
138
+
139
+ // Aggregate all provider data into a single array
140
+ async aggregateAll(): Promise<ProviderEntry[]> {
141
+ const providerMap = await this.fetchAllProviders();
142
+ const allEntries: ProviderEntry[] = [];
143
+
144
+ for (const [provider, entries] of providerMap) {
145
+ allEntries.push(...entries);
146
+ }
147
+
148
+ return allEntries;
149
+ }
150
+
151
+ // Get a summary of available models per provider
152
+ async getSummary(): Promise<{ [provider: string]: number }> {
153
+ const providerMap = await this.fetchAllProviders();
154
+ const summary: { [provider: string]: number } = {};
155
+
156
+ for (const [provider, entries] of providerMap) {
157
+ summary[provider] = entries.length;
158
+ }
159
+
160
+ return summary;
161
+ }
162
+ }
providers/base.ts ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { ProviderEntry, ProviderFetcher, RateLimitConfig } from "./types";
2
+
3
+ export abstract class BaseProviderFetcher implements ProviderFetcher {
4
+ abstract name: string;
5
+ protected apiKey?: string;
6
+ protected baseUrl: string;
7
+ protected rateLimitConfig: RateLimitConfig;
8
+
9
+ private lastRequestTime: number = 0;
10
+ private requestCount: number = 0;
11
+ private requestWindowStart: number = Date.now();
12
+
13
+ constructor(
14
+ baseUrl: string,
15
+ apiKey?: string,
16
+ rateLimitConfig: RateLimitConfig = {}
17
+ ) {
18
+ this.baseUrl = baseUrl;
19
+ this.apiKey = apiKey;
20
+ this.rateLimitConfig = {
21
+ requestsPerMinute: rateLimitConfig.requestsPerMinute || 60,
22
+ retryAttempts: rateLimitConfig.retryAttempts || 3,
23
+ initialBackoffMs: rateLimitConfig.initialBackoffMs || 1000,
24
+ ...rateLimitConfig,
25
+ };
26
+ }
27
+
28
+ abstract fetchModels(): Promise<ProviderEntry[]>;
29
+
30
+ protected async fetchWithRetry<T>(
31
+ url: string,
32
+ options: RequestInit = {},
33
+ retries: number = this.rateLimitConfig.retryAttempts || 3
34
+ ): Promise<T> {
35
+ // Apply rate limiting
36
+ await this.enforceRateLimit();
37
+
38
+ for (let i = 0; i < retries; i++) {
39
+ try {
40
+ const headers: HeadersInit = {
41
+ "Content-Type": "application/json",
42
+ ...(options.headers || {}),
43
+ };
44
+
45
+ if (this.apiKey) {
46
+ headers["Authorization"] = `Bearer ${this.apiKey}`;
47
+ }
48
+
49
+ const response = await fetch(url, {
50
+ ...options,
51
+ headers,
52
+ });
53
+
54
+ if (response.ok) {
55
+ return (await response.json()) as T;
56
+ }
57
+
58
+ // Handle rate limit errors
59
+ if (response.status === 429) {
60
+ const retryAfter = response.headers.get("Retry-After");
61
+ const waitTime = retryAfter
62
+ ? parseInt(retryAfter) * 1000
63
+ : Math.pow(2, i) * (this.rateLimitConfig.initialBackoffMs || 1000);
64
+
65
+ console.log(`Rate limited by ${this.name}, waiting ${waitTime}ms...`);
66
+ await this.sleep(waitTime);
67
+ continue;
68
+ }
69
+
70
+ // Handle other errors
71
+ const errorBody = await response.text();
72
+ throw new Error(
73
+ `HTTP ${response.status}: ${response.statusText} - ${errorBody}`
74
+ );
75
+ } catch (error) {
76
+ if (i === retries - 1) {
77
+ console.error(
78
+ `Failed to fetch from ${this.name} after ${retries} attempts:`,
79
+ error
80
+ );
81
+ throw error;
82
+ }
83
+
84
+ // Exponential backoff for other errors
85
+ const waitTime =
86
+ Math.pow(2, i) * (this.rateLimitConfig.initialBackoffMs || 1000);
87
+ console.log(`Retrying ${this.name} request in ${waitTime}ms...`);
88
+ await this.sleep(waitTime);
89
+ }
90
+ }
91
+
92
+ throw new Error(
93
+ `Failed to fetch from ${this.name} after ${retries} attempts`
94
+ );
95
+ }
96
+
97
+ private async enforceRateLimit(): Promise<void> {
98
+ const now = Date.now();
99
+ const windowDuration = 60000; // 1 minute in milliseconds
100
+
101
+ // Reset window if needed
102
+ if (now - this.requestWindowStart >= windowDuration) {
103
+ this.requestCount = 0;
104
+ this.requestWindowStart = now;
105
+ }
106
+
107
+ // Check if we've hit the rate limit
108
+ if (this.requestCount >= (this.rateLimitConfig.requestsPerMinute || 60)) {
109
+ const waitTime = windowDuration - (now - this.requestWindowStart);
110
+ console.log(
111
+ `Rate limit reached for ${this.name}, waiting ${waitTime}ms...`
112
+ );
113
+ await this.sleep(waitTime);
114
+
115
+ // Reset after waiting
116
+ this.requestCount = 0;
117
+ this.requestWindowStart = Date.now();
118
+ }
119
+
120
+ // Ensure minimum time between requests (100ms default)
121
+ const timeSinceLastRequest = now - this.lastRequestTime;
122
+ const minInterval = 100;
123
+ if (timeSinceLastRequest < minInterval) {
124
+ await this.sleep(minInterval - timeSinceLastRequest);
125
+ }
126
+
127
+ this.requestCount++;
128
+ this.lastRequestTime = Date.now();
129
+ }
130
+
131
+ protected sleep(ms: number): Promise<void> {
132
+ return new Promise((resolve) => setTimeout(resolve, ms));
133
+ }
134
+
135
+ // Helper method to convert various price formats to $ per 1M tokens
136
+ protected normalizePricing(
137
+ input: number | string,
138
+ output: number | string,
139
+ unit: "per_token" | "per_million" | "cents_per_million" = "per_million"
140
+ ): { input: number; output: number } {
141
+ let inputPrice = typeof input === "string" ? parseFloat(input) : input;
142
+ let outputPrice = typeof output === "string" ? parseFloat(output) : output;
143
+
144
+ switch (unit) {
145
+ case "per_token":
146
+ // Convert from $ per token to $ per million tokens
147
+ inputPrice = inputPrice * 1_000_000;
148
+ outputPrice = outputPrice * 1_000_000;
149
+ break;
150
+ case "cents_per_million":
151
+ // Convert from cents per million to $ per million
152
+ inputPrice = inputPrice / 100;
153
+ outputPrice = outputPrice / 100;
154
+ break;
155
+ case "per_million":
156
+ // Already in the correct format
157
+ break;
158
+ }
159
+
160
+ return {
161
+ input: inputPrice,
162
+ output: outputPrice,
163
+ };
164
+ }
165
+
166
+ // Helper to parse supported parameters from various formats
167
+ protected parseSupportedParameters(
168
+ params: string[] | object
169
+ ): Partial<ProviderEntry> {
170
+ const result: Partial<ProviderEntry> = {};
171
+ const paramList = Array.isArray(params) ? params : Object.keys(params);
172
+
173
+ const paramMapping: { [key: string]: keyof ProviderEntry } = {
174
+ temperature: "supports_temperature",
175
+ top_p: "supports_top_p",
176
+ top_k: "supports_top_k",
177
+ max_tokens: "supports_max_tokens",
178
+ stop: "supports_stop_sequences",
179
+ seed: "supports_seed",
180
+ frequency_penalty: "supports_frequency_penalty",
181
+ presence_penalty: "supports_presence_penalty",
182
+ repetition_penalty: "supports_repetition_penalty",
183
+ min_p: "supports_min_p",
184
+ logit_bias: "supports_logit_bias",
185
+ logprobs: "supports_logprobs",
186
+ top_logprobs: "supports_top_logprobs",
187
+ stream: "supports_streaming",
188
+ };
189
+
190
+ for (const param of paramList) {
191
+ const mappedKey = paramMapping[param];
192
+ if (mappedKey) {
193
+ result[mappedKey] = true;
194
+ }
195
+ }
196
+
197
+ result.supported_parameters = paramList;
198
+ return result;
199
+ }
200
+ }
providers/cerebras.ts ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { ProviderEntry, ProviderFetcher } from './types';
2
+ import { BaseProviderFetcher } from './base';
3
+
4
+ export class CerebrasFetcher extends BaseProviderFetcher implements ProviderFetcher {
5
+ name = 'cerebras';
6
+
7
+ constructor(apiKey?: string) {
8
+ super('https://api.cerebras.ai/v1', apiKey);
9
+ }
10
+
11
+ async fetchModels(): Promise<ProviderEntry[]> {
12
+ // Cerebras doesn't provide detailed model information via their API
13
+ // Data will come from HuggingFace router API
14
+ console.log('Cerebras API limited - using HuggingFace router data');
15
+ return [];
16
+ }
17
+ }
providers/cohere.ts ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { BaseProviderFetcher } from './base';
2
+ import type { ProviderEntry, CohereModel } from './types';
3
+
4
+ export class CohereFetcher extends BaseProviderFetcher {
5
+ name = 'cohere';
6
+
7
+ constructor(apiKey?: string) {
8
+ super('https://api.cohere.ai', apiKey, {
9
+ requestsPerMinute: 60 // Conservative default
10
+ });
11
+ }
12
+
13
+ async fetchModels(): Promise<ProviderEntry[]> {
14
+ try {
15
+ // Fetch all models
16
+ const response = await this.fetchWithRetry<{ models: CohereModel[] }>(
17
+ `${this.baseUrl}/v1/models`
18
+ );
19
+
20
+ // Optionally filter by endpoint type
21
+ const chatModels = response.models.filter(model =>
22
+ model.endpoints.includes('chat') || model.endpoints.includes('generate')
23
+ );
24
+
25
+ return chatModels.map(model => this.mapModelToProviderEntry(model));
26
+ } catch (error) {
27
+ console.error(`Failed to fetch Cohere models: ${error}`);
28
+ return [];
29
+ }
30
+ }
31
+
32
+ async fetchModel(modelName: string): Promise<ProviderEntry | null> {
33
+ try {
34
+ const response = await this.fetchWithRetry<CohereModel>(
35
+ `${this.baseUrl}/v1/models/${encodeURIComponent(modelName)}`
36
+ );
37
+
38
+ return this.mapModelToProviderEntry(response);
39
+ } catch (error) {
40
+ console.error(`Failed to fetch Cohere model ${modelName}: ${error}`);
41
+ return null;
42
+ }
43
+ }
44
+
45
+ private mapModelToProviderEntry(model: CohereModel): ProviderEntry {
46
+ const entry: ProviderEntry = {
47
+ provider: this.name,
48
+ context_length: model.context_length,
49
+ status: model.is_deprecated ? 'deprecated' : 'live',
50
+ supports_image_input: model.supports_vision
51
+ };
52
+
53
+ // Map features to capability flags
54
+ const featureMapping = this.mapFeatures(model.features);
55
+ Object.assign(entry, featureMapping);
56
+
57
+ // Map endpoints to capabilities
58
+ const endpointCapabilities = this.mapEndpoints(model.endpoints);
59
+ Object.assign(entry, endpointCapabilities);
60
+
61
+ // Set supported parameters based on features
62
+ entry.supported_parameters = model.features;
63
+
64
+ return entry;
65
+ }
66
+
67
+ private mapFeatures(features: string[]): Partial<ProviderEntry> {
68
+ const result: Partial<ProviderEntry> = {};
69
+
70
+ // Feature mapping based on the spec
71
+ const featureMap: { [key: string]: (keyof ProviderEntry)[] } = {
72
+ 'tools': ['supports_tools'],
73
+ 'strict_tools': ['supports_function_calling'],
74
+ 'json_mode': ['supports_structured_output'],
75
+ 'json_schema': ['supports_structured_output', 'supports_response_format'],
76
+ 'logprobs': ['supports_logprobs']
77
+ };
78
+
79
+ for (const feature of features) {
80
+ const mappedKeys = featureMap[feature];
81
+ if (mappedKeys) {
82
+ for (const key of mappedKeys) {
83
+ (result[key] as any) = true;
84
+ }
85
+ }
86
+ }
87
+
88
+
89
+ return result;
90
+ }
91
+
92
+ private mapEndpoints(endpoints: string[]): Partial<ProviderEntry> {
93
+ const result: Partial<ProviderEntry> = {};
94
+
95
+ // If the model supports chat or generate endpoints, it's a text generation model
96
+ if (endpoints.includes('chat') || endpoints.includes('generate')) {
97
+ result.model_type = 'chat';
98
+ }
99
+
100
+ return result;
101
+ }
102
+ }
providers/featherless.ts ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { BaseProviderFetcher } from "./base";
2
+ import { ProviderEntry, FeatherlessModel } from "./types";
3
+
4
+ export class FeatherlessFetcher extends BaseProviderFetcher {
5
+ name = "featherless";
6
+
7
+ constructor(apiKey?: string) {
8
+ super("https://api.featherless.ai", apiKey, {
9
+ requestsPerMinute: 60, // Conservative default
10
+ });
11
+ }
12
+
13
+ async fetchModels(): Promise<ProviderEntry[]> {
14
+ try {
15
+ const response = await this.fetchWithRetry<{ data: FeatherlessModel[] }>(
16
+ `${this.baseUrl}/v1/models`
17
+ );
18
+
19
+ return response.data.map((model) => this.mapModelToProviderEntry(model));
20
+ } catch (error) {
21
+ console.error(`Failed to fetch Featherless models: ${error}`);
22
+ return [];
23
+ }
24
+ }
25
+
26
+ private mapModelToProviderEntry(model: FeatherlessModel): ProviderEntry {
27
+ const entry: ProviderEntry = {
28
+ provider: this.name,
29
+ context_length: model.context_length,
30
+ max_completion_tokens: model.max_completion_tokens,
31
+ status: model.available_on_current_plan ? "live" : "offline",
32
+ owned_by: model.owned_by,
33
+ model_class: model.model_class,
34
+ is_gated: model.is_gated,
35
+ };
36
+
37
+
38
+ return entry;
39
+ }
40
+ }
providers/fireworks.ts ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { BaseProviderFetcher } from './base';
2
+ import { ProviderEntry, FireworksModel, FireworksDetailedModel } from './types';
3
+
4
+ export class FireworksFetcher extends BaseProviderFetcher {
5
+ name = 'fireworks';
6
+
7
+ constructor(apiKey?: string) {
8
+ super('https://api.fireworks.ai', apiKey, {
9
+ requestsPerMinute: 60 // Conservative default
10
+ });
11
+ }
12
+
13
+ async fetchModels(): Promise<ProviderEntry[]> {
14
+ try {
15
+ const response = await this.fetchWithRetry<{ data: FireworksModel[] }>(
16
+ `${this.baseUrl}/inference/v1/models`
17
+ );
18
+
19
+ // Map basic model data
20
+ const basicEntries = response.data.map(model => this.mapBasicModelToProviderEntry(model));
21
+
22
+ // Optionally enrich with detailed data for important models
23
+ // This can be done selectively to avoid too many API calls
24
+ const enrichedEntries = await this.enrichModels(basicEntries, response.data);
25
+
26
+ return enrichedEntries;
27
+ } catch (error) {
28
+ console.error(`Failed to fetch Fireworks models: ${error}`);
29
+ return [];
30
+ }
31
+ }
32
+
33
+ private async enrichModels(
34
+ basicEntries: ProviderEntry[],
35
+ models: FireworksModel[]
36
+ ): Promise<ProviderEntry[]> {
37
+ // For now, we'll return basic entries
38
+ // In production, you might want to selectively enrich important models
39
+ // to avoid hitting rate limits
40
+ return basicEntries;
41
+ }
42
+
43
+ async fetchDetailedModel(accountId: string, modelId: string): Promise<ProviderEntry | null> {
44
+ try {
45
+ const response = await this.fetchWithRetry<FireworksDetailedModel>(
46
+ `${this.baseUrl}/v1/accounts/${accountId}/models/${modelId}`
47
+ );
48
+
49
+ return this.mapDetailedModelToProviderEntry(response);
50
+ } catch (error) {
51
+ console.error(`Failed to fetch detailed Fireworks model ${modelId}: ${error}`);
52
+ return null;
53
+ }
54
+ }
55
+
56
+ private mapBasicModelToProviderEntry(model: FireworksModel): ProviderEntry {
57
+ const entry: ProviderEntry = {
58
+ provider: this.name,
59
+ context_length: model.context_length,
60
+ owned_by: model.owned_by,
61
+ supports_image_input: model.supports_image_input,
62
+ supports_tools: model.supports_tools,
63
+ supports_function_calling: model.supports_tools
64
+ };
65
+
66
+ // Set model type based on chat support
67
+ if (model.supports_chat) {
68
+ entry.model_type = 'chat';
69
+ }
70
+
71
+ return entry;
72
+ }
73
+
74
+ private mapDetailedModelToProviderEntry(model: FireworksDetailedModel): ProviderEntry {
75
+ const entry: ProviderEntry = {
76
+ provider: this.name,
77
+ context_length: model.contextLength,
78
+ status: model.state === 'READY' ? 'live' : 'offline',
79
+ description: model.description,
80
+ quantization: model.baseModelDetails.defaultPrecision,
81
+ supports_image_input: model.supportsImageInput,
82
+ supports_tools: model.supportsTools,
83
+ supports_function_calling: model.supportsTools
84
+ };
85
+
86
+ // Check deprecation
87
+ if (model.deprecationDate) {
88
+ entry.status = 'deprecated';
89
+ entry.deprecated_at = model.deprecationDate;
90
+ }
91
+
92
+ // Parse parameter count if available
93
+ if (model.baseModelDetails.parameterCount) {
94
+ // Store as metadata - you might want to parse this into a number
95
+ entry.owned_by = model.displayName;
96
+ }
97
+
98
+ // Parse supported parameters from defaultSamplingParams
99
+ if (model.defaultSamplingParams) {
100
+ const paramCapabilities = this.parseSupportedParameters(model.defaultSamplingParams);
101
+ Object.assign(entry, paramCapabilities);
102
+ }
103
+
104
+ // Additional capabilities from model details
105
+ if (model.supportsLora) {
106
+ // Custom capability - not in standard ProviderEntry but could be added
107
+ // entry.supports_lora = true;
108
+ }
109
+
110
+ // Map supported precisions
111
+ if (model.supportedPrecisions && model.supportedPrecisions.length > 0) {
112
+ // Could store as metadata or custom field
113
+ }
114
+
115
+ return entry;
116
+ }
117
+
118
+ // Helper to extract model ID parts from Fireworks model ID format
119
+ private parseModelId(id: string): { accountId: string; modelId: string } | null {
120
+ // Format: "accounts/fireworks/models/qwen3-235b-a22b-thinking-2507"
121
+ const match = id.match(/accounts\/([^\/]+)\/models\/([^\/]+)/);
122
+ if (match) {
123
+ return {
124
+ accountId: match[1],
125
+ modelId: match[2]
126
+ };
127
+ }
128
+ return null;
129
+ }
130
+ }
providers/groq.ts ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { BaseProviderFetcher } from "./base";
2
+ import { ProviderEntry, GroqModel } from "./types";
3
+
4
+ export class GroqFetcher extends BaseProviderFetcher {
5
+ name = "groq";
6
+
7
+ constructor(apiKey?: string) {
8
+ super("https://api.groq.com", apiKey, {
9
+ requestsPerMinute: 100, // Groq rate limit from spec
10
+ });
11
+ }
12
+
13
+ async fetchModels(): Promise<ProviderEntry[]> {
14
+ try {
15
+ const response = await this.fetchWithRetry<{ data: GroqModel[] }>(
16
+ `${this.baseUrl}/openai/v1/models`
17
+ );
18
+
19
+ return response.data.map((model) => this.mapModelToProviderEntry(model));
20
+ } catch (error) {
21
+ console.error(`Failed to fetch Groq models: ${error}`);
22
+ return [];
23
+ }
24
+ }
25
+
26
+ async fetchModel(modelId: string): Promise<ProviderEntry | null> {
27
+ try {
28
+ const response = await this.fetchWithRetry<GroqModel>(
29
+ `${this.baseUrl}/openai/v1/models/${encodeURIComponent(modelId)}`
30
+ );
31
+
32
+ return this.mapModelToProviderEntry(response);
33
+ } catch (error) {
34
+ console.error(`Failed to fetch Groq model ${modelId}: ${error}`);
35
+ return null;
36
+ }
37
+ }
38
+
39
+ private mapModelToProviderEntry(model: GroqModel): ProviderEntry {
40
+ const entry: ProviderEntry = {
41
+ provider: this.name,
42
+ context_length: model.context_window,
43
+ max_completion_tokens: model.max_completion_tokens,
44
+ status: model.active ? "live" : "offline",
45
+ owned_by: model.owned_by,
46
+ };
47
+
48
+ // Store the model ID for matching
49
+ (entry as any).id = model.id;
50
+
51
+ // Add static pricing from Groq's website if not provided by API
52
+ if (!entry.pricing) {
53
+ const staticPricing = this.getStaticPricing(model.id);
54
+ if (staticPricing) {
55
+ entry.pricing = staticPricing;
56
+ }
57
+ }
58
+
59
+
60
+ return entry;
61
+ }
62
+
63
+ private getStaticPricing(modelId: string): { input: number; output: number } | null {
64
+ // Import static pricing data
65
+ const { getStaticPricing } = require('./static-pricing');
66
+ return getStaticPricing('groq', modelId);
67
+ }
68
+ }
providers/huggingface-router.ts ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { ProviderEntry, ProviderFetcher } from './types';
2
+
3
// Shape of one model record returned by the HuggingFace router's
// GET /v1/models endpoint.
interface HFRouterModel {
  id: string;
  object: string;
  created: number;
  owned_by: string;
  providers?: HFRouterProvider[]; // per-provider availability/pricing details
}

// Per-provider metadata attached to a router model.
interface HFRouterProvider {
  provider: string; // router's provider slug, e.g. "fireworks-ai"
  status?: "live" | "offline" | "staging" | "deprecated";
  context_length?: number;
  pricing?: {
    input: number; // cents per million tokens
    output: number; // cents per million tokens
  };
  supports_tools?: boolean;
  supports_structured_output?: boolean;
}
22
+
23
+ export class HuggingFaceRouterFetcher implements ProviderFetcher {
24
+ name = 'huggingface-router';
25
+
26
+ async fetchModels(): Promise<ProviderEntry[]> {
27
+ try {
28
+ const response = await fetch('https://router.huggingface.co/v1/models');
29
+ if (!response.ok) {
30
+ throw new Error(`HTTP ${response.status}: ${response.statusText}`);
31
+ }
32
+
33
+ const data = await response.json() as { data: HFRouterModel[] };
34
+ return this.normalizeModels(data.data);
35
+ } catch (error) {
36
+ console.error('Failed to fetch HuggingFace router models:', error);
37
+ throw error;
38
+ }
39
+ }
40
+
41
+ private normalizeModels(models: HFRouterModel[]): ProviderEntry[] {
42
+ const entries: ProviderEntry[] = [];
43
+
44
+ for (const model of models) {
45
+ if (!model.providers) continue;
46
+
47
+ for (const provider of model.providers) {
48
+ const entry: ProviderEntry = {
49
+ provider: this.normalizeProviderName(provider.provider),
50
+ model_id: model.id,
51
+ owned_by: model.owned_by,
52
+ created: model.created,
53
+ };
54
+
55
+ // Set status
56
+ if (provider.status) {
57
+ entry.status = provider.status === "staging" ? "offline" : provider.status;
58
+ }
59
+
60
+ // Convert pricing from cents to dollars per million tokens
61
+ if (provider.pricing) {
62
+ entry.pricing = {
63
+ input: provider.pricing.input / 100, // cents to dollars
64
+ output: provider.pricing.output / 100, // cents to dollars
65
+ };
66
+ }
67
+
68
+ // Copy context length
69
+ if (provider.context_length) {
70
+ entry.context_length = provider.context_length;
71
+ }
72
+
73
+ // Copy capability flags
74
+ if (provider.supports_tools !== undefined) {
75
+ entry.supports_tools = provider.supports_tools;
76
+ }
77
+
78
+ if (provider.supports_structured_output !== undefined) {
79
+ entry.supports_structured_output = provider.supports_structured_output;
80
+ }
81
+
82
+ entries.push(entry);
83
+ }
84
+ }
85
+
86
+ return entries;
87
+ }
88
+
89
+ private normalizeProviderName(providerName: string): string {
90
+ // Map HF router provider names to our standard names
91
+ const providerMap: Record<string, string> = {
92
+ 'featherless-ai': 'featherless',
93
+ 'fireworks-ai': 'fireworks',
94
+ 'hf-inference': 'huggingface',
95
+ // Keep others as-is
96
+ };
97
+
98
+ return providerMap[providerName] || providerName;
99
+ }
100
+ }
101
+
102
+ // Helper function to extract HF router data from a model
103
+ export function extractHFRouterData(model: any): Map<string, ProviderEntry> {
104
+ const providerMap = new Map<string, ProviderEntry>();
105
+
106
+ if (!model.providers || !Array.isArray(model.providers)) {
107
+ return providerMap;
108
+ }
109
+
110
+ for (const provider of model.providers) {
111
+ if (!provider.provider) continue;
112
+
113
+ const entry: ProviderEntry = {
114
+ provider: provider.provider,
115
+ };
116
+
117
+ // Set status
118
+ if (provider.status) {
119
+ entry.status = provider.status === "staging" ? "offline" : provider.status;
120
+ }
121
+
122
+ // Convert pricing from cents to dollars if needed
123
+ if (provider.pricing) {
124
+ // Check if pricing is already in dollars (values < 100 likely dollars)
125
+ const needsConversion = provider.pricing.input >= 100 || provider.pricing.output >= 100;
126
+ entry.pricing = {
127
+ input: needsConversion ? provider.pricing.input / 100 : provider.pricing.input,
128
+ output: needsConversion ? provider.pricing.output / 100 : provider.pricing.output,
129
+ };
130
+ }
131
+
132
+ // Copy other fields
133
+ if (provider.context_length) {
134
+ entry.context_length = provider.context_length;
135
+ }
136
+
137
+ if (provider.supports_tools !== undefined) {
138
+ entry.supports_tools = provider.supports_tools;
139
+ }
140
+
141
+ if (provider.supports_structured_output !== undefined) {
142
+ entry.supports_structured_output = provider.supports_structured_output;
143
+ }
144
+
145
+ providerMap.set(provider.provider, entry);
146
+ }
147
+
148
+ return providerMap;
149
+ }
providers/hyperbolic.ts ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { ProviderEntry, ProviderFetcher } from './types';
2
+ import { BaseProviderFetcher } from './base';
3
+
4
// Stub fetcher: Hyperbolic exposes no public model-listing endpoint, so this
// class exists only to satisfy the ProviderFetcher registry; entries for this
// provider are populated from the HuggingFace router instead.
export class HyperbolicFetcher extends BaseProviderFetcher implements ProviderFetcher {
  name = 'hyperbolic';

  constructor(apiKey?: string) {
    // Base URL retained for possible future direct API use; no requests are
    // issued by this stub today.
    super('https://api.hyperbolic.ai/v1', apiKey);
  }

  async fetchModels(): Promise<ProviderEntry[]> {
    // Hyperbolic doesn't provide a public API for model listing
    // Data will come from HuggingFace router API
    console.log('Hyperbolic API not available - using HuggingFace router data');
    return [];
  }
}
providers/index.ts ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Main export file for provider module
2
+ export * from './types';
3
+ export * from './base';
4
+ export * from './novita';
5
+ export * from './sambanova';
6
+ export * from './groq';
7
+ export * from './featherless';
8
+ export * from './together';
9
+ export * from './cohere';
10
+ export * from './fireworks';
11
+ export * from './static-pricing';
12
+ export * from './aggregator';
providers/nebius.ts ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { ProviderEntry, ProviderFetcher } from './types';
2
+ import { BaseProviderFetcher } from './base';
3
+
4
// Stub fetcher: Nebius exposes no public model-listing endpoint, so this
// class exists only to satisfy the ProviderFetcher registry; entries for this
// provider are populated from the HuggingFace router instead.
export class NebiusFetcher extends BaseProviderFetcher implements ProviderFetcher {
  name = 'nebius';

  constructor(apiKey?: string) {
    // Base URL retained for possible future direct API use; no requests are
    // issued by this stub today.
    super('https://api.nebius.ai/v1', apiKey);
  }

  async fetchModels(): Promise<ProviderEntry[]> {
    // Nebius doesn't provide a public API for model listing
    // Data will come from HuggingFace router API
    console.log('Nebius API not available - using HuggingFace router data');
    return [];
  }
}
providers/novita.ts ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { BaseProviderFetcher } from "./base";
2
+ import { ProviderEntry, NovitaModel } from "./types";
3
+
4
+ export class NovitaFetcher extends BaseProviderFetcher {
5
+ name = "novita";
6
+
7
+ constructor(apiKey?: string) {
8
+ super("https://api.novita.ai", apiKey, {
9
+ requestsPerMinute: 60, // Conservative default
10
+ });
11
+ }
12
+
13
+ async fetchModels(): Promise<ProviderEntry[]> {
14
+ try {
15
+ const response = await this.fetchWithRetry<{ data: NovitaModel[] }>(
16
+ `${this.baseUrl}/v3/openai/models`
17
+ );
18
+
19
+ return response.data.map((model) => this.mapModelToProviderEntry(model));
20
+ } catch (error) {
21
+ console.error(`Failed to fetch Novita models: ${error}`);
22
+ return [];
23
+ }
24
+ }
25
+
26
+ private mapModelToProviderEntry(model: NovitaModel): ProviderEntry {
27
+ const entry: ProviderEntry = {
28
+ provider: this.name,
29
+ context_length: model.context_size,
30
+ max_completion_tokens: model.max_output_tokens,
31
+ pricing: this.normalizePricing(
32
+ model.input_token_price_per_m,
33
+ model.output_token_price_per_m,
34
+ "cents_per_million"
35
+ ),
36
+ description: model.description,
37
+ model_type: model.model_type,
38
+ status: model.status === 1 ? "live" : "offline",
39
+ };
40
+
41
+ // Store the model ID for matching
42
+ (entry as any).id = model.id;
43
+
44
+ // Map features to capability flags if features exist
45
+ if (model.features && Array.isArray(model.features)) {
46
+ const featureMapping = this.mapFeatures(model.features);
47
+ Object.assign(entry, featureMapping);
48
+ }
49
+
50
+ // Add additional metadata
51
+ if (model.display_name) {
52
+ entry.owned_by = model.owned_by || "unknown";
53
+ }
54
+
55
+
56
+ return entry;
57
+ }
58
+
59
+ private mapFeatures(features: string[]): Partial<ProviderEntry> {
60
+ const result: Partial<ProviderEntry> = {};
61
+
62
+ // Feature mapping based on the spec
63
+ const featureMap: { [key: string]: (keyof ProviderEntry)[] } = {
64
+ "function-calling": ["supports_tools", "supports_function_calling"],
65
+ "structured-outputs": [
66
+ "supports_structured_output",
67
+ "supports_response_format",
68
+ ],
69
+ };
70
+
71
+ for (const feature of features || []) {
72
+ const mappedKeys = featureMap[feature];
73
+ if (mappedKeys) {
74
+ for (const key of mappedKeys) {
75
+ result[key] = true;
76
+ }
77
+ }
78
+ }
79
+
80
+ return result;
81
+ }
82
+
83
+ // Optional: Fetch a single model with potentially more details
84
+ async fetchModel(modelId: string): Promise<ProviderEntry | null> {
85
+ try {
86
+ const response = await this.fetchWithRetry<NovitaModel>(
87
+ `${this.baseUrl}/v3/openai/models/${encodeURIComponent(modelId)}`
88
+ );
89
+
90
+ return this.mapModelToProviderEntry(response);
91
+ } catch (error) {
92
+ console.error(`Failed to fetch Novita model ${modelId}: ${error}`);
93
+ return null;
94
+ }
95
+ }
96
+ }
providers/nscale.ts ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { ProviderEntry, ProviderFetcher } from './types';
2
+ import { BaseProviderFetcher } from './base';
3
+
4
// Stub fetcher: NScale exposes no public model-listing endpoint, so this
// class exists only to satisfy the ProviderFetcher registry; entries for this
// provider are populated from the HuggingFace router instead.
export class NScaleFetcher extends BaseProviderFetcher implements ProviderFetcher {
  name = 'nscale';

  constructor(apiKey?: string) {
    // Base URL retained for possible future direct API use; no requests are
    // issued by this stub today.
    super('https://api.nscale.ai/v1', apiKey);
  }

  async fetchModels(): Promise<ProviderEntry[]> {
    // NScale doesn't provide a public API for model listing
    // Data will come from HuggingFace router API
    console.log('NScale API not available - using HuggingFace router data');
    return [];
  }
}
providers/sambanova.ts ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { BaseProviderFetcher } from "./base";
2
+ import { ProviderEntry, SambaNovaModel } from "./types";
3
+
4
+ export class SambaNovaFetcher extends BaseProviderFetcher {
5
+ name = "sambanova";
6
+
7
+ constructor(apiKey?: string) {
8
+ super("https://api.sambanova.ai", apiKey, {
9
+ requestsPerMinute: 60, // Conservative default
10
+ });
11
+ }
12
+
13
+ async fetchModels(): Promise<ProviderEntry[]> {
14
+ try {
15
+ const response = await this.fetchWithRetry<{ data: SambaNovaModel[] }>(
16
+ `${this.baseUrl}/v1/models`
17
+ );
18
+
19
+ return response.data.map((model) => this.mapModelToProviderEntry(model));
20
+ } catch (error) {
21
+ console.error(`Failed to fetch SambaNova models: ${error}`);
22
+ return [];
23
+ }
24
+ }
25
+
26
+ private mapModelToProviderEntry(model: SambaNovaModel): ProviderEntry {
27
+ const entry: ProviderEntry = {
28
+ provider: this.name,
29
+ context_length: model.context_length,
30
+ max_completion_tokens: model.max_completion_tokens,
31
+ pricing: this.normalizePricing(
32
+ model.pricing.prompt,
33
+ model.pricing.completion,
34
+ "per_token"
35
+ ),
36
+ owned_by: model.owned_by,
37
+ };
38
+
39
+ // Store the model ID for matching
40
+ (entry as any).id = model.id;
41
+
42
+
43
+ return entry;
44
+ }
45
+ }
providers/static-pricing.ts ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { StaticPricing } from './types';
2
+
3
+ // Static pricing data for providers without API pricing endpoints
4
+ // Prices are in $ per 1M tokens
5
+ // Last updated: January 2025
6
+ export const staticPricing: StaticPricing = {
7
+ groq: {
8
+ // Groq pricing from their website
9
+ 'llama-3.3-70b-versatile': { input: 0.59, output: 0.79 },
10
+ 'llama-3.1-70b-versatile': { input: 0.59, output: 0.79 },
11
+ 'llama-3.1-8b-instant': { input: 0.05, output: 0.08 },
12
+ 'llama-3.2-1b-preview': { input: 0.04, output: 0.04 },
13
+ 'llama-3.2-3b-preview': { input: 0.06, output: 0.06 },
14
+ 'llama-3.2-11b-vision-preview': { input: 0.18, output: 0.18 },
15
+ 'llama-3.2-90b-vision-preview': { input: 0.90, output: 0.90 },
16
+ 'llama3-70b-8192': { input: 0.59, output: 0.79 },
17
+ 'llama3-8b-8192': { input: 0.05, output: 0.08 },
18
+ 'mixtral-8x7b-32768': { input: 0.24, output: 0.24 },
19
+ 'gemma-7b-it': { input: 0.07, output: 0.07 },
20
+ 'gemma2-9b-it': { input: 0.20, output: 0.20 }
21
+ },
22
+
23
+ featherless: {
24
+ // Featherless pricing - typically uses pay-per-request model
25
+ // Converting to per-million-token estimates based on average usage
26
+ 'default': { input: 0.10, output: 0.10 } // Default pricing for all models
27
+ },
28
+
29
+ cohere: {
30
+ // Cohere pricing from their website
31
+ 'command-r-plus': { input: 2.50, output: 10.00 },
32
+ 'command-r': { input: 0.15, output: 0.60 },
33
+ 'command': { input: 0.50, output: 1.50 },
34
+ 'command-light': { input: 0.15, output: 0.60 },
35
+ 'c4ai-aya-expanse-8b': { input: 0.15, output: 0.60 },
36
+ 'c4ai-aya-expanse-32b': { input: 0.50, output: 2.00 }
37
+ },
38
+
39
+ fireworks: {
40
+ // Fireworks pricing from their documentation
41
+ 'qwen2.5-coder-32b-instruct': { input: 0.25, output: 0.25 },
42
+ 'qwen2.5-72b-instruct': { input: 0.50, output: 0.50 },
43
+ 'llama-v3p3-70b-instruct': { input: 0.50, output: 0.50 },
44
+ 'llama-v3p2-11b-vision-instruct': { input: 0.20, output: 0.20 },
45
+ 'llama-v3p2-90b-vision-instruct': { input: 1.00, output: 1.00 },
46
+ 'llama-v3p1-405b-instruct': { input: 3.00, output: 3.00 },
47
+ 'llama-v3p1-70b-instruct': { input: 0.50, output: 0.50 },
48
+ 'llama-v3p1-8b-instruct': { input: 0.10, output: 0.10 },
49
+ 'mixtral-8x7b-instruct': { input: 0.50, output: 0.50 },
50
+ 'mixtral-8x22b-instruct': { input: 0.90, output: 0.90 },
51
+ 'deepseek-v3': { input: 0.30, output: 0.30 },
52
+ 'mythomax-l2-13b': { input: 0.10, output: 0.10 }
53
+ },
54
+
55
+ cerebras: {
56
+ // Cerebras pricing - very competitive
57
+ 'llama3.1-8b': { input: 0.10, output: 0.10 },
58
+ 'llama3.1-70b': { input: 0.60, output: 0.60 }
59
+ },
60
+
61
+ nebius: {
62
+ // Nebius pricing estimates
63
+ 'llama-3.1-70b-instruct': { input: 0.50, output: 0.50 },
64
+ 'llama-3.1-8b-instruct': { input: 0.10, output: 0.10 },
65
+ 'llama-3.1-405b-instruct': { input: 2.50, output: 2.50 },
66
+ 'mistral-7b-instruct': { input: 0.10, output: 0.10 }
67
+ },
68
+
69
+ lambdalabs: {
70
+ // Lambda Labs pricing - typically hourly GPU pricing
71
+ // These are estimates based on typical usage patterns
72
+ 'hermes-3-llama-3.1-405b-fp8': { input: 3.00, output: 3.00 },
73
+ 'hermes-3-llama-3.1-70b-fp8': { input: 0.50, output: 0.50 }
74
+ },
75
+
76
+ lepton: {
77
+ // Lepton AI pricing
78
+ 'llama3.1-8b': { input: 0.10, output: 0.10 },
79
+ 'llama3.1-70b': { input: 0.50, output: 0.50 },
80
+ 'llama3.1-405b': { input: 2.50, output: 2.50 },
81
+ 'qwen2.5-72b': { input: 0.50, output: 0.50 },
82
+ 'mixtral-8x7b': { input: 0.30, output: 0.30 }
83
+ },
84
+
85
+ octoai: {
86
+ // OctoAI pricing
87
+ 'meta-llama-3.1-8b-instruct': { input: 0.05, output: 0.10 },
88
+ 'meta-llama-3.1-70b-instruct': { input: 0.50, output: 0.50 },
89
+ 'meta-llama-3.1-405b-instruct': { input: 2.50, output: 2.50 },
90
+ 'qwen2.5-72b-instruct': { input: 0.30, output: 0.30 },
91
+ 'mixtral-8x7b-instruct': { input: 0.30, output: 0.30 },
92
+ 'mixtral-8x22b-instruct': { input: 0.90, output: 0.90 }
93
+ }
94
+ };
95
+
96
+ // Helper function to get pricing for a model
97
+ export function getStaticPricing(provider: string, modelId: string): { input: number; output: number } | null {
98
+ const providerPricing = staticPricing[provider];
99
+ if (!providerPricing) return null;
100
+
101
+ // Check for exact match
102
+ if (providerPricing[modelId]) {
103
+ return providerPricing[modelId];
104
+ }
105
+
106
+ // Check for default pricing
107
+ if (providerPricing['default']) {
108
+ return providerPricing['default'];
109
+ }
110
+
111
+ return null;
112
+ }
providers/together.ts ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { BaseProviderFetcher } from './base';
2
+ import { ProviderEntry, TogetherModel } from './types';
3
+
4
+ export class TogetherFetcher extends BaseProviderFetcher {
5
+ name = 'together';
6
+
7
+ constructor(apiKey?: string) {
8
+ super('https://api.together.ai', apiKey, {
9
+ requestsPerMinute: 600 // Together rate limit from spec
10
+ });
11
+ }
12
+
13
+ async fetchModels(): Promise<ProviderEntry[]> {
14
+ try {
15
+ const response = await this.fetchWithRetry<TogetherModel[]>(
16
+ `${this.baseUrl}/v1/models`
17
+ );
18
+
19
+ return response.map(model => this.mapModelToProviderEntry(model));
20
+ } catch (error) {
21
+ console.error(`Failed to fetch Together models: ${error}`);
22
+ return [];
23
+ }
24
+ }
25
+
26
+ private mapModelToProviderEntry(model: TogetherModel): ProviderEntry {
27
+ const entry: ProviderEntry = {
28
+ provider: this.name,
29
+ context_length: model.context_length,
30
+ pricing: this.normalizePricing(
31
+ model.pricing.input,
32
+ model.pricing.output,
33
+ 'per_million'
34
+ ),
35
+ owned_by: model.organization,
36
+ model_type: model.type
37
+ };
38
+
39
+ // Parse supported parameters from config if available
40
+ if (model.config) {
41
+ const configParams = this.parseConfigParameters(model.config);
42
+ Object.assign(entry, configParams);
43
+ }
44
+
45
+
46
+ return entry;
47
+ }
48
+
49
+ private parseConfigParameters(config: TogetherModel['config']): Partial<ProviderEntry> {
50
+ const result: Partial<ProviderEntry> = {};
51
+
52
+ // Check for stop sequences support
53
+ if (config.stop && config.stop.length > 0) {
54
+ result.supports_stop_sequences = true;
55
+ }
56
+
57
+ return result;
58
+ }
59
+ }
providers/types.ts ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Core provider data structure based on provider-api-spec.md.
// Describes one provider's offering of a model; every field except
// `provider` is optional because data coverage varies by source.
export interface ProviderEntry {
  provider: string; // canonical provider slug, e.g. "groq"
  status?: "live" | "offline" | "deprecated";
  context_length?: number; // context window size, in tokens
  pricing?: {
    input: number; // $ per 1M tokens
    output: number; // $ per 1M tokens
  };
  quantization?: string;
  max_completion_tokens?: number;
  supported_parameters?: string[];

  // Model identification
  model_id?: string;
  created?: number; // creation timestamp as reported by the source API

  // Capability flags (undefined means "unknown", not "unsupported")
  supports_tools?: boolean;
  supports_function_calling?: boolean;
  supports_structured_output?: boolean;
  supports_response_format?: boolean;
  supports_streaming?: boolean;
  supports_logprobs?: boolean;
  supports_stop_sequences?: boolean;
  supports_seed?: boolean;
  supports_temperature?: boolean;
  supports_top_p?: boolean;
  supports_frequency_penalty?: boolean;
  supports_presence_penalty?: boolean;
  supports_repetition_penalty?: boolean;
  supports_top_k?: boolean;
  supports_min_p?: boolean;
  supports_max_tokens?: boolean;
  supports_logit_bias?: boolean;
  supports_top_logprobs?: boolean;
  supports_image_input?: boolean;

  // Performance metrics (presumably filled by the --test-performance run;
  // verify against the performance tester)
  latency_s?: number;
  throughput_tps?: number; // tokens per second
  performance_error?: string;
  performance_tested_at?: string;

  // Additional metadata
  owned_by?: string;
  model_type?: string;
  description?: string;
  deprecated_at?: string;
  model_class?: string;
  is_gated?: boolean; // mirrors FeatherlessModel.is_gated (gated access)
}
53
+
54
// Provider-specific response types

// Model record from Groq's OpenAI-compatible /openai/v1/models endpoint.
export interface GroqModel {
  id: string;
  object: string;
  created: number;
  owned_by: string;
  active: boolean; // false => model treated as "offline"
  context_window: number;
  public_apps: any; // shape undocumented here; passed through untouched
  max_completion_tokens: number;
}

// Model record from Cohere's model listing.
export interface CohereModel {
  name: string;
  endpoints: string[];
  finetuned: boolean;
  context_length: number;
  tokenizer_url: string;
  supports_vision: boolean;
  features: string[];
  default_endpoints: string[];
  is_deprecated?: boolean;
}

// Summary model record from Fireworks' listing endpoint.
export interface FireworksModel {
  id: string;
  object: string;
  owned_by: string;
  created: number;
  kind: string;
  supports_chat: boolean;
  supports_image_input: boolean;
  supports_tools: boolean;
  context_length: number;
}

// Detailed per-model record from Fireworks (note: camelCase fields, unlike
// the summary listing above).
export interface FireworksDetailedModel {
  name: string;
  displayName: string;
  description: string;
  contextLength: number;
  baseModelDetails: {
    checkpointFormat: string;
    defaultPrecision: string;
    modelType: string;
    moe: boolean; // mixture-of-experts flag
    parameterCount: string;
    supportsFireattention: boolean;
    tunable: boolean;
    worldSize: number;
  };
  defaultSamplingParams: {
    temperature?: number;
    top_p?: number;
    max_tokens?: number;
    [key: string]: any; // additional sampling knobs pass through as-is
  };
  supportsImageInput: boolean;
  supportsLora: boolean;
  supportsTools: boolean;
  state: string;
  deprecationDate: string | null;
  huggingFaceUrl: string;
  supportedPrecisions: string[];
  deployedModelRefs: any[];
}

// Model record from Together's /v1/models endpoint.
export interface TogetherModel {
  id: string;
  object: string;
  created: number;
  type: string;
  display_name: string;
  organization: string;
  context_length: number;
  pricing: {
    input: number; // $ per million tokens
    output: number; // $ per million tokens
    hourly: number;
    base: number;
    finetune: number;
  };
  config: {
    chat_template: string;
    stop: string[]; // default stop sequences
    bos_token: string;
    eos_token: string;
  };
}

// Model record from SambaNova's /v1/models endpoint.
export interface SambaNovaModel {
  id: string;
  object: string;
  owned_by: string;
  context_length: number;
  max_completion_tokens: number;
  pricing: {
    prompt: string; // $ per token, string-encoded
    completion: string; // $ per token, string-encoded
  };
  sn_metadata: any;
}

// Model record from Novita's /v3/openai/models endpoint.
export interface NovitaModel {
  id: string;
  object: string;
  created: number;
  owned_by: string;
  input_token_price_per_m: number; // Cents per million tokens
  output_token_price_per_m: number; // Cents per million tokens
  title: string;
  description: string;
  context_size: number;
  max_output_tokens: number;
  model_type: string;
  features: string[]; // e.g. "function-calling", "structured-outputs"
  endpoints: string[];
  status: number; // 1 == live; any other value treated as offline
  display_name: string;
}

// Model record from Featherless' listing.
export interface FeatherlessModel {
  id: string;
  is_gated: boolean;
  created: number;
  model_class: string;
  owned_by: string;
  context_length: number;
  max_completion_tokens: number;
  available_on_current_plan: boolean;
}

// Minimal response types — these providers expose only basic metadata.
export interface CerebrasModel {
  id: string;
  object: string;
  created: number;
  owned_by: string;
}

export interface NebiusModel {
  id: string;
  created: number;
  object: string;
  owned_by: string;
}

export interface LambdaModel {
  id: string;
  object: string;
  created: number;
  owned_by: string;
}

// Base provider fetcher interface — every provider module implements this.
export interface ProviderFetcher {
  name: string; // canonical provider slug
  fetchModels(): Promise<ProviderEntry[]>;
}

// Configuration for rate limiting and retry/backoff behavior.
export interface RateLimitConfig {
  requestsPerMinute?: number;
  requestsPerHour?: number;
  retryAttempts?: number;
  initialBackoffMs?: number;
}

// Feature mapping types: maps a provider feature string to the
// ProviderEntry flag(s) it implies (null = feature intentionally ignored).
export interface FeatureMapping {
  [key: string]: keyof ProviderEntry | string[] | null;
}

// Static pricing data structure: provider slug -> model id -> prices.
export interface StaticPricing {
  [provider: string]: {
    [modelId: string]: {
      input: number; // $ per 1M tokens
      output: number; // $ per 1M tokens
    };
  };
}
tsconfig.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "compilerOptions": {
3
+ // Enable latest features
4
+ "lib": ["ESNext", "DOM"],
5
+ "target": "ESNext",
6
+ "module": "ESNext",
7
+ "moduleDetection": "force",
8
+ "jsx": "react-jsx",
9
+ "allowJs": true,
10
+
11
+ // Bundler mode
12
+ "moduleResolution": "bundler",
13
+ "allowImportingTsExtensions": true,
14
+ "verbatimModuleSyntax": true,
15
+ "noEmit": true,
16
+
17
+ // Best practices
18
+ "strict": true,
19
+ "skipLibCheck": true,
20
+ "noFallthroughCasesInSwitch": true,
21
+
22
+ // Some stricter flags (disabled by default)
23
+ "noUnusedLocals": false,
24
+ "noUnusedParameters": false,
25
+ "noPropertyAccessFromIndexSignature": false
26
+ }
27
+ }