File size: 6,610 Bytes
7d8a788
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
"use client";

import { Inter } from "next/font/google";
import ActivityCalendar from "react-activity-calendar";
import * as duckdb from "@duckdb/duckdb-wasm"
import { useState, useEffect } from "react";
import { Tooltip as MuiTooltip } from '@mui/material';

const inter = Inter({ subsets: ["latin"] });

/**
 * One calendar day of activity for a single provider, in the shape handed to
 * the activity calendar component.
 */
interface CustomActivity {
  /** Calendar day formatted as `YYYY-MM-DD`. */
  date: string;
  /** Sum of the counts of all rows matched to this day. */
  count: number;
  /** Intensity bucket; this file assigns values from 0 (no activity) to 4. */
  level: number;
  /** The rows that were summed into `count`. */
  details: { provider: string; count: number }[];
}

/**
 * Organization ids that get their own heatmap. Each value is matched against
 * the part of a model id that precedes the first `/`.
 */
type ProviderKey =
  | "BAAI"
  | "deepseek-ai"
  | "internlm"
  | "Qwen"
  | "THUDM"
  | "TencentARC"
  | "01-ai"
  | "openbmb";

export default function Home() {
  const [calendarData, setCalendarData] = useState<Record<ProviderKey, CustomActivity[]>>({} as Record<ProviderKey, CustomActivity[]>);
  const [isLoading, setIsLoading] = useState(true);

  // Display name and heatmap accent color for every tracked provider, keyed by
  // the organization id that appears in model ids. Insertion order matters: it
  // is the order used when building the SQL filter and the tie-break order
  // when the cards are sorted.
  const PROVIDERS_MAP: Record<ProviderKey, { name: string; color: string }> = {
    BAAI: { name: "BAAI", color: "#FF6F61" }, // Coral Red
    "deepseek-ai": { name: "Deepseek", color: "#4B8BBE" }, // Soft Blue
    internlm: { name: "Internlm", color: "#34A853" }, // Emerald Green
    Qwen: { name: "Qwen", color: "#FFA500" }, // Bright Orange
    THUDM: { name: "GLM", color: "#00A6D6" }, // Cerulean Blue
    TencentARC: { name: "Tencent", color: "#1DA1F2" }, // Twitter Blue
    "01-ai": { name: "Yi/01", color: "#FF6347" }, // Tomato Red
    openbmb: { name: "OpenBMB", color: "#8A2BE2" }, // Blue Violet
  };

  /**
   * Queries the `models` view for per-day, per-provider model counts.
   *
   * Only rows created on or after the start of the database's current year are
   * considered, and only providers listed in `PROVIDERS_MAP` (plus
   * `Tencent-Hunyuan`, which is reported under `TencentARC`) are kept.
   *
   * @param conn Open DuckDB connection on which the `models` view exists.
   * @returns One `{ date, provider, count }` object per day/provider pair,
   *   ordered by date, with `date` formatted as `YYYY-MM-DD`.
   */
  const getModelData = async (conn: duckdb.AsyncDuckDBConnection) => {
    // Render the tracked provider ids as a comma-separated list of SQL string literals.
    const quotedProviders = Object.keys(PROVIDERS_MAP)
      .map((providerId) => `'${providerId}'`)
      .join(', ');

    // Copy the fields we need out of each result row into a plain object.
    // NOTE(review): Number() presumably converts a bigint COUNT(*) — confirm.
    const toPlainRow = (row: any) => ({
      date: row.date,
      provider: row.provider,
      count: Number(row.count)
    });

    const table = await conn.query(`
      SELECT 
        STRFTIME(DATE_TRUNC('day', CAST(createdAt AS DATE)), '%Y-%m-%d') AS date,
        CASE 
          WHEN provider IN ('TencentARC', 'Tencent-Hunyuan') THEN 'TencentARC'
          ELSE provider
        END AS provider,
        COUNT(*) AS count
      FROM (
        SELECT *, SPLIT_PART(id, '/', 1) AS provider
        FROM models
        WHERE CAST(createdAt AS DATE) >= DATE_TRUNC('year', CURRENT_DATE)
      ) subquery
      WHERE provider IN (${quotedProviders}, 'Tencent-Hunyuan')
      GROUP BY 1, 2
      ORDER BY date
    `);

    return table.toArray().map(toPlainRow);
  }

  /**
   * Builds, for every provider in `PROVIDERS_MAP`, a contiguous list of daily
   * activity entries running from January 1st of the current year through
   * today (inclusive).
   *
   * Days are enumerated in UTC. The day keys are `YYYY-MM-DD` strings taken
   * from `toISOString()`, which always reports UTC; walking the calendar from
   * local midnight instead shifts every key back by one day for users east of
   * UTC (the series then starts on Dec 31 of the previous year and omits
   * today).
   * NOTE(review): assumes the `date` values in `modelData` are UTC calendar
   * days — confirm against the query that produces them.
   *
   * @param modelData Rows of `{ date, provider, count }`; `date` must be a
   *   `YYYY-MM-DD` string. Rows whose provider is not in `PROVIDERS_MAP`, or
   *   whose date falls outside the generated range, are ignored.
   * @returns A record keyed by provider. Each entry's `count` is the sum of
   *   the matching rows, `details` holds those rows, and `level` is 0 for an
   *   empty day or 1-4 depending on how the count compares with that
   *   provider's average daily count over the generated range.
   */
  const generateCalendarData = (
    modelData: Array<{ date: string; provider: string; count: number }>
  ): Record<ProviderKey, CustomActivity[]> => {
    type ModelRow = (typeof modelData)[number];

    const MS_PER_DAY = 24 * 60 * 60 * 1000;
    const providers = Object.keys(PROVIDERS_MAP) as ProviderKey[];

    // Index the rows once by provider and then by day, so that filling the
    // calendar does not rescan the whole input for every (day, provider) pair.
    const rowsByProvider = new Map<string, Map<string, ModelRow[]>>();
    for (const row of modelData) {
      let rowsByDay = rowsByProvider.get(row.provider);
      if (!rowsByDay) {
        rowsByDay = new Map<string, ModelRow[]>();
        rowsByProvider.set(row.provider, rowsByDay);
      }
      const sameDay = rowsByDay.get(row.date);
      if (sameDay) {
        sameDay.push(row);
      } else {
        rowsByDay.set(row.date, [row]);
      }
    }

    const data = {} as Record<ProviderKey, CustomActivity[]>;
    for (const provider of providers) {
      data[provider] = [];
    }

    // UTC midnights have no DST gaps, so stepping by a fixed 24h is exact.
    const now = new Date();
    const firstDayMs = Date.UTC(now.getUTCFullYear(), 0, 1);
    const lastDayMs = Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate());

    for (let dayMs = firstDayMs; dayMs <= lastDayMs; dayMs += MS_PER_DAY) {
      const dateString = new Date(dayMs).toISOString().slice(0, 10);

      for (const provider of providers) {
        const dayData = rowsByProvider.get(provider)?.get(dateString) ?? [];
        const count = dayData.reduce((sum, item) => sum + item.count, 0);

        data[provider].push({
          date: dateString,
          count,
          level: 0, // filled in below once the provider's average is known
          details: dayData,
        });
      }
    }

    // Maps a day's count to an intensity bucket relative to the provider's average.
    const levelFor = (count: number, avgCount: number): number => {
      if (count === 0) return 0;
      if (count <= avgCount * 0.5) return 1;
      if (count <= avgCount) return 2;
      if (count <= avgCount * 1.5) return 3;
      return 4;
    };

    for (const provider of providers) {
      const days = data[provider];
      const total = days.reduce((sum, day) => sum + day.count, 0);
      // `|| 0` turns the NaN produced by an empty series (0 / 0) into 0.
      const avgCount = total / days.length || 0;
      for (const day of days) {
        day.level = levelFor(day.count, avgCount);
      }
    }

    return data;
  }

  /**
   * Starts DuckDB-WASM in a web worker, exposes the remote models parquet file
   * as the `models` view, loads the per-day release counts and stores the
   * resulting calendar data in component state.
   *
   * On success the loading flag is cleared. On failure the error is logged and
   * the loading flag is left set, because the render path reads calendar data
   * for every provider and none is available. In both cases the temporary
   * worker URL, the connection and the database worker are released.
   *
   * @returns A promise that resolves once loading and cleanup are finished; it
   *   does not reject.
   */
  const initDB = async () => {
    const CDN_BASE = `https://cdn.jsdelivr.net/npm/@duckdb/duckdb-wasm@next`

    const JSDELIVR_BUNDLES = {
      mvp: {
        mainModule: `${CDN_BASE}/dist/duckdb-mvp.wasm`,
        mainWorker: `${CDN_BASE}/dist/duckdb-browser-mvp.worker.js`,
      },
      eh: {
        mainModule: `${CDN_BASE}/dist/duckdb-eh.wasm`,
        mainWorker: `${CDN_BASE}/dist/duckdb-browser-eh.worker.js`,
      },
    }

    // Tracked outside the try block so the finally clause can release
    // whatever had been created by the time a step failed.
    let workerUrl: string | undefined
    let worker: Worker | undefined
    let db: duckdb.AsyncDuckDB | undefined
    let connection: duckdb.AsyncDuckDBConnection | undefined

    try {
      const bundle = await duckdb.selectBundle(JSDELIVR_BUNDLES)

      // The worker script lives on another origin, so it is pulled in through
      // a same-origin blob that calls importScripts.
      workerUrl = URL.createObjectURL(
        new Blob([`importScripts("${bundle.mainWorker}");`], {
          type: "text/javascript",
        })
      )

      worker = new Worker(workerUrl)
      const logger = new duckdb.ConsoleLogger()
      db = new duckdb.AsyncDuckDB(logger, worker)
      await db.instantiate(bundle.mainModule)

      connection = await db.connect()

      await connection.query(`
        CREATE VIEW models AS SELECT * FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/refs%2Fconvert%2Fparquet/models/train/0000.parquet?download=true');
      `);

      const modelData = await getModelData(connection);
      setCalendarData(generateCalendarData(modelData));
      setIsLoading(false);
    } catch (error: unknown) {
      // The caller does not await this function, so report the failure here
      // rather than letting it surface as an unhandled rejection.
      console.error("Failed to load model release data", error)
    } finally {
      // By now the worker has either loaded its script or is being abandoned.
      if (workerUrl !== undefined) {
        URL.revokeObjectURL(workerUrl)
      }

      try {
        try {
          await connection?.close()
        } finally {
          // All data has been copied into React state; the database is not
          // used again, so shut its worker down.
          if (db) {
            await db.terminate()
          } else {
            worker?.terminate()
          }
        }
      } catch (cleanupError: unknown) {
        console.error("Failed to shut down DuckDB cleanly", cleanupError)
      }
    }
  }

  // Start the data load once, after the first render.
  useEffect(() => {
    void initDB();
  }, []);

  // Sum of a provider's daily counts across its generated calendar.
  const releaseTotal = (providerId: string) =>
    calendarData[providerId as ProviderKey].reduce((sum, day) => sum + day.count, 0);

  // One heatmap card per provider, ordered by descending total count.
  // Only called once loading has finished, when every provider has data.
  const renderProviderCalendars = () =>
    Object.entries(PROVIDERS_MAP)
      .sort(([keyA], [keyB]) => releaseTotal(keyB) - releaseTotal(keyA))
      .map(([providerId, { name, color }]) => (
        <div key={providerId} className="mt-16 md:col-span-1">
          <h2 className="text-2xl font-bold mb-2">{name}</h2>
          <ActivityCalendar
            data={calendarData[providerId as ProviderKey]}
            theme={{
              dark: ['#161b22', color],
            }}
            colorScheme="dark"
            renderBlock={(block, activity) => (
              <MuiTooltip title={`${activity.count} activities on ${activity.date}`}>
                {block}
              </MuiTooltip>
            )}
          />
        </div>
      ));

  return (
    <main className={`grid grid-cols-1 md:grid-cols-2 gap-8 min-h-screen mx-auto p-6 ${inter.className}`}>
      <div className="col-span-2 text-center">
        <h1 className="text-5xl font-bold">Chinese AI Community: Open Source Heatmap</h1>
        <p className="mt-2 text-sm">A heatmap for open source model releases.</p>
      </div>
      {isLoading ? (
        <div className="col-span-2 text-center">
          <p>Loading...</p>
        </div>
      ) : (
        renderProviderCalendars()
      )}
    </main>
  );
}