Justin Chou committed on
Commit
8e38b8a
·
1 Parent(s): 679abc4
requirements.txt ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ jupyter==1.1.1
2
+ ipykernel==6.29.5
3
+ tqdm==4.67.0
4
+ numpy==2.2.1
5
+ ipython==8.12.3
6
+ isort==5.13.2
7
+ numpy==2.2.1
8
+ pandas==2.2.3
9
+ pyrtl==0.11.2
10
+ matplotlib==3.10.0
11
+ pytest==8.3.4
12
+ torch==2.4.1
13
+ torchvision==0.19.1
14
+ onnx==1.17.0
15
+ netron==8.1.3
16
+ gradio==5.16.0
17
+ black[jupyter]==24.10.0
18
+ pyinstrument==5.0.1
19
+ platformdirs==4.3.6
20
+ python-dotenv==1.0.1
21
+ siliconcompiler==0.31.1
results/accelerator_analysis.csv ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ max_delay,max_freq,logic_area,mem_area,name,weights,activations,component
2
+ 4846.988687360263,191.2050024920285,449812.7568,2244263.653047217,w8a8-8x8-lmul-fast-pipePE,fp8,fp8,accelerator
3
+ 8385.188687360263,114.04864056376259,417236.84640000004,2244263.653047217,w8a8-8x8-lmul-pipePE,fp8,fp8,accelerator
4
+ 2112.04,400.79517763242274,722909.5632,2244263.653047217,w8a8-8x8-lmul-fast-pipeALL,fp8,fp8,accelerator
5
+ 3226.71,277.0305647822124,703269.2304,2244263.653047217,w8a8-8x8-lmul-pipeALL,fp8,fp8,accelerator
6
+ 5110.31,182.03960817794734,576938.2608,2244263.653047217,w8a8-8x8-ieee-fast-pipePE,fp8,fp8,accelerator
7
+ 2112.04,400.79517763242274,809444.1168,2244263.653047217,w8a8-8x8-ieee-fast-pipeALL,fp8,fp8,accelerator
8
+ 3226.71,277.0305647822124,797052.1680000001,2244263.653047217,w8a8-8x8-ieee-pipeALL,fp8,fp8,accelerator
9
+ 15727.619999999988,62.07085760821128,815678.424,4154687.1262499103,w8ab16-8x8-lmul,fp8,bf16,accelerator
10
+ 7465.220201510344,127.41742386478349,992228.8463999999,4154687.1262499103,w8ab16-8x8-lmul-fast-pipePE,fp8,bf16,accelerator
11
+ 13552.820201510342,71.75752740349087,875278.9584,4154687.1262499103,w8ab16-8x8-lmul-pipePE,fp8,bf16,accelerator
12
+ 8137.1100000000015,117.36937668645122,1458355.6944,4154687.1262499103,w8ab16-8x8-ieee-fast-pipePE,fp8,bf16,accelerator
13
+ 13552.820201510342,71.75752740349087,1389356.6544,4154687.1262499103,w8ab16-8x8-ieee-pipePE,fp8,bf16,accelerator
14
+ 22280.86999999997,44.123091069618795,1329756.12,4154687.1262499103,w8ab16-8x8-ieee,fp8,bf16,accelerator
15
+ 5052.610000000001,183.97199210392208,1775193.7104,4154687.1262499103,w8ab16-8x8-ieee-pipeALL,fp8,bf16,accelerator
16
+ 3090.4000000000005,287.9023435250763,1473075.4896,4154687.1262499103,w8ab16-8x8-lmul-fast-pipeALL,fp8,bf16,accelerator
17
+ 5052.610000000001,183.97199210392208,1377424.6992000001,4154687.1262499103,w8ab16-8x8-lmul-pipeALL,fp8,bf16,accelerator
18
+ 3090.4000000000005,287.9023435250763,1846311.5088,4154687.1262499103,w8ab16-8x8-ieee-fast-pipeALL,fp8,bf16,accelerator
19
+ 28036.61999999995,35.18695886855636,1894865.2271999998,7719663.3828588035,w8a32-8x8-lmul,fp8,fp32,accelerator
20
+ 9477.624359111618,101.41345655013816,2341153.1088,7719663.3828588035,w8a32-8x8-lmul-fast-pipePE,fp8,fp32,accelerator
21
+ 21036.72435911159,46.68594157583624,2013344.9423999998,7719663.3828588035,w8a32-8x8-lmul-pipePE,fp8,fp32,accelerator
22
+ 4476.04,205.8019691132405,3219434.2224,7719663.3828588035,w8a32-8x8-lmul-fast-pipeALL,fp8,fp32,accelerator
23
+ 10734.710000000005,89.94658072570697,2931659.4384,7719663.3828588035,w8a32-8x8-lmul-pipeALL,fp8,fp32,accelerator
24
+ 15138.11999999999,64.42834022287056,829506.1104,4154687.1262499103,wb16ab16-8x8-lmul,bf16,bf16,accelerator
25
+ 21625.02999999997,45.43796059892691,1370347.0704,4154687.1262499103,wb16ab16-8x8-ieee,bf16,bf16,accelerator
26
+ 12549.709999999995,77.32331429375594,5124532.5648,7719663.3828588035,w8a32-8x8-ieee-fast-pipePE,fp8,fp32,accelerator
27
+ 13552.820201510342,71.75752740349087,889106.6448,4154687.1262499103,wb16ab16-8x8-lmul-pipePE,bf16,bf16,accelerator
28
+ 13719.47999999998,70.90951378764596,6751446.2928,7719663.3828588035,w8a32-8x8-ieee-pipeALL,fp8,fp32,accelerator
29
+ 7465.220201510344,127.41742386478349,1006056.5327999999,4154687.1262499103,wb16ab16-8x8-lmul-fast-pipePE,bf16,bf16,accelerator
30
+ 7465.220201510344,127.41742386478349,1498946.6448,4154687.1262499103,wb16ab16-8x8-ieee-fast-pipePE,bf16,bf16,accelerator
31
+ 5140.169999999999,181.0554446088026,7046469.4608000005,7719663.3828588035,w8a32-8x8-ieee-fast-pipeALL,fp8,fp32,accelerator
32
+ 3090.4000000000005,287.9023435250763,1486903.176,4154687.1262499103,wb16ab16-8x8-lmul-fast-pipeALL,bf16,bf16,accelerator
33
+ 5052.610000000001,183.97199210392208,1391252.3856000002,4154687.1262499103,wb16ab16-8x8-lmul-pipeALL,bf16,bf16,accelerator
34
+ 5052.610000000001,183.97199210392208,1767722.2992,4154687.1262499103,wb16ab16-8x8-ieee-pipeALL,bf16,bf16,accelerator
35
+ 3090.4000000000005,287.9023435250763,1838840.0976,4154687.1262499103,wb16ab16-8x8-ieee-fast-pipeALL,bf16,bf16,accelerator
36
+ 27447.11999999995,35.93229206341912,1967572.0943999998,7719663.3828588035,w32a32-8x8-lmul,fp32,fp32,accelerator
37
+ 21036.72435911159,46.68594157583624,2086051.8095999998,7719663.3828588035,w32a32-8x8-lmul-pipePE,fp32,fp32,accelerator
38
+ 9477.624359111618,101.41345655013816,2413859.976,7719663.3828588035,w32a32-8x8-lmul-fast-pipePE,fp32,fp32,accelerator
39
+ 11854.91,81.71329908456592,6301224.072,7719663.3828588035,w32a32-8x8-ieee-fast-pipePE,fp32,fp32,accelerator
40
+ 41746.72999999997,23.736207186706412,5898984.0624,7719663.3828588035,w32a32-8x8-ieee,fp32,fp32,accelerator
41
+ 22410.249999999967,43.87263773266214,6017463.7776,7719663.3828588035,w32a32-8x8-ieee-pipePE,fp32,fp32,accelerator
42
+ 4476.04,205.8019691132405,3292141.0896,7719663.3828588035,w32a32-8x8-lmul-fast-pipeALL,fp32,fp32,accelerator
43
+ 10734.710000000005,89.94658072570697,3004366.3056,7719663.3828588035,w32a32-8x8-lmul-pipeALL,fp32,fp32,accelerator
44
+ 13719.47999999998,70.90951378764596,6802854.0624,7719663.3828588035,w32a32-8x8-ieee-pipeALL,fp32,fp32,accelerator
45
+ 5140.169999999999,181.0554446088026,7097877.2304,7719663.3828588035,w32a32-8x8-ieee-fast-pipeALL,fp32,fp32,accelerator
results/component_analysis.csv ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ max_delay,max_freq,logic_area,mem_area,name,dtype,component
2
+ 1537.1999999999996,520.7790855119259,749.232,0.0,adder_stage_2,fp8,adder
3
+ 1565.3,513.2679772109018,496.584,0.0,adder_stage_3,fp8,adder
4
+ 1230.84,619.6401130223567,818.928,0.0,adder_stage_2_fast,fp8,adder
5
+ 3226.71,277.0305647822124,1271.952,0.0,adder_stage_4,fp8,adder
6
+ 1838.6099999999997,450.1240091645249,1062.864,0.0,multiplier_stage_2,fp8,multiplier
7
+ 1445.6799999999998,546.8425312247086,1190.0592000000001,0.0,adder_stage_4_fast,fp8,adder
8
+ 1926.97,432.90605505699205,1219.6799999999998,0.0,multiplier_stage_4,fp8,multiplier
9
+ 1394.4399999999998,562.6068953101089,1062.864,0.0,multiplier_stage_2_fast,fp8,multiplier
10
+ 1585.1,508.10426299476654,662.112,0.0,multiplier_stage_3,fp8,multiplier
11
+ 1962.6399999999996,426.3228798963184,635.976,0.0,lmul_combinational_simple,fp8,lmul
12
+ 1406.37,558.8559101806782,1036.728,0.0,lmul_combinational_fast,fp8,lmul
13
+ 1085.34,681.0411757494859,2103.0768,0.0,lmul_pipelined_fast,fp8,lmul
14
+ 5449.18,171.46247200875143,3031.776,0.0,multiplier_combinational,fp8,multiplier
15
+ 1828.47,452.1879112083817,1210.968,0.0,multiplier_stage_4_fast,fp8,multiplier
16
+ 1585.1,508.10426299476654,670.824,0.0,multiplier_stage_3_fast,fp8,multiplier
17
+ 1926.97,432.90605505699205,3808.8864000000003,0.0,multiplier,fp8,multiplier
18
+ 2223.24,383.6945177727301,1998.5328000000002,0.0,lmul_pipelined_simple,fp8,lmul
19
+ 1908.2399999999998,436.4448944676246,6453.8496000000005,0.0,adder_pipelined_fast,fp8,adder
20
+ 1828.47,452.1879112083817,3800.1744,0.0,multiplier_fast,fp8,multiplier
21
+ 1619.7099999999998,499.3234167702763,1942.776,0.0,adder_stage_2_fast,bf16,adder
22
+ 2773.6000000000004,316.7965532535006,1585.5839999999998,0.0,adder_stage_2,bf16,adder
23
+ 3090.4000000000005,287.9023435250763,1724.976,0.0,adder_stage_3,bf16,adder
24
+ 5052.610000000001,183.97199210392208,2631.024,0.0,adder_stage_4,bf16,adder
25
+ 2015.6699999999998,416.89769747401687,2526.4799999999996,0.0,adder_stage_4_fast,bf16,adder
26
+ 2499.2400000000002,346.9523703785944,1986.336,0.0,adder_stage_5,bf16,adder
27
+ 2076.1,406.6528404700907,1533.312,0.0,multiplier_stage_3,bf16,multiplier
28
+ 4137.380000000002,221.2203398829301,5209.776,0.0,multiplier_stage_2,bf16,multiplier
29
+ 3431.67,262.1458736928751,3380.256,0.0,multiplier_stage_4,bf16,multiplier
30
+ 2321.7400000000002,369.7213040809837,5610.528,0.0,multiplier_stage_2_fast,bf16,multiplier
31
+ 12711.08,76.3703902832425,7910.495999999999,0.0,adder_combinational,bf16,adder
32
+ 2076.1,406.6528404700907,1603.008,0.0,multiplier_stage_3_fast,bf16,multiplier
33
+ 6623.480000000001,142.72502026695284,8163.144,0.0,adder_combinational_fast,bf16,adder
34
+ 4435.440000000001,207.53604901171326,1393.92,0.0,lmul_combinational_simple,bf16,lmul
35
+ 4137.380000000002,221.2203398829301,10656.5184,0.0,multiplier,bf16,multiplier
36
+ 5052.610000000001,183.97199210392208,12817.0944,0.0,adder_pipelined,bf16,adder
37
+ 3090.4000000000005,287.9023435250763,13069.742400000001,0.0,adder_pipelined_fast,bf16,adder
38
+ 9448.150000000003,101.71749998728528,9844.560000000001,0.0,multiplier_combinational,bf16,multiplier
39
+ 1610.1699999999998,501.713351093986,2683.296,0.0,lmul_combinational_fast,bf16,lmul
40
+ 4696.040000000002,196.88760080645153,3969.1872,0.0,lmul_pipelined_simple,bf16,lmul
41
+ 3037.67,292.34038945586684,3476.088,0.0,multiplier_stage_4_fast,bf16,multiplier
42
+ 1289.1399999999999,598.0360496130708,4753.2672,0.0,lmul_pipelined_fast,bf16,lmul
43
+ 2773.6000000000004,316.7965532535006,2421.936,0.0,adder_stage_2,fp32,adder
44
+ 7238.510000000002,131.20759534527932,10384.704,0.0,multiplier_combinational_fast,bf16,multiplier
45
+ 1619.7099999999998,499.3234167702763,2779.128,0.0,adder_stage_2_fast,fp32,adder
46
+ 2742.17,319.98259294694367,11057.2704,0.0,multiplier_fast,bf16,multiplier
47
+ 3406.3000000000015,263.90098435067154,5392.728,0.0,adder_stage_3,fp32,adder
48
+ 10734.710000000005,89.94658072570697,8032.464,0.0,adder_stage_4,fp32,adder
49
+ 4075.2400000000002,224.30376112546656,4495.392,0.0,adder_stage_5,fp32,adder
50
+ 3731.5800000000017,243.0381715752275,8386.1712,0.0,adder_stage_4_fast,fp32,adder
51
+ 5428.87,172.0616600164835,9652.895999999999,0.0,multiplier_stage_4,fp32,multiplier
52
+ 3058.1000000000004,290.60474848159015,4800.312,0.0,multiplier_stage_3,fp32,multiplier
53
+ 8515.380000000001,112.38000624832833,21035.9952,0.0,adder_combinational_fast,fp32,adder
54
+ 20074.479999999978,48.88187596908325,20325.096,0.0,adder_combinational,fp32,adder
55
+ 10734.710000000005,89.94658072570697,29023.1568,0.0,adder_pipelined,fp32,adder
56
+ 13719.47999999998,70.90951378764596,49693.248,0.0,multiplier_stage_2,fp32,multiplier
57
+ 4272.24,214.81169606722747,29734.055999999997,0.0,adder_pipelined_fast,fp32,adder
58
+ 1850.5399999999997,447.7197632457892,6420.744,0.0,lmul_combinational_fast,fp32,lmul
59
+ 9381.04,102.41662262751893,2648.448,0.0,lmul_combinational_simple,fp32,lmul
60
+ 9641.64,99.75420563730968,7649.1359999999995,0.0,lmul_pipelined_simple,fp32,lmul
61
+ 1492.9399999999998,533.0660895337804,10497.960000000001,0.0,lmul_pipelined_fast,fp32,lmul
62
+ 3058.1000000000004,290.60474848159015,4870.008000000001,0.0,multiplier_stage_3_fast,fp32,multiplier
63
+ 5034.87,184.57438070680914,9748.728,0.0,multiplier_stage_4_fast,fp32,multiplier
64
+ 3558.1400000000012,253.73369126699373,52646.616,0.0,multiplier_stage_2_fast,fp32,multiplier
65
+ 13719.47999999998,70.90951378764596,67805.49600000001,0.0,multiplier,fp32,multiplier
66
+ 22206.449999999968,44.26845275117373,64076.759999999995,0.0,multiplier_combinational,fp32,multiplier
67
+ 11651.11,83.09712974204157,67160.808,0.0,multiplier_combinational_fast,fp32,multiplier
68
+ 4936.369999999999,187.99218704470647,70767.576,0.0,multiplier_fast,fp32,multiplier
results/component_data.csv ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name,area,power,max_arrival_time,dtype,dtype_order,architecture,operation,is_fast
2
+ adder_combinational_bf16,1032.88,0.00174519,2.5,bf16,bf16,combinational,adder,False
3
+ adder_combinational_fast_bf16,927.542,0.00195394,1.46,bf16,bf16,combinational,adder,True
4
+ adder_combinational_fast_fp32,2886.37,0.0108059,2.16,fp32,fp32,combinational,adder,True
5
+ adder_combinational_fast_fp8,510.188,0.00198759,1.2,fp8,fp8,combinational,adder,True
6
+ adder_combinational_fp32,3230.04,0.00594875,3.62,fp32,fp32,combinational,adder,False
7
+ adder_combinational_fp8,466.032,0.00190965,1.77,fp8,fp8,combinational,adder,False
8
+ adder_pipelined_bf16,1657.71,0.00266433,0.96,bf16,bf16,pipelined,adder,False
9
+ adder_pipelined_fast_bf16,1603.45,0.00245527,0.74,bf16,bf16,pipelined,adder,True
10
+ adder_pipelined_fast_fp32,3979.09,0.0052372,1.28,fp32,fp32,pipelined,adder,True
11
+ adder_pipelined_fast_fp8,883.652,0.00166099,0.67,fp8,fp8,pipelined,adder,True
12
+ adder_pipelined_fp32,4013.14,0.00520281,1.34,fp32,fp32,pipelined,adder,False
13
+ adder_pipelined_fp8,867.958,0.0016247,0.7,fp8,fp8,pipelined,adder,False
14
+ adder_stage_2_bf16,245.252,0.000211722,0.5,bf16,bf16,stage_2,adder,False
15
+ adder_stage_2_fast_bf16,278.768,0.000327808,0.59,bf16,bf16,stage_2,adder,True
16
+ adder_stage_2_fast_fp32,453.796,0.000368852,0.62,fp32,fp32,stage_2,adder,True
17
+ adder_stage_2_fast_fp8,134.33,0.000157464,0.5,fp8,fp8,stage_2,adder,True
18
+ adder_stage_2_fp32,407.778,0.00025247,0.54,fp32,fp32,stage_2,adder,False
19
+ adder_stage_2_fp8,126.616,0.000125909,0.34,fp8,fp8,stage_2,adder,False
20
+ adder_stage_3_bf16,187.53,7.11147e-05,0.43,bf16,bf16,stage_3,adder,False
21
+ adder_stage_3_fp32,683.354,0.000252061,0.64,fp32,fp32,stage_3,adder,False
22
+ adder_stage_3_fp8,88.046,4.12743e-05,0.22,fp8,fp8,stage_3,adder,False
23
+ adder_stage_4_bf16,325.584,0.000362093,0.73,bf16,bf16,stage_4,adder,False
24
+ adder_stage_4_fast_bf16,327.18,0.000361576,0.71,bf16,bf16,stage_4,adder,True
25
+ adder_stage_4_fast_fp32,1045.11,0.000710296,1.14,fp32,fp32,stage_4,adder,True
26
+ adder_stage_4_fast_fp8,174.762,0.000258125,0.45,fp8,fp8,stage_4,adder,True
27
+ adder_stage_4_fp32,1464.06,0.00105857,1.13,fp32,fp32,stage_4,adder,False
28
+ adder_stage_4_fp8,146.034,0.000176167,0.59,fp8,fp8,stage_4,adder,False
29
+ adder_stage_5_bf16,319.732,0.000188842,0.91,bf16,bf16,stage_5,adder,False
30
+ adder_stage_5_fp32,793.744,0.000386556,1.12,fp32,fp32,stage_5,adder,False
31
+ adder_stage_5_fp8,181.412,0.000121031,0.67,fp8,fp8,stage_5,adder,False
32
+ lmul_combinational_fast_bf16,312.55,0.000345894,0.52,bf16,bf16,combinational,lmul,True
33
+ lmul_combinational_fast_fp32,732.83,0.000580225,0.64,fp32,fp32,combinational,lmul,True
34
+ lmul_combinational_fast_fp8,172.368,0.000221201,0.44,fp8,fp8,combinational,lmul,True
35
+ lmul_combinational_simple_bf16,255.626,0.000252634,0.48,bf16,bf16,combinational,lmul,False
36
+ lmul_combinational_simple_fp32,702.506,0.000531981,0.55,fp32,fp32,combinational,lmul,False
37
+ lmul_combinational_simple_fp8,112.784,0.00011116,0.36,fp8,fp8,combinational,lmul,False
38
+ lmul_pipelined_fast_bf16,688.674,0.000928291,0.6,bf16,bf16,pipelined,lmul,True
39
+ lmul_pipelined_fast_fp32,1529.23,0.00130025,0.67,fp32,fp32,pipelined,lmul,True
40
+ lmul_pipelined_fast_fp8,348.726,0.000582629,0.51,fp8,fp8,pipelined,lmul,True
41
+ lmul_pipelined_simple_bf16,675.374,0.000895227,0.61,bf16,bf16,pipelined,lmul,False
42
+ lmul_pipelined_simple_fp32,1526.04,0.00132914,0.72,fp32,fp32,pipelined,lmul,False
43
+ lmul_pipelined_simple_fp8,348.194,0.00058525,0.55,fp8,fp8,pipelined,lmul,False
44
+ multiplier_pipelined_bf16,1148.59,0.00145203,1.03,bf16,bf16,pipelined,multiplier,False
45
+ multiplier_combinational_bf16,1102.57,0.00704769,1.98,bf16,bf16,combinational,multiplier,False
46
+ multiplier_combinational_fast_bf16,1067.72,0.00746015,1.94,bf16,bf16,combinational,multiplier,True
47
+ multiplier_combinational_fast_fp32,6311.91,0.133398,2.85,fp32,fp32,combinational,multiplier,True
48
+ multiplier_combinational_fast_fp8,347.396,0.00105533,1.29,fp8,fp8,combinational,multiplier,True
49
+ multiplier_combinational_fp32,6353.41,0.176407,3.08,fp32,fp32,combinational,multiplier,False
50
+ multiplier_combinational_fp8,236.208,0.000409495,1.4,fp8,fp8,combinational,multiplier,False
51
+ multiplier_pipelined_fast_bf16,1169.6,0.00165408,1.05,bf16,bf16,pipelined,multiplier,True
52
+ multiplier_pipelined_fast_fp32,6457.42,0.00931073,1.62,fp32,fp32,pipelined,multiplier,True
53
+ multiplier_pipelined_fast_fp8,487.578,0.000761612,0.72,fp8,fp8,pipelined,multiplier,True
54
+ multiplier_pipelined_fp32,6323.09,0.00905862,1.65,fp32,fp32,pipelined,multiplier,False
55
+ multiplier_pipelined_fp8,473.746,0.000716672,0.68,fp8,fp8,pipelined,multiplier,False
56
+ multiplier_stage_2_bf16,551.418,0.00124588,0.86,bf16,bf16,stage_2,multiplier,False
57
+ multiplier_stage_2_fast_bf16,552.482,0.00118352,0.91,bf16,bf16,stage_2,multiplier,True
58
+ multiplier_stage_2_fast_fp32,4149.6,0.0292743,1.46,fp32,fp32,stage_2,multiplier,True
59
+ multiplier_stage_2_fast_fp8,162.26,0.000161324,0.55,fp8,fp8,stage_2,multiplier,True
60
+ multiplier_stage_2_fp32,4009.42,0.0317692,1.51,fp32,fp32,stage_2,multiplier,False
61
+ multiplier_stage_2_fp8,165.186,0.000164563,0.5,fp8,fp8,stage_2,multiplier,False
62
+ multiplier_stage_3_bf16,132.468,4.80459e-05,0.28,bf16,bf16,stage_3,multiplier,False
63
+ multiplier_stage_3_fast_bf16,134.064,4.84747e-05,0.29,bf16,bf16,stage_3,multiplier,True
64
+ multiplier_stage_3_fast_fp32,319.466,8.04445e-05,0.42,fp32,fp32,stage_3,multiplier,True
65
+ multiplier_stage_3_fast_fp8,71.82,2.72315e-05,0.23,fp8,fp8,stage_3,multiplier,True
66
+ multiplier_stage_3_fp32,322.392,8.80949e-05,0.42,fp32,fp32,stage_3,multiplier,False
67
+ multiplier_stage_3_fp8,71.82,2.72315e-05,0.23,fp8,fp8,stage_3,multiplier,False
68
+ multiplier_stage_4_bf16,366.548,0.000207012,0.94,bf16,bf16,stage_4,multiplier,False
69
+ multiplier_stage_4_fast_bf16,352.982,0.000216456,0.69,bf16,bf16,stage_4,multiplier,True
70
+ multiplier_stage_4_fast_fp32,1253.66,0.000553272,1.07,fp32,fp32,stage_4,multiplier,True
71
+ multiplier_stage_4_fast_fp8,160.132,0.000118213,0.65,fp8,fp8,stage_4,multiplier,True
72
+ multiplier_stage_4_fp32,1285.58,0.000567724,1.24,fp32,fp32,stage_4,multiplier,False
73
+ multiplier_stage_4_fp8,148.162,9.70021e-05,0.74,fp8,fp8,stage_4,multiplier,False
results/mnist_eval.csv ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ config,weight_type,activation_type,multiplier,avg_loss,accuracy,total_time,batch_size,total_batches,total_samples,samples_per_second,error
2
+ w8a8-8x8,Float8,Float8,float_multiplier,nan,9.59,3696.4578790664673,16,625,10000,2.705292560380934,
3
+ wb16ab16-8x8,BF16,BF16,float_multiplier,1.5101718571265486,97.46,6990.5663521289825,16,625,10000,1.430499260901013,
4
+ w8ab16-8x8,Float8,BF16,float_multiplier,1.5169549792829369,97.43,7083.499045133591,16,625,10000,1.4117316789743994,
5
+ w8a32-8x8-lmul,Float8,Float32,lmul_simple,nan,nan,0,64,0,0,0,Config not found: /Users/kaibreese/.hardware_accelerators/sim_cache/828aa49c61f5b03b/config.pkl
6
+ wb16ab16-8x8-lmul,BF16,BF16,lmul_simple,0.08935949255176635,97.42,2440.820196866989,64,157,10000,4.096983470079399,
7
+ w8ab16-8x8-lmul,Float8,BF16,lmul_simple,0.0963817737298118,97.41,2561.823173761368,64,157,10000,3.903470037441192,
results/pe_analysis.csv ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ max_delay,max_freq,logic_area,mem_area,name,weights,activations,component
2
+ 12953.759999999993,74.98072995240227,7992.3888,0.0,pe_combinational,fp8,fp8,pe
3
+ 5110.31,182.03960817794734,8431.4736,0.0,pe_fast,fp8,fp8,pe
4
+ 7848.580000000001,121.48336042412268,8452.3824,0.0,pe_standard,fp8,fp8,pe
5
+ 3226.71,277.0305647822124,12114.9072,0.0,pe_pipelined,fp8,fp8,pe
6
+ 8835.420000000002,108.4784594323105,5596.5887999999995,0.0,pe_combinational_lmul,fp8,fp8,pe
7
+ 7848.580000000001,121.48336042412268,6056.582399999999,0.0,pe_standard_lmul,fp8,fp8,pe
8
+ 2112.04,400.79517763242274,12093.9984,0.0,pe_fast_pipelined,fp8,fp8,pe
9
+ 4310.380000000001,213.06606326357544,6445.1376,0.0,pe_fast_lmul,fp8,fp8,pe
10
+ 3226.71,277.0305647822124,10304.553600000001,0.0,pe_pipelined_lmul,fp8,fp8,pe
11
+ 2112.04,400.79517763242274,10396.900800000001,0.0,pe_fast_pipelined_lmul,fp8,fp8,pe
12
+ 22280.86999999997,44.123091069618795,19880.784,0.0,pe_combinational,fp8,bf16,pe
13
+ 12914.88,75.19995668482495,20800.7712,0.0,pe_standard,fp8,bf16,pe
14
+ 5052.610000000001,183.97199210392208,26867.808,0.0,pe_pipelined,fp8,bf16,pe
15
+ 8137.1100000000015,117.36937668645122,21593.5632,0.0,pe_fast,fp8,bf16,pe
16
+ 15727.619999999988,62.07085760821128,11848.320000000002,0.0,pe_combinational_lmul,fp8,bf16,pe
17
+ 3090.4000000000005,287.9023435250763,27521.208,0.0,pe_fast_pipelined,fp8,bf16,pe
18
+ 12914.88,75.19995668482495,12768.307200000001,0.0,pe_standard_lmul,fp8,bf16,pe
19
+ 6827.280000000002,138.69086914793874,14310.3312,0.0,pe_fast_lmul,fp8,bf16,pe
20
+ 5052.610000000001,183.97199210392208,20250.1728,0.0,pe_pipelined_lmul,fp8,bf16,pe
21
+ 3090.4000000000005,287.9023435250763,21286.9008,0.0,pe_fast_pipelined_lmul,fp8,bf16,pe
22
+ 42336.22999999998,23.4086616261576,71535.97439999999,0.0,pe_combinational,fp8,fp32,pe
23
+ 22999.749999999967,42.766569372721406,73375.9488,0.0,pe_standard,fp8,fp32,pe
24
+ 12549.709999999995,77.32331429375594,77170.89600000001,0.0,pe_fast,fp8,fp32,pe
25
+ 13719.47999999998,70.90951378764596,102982.80960000001,0.0,pe_pipelined,fp8,fp32,pe
26
+ 28036.61999999995,35.18695886855636,27357.4224,0.0,pe_combinational_lmul,fp8,fp32,pe
27
+ 20278.279999999977,48.399711924914676,29197.396800000002,0.0,pe_standard_lmul,fp8,fp32,pe
28
+ 5140.169999999999,181.0554446088026,106655.7888,0.0,pe_fast_pipelined,fp8,fp32,pe
29
+ 8719.18,109.86379087207679,33680.592000000004,0.0,pe_fast_lmul,fp8,fp32,pe
30
+ 4476.04,205.8019691132405,46455.868800000004,0.0,pe_fast_pipelined_lmul,fp8,fp32,pe
31
+ 10734.710000000005,89.94658072570697,42896.145599999996,0.0,pe_pipelined_lmul,fp8,fp32,pe
32
+ 21625.02999999997,45.43796059892691,20515.017600000003,0.0,pe_combinational,bf16,bf16,pe
33
+ 12914.88,75.19995668482495,21435.004800000002,0.0,pe_standard,bf16,bf16,pe
34
+ 7442.310000000002,127.79046453111758,22227.7968,0.0,pe_fast,bf16,bf16,pe
35
+ 5052.610000000001,183.97199210392208,27153.5616,0.0,pe_pipelined,bf16,bf16,pe
36
+ 3090.4000000000005,287.9023435250763,27806.9616,0.0,pe_fast_pipelined,bf16,bf16,pe
37
+ 15138.11999999999,64.42834022287056,12064.3776,0.0,pe_combinational_lmul,bf16,bf16,pe
38
+ 12914.88,75.19995668482495,12984.3648,0.0,pe_standard_lmul,bf16,bf16,pe
39
+ 6827.280000000002,138.69086914793874,14526.3888,0.0,pe_fast_lmul,bf16,bf16,pe
40
+ 5052.610000000001,183.97199210392208,20466.2304,0.0,pe_pipelined_lmul,bf16,bf16,pe
41
+ 3090.4000000000005,287.9023435250763,21502.9584,0.0,pe_fast_pipelined_lmul,bf16,bf16,pe
42
+ 11854.91,81.71329908456592,95556.7008,0.0,pe_fast,fp32,fp32,pe
43
+ 22410.249999999967,43.87263773266214,91761.7536,0.0,pe_standard,fp32,fp32,pe
44
+ 13719.47999999998,70.90951378764596,104188.5504,0.0,pe_pipelined,fp32,fp32,pe
45
+ 27447.11999999995,35.93229206341912,28493.4672,0.0,pe_combinational_lmul,fp32,fp32,pe
46
+ 20278.279999999977,48.399711924914676,30333.441600000002,0.0,pe_standard_lmul,fp32,fp32,pe
47
+ 5140.169999999999,181.0554446088026,107861.52960000001,0.0,pe_fast_pipelined,fp32,fp32,pe
48
+ 8719.18,109.86379087207679,34816.6368,0.0,pe_fast_lmul,fp32,fp32,pe
49
+ 10734.710000000005,89.94658072570697,44032.1904,0.0,pe_pipelined_lmul,fp32,fp32,pe
50
+ 4476.04,205.8019691132405,47591.9136,0.0,pe_fast_pipelined_lmul,fp32,fp32,pe
51
+ 41746.72999999997,23.736207186706412,89921.77919999999,0.0,pe_combinational,fp32,fp32,pe