kernels-community
/

quantization-eetq

kernel

Model card Files Files and versions

xet

Community

danieldk HF Staff committed 29 days ago

Commit

0c60fb4

1 Parent(s): 2491f56

Enable Torch 2.8 build

Browse files

Files changed (3) hide show

build.toml +80 -68
flake.lock +79 -27
flake.nix +1 -1

build.toml CHANGED Viewed

@@ -1,80 +1,92 @@
 [general]
 name = "quantization_eetq"
 [torch]
 src = [
-  "torch-ext/torch_binding.cpp",
-  "torch-ext/torch_binding.h"
 ]
-[kernel.cutlass_kernels]
 src = [
-  "cutlass_extensions/include/cutlass_extensions/arch/mma.h",
-  "cutlass_extensions/include/cutlass_extensions/compute_occupancy.h",
-  "cutlass_extensions/include/cutlass_extensions/epilogue/epilogue_quant_helper.h",
-  "cutlass_extensions/include/cutlass_extensions/epilogue/thread/ft_fused_activations.h",
-  "cutlass_extensions/include/cutlass_extensions/epilogue/threadblock/epilogue_per_row_per_col_scale.h",
-  "cutlass_extensions/include/cutlass_extensions/epilogue/threadblock/epilogue_tensor_op_int32.h",
-  "cutlass_extensions/include/cutlass_extensions/epilogue_helpers.h",
-  "cutlass_extensions/include/cutlass_extensions/ft_gemm_configs.h",
-  "cutlass_extensions/include/cutlass_extensions/gemm/kernel/default_fpA_intB_traits.h",
-  "cutlass_extensions/include/cutlass_extensions/gemm/kernel/fpA_intB_gemm.h",
-  "cutlass_extensions/include/cutlass_extensions/gemm/kernel/fpA_intB_gemm_with_broadcast.h",
-  "cutlass_extensions/include/cutlass_extensions/gemm/kernel/mixed_gemm_B_layout.h",
-  "cutlass_extensions/include/cutlass_extensions/gemm/threadblock/default_dq_mma.h",
-  "cutlass_extensions/include/cutlass_extensions/gemm/threadblock/default_dq_mma_multistage.h",
-  "cutlass_extensions/include/cutlass_extensions/gemm/threadblock/default_dq_mma_pipelined.h",
-  "cutlass_extensions/include/cutlass_extensions/gemm/threadblock/default_mma.h",
-  "cutlass_extensions/include/cutlass_extensions/gemm/threadblock/default_mma_bf16.h",
-  "cutlass_extensions/include/cutlass_extensions/gemm/threadblock/dq_mma_base.h",
-  "cutlass_extensions/include/cutlass_extensions/gemm/threadblock/dq_mma_multistage.h",
-  "cutlass_extensions/include/cutlass_extensions/gemm/threadblock/dq_mma_pipelined.h",
-  "cutlass_extensions/include/cutlass_extensions/gemm/warp/default_mma_tensor_op.h",
-  "cutlass_extensions/include/cutlass_extensions/gemm/warp/mma_tensorop_compute_B_with_f16.h",
-  "cutlass_extensions/include/cutlass_extensions/gemm/warp/mma_tensorop_dequantizer.h",
-  "cutlass_extensions/include/cutlass_extensions/interleaved_numeric_conversion.h",
-  "cutlass_extensions/include/cutlass_extensions/tile_interleaved_layout.h",
-  "cutlass_kernels/cutlass_heuristic.cu",
-  "cutlass_kernels/cutlass_heuristic.h",
-  "cutlass_kernels/cutlass_preprocessors.cc",
-  "cutlass_kernels/cutlass_preprocessors.h",
-  "cutlass_kernels/fpA_intB_gemm.cu",
-  "cutlass_kernels/fpA_intB_gemm.h",
-  "cutlass_kernels/fpA_intB_gemm/fpA_intB_gemm.h",
-  "cutlass_kernels/fpA_intB_gemm/fpA_intB_gemm_template.h",
-  "cutlass_kernels/fpA_intB_gemm_wrapper.cu",
-  "cutlass_kernels/fpA_intB_gemm_wrapper.h",
-  "weightOnlyBatchedGemv/common.h",
-  "weightOnlyBatchedGemv/enabled.h",
-  "utils/activation_types.h",
-  "utils/cuda_utils.h",
-  "utils/logger.cc",
-  "utils/logger.h",
-  "utils/string_utils.h",
-  "utils/torch_utils.h",
 ]
-depends = [ "cutlass_2_10", "torch" ]
-include = [ ".", "utils", "cutlass_extensions/include" ]
-[kernel.weight_only_batched_gemv]
 src = [
-  "cutlass_extensions/include/cutlass_extensions/interleaved_numeric_conversion.h",
-  "cutlass_extensions/include/cutlass_extensions/gemm/kernel/mixed_gemm_B_layout.h",
-  "weightOnlyBatchedGemv/common.h",
-  "weightOnlyBatchedGemv/enabled.h",
-  "weightOnlyBatchedGemv/kernel.h",
-  "weightOnlyBatchedGemv/kernelLauncher.cu",
-  "weightOnlyBatchedGemv/kernelLauncher.h",
-  "weightOnlyBatchedGemv/utility.h",
-  "weightOnlyBatchedGemv/weightOnlyBatchedGemvBs1Int4b.cu",
-  "weightOnlyBatchedGemv/weightOnlyBatchedGemvBs1Int8b.cu",
-  "weightOnlyBatchedGemv/weightOnlyBatchedGemvBs2Int4b.cu",
-  "weightOnlyBatchedGemv/weightOnlyBatchedGemvBs2Int8b.cu",
-  "weightOnlyBatchedGemv/weightOnlyBatchedGemvBs3Int4b.cu",
-  "weightOnlyBatchedGemv/weightOnlyBatchedGemvBs3Int8b.cu",
-  "weightOnlyBatchedGemv/weightOnlyBatchedGemvBs4Int4b.cu",
-  "weightOnlyBatchedGemv/weightOnlyBatchedGemvBs4Int8b.cu",
 ]
-depends = [ "cutlass_2_10", "torch" ]
-include = [ "cutlass_extensions/include" ]

 [general]
 name = "quantization_eetq"
+universal = false
 [torch]
 src = [
+    "torch-ext/torch_binding.cpp",
+    "torch-ext/torch_binding.h",
 ]
+[kernel.weight_only_batched_gemv]
+backend = "cuda"
+depends = [
+    "cutlass_2_10",
+    "torch",
+]
+include = ["cutlass_extensions/include"]
 src = [
+    "cutlass_extensions/include/cutlass_extensions/interleaved_numeric_conversion.h",
+    "cutlass_extensions/include/cutlass_extensions/gemm/kernel/mixed_gemm_B_layout.h",
+    "weightOnlyBatchedGemv/common.h",
+    "weightOnlyBatchedGemv/enabled.h",
+    "weightOnlyBatchedGemv/kernel.h",
+    "weightOnlyBatchedGemv/kernelLauncher.cu",
+    "weightOnlyBatchedGemv/kernelLauncher.h",
+    "weightOnlyBatchedGemv/utility.h",
+    "weightOnlyBatchedGemv/weightOnlyBatchedGemvBs1Int4b.cu",
+    "weightOnlyBatchedGemv/weightOnlyBatchedGemvBs1Int8b.cu",
+    "weightOnlyBatchedGemv/weightOnlyBatchedGemvBs2Int4b.cu",
+    "weightOnlyBatchedGemv/weightOnlyBatchedGemvBs2Int8b.cu",
+    "weightOnlyBatchedGemv/weightOnlyBatchedGemvBs3Int4b.cu",
+    "weightOnlyBatchedGemv/weightOnlyBatchedGemvBs3Int8b.cu",
+    "weightOnlyBatchedGemv/weightOnlyBatchedGemvBs4Int4b.cu",
+    "weightOnlyBatchedGemv/weightOnlyBatchedGemvBs4Int8b.cu",
 ]
+[kernel.cutlass_kernels]
+backend = "cuda"
+depends = [
+    "cutlass_2_10",
+    "torch",
+]
+include = [
+    ".",
+    "utils",
+    "cutlass_extensions/include",
+]
 src = [
+    "cutlass_extensions/include/cutlass_extensions/arch/mma.h",
+    "cutlass_extensions/include/cutlass_extensions/compute_occupancy.h",
+    "cutlass_extensions/include/cutlass_extensions/epilogue/epilogue_quant_helper.h",
+    "cutlass_extensions/include/cutlass_extensions/epilogue/thread/ft_fused_activations.h",
+    "cutlass_extensions/include/cutlass_extensions/epilogue/threadblock/epilogue_per_row_per_col_scale.h",
+    "cutlass_extensions/include/cutlass_extensions/epilogue/threadblock/epilogue_tensor_op_int32.h",
+    "cutlass_extensions/include/cutlass_extensions/epilogue_helpers.h",
+    "cutlass_extensions/include/cutlass_extensions/ft_gemm_configs.h",
+    "cutlass_extensions/include/cutlass_extensions/gemm/kernel/default_fpA_intB_traits.h",
+    "cutlass_extensions/include/cutlass_extensions/gemm/kernel/fpA_intB_gemm.h",
+    "cutlass_extensions/include/cutlass_extensions/gemm/kernel/fpA_intB_gemm_with_broadcast.h",
+    "cutlass_extensions/include/cutlass_extensions/gemm/kernel/mixed_gemm_B_layout.h",
+    "cutlass_extensions/include/cutlass_extensions/gemm/threadblock/default_dq_mma.h",
+    "cutlass_extensions/include/cutlass_extensions/gemm/threadblock/default_dq_mma_multistage.h",
+    "cutlass_extensions/include/cutlass_extensions/gemm/threadblock/default_dq_mma_pipelined.h",
+    "cutlass_extensions/include/cutlass_extensions/gemm/threadblock/default_mma.h",
+    "cutlass_extensions/include/cutlass_extensions/gemm/threadblock/default_mma_bf16.h",
+    "cutlass_extensions/include/cutlass_extensions/gemm/threadblock/dq_mma_base.h",
+    "cutlass_extensions/include/cutlass_extensions/gemm/threadblock/dq_mma_multistage.h",
+    "cutlass_extensions/include/cutlass_extensions/gemm/threadblock/dq_mma_pipelined.h",
+    "cutlass_extensions/include/cutlass_extensions/gemm/warp/default_mma_tensor_op.h",
+    "cutlass_extensions/include/cutlass_extensions/gemm/warp/mma_tensorop_compute_B_with_f16.h",
+    "cutlass_extensions/include/cutlass_extensions/gemm/warp/mma_tensorop_dequantizer.h",
+    "cutlass_extensions/include/cutlass_extensions/interleaved_numeric_conversion.h",
+    "cutlass_extensions/include/cutlass_extensions/tile_interleaved_layout.h",
+    "cutlass_kernels/cutlass_heuristic.cu",
+    "cutlass_kernels/cutlass_heuristic.h",
+    "cutlass_kernels/cutlass_preprocessors.cc",
+    "cutlass_kernels/cutlass_preprocessors.h",
+    "cutlass_kernels/fpA_intB_gemm.cu",
+    "cutlass_kernels/fpA_intB_gemm.h",
+    "cutlass_kernels/fpA_intB_gemm/fpA_intB_gemm.h",
+    "cutlass_kernels/fpA_intB_gemm/fpA_intB_gemm_template.h",
+    "cutlass_kernels/fpA_intB_gemm_wrapper.cu",
+    "cutlass_kernels/fpA_intB_gemm_wrapper.h",
+    "weightOnlyBatchedGemv/common.h",
+    "weightOnlyBatchedGemv/enabled.h",
+    "utils/activation_types.h",
+    "utils/cuda_utils.h",
+    "utils/logger.cc",
+    "utils/logger.h",
+    "utils/string_utils.h",
+    "utils/torch_utils.h",
 ]

flake.lock CHANGED Viewed

@@ -1,6 +1,21 @@
 {
   "nodes": {
     "flake-compat": {
       "locked": {
         "lastModified": 1733328505,
         "narHash": "sha256-NeCCThCEP3eCl2l/+27kNNK7QrwZB1IJCrXfrbv5oqU=",
@@ -33,61 +48,83 @@
         "type": "github"
       }
     },
-    "kernel-builder": {
       "inputs": {
-        "flake-compat": "flake-compat",
-        "flake-utils": "flake-utils",
-        "nixpkgs": "nixpkgs",
-        "rocm-nix": "rocm-nix"
       },
       "locked": {
-        "lastModified": 1745320030,
-        "narHash": "sha256-HDGGPgp1pBi90zylndBySdL0XHuFtq+blv/0fH4g0q8=",
-        "owner": "huggingface",
-        "repo": "kernel-builder",
-        "rev": "c12ad49918de63907aaae26d4fe21150a463380b",
         "type": "github"
       },
       "original": {
-        "owner": "huggingface",
-        "repo": "kernel-builder",
         "type": "github"
       }
     },
-    "nixpkgs": {
       "locked": {
-        "lastModified": 1743559129,
-        "narHash": "sha256-7gpAWsENV3tY2HmeHYQ2MoQxGpys+jQWnkS/BHAMXVk=",
-        "owner": "nixos",
-        "repo": "nixpkgs",
-        "rev": "adae22bea8bcc0aa2fd6e8732044660fb7755f5e",
         "type": "github"
       },
       "original": {
-        "owner": "nixos",
-        "ref": "nixos-unstable-small",
-        "repo": "nixpkgs",
         "type": "github"
       }
     },
-    "rocm-nix": {
       "inputs": {
         "nixpkgs": [
           "kernel-builder",
           "nixpkgs"
         ]
       },
       "locked": {
-        "lastModified": 1745310663,
-        "narHash": "sha256-1U3PzCO/jt7HUlEgLOY3RpxadKwTo6GSvb2j4m0UFw0=",
         "owner": "huggingface",
-        "repo": "rocm-nix",
-        "rev": "e08373a0efa1c297b0c57af070e0a311df47481f",
         "type": "github"
       },
       "original": {
         "owner": "huggingface",
-        "repo": "rocm-nix",
         "type": "github"
       }
     },
@@ -110,6 +147,21 @@
         "repo": "default",
         "type": "github"
       }
     }
   },
   "root": "root",

 {
   "nodes": {
     "flake-compat": {
+      "locked": {
+        "lastModified": 1747046372,
+        "narHash": "sha256-CIVLLkVgvHYbgI2UpXvIIBJ12HWgX+fjA8Xf8PUmqCY=",
+        "owner": "edolstra",
+        "repo": "flake-compat",
+        "rev": "9100a0f413b0c601e0533d1d94ffd501ce2e7885",
+        "type": "github"
+      },
+      "original": {
+        "owner": "edolstra",
+        "repo": "flake-compat",
+        "type": "github"
+      }
+    },
+    "flake-compat_2": {
       "locked": {
         "lastModified": 1733328505,
         "narHash": "sha256-NeCCThCEP3eCl2l/+27kNNK7QrwZB1IJCrXfrbv5oqU=",
         "type": "github"
       }
     },
+    "flake-utils_2": {
       "inputs": {
+        "systems": "systems_2"
       },
       "locked": {
+        "lastModified": 1731533236,
+        "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
         "type": "github"
       },
       "original": {
+        "owner": "numtide",
+        "repo": "flake-utils",
         "type": "github"
       }
     },
+    "hf-nix": {
+      "inputs": {
+        "flake-compat": "flake-compat_2",
+        "flake-utils": "flake-utils_2",
+        "nixpkgs": "nixpkgs"
+      },
       "locked": {
+        "lastModified": 1753354560,
+        "narHash": "sha256-vmOfRmr0Qm/IbZTWB2sBn+UFrABSTTA/cTg+m27Yt/E=",
+        "owner": "huggingface",
+        "repo": "hf-nix",
+        "rev": "7f2aceda2a2e72cd573bdb25e5c0667fd75f89d3",
         "type": "github"
       },
       "original": {
+        "owner": "huggingface",
+        "repo": "hf-nix",
         "type": "github"
       }
     },
+    "kernel-builder": {
       "inputs": {
+        "flake-compat": "flake-compat",
+        "flake-utils": "flake-utils",
+        "hf-nix": "hf-nix",
         "nixpkgs": [
           "kernel-builder",
+          "hf-nix",
           "nixpkgs"
         ]
       },
       "locked": {
+        "lastModified": 1753354632,
+        "narHash": "sha256-31SX3Raiyx0qCuY9JSlx9ZZgxljeUxvW+JdujjxbofQ=",
         "owner": "huggingface",
+        "repo": "kernel-builder",
+        "rev": "524b628fd8e58525dbd28455bffb0628092c5265",
         "type": "github"
       },
       "original": {
         "owner": "huggingface",
+        "ref": "torch-2.8",
+        "repo": "kernel-builder",
+        "type": "github"
+      }
+    },
+    "nixpkgs": {
+      "locked": {
+        "lastModified": 1752785354,
+        "narHash": "sha256-Y33ryUz7MPqKrZwlbQcsYCUz2jAJCacRf8jbs0tYUlA=",
+        "owner": "nixos",
+        "repo": "nixpkgs",
+        "rev": "d38025438a6ee456758dc03188ca6873a415463b",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nixos",
+        "repo": "nixpkgs",
+        "rev": "d38025438a6ee456758dc03188ca6873a415463b",
         "type": "github"
       }
     },
         "repo": "default",
         "type": "github"
       }
+    },
+    "systems_2": {
+      "locked": {
+        "lastModified": 1681028828,
+        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
+        "owner": "nix-systems",
+        "repo": "default",
+        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nix-systems",
+        "repo": "default",
+        "type": "github"
+      }
     }
   },
   "root": "root",

flake.nix CHANGED Viewed

@@ -2,7 +2,7 @@
   description = "Flake for EETQ kernels";
   inputs = {
-    kernel-builder.url = "github:huggingface/kernel-builder";
   };
   outputs =

   description = "Flake for EETQ kernels";
   inputs = {
+    kernel-builder.url = "github:huggingface/kernel-builder/torch-2.8";
   };
   outputs =