MekkCyber committed
Commit 81d263b · 1 Parent(s): 2fafa6a

adding some activations

activation/activation_kernels.cu CHANGED
@@ -44,7 +44,7 @@ __device__ __forceinline__ T gelu_kernel(const T& x) {
   // https://github.com/pytorch/pytorch/blob/8ac9b20d4b090c213799e81acf48a55ea8d437d6/aten/src/ATen/native/cuda/ActivationGeluKernel.cu#L36-L38
   const float f = (float)x;
   constexpr float ALPHA = M_SQRT1_2;
-  return (T)(f * 0.5f * (1.0f + ::erf(f * ALPHA)));
+  return (T)(f * 0.5f * (1.0f + erf(f * ALPHA)));
 }
 
 template <typename T>
@@ -183,6 +183,7 @@ __global__ void activation_kernel(
 
 namespace vllm {
 
+
 template <typename T>
 __device__ __forceinline__ T gelu_new_kernel(const T& x) {
   const float x3 = (float)(x * x * x);
@@ -223,3 +224,22 @@ void gelu_quick(torch::Tensor& out, // [..., d]
 {
   LAUNCH_ACTIVATION_KERNEL(vllm::gelu_quick_kernel);
 }
+
+void gelu(torch::Tensor& out,  // [..., d]
+          torch::Tensor& input,
+          std::string approximation)  // [..., d]
+{
+  if (approximation == "none") {
+    LAUNCH_ACTIVATION_KERNEL(vllm::gelu_kernel);
+  } else if (approximation == "tanh") {
+    LAUNCH_ACTIVATION_KERNEL(vllm::gelu_tanh_kernel);
+  } else {
+    throw std::invalid_argument("Invalid approximation");
+  }
+}
+
+void silu(torch::Tensor& out,    // [..., d]
+          torch::Tensor& input)  // [..., d]
+{
+  LAUNCH_ACTIVATION_KERNEL(vllm::silu_kernel);
+}
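For reference, the two branches of the new `gelu` entry point correspond to the exact erf-based GELU (the `gelu_kernel` shown above) and the tanh approximation that `gelu_tanh_kernel` presumably implements, while `silu` computes x * sigmoid(x). Below is a minimal PyTorch sketch of the expected numerics; it is an illustration only, not part of the commit, and only the kernel names are taken from the diff.

```python
import math
import torch

def gelu_reference(x: torch.Tensor, approximation: str = "none") -> torch.Tensor:
    # Exact GELU: 0.5 * x * (1 + erf(x / sqrt(2))), matching the erf-based kernel above.
    if approximation == "none":
        return 0.5 * x * (1.0 + torch.erf(x * (1.0 / math.sqrt(2.0))))
    # Standard tanh approximation: 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3))).
    if approximation == "tanh":
        return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * x.pow(3))))
    raise ValueError("Invalid approximation")

def silu_reference(x: torch.Tensor) -> torch.Tensor:
    # SiLU: x * sigmoid(x).
    return x * torch.sigmoid(x)
```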
torch-ext/activation/__init__.py CHANGED
@@ -30,6 +30,15 @@ def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0)
     return out
 
 
+def gelu(out: torch.Tensor, x: torch.Tensor, approximation: str = "none") -> None:
+    ops.gelu(out, x, approximation)
+    return out
+
+def silu(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.silu(out, x)
+    return out
+
+
 def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None:
     ops.gelu_fast(out, x)
     return out
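The new wrappers follow the same out-parameter convention as the existing helpers such as `gelu_fast`: the caller allocates `out` and the op writes into it (the wrapper also returns `out` for convenience). A hedged usage sketch, assuming the built extension is importable as `activation` and a CUDA device is available:

```python
import torch
import activation  # assumed import name for the built torch-ext package

x = torch.randn(8, 4096, device="cuda", dtype=torch.float16)

# GELU with the tanh approximation, written in place into `out`.
out = torch.empty_like(x)
activation.gelu(out, x, approximation="tanh")

# SiLU.
out2 = torch.empty_like(x)
activation.silu(out2, x)

# Sanity check against PyTorch's reference implementations (loose fp16 tolerances).
torch.testing.assert_close(out, torch.nn.functional.gelu(x, approximate="tanh"), rtol=1e-3, atol=1e-3)
torch.testing.assert_close(out2, torch.nn.functional.silu(x), rtol=1e-3, atol=1e-3)
```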
torch-ext/activation/layers.py CHANGED
@@ -23,6 +23,39 @@ class SiluAndMul(nn.Module):
         ops.silu_and_mul(out, x)
         return out
 
+class Silu(nn.Module):
+    """An activation function for SiLU.
+
+    The function computes x -> silu(x).
+
+    Shapes:
+        x: (num_tokens, d) or (batch_size, seq_len, d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor):
+        out = torch.empty_like(x)
+        ops.silu(out, x)
+        return out
+
+class Gelu(nn.Module):
+    """An activation function for GELU.
+
+    The function computes x -> gelu(x).
+
+    Shapes:
+        x: (num_tokens, d) or (batch_size, seq_len, d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor, approximation: str = "none"):
+        out = torch.empty_like(x)
+        ops.gelu(out, x, approximation)
+        return out
 
 class MulAndSilu(nn.Module):
     """An activation function for SwiGLU.