§
    )`ƒiÞ  ã                   ó‚   — d Z ddlZddlZddlmZ ddlmZmZ ddl	m
Z
 dZded	ed
efd„ZdZdZdZeeedœZded
efd„ZdS )a3  
Copyright (c) 2024 by FlashInfer team.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
é    Né   )Úenv)ÚJitSpecÚgen_jit_spec)Úwrite_if_differentaÍ  
#include <flashinfer/activation.cuh>
#include <cuda_runtime.h>
#include "tvm_ffi_utils.h"

{% set func_name = act_func_name ~ '_and_mul' %}

using namespace flashinfer;

{{ act_func_def }}

void {{ func_name }}(TensorView out, TensorView input, bool enable_pdl) {
  int d = input.size(input.ndim() -1) / 2;
  int64_t num_tokens = input.numel() / input.size(input.ndim() -1);
  dim3 grid(num_tokens);

  cudaSetDevice(out.device().device_id);
  const cudaStream_t stream = get_stream(out.device());
  DISPATCH_DLPACK_DTYPE_TO_CTYPE_FP16(input.dtype(), c_type, [&] {
    uint32_t vec_size = 16 / sizeof(c_type);
    cudaLaunchConfig_t config;
    config.gridDim = num_tokens;
    config.blockDim = std::min(d / vec_size, 1024U);
    config.dynamicSmemBytes = 0;
    config.stream = stream;
    cudaLaunchAttribute attrs[1];
    attrs[0].id = cudaLaunchAttributeProgrammaticStreamSerialization;
    attrs[0].val.programmaticStreamSerializationAllowed = enable_pdl;
    config.numAttrs = 1;
    config.attrs = attrs;

    auto kernel = flashinfer::activation::act_and_mul_kernel<c_type, {{ act_func_name }}>;

    cudaLaunchKernelEx(&config, kernel, static_cast<c_type*>(out.data_ptr()),
                       static_cast<c_type*>(input.data_ptr()), d);

    cudaError_t err = cudaGetLastError();
    TVM_FFI_ICHECK(err == cudaSuccess) << "Failed to launch kernel: " << cudaGetErrorString(err);

    return true;
  });
}

TVM_FFI_DLL_EXPORT_TYPED_FUNC({{ func_name }}, {{ func_name }});
Úact_func_nameÚact_func_defÚreturnc                 ób   — t          j        t          ¦  «        }|                     | |¬¦  «        S )N)r   r	   )Újinja2ÚTemplateÚactivation_templÚrender)r   r	   Útemplates      úm/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/flashinfer/jit/activation.pyÚget_act_and_mul_cu_strr   H   s(   € ÝŒÕ/Ñ0Ô0€HØ?Š?¨À\ˆ?ÑRÔRÐRó    zc
__device__ __forceinline__ float silu(const float& val) {
  return val / (1.0f + __expf(-val));
}
z—
__device__ __forceinline__ float gelu(const float& val) {
  constexpr float kAlpha = M_SQRT1_2;
  return val * 0.5f * (1.0f + ::erf(val * kAlpha));
}
zÉ
__device__ __forceinline__ float gelu_tanh(const float& val) {
  const float cdf =
      0.5f * (1.0f + math::tanh((0.7978845608028654f * (val + 0.044715f * val * val * val))));
  return val * cdf;
}
)ÚsiluÚgeluÚ	gelu_tanhc                 óà   — t           |          }t          j        }t          j        |d¬¦  «         || › dz  g}t          |d         t          | |¦  «        ¦  «         t          | › d|¦  «        S )NT)Úexist_okz_and_mul.cur   Ú_and_mul)Úact_func_def_strÚjit_envÚFLASHINFER_GEN_SRC_DIRÚosÚmakedirsr   r   r   )r   r	   Úgen_directoryÚsourcess       r   Úgen_act_and_mul_moduler!   i   sˆ   € Ý# MÔ2€LÝÔ2€MÝ„K¨Ð-Ñ-Ô-Ð-Ø -Ð<Ð<Ð<Ñ<Ð=€GÝØŒ
Ý˜}¨lÑ;Ô;ñô ð õ ØÐ"Ð"Ð"Øñô ð r   )Ú__doc__r   r   Ú r   r   Úcorer   r   Úutilsr   r   Ústrr   Úsilu_def_cu_strÚgelu_def_cu_strÚgelu_def_tanh_cu_strr   r!   © r   r   ú<module>r+      sò   ððð ð  
€	€	€	à €€€à Ð Ð Ð Ð Ð Ø 'Ð 'Ð 'Ð 'Ð 'Ð 'Ð 'Ð 'Ø %Ð %Ð %Ð %Ð %Ð %ð,Ð ð^S¨#ð S¸Sð SÀSð Sð Sð Sð Sð
€ð€ðÐ ð ØØ%ðð Ð ð¨#ð °'ð ð ð ð ð ð r   