
    )`iL                         d Z ddlZddlmZ ddlmZmZmZ ddlZddl	m
Z
 ddlmZ ddlmZmZ ej        d	             Ze
	 	 	 	 	 	 	 	 	 ddej        dej        dej        deej                 deej                 dee         deej                 dedeej                 dedeej                 deej                 deej        eej        ej        f         f         fd            ZdS )a3  
Copyright (c) 2025 by FlashInfer team.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
    N)SimpleNamespace)OptionalUnionTuple   )flashinfer_api)gen_gdn_prefill_sm90_module)register_custom_opregister_fake_opc                  (   t                                                      t          dd          dt          j        dt          j        dt          j        dt          j        dt          j        d	t          j        d
t
          t          j                 dt
          t          j                 dt
          t          j                 dt          dd ffd            } t          d          dt          j        dt          j        dt          j        dt          j        dt          j        d	t          j        d
t
          t          j                 dt
          t          j                 dt
          t          j                 dt          dd fd            }t          |           S )Nzflashinfer::gdn_prefill)outputoutput_state)mutates_argsr   r   qkv
cu_seqlensinitial_stategbetascalereturnc
                 D    
                     | |||||||||	
  
         d S Ngdn_prefill)r   r   r   r   r   r   r   r   r   r   modules             j/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/flashinfer/gdn_prefill.pyr   z+get_gdn_prefill_module.<locals>.gdn_prefill"   sE     		
 	
 	
 	
 	
    c
                     d S r    )
r   r   r   r   r   r   r   r   r   r   s
             r   _fake_gdn_prefillz1get_gdn_prefill_module.<locals>._fake_gdn_prefill>   s	     	r   r   )	r	   build_and_loadr
   torchTensorr   floatr   r   )r   r"   r   s     @r   get_gdn_prefill_moduler'      s   (**99;;F!0J  

l
 <
 <	

 <
 L
  -
 EL!
 u|$
 
 

 
 
 
 
 
2 /00l < <	
 < L  - EL! u|$  
   10 {3333r   Fr   r   r   r   r   r   r   output_final_stater   use_qk_l2norm_in_kernelr   r   r   c                    |
J d            |                     d          dz
  }|                      d          }|                      d          }|                     d          }|                      d          }t          ||          }|}|
$t          j        |||f| j        | j                  }
|r-|+t          j        ||||ft          j        | j                  }n.|s,|*t          j        ||||ft          j        | j                  }t                                          |
|| |||	                    t          j
                  |||||nd
  
         |r|
|fS |
S )a2  Chunked Gated Delta Rule (GDN) attention for prefill.

    This implements the gated delta rule linear attention mechanism for efficient
    training and inference. Supports both GQA (grouped query attention) and GVA
    (grouped value attention) configurations.

    Args:
        q (torch.Tensor):
            Queries of shape ``[total_seq_len, num_q_heads, head_size]``.
            Must be contiguous and on CUDA.
        k (torch.Tensor):
            Keys of shape ``[total_seq_len, num_k_heads, head_size]``.
            Must be contiguous and on CUDA.
        v (torch.Tensor):
            Values of shape ``[total_seq_len, num_v_heads, head_size]``.
            Must be contiguous and on CUDA.
        g (Optional[torch.Tensor]):
            Forget gate (alpha) of shape ``[total_seq_len, num_sab_heads]`` where
            ``num_sab_heads = max(num_q_heads, num_v_heads)``. Must be float32.
            If None, defaults to all ones. Default: ``None``.
        beta (Optional[torch.Tensor]):
            Update gate (beta) of shape ``[total_seq_len, num_sab_heads]``.
            Must be float32. If None, defaults to all ones. Default: ``None``.
        scale (Optional[float]):
            Scale factor for the attention scores.
            If not provided, defaults to ``1 / sqrt(head_size)``. Default: ``None``.
        initial_state (Optional[torch.Tensor]):
            Initial KV state of shape ``[num_seqs, num_sab_heads, head_size, head_size]``.
            Must be float32. If None, starts from zero state. Default: ``None``.
        output_final_state (bool):
            Whether to output the final state. Default: ``False``.
        cu_seqlens (torch.Tensor):
            Cumulative sequence lengths of shape ``[num_seqs + 1]``, int64.
            Required for variable-length sequences (varlen mode).
        use_qk_l2norm_in_kernel (bool):
            Whether to use QK L2 normalization in kernel. Default: ``False``.
        output (Optional[torch.Tensor]):
            Pre-allocated output tensor of shape ``[total_seq_len, num_o_heads, head_size]``
            where ``num_o_heads = max(num_q_heads, num_v_heads)``.
            If None, will be allocated automatically. Default: ``None``.
        output_state (Optional[torch.Tensor]):
            Pre-allocated output state tensor of shape
            ``[num_seqs, num_sab_heads, head_size, head_size]``, float32.
            Required if ``output_final_state=True``. Default: ``None``.

    Returns:
        Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
            - If ``output_final_state=False``: Returns output tensor of shape
              ``[total_seq_len, num_o_heads, head_size]``.
            - If ``output_final_state=True``: Returns tuple of (output, final_state) where
              final_state has shape ``[num_seqs, num_sab_heads, head_size, head_size]``.

    Note:
        - Supports GQA: ``num_q_heads > num_k_heads = num_v_heads``
        - Supports GVA: ``num_v_heads > num_q_heads = num_k_heads``
        - The final state is in k-major layout ``[N, H, K, V]``.
        - Requires SM90 (Hopper) architecture.
    Nz&cu_seqlens is required for varlen moder   r      )dtypedeviceg        )sizemaxr$   emptyr,   r-   float32r'   r   toint64)r   r   r   r   r   r   r   r(   r   r)   r   r   num_seqstotal_seq_lennum_q_headsnum_v_heads	head_sizenum_o_headsnum_sab_headss                      r   chunk_gated_delta_ruler;   P   s   R !!#K!!!q!!A%HFF1IIM&&))K&&))Kq		Ik;//KM ~K3'8
 
 
  
l2{}i;-8
 
 

   
L$8{}i;-8
 
 
 ((			ek""	"    |##r   )	NNNNFNFNN)__doc__	functoolstypesr   typingr   r   r   r$   api_loggingr   jit.gdnr	   utilsr
   r   cacher'   r%   r&   boolr;   r!   r   r   <module>rE      s         ! ! ! ! ! ! ) ) ) ) ) ) ) ) ) )  ' ' ' ' ' ' 0 0 0 0 0 0        .4 .4 .4b 
 !%#'!,0$)-$)%)+/y y|y|y |y 	y
 5<
 y E?y EL)y y &y "y U\"y 5<(y 5<u|U\9::;y y y y y yr   