
    `iU              "          d dl mZ ddlmZ ddlmZ ddlZddlmZ ddl	m
Z
 ddlmZ ddlZddlmZmZ ddlZ G d	 d
e          ZddlZ ej        e          Zi Zdddddej        dddf	dej        dej        dej        dej        dej        deej                 deej                 dedee         deej                 dej        dee         dee         deej                 deej        ej        ej        f         fdZ dS )   )HopperSelectAttentionFwd    )_convert_to_cutlass_data_type)APIBaseN)from_dlpack)driver)TupleOptionalc                        e Zd Zddddej        ddfdej        dej        dej        dej        dej        d	ej        d
ej        dej        deej                 deej                 dee         dee         dej        dedee	         f fdZ
defdZdej        dej        dej        dej        dej        dej        deej        df         fdZd-deej                 ddfdZ	 	 	 	 	 d.d!ej        d"ej        d#ej        d$ej        d%ej        d&ej        d'ej        d(ej        d)eej                 d*eej                 dee	         deej                 d+efd,Z xZS )/SelectionAttentionNi   @   sample_qsample_ksample_vsample_osample_lsample_msample_block_indicessample_block_countssample_cum_seqlen_qsample_cum_seqlen_kmax_s_qmax_s_k	acc_dtype
block_sizescale_softmaxc                     t                                                       t          | _        | j                            d           | j                            d           || _        || _        || _	        || _
        || _        || _        || _        || _        |	| _        |
| _        || _        || _        || _        || _        d | _        d | _        d | _        d | _        d | _        d | _        d | _        || _        | j                            d|j         d|j         d|j         d|j         d|j         d|j         d	|j         d
|j         d|	|	j        nd d|
|
j        nd d| d| d| d| d|            d S )Nz)SelectionAttention is an experimental APIzEntering __init__z'__init__ completed with args: sample_q z, sample_k z, sample_v z, sample_o z, sample_l z, sample_m z, sample_block_indices z, sample_block_counts z, sample_cum_seqlen_q Nonez, sample_cum_seqlen_k z, acc_dtype z
, max_s_q z
, max_s_k z, block_size z, scale_softmax )super__init__r   _kernel_loggerwarningdebugr   r   r   r   r   r   r   r   r   r   r   r   r   r   input_layoutdtypeh_qh_kvgqa_group_sizehead_dim	value_dimr   shape)selfr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   	__class__s                   /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/cudnn/native_sparse_attention/selection/api.pyr    zSelectionAttention.__init__   s   $ 	/HIII./// !     $8!#6 #6 #6  #$ !
	"* g	hn  g	  g	QYQ_  g	  g	ltlz  g	  g	  HP  HV  g	  g	  ck  cq  g	  g	  ~F  ~L  g	  g	  ey  e  g	  g	  Wj  Wp  g	  g	  ex  eD  H[  Ha  Ha  JP  g	  g	  EX  Ed  h{  hA  hA  jp  g	  g	  ~G  g	  g	  SZ  g	  g	  fm  g	  g	  |F	  g	  g	  X	e	  g	  g		
 	
 	
 	
 	
    returnc                    | j                             d           | j                             d           | j        j        dk    rd| _        t          d          | j        j        dk    rd| _        | j        j        \  }}}| j        j        \  }}}| j        j        \  }}}| j	        j        \  }}}| j        j        |||fk    r"t          d|||f d	| j        j                   | j        j        |||fk    r"t          d
|||f d	| j        j                   | j        j        |||fk    r"t          d|||f d	| j        j                   | j	        j        |||fk    r"t          d|||f d	| j	        j                   |                     | j        dd          | _        | j        j        ||fk    r!t          d||f d	| j        j                   |                     | j        dd          | _        | j        j        ||fk    r!t          d||f d	| j        j                   | j        t          d| j                   | j        >t!          j        | j        | j                  st          d| j         d| j                   | j        t          d| j                   | j        /| j        | j        k    rt          d| j         d| j                   t)          | j                  dz
  | _        | j        dk    rt          d| j                   | j        j        t           j        t           j        fvrt          d| j        j                   | j        j        d d         ||fk    r?| j        j        dk    r/t          d||df d	t5          | j        j                             | j        j        ||fk    r.t          d||f d	t5          | j        j                             | j        j        t           j        k    s| j        j        t           j        k    r)t          d| j        j         d| j        j                   nt          d| j        j                   ||z  dk    rt          d           || _        || _        ||z  | _        || _        || _         | j                             d!           | j        j        | _        | j        | j        j        cxk    r!| j        j        cxk    r| j	        j        k    sn t          d"          | j        t           j!        t           j"        hvrt          d#          | j#        t           j$        hvrt          d$          | j%        d%vrt          d&          | j&        !d'tO          j(        | j                  z  | _&        t           j)        *                                s)| j         +                    d(           tY          d(          | j                             d)           t           j)        -                                }t           j)        .                    |          \  }}|d*z  |z   }	|	d+k     r5| j         +                    d,|	 d-|            tY          d,|	 d-|           |	d.k    rtY          d/          d0| _/        | j                             d1           d0S )2NzEntering check_supportz+Checking shape normalization and validation   B,H,S,Dz#B, H_q, S, D format not implemented   T,H,Dz.Input shape mismatch: expected Q tensor shape z, got z.Input shape mismatch: expected K tensor shape z.Input shape mismatch: expected V tensor shape z/Output shape mismatch: expected O tensor shape    r   z/Output shape mismatch: expected L tensor shape r   z/Output shape mismatch: expected M tensor shape z;sample_cum_seqlen_q must be provided for T,H,D format, got zaSelectionAttention requires sample_cum_seqlen_q and sample_cum_seqlen_k to be identical, but got z and z/max_s_q must be provided for T,H,D format, got zISelectionAttention requires max_s_q and max_s_k to be identical, but got r   r   zFbatch_size (len(sample_cum_seqlen_q) - 1) must be greater than 0, got z0sample_cum_seqlen_q must be int32 or int64, got z.sample_block_indices shape mismatch: expected Kz-sample_block_counts shape mismatch: expected z@sample_block_indices and sample_block_counts must be int32, got z9sample_q must be rank-3 (T,H,D) or rank-4 (B,H,S,D), got z3H_q must be a multiple of H_kv (GQA/MQA constraint)zChecking dtypes and configz1All input/output tensors must have the same dtypez!dtype must be Float16 or BFloat16zacc_dtype must be Float32>          r   z block_size must be 16, 32, or 64g      ?zCUDA is not availablezChecking environment
   Z   z/Requires SM90+ compute capability, but found SMz on device g   z4cuteDSL SelectionAttention is not supported on SM103Tz$check_support completed successfully)0r"   r$   r   ndimr%   NotImplementedErrorr,   r   r   r   
ValueError_unpad_tensor_to_ndimr   r   r   r   torchequalr   r   len
batch_sizer&   int32int64r   tupler   r'   r(   r)   r*   r+   float16bfloat16r   float32r   r   mathsqrtcudais_availableerrorRuntimeErrorcurrent_deviceget_device_capability_is_supported)
r-   tr'   d_qkr(   d_vdevicemajorminorcompute_capabilitys
             r/   check_supportz SelectionAttention.check_supportH   s   3444 	HIII="" )D%&KLLL]1$$ 'D=.LAsD M/MAtT=.LAtS--KAsC}"q#tn44 !{RSUXZ^Q_!{!{fjfsfy!{!{|||}"q$o55 !|RSUY[_Q`!|!|gkgtgz!|!|}}}}"q$n44 !{RSUY[^Q_!{!{fjfsfy!{!{|||}"q#sm33 !{STVY[^R_!{!{fjfsfy!{!{||| 66t}aTTDM}"q#h.. !vSTVYRZ!v!vaeanat!v!vwww 66t}aTTDM}"q#h.. !vSTVYRZ!v!vaeanat!v!vwww'/ !y_c_w!y!yzzz'3EKH`bfbz<{<{3) rx|  yQ  r  r  X\  Xp  r  r   |# !aSWS_!a!abbb|'DLDL,H,H)  +Xvz  wC  +X  +X  JN  JV  +X  +X  Y  Y  Y!$":;;a?DO!## !{jnjy!{!{|||'-ek5;5OOO !tTXTlTr!t!tuuu(.rr2q$i??DD]DbfgDgDg   "QSTVZ\_R`  "Q  "Qhmnr  oH  oN  iO  iO  "Q  "Q  R  R  R'-!T::   "JRSUYQZ  "J  "Jbghl  iA  iG  cH  cH  "J  "J  K  K  K(.%+==AYA_chcnAnAn  ^W[WpWv  ^  ^  ~B  ~V  ~\  ^  ^   Bo
 mY]YfYkmmnnn :??RSSS	!Tk 	7888](

dm1____T]5H____DML_____PQQQ:emU^<<<@AAA>%-008999?,..?@@@ %!$ty'?'?!?Dz&&(( 	8L677767771222**,,z77??u"RZ%/""LxQcxxpvxxyyyxQcxxpvxxyyy$$UVVV!ABBBtr0   qkvolm.c                    | j         dk    rt          d          | j         dk    r|j        \  }}}	|j        \  }
}}
|j        \  }
}
}|                    ||| j        |	                              dddd          }|                    ddd          }|                    ddd          }|                    ||| j        |                              dddd          }|                    ||| j                                      ddd          }|                    ||| j                                      ddd          }nt          d| j                    d	 } |||          st          d
           |||          st          d           |||          st          d           |||          st          d           |||          st          d           |||          st          d          ||||||fS )a(  
        Reshape tensors from input format to kernel expected format:
        - Q: (gqa_group_size, d, T, h_kv)
        - K: (T, d, h_kv)
        - V: (T, d_v, h_kv)
        - O: (gqa_group_size, d_v, T, h_kv)
        - L: (gqa_group_size, T, h_kv)
        - M: (gqa_group_size, T, h_kv)
        r4   zB,H,S,D format not implementedr6   r7   r5   r   r   zInvalid input layout: c                 V    |                                  |                                 k    S N)data_ptr)originalreshapeds     r/   shares_memoryz:SelectionAttention._reshape_tensors.<locals>.shares_memory   s%    $$&&(*;*;*=*===r0   z@Q tensor memory changed during reshape - expected view operationz@K tensor memory changed during reshape - expected view operationz@V tensor memory changed during reshape - expected view operationz@O tensor memory changed during reshape - expected view operationz@L tensor memory changed during reshape - expected view operationz@M tensor memory changed during reshape - expected view operation)r%   r?   r,   viewr)   permuter@   )r-   r]   r^   r_   r`   ra   rb   Tr'   d_r(   rW   
q_reshaped
k_reshaped
v_reshaped
o_reshaped
l_reshaped
m_reshapedri   s                       r/   _reshape_tensorsz#SelectionAttention._reshape_tensors   sM   $ 	))%&FGGG'))IAsAJAtQIAq# 4)<a@@HHAqRSTTJ1a++J1a++J4)<cBBJJ1aQRTUVVJ4)<==EEaANNJ4)<==EEaANNJJId6GIIJJJ	> 	> 	> }Q
++ 	a_```}Q
++ 	a_```}Q
++ 	a_```}Q
++ 	a_```}Q
++ 	a_```}Q
++ 	a_```:z:z:UUr0   current_streamc                    | j                             d           |                     |          }|                                  |                     | j        | j        | j        | j        t          | j
                  t          | j                            }| j                             d           |                     | j        | j        | j        | j        | j        | j                  \  }}}}}}t'          |d          }	t'          |d          }
t'          |d          }t'          |d          }t'          |          }t'          |          }t'          | j                  }t'          | j                  }t'          | j                  }| j                             d           t/          j        ||	|
||||||| j        || j        |          | _        | j                             d           d S )	NzEntering compiler*   r+   GQA_group_sizer   r&   r   +Reshaping tensors to kernel expected format   assumed_alignzCompiling selection_attentionQr8   VOLMblock_indicesblock_counts
max_lengthseq_offsetssoftmax_scalestreamzKernel compiled successfully)r"   r$   _get_default_stream_ensure_support_checkedr!   r*   r+   r)   r   r   r&   r   ru   r   r   r   r   r   r   r   r   r   r   cutecompiler   r   _compiled_kernel)r-   rv   selection_attentionro   rp   rq   rr   rs   rt   mQmKmVmOmLmMm_block_indicesm_block_countsm_cum_seqlen_qs                     r/   r   zSelectionAttention.compile   s   -...11.AA$$&&&"ll]n./
;;3DNCC + 
 
 	HIIIQUQfQfMMMMMMR
 R
N
J
J
J 3777377737773777$$$$%d&?@@$T%=>>$T%=>> 	:;;; $)'|&,!!
 !
 !
 	9:::::r0   Fq_tensork_tensorv_tensoro_tensorl_tensorm_tensorblock_indices_tensorblock_counts_tensorcum_seqlen_q_tensorcum_seqlen_k_tensorskip_compilec                    | j                             d           |                     |          }| j                             d           |                     |dd          }|                     |dd          }|                     ||||||          \  }}}}}}t          |d          }t          |d          }t          |d          }t          |d          }t          |          }t          |          }t          |          }t          |          }t          |	          }|| j        n|}|sr| j        t          d          | j                             d	           |                     ||||||||| j	        |||
           | j                             d           d S | j                             d           | 
                    | j        | j        | j        | j        t          | j                  t          | j                            } |||||||||| j	        |||
           | j                             d           d S )NzEntering executerz   r7   r   r   r{   r|   z&SelectionAttention kernel not compiledzExecuting with compiled kernelr~   z*Executed with compiled kernel successfullyz'Executing without compiled kernel (JIT)rx   zExecuted successfully)r"   r$   r   rA   ru   r   r   r   rQ   r   r!   r*   r+   r)   r   r   r&   r   )r-   r   r   r   r   r   r   r   r   r   r   r   rv   r   ro   rp   rq   rr   rs   rt   r   r   r   r   r   r   r   r   r   r   s                                 r/   executezSelectionAttention.execute"  s     	-...11.AAHIII--h:FF--h:FFQUQfQfh(HhR
 R
N
J
J
J 3777377737773777$$$$%&:;;$%899$%899 /<.C** +	8$,"#KLLLL?@@@!!-+<*+% "    LKLLLLLLHIII"&,,.#2?3DJ??7GG #/ # #  -+<*+%    L677777r0   re   )NNNNF)__name__
__module____qualname__rB   rK   Tensorr
   intr&   floatr    boolr\   r	   ru   rN   CUstreamr   r   __classcell__)r.   s   @r/   r   r      s        7;6:!%!%!&)-!7
 7
,7
 ,7
 ,	7

 ,7
 ,7
 ,7
 $l7
 #\7
 &el37
 &el37
 #7
 #7
 ;7
 7
   !7
 7
 7
 7
 7
 7
rgt g g g gR:V<:V <:V <	:V
 <:V <:V <:V 
u|S 	!:V :V :V :Vx3; 3;ht}&= 3; 3; 3; 3; 3;~ 7;6:)-26"R8 R8,R8 ,R8 ,	R8
 ,R8 ,R8 ,R8 $lR8 #\R8 &el3R8 &el3R8  R8 !/R8 R8 R8 R8 R8 R8 R8 R8 R8r0   r   r   r   r   r   r   r   r   r   r   r   o_dtyper   r   r   r   r1   c                    t                               d           |4t          |dd         |dd         z
                                            n|}|4t          |dd         |dd         z
                                            n|}| j        \  }}}|j        \  }}}|	|	n| j        }	t          j        |||f|	| j                  }t          j        ||dft          j	        | j                  }t          j        ||dft          j	        | j                  }| j        |j        |j        |j        |j        |j        |j        | j        |j        |j        | 
                                |
                                |
                                |
                                |
                                |
                                |
                                |||
||f}|t          v rIt                               d           t          |         }|                    | |||||||||||           nt                               d           t          | ||||||||||
||||	          }|                                sJ |                                 |                    | |||||||||||           |t          |<   |||fS )
z
    Selection Attention Wrapper that returns output tensors directly.

    Returns:
        tuple: (o_tensor, l_tensor, m_tensor) - Output, logsumexp, and max tensors
    zFselection_attention_wrapper: Creating empty output tensors o, l, and mNr   )r&   rX   zNselection_attention_wrapper: Using previously cached SelectionAttention object)r   r   r   r   r   r   r   r   r   r   r   rv   zyselection_attention_wrapper: No previously cached SelectionAttention object found, creating new SelectionAttention object)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r"   r$   maxitemr,   r&   rB   emptyrX   rK   stride#_cache_of_SelectionAttentionObjectsr   r   r\   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   rU   r'   rm   rn   r(   rW   r   r   r   	cache_keyr   s                            r/   selection_attention_wrapperr   }  s'   , MMZ[[[PWP_c%abb),?,DDEEJJLLLelGPWP_c%abb),?,DDEEJJLLLelGIAsA>LAtS ,gg(.G{AsC=PPPH{AsA;emHOTTTH{AsA;emHOTTTH 	"!!!##%%""$$""$$""$$-I0 777fgggA)L##!5 3 3 3'! 	$ 	
 	
 	
 	
 	  R  	S  	S  	S0!5 3 3 3!'
 
 
" #0022222##%%%##!5 3 3 3'! 	$ 	
 	
 	
 :M+I6Xx''r0   )!NSA_select_attn_fwd_hmmar   cudnn.datatypesr   cudnn.api_baser   cutlasscutlass.cuter   cutlass.cute.runtimer   cuda.bindingsr   rN   rB   typingr	   r
   rL   r   logging	getLoggerr   r"   r   rK   r   r   r   r&   r   rH   r    r0   r/   <module>r      s*   > > > > > > 9 9 9 9 9 9 " " " " " "        , , , , , , ( ( ( ( ( (  " " " " " " " " f8 f8 f8 f8 f8 f8 f8 f8R 
'
H
%
%&( # 3726%)%)"]!!&*q( q(lq(lq( lq(  ,	q(
 q( "%,/q( "%,/q( q( E?q( ek"q( {q( c]q( c]q( T]#q( 5<u|34q( q( q( q( q( q(r0   