
    .`i                        d dl mZmZ d dlmZ d dlZd dlmZ d dlm	Z	 ddl
mZmZ ddlmZ dd	lmZmZ e G d
 d                      Ze G d d                      Ze G d d                      Z ed          Z eddd          Zdeez  ej        z  fdZe G d d                      Ze G d de                      Zdej        ez  dz  fdZddZdedee         fdZdS )    )	dataclassfields)TypeN)TensorDescriptor)create_ragged_descriptor   )
clear_sumssum_bitmatrix_rows)cuda_capability_geq)LayoutStridedLayoutc                   d    e Zd ZU ej        ed<   dZeed<   d Ze	d             Z
d Zd
dZd
d	ZdS )StoragedataNlayoutc                     t          | j        t          j                  sJ | j         t          | j        j                  | _        d S d S N)
isinstancer   torchTensorr   r   shapeselfs    z/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/third_party/triton_kernels/tensor.py__post_init__zStorage.__post_init__   sC    $)U\22222;'	88DKKK     c                     | j         j        S r   )r   devicer   s    r   r   zStorage.device   s    yr   c                    t          dd          sdS t          | j        j                  dvrdS t	          | j                                                  	                     d          n# t          $ r dY nw xY w| j        j        }| j        j	        t          j        k    rdn| j                                        dz  fd	t          |          D             }t          |          S )
N	   r   F)         r         c                 @    g | ]}|k    |         z  d z  dk    S )   r    ).0ibitwidth	major_dimstridess     r   
<listcomp>z,Storage.is_tma_compliant.<locals>.<listcomp>+   s2    ]]]!aS\nnWQZ(*S0A5nnnr   )r   lenr   r   liststrideindex
ValueErrorndimdtyper   uint8element_sizerangeall)r   r5   	compliantr,   r-   r.   s      @@@r   is_tma_compliantzStorage.is_tma_compliant   s    "1a(( 	5tyy005 ty''))**	a((II 	 	 	III	y~	5;6611DI<R<R<T<TWX<X]]]]]]uT{{]]]	9~~s   A0 0A?>A?Fc                    t          | j                                                  }t          | j        j                  }| j                                        d         dk    }|rQ|d d         |d         |d         gz   }|d d         |d         |d         gz   }|d d         |d         |d         gz   }| j        j        t
          j        k    rQ| j        j        dk    rA|	                    d          }||         dz  ||<   |d         dz  dk    rt          d          | j                            |          }t          | j        |||          S )	Nr$   r   BLACKWELL_VALUEr!   r(   r   z^inner shape need to be multiple of 128 for mxfp4 (CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN16B) TMAs.)r1   r   r2   r   r6   r   r7   r   namer3   r4   swizzle_block_shaper   )r   block_shape	transposer.   r   indxs         r   make_dense_tmazStorage.make_dense_tma.   sQ   ty''))**TY_%%I$$&&r*a/	 	@%crc*k"o{2-OOK#2#J%)U2Y!77Ecrclgbk72;%??G9?ek))dk.>BS.S.S==##D +D 1Q 6KRy3!##  "W X X Xk55kBB	5';GGGr   c                     |dv r|                      ||          S |dk    sJ t          | j        j                  dz
  }t	          | j        ||          S )N)densegatherscatterraggedr!   )
ragged_dim)rE   r0   r   r   r   )r   rB   moderC   rK   s        r   make_tmazStorage.make_tma?   sb    111&&{I>>>x))A-
'	;:VVVVr   )F)__name__
__module____qualname__r   r   __annotations__r   r   r   propertyr   r<   rE   rM   r)   r   r   r   r      s         
,FF9 9 9
     X   &H H H H"W W W W W Wr   r   c                       e Zd ZU eed<   dS )IntegerTyper,   N)rN   rO   rP   intrQ   r)   r   r   rT   rT   G   s         MMMMMr   rT   c                   4    e Zd ZU eed<   eed<   eed<   d ZdS )	FloatTypebitwidth_exponentbitwidth_mantissa	is_signedc                 X    t          | j                  | j        z   | j        z   | _        d S r   )rU   rZ   rX   rY   r,   r   s    r   r   zFloatType.__post_init__R   s'    DN++d.DDtG]]r   N)rN   rO   rP   rU   rQ   boolr   r)   r   r   rW   rW   L   sK         OOO^ ^ ^ ^ ^r   rW   r!   T)rX   rY   rZ   typec                 X    t          | t          j                  r
| j        dz  S | j        S Nr&   )r   r   r6   itemsizer,   )r]   s    r   r,   r,   Z   s+    $$$ !}q  =r   c                       e Zd ZU eej        z  ed<   dZee	z  ej        z  ed<   dZ
ee         dz  ed<   dZee         dz  ed<   d Zed             Zed             Zdd	Zd
 Zd Zd Zed             Zd ZddZdS )r   storageNr6   r   	shape_maxc           	      &   t          | j        t          j                  rt	          | j                  | _        | j        | j        j        j        | _        t          | j                  dk     r| j        t          d          | j        #t          | j        j        j                  | _        d d t          t          fd| j                            sJ | j        d gt          | j                  z  | _        t          t!          | j        | j                            D ]C\  }\  }}|- |          s"t          d| dt#          |                     |
|| j        |<   Dt          t          | j                            sJ d S )Nr&   z)shape must be provided for sub-byte typesc                 ,    t          | t                    S r   )r   rU   ss    r   <lambda>z&Tensor.__post_init__.<locals>.<lambda>t   s    :a-- r   c                 R    t          | d          o|                                 dk    S )Nnumelr   )hasattrrj   rf   s    r   rh   z&Tensor.__post_init__.<locals>.<lambda>u   s!    GAw//BAGGIIN r   c                 0     |           p
 |           S r   r)   )rg   is_intis_items    r   rh   z&Tensor.__post_init__.<locals>.<lambda>v   s    !8ggajj r   z
shape_max[z] must be `int` or `None`; got )r   rb   r   r   r   r6   r   r,   r   r4   r1   r:   maprc   r0   	enumeratezipr]   )r   r+   rg   smaxrm   rn   s       @@r   r   zTensor.__post_init__g   s   dlEL11 	1"4<00DL:*0DJDJ!##
(:HIII:dl/566DJ--BB388888$*EEFFFFF>!"Vc$*oo5DN%c$*dn&E&EFF 	& 	&LAy4t !\a!\!\PTUYPZPZ!\!\]]]|$%q!3vt~..///////r   c                 *    t          | j                  S r   )r0   r   r   s    r   r5   zTensor.ndim   s    4:r   c                     | j         j        S r   )rb   r   r   s    r   r   zTensor.device   s    |""r   c                     || j         j                                        n| j         j                            |          S r   )rb   r   r2   r   r+   s     r   r2   zTensor.stride   s6    -.Yt| '')))DL<M<T<TUV<W<WWr   c                 >    | j         j                                        S r   )rb   r   data_ptrr   s    r   rx   zTensor.data_ptr   s    | ))+++r   c                 >    | j         j                                        S r   )rb   r   rj   r   s    r   rj   zTensor.numel   s    | &&(((r   c                 0    t          | j                  dz  S r_   )r,   r6   r   s    r   r8   zTensor.element_size   s    
##q((r   c                 L    | j         }t          |t                    r|j        n|S r   )rb   r   r   r   )r   ts     r   r   zTensor.data   s$    L#Aw//6qvvQ6r   c                     | j         S r   )r5   r   s    r   dimz
Tensor.dim   s
    yr   c                 .    || j         S | j         |         S r   )r   rv   s     r   sizezTensor.size   s    9:z!}r   r   )rN   rO   rP   r   r   r   rQ   r6   rT   rW   r   r1   rU   rc   r   rR   r5   r   r2   rx   rj   r8   r   r~   r   r)   r   r   r   r   `   s?        u|####37E;"U[0777"E49t""""&ItCy4&&&0 0 08   X # # X#X X X X, , ,) ) )) ) ) 7 7 X7       r   r   c                   D     e Zd ZU dZdZej        ed<   d fd	Zd Z	 xZ
S )	Bitmatrixa@  
    Represents a boolean matrix in a packed format where each element occupies
    a single bit of memory.

    _scratchpad is either None or an all-zero array of size >= shape[-1]; we pass it along
    with the actual bitmatrix to avoid having to launch a separate memset
    kernel when we call Bitmatrix::sum().
    N
scratchpadc                 j    t                                          |t          ||           || _        d S )N)r6   r   rc   )super__init__BITr   )r   rb   r   rc   r   	__class__s        r   r   zBitmatrix.__init__   s/    5INNN$r   c                     | j         \  }}| j        }| j        t          ||          | _        | j        d |         }d | _        t	          | ||          S r   )r   r   r   r	   r
   )r   partials_block_size_n_colsdevout_rets         r   sumzBitmatrix.sum   sW    J	6k?"(55DO/'6'*!$1DEEEr   )NN)rN   rO   rP   __doc__r   r   r   rQ   r   r   __classcell__)r   s   @r   r   r      ss            $J###% % % % % %F F F F F F Fr   r   tensorc                 Z    | d S t          | t                    r| j        j        S t          S r   )r   r   rb   r   r   )r   s    r   
get_layoutr      s/    ~t&&!! %~$$r   c                 (   || j         }t          | j                  }||                                                     d          xx         t          | j                   t          |          z  z  cc<   t          t          |           ||          S )Nr   )r6   r   )r6   r1   r   r2   r3   r,   r   r   )torch_tensorr6   r   s      r   wrap_torch_tensorr      s    }"#$$E	,




%
%a
(
()))Xl6H-I-IXV[__-\\)))',''uEBBBBr   
layout_clsc                 0    t           t                    sJ  j        }|j                            |j                  } ||j        fi |}|                    |          } fdt                     D             }t          t          ||          fi |S )Nc                 \    i | ](}|j         d k    |j         t          |j                   )S )rb   )r@   getattr)r*   kr   s     r   
<dictcomp>z"convert_layout.<locals>.<dictcomp>   s6    \\\R[H[H[QVWVQV,,H[H[H[r   )
r   r   rb   r   unswizzle_datar   r   swizzle_datar   r   )r   r   layout_kwargsold_storageold_data
new_layoutnew_dataattrss   `       r   convert_layoutr      s    ff%%%%%.K!001ABBHHN<<m<<J&&x00H\\\\fVnn\\\E'(J//995999r   r   )dataclassesr   r   typingr   r   triton.tools.tensor_descriptorr   triton.tools.ragged_tmar   "reduction_details.reduce_bitmatrixr	   r
   target_infor   tensor_details.layoutr   r   r   rT   rW   r   FP4r6   r,   r   r   r   r   r   r)   r   r   <module>r      s   ) ) ) ) ) ) ) )        ; ; ; ; ; ; < < < < < < N N N N N N N N , , , , , , 8 8 8 8 8 8 8 8 6W 6W 6W 6W 6W 6W 6W 6Wr         ^ ^ ^ ^ ^ ^ ^ ^ k!nni!qDIII;*U[8     A A A A A A A AH F F F F F F F F4u|f,t3    C C C C:6 :tF| : : : : : :r   