
    0`i!"                     >   d dl mZmZ d dlZd dlZd dlmZ ej        dej	        dej	        dej	        dej	        dej	        dej	        fd	            Z
	 	 	 	 	 	 ddej        dej        dej        deeej        f         deej                 dee         dej        fdZdS )    )OptionalUnionNBLOCK_KIS_SEQLEN_OFFSETS_TENSOR	IS_VARLENINTERLEAVED	CONJUGATEBLOCK_Mc           	      8   t          j        d          }t          j        d          }t          j        d          }|dz  }|s|||z  z   ||z  z   }| ||	z  z   ||z  z   } nPt          j        ||z             }t          j        ||z   dz             |z
  }|||z  z   ||z  z   }| ||
z  z   ||z  z   } ||z  |k    rd S ||z  t          j        d|          z   }|s||z   }n|t          j        ||z             z   }t          j        d|          }t          j        d|dz            }|sc||d d d f         |z  |d d d f         |z  z   z   }||d d d f         |z  |d d d f         z   z   }||d d d f         |z  |d d d f         z   z   }t          j        ||d d d f         |k     |d d d f         |k     z  d                              t           j                  } t          j        ||d d d f         |k     |d d d f         |k     z  d                              t           j                  }!t          j        ||d d d f         |k     |d d d f         |k     z  d                              t           j                  }"t          j        |||z  z   |d d d f         |k     |d d d f         |k     z  d                              t           j                  }#|r|! }!|"| z  |#|!z  z
  }$|"|!z  |#| z  z   }%| |d d d f         |
z  |d d d f         |z  z   z   } t          j        | |$|d d d f         |k     |d d d f         |k     z             t          j        | ||z  z   |%|d d d f         |k     |d d d f         |k     z             d S ||dz   dz  dz  z   dz
  }&t          j        d|          dz  }'||d d d f         |z  |d d d f         |z  z   z   }(||d d d f         |z  |&d d d f         |z  z   z   })||d d d f         |z  |'d d d f         z   z   }||d d d f         |z  |'d d d f         z   z   }t          j        ||d d d f         |k     |'d d d f         |k     z  d                              t           j                  } t          j        ||d d d f         |k     |'d d d f         |k     z  d                              t           j                  }!t          j        |(|d d d f         |k     |d d d f         |k     z  d                              t           j                  }"t          j        |)|d d d f         |k     |&d d d f         |k     z  d                              t           j                  }#|r|! }!|"| z  }*|#|!z  }+t          j        |d d d f         dz  dk    |*|+z
  |*|+z             },| |d d d f         |
z  |d d d f         |z  z   z   } t          j        | |,|d d d f         |k     |d d d f         |k     z             d S )	Nr   )axis      g      ?)maskotherg        )r   )tl
program_idloadarangetofloat32storewhere)-OUTXCOSSIN
CU_SEQLENSSEQLEN_OFFSETSseqlen
rotary_dim	seqlen_rostride_out_batchstride_out_seqlenstride_out_nheadsstride_out_headdimstride_x_batchstride_x_seqlenstride_x_nheadsstride_x_headdimr   r   r   r   r	   r
   pid_mpid_head	pid_batchrotary_dim_half	start_idxrmrm_csrkrk_halfcossinx0x1o0o1rk_swap	rk_repeatX0X1x0_cosx1_sinouts-                                                z/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/vllm_flash_attn/ops/triton/rotary.pyrotary_kernelrA      s   : Mq!!!E}!$$$H1%%%I AoO Q	N**X-GGI 0008>O3OOGJ233	i/!344y@	O++h.HHI 111H?P4PPw&  	29Q00	0B# 9^#RW^i7888	1g		Bi7a<((G BUAAAtG.qqq1ADT1TTUU111d7^o5aaa8HHIU111d7^o5aaa8HHIguQQQW~	1gdAAAg6F6XYad
 
 

"RZ.. 	 guQQQW~	1gdAAAg6F6XYad
 
 

"RZ.. 	 WR4[6)gdAAAg.>.PQY\
 
 

"RZ.. 	 W"222QQQW+&747+;o+MN
 
 
 "RZ..	 	
  	$C#XS #XS R4[#44wtQQQw7GJ\7\\]
b111d7f 4qqq9IO9[\]]]]
/$666QQQW+&747+;o+MN	
 	
 	
 	
 	
 	
 a1))A-Ia))Q.	"QQQW+/"T111W+@P2PPQ"QQQW+/'$'2BEU2UUVU111d7^o5	$'8JJKU111d7^o5	$'8JJKg4.9,471Co1UV
 
 
 "RZ..	 	
 g4.9,471Co1UV
 
 
 "RZ..	 	
 WRr!!!T'{V347j8PQY\]]]``J
 
 WbDkF*wtQQQw/?*/LMUX
 
 

"RZ.. 	  	$Ccchr$'{Q!+Vf_fvoNNR4[#44r$'{EW7WWX
cAAAtGv!5"T111W+
:R STTTTTT    Fxr3   r4   seqlen_offsets
cu_seqlens
max_seqlenreturnc	                 @   |du}	|	s| j         \  }
n+|
J d            | j         \  }}
|j         d         }|dz
  ||j         \  }}|j         |j         k    sJ |dz  }||
k    s
J d            |
dk    s
J d            |k    s
J d	            |j        |j        k    sJ d
|j         d|j                     | j        |j        k    sJ d| j         d|j                     |                                |                                }}t          |t          j                  rD|j         fk    sJ |j        t          j        t          j        fv sJ |                                }n|z   |k    sJ |st	          j        |           n| }||
k     r+|s)|d|df         	                    | d|df                    |dk    rdn|dk    rdn	|dk    rdnd}fd}|rdn	|dk    rdnd}t          j
                            | j        j                  5  t          |         || |||||||	s|                    d          nd|                    d          |                    d          |                    d          |	s|                     d          nd|                     d          |                     d          |                     d          |t          |t          j                  |	||||dk    rdnd           ddd           n# 1 swxY w Y   |S )a  
    Arguments:
        x: (batch, seqlen, nheads, headdim) if cu_seqlens is None
            else (total_seqlen, nheads, headdim).
        cos: (seqlen_ro, rotary_dim / 2)
        sin: (seqlen_ro, rotary_dim / 2)
        seqlen_offsets: integer or integer tensor of size (batch,)
        cu_seqlens: (batch + 1,) or None
        max_seqlen: int
    Returns:
        y: (batch, seqlen, nheads, headdim)
    Nz:If cu_seqlens is passed in, then max_seqlen must be passedr   r   r   zrotary_dim must be <= headdim   zOnly support headdim <= 256zseqlen_ro must be >= seqlenz*cos and sin must have the same dtype, got z and z0Input and cos/sin must have the same dtype, got .    @      c                 @    t          j        | d                   fS )Nr
   )tritoncdiv)METAbatchnheadsr   s    r@   <lambda>zapply_rotary.<locals>.<lambda>   s    VT)_==vuM rB         )	num_warps)shapedtype
contiguous
isinstancetorchTensorint32int64
empty_likecopy_cudadeviceindexrA   stride)rC   r3   r4   rD   rE   rF   interleavedinplace	conjugate	is_varlenheaddimtotal_seqlen	batch_p_1r!   r    outputr   gridr
   rQ   rR   r   s                      @@@r@   apply_rotaryrq      s   . $&I )*&vvww%%'c%%%()%fg$Q'	AIIz9	!!!!!OJ   "A   c>>>8>>> = 		SYOCIOOCIOO 	 	
39S!'SS	SS 	 ~~!1!1C.%,// 4#x////#U['AAAAA'2244&)3333(/6Ua   QFGGsJKK &&qjkk)9':;;;  	"$$bb*2C2C33 
 NMMMMMDCaa**;*;QQG 
		18>	*	* 
 
d$-4FMM!1MM"MM"MM"(/AHHQKKKaHHRLLHHRLLHHRLL~u|44%++aa1	
 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
6 Ms   C.LLL)r   NNFFF)typingr   r   r^   rN   triton.languagelanguager   jit	constexprrA   r_   intrq    rB   r@   <module>ry      sh   # " " " " " " "         vU* \+vU, !l-vU. |/vU0 1vU2 |3vU4 \5vU vU vU vUz 01)- $_ _|_	_ 
_ #u|+,	_
 &_ _ \_ _ _ _ _ _rB   