
    )`i                     2   d dl mZ d dlZd dlZd dlmZ 	 	 ddej        dej        dej        dedeej                 d	eej                 d
dfdZ	 	 	 ddej        dej        dej        dedeej                 deej                 deej                 d
dfdZ	dS )    )OptionalN)rms_norm_kernelxweightoutepsin_scale	out_scalereturnc                 f   | j         \  }}t          j        |          }t          dt	          d|dz                      }	t          |f         di d|d|d| d|                     d          d	|d
dddd|d|d|                    d          d|d|d|d|dud|duddddd|	 dS )z[RMS norm.

    Computes `out[i,j] = x[i,j] * weight[j] / sqrt(eps + sum(x[i]^2) / n)`.
              nbx_ptrx_strider   x_scale_ptrr_ptrNr_stridew_ptro_ptro_strideo_scale_ptrEPS
BLOCK_SIZEHAS_IN_SCALEHAS_OUT_SCALE
HAS_OUTPUTTHAS_RESIDUALF	num_warps )shapetritonnext_power_of_2maxminr   stride)
r   r   r   r   r	   r
   r   r   
block_sizer!   s
             j/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/flashinfer/triton/norm.pyrms_normr+   	   sY    7DAq'**JAs2zS01122IQD   
!
! a !	
 H d  f c A I C : T))  t++  4!" U#$ )%       residualx_out
x_in_scalex_out_scalec                    | j         \  }}| j         |j         k    sJ |                     d          |                    d          k    sJ t          j        |          }	t	          dt          j        |	d                    }
t          |f         di d|d|d| d|                     d          d|d|d	|                    d          d
|d|d||                    d          ndd|d|d|	d|dud|dud|duddd|
 dS )zmIn-place RMS norm with fused residual addition.

    Computes `r = r + x`, followed by `x = rmsnorm(r)`.
    r   r   r   r   r   r   r   r   r   r   r   r   Nr   r   r   r   r   r   r    Tr!   r"   )r#   r(   r$   r%   r'   cdivr   )r   r-   r   r   r.   r/   r0   r   r   r)   r!   s              r*   rms_norm_add_residualr3   1   s    7DAq7hn$$$$88A;;(//!,,,,,,'**JBJ3344IQD   
!
! a !	
 J h ### f e %*$5a1  K C :  t++ "--  $$!" T#$ )%   r,   )NN)NNN)
typingr   torchr$   flashinfer.triton.kernels.normr   Tensorfloatr+   r3   r"   r,   r*   <module>r9      sH           : : : : : : (,(,% %|%L% 
% 
	%
 u|$% %% 
% % % %Z %))-*.) )|)l) L) 
	)
 EL!) &) %,') 
) ) ) ) ) )r,   