
    .`i':                     \   d Z ddlZddlmZ ddlmc mZ ddlmZ ddl	m
Z
 ddlmZmZ ddlmZ dej        dej        d	ed
ej        fdZdej        dej        dej        d	ed
eej        ej        f         f
dZdej        dej        dej        d	ed
ej        f
dZ	 d dedej        defdZ e
j        d           G d de
                      Z e
j        d           G d de
                      Z e
j        d           G d de
                      Z G d dej                  ZdS )!zCustom normalization layers.    N)rocm_aiter_ops)CustomOp)rms_norm_batch_invariantvllm_is_batch_invariant)current_platformxweightvariance_epsilonreturnc                     ddl m} t                      rt          | ||          S t	          j        |           }|                    || ||           |S Nr   )_custom_ops)vllmr   r   r   torch
empty_likerms_norm)r   r	   r
   opsouts        x/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/layers/layernorm.pyr   r      sq     ('''''   E'63CDDD

1

CLL		   J    residualc                     ddl m} t                      rt          | |z   ||          | |z   fS |                    | |||           | |fS r   )r   r   r   r   fused_add_rms_norm)r   r   r	   r
   r   s        r   r   r   #   s     ('''''   'L&"2
 
x< 	 		   h;r   biasc                 l    ddl m} t          j        |           }|                    || |||           |S r   )r   r   r   r   	poly_norm)r   r	   r   r
   r   r   s         r   r   r   8   sR     ('''''

1

CMM	   Jr   Fwith_fused_adddtype	use_aiterc                     |o|t           j        t           j        fv }|r| rt          j        S |rt          j        S | rt          S t
          S N)r   float16bfloat16r   rms_norm2d_with_addr   r   r   r   r   s      r   dispatch_rocm_rmsnorm_funcr&   H   sd      e( I
  2^ 211 '&&  "!!Or   r   c                       e Zd ZdZ	 	 	 	 ddedededz  ded	ej        dz  d
df fdZ	e
	 	 	 ddej        dededej        dej        dz  dej        dz  dedz  d
ej        eej        ej        f         z  fd            Z	 ddej        dej        dz  d
ej        eej        ej        f         z  fdZ	 ddej        dej        dz  d
ej        eej        ej        f         z  fdZ	 ddej        dej        dz  d
ej        eej        ej        f         z  fdZ	 ddej        dej        dz  d
ej        eej        ej        f         z  fdZd
efdZ xZS )RMSNormzRoot mean square normalization.

    Computes x -> w * x / sqrt(E[x^2] + eps) where w is the learned weight.
    Refer to https://arxiv.org/abs/1910.07467
    ư>NThidden_sizeepsvar_hidden_size
has_weightr   r   c                    t                                                       || _        || _        ||k    rd n|| _        |pt          j                    }|| _        t          j        ||          | _	        | j        rt          j        | j	                  | _	        t          j                    rCt          j                    }t!          d||          | _        t!          d||          | _        d S d S )Nr   Fr%   T)super__init__r*   r
   variance_size_overrider   get_default_dtyper-   onesr	   nn	Parameterr   is_rocmr   is_rmsnorm_enabledr&   rocm_norm_funcrocm_norm_func_with_add)	selfr*   r+   r,   r-   r   weight_dtypeaiter_rmsnorm_enabled	__class__s	           r   r1   zRMSNorm.__init__f   s    	& ##{22DD 	# 9 7 9 9$jLAAA? 	4,t{33DK#%% 		$2$E$G$G!"<$"/# # #D
 ,F#<CX, , ,D(((		 		r   r   r
   
orig_dtyper	   r   r2   c                    |                      t          j                  } || |z   } |                      |          }| j        d         |k    r t	          d| d| j        d                    || }n,||k     rt	          d| d|           | ddddd|f         }|                    d                              dd          }| t          j        ||z             z  } |                      |          } || |z  } || S | |fS )	6PyTorch-native implementation equivalent to forward().NzExpected hidden_size to be z, but found: z$Expected hidden_size to be at least    Tdimkeepdim)tor   float32shape
ValueErrorpowmeanrsqrt)	r   r
   r*   r?   r	   r   r2   x_varvariances	            r   forward_staticzRMSNorm.forward_static   sZ    DD HAttJ''H72;+%%UkUUPRUU   ")EE333 J-J J<GJ J  
 aaa33334E99Q<<$$T$::H'77888DDF
AHh;r   c           	          |                      || j        | j        |j        | j        r| j        j        nd|| j                  S )rA   N)rP   r
   r*   r   r-   r	   datar2   r;   r   r   s      r   forward_nativezRMSNorm.forward_native   sM     ""!G $9DKT'
 
 	
r   c                     | j         |                     ||          S |d u}|r!t          ||| j        j        | j                  S t          || j        j        | j                  S r!   )r2   rT   r   r	   rR   r
   r   r;   r   r   add_residuals       r   forward_cudazRMSNorm.forward_cuda   su    
 &2&&q(333t+ 	H%8T[-t/D   At{/1FGGGr   c                     | j         |                     ||          S |d u}|r'|                     ||| j        j        | j                  S |                     || j        j        | j                  S r!   )r2   rT   r:   r	   rR   r
   r9   rV   s       r   forward_hipzRMSNorm.forward_hip   s    
 &2&&q(333t+ 	S//8T[-t/D   &&q$+*:D<QRRRr   c                     | j         |                     ||          S ddlm} |+|                    ||| j        j        | j                   ||fS |                    || j        j        | j                  S )Nr   )ipex_ops)	r2   rT   vllm._ipex_opsr\   r   r	   rR   r
   r   )r;   r   r   r   s       r   forward_xpuzRMSNorm.forward_xpu   s    
 &2&&q(333222222"" %	   h;||K!
 
 	
r   c                 d    d| j         j                            d           }|d| j         z  }|S )Nzhidden_size=r   z, eps=)r	   rR   sizer
   )r;   ss     r   
extra_reprzRMSNorm.extra_repr   s=    54;+003355	-d+---r   )r)   NTN)NNNr!   )__name__
__module____qualname____doc__intfloatboolr   r   r1   staticmethodTensortuplerP   rT   rX   rZ   r^   strrb   __classcell__r>   s   @r   r(   r(   \   s         &*$(   t	
  {T! 
     @  '+(,-1+ +<++ + K	+
 t#+ ,%+ !$d
+ 
elEL89	9+ + + \+` )-
 
<
 ,%
 
elEL89	9	
 
 
 
( )-H H<H ,%H 
elEL89	9	H H H H& )-S S<S ,%S 
elEL89	9	S S S S& )-
 
<
 ,%
 
elEL89	9	
 
 
 
0C        r   r(   gemma_rms_normc                       e Zd ZdZ	 ddededdf fdZedej	        d	ed
ej	        dej	        fd            Z
edej	        d	ed
ej	        dej	        deej	        ej	        f         f
d            Z	 dd
ej	        dej	        dz  dej	        eej	        ej	        f         z  fdZ	 dd
ej	        dej	        dz  dej	        eej	        ej	        f         z  fdZ xZS )GemmaRMSNormzRMS normalization for Gemma.

    Two differences from the above RMSNorm:
        1. x * (1 + w) instead of x * w.
        2. (x * w).to(orig_dtype) instead of x.to(orig_dtype) * w.
    r)   r*   r+   r   Nc                     t                                                       t          j        t	          j        |                    | _        || _        d S r!   )r0   r1   r5   r6   r   zerosr	   r
   )r;   r*   r+   r>   s      r   r1   zGemmaRMSNorm.__init__  sD    
 	l5;{#;#;<< #r   r	   r
   r   c                 "   |j         }|                                }|                    d                              dd          }|t	          j        ||z             z  }|d|                                 z   z  }|                    |          }|S )zGPyTorch-native implementation equivalent to forward() without residual.rC   rB   TrD         ?)r   rh   rK   rL   r   rM   rG   )r	   r
   r   r?   rO   s        r   _forward_static_no_residualz(GemmaRMSNorm._forward_static_no_residual  s~     W
GGII5588==R=66H'77888v||~~%&DDr   r   c                    |j         }|t          j        k    r)|                                |                                z   n||z   }|}|                                }|                    d                              dd          }|t          j        ||z             z  }|d|                                 z   z  }|                    |          }||fS )zDPyTorch-native implementation equivalent to forward() with residual.rC   rB   TrD   rv   )r   r   r"   rh   rK   rL   rM   rG   )r	   r
   r   r   r?   rO   s         r   _forward_static_with_residualz*GemmaRMSNorm._forward_static_with_residual'  s     W
 U]** GGII((((X 	

 GGII5588==R=66H'77888 v||~~%&DD({r   c                     |&|                      | j        j        | j        |          S |                     | j        j        | j        ||          S )rA   )rw   r	   rR   r
   ry   rS   s      r   rT   zGemmaRMSNorm.forward_native@  s\     33 $"7   55 $"7H  r   c                 >   t           j                                        r|                     ||          S t	          | dd          sCt          j        | j                  | _        t          j        | j                  | _        d| _        |                     ||          S )N_is_compiledFT)	r   compileris_compilingrT   getattrcompilerw   ry   r|   rS   s      r   rX   zGemmaRMSNorm.forward_cudaO  s    
 >&&(( 	4&&q(333t^U33 	%/4}00 0D, 2722 2D. !%D""1h///r   r)   r!   )rc   rd   re   rf   rg   rh   r1   rj   r   rk   rw   rl   ry   rT   rX   rn   ro   s   @r   rr   rr     s         $ $$ $ 
	$ $ $ $ $ $  < 
	   \  < ,	
 
u|U\)	*   \6 )- < ,% 
elEL89	9	   $ )-0 0<0 ,%0 
elEL89	9	0 0 0 0 0 0 0 0r   rr   rms_norm_gatedc                        e Zd ZdZ	 	 	 	 	 ddedededz  ded	ej        dz  d
ej	        dz  f fdZ
d Z	 ddej        dej        dz  dej        fdZ	 ddej        dej        dz  dej        fdZ xZS )RMSNormGatedzRMS Normalization with optional gating.

    This is a native PyTorch implementation that supports:
    - Standard RMS normalization
    - Group RMS normalization
    - Optional gating with SiLU activation
    h㈵>NFr*   r+   
group_sizenorm_before_gatedevicer   c                 "   ||d}t                                                       || _        t          j        t          j        |fi |          | _        |                     dd           || _	        || _
        |                                  dS )a  Initialize RMSNormGated.

        Args:
            hidden_size: Size of the hidden dimension
            eps: Epsilon for numerical stability
            group_size: If not None, do GroupNorm with each group
                        having group_size elements.
                        group_size=None is equivalent to group_size=hidden_size
                        (i.e. there's only 1 group).
            norm_before_gate: If True and z is provided: out = norm(x) * silu(z)
                              If False and z is provided: out = norm(x * silu(z))
            device: Device to create parameters on
            dtype: Data type for parameters
        )r   r   r   N)r0   r1   r+   r5   r6   r   emptyr	   register_parameterr   r   reset_parameters)	r;   r*   r+   r   r   r   r   factory_kwargsr>   s	           r   r1   zRMSNormGated.__init__o  s    . %+U;;l5;{#M#Mn#M#MNN---$ 0r   c                 X    t           j        j                            | j                   d S r!   )r   r5   initones_r	   )r;   s    r   r   zRMSNormGated.reset_parameters  s"    DK(((((r   r   zr   c                 (   || j         s|t          j        |          z  }| j        T|                    d                              dd          }|t          j        || j        z             z  }|| j	        z  }nvddl
m}  ||d| j        	          }|                    d                              dd          }|t          j        || j        z             z  } ||d
          | j	        z  }|| j         r|t          j        |          z  }|S )a  
        Native PyTorch implementation of RMS normalization with gating.

        Args:
            x: Input tensor
            z: Optional gating tensor

        Returns:
            Normalized (and optionally gated) tensor

        If z is not None:
            - norm_before_gate=True: out = norm(x) * silu(z)
            - norm_before_gate=False: out = norm(x * silu(z))
        NrC   rB   TrD   r   )	rearrangez... (g d) -> ... g d)dz... g d -> ... (g d))r   Fsilur   rK   rL   r   rM   r+   r	   einopsr   )r;   r   r   rO   x_normedr   r   x_groups           r   rT   zRMSNormGated.forward_native  s   $ =!6=AF1IIA ?"uuQxx}}T}::H5;x$(':;;;HT[(CC )(((((i#9T_MMMG{{1~~**r4*@@HX-@!A!AAH)H&<==KC =T2=q		/C
r   c           	      d    ddl m}  ||| j        | j        || j        | j        | j                  S )Nr   )
rmsnorm_fn)r   r+   r   r   )2vllm.model_executor.layers.fla.ops.layernorm_guardr   r	   r   r+   r   r   )r;   r   r   r   s       r   rX   zRMSNormGated.forward_cuda  sP     	RQQQQQzKI!2
 
 
 	
r   )r   NFNNr!   )rc   rd   re   rf   rg   rh   ri   r   r   r   r1   r   rk   rT   rX   rn   ro   s   @r   r   r   c  s<         !%!&&*$(       $J	 
   t#  {T!           @) ) ) 9=( (("',"5(	( ( ( (V 9=
 

"',"5
	
 
 
 
 
 
 
 
r   r   c                   D     e Zd ZdZddedef fdZdej        fdZ	 xZ
S )		LayerNormz
    Layer Normalization.
    r)   rE   r+   c                 @   t                                                       || _        || _        t	          j        t          j        |t          j                            | _	        t	          j        t          j
        |t          j                            | _        d S )Nr/   )r0   r1   rE   r+   r5   r6   r   r4   rH   r	   rt   r   )r;   rE   r+   r>   s      r   r1   zLayerNorm.__init__  so    l5:c#G#G#GHHLS!F!F!FGG			r   r   c                     t          j        |                                | j        f| j        | j        | j                                      |          S r!   )r   
layer_normrh   rE   r	   r   r+   type_as)r;   r   s     r   forwardzLayerNorm.forward  s>    |GGII{DKDH
 

'!**	r   r   )rc   rd   re   rf   rg   rh   r1   r   rk   r   rn   ro   s   @r   r   r     s         H HC He H H H H H H        r   r   )F)rf   r   torch.nnr5   torch.nn.functional
functionalr   vllm._aiter_opsr   vllm.model_executor.custom_opr   *vllm.model_executor.layers.batch_invariantr   r   vllm.platformsr   rk   rh   r   rl   r   r   ri   r   r&   registerr(   rr   r   Moduler    r   r   <module>r      s   # "                 * * * * * * 2 2 2 2 2 2        , + + + + +|"\=B
\   "|l L 	
 5<%&   *|"\16QV
\   " AF !&9=   ( :c c c c ch c c cN #$$Z0 Z0 Z0 Z0 Z08 Z0 Z0 %$Z0| #$$e
 e
 e
 e
 e
8 e
 e
 %$e
P    	     r   