
    Pi                     v   d dl Z d dlmZ d dlmZ d dlmZ  G d de j        j                  Z G d dej                  Z	 G d d	ej                  Z
 G d
 dej                  Zedk    rK ed          Z edde          Z e j        ddd          Z ee          Z eej                   dZdS dS )    N)	rearrange)RotaryPositionalEmbeddingsc                   0     e Zd Zddedef fdZd Z xZS )RMSNormư>dimepsc                     t                                                       || _        t          j        t          j        |                    | _        dS )z<https://github.com/meta-llama/llama/blob/main/llama/model.pyN)super__init__r	   nn	Parametertorchonesweight)selfr   r	   	__class__s      i/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/neucodec/bs_roformer5.pyr   zRMSNorm.__init__   s=    l5:c??33    c                     t          j        |dz  dd          }|t          j        || j        z             z  | j        z  }|S )N   T)r   keepdim)r   meanrsqrtr	   r   )r   xnorm_xoutputs       r   forwardzRMSNorm.forward   sC    AqDb$777U[$(!2333dkAr   )r   )__name__
__module____qualname__intfloatr   r   __classcell__r   s   @r   r   r      s_        4 4C 4e 4 4 4 4 4 4      r   r   c                   .     e Zd Zdeddf fdZd Z xZS )MLPr   returnNc                     t                                                       t          j        |d|z  d          | _        t          j                    | _        t          j        d|z  |d          | _        d S )N   Fbias)r   r   r   Linearfc1SiLUsilufc2)r   r   r   s     r   r   zMLP.__init__   sb    9S!c'666GII	9QWc666r   c                     |                      |          }|                     |          }|                     |          }|S N)r/   r1   r2   r   r   s     r   r   zMLP.forward   s4    HHQKKIIaLLHHQKKr   )r    r!   r"   r#   r   r   r%   r&   s   @r   r(   r(      sZ        7C 7D 7 7 7 7 7 7      r   r(   c                   2     e Zd Zdededef fdZd Z xZS )	Attentionr   n_headsrotary_embedc                 h   t                                                       ||z  dk    sJ || _        || _        || _        t          t          j        j        d          | _	        | j	        s
J d            t          j
        |d|z  d          | _        t          j
        ||d          | _        d S )Nr   scaled_dot_product_attentionzMust have flash attention.   Fr,   )r   r   r8   r   r9   hasattrr   r   
functionalflashr.   c_attnc_projr   r   r8   r9   r   s       r   r   zAttention.__init__$   s     	W}!!!!(UX02PQQ
z77777ziQW5999iSu555r   c                    |                                 \  }}}t          |                     |          dd| j                  \  }}}|                     |          }|                     |          }| j        r*t          j        j        	                    |||ddd          }t          |d          }| 
                    |          }|S )	z
        Args:
            x: (b, t, h*d)

        Constants:
            b: batch_size
            t: time steps
            r: 3
            h: heads_num
            d: heads_dim
        zb t (r h d) -> r b h t dr<   )rhNr   F)	attn_mask	dropout_p	is_causalzb h t d -> b t (h d))sizer   r@   r8   r9   r?   r   r   r>   r;   rA   )	r   r   BTCqkvys	            r   r   zAttention.forward5   s     &&((1aKKNN6!t|
 
 
1a
 a  a  : 	#@@1a41 A  A a/00KKNN r   )r    r!   r"   r#   r   r   r   r%   r&   s   @r   r7   r7   #   sd        66!$64N6 6 6 6 6 6"             r   r7   c                   B     e Zd Zdededef fdZdej        fdZ xZ	S )TransformerBlockr   r8   r9   c                    t                                                       || _        || _        t	          |          | _        t	          |          | _        t          |||          | _        t          |          | _
        d S )Nr   r8   r9   r   )r   r   r   r8   r   att_normffn_normr7   attr(   mlprB   s       r   r   zTransformerBlock.__init__Y   sl     	gLQQQ3<<<r   r   c                     ||                      |                     |                    z   }||                     |                     |                    z   }|S r4   )rX   rV   rY   rW   r5   s     r   r   zTransformerBlock.forwarde   sM     q))***q))***r   )
r    r!   r"   r#   r   r   r   Tensorr   r%   r&   s   @r   rR   rR   X   ss        
 
 !$
 4N
  
  
  
  
  
 <       r   rR   __main__   rU   i      rT   r      )r   torch.nnr   einopsr   torchtune.modulesr   Moduler   r(   r7   rR   r    rotary_embed_128transformer_blockrandnr   rP   printshapec r   r   <module>rk      s                8 8 8 8 8 8
 
 
 
 
eho 
 
 
    ")   2 2 2 2 2	 2 2 2j    ry   , z11c:::((!*:   	AsD!!A!A	E!'NNN	AAA r   