
    Pi                     L    d dl mZ d dlZd dlmZ  G d dej                  ZdS )    )OptionalN)nnc            	            e Zd ZdZ	 	 ddedededdf fd	Zd
 ZddeddfdZ	 ddej	        de
ej	                 dej	        fdZ xZS )Phi3RotaryPositionalEmbeddingsa|  
    RoPE Embeddings used in the Phi3 model.
    Ref: https://huggingface.co/microsoft/Phi-3-mini-4k-instruct

    This class is not numerically equivalent to the RoPE Embedding module
    used by Llama2 and Llama3.

    Args:
        dim (int): Embedding dimension. This is usually set to the dim of each
            head in the attention module computed as ``embed_dim`` // ``num_heads``
        max_seq_len (int): Maximum expected sequence length for the
            model, if exceeded the cached freqs will be recomputed
        base (int): The base for the geometric progression used to compute
            the rotation angles
       '  dimmax_seq_lenbasereturnNc                     t                                                       || _        || _        || _        |                                  d S N)super__init__r	   r   r
   	rope_init)selfr	   r
   r   	__class__s       ~/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchtune/models/phi3/_position_embeddings.pyr   z'Phi3RotaryPositionalEmbeddings.__init__   sG     		&    c                 
   d| j         t          j        d| j        d          d | j        dz                                           | j        z  z  z  }|                     d|d           |                     | j                   d S )Ng      ?r      thetaF
persistent)r   torcharanger	   floatregister_bufferbuild_rope_cacher
   )r   r   s     r   r   z(Phi3RotaryPositionalEmbeddings.rope_init+   s    IQ!,,->A->?EEGG$(RT
 	We>>>d./////r   c                    t          j        || j        j        | j        j                  }t          j        d|| j                                                  }t          j        ||gd          }t          j        |                                |	                                gd          }| 
                    d|d           d S )N)dtypedevicez
i, j -> ijr	   cacheFr   )r   r   r   r!   r"   einsumr   catcossinr   )r   r
   seq_idx	idx_thetafreqsr%   s         r   r   z/Phi3RotaryPositionalEmbeddings.build_rope_cache3   s    ,tz/
8I
 
 
 Lw
CCIIKK	
 	9i0b999	599;;		4"===We>>>>>r   x	input_posc                    |                     d          }|                     d          }|| j        d|         n| j        |         }|                    d|d|dz            }|dd|f         }|d|df         }|dd|j        d         dz  f         }|d|j        d         dz  df         }	t	          j        |	 |fd          }
||z  |
|z  z   }|                    |          S )ae  
        Args:
            x (torch.Tensor): input tensor with shape
                [b, s, n_h, h_d]
            input_pos (Optional[torch.Tensor]): Optional tensor which contains the position ids
                of each token. During training, this is used to indicate the positions
                of each token relative to its sample when packed, shape [b, s].
                During inference, this indicates the position of the current token.
                If none, assume the index of the token is its position id. Default is None.

        Returns:
            Tensor: output tensor with RoPE applied

        Notation used for tensor shapes:
            - b: batch size
            - s: sequence length
            - n_h: num heads
            - h_d: head dim

        TODO: The implementation below can be made more efficient
        for inference.
           r#   Nr   .r$   )sizer%   viewshaper   r'   type_as)r   r-   r.   seq_lenhead_dim
rope_cacher(   r)   x1x2rotatedx_outs               r   forwardz&Phi3RotaryPositionalEmbeddings.forwardD   s   4 &&))66"::
 %.$5DJxx  4:i;P 	  __R!X\BB
 ixi(hii(s&agbkQ&&&'sAGBK1$&&&')bS"I2... SWs]+}}Qr   )r   r   )r   r   )__name__
__module____qualname____doc__intr   r   r   r   Tensorr   r<   __classcell__)r   s   @r   r   r      s         &  	
 

 
 	

 

 
 
 
 
 
0 0 0? ?C ?4 ? ? ? ?$ DH3  3 3 *25<*@3 	3  3  3  3  3  3  3  3 r   r   )typingr   r   r   Moduler    r   r   <module>rG      st                i  i  i  i  i RY i  i  i  i  i r   