
    Pi                     L    d dl mZ d dlZd dlmZ  G d dej                  ZdS )    )OptionalN)nnc            	            e Zd ZdZ	 	 ddedededdf fd	Zd
 ZddeddfdZ	 dde	j
        dee	j
                 de	j
        fdZ xZS )Qwen2RotaryPositionalEmbeddingsau  
    RoPE Embeddings used in the Qwen2 model.
    Ref: https://huggingface.co/Qwen/Qwen2-7B-Instruct

    This class is not numerically equivalent to the RoPE Embedding module
    used by Llama2 and Llama3.

    Args:
        dim (int): Embedding dimension. This is usually set to the dim of each
            head in the attention module computed as ``embed_dim`` // ``num_heads``
        max_seq_len (int): Maximum expected sequence length for the
            model, if exceeded the cached freqs will be recomputed
        base (float): The base for the geometric progression used to compute
            the rotation angles
           .Adimmax_seq_lenbasereturnNc                     t                                                       || _        || _        || _        |                                  d S N)super__init__r	   r   r
   	rope_init)selfr	   r
   r   	__class__s       /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchtune/models/qwen2/_positional_embeddings.pyr   z(Qwen2RotaryPositionalEmbeddings.__init__   sG     		&    c                 
   d| j         t          j        d| j        d          d | j        dz                                           | j        z  z  z  }|                     d|d           |                     | j                   d S )Ng      ?r      thetaF
persistent)r   torcharanger	   floatregister_bufferbuild_rope_cacher
   )r   r   s     r   r   z)Qwen2RotaryPositionalEmbeddings.rope_init+   s    IQ!,,->A->?EEGG$(RT
 	We>>>d./////r   c                    t          j        || j        j        | j        j                  }t          j        d|| j                                                  }t          j        ||gd          }t          j        |                                |	                                gd          }| 
                    d|d           d S )N)dtypedevicez
i, j -> ijr	   cacheFr   )r   r   r   r!   r"   einsumr   catcossinr   )r   r
   seq_idx	idx_thetafreqsr%   s         r   r   z0Qwen2RotaryPositionalEmbeddings.build_rope_cache3   s    ,tz/
8I
 
 
 Lw
CCIIKK	
 	9i0b999	599;;		4"===We>>>>>r   x	input_posc                 2   |                     d          }|                     d          }|| j        d|         n| j        |         }|                    d|d|dz            }|dd|f                             |j                  }|d|df                             |j                  }|dd|j        d         dz  f         }|d|j        d         dz  df         }	t          j        |	 |fd          }
||z  |
|z  z   }|                    |          S )ae  
        Args:
            x (torch.Tensor): input tensor with shape
                [b, s, n_h, h_d]
            input_pos (Optional[torch.Tensor]): Optional tensor which contains the position ids
                of each token. During training, this is used to indicate the positions
                of each token relative to its sample when packed, shape [b, s].
                During inference, this indicates the position of the current token.
                If none, assume the index of the token is its position id. Default is None.

        Returns:
            Tensor: output tensor with RoPE applied

        Notation used for tensor shapes:
            - b: batch size
            - s: sequence length
            - n_h: num heads
            - h_d: head dim

        TODO: The implementation below can be made more efficient
        for inference.
           r#   Nr   .r$   )	sizer%   viewtor!   shaper   r'   type_as)r   r-   r.   seq_lenhead_dim
rope_cacher(   r)   x1x2rotatedx_outs               r   forwardz'Qwen2RotaryPositionalEmbeddings.forwardD   s+   4 &&))66"::
 %.$5DJxx  4:i;P 	  __R!X\BB
 ixi(++AG44hii(++AG44s&agbkQ&&&'sAGBK1$&&&')bS"I2... SWs]+}}Qr   )r   r   )r   r   )__name__
__module____qualname____doc__intr   r   r   r   r   Tensorr   r=   __classcell__)r   s   @r   r   r      s         &  !	
 

 
 	

 

 
 
 
 
 
0 0 0? ?C ?4 ? ? ? ?$ DH3  3 3 *25<*@3 	3  3  3  3  3  3  3  3 r   r   )typingr   r   r   Moduler    r   r   <module>rH      st                i  i  i  i  i bi i  i  i  i  i r   