from typing import Optional

import torch
import torch.nn as nn

from ...utils import logging
from ..llama.modeling_llama import (
    LlamaAttention,
    LlamaForCausalLM,
    LlamaForSequenceClassification,
    LlamaForTokenClassification,
)
from ..phi3.modeling_phi3 import Phi3MLP
from .configuration_glm import GlmConfig


logger = logging.get_logger(__name__)

_CHECKPOINT_FOR_DOC = "THUDM/glm-4-9b"


class GlmMLP(Phi3MLP):
    pass


def rotate_half(x):
    """Rotates half the hidden dims of the input."""
    x1 = x[..., 0::2]
    x2 = x[..., 1::2]
    return torch.stack((-x2, x1), dim=-1).flatten(-2)
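

# A quick illustration (added for clarity; not in the upstream file): with the
# interleaved layout above, rotate_half maps each adjacent channel pair
# (x0, x1) to (-x1, x0). For example:
#
#   rotate_half(torch.tensor([1.0, 2.0, 3.0, 4.0]))
#   # -> tensor([-2., 1., -4., 3.])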


def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
    """Applies Rotary Position Embedding to the query and key tensors.

    Args:
        q (`torch.Tensor`): The query tensor.
        k (`torch.Tensor`): The key tensor.
        cos (`torch.Tensor`): The cosine part of the rotary embedding.
        sin (`torch.Tensor`): The sine part of the rotary embedding.
        position_ids (`torch.Tensor`, *optional*):
            Deprecated and unused.
        unsqueeze_dim (`int`, *optional*, defaults to 1):
            The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
            sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
            that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
            k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
            cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
            the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
    Returns:
        `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
    """
    cos = cos.unsqueeze(unsqueeze_dim)
    sin = sin.unsqueeze(unsqueeze_dim)

    # Interleave them instead of usual shape
    cos = cos[..., : cos.shape[-1] // 2].repeat_interleave(2, dim=-1)
    sin = sin[..., : sin.shape[-1] // 2].repeat_interleave(2, dim=-1)

    # Keep half or full tensor for later concatenation
    rotary_dim = cos.shape[-1]
    q_rot, q_pass = q[..., :rotary_dim], q[..., rotary_dim:]
    k_rot, k_pass = k[..., :rotary_dim], k[..., rotary_dim:]

    # Apply rotary embeddings on the rotary portion of the head dimension
    q_embed = (q_rot * cos) + (rotate_half(q_rot) * sin)
    k_embed = (k_rot * cos) + (rotate_half(k_rot) * sin)

    # Concatenate back to full shape
    q_embed = torch.cat([q_embed, q_pass], dim=-1)
    k_embed = torch.cat([k_embed, k_pass], dim=-1)
    return q_embed, k_embed


class GlmAttention(LlamaAttention):
    def __init__(self, config: GlmConfig, layer_idx: Optional[int] = None):
        super().__init__(config, layer_idx)
        self.o_proj = nn.Linear(config.num_attention_heads * self.head_dim, config.hidden_size, bias=False)


class GlmForCausalLM(LlamaForCausalLM):
    pass


class GlmForSequenceClassification(LlamaForSequenceClassification):
    pass


class GlmForTokenClassification(LlamaForTokenClassification):
    pass


__all__ = [
    "GlmPreTrainedModel",
    "GlmModel",
    "GlmForCausalLM",
    "GlmForSequenceClassification",
    "GlmForTokenClassification",
]
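

# Note (added for clarity): this is a "modular transformers" source file. The
# repo's modular converter expands these thin subclasses into a complete
# modeling_glm.py, which is also where names like `GlmPreTrainedModel` and
# `GlmModel` in `__all__` are generated, even though they are not defined here.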