
     `i_                        d dl Z d dlmZ d dlZd dlmZ ddlmZ ddlm	Z	m
Z
mZ ddlmZ ddlmZmZmZmZmZ d	d
lmZ  ej        e          Z G d dej                  Z G d de          Z G d de          Zd Zd"dZ G d de          Z G d de          Z  G d de          Z! G d de!e          Z" G d de	          Z# G d de
          Z$ G d d e          Z%g d!Z&dS )#    N)Optional   )logging   )GemmaForCausalLMGemmaForSequenceClassificationGemmaForTokenClassification)GraniteAttention)LlamaDecoderLayerLlamaMLP
LlamaModelLlamaPreTrainedModelLlamaRotaryEmbedding   )HeliumConfigc                   ,     e Zd Zd fd	Zd Zd Z xZS )HeliumRMSNormư>c                     t                                                       t          j        t	          j        |                    | _        || _        d S N)super__init__nn	Parametertorchonesweightvariance_epsilon)selfhidden_sizeeps	__class__s      }/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/helium/modular_helium.pyr   zHeliumRMSNorm.__init__!   sB    l5:k#:#:;; #    c                 T   |j         }|                    t          j                  }|                    d                              dd          }|t          j        || j        z             z  }| j                            t          j                  |z                      |          S )Nr   T)keepdim)	dtypetor   float32powmeanrsqrtr   r   )r   hidden_statesinput_dtypevariances       r#   forwardzHeliumRMSNorm.forward&   s    #)%((77 $$Q'',,R,>>%Ht?T4T(U(UUu}--=AA+NNNr$   c                 H    t          | j        j                   d| j         S )Nz, eps=)tupler   shaper   )r   s    r#   
extra_reprzHeliumRMSNorm.extra_repr-   s&    )**II$2GIIIr$   )r   )__name__
__module____qualname__r   r1   r5   __classcell__r"   s   @r#   r   r       se        $ $ $ $ $ $
O O OJ J J J J J Jr$   r   c                       e Zd ZdS )HeliumRotaryEmbeddingNr6   r7   r8    r$   r#   r<   r<   1           Dr$   r<   c                       e Zd ZdS )	HeliumMLPNr=   r>   r$   r#   rA   rA   5   r?   r$   rA   c                     | ddddf         }| ddddf         }t          j        | |fd                              d          S )	z*Rotates half the hidden dims of the input..r   Nr   r   r&   dim)r   stackflatten)xx1x2s      r#   rotate_halfrK   9   sQ    	
319B	
319B;Ryb)))11"555r$   c                 z   |                     |          }|                     |          }|dd|j        d         dz  f                             dd          }|dd|j        d         dz  f                             dd          }| |z  t          |           |z  z   }||z  t          |          |z  z   }||fS )a  Applies Rotary Position Embedding to the query and key tensors.

    Args:
        q (`torch.Tensor`): The query tensor.
        k (`torch.Tensor`): The key tensor.
        cos (`torch.Tensor`): The cosine part of the rotary embedding.
        sin (`torch.Tensor`): The sine part of the rotary embedding.
        position_ids (`torch.Tensor`, *optional*):
            Deprecated and unused.
        unsqueeze_dim (`int`, *optional*, defaults to 1):
            The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
            sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
            that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
            k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
            cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
            the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
    Returns:
        `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
    .Nr&   r   rC   )	unsqueezer4   repeat_interleaverK   )qkcossinposition_idsunsqueeze_dimq_embedk_embeds           r#   apply_rotary_pos_embrW   @   s    ( --
&
&C
--
&
&C c'SYr]a'''
(
:
:1"
:
E
EC
c'SYr]a'''
(
:
:1"
:
E
EC3w;q>>C/0G3w;q>>C/0GGr$   c                   6     e Zd Zddedee         f fdZ xZS )HeliumAttentionNconfig	layer_idxc                     t                                          ||           t          j        |j        |j        d          | _        dt          j        | j                  z  | _	        d S )NF)biasr   )
r   r   r   Linearr    o_projmathsqrthead_dimscalingr   rZ   r[   r"   s      r#   r   zHeliumAttention.__init__b   sW    +++i 2F4FUSSS49T]333r$   r   r6   r7   r8   r   r   intr   r9   r:   s   @r#   rY   rY   a   sT        4 4| 4 4 4 4 4 4 4 4 4 4 4r$   rY   c                   6     e Zd Zddedee         f fdZ xZS )HeliumDecoderLayerNrZ   r[   c                     t                                          ||           t          |          | _        t	          |j        |j                  | _        t	          |j        |j                  | _        d S )Nr!   )	r   r   rA   mlpr   r    rms_norm_epsinput_layernormpost_attention_layernormrd   s      r#   r   zHeliumDecoderLayer.__init__i   sh    +++V$$,V-?VEXYYY(5f6HfNa(b(b(b%%%r$   r   re   r:   s   @r#   rh   rh   h   sa        c c| c c c c c c c c c c cr$   rh   c                       e Zd ZdS )HeliumPreTrainedModelNr=   r>   r$   r#   rp   rp   q   r?   r$   rp   c                   $     e Zd Zdef fdZ xZS )HeliumModelrZ   c                 X   t                                                     t          j        fdt	          j                  D                       | _        t          j        j	                  | _
        t                    | _        d| _        |                                  d S )Nc                 0    g | ]}t          |          S r>   )rh   ).0r[   rZ   s     r#   
<listcomp>z(HeliumModel.__init__.<locals>.<listcomp>y   s$    dddy	22dddr$   rj   F)r   r   r   
ModuleListrangenum_hidden_layerslayersr   r    rl   normr<   
rotary_embgradient_checkpointing	post_init)r   rZ   r"   s    `r#   r   zHeliumModel.__init__v   s       mddddE&JbDcDcddd
 
 "&"4&:MNNN	/77&+# 	r$   )r6   r7   r8   r   r   r9   r:   s   @r#   rr   rr   u   sD        
| 
 
 
 
 
 
 
 
 
 
r$   rr   c                       e Zd ZdS )HeliumForCausalLMNr=   r>   r$   r#   r   r      r?   r$   r   c                       e Zd ZdS )HeliumForSequenceClassificationNr=   r>   r$   r#   r   r      r?   r$   r   c                       e Zd ZdS )HeliumForTokenClassificationNr=   r>   r$   r#   r   r      r?   r$   r   )rp   rr   r   r   r   )Nr   )'r`   typingr   r   torch.nnr   utilsr   gemma.modeling_gemmar   r   r	   granite.modeling_graniter
   llama.modeling_llamar   r   r   r   r   configuration_heliumr   
get_loggerr6   loggerModuler   r<   rA   rK   rW   rY   rh   rp   rr   r   r   r   __all__r>   r$   r#   <module>r      s                        p p p p p p p p p p 7 7 7 7 7 7 v v v v v v v v v v v v v v . . . . . . 
	H	%	%J J J J JBI J J J"	 	 	 	 	0 	 	 		 	 	 	 	 	 	 	6 6 6   B4 4 4 4 4& 4 4 4c c c c c* c c c	 	 	 	 	0 	 	 	    '   	 	 	 	 	( 	 	 		 	 	 	 	&D 	 	 		 	 	 	 	#> 	 	 	  r$   