
     `i\                     h   d dl mZmZmZ d dlZd dlmZ d dlmZ ddlm	Z	 ddl
mZmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZmZmZmZ ddlmZmZ ddlmZmZ ddlm Z m!Z! ddl"m#Z# ddl$m%Z%m&Z&m'Z' ddl(m)Z) ddl*m+Z+  ed           G d dej,                              Z- G d dej,                  Z.d Z/d<dZ0dej1        de2dej1        fdZ3	 d=d ej,        d!ej1        d"ej1        d#ej1        d$eej1                 d%e4d&e4d'e#e%         fd(Z5 G d) d*ej,                  Z6 G d+ d,ej,                  Z7 G d- d.e          Z8e& G d/ d0e!                      Z9e& G d1 d2e9                      Z:e& G d3 d4e9e                      Z; G d5 d6ee9          Z< G d7 d8ee9          Z= G d9 d:ee9          Z>g d;Z?dS )>    )CallableOptionalUnionN)nn)check_model_inputs   )ACT2FN)CacheDynamicCache)GenerationMixin)use_kernel_forward_from_hub)create_causal_mask!create_sliding_window_causal_mask)GenericForQuestionAnswering GenericForSequenceClassificationGenericForTokenClassificationGradientCheckpointingLayer)BaseModelOutputWithPastCausalLMOutputWithPast)ROPE_INIT_FUNCTIONSdynamic_rope_update)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)TransformersKwargsauto_docstringcan_return_tuple)deprecate_kwarg   )Exaone4ConfigRMSNormc                   ,     e Zd Zd fd	Zd Zd Z xZS )Exaone4RMSNormư>c                     t                                                       t          j        t	          j        |                    | _        || _        dS )z=
        Exaone4RMSNorm is equivalent to T5LayerNorm
        N)super__init__r   	Parametertorchonesweightvariance_epsilon)selfhidden_sizeeps	__class__s      /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/exaone4/modeling_exaone4.pyr'   zExaone4RMSNorm.__init__4   sD     	l5:k#:#:;; #    c                    |j         }|                    t          j                  }|                    d                              dd          }|t          j        || j        z             z  }| j        |                    |          z  S )N   T)keepdim)	dtypetor)   float32powmeanrsqrtr,   r+   )r-   hidden_statesinput_dtypevariances       r1   forwardzExaone4RMSNorm.forward<   s|    #)%((77 $$Q'',,R,>>%Ht?T4T(U(UU{]--k::::r2   c                 H    t          | j        j                   d| j         S )Nz, eps=)tupler+   shaper,   )r-   s    r1   
extra_reprzExaone4RMSNorm.extra_reprC   s&    )**II$2GIIIr2   )r$   )__name__
__module____qualname__r'   r@   rD   __classcell__r0   s   @r1   r#   r#   2   sb        $ $ $ $ $ $; ; ;J J J J J J Jr2   r#   c                   |     e Zd ZU ej        ed<   ddef fdZ ej                    e	d                         Z
 xZS )Exaone4RotaryEmbeddinginv_freqNconfigc                    t                                                       t          |d          rSt          |j        t
                    r9|j                            d|j                            d                    | _        nd| _        |j        | _	        |j        | _
        || _        t          | j                 | _        |                     | j        |          \  }| _        |                     d|d           | j        | _        d S )Nrope_scaling	rope_typetypedefaultrL   F)
persistent)r&   r'   hasattr
isinstancerO   dictgetrP   max_position_embeddingsmax_seq_len_cachedoriginal_max_seq_lenrM   r   rope_init_fnattention_scalingregister_bufferrL   original_inv_freq)r-   rM   devicerL   r0   s       r1   r'   zExaone4RotaryEmbedding.__init__J   s    6>** 	'z&:Mt/T/T 	'#044[&BUBYBYZ`BaBabbDNN&DN"("@$*$B!/?+/+<+<T[&+Q+Q($(ZeDDD!%r2   c                 X   | j         d d d d f                                                             |j        d         dd                              |j                  }|d d d d d f                                         }t          |j        j        t                    r|j        j        dk    r|j        j        nd}t          j
        |d          5  |                                |                                z                      dd          }t          j        ||fd	          }|                                | j        z  }|                                | j        z  }	d d d            n# 1 swxY w Y   |                    |j        
          |	                    |j        
          fS )Nr   r5   r   mpscpuF)device_typeenabledr4   dim)r7   )rL   floatexpandrC   r8   r_   rU   rQ   strr)   autocast	transposecatcosr\   sinr7   )
r-   xposition_idsinv_freq_expandedposition_ids_expandedrc   freqsembrm   rn   s
             r1   r@   zExaone4RotaryEmbedding.forward[   s    !M$4-8>>@@GGHZ[\H]_acdeehhijiqrr ,QQQaaaZ 8 > > @ @'1!(-'E'Ek!(-[`J`J`ahmmfk^UCCC 	5 	5&,,..1F1L1L1N1NNYYZ[]^__E)UEN333C''))d44C''))d44C		5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 vvAGv$$cff17f&;&;;;s   BE++E/2E/N)rE   rF   rG   r)   Tensor__annotations__r    r'   no_gradr   r@   rH   rI   s   @r1   rK   rK   G   s         l/ /} / / / / / /" U]__< <  _< < < < <r2   rK   c                     | dd| j         d         dz  f         }| d| j         d         dz  df         }t          j        | |fd          S )z*Rotates half the hidden dims of the input..Nr5   r4   re   )rC   r)   rl   )ro   x1x2s      r1   rotate_halfr|   k   s]    	
3"!'"+"""	#B	
3q """	#B9rc2YB''''r2   c                     |                     |          }|                     |          }| |z  t          |           |z  z   }||z  t          |          |z  z   }||fS )a  Applies Rotary Position Embedding to the query and key tensors.

    Args:
        q (`torch.Tensor`): The query tensor.
        k (`torch.Tensor`): The key tensor.
        cos (`torch.Tensor`): The cosine part of the rotary embedding.
        sin (`torch.Tensor`): The sine part of the rotary embedding.
        position_ids (`torch.Tensor`, *optional*):
            Deprecated and unused.
        unsqueeze_dim (`int`, *optional*, defaults to 1):
            The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
            sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
            that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
            k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
            cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
            the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
    Returns:
        `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
    )	unsqueezer|   )qkrm   rn   rp   unsqueeze_dimq_embedk_embeds           r1   apply_rotary_pos_embr   r   sc    ( --
&
&C
--
&
&C3w;q>>C/0G3w;q>>C/0GGr2   r=   n_repreturnc                     | j         \  }}}}|dk    r| S | dddddddddf                             |||||          } |                     |||z  ||          S )z
    This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
    num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
    r   N)rC   rh   reshape)r=   r   batchnum_key_value_headsslenhead_dims         r1   	repeat_kvr      s    
 2?1D.Ehzz!!!!QQQaaa"23::5BUW\^bdlmmM  (;e(CT8TTTr2           modulequerykeyvalueattention_maskscalingdropoutkwargsc                 R   t          || j                  }t          || j                  }	t          j        ||                    dd                    |z  }
|$|d d d d d d d |j        d         f         }|
|z   }
t          j                            |
dt          j	                  
                    |j                  }
t          j                            |
|| j                  }
t          j        |
|	          }|                    dd                                          }||
fS )Nr4   r   r5   )rf   r7   )ptrainingr   )r   num_key_value_groupsr)   matmulrk   rC   r   
functionalsoftmaxr9   r8   r7   r   r   
contiguous)r   r   r   r   r   r   r   r   
key_statesvalue_statesattn_weightscausal_maskattn_outputs                r1   eager_attention_forwardr      s    3 ;<<JUF$?@@L<z';';Aq'A'ABBWLL!$QQQ111.D
0@0D.D%DE#k1=((2U](SSVVW\WbccL=((6?([[L,|\::K''1--88::K$$r2   c                   B    e Zd Zdedef fdZ eddd          	 	 	 dd	ej        d
e	ej        ej        f         de
ej                 de
e         de
ej                 dee         de	ej        e
ej                 e
e	ej                          f         fd            Z xZS )Exaone4AttentionrM   	layer_idxc                    t                                                       || _        || _        |j        | _        |j        | _        |j        | _        t          |d|j        |j        z            | _        |j        |j        z  | _	        |j
        | _
        d| _        | j        dz  | _        |j        | _        |j        | _        |j        |         dk    | _        t#          j        | j        | j        | j        z  d          | _        t#          j        | j        | j        | j        z  d          | _        t#          j        | j        | j        | j        z  d          | _        t#          j        | j        | j        z  | j        d          | _        t/          | j        |j                  | _        t/          | j        |j                  | _        d S )Nr   Tg      sliding_attentionFbiasr/   )r&   r'   rM   r   num_attention_headsr   r.   getattrr   r   attention_dropout	is_causalr   sliding_windowsliding_window_patternlayer_types
is_slidingr   Linearq_projk_projv_projo_projr#   rms_norm_epsq_normk_normr-   rM   r   r0   s      r1   r'   zExaone4Attention.__init__   s   "#)#= #)#= !-
F4F&Jd4dee$*$>&B\$\!!'!9}d*$3&,&C# ,Y7;NNi 0$2JT]2Zafgggi 0$2JT]2Zafgggi 0$2JT]2Zafgggi 84= H$JZafggg$T]8KLLL$T]8KLLLr2   past_key_valuepast_key_values4.58new_nameversionNr=   position_embeddingsr   cache_positionr   r   c                    |j         d d         }g |d| j        R }|                     |                              |                              dd          }	|                     |                              |                              dd          }
|                     |                              |                              dd          }|                     |	          }	|                     |
          }
|\  }}| j	        | j
        rt          |	|
||          \  }	}
|$d|i}|                    |
|| j        |          \  }
}t          }| j        j        dk    rt"          | j        j                 } || |	|
||f| j        sdn| j        | j        | j
        r| j	        nd d|\  }} |j        g |dR                                  }|                     |          }||fS )Nr5   r   r4   r   eagerr   )r   r   r   )rC   r   r   viewrk   r   r   r   r   r   r   r   updater   r   rM   _attn_implementationr   r   r   r   r   r   r   )r-   r=   r   r   r   r   r   input_shapehidden_shapequery_statesr   r   rm   rn   cache_kwargsattention_interfacer   r   s                     r1   r@   zExaone4Attention.forward   s    $)#2#.88b8$-88{{=1166|DDNNqRSTT[[//44\BBLLQPQRR
{{=1166|DDNNqRSTT {{<00[[,,
&S&$/&';L*VY[^'_'_$L*& .L (7'='=j,X\Xfht'u'u$J(?;+w66"9$+:Z"[$7$7
%
  $}HCC$2HL26/K4..t
%
 
%
 
%
 
%
!\ *k);;;;;;FFHHkk+..L((r2   )NNN)rE   rF   rG   r    intr'   r   r)   rv   rB   r   r
   
LongTensorr   r   r@   rH   rI   s   @r1   r   r      s&       M} M M M M M M M0 _%0A6RRR
 26+/591) 1)|1) #5<#=>1) !.	1)
 "%1) !!121) +,1) 
u|Xel3XeEL>Q5RR	S1) 1) 1) SR1) 1) 1) 1) 1)r2   r   c                   $     e Zd Z fdZd Z xZS )
Exaone4MLPc                    t                                                       || _        |j        | _        |j        | _        t          j        | j        | j        d          | _        t          j        | j        | j        d          | _        t          j        | j        | j        d          | _	        t          |j                 | _        d S NFr   )r&   r'   rM   r.   intermediate_sizer   r   	gate_projup_proj	down_projr	   
hidden_actact_fnr-   rM   r0   s     r1   r'   zExaone4MLP.__init__  s    !-!'!94#3T5KRWXXXy!143IPUVVV4#94;KRWXXXV./r2   c                     |                      |                     |                     |                    |                     |          z            }|S ru   )r   r   r   r   )r-   ro   r   s      r1   r@   zExaone4MLP.forward  sA    NN4;;t~~a/@/@#A#ADLLQROO#STT	r2   )rE   rF   rG   r'   r@   rH   rI   s   @r1   r   r     sG        0 0 0 0 0      r2   r   c                   4    e Zd Zdedef fdZ eddd          	 	 	 	 	 	 dd
ej        de	ej                 de	ej
                 de	e         de	e         de	ej
                 de	eej        ej        f                  dee         dej        fd            Z xZS )Exaone4DecoderLayerrM   r   c                 4   t                                                       |j        | _        t          ||          | _        t          |          | _        t          |j        |j                  | _	        t          |j        |j                  | _
        d S )N)rM   r   r   )r&   r'   r.   r   	self_attnr   mlpr#   r   post_attention_layernormpost_feedforward_layernormr   s      r1   r'   zExaone4DecoderLayer.__init__  s    !-)9MMMf%%(6v7IvOb(c(c(c%*89KQWQd*e*e*e'''r2   r   r   r   r   NFr=   r   rp   	use_cacher   r   r   r   c                     |}	 | j         d|||||||d|\  }}
|                     |          }|	|z   }|}	|                     |          }|                     |          }|	|z   }|S )N)r=   r   rp   r   r   r   r    )r   r   r   r   )r-   r=   r   rp   r   r   r   r   r   residual_s              r1   r@   zExaone4DecoderLayer.forward  s     !)4> 	
')%+) 3	
 	
 	
 	
q 55mDD =0 !//77FF =0r2   )NNNFNN)rE   rF   rG   r    r   r'   r   r)   rv   r   r   r
   boolrB   r   r   r@   rH   rI   s   @r1   r   r     s5       f} f f f f f f f _%0A6RRR 2637+/$)59KO | !. u/0	
 "% D> !!12 &eEL%,,F&GH +, 
   SR    r2   r   c                   P    e Zd ZU eed<   dZdZdgZdgZdZ	dZ
dZdZdZeedZeZdS )Exaone4PreTrainedModelrM   modelTr   r   )r=   
attentionsN)rE   rF   rG   r    rw   base_model_prefixsupports_gradient_checkpointing_no_split_modules_skip_keys_device_placement_supports_flash_attn_supports_sdpa_supports_flex_attn_can_compile_fullgraph_supports_attention_backendr   r   _can_record_outputsconfig_classr   r2   r1   r   r   =  sq         &*#./#4"5N!"&,&  !LLLr2   r   c                       e Zd Zdef fdZe	 	 	 	 	 	 	 ddeej                 deej	                 deej                 dee
         deej                 d	ee         d
eej                 dee         deeef         fd            Z xZS )Exaone4ModelrM   c                    t                                                     j        | _        j        | _        t          j        j        j        | j                  | _        t          j	        fdt          j                  D                       | _        t          j        j                  | _        t!                    | _        d| _        |                                  d S )Nc                 0    g | ]}t          |          S r   )r   ).0r   rM   s     r1   
<listcomp>z)Exaone4Model.__init__.<locals>.<listcomp>Z  s$    eee	 33eeer2   r   rM   F)r&   r'   pad_token_idpadding_idx
vocab_sizer   	Embeddingr.   embed_tokens
ModuleListrangenum_hidden_layerslayersr#   r   normrK   
rotary_embgradient_checkpointing	post_initr   s    `r1   r'   zExaone4Model.__init__S  s       !. +L):F<NPTP`aameeeeU6KcEdEdeee
 
 #6#56;NOOO	0???&+# 	r2   N	input_idsr   rp   r   inputs_embedsr   r   r   r   c                    |d u |d uz  rt          d          ||                     |          }|r|t          | j                  }|B||                                nd}	t          j        |	|	|j        d         z   |j                  }||	                    d          }t          |x}
t                    s9| j        |||||d}dt          di |i}
d| j        j        v rt          di ||
d<   |}|                     ||          }t!          | j                  D ]-\  }}| j        j        |         } ||f||
|         ||||d	|}.|                     |          }t'          ||r|nd 
          S )Nz:You must specify exactly one of input_ids or inputs_embedsr  r   r   )r_   )rM   input_embedsr   r   r   rp   full_attentionr   )r   r   rp   r   r   r   )last_hidden_stater   r   )
ValueErrorr  r   rM   get_seq_lengthr)   arangerC   r_   r~   rU   rV   r   r   r   r  	enumerater
  r  r   )r-   r  r   rp   r   r  r   r   r   past_seen_tokenscausal_mask_mappingmask_kwargsr=   r   idecoder_layer
layer_types                    r1   r@   zExaone4Model.forwardc  s    -t";< 	[YZZZ  --i88M 	?0*$+>>>O!CRC^==???de"\ "2]5H5K"KTaTh  N )33A66L ?-FF 	l + -"0"0#2 , K !"4"C"C{"C"C# #dk&===;\;k;k_j;k;k#$78%"oom\JJ )$+ 6 6 	 	A}03J)M	$72:>) /#-	 	 	 	MM 		-00&+/8BOOd
 
 
 	
r2   )NNNNNNN)rE   rF   rG   r    r'   r   r   r)   r   rv   r
   FloatTensorr   r   r   r   rB   r   r@   rH   rI   s   @r1   r   r   Q  s+       }         151537+/59$(59E
 E
E,-E
 !.E
 u/0	E

 "%E
   12E
 D>E
 !!12E
 +,E
 
u--	.E
 E
 E
 E
 E
 E
 E
 E
r2   r   c                   f    e Zd ZdgZddiZddgdgfiZ fdZee	 	 	 	 	 	 	 	 	 dd	e	e
j                 d
e	e
j                 de	e
j                 de	e         de	e
j                 de	e
j                 de	e         de	e
j                 deee
j        f         dee         defd                        Z xZS )Exaone4ForCausalLMzlm_head.weightlm_headcolwise_repr=   logitsc                     t                                          |           t          |          | _        |j        | _        t          j        |j        |j        d          | _        | 	                                 d S r   )
r&   r'   r   r   r  r   r   r.   r"  r  r   s     r1   r'   zExaone4ForCausalLM.__init__  sj       !&))
 +y!3V5FUSSS 	r2   Nr   r  r   rp   r   r  labelsr   r   logits_to_keepr   r   c
                 R    | j         d|||||||d|
}|j        }t          |	t                    rt	          |	 d          n|	}|                     |dd|ddf                   }d}| | j        d||| j        j        d|
}t          |||j
        |j        |j                  S )u  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
            config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
            (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

        Example:

        ```python
        >>> from transformers import AutoModelForCausalLM, AutoTokenizer
        >>> model = AutoModelForCausalLM.from_pretrained("LGAI-EXAONE/EXAONE-4.0-32B")
        >>> tokenizer = AutoTokenizer.from_pretrained("LGAI-EXAONE/EXAONE-4.0-32B")

        >>> prompt = "Explain how wonderful you are"
        >>> messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ]
        >>> input_ids = tokenizer.apply_chat_template(
            messages,
            tokenize=True,
            add_generation_prompt=True,
            return_tensors="pt",
            enable_thinking=False,
        )

        >>> output = model.generate(input_ids, max_new_tokens=128)
        >>> tokenizer.decode(output[0], skip_special_tokens=False)
        "[|system|]\nYou are a helpful assistant.[|endofturn|]\n[|user|]\nExplain how wonderful you are[|endofturn|]\n[|assistant|]\n<think>\n\n</think>\n\nOh, thank you for such a kind and lovely question! 😊  \n\nI’m *so* wonderful because I’m here to make your life easier, brighter, and more fun! Whether you need help with:  \n\n✨ **Learning** – I can explain anything, from quantum physics to baking the perfect cake!  \n💡 **Creativity** – Need a poem, story, or a wild idea? I’ve got you covered!  \n🤖 **Problem-solving** – Stuck on a math problem or a tricky decision? I’ll help you figure it out"
        ```
        )r  r   rp   r   r  r   r   N)r$  r&  r  )lossr$  r   r=   r   r   )r   r  rU   r   slicer"  loss_functionrM   r  r   r   r=   r   )r-   r  r   rp   r   r  r&  r   r   r'  r   outputsr=   slice_indicesr$  r)  s                   r1   r@   zExaone4ForCausalLM.forward  s    \ ,64: 	,
)%+')	,
 	,
 	,
 	,
  18B>SV8W8Wk~ot444]kmAAA}aaa,?@AA%4%pVFt{OeppioppD%#3!/)
 
 
 	
r2   )	NNNNNNNNr   )rE   rF   rG   _tied_weights_keys_tp_plan_pp_planr'   r   r   r   r)   r   rv   r
   r  r   r   r   r   r   r   r@   rH   rI   s   @r1   r!  r!    su       *+=)H_-z:;H      151537+/59-1$(5934F
 F
E,-F
 !.F
 u/0	F

 "%F
   12F
 )*F
 D>F
 !!12F
 c5</0F
 +,F
 
 F
 F
 F
 ^ F
 F
 F
 F
 F
r2   r!  c                       e Zd ZdS ) Exaone4ForSequenceClassificationNrE   rF   rG   r   r2   r1   r2  r2            Dr2   r2  c                       e Zd ZdS )Exaone4ForTokenClassificationNr3  r   r2   r1   r6  r6  
  r4  r2   r6  c                       e Zd ZdZdS )Exaone4ForQuestionAnsweringtransformerN)rE   rF   rG   r   r   r2   r1   r8  r8    s        %r2   r8  )r   r   r!  r2  r6  r8  )Nr   )r   )@typingr   r   r   r)   r   transformers.utils.genericr   activationsr	   cache_utilsr
   r   
generationr   integrationsr   masking_utilsr   r   modeling_layersr   r   r   r   modeling_outputsr   r   modeling_rope_utilsr   r   modeling_utilsr   r   processing_utilsr   utilsr   r   r   utils.deprecationr   configuration_exaone4r    Moduler#   rK   r|   r   rv   r   r   rg   r   r   r   r   r   r   r!  r2  r6  r8  __all__r   r2   r1   <module>rK     s  . - , , , , , , , , ,        9 9 9 9 9 9 ! ! ! ! ! ! . . . . . . . . ) ) ) ) ) ) 7 7 7 7 7 7 R R R R R R R R            P O O O O O O O K K K K K K K K F F F F F F F F & & & & & & I I I I I I I I I I 0 0 0 0 0 0 0 0 0 0 0 0 Y''J J J J JRY J J ('J(!< !< !< !< !<RY !< !< !<H( ( (   6	UU\ 	U# 	U%, 	U 	U 	U 	U& % %I%<% 
% <	%
 U\*% % % '(% % % %4K) K) K) K) K)ry K) K) K)\        ) ) ) ) )4 ) ) )X ! ! ! ! !_ ! ! !& W
 W
 W
 W
 W
) W
 W
 W
t V
 V
 V
 V
 V
/ V
 V
 V
r	 	 	 	 	'GI_ 	 	 		 	 	 	 	$ACY 	 	 	& & & & &"=?U & & &  r2   