
     `i%                        d dl mZmZ d dlZd dlmZ d dlmZ ddlmZm	Z	 ddl
mZ ddlmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ ddlm Z  ddl!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+ ddl,m-Z- ddl.m/Z/  ej0        e1          Z2 G d de(          Z3 G d de"          Z4 ej5         e                        ej5        d          k    r G d dej6                  Z7n$ ed           G d dej8                              Z7 G d d e#          Z9 G d! d"e)          Z: G d# d$e-          Z; G d% d&e$          Z< G d' d(e&          Z= G d) d*e'          Z> G d+ d,e%          Z?g d-Z@dS ).    )CallableOptionalN)version)nn   )CacheDynamicCache)use_kernel_forward_from_hub)create_causal_mask!create_sliding_window_causal_mask)FlashAttentionKwargs)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargsauto_docstringlogging)deprecate_kwarg)check_model_inputs)get_torch_version   )
LlamaAttentionLlamaDecoderLayerLlamaForCausalLMLlamaForQuestionAnsweringLlamaForSequenceClassificationLlamaForTokenClassificationLlamaMLPLlamaPreTrainedModelapply_rotary_pos_embeager_attention_forward)MistralModel   )Qwen2Configc                        e Zd Z fdZ xZS )Qwen2MLPc                 .   t                                          |           t          j        | j        | j        d          | _        t          j        | j        | j        d          | _        t          j        | j        | j        d          | _        d S )NFbias)	super__init__r   Linearhidden_sizeintermediate_size	gate_projup_proj	down_projselfconfig	__class__s     {/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/qwen2/modular_qwen2.pyr+   zQwen2MLP.__init__(   s|       4#3T5KRWXXXy!143IPUVVV4#94;KRWXXX    )__name__
__module____qualname__r+   __classcell__r5   s   @r6   r&   r&   '   sA        Y Y Y Y Y Y Y Y Yr7   r&   c                       e Zd Zdedef fdZ eddd          	 	 dd	ej        d
e	ej        ej        f         de
ej                 de
e         de
ej                 dee         de	ej        e
ej                 f         fd            Z xZS )Qwen2Attentionr4   	layer_idxc                    t                                          ||           t          j        |j        |j        | j        z  d          | _        t          j        |j        |j        | j        z  d          | _	        t          j        |j        |j        | j        z  d          | _
        t          j        |j        | j        z  |j        d          | _        |j        |         dk    r|j        nd | _        d S )NTr(   Fsliding_attention)r*   r+   r   r,   r-   num_attention_headshead_dimq_projnum_key_value_headsk_projv_projo_projlayer_typessliding_windowr3   r4   r?   r5   s      r6   r+   zQwen2Attention.__init__0   s    +++i 2F4NQUQ^4^eijjji 2F4NQUQ^4^eijjji 2F4NQUQ^4^eijjji :T] JFL^ejkkk7=7I)7TXk7k7kf33qur7   past_key_valuepast_key_valuesz4.58)new_namer   Nhidden_statesposition_embeddingsattention_maskcache_positionkwargsreturnc                 P   |j         d d         }g |d| j        R }|                     |                              |                              dd          }	|                     |                              |                              dd          }
|                     |                              |                              dd          }|\  }}t          |	|
||          \  }	}
|&|||d}|                    |
|| j	        |          \  }
}t          }| j        j        dk    rt          | j        j                 } || |	|
||f| j        sdn| j        | j        | j        d|\  }} |j        g |dR                                  }|                     |          }||fS )Nr#   r   )sincosrR   eagerg        )dropoutscalingrJ   )shaperC   rD   view	transposerF   rG   r    updater?   r!   r4   _attn_implementationr   trainingattention_dropoutr[   rJ   reshape
contiguousrH   )r3   rO   rP   rQ   rM   rR   rS   input_shapehidden_shapequery_states
key_statesvalue_statesrX   rW   cache_kwargsattention_interfaceattn_outputattn_weightss                     r6   forwardzQwen2Attention.forward8   s    $)#2#.88b8$-88{{=1166|DDNNqRSTT[[//44\BBLLQPQRR
{{=1166|DDNNqRSTT&S#7jRUWZ#[#[ j&#&snUUL'6'='=j,X\Xfht'u'u$J(?;+w66"9$+:Z"[$7$7
%
  $}HCC$2HL.
%
 
%
 
%
 
%
!\ *k);;;;;;FFHHkk+..L((r7   )NN)r8   r9   r:   r$   intr+   r   torchTensortupler   r   
LongTensorr   r   rn   r;   r<   s   @r6   r>   r>   /   s       v{ vs v v v v v v _%0A6RRR ,059*) *)|*) #5<#=>*) !.	*)
 "%*) !!12*) -.*) 
u|Xel33	4*) *) *) SR*) *) *) *) *)r7   r>   z2.3.0c                   *     e Zd Zddeddf fdZ xZS )Qwen2RMSNormư>epsrT   Nc                 P    t                                          ||d           d S )NT)normalized_shaperw   elementwise_affine)r*   r+   r3   r-   rw   r5   s      r6   r+   zQwen2RMSNorm.__init__i   s)    GGksW[\\\\\r7   rv   )r8   r9   r:   floatr+   r;   r<   s   @r6   ru   ru   h   s]        	] 	]U 	]d 	] 	] 	] 	] 	] 	] 	] 	] 	] 	]r7   ru   RMSNormc                   T     e Zd Zd	deddf fdZdej        dej        fdZd Z xZ	S )
ru   rv   rw   rT   Nc                     t                                                       t          j        t	          j        |                    | _        || _        dS )zC
            Qwen2RMSNorm is equivalent to T5LayerNorm
            N)r*   r+   r   	Parameterrp   onesweightvariance_epsilonr{   s      r6   r+   zQwen2RMSNorm.__init__p   sD     GG,uz+'>'>??DK$'D!!!r7   rO   c                    |j         }|                    t          j                  }|                    d                              dd          }|t          j        || j        z             z  }| j        |                    |          z  S )Nr   rV   T)keepdim)	dtypetorp   float32powmeanrsqrtr   r   )r3   rO   input_dtypevariances       r6   rn   zQwen2RMSNorm.forwardx   s|    '-K),,U];;M$((++00T0BBH)EK4CX8X,Y,YYM;!1!1+!>!>>>r7   c                 H    t          | j        j                   d| j         S )Nz, eps=)rr   r   r\   r   )r3   s    r6   
extra_reprzQwen2RMSNorm.extra_repr   s&    DK-..MMd6KMMMr7   r|   )
r8   r9   r:   r}   r+   rp   rq   rn   r   r;   r<   s   @r6   ru   ru   n   s        	( 	(U 	(d 	( 	( 	( 	( 	( 	(	? 	?%, 	? 	? 	? 	?	N 	N 	N 	N 	N 	N 	Nr7   c                   (     e Zd Zdedef fdZ xZS )Qwen2DecoderLayerr4   r?   c                 r    t                                          ||           |j        |         | _        d S )N)r4   r?   )r*   r+   rI   attention_typerK   s      r6   r+   zQwen2DecoderLayer.__init__   s6    )<<<$0;r7   )r8   r9   r:   r$   ro   r+   r;   r<   s   @r6   r   r      sK        <{ <s < < < < < < < < < <r7   r   c                       e Zd ZdS )Qwen2PreTrainedModelNr8   r9   r:    r7   r6   r   r              Dr7   r   c                       e Zd Zdef fdZee	 	 	 	 	 	 	 ddeej	                 deej
                 deej	                 dee         deej                 d	ee         d
eej	                 dee         defd                        Z xZS )
Qwen2Modelr4   c                 p    t                                          |           d| j        j        v | _        d S )NrA   )r*   r+   r4   rI   has_sliding_layersr2   s     r6   r+   zQwen2Model.__init__   s3       "59P"Pr7   N	input_idsrQ   position_idsrM   inputs_embeds	use_cacherR   rS   rT   c                    |d u |d uz  rt          d          ||                     |          }|r|t          | j                  }|B||                                nd}	t          j        |	|	|j        d         z   |j                  }||	                    d          }t          |x}
t                    s2| j        |||||d}dt          di |i}
| j        rt          di ||
d<   |}|                     ||          }| j        d | j        j                 D ]} ||f|
|j                 |||||d	|}|                     |          }t)          ||r|nd 
          S )Nz:You must specify exactly one of input_ids or inputs_embeds)r4   r   r#   )device)r4   input_embedsrQ   rR   rM   r   full_attentionrA   )rQ   r   rM   r   rR   rP   )last_hidden_staterM   r   )
ValueErrorembed_tokensr	   r4   get_seq_lengthrp   aranger\   r   	unsqueeze
isinstancedictr   r   r   
rotary_emblayersnum_hidden_layersr   normr   )r3   r   rQ   r   rM   r   r   rR   rS   past_seen_tokenscausal_mask_mappingmask_kwargsrO   rP   decoder_layers                  r6   rn   zQwen2Model.forward   s    -t";< 	[YZZZ  --i88M 	?0*$+>>>O!CRC^==???de"\ "2]5H5K"KTaTh  N )33A66L ?-FF 	l + -"0"0#2 , K !"4"C"C{"C"C# & l;\;k;k_j;k;k#$78% #oom\JJ![)H4;+H)HI 
	 
	M)M	2=3OP) /#-$7	 	 	 	MM 		-00&+/8BOOd
 
 
 	
r7   )NNNNNNN)r8   r9   r:   r$   r+   r   r   r   rp   rs   rq   r   FloatTensorboolr   r   r   rn   r;   r<   s   @r6   r   r      s1       Q{ Q Q Q Q Q Q  151537+/59$(59E
 E
E,-E
 !.E
 u/0	E

 "%E
   12E
 D>E
 !!12E
 +,E
 
!E
 E
 E
 ^ E
 E
 E
 E
 E
r7   r   c                       e Zd ZdS )Qwen2ForCausalLMNr   r   r7   r6   r   r      r   r7   r   c                       e Zd ZdS )Qwen2ForSequenceClassificationNr   r   r7   r6   r   r      r   r7   r   c                       e Zd ZdS )Qwen2ForTokenClassificationNr   r   r7   r6   r   r      r   r7   r   c                       e Zd ZdS )Qwen2ForQuestionAnsweringNr   r   r7   r6   r   r      r   r7   r   )r   r   r   ru   r   r   r   )Atypingr   r   rp   	packagingr   r   cache_utilsr   r	   integrationsr
   masking_utilsr   r   modeling_flash_attention_utilsr   modeling_outputsr   modeling_utilsr   processing_utilsr   utilsr   r   r   utils.deprecationr   utils.genericr   utils.import_utilsr   llama.modeling_llamar   r   r   r   r   r   r   r   r    r!   mistral.modeling_mistralr"   configuration_qwen2r$   
get_loggerr8   loggerr&   r>   parser~   ru   Moduler   r   r   r   r   r   r   __all__r   r7   r6   <module>r      sD   % % % % % % % %              . . . . . . . . 7 7 7 7 7 7 R R R R R R R R B B B B B B      6 5 5 5 5 5 & & & & & & @ @ @ @ @ @ @ @ @ @ 0 0 0 0 0 0 / / / / / / 3 3 3 3 3 3                        4 3 3 3 3 3 , , , , , , 
	H	%	%Y Y Y Y Yx Y Y Y4) 4) 4) 4) 4)^ 4) 4) 4)n 7=""$$%%w)?)???] ] ] ] ]rz ] ] ] ] ! ++N N N N Nry N N ,+N(< < < < <) < < <	 	 	 	 	/ 	 	 	L
 L
 L
 L
 L
 L
 L
 L
^	 	 	 	 	' 	 	 		 	 	 	 	%C 	 	 		 	 	 	 	"= 	 	 		 	 	 	 	 9 	 	 	  r7   