
     `i                        d dl mZmZ d dlZd dlmZ d dlmZ ddlmZm	Z	 ddl
mZmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z' ddl(m)Z)  ej*        e+          Z, G d de#          Z- G d de          Z. G d de          Z/ G d de%          Z0 G d de$          Z1 G d de           Z2 G d d e"          Z3 G d! d"e!          Z4 G d# d$ee0          Z5g d%Z6dS )&    )CallableOptionalN)nn)check_model_inputs   )CacheDynamicCache)create_causal_mask!create_sliding_window_causal_mask)FlashAttentionKwargs)GenericForQuestionAnswering)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargsauto_docstringlogging)deprecate_kwarg   )
LlamaAttentionLlamaDecoderLayerLlamaForCausalLMLlamaForSequenceClassificationLlamaForTokenClassificationLlamaMLP
LlamaModelLlamaPreTrainedModelapply_rotary_pos_embeager_attention_forward   )MistralConfigc                        e Zd Z fdZ xZS )
MistralMLPc                 .   t                                          |           t          j        | j        | j        d          | _        t          j        | j        | j        d          | _        t          j        | j        | j        d          | _        d S )NFbias)	super__init__r   Linearhidden_sizeintermediate_size	gate_projup_proj	down_proj)selfconfig	__class__s     /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/mistral/modular_mistral.pyr(   zMistralMLP.__init__&   s|       4#3T5KRWXXXy!143IPUVVV4#94;KRWXXX    )__name__
__module____qualname__r(   __classcell__r1   s   @r2   r#   r#   %   sA        Y Y Y Y Y Y Y Y Yr3   r#   c                       e Zd Zdedef fdZ eddd          	 	 dd	ej        d
e	ej        ej        f         de
ej                 de
e         de
ej                 dee         de	ej        e
ej                 f         fd            Z xZS )MistralAttentionr0   	layer_idxc                    t                                          ||           t          |dd           p|j        |j        z  | _        t          j        |j        |j        | j        z  d          | _        t          j        |j        |j	        | j        z  d          | _
        t          j        |j        |j	        | j        z  d          | _        t          j        |j        | j        z  |j        d          | _        d S )Nhead_dimFr%   )r'   r(   getattrr*   num_attention_headsr=   r   r)   q_projnum_key_value_headsk_projv_projo_projr/   r0   r;   r1   s      r2   r(   zMistralAttention.__init__.   s    +++
D99mV=OSYSm=mi 2F4NQUQ^4^ejkkki 2F4NQUQ^4^ejkkki 2F4NQUQ^4^ejkkki :T] JFL^ejkkkr3   past_key_valuepast_key_valuesz4.58)new_nameversionNhidden_statesposition_embeddingsattention_maskcache_positionkwargsreturnc           
      n   |j         d d         }g |d| j        R }|                     |                              |                              dd          }	|                     |                              |                              dd          }
|                     |                              |                              dd          }|\  }}t          |	|
||          \  }	}
|&|||d}|                    |
|| j	        |          \  }
}t          }| j        j        dk    rt          | j        j                 } || |	|
||f| j        sdn| j        | j        t#          | j        dd           d|\  }} |j        g |dR                                  }|                     |          }||fS )	Nr    r   )sincosrM   eagerg        sliding_window)dropoutscalingrU   )shaper=   r@   view	transposerB   rC   r   updater;   r   r0   _attn_implementationr   trainingattention_dropoutrW   r>   reshape
contiguousrD   )r/   rJ   rK   rL   rG   rM   rN   input_shapehidden_shapequery_states
key_statesvalue_statesrS   rR   cache_kwargsattention_interfaceattn_outputattn_weightss                     r2   forwardzMistralAttention.forward6   s    $)#2#.88b8$-88{{=1166|DDNNqRSTT[[//44\BBLLQPQRR
{{=1166|DDNNqRSTT&S#7jRUWZ#[#[ j&#&snUUL'6'='=j,X\Xfht'u'u$J(?;+w66"9$+:Z"[$7$7
%
  $}HCC$2HL"4;0@$GG
%
 
%
 
%
 
%
!\ *k);;;;;;FFHHkk+..L((r3   )NN)r4   r5   r6   r!   intr(   r   torchTensortupler   r   
LongTensorr   r   rj   r7   r8   s   @r2   r:   r:   -   s       l} l l l l l l l _%0A6RRR ,059*) *)|*) #5<#=>*) !.	*)
 "%*) !!12*) -.*) 
u|Xel33	4*) *) *) SR*) *) *) *) *)r3   r:   c                   (     e Zd Zdedef fdZ xZS )MistralDecoderLayerr0   r;   c                     t                                          ||           t          ||          | _        t	          |          | _        d S )N)r0   r;   )r'   r(   r:   	self_attnr#   mlprE   s      r2   r(   zMistralDecoderLayer.__init__e   sD    +++)9MMMf%%r3   )r4   r5   r6   r!   rk   r(   r7   r8   s   @r2   rq   rq   d   sK        &} & & & & & & & & & & &r3   rq   c                       e Zd ZeedZdS )MistralPreTrainedModel)rJ   
attentionsN)r4   r5   r6   rq   r:   _can_record_outputs r3   r2   rv   rv   k   s#        ,& r3   rv   c                       e Zd Zee	 	 	 	 	 	 	 ddeej                 deej                 deej                 dee	         deej
                 dee         deej                 d	ee         d
efd                        ZdS )MistralModelN	input_idsrL   position_idsrG   inputs_embeds	use_cacherM   rN   rO   c                    |d u |d uz  rt          d          ||                     |          }|r|t          | j                  }|B||                                nd}	t          j        |	|	|j        d         z   |j                  }||	                    d          }| j        j
        t          nt          }
 |
| j        |||||          }|}|                     ||          }| j        d | j        j                 D ]} ||f||||||d|}|                     |          }t#          ||r|nd           S )	Nz:You must specify exactly one of input_ids or inputs_embeds)r0   r   r    )device)r0   input_embedsrL   rM   rG   r}   )rL   r}   rG   r   rM   rK   )last_hidden_staterG   )
ValueErrorembed_tokensr	   r0   get_seq_lengthrl   arangerX   r   	unsqueezerU   r
   r   
rotary_emblayersnum_hidden_layersnormr   )r/   r|   rL   r}   rG   r~   r   rM   rN   past_seen_tokensmask_functioncausal_maskrJ   rK   decoder_layers                  r2   rj   zMistralModel.forwards   s    -t";< 	[YZZZ  --i88M 	?0*$+>>>O!CRC^==???de"\ "2]5H5K"KTaTh  N )33A66L.2k.H.P**Vw#m;&))+%
 
 
 &"oom\JJ![)H4;+H)HI 
	 
	M)M	*) /#-$7	 	 	 	MM 		-00&+/8BOOd
 
 
 	
r3   )NNNNNNN)r4   r5   r6   r   r   r   rl   ro   rm   r   FloatTensorboolr   r   r   rj   ry   r3   r2   r{   r{   r   s         151537+/59$(599
 9
E,-9
 !.9
 u/0	9

 "%9
   129
 D>9
 !!129
 +,9
 
!9
 9
 9
 ^ 9
 9
 9
r3   r{   c                       e Zd ZdS )MistralForCausalLMNr4   r5   r6   ry   r3   r2   r   r              Dr3   r   c                       e Zd ZdS )MistralForTokenClassificationNr   ry   r3   r2   r   r      r   r3   r   c                       e Zd ZdS ) MistralForSequenceClassificationNr   ry   r3   r2   r   r      r   r3   r   c                       e Zd ZdS )MistralForQuestionAnsweringNr   ry   r3   r2   r   r      s          r3   r   )r   r   r{   rv   r   r   )7typingr   r   rl   r   transformers.utils.genericr   cache_utilsr   r	   masking_utilsr
   r   modeling_flash_attention_utilsr   modeling_layersr   modeling_outputsr   modeling_utilsr   processing_utilsr   utilsr   r   r   utils.deprecationr   llama.modeling_llamar   r   r   r   r   r   r   r   r   r   configuration_mistralr!   
get_loggerr4   loggerr#   r:   rq   rv   r{   r   r   r   r   __all__ry   r3   r2   <module>r      sG   % % % % % % % %        9 9 9 9 9 9 . . . . . . . . R R R R R R R R B B B B B B      8 7 7 7 7 7 5 5 5 5 5 5 & & & & & & @ @ @ @ @ @ @ @ @ @ 0 0 0 0 0 0                        1 0 0 0 0 0 
	H	%	%Y Y Y Y Y Y Y Y4) 4) 4) 4) 4)~ 4) 4) 4)n& & & & &+ & & &    1   <
 <
 <
 <
 <
: <
 <
 <
~	 	 	 	 	) 	 	 		 	 	 	 	$? 	 	 		 	 	 	 	'E 	 	 	 \ [ [ [ ["=?U [ [ [  r3   