
     `ib-                        d dl mZmZ d dlZd dlmZ ddlmZmZ ddl	m
Z
 ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZ ddlmZ ddlmZmZmZmZmZm Z m!Z!m"Z" ddl#m$Z$  ej%        e&          Z'dZ(dZ) G d de          Z* G d de          Z+ G d de          Z, G d de           Z- G d de          Z. G d de          Z/ G d d e          Z0 G d! d"e          Z1g d#Z2dS )$    )CallableOptionalN   )CacheDynamicCache)create_causal_mask)GradientCheckpointingLayer)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargslogging)deprecate_kwarg   )CLIPMLP)LlamaAttentionLlamaForCausalLMLlamaForSequenceClassificationLlamaForTokenClassification
LlamaModelLlamaRotaryEmbeddingapply_rotary_pos_embeager_attention_forward   )	PhiConfigzmicrosoft/phi-1r   c                       e Zd Zdedef fdZ eddd          	 	 dd	ej        d
e	ej        ej        f         de
ej                 de
e         de
ej                 de	ej        e
ej                 f         fd            Z xZS )PhiAttentionconfig	layer_idxc                    t                                          ||           t          j        |j        |j        | j        z  d          | _        t          j        |j        |j        | j        z  d          | _	        t          j        |j        |j        | j        z  d          | _
        t          j        |j        | j        z  |j        d          | _        | `t          | j        |j        z            | _        |j        | _        | j        r^t          j        |j        |j        z  |j        d          | _        t          j        |j        |j        z  |j        d          | _        d S d S )NTbias)epselementwise_affine)super__init__nnLinearhidden_sizenum_attention_headshead_dimq_projnum_key_value_headsk_projv_projdenseo_projintpartial_rotary_factorrotary_ndimsqk_layernorm	LayerNormlayer_norm_epsq_layernormk_layernormselfr   r   	__class__s      w/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/phi/modular_phi.pyr&   zPhiAttention.__init__%   s`   +++i 2F4NQUQ^4^eijjji 2F4NQUQ^4^eijjji 2F4NQUQ^4^eijjjYv9DMI6K]dhiii
K0L LMM"/ 	!|"f&@@fF[pt     D  "|"f&@@fF[pt     D		 	    past_key_valuepast_key_values4.58new_nameversionNhidden_statesposition_embeddingsattention_maskcache_positionreturnc                    |j         d d         }g |d| j        R }|                     |                              |                              dd          }	|                     |                              |                              dd          }
|                     |                              |                              dd          }| j        r*|                     |	          }	| 	                    |
          }
|\  }}|	dd | j
        f         |	d| j
        d f         }}|
dd | j
        f         |
d| j
        d f         }}t          ||||          \  }}t          j        ||fd          }	t          j        ||fd          }
|&|||d}|                    |
|| j        |          \  }
}t           }| j        j        dk    rt&          | j        j                 } || |	|
||f| j        sdn| j        | j        d	|\  }} |j        g |dR                                  }|                     |          }||fS )
Nr   r   .)dim)sincosrH   eagerg        )dropoutscaling)shaper+   r,   view	transposer.   r/   r5   r8   r9   r4   r   torchcatupdater   r   r   _attn_implementationr   trainingattention_dropoutrQ   reshape
contiguousr0   )r;   rE   rF   rG   r@   rH   kwargsinput_shapehidden_shapequery_states
key_statesvalue_statesrN   rM   	query_rot
query_passkey_rotkey_passcache_kwargsattention_interfaceattn_outputattn_weightss                         r=   forwardzPhiAttention.forward6   s    $)#2#.88b8$-88{{=1166|DDNNqRSTT[[//44\BBLLQPQRR
{{=1166|DDNNqRSTT 	6++L99L))*55J&S 1 1112d/1112 	
 s/d///0sD-///0 
 2)Wc3OO	7 y)Z!8bAAAY2;;;
&#&snUUL'6'='=j,X\Xfht'u'u$J(?;+w66"9$+:Z"[$7$7	%
  $}HCC$2HL	%
 	%
 	%
 	%
!\ *k);;;;;;FFHHjj--L((r>   )NN)__name__
__module____qualname__r   r2   r&   r   rU   Tensortupler   r   
LongTensorrk   __classcell__r<   s   @r=   r   r   $   s        y S      " _%0A6RRR ,059;) ;)|;) #5<#=>;) !.	;)
 "%;) !!12;) 
u|Xel33	4;) ;) ;) SR;) ;) ;) ;) ;)r>   r   c                       e Zd ZdS )PhiMLPNrl   rm   rn    r>   r=   ru   ru   u           Dr>   ru   c                   v    e Zd Zdedef fdZ eddd          	 	 	 	 	 	 	 dd
ej        de	ej                 de	ej
                 de	e         de	e         de	e         de	ej
                 de	eej        ej        f                  deej        e	eej        ej        f                  f         fd            Z xZS )PhiDecoderLayerr   r   c                 "   t                                                       t          ||          | _        t	          |          | _        t          j        |j        |j	                  | _
        t          j        |j                  | _        d S )N)r   r#   )r%   r&   r   	self_attnru   mlpr'   r6   r)   r7   input_layernormDropoutresid_pdropresid_dropoutr:   s      r=   r&   zPhiDecoderLayer.__init__z   sr    %f	BBB&>>!|F,>FDYZZZZ(:;;r>   r?   r@   rA   rB   NFrE   rG   position_idsoutput_attentions	use_cacherH   rF   rI   c	                    |}
|                      |          } | j        d||||||||d|	\  }}|                     |          }|                     |                     |                    }||z   |
z   }|f}|r||fz  }|S )N)rE   rG   r   r@   r   r   rH   rF   rw   )r   r}   r   r~   )r;   rE   rG   r   r@   r   r   rH   rF   r]   residualattn_outputsself_attn_weightsfeed_forward_hidden_statesoutputss                  r=   rk   zPhiDecoderLayer.forward   s     !,,];; +9$. 
+
')%+/) 3
+
 
+
 
+
 
+
'' )),77%)%7%78O8O%P%P"$'AAHL " 	,)++Gr>   )NNNFFNN)rl   rm   rn   r   r2   r&   r   rU   ro   r   rq   r   boolrp   FloatTensorrk   rr   rs   s   @r=   rz   rz   y   sN       <y <S < < < < < < _%0A6RRR 2637+/,1$)59KO% %|% !.% u/0	%
 "%% $D>% D>% !!12% &eEL%,,F&GH% 
u (51BEDU1U+V"WW	X% % % SR% % % % %r>   rz   c                       e Zd ZdS )PhiRotaryEmbeddingNrv   rw   r>   r=   r   r      rx   r>   r   c                       e Zd Zdef fdZ	 	 	 	 	 	 	 	 	 ddeej                 deej                 deej                 dee	         deej
                 d	ee         d
ee         dee         deej                 dee         defdZ xZS )PhiModelr   c                 D   t                                                     t          j        fdt	          j                  D                       | _        t          j        j                  | _	        t          j
        j        j                  | _        | `d S )Nc                 0    g | ]}t          |          S rw   )rz   ).0r   r   s     r=   
<listcomp>z%PhiModel.__init__.<locals>.<listcomp>   s#    aaaI_VY//aaar>   r|   )r%   r&   r'   
ModuleListrangenum_hidden_layerslayersr   
embd_pdropembed_dropoutr6   r)   r7   final_layernormnormr;   r   r<   s    `r=   r&   zPhiModel.__init__   s       maaaavG_A`A`aaa
 
  Z(9::!|F,>FDYZZZIIIr>   N	input_idsrG   r   r@   inputs_embedsr   r   output_hidden_statesrH   r]   rI   c
                    ||n| j         j        }||n| j         j        }||n| j         j        }|d u |d uz  rt	          d          | j        r%| j        r|rt                              d           d}|| 	                    |          }|r|t          | j                   }|	B||                                nd}t          j        |||j        d         z   |j                  }	||	                    d          }t#          | j         |||	||          }|                     |          }|}|                     ||          }|rd	nd }|rd	nd }| j        d | j         j                 D ]1}|r||fz  } ||f||||||	|d
|
}|d         }|r||d         fz  }2|                     |          }|r||fz  }t/          ||r|nd ||          S )Nz:You must specify exactly one of input_ids or inputs_embedszX`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.F)r   r   r   )device)r   input_embedsrG   rH   r@   r   rw   )rG   r   r@   r   r   rH   rF   )last_hidden_stater@   rE   
attentions)r   r   r   r   
ValueErrorgradient_checkpointingrY   loggerwarning_onceembed_tokensr   get_seq_lengthrU   arangerR   r   	unsqueezer   r   
rotary_embr   r   r   r
   )r;   r   rG   r   r@   r   r   r   r   rH   r]   past_seen_tokenscausal_maskrE   rF   all_hidden_statesall_self_attnsdecoder_layerlayer_outputss                      r=   rk   zPhiModel.forward   s    2C1N--TXT_Tq$8$D  $+Jj 	 "+!6IIDK<Q	-t";< 	[YZZZ& 	4= 	Y 	j   I  --i88M 	?0*$+>>>O!CRC^==???de"\ "2]5H5K"KTaTh  N )33A66L(;&))+%
 
 
 **=99% #oom\JJ #7@BBD0:d![)H4;+H)HI 	6 	6M# 6!m%55!)M
*) /"3#-$7
 
 
 
M *!,M  6=#3"55,,];;   	2-!11&+/8BOOd+%	
 
 
 	
r>   )	NNNNNNNNN)rl   rm   rn   r   r&   r   rU   rq   ro   r   r   r   r   r   r
   rk   rr   rs   s   @r=   r   r      s7       y       151537+/59$(,0/359^
 ^
E,-^
 !.^
 u/0	^

 "%^
   12^
 D>^
 $D>^
 'tn^
 !!12^
 +,^
 
!^
 ^
 ^
 ^
 ^
 ^
 ^
 ^
r>   r   c                        e Zd Z fdZ xZS )PhiForCausalLMc                     t                                          |           t          j        |j        |j        d          | _        d S )NTr!   )r%   r&   r'   r(   r)   
vocab_sizelm_headr   s     r=   r&   zPhiForCausalLM.__init__  s=       y!3V5FTRRRr>   )rl   rm   rn   r&   rr   rs   s   @r=   r   r     sA        S S S S S S S S Sr>   r   c                       e Zd ZdS )PhiForSequenceClassificationNrv   rw   r>   r=   r   r     rx   r>   r   c                       e Zd ZdS )PhiForTokenClassificationNrv   rw   r>   r=   r   r   #  rx   r>   r   )PhiPreTrainedModelr   r   r   r   )3typingr   r   rU   torch.nnr'   cache_utilsr   r   masking_utilsr   modeling_layersr	   modeling_outputsr
   modeling_utilsr   processing_utilsr   utilsr   r   utils.deprecationr   clip.modeling_clipr   llama.modeling_llamar   r   r   r   r   r   r   r   configuration_phir   
get_loggerrl   r   _CHECKPOINT_FOR_DOC_CONFIG_FOR_DOCr   ru   rz   r   r   r   r   r   __all__rw   r>   r=   <module>r      s   % % % % % % % %        . . . . . . . . / / / / / / 9 9 9 9 9 9      6 5 5 5 5 5 & & & & & & 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ( ( ( ( ( (	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ) ( ( ( ( ( 
	H	%	%' N) N) N) N) N)> N) N) N)b	 	 	 	 	W 	 	 	. . . . .0 . . .b	 	 	 	 	- 	 	 	h
 h
 h
 h
 h
z h
 h
 h
VS S S S S% S S S	 	 	 	 	#A 	 	 		 	 	 	 	 ; 	 	 	  r>   