
     `i&                         d Z ddlmZmZmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZmZ dd	lmZmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZmZ ddlmZ ddlmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z' ddl(m)Z)  ej*        e+          Z, G d dej-                  Z. G d de          Z/ G d de           Z0 G d de%          Z1 G d de$          Z2 G d de!          Z3 G d  d!e"          Z4 G d" d#e#          Z5g d$Z6dS )%zPyTorch Starcoder2 model.    )CallableOptionalUnionN)nn)check_model_inputs   )ACT2FN)CacheDynamicCache)create_causal_mask!create_sliding_window_causal_mask)FlashAttentionKwargs)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargslogging)deprecate_kwarg   )	MistralAttentionMistralDecoderLayerMistralForCausalLM MistralForSequenceClassificationMistralForTokenClassificationMistralModelMistralRotaryEmbeddingapply_rotary_pos_embeager_attention_forward   )Starcoder2Configc                   `     e Zd Zdef fdZdeeej                          dej        fdZ	 xZ
S )Starcoder2MLPconfigc                 4   t                                                       |j        }t          j        ||j        |j                  | _        t          j        |j        ||j                  | _        t          |j
                 | _        |j        | _        d S )Nbias)super__init__hidden_sizer   Linearintermediate_sizeuse_biasc_fcc_projr	   
hidden_actactresidual_dropout)selfr#   	embed_dim	__class__s      /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/starcoder2/modular_starcoder2.pyr(   zStarcoder2MLP.__init__8   s{    &	Ii)AXXX	i 8)&/ZZZ&+, & 7    hidden_statesreturnc                     |                      |          }|                     |          }|                     |          }t          j                            || j        | j                  }|S )Nptraining)r-   r0   r.   r   
functionaldropoutr1   r<   )r2   r7   s     r5   forwardzStarcoder2MLP.forward@   s^    		-00//M22--mt?T_c_l-mmr6   )__name__
__module____qualname__r    r(   r   tupletorchFloatTensorr?   __classcell__r4   s   @r5   r"   r"   7   sw        8/ 8 8 8 8 8 8XeE4E.F%G EL]        r6   r"   c                   N    e Zd Zddedee         f fdZ eddd          	 	 dd	ej	        d
e
ej	        ej	        f         deej	                 dee         deej                 dee         de
ej	        eej	                 ee
ej	                          f         fd            Z xZS )Starcoder2AttentionNr#   	layer_idxc                    t                                          ||           |j        | _        t          j        |j        |j        | j        z  |j                  | _	        t          j        |j        |j
        | j        z  |j                  | _        t          j        |j        |j
        | j        z  |j                  | _        t          j        |j        | j        z  |j        |j                  | _        d S )Nr#   rJ   r%   )r'   r(   r1   r   r*   r)   num_attention_headshead_dimr,   q_projnum_key_value_headsk_projv_projo_projr2   r#   rJ   r4   s      r5   r(   zStarcoder2Attention.__init__I   s    )<<< & 7i 2F4NQUQ^4^eketuuui 2F4NQUQ^4^eketuuui 2F4NQUQ^4^eketuuui :T] JFL^eketuuur6   past_key_valuepast_key_valuesz4.58)new_nameversionr7   position_embeddingsattention_maskcache_positionkwargsr8   c           
         |j         d d         }g |d| j        R }|                     |                              |                              dd          }	|                     |                              |                              dd          }
|                     |                              |                              dd          }|\  }}t          |	|
||          \  }	}
|&|||d}|                    |
|| j	        |          \  }
}t          }| j        j        dk    rt          | j        j                 } || |	|
||f| j        sdn| j        | j        t#          | j        dd           d|\  }} |j        g |dR                                  }|                     |          }t*          j                            || j        | j        	          }||fS )
Nr   r   )sincosr[   eagerg        sliding_window)r>   scalingrb   r:   )shaperN   rO   view	transposerQ   rR   r   updaterJ   r   r#   _attn_implementationr   r<   attention_dropoutrc   getattrreshape
contiguousrS   r   r=   r>   r1   )r2   r7   rY   rZ   rV   r[   r\   input_shapehidden_shapequery_states
key_statesvalue_statesr`   r_   cache_kwargsattention_interfaceattn_outputattn_weightss                     r5   r?   zStarcoder2Attention.forwardQ   s    $)#2#.88b8$-88{{=1166|DDNNqRSTT[[//44\BBLLQPQRR
{{=1166|DDNNqRSTT&S#7jRUWZ#[#[ j&#&snUUL'6'='=j,X\Xfht'u'u$J(?;+w66"9$+:Z"[$7$7
%
  $}HCC$2HL"4;0@$GG
%
 
%
 
%
 
%
!\ *k);;;;;;FFHHkk+..m++404= , 
 
 L((r6   )N)NN)r@   rA   rB   r    r   intr(   r   rD   TensorrC   r
   
LongTensorr   r   r?   rF   rG   s   @r5   rI   rI   H   s.       v v/ vHSM v v v v v v _%0A6RRR ,059.) .)|.) #5<#=>.) !.	.)
 "%.) !!12.) -..) 
u|Xel3XeEL>Q5RR	S.) .) .) SR.) .) .) .) .)r6   rI   c                   (     e Zd Zdedef fdZ xZS )Starcoder2DecoderLayerr#   rJ   c                 4   t                                          ||           t          ||          | _        t	          |          | _        t          j        |j        |j	                  | _
        t          j        |j        |j	                  | _        d S )NrL   eps)r'   r(   rI   	self_attnr"   mlpr   	LayerNormr)   norm_epsiloninput_layernormpost_attention_layernormrT   s      r5   r(   zStarcoder2DecoderLayer.__init__   s    +++,FiPPP ((!|F,>FDWXXX(*V5GVM`(a(a(a%%%r6   )r@   rA   rB   r    rv   r(   rF   rG   s   @r5   rz   rz      sX        b/ bC b b b b b b b b b br6   rz   c                       e Zd ZdS )Starcoder2RotaryEmbeddingNr@   rA   rB    r6   r5   r   r              Dr6   r   c                   (    e Zd Zdef fdZe	 	 	 	 	 	 	 ddeej                 deej	                 deej                 dee
eeej                 f                  deej                 d	ee         d
eej                 dee         defd            Z xZS )Starcoder2Modelr#   c                    t                                                     t          j        fdt	          j                  D                       | _        t          j        j        j	                  | _
        j        | _        d S )Nc                 0    g | ]}t          |          S r   )rz   ).0rJ   r#   s     r5   
<listcomp>z,Starcoder2Model.__init__.<locals>.<listcomp>   s$    hhh9#FI66hhhr6   r|   )r'   r(   r   
ModuleListrangenum_hidden_layerslayersr   r)   r   normembedding_dropout)r2   r#   r4   s    `r5   r(   zStarcoder2Model.__init__   s       mhhhhfNfHgHghhh
 
 L!39LMMM	!'!9r6   N	input_idsrZ   position_idsrV   inputs_embeds	use_cacher[   r\   r8   c                    |d u |d uz  rt          d          ||                     |          }|r|t          | j                  }|B||                                nd}	t          j        |	|	|j        d         z   |j                  }||	                    d          }| j        j
        t          nt          }
 |
| j        |||||          }|}t          j                            || j        | j                  }|                     ||          }| j        d | j        j                 D ]} ||f||||||d|}|                     |          }t-          ||r|nd 	          S )
Nz:You must specify exactly one of input_ids or inputs_embeds)r#   r   r   )device)r#   input_embedsrZ   r[   rV   r   r:   )rZ   r   rV   r   r[   rY   )last_hidden_staterV   )
ValueErrorembed_tokensr   r#   get_seq_lengthrD   arangerd   r   	unsqueezerb   r   r   r   r=   r>   r   r<   
rotary_embr   r   r   r   )r2   r   rZ   r   rV   r   r   r[   r\   past_seen_tokensmask_functioncausal_maskr7   rY   decoder_layers                  r5   r?   zStarcoder2Model.forward   s    -t";< 	[YZZZ  --i88M 	?0*$+>>>O!CRC^==???de"\ "2]5H5K"KTaTh  N )33A66L.2k.H.P**Vw#m;&))+%
 
 
 &--T3dm . 
 

 #oom\JJ![)H4;+H)HI 
	 
	M)M	*) /#-$7	 	 	 	MM 		-00&+/8BOOd
 
 
 	
r6   )NNNNNNN)r@   rA   rB   r    r(   r   r   rD   rx   rw   r   r
   listrE   boolr   r   r   r?   rF   rG   s   @r5   r   r      s"       :/ : : : : : :  151537KO59$(59?
 ?
E,-?
 !.?
 u/0	?

 "%tE4E/F(F"GH?
   12?
 D>?
 !!12?
 +,?
 
!?
 ?
 ?
 ?
 ?
 ?
 ?
 ?
r6   r   c                       e Zd ZdS )Starcoder2ForCausalLMNr   r   r6   r5   r   r      r   r6   r   c                       e Zd ZdS )#Starcoder2ForSequenceClassificationNr   r   r6   r5   r   r      r   r6   r   c                       e Zd ZdS ) Starcoder2ForTokenClassificationNr   r   r6   r5   r   r      r   r6   r   )r   r   Starcoder2PreTrainedModelr   r   )7__doc__typingr   r   r   rD   r   transformers.utils.genericr   activationsr	   cache_utilsr
   r   masking_utilsr   r   modeling_flash_attention_utilsr   modeling_outputsr   modeling_utilsr   processing_utilsr   utilsr   r   utils.deprecationr   mistral.modeling_mistralr   r   r   r   r   r   r   r   r   configuration_starcoder2r    
get_loggerr@   loggerModuler"   rI   rz   r   r   r   r   r   __all__r   r6   r5   <module>r      s!  (    , , , , , , , , , ,        9 9 9 9 9 9 ! ! ! ! ! ! . . . . . . . . R R R R R R R R B B B B B B 7 7 7 7 7 7 5 5 5 5 5 5 & & & & & & 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 7 6 6 6 6 6 
	H	%	%    BI   "8) 8) 8) 8) 8)* 8) 8) 8)vb b b b b0 b b b	 	 	 	 	 6 	 	 	I
 I
 I
 I
 I
l I
 I
 I
X	 	 	 	 	. 	 	 		 	 	 	 	*J 	 	 		 	 	 	 	'D 	 	 	  r6   