
    .`i;4                        d Z ddlmZ ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZmZ dd
lmZmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZ ddlm Z m!Z! ddl"m#Z#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z*m+Z+m,Z,m-Z-m.Z.  G d dej/                  Z0 G d dej/                  Z1 G d dej/                  Z2e G d dej/                              Z3 G d dej/        e(          Z4dS ) zPyTorch Starcoder2 model.    )Iterable)isliceN)nn)Starcoder2Config)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)
get_act_fn)ColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)IntermediateTensors   )
SupportsPP)AutoWeightsLoaderis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc            	       v     e Zd Z	 	 	 ddededz  dedz  def fdZdej	        d	ej	        d
ej	        fdZ
 xZS )Starcoder2AttentionN configcache_configquant_configprefixc           
         t                                                       || _        |j        | _        t	                      }|j        | _        | j        |z  dk    sJ | j        |z  | _        |j        | _	        | j	        |k    r| j	        |z  dk    sJ n|| j	        z  dk    sJ t          d| j	        |z            | _        | j        | j        z  | _        | j        | j        z  | _        | j        | j        z  | _        | j        dz  | _        |j        | _        |j        | _        t%          | j        | j        | j        | j	        | j        || d          | _        t)          | j        | j        z  | j        | j        || d          | _        t-          | j        | j        |j        d          | _        t3          | j        | j        | j        | j        ||| d	
          | _        d S )Nr   r   g      z	.qkv_projbiasr%   r&   z.o_projT)max_positionrope_parametersis_neox_stylez.attn)num_kv_headsr$   r%   r&   )super__init__r#   hidden_sizer   num_attention_headstotal_num_heads	num_headsnum_key_value_headstotal_num_kv_headsmaxr-   head_dimq_sizekv_sizescalingmax_position_embeddingsuse_biasr   qkv_projr   o_projr   r+   
rotary_embr   attn)selfr#   r$   r%   r&   tp_size	__class__s         y/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/starcoder2.pyr/   zStarcoder2Attention.__init__A   s    	!-688%9#g-2222-8"("<"g-- *W499999 T4499994#:g#EFF(D,@@nt}4(4=8}d*'-'E$)M #%'''
 
 
 ( 4=0%%%%
 
 
 #M5"2	
 
 
 NML*%%###
 
 
			    	positionshidden_statesreturnc                 "   |                      |          \  }}|                    | j        | j        | j        gd          \  }}}|                     |||          \  }}|                     |||          }|                     |          \  }	}|	S )N)dim)r=   splitr8   r9   r?   r@   r>   )
rA   rF   rG   qkv_qkvattn_outputoutputs
             rD   forwardzStarcoder2Attention.forward   s    
 }--Q))T[$,E2)NN1ay!Q//1ii1a((KK,,	rE   NNr"   __name__
__module____qualname__r   r	   r   strr/   torchTensorrT   __classcell__rC   s   @rD   r!   r!   @   s         ,026>
 >
 >
 "D(>
 )4/	>

 >
 >
 >
 >
 >
 >
@
<
 |
 
	
 
 
 
 
 
 
 
rE   r!   c                   \     e Zd Z	 	 d
dededz  def fdZdej        dej        fd	Z	 xZ
S )Starcoder2MLPNr"   r#   r%   r&   c                 &   t                                                       t          |j        |j        |j        || d          | _        t          |j        |j        |j        || d          | _        t          |j
                  | _        d S )Nz.c_fcr(   z.c_proj)r.   r/   r   r0   intermediate_sizer<   c_fcr   c_projr   
hidden_actact)rA   r#   r%   r&   rC   s       rD   r/   zStarcoder2MLP.__init__   s     	($%###
 
 
	 ($%%%%
 
 
 f/00rE   rG   rH   c                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)rc   rf   rd   )rA   rG   rN   s      rD   rT   zStarcoder2MLP.forward   sE    99]33q//;;}55qrE   )Nr"   )rW   rX   rY   r   r   rZ   r/   r[   r\   rT   r]   r^   s   @rD   r`   r`      s         37	1 1 1 )4/1 	1 1 1 1 1 1.U\ el        rE   r`   c            	       v     e Zd Z	 	 	 ddededz  dedz  def fdZdej	        d	ej	        d
ej	        fdZ
 xZS )Starcoder2DecoderLayerNr"   r#   r$   r%   r&   c                 ^   t                                                       |j        | _        t          |||| d          | _        t          ||| d          | _        t          j        |j        |j	                  | _
        t          j        |j        |j	                  | _        d S )Nz
.self_attnr%   r&   z.mlpeps)r.   r/   r0   r!   	self_attnr`   mlpr   	LayerNormnorm_epsiloninput_layernormpost_attention_layernorm)rA   r#   r$   r%   r&   rC   s        rD   r/   zStarcoder2DecoderLayer.__init__   s     	!-,%(((	
 
 
 !ooo
 
 
  "|F,>FDWXXX(*F$7)
 )
 )
%%%rE   rF   rG   rH   c                     |}|                      |          }|                     ||          }||z   }|}|                     |          }|                     |          }||z   }|S )N)rF   rG   )rs   ro   rt   rp   )rA   rF   rG   residuals       rD   rT   zStarcoder2DecoderLayer.forward   s     !,,];;' ' 
 
 !=0 !55mDD// =0rE   rU   rV   r^   s   @rD   rj   rj      s         ,026
 
 
 "D(
 )4/	

 
 
 
 
 
 
.< | 
	       rE   rj   c                        e Zd Zdddedef fdZdej        dej        fdZ	 ddej        d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
deeeej        f                  dee         fdZ xZS )Starcoder2Modelr"   r&   vllm_configr&   c                   t                                                       |j        j        |j        |j        | _        j        | _        t          j        j	        | d          | _
        t          j        fd| d          \  | _        | _        | _        t!          j        j	        j                  | _        t)          dgj	                  | _        d S )Nz.embed_tokensrl   c                 *    t          |           S )Nrl   )rj   )r&   r$   r#   r%   s    rD   <lambda>z*Starcoder2Model.__init__.<locals>.<lambda>   s     1<   rE   z.layersry   rm   rG   )r.   r/   model_config	hf_configr$   r%   r#   
vocab_sizer   r0   embed_tokensr   num_hidden_layersstart_layer	end_layerlayersr   rq   rr   normr   make_empty_intermediate_tensors)rA   rz   r&   r$   r#   r%   rC   s      @@@rD   r/   zStarcoder2Model.__init__   s   )3"/"/ +2%+++	
 
 
 9D$      %%%9
 9
 9
5$.$+ L!39LMMM	/Vv10
 0
,,,rE   	input_idsrH   c                 ,    |                      |          S rh   )r   rA   r   s     rD   embed_input_idszStarcoder2Model.embed_input_ids   s      +++rE   NrF   intermediate_tensorsinputs_embedsc                 J   t                      j        r||}n"|                     |          }n|J |d         }t          | j        | j        | j                  D ]} |||          }t                      j        st          d|i          S | 	                    |          }|S )NrG   )
r   is_first_rankr   r   r   r   r   is_last_rankr   r   )rA   r   rF   r   r   rG   layers          rD   rT   zStarcoder2Model.forward   s     >>' 	B( - $ 4 4Y ? ?'3330AMDK)94>JJ 	< 	<E!E)];;MM~~* 	I&'GHHH		-00rE   weightsc                    g d}t          |                     d                    }t                      }|D ]\  }}|D ]O\  }}}	||vr|                    ||          }t	          ||           r2||         }
|
j        } ||
||	            nNt          ||          }|jt	          ||           r{||         }
t          |
dt                    } ||
|           |	                    |           |S )N))r=   q_projrO   )r=   k_projrP   )r=   v_projrQ   F)remove_duplicateweight_loader)
dictnamed_parameterssetreplacer   r   r   getattrr   add)rA   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
param_nameweight_nameshard_idparamr   s               rD   load_weightszStarcoder2Model.load_weights  s?   "
 "
 "
 400%0HHII"%%%#* 	$ 	$D-5K 4 41
Kd**||K<<*466 #D) % 3e]H===0{CC<*466 #D) '@U V Ve]333d####rE   rh   )rW   rX   rY   r
   rZ   r/   r[   r\   r   r   rT   r   tupler   r   r]   r^   s   @rD   rx   rx      s       AC 
 
 
z 
3 
 
 
 
 
 
8, ,%, , , , , .2 < < 2D8	
 |d* 
+	+   ,HU33D-E$F 3s8        rE   rx   c                       e Zd Zdddedef fdZdej        dej        fdZ	 	 ddej        d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
dej        dej        d	z  fdZdeeeej        f                  dee         fdZ xZS )Starcoder2ForCausalLMr"   ry   rz   r&   c                   t                                                       |j        j        }|j        }|| _        t          |t          |d                    | _        |j	        | _	        |j
        r| j        j        | _        n%t          |j	        |j        || d          | _        t          |j	                  | _        | j        j        | _        d S )Nmodel)rz   r&   z.lm_headrl   )r.   r/   r~   r   r%   r#   rx   r   r   r   tie_word_embeddingsr   lm_headr   r0   r   logits_processorr   )rA   rz   r&   r#   r%   rC   s        rD   r/   zStarcoder2ForCausalLM.__init__6  s    )3"/$#L,I,I
 
 

 !+% 	:2DLL)!") ***	  DL !00A B BJ6 	,,,rE   r   rH   c                 6    | j                             |          S rh   )r   r   r   s     rD   r   z%Starcoder2ForCausalLM.embed_input_idsN  s    z)))444rE   NrF   r   r   c                 6    |                      ||||          }|S rh   )r   )rA   r   rF   r   r   rG   s         rD   rT   zStarcoder2ForCausalLM.forwardQ  s)     

y"6
 
 rE   rG   c                 <    |                      | j        |          }|S rh   )r   r   )rA   rG   logitss      rD   compute_logitsz$Starcoder2ForCausalLM.compute_logits]  s      &&t|]CCrE   r   c                 l    t          | | j        j        rdgnd           }|                    |          S )Nzlm_head.weight)skip_prefixes)r   r#   r   r   )rA   r   loaders      rD   r   z"Starcoder2ForCausalLM.load_weightsd  sG    " '+k&EO!""4
 
 
 ""7+++rE   )NN)rW   rX   rY   r
   rZ   r/   r[   r\   r   r   rT   r   r   r   r   r   r]   r^   s   @rD   r   r   5  sN       AC 
 
 
z 
3 
 
 
 
 
 
05 5%, 5 5 5 5 <@-1
 
<
 <
 2D8	

 |d*
 
+	+
 
 
 
| 
	   	,HU33D-E$F 	,3s8 	, 	, 	, 	, 	, 	, 	, 	,rE   r   )5__doc__collections.abcr   	itertoolsr   r[   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr	   r
   vllm.distributedr   r   %vllm.model_executor.layers.activationr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   r   vllm.sequencer   
interfacesr   utilsr   r   r   r   r   Moduler!   r`   rj   rx   r    rE   rD   <module>r      s  ,    $ $ $ $ $ $              ) ) ) ) ) ) * * * * * * = = = = = = / / / / / / / / O O O O O O O O < < < < < <         
 H G G G G G F F F F F F @ @ @ @ @ @               . - - - - - " " " " " "             K K K K K") K K K\    BI   >, , , , ,RY , , ,^ U U U U Ubi U U Up8, 8, 8, 8, 8,BIz 8, 8, 8, 8, 8,rE   