
    .`ig/                        d Z ddlmZ ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZmZ dd
lmZmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZmZ ddl m!Z! ddl"m#Z# ddl$m%Z%m&Z& ddl'm(Z(m)Z)m*Z*m+Z+m,Z,  G d dej-                  Z. G d dej-                  Z/ G d dej-                  Z0e G d dej-                              Z1 G d dej-        e%e&          Z2dS )zDInference-only GPTBigCode model compatible with HuggingFace weights.    )Iterable)isliceN)nn)GPTBigCodeConfig)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)
get_act_fn)ColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)IntermediateTensors   )SupportsLoRA
SupportsPP)AutoWeightsLoaderis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc            	       h     e Zd Z	 	 	 ddededz  dedz  def fdZdej	        d	ej	        fd
Z
 xZS )GPTBigCodeAttentionN configcache_configquant_configprefixc           
         t                                                       |j        | _        |j        }t	                      | _        || j        z  dk    sJ || j        z  | _        | j        |z  | _        | j        dz  | _        |j	        | _	        | j	        r
d}d| _
        n|}| j        | _
        | j        | j
        z  | _        t          | j        | j        ||d|| d          | _        t          | j        | j        d|| d          | _        t!          | j        | j        | j        | j
        ||| d	          | _        d S )
Nr   g      r   Tz.c_attnbiasr$   r%   .c_proj.attn)scalenum_kv_headsr#   r$   r%   )super__init__hidden_sizenum_attention_headsr    tensor_model_parallel_world_size	num_headshead_dimr+   multi_queryr,   kv_dimr   c_attnr   c_projr   attn)selfr"   r#   r$   r%   total_num_headstotal_num_kv_heads	__class__s          z/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/gpt_bigcode.pyr.   zGPTBigCodeAttention.__init__=   s{    	!- 40T0V0V-!FF!KKKK(D,QQ(O;]D(
!- 	/!" !D!0 $Dmd&77'M%%%%
 
 
 (%%%%
 
 
 NM**%%###
 
 
			    hidden_statesreturnc                     |                      |          \  }}|                    | j        | j        z  | j        | j        gd          \  }}}|                     |||          }|                     |          \  }}|S )N)dim)r6   splitr/   r1   r5   r8   r7   )r9   r?   qkv_qkvattn_outputs           r=   forwardzGPTBigCodeAttention.forwardp   s     ]++Q)) D$II
   
 
1a ii1a(([11Qr>   NNr!   __name__
__module____qualname__r   r	   r   strr.   torchTensorrK   __classcell__r<   s   @r=   r    r    <   s         ,0261
 1
 1
 "D(1
 )4/	1

 1
 1
 1
 1
 1
 1
f| 
       r>   r    c            	       `     e Zd Z	 	 ddedededz  def fdZdej	        d	ej	        fd
Z
 xZS )	GPTBigMLPNr!   intermediate_sizer"   r$   r%   c                     t                                                       |j        }t          ||d|| d          | _        t          ||d|| d          | _        t          |j                  | _	        d S )NTz.c_fcr'   r)   )
r-   r.   r/   r   c_fcr   r7   r   activation_functionact)r9   rX   r"   r$   r%   r/   r<   s         r=   r.   zGPTBigMLP.__init__   s     	((%###
 
 
	 (%%%%
 
 
 f899r>   r?   r@   c                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)rZ   r\   r7   )r9   r?   rF   s      r=   rK   zGPTBigMLP.forward   sE    99]33q//;;}55qr>   )Nr!   )rN   rO   rP   intr   r   rQ   r.   rR   rS   rK   rT   rU   s   @r=   rW   rW      s        
 37: :: !: )4/	:
 : : : : : :2U\ el        r>   rW   c            	       h     e Zd Z	 	 	 ddededz  dedz  def fdZdej	        d	ej	        fd
Z
 xZS )GPTBigCodeBlockNr!   r"   r#   r$   r%   c                 h   t                                                       |j        }|j        |j        nd|z  }t	          j        ||j                  | _        t          |||| d          | _	        t	          j        ||j                  | _
        t          |||| d          | _        d S )N   epsr*   r%   z.mlp)r-   r.   r/   n_innerr   	LayerNormlayer_norm_epsilonln_1r    r8   ln_2rW   mlp)r9   r"   r#   r$   r%   r/   	inner_dimr<   s          r=   r.   zGPTBigCodeBlock.__init__   s     	(&,n&@FNNa+o	L&2KLLL	'L,&7G7G7G
 
 
	 L&2KLLL	Y___UUUr>   r?   r@   c                     |}|                      |          }|                     |          }||z   }|}|                     |          }|                     |          }||z   }|S )N)r?   )rj   r8   rk   rl   )r9   r?   residualrJ   feed_forward_hidden_statess        r=   rK   zGPTBigCodeBlock.forward   sz     !		-00ii'   
 
 $h. 		-00%)XXm%<%<" #==r>   rL   rM   rU   s   @r=   ra   ra      s         ,026V V V "D(V )4/	V
 V V V V V V$| 
       r>   ra   c                        e Zd Zdddedef fdZdej        dej        fdZ	 ddej        d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
deeeej        f                  dee         fdZ xZS )GPTBigCodeModelr!   rf   vllm_configr%   c                J   t                                                       |j        j        |j        |j        | _        j        rJ j        | _	        j
        | _
        t          | j
        | j	        j
                  | _        t          j        j        | j	                  | _        t#          j        fd| d          \  | _        | _        | _        t          j        | j	        j                  | _        t3          dgj                  | _        d S )N)org_num_embeddingsc                 *    t          |           S )Nrf   )ra   )r%   r#   r"   r$   s    r=   <lambda>z*GPTBigCodeModel.__init__.<locals>.<lambda>   s    ?l6   r>   z.hrf   rd   r?   )r-   r.   model_config	hf_configr#   r$   r"   add_cross_attentionr/   	embed_dim
vocab_sizer   wter   	Embeddingmax_position_embeddingswper   num_hidden_layersstart_layer	end_layerhrh   ri   ln_fr   n_embdmake_empty_intermediate_tensors)r9   rs   r%   r#   r"   r$   r<   s      @@@r=   r.   zGPTBigCodeModel.__init__   s/   )3"/"/----+ +)OT^@Q
 
 
 < >OO3>$      ===4
 4
 4
0$.$& LV5NOOO	/Vv}0
 0
,,,r>   	input_idsr@   c                 ,    |                      |          S r^   )r}   r9   r   s     r=   embed_input_idszGPTBigCodeModel.embed_input_ids   s    xx	"""r>   Nposition_idsintermediate_tensorsinputs_embedsc                 j   t                      j        r0||                     |          }||                     |          z   }n|d         }t	          | j        | j        | j                  D ]} ||          }t                      j        st          d|i          S | 
                    |          }|S )Nr?   )r   is_first_rankr   r   r   r   r   r   is_last_rankr   r   )r9   r   r   r   r   r?   layers          r=   rK   zGPTBigCodeModel.forward   s     >>' 	B$ $ 4 4Y ? ?)DHH\,B,BBMM0AMDFD$4dnEE 	1 	1E!E-00MM~~* 	I&'GHHH		-00r>   weightsc                 z   t          |                     d                    }t                      }|D ]\  }}d|v r
t          ||           r||         }t	          |dt
                    }d|v r( |||d            |||d            |||d           n |||           |                    |           |S )	NF)remove_duplicatez
.attn.biasweight_loaderzc_attn.input_scalerG   rH   rI   )dictnamed_parameterssetr   getattrr   add)r9   r   params_dictloaded_paramsnameloaded_weightparamr   s           r=   load_weightszGPTBigCodeModel.load_weights  s    400%0HHII"%%%#* 	$ 	$D-t## &tT22 %E#E?<QRRM#t++e]C888e]C888e]C8888e]333d####r>   r^   )rN   rO   rP   r
   rQ   r.   rR   rS   r   r   rK   r   tupler   r   rT   rU   s   @r=   rr   rr      s       AC 
 
 
z 
3 
 
 
 
 
 
:# #%, # # # # .2 < l 2D8	
 |d* 
+	+   ,HU33D-E$F 3s8        r>   rr   c                   $    e Zd ZddgiZdddedef fdZdej        dej        fd	Z		 	 ddej        dej        de
d
z  dej        d
z  dej        e
z  f
dZdej        dej        d
z  fdZdeeeej        f                  dee         fdZ xZS )GPTBigCodeForCausalLMr6   r!   rf   rs   r%   c                   t                                                       |j        j        }|j        }|| _        || _        t          |t          |d                    | _        | j        j	        r| j        j
        | _        n9t          | j        j        | j        j        t          |d                    | _        t          |j                  | _        | j        j        | _        d S )Ntransformer)rs   r%   lm_headrf   )r-   r.   rx   ry   r$   r"   rr   r   r   tie_word_embeddingsr}   r   r   r|   r{   r   logits_processorr   )r9   rs   r%   r"   r$   r<   s        r=   r.   zGPTBigCodeForCausalLM.__init__  s    )3"/(*#L,O,O
 
 
 ;* 	+/DLL) + *#FI66  DL !00A B B< 	,,,r>   r   r@   c                 6    | j                             |          S r^   )r   r   r   s     r=   r   z%GPTBigCodeForCausalLM.embed_input_ids5  s    //	:::r>   N	positionsr   r   c                 6    |                      ||||          }|S r^   )r   )r9   r   r   r   r   r?   s         r=   rK   zGPTBigCodeForCausalLM.forward8  s+     ((y"6
 
 r>   r?   c                 <    |                      | j        |          }|S r^   )r   r   )r9   r?   logitss      r=   compute_logitsz$GPTBigCodeForCausalLM.compute_logitsD  s      &&t|]CCr>   r   c                 p    d }| j         j        rdg}t          | |          }|                    |          S )Nzlm_head.)skip_prefixes)r"   r   r   r   )r9   r   r   loaders       r=   r   z"GPTBigCodeForCausalLM.load_weightsK  sK    ;* 	)'LM"'
 
 
 ""7+++r>   )NN)rN   rO   rP   packed_modules_mappingr
   rQ   r.   rR   rS   r   r   rK   r   r   r   r   r   rT   rU   s   @r=   r   r     s[       &
3AC 
 
 
z 
3 
 
 
 
 
 
2; ;%, ; ; ; ; <@-1
 
<
 <
 2D8	

 |d*
 
+	+
 
 
 
| 
	   ,HU33D-E$F ,3s8 , , , , , , , ,r>   r   )3__doc__collections.abcr   	itertoolsr   rR   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr	   r
   vllm.distributedr   r   %vllm.model_executor.layers.activationr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   vllm.sequencer   
interfacesr   r   utilsr   r   r   r   r   Moduler    rW   ra   rr   r    r>   r=   <module>r      s  , K J $ $ $ $ $ $              ) ) ) ) ) ) * * * * * * = = = = = = / / / / / / / / O O O O O O O O < < < < < <         
 H G G G G G F F F F F F        P O O O O O - - - - - - 0 0 0 0 0 0 0 0             C C C C C") C C CL    	   B$ $ $ $ $bi $ $ $N K K K K Kbi K K K\:, :, :, :, :,BI|Z :, :, :, :, :,r>   