
    .`iD                     R   d Z ddlZddlmZ ddlmZ ddlZddlmZ ddlm	Z	 ddl
mZ ddlmZ dd	lmZmZ dd
lmZmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ ddl m!Z! ddl"m#Z#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z* ddl+m,Z,m-Z-m.Z. ddl/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5  G d dej6                  Z7 G d dej6                  Z8 G d dej6                  Z9 G d dej6                  Z:e G d  d!ej6        e.                      Z; G d" d#ej6                  Z< G d$ d%e<e,e-e.          Z=dS )&z;Inference-only ChatGLM model compatible with THUDM weights.    N)Iterable)islice)nn)	LayerNorm)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)
SiluAndMul)RMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)IntermediateTensors)ChatGLMConfig   )SupportsLoRA
SupportsPPSupportsQuant)AutoWeightsLoaderWeightsMapperis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc            	       v     e Zd Z	 	 	 ddededz  dedz  def fdZdej	        d	ej	        d
ej	        fdZ
 xZS )GLMAttentionN configcache_configquant_configprefixc           
      ,   t                                                       |j        | _        t                      }|j        | _        | j        |z  dk    sJ | j        |z  | _        |j        | _        |j        r|j        n|j        | _	        | j	        |k    r| j	        |z  dk    sJ n|| j	        z  dk    sJ t          d| j	        |z            | _        |j        | j        z  | _        | j        | j        z  | _        | j        | j        z  | _        | j        dz  | _        t!          | j        | j        | j        | j	        |j        p|j        || d          | _        t)          | j        | j        z  |j        |j        || d          | _        t-          |dd          }t-          |d	d
          }dd|z  dd}|j         }	t1          | j        |||	          | _        t5          | j        | j        | j        | j        ||| d          | _        d S )Nr   r   g      z.query_key_valuebiasr)   r*   z.dense
rope_ratiog      ?
seq_length    defaulti'  g      ?)	rope_type
rope_thetapartial_rotary_factor)max_positionrope_parametersis_neox_stylez.attn)num_kv_headsr(   r)   r*   )super__init__hidden_sizer   num_attention_headstotal_num_heads	num_headsmulti_query_attentionmulti_query_group_numtotal_num_kv_headsmaxr8   head_dimq_sizekv_sizescalingr   add_bias_linearadd_qkv_biasquery_key_valuer   densegetattroriginal_roper   
rotary_embr   attn)selfr'   r(   r)   r*   tp_sizer.   max_positionsr6   r7   	__class__s             v/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/chatglm.pyr:   zGLMAttention.__init__1   ss    	!-688%9#g-2222-8%+%A" +,F((+ 	
 "g-- *W499999 T4499994#:g#EFF*d.BBnt}4(4=8}d*0M #'>6+>%... 
  
  
 ' 4=0'%$$$
 
 

 V\377
d;;"*,%(
 
 #00"M&+'	
 
 
 NML*%%###
 
 
			    hidden_statesposition_idsreturnc                 "   |                      |          \  }}|                    | j        | j        | j        gd          \  }}}|                     |||          \  }}|                     |||          }|                     |          \  }	}|	S )N)dim)rI   splitrD   rE   rM   rN   rJ   )
rO   rU   rV   qkv_qkvcontext_layerattn_outputs
             rS   forwardzGLMAttention.forward~   s    
 %%m44Q))T[$,E2)NN1a|Q221		!Q**M22QrT   NNr&   )__name__
__module____qualname__r   r	   r   strr:   torchTensorrc   __classcell__rR   s   @rS   r%   r%   0   s         ,026K
 K
K
 "D(K
 )4/	K

 K
 K
 K
 K
 K
 K
Z
|
 l
 
	
 
 
 
 
 
 
 
rT   r%   c                   B     e Zd ZdZ	 	 d	dededz  def fdZd Z xZ	S )
GLMMLPzMLP.

    MLP will take the input with h hidden state, project it to 4*h
    hidden dimension, perform nonlinear transformation, and project the
    state back into h hidden dimension.
    Nr&   r'   r)   r*   c                 :   t                                                       |j        | _        t	          |j        |j        gdz  |j        || d          | _        t                      | _	        t          |j        |j        |j        || d          | _        d S )N   z.dense_h_to_4hr,   z.dense_4h_to_h)r9   r:   rG   add_biasr   r;   ffn_hidden_sizedense_h_to_4hr   activation_funcr   dense_4h_to_h)rO   r'   r)   r*   rR   s       rS   r:   zGLMMLP.__init__   s     	. 8#$q('%,,,
 
 
  *|| /"'%,,,
 
 
rT   c                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)rs   rt   ru   )rO   rU   intermediate_parallelr]   outputs        rS   rc   zGLMMLP.forward   sM    #'#5#5m#D#D q $ 4 45J K K&&'<==	rT   )Nr&   )
re   rf   rg   __doc__r   r   rh   r:   rc   rk   rl   s   @rS   rn   rn      s          37	
 

 )4/
 	
 
 
 
 
 
<      rT   rn   c            	       z     e Zd ZdZ	 	 	 ddededz  dedz  def fdZd	e	j
        d
e	j
        de	j
        fdZ xZS )GLMBlockzA single transformer layer.

    Transformer layer takes input with size [s, b, h] and returns an
    output of the same size.
    Nr&   r'   r(   r)   r*   c                    t                                                       |j        | _        |j        | _        |j        rt
          nt          } ||j        |j                  | _	        t          |||| d          | _        |j        | _         ||j        |j                  | _        t          ||| d          | _        d S )Nepsz.self_attentionr*   z.mlp)r9   r:   (apply_residual_connection_post_layernormfp32_residual_connectionrmsnormr   r   r;   layernorm_epsiloninput_layernormr%   self_attentionhidden_dropoutpost_attention_layernormrn   mlprO   r'   r(   r)   r*   layer_norm_funcrR   s         rS   r:   zGLMBlock.__init__   s     	; 	5 )/(G%%+^B''.F$< 
  
  

 +L,&7Q7Q7Q
 
 
 %3 )8F$<)
 )
 )
%
 &,&GGGrT   rU   rV   rW   c                     |                      |          }|                     ||          }| j        r|}n|}||z   }|                     |          }| j        r|}n|}|                     |          |z   }|S )NrU   rV   )r   r   r   r   r   )rO   rU   rV   layernorm_outputattention_outputresiduallayernorm_inputry   s           rS   rc   zGLMBlock.forward   s      //>>..*% / 
 
 8 	%'HH$H"%55  88II 8 	''HH&H*++h6rT   rd   )re   rf   rg   rz   r   r	   r   rh   r:   ri   rj   rc   rk   rl   s   @rS   r|   r|      s          ,026 H  H H "D( H )4/	 H
  H  H  H  H  H  HD!|! l! 
	! ! ! ! ! ! ! !rT   r|   c            	            e Zd ZdZ	 	 	 ddededz  dedz  def fdZd	e	j
        d
e	j
        de	j
        ez  fdZ xZS )GLMTransformerzTransformer class.Nr&   r'   r(   r)   r*   c                    t                                                       j        | _        j        | _        t	          | j        fd| d          \  | _        | _        | _        | j        r1j        rt          nt          } |j        j                  | _        t          dgj                  | _        d S )Nc                 *    t          |           S )Nr   )r|   )r*   r(   r'   r)   s    rS   <lambda>z)GLMTransformer.__init__.<locals>.<lambda>  s    8FL,vVVV rT   z.layersr   r~   rU   )r9   r:   post_layer_norm
num_layersr"   start_layer	end_layerlayersr   r   r   r;   r   final_layernormr!   make_empty_intermediate_tensorsr   s    ```  rS   r:   zGLMTransformer.__init__
  s     	%5 !+ 9DOVVVVVV%%%9
 9
 9
5$.$+  	)/FggYO#2?"(@$ $ $D  0Wv10
 0
,,,rT   rU   rV   rW   c                     t          | j        | j        | j                  D ]} |||          }t	                      j        st          d|i          S | j        r|                     |          }|S )Nr   rU   )	r   r   r   r   r   is_last_rankr   r   r   )rO   rU   rV   layers       rS   rc   zGLMTransformer.forward)  s    
 DK)94>JJ 	 	E!E+,  MM ~~* 	I&'GHHH  	@ 00??MrT   rd   )re   rf   rg   rz   r   r	   r   rh   r:   ri   rj   r   rc   rk   rl   s   @rS   r   r     s        
 ,026
 

 "D(
 )4/	

 
 
 
 
 
 
>| l 
+	+	       rT   r   c                        e Zd ZdddgiZdddedef fdZd	ej        d
ej        fdZ		 	 dd	ej        dej        de
dz  dej        dz  ded
ej        e
z  fdZdeeeej        f                  d
ee         fdZ xZS )ChatGLMModellinear_proj.merged_projlinear_proj.gate_projlinear_proj.dense_h_to_4hr&   r   vllm_configr*   c                   t                                                       |j        j        }|j        }|j        }|| _        t          |j        |j	        || d          | _
        |j        | _        |j        | _        |j        | _        t          |||| d          | _        t!          |j        |j	        || d          | _        | j        j        | _        d S )Nz
.embedding)r)   r*   z.encoderr   z.output_layer)r9   r:   model_config	hf_configr(   r)   r'   r   padded_vocab_sizer;   	embeddingr   r@   kv_channelsr   encoderr   output_layerr   )rO   r   r*   r'   r(   r)   rR   s         rS   r:   zChatGLMModel.__init__F  s   )3"/"//$%(((	
 
 
 !+%+%A"!-%L,&7J7J7J
 
 
 +$%+++	
 
 
 L8 	,,,rT   	input_idsrW   c                 ,    |                      |          S rw   )r   rO   r   s     rS   embed_input_idszChatGLMModel.embed_input_idsh  s    ~~i(((rT   N	positionsintermediate_tensorsinputs_embedskwargsc                     t                      j        r||}n"|                     |          }n|J |d         }|                     ||          }|S )NrU   r   )r   is_first_rankr   r   )rO   r   r   r   r   r   rU   s          rS   rc   zChatGLMModel.forwardk  st     >>' 	B( - $ 4 4Y ? ?'3330AM '" % 
 

 rT   weightsc                 2   ddg}t          |                                           }t                      }|D ]\  }}|D ]i\  }}}	||vr|                    ||          }|                    d          r||vr;t          ||           rL||         }
|
j        } ||
||	            nZd|v rv|                    d          r||vrt          ||           r||         }
t          |
dt                    } ||
|           |	                    |           |S )N)r   r   r   )r   r   r   z.biaszrotary_pos_emb.inv_freqweight_loader)
dictnamed_parameterssetreplaceendswithr    r   rK   r   add)rO   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
param_nameweight_nameshard_idparamr   s               rS   load_weightszChatGLMModel.load_weights  sr    DG"

 4002233"%%%#* 	$ 	$D-5K 4 41
Kd**||K<<==)) d+.E.E*466 #D) % 3e]H===,44==)) d+.E.E*466 #D) '@U V Ve]333d####rT   NN)re   rf   rg   packed_modules_mappingr
   rh   r:   ri   rj   r   r   objectrc   r   tupler   r   rk   rl   s   @rS   r   r   =  sF        	"#'$
 BD  
  
  
z  
3  
  
  
  
  
  
D) )%, ) ) ) ) <@-1 < < 2D8	
 |d*  
+	+   2"HU33D-E$F "3s8 " " " " " " " "rT   r   c            	            e Zd Z eddi          Zdeddededee         dd	f fd
Z	de
j        de
j        fdZde
j        de
j        d	z  fdZdeeee
j        f                  fdZ xZS )ChatGLMBaseModelz.word_embeddingsr&   )orig_to_new_substr)r*   transformer_typer   r*   r   rW   Nc                   t                                                       |j        j        }|j        }|j        j        }|| _        || _        || _        t          |dd          | _         ||t          |d                    | _
        | j        j        r | j
        j        j        | j
        j        _        | j
        j        | _        t!          |j                  | _        | j
        j        | _        d S )Nmax_sequence_lengthr0   transformerr   r*   )r9   r:   r   r   r)   multimodal_configr'   rK   max_position_embeddingsr#   r   tie_word_embeddingsr   weightr   lm_headr   r   logits_processorr   )rO   r   r*   r   r'   r)   r   rR   s          rS   r:   zChatGLMBaseModel.__init__  s     	)3"/'4F!2('.v7Ld'S'S$++#L,O,O
 
 
 ;* 	U373C3M3TD)0'4 /0H I I< 	,,,rT   r   c                 6    | j                             |          S rw   )r   r   r   s     rS   r   z ChatGLMBaseModel.embed_input_ids  s    //	:::rT   rU   c                 <    |                      | j        |          }|S rw   )r   r   )rO   rU   logitss      rS   compute_logitszChatGLMBaseModel.compute_logits  s      &&t|]CCrT   r   c                 X    t          |           }|                    || j                  S )N)mapper)r   r   hf_to_vllm_mapper)rO   r   loaders      rS   r   zChatGLMBaseModel.load_weights  s+    "4((""743I"JJJrT   )re   rf   rg   r   r   r   r
   rh   typer:   ri   rj   r   r   r   r   r   rk   rl   s   @rS   r   r     s%       %.3   /;
 
 
  
 	

 |,
 

 
 
 
 
 
8; ;%, ; ; ; ;| 
	   KHU33D-E$F K K K K K K K KrT   r   c                        e Zd ZdgdgdZdddedef fdZ	 	 dd
ej        dej        de	d	z  dej        d	z  dej        e	z  f
dZ
 xZS )ChatGLMForCausalLMrI   rs   )rI   rs   r&   r   r   r*   c                    |j         j        }t          |d          r*ddgi}t          dt	          j        |           d          t                                          ||           d S )Nvision_configarchitecturesGLM4VForCausalLMzThe configuration of this model indicates that it supports vision inputs, but you instantiated the text-only version of this model. Please use the vision model by setting `--hf-overrides 'z'`r   )r   r   hasattrRuntimeErrorjsondumpsr9   r:   )rO   r   r*   r'   hf_overridesrR   s        rS   r:   zChatGLMForCausalLM.__init__  s    )36?++ 	+.@-ABLA %)J|$<$<A A A   	[@@@@@rT   Nr   r   r   r   rW   c                 6    |                      ||||          }|S rw   )r   )rO   r   r   r   r   rU   s         rS   rc   zChatGLMForCausalLM.forward  s+     ((y"6
 
 rT   r   )re   rf   rg   r   r
   rh   r:   ri   rj   r   rc   rk   rl   s   @rS   r   r     s        -.)* 
 BD A A Az A3 A A A A A A" <@-1
 
<
 <
 2D8	

 |d*
 
+	+
 
 
 
 
 
 
 
rT   r   )>rz   r   collections.abcr   	itertoolsr   ri   r   torch.nnr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr	   r
   vllm.distributedr   r   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   vllm.sequencer   vllm.transformers_utils.configsr   
interfacesr   r   r   utilsr   r   r    r!   r"   r#   Moduler%   rn   r|   r   r   r   r    rT   rS   <module>r     s  
 B A  $ $ $ $ $ $                    * * * * * * = = = = = = / / / / / / / / O O O O O O O O < < < < < < 8 8 8 8 8 8         
 H G G G G G F F F F F F @ @ @ @ @ @        P O O O O O - - - - - - 9 9 9 9 9 9 ? ? ? ? ? ? ? ? ? ?               X X X X X29 X X Xv, , , , ,RY , , ,^J J J J Jry J J JZ3 3 3 3 3RY 3 3 3l h h h h h29m h h hV-K -K -K -K -Kry -K -K -K`    )<]     rT   