
    .`iL)                        d Z ddlmZ ddlZddlmZ ddlmZ ddlmZ ddl	m
Z
 ddlmZmZ dd	lmZmZ dd
lmZ ddlmZmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddl m!Z! ddl"m#Z#m$Z$ ddl%m&Z' ddl%m(Z( ddl)m*Z*m+Z+m,Z,  G d dej-                  Z. G d dej-                  Z/de/iZ0 e
ddddd           G d d e(                      Z1 G d! d"ej-        e#e$          Z2dS )#zDInference-only GLM-4-0414 model compatible with HuggingFace weights.    )IterableN)nn)
Glm4Config)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)RMSNorm)QKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHead)IntermediateTensors)AttentionType   )SupportsLoRA
SupportsPP)LlamaMLP)
LlamaModel)AutoWeightsLoaderPPMissingLayermaybe_prefixc                        e Zd Zddddddej        fdedededed	ed
edz  dededz  de	dz  de
de
ddf fdZdej        dej        dej        fdZ xZS )Glm4Attentioni   NF confighidden_size	num_headsnum_kv_headsmax_positionhead_dimqkv_biascache_configquant_configprefix	attn_typereturnc           
      z   t                                                       || _        t                      }|| _        | j        |z  dk    sJ | j        |z  | _        || _        | j        |k    r| j        |z  dk    sJ n|| j        z  dk    sJ |j                            dd           t          d| j        |z            | _
        |p	|| j        z  | _        | j        | j        z  | _        | j
        | j        z  | _        | j        dz  | _        t          || j        | j        | j        ||	|
 d          | _        t#          | j        | j        z  |d|	|
 d	          | _        t'          | j        ||j        d
          | _        t+          | j        | j        | j        | j
        ||	|
 d|          | _        d S )Nr   partial_rotary_factorg      ?r   g      z	.qkv_proj)biasr(   r)   Fz.o_proj)r$   rope_parametersis_neox_stylez.attn)r#   r'   r(   r)   r*   )super__init__r!   r   total_num_headsr"   total_num_kv_headsr/   
setdefaultmaxr#   r%   q_sizekv_sizescalingr   qkv_projr   o_projr   
rotary_embr   attn)selfr    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   tp_size	__class__s                s/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/glm4.pyr2   zGlm4Attention.__init__4   s	    	&688(#g-2222-8"."g-- *W499999 T449999))*A3GGG4#:g#EFF GK43G$Gnt}4(4=8}d*)M #%'''
 
 
 ( 4=0%%%%
 
 
 #M%"2	
 
 
 NML*%%###	
 	
 	
			    	positionshidden_statesc                 "   |                      |          \  }}|                    | j        | j        | j        gd          \  }}}|                     |||          \  }}|                     |||          }|                     |          \  }	}|	S )N)dim)r:   splitr7   r8   r<   r=   r;   )
r>   rC   rD   qkv_qkvattn_outputoutputs
             rA   forwardzGlm4Attention.forwardx   s    
 }--Q))T[$,E2)NN1ay!Q//1ii1a((KK,,	rB   )__name__
__module____qualname__r   DECODERr   intboolr   r   strr2   torchTensorrP   __classcell__r@   s   @rA   r   r   3   s*        &#+/26&.B
 B
B
 B
 	B

 B
 B
 *B
 B
 "D(B
 )4/B
 B
 B
 
B
 B
 B
 B
 B
 B
H
<
 |
 
	
 
 
 
 
 
 
 
rB   r   c            
            e Zd Z	 	 ddedededz  ddf fdZdej        d	ej        d
ej        dz  de	ej        ej        f         fdZ
 xZS )Glm4DecoderLayerr   Nvllm_configr)   r    r+   c                    t                                                       |p|j        j        }|j        }|j        }|j        | _        t          || j        |j        |j	        |j
        t          |dd          t          |dd           ||| dt          j                  | _        t          | j        |j        |j        || d          | _        t'          |j        |j                  | _        t'          |j        |j                  | _        t'          |j        |j                  | _        t'          |j        |j                  | _        d S )	Nattention_biasFr%   z
.self_attn)r    r!   r"   r$   r#   r&   r%   r'   r(   r)   r*   z.mlp)r!   intermediate_size
hidden_actr(   r)   )eps)r1   r2   model_config	hf_configr'   r(   r!   r   num_attention_headsmax_position_embeddingsnum_key_value_headsgetattrr   rT   	self_attnGlm4MLPra   rb   mlpr   rms_norm_epsinput_layernormpost_attention_layernormpost_self_attn_layernormpost_mlp_layernorm)r>   r^   r)   r    r'   r(   r@   s         rA   r2   zGlm4DecoderLayer.__init__   sj    	=;3="/"/!-&(073V%5u==VZ66%%(((#+
 
 
 ($6(%???
 
 
  'v'9v?RSSS(/F$7)
 )
 )
% )0F$7)
 )
 )
% #*&*<&BU"V"V"VrB   rC   rD   residualc                 N   ||}|                      |          }n|                      ||          \  }}|                     ||          }|                     |          }|                     ||          \  }}|                     |          }|                     |          }||fS )N)rC   rD   )rn   rj   rp   ro   rl   rq   )r>   rC   rD   rr   s       rA   rP   zGlm4DecoderLayer.forward   s     $H 00??MM&*&:&:=(&S&S#M8' ' 
 

 55mDD #'"?"?x"X"Xx////>>h&&rB   )r   N)rQ   rR   rS   r	   rW   r   r2   rX   rY   tuplerP   rZ   r[   s   @rA   r]   r]      s         $(	)W )W)W )W T!	)W
 
)W )W )W )W )W )WV'<' |' ,%	'
 
u|U\)	*' ' ' ' ' ' ' 'rB   r]   	attentionrF   )	input_idsrC   intermediate_tensorsinputs_embeds)dynamic_arg_dimsc                   .     e Zd Zdddedef fdZ xZS )	Glm4Modelr   r)   r^   r)   c                Z    t                                          ||t                     d S )N)r^   r)   
layer_type)r1   r2   r]   )r>   r^   r)   r@   s      rA   r2   zGlm4Model.__init__   s6    #F?O 	 	
 	
 	
 	
 	
rB   )rQ   rR   rS   r	   rW   r2   rZ   r[   s   @rA   r{   r{      s\         BD 
 
 
z 
3 
 
 
 
 
 
 
 
 
 
rB   r{   c                   ,    e Zd Zg dddgdZdddedef fd	Zd
ej        dej        fdZ		 	 dd
ej        dej        de
dz  dej        dz  dej        e
z  f
dZdej        dej        dz  fdZdeeeej        f                  dee         fdZ xZS )Glm4ForCausalLM)q_projk_projv_proj	gate_projup_proj)r:   gate_up_projr   r|   r^   r)   c          	         t                                                       |j        j        }|j        }|| _        || _        t          |t          |d                    | _        t                      j
        rJ|j        r| j        j        | _        nDt          |j        |j        |t          |d                    | _        nt#                      | _        t%          |j                  | _        | j        j        | _        d S )Nmodel)r^   r)   lm_head)r(   r)   )r1   r2   rd   re   r(   r    r{   r   r   r
   is_last_ranktie_word_embeddingsembed_tokensr   r   
vocab_sizer!   r   r   logits_processormake_empty_intermediate_tensors)r>   r^   r)   r    r(   r@   s        rA   r2   zGlm4ForCausalLM.__init__   s    )3"/(#L,I,I
 
 

 >>& 	,) #z6-%&!-'	::	      *++DL /0A B B J6 	,,,rB   rv   r+   c                 6    | j                             |          S N)r   embed_input_ids)r>   rv   s     rA   r   zGlm4ForCausalLM.embed_input_ids  s    z)))444rB   NrC   rw   rx   c                 6    |                      ||||          }|S r   )r   )r>   rv   rC   rw   rx   rD   s         rA   rP   zGlm4ForCausalLM.forward  s)     

y"6
 
 rB   rD   c                 <    |                      | j        |          }|S r   )r   r   )r>   rD   logitss      rA   compute_logitszGlm4ForCausalLM.compute_logits  s      &&t|]CCrB   weightsc                 l    t          | | j        j        rdgnd           }|                    |          S )Nzlm_head.)skip_prefixes)r   r    r   load_weights)r>   r   loaders      rA   r   zGlm4ForCausalLM.load_weights"  sC    "+/;+JTJ<<PT
 
 
 ""7+++rB   )NN)rQ   rR   rS   packed_modules_mappingr	   rW   r2   rX   rY   r   r   rP   r   r   rt   setr   rZ   r[   s   @rA   r   r      s{       
 
 
 

 
 BD 
 
 
z 
3 
 
 
 
 
 
>5 5%, 5 5 5 5 <@-1
 
<
 <
 2D8	

 |d*
 
+	+
 
 
 
| 
	   ,HU33D-E$F ,3s8 , , , , , , , ,rB   r   )3__doc__collections.abcr   rX   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr   r	   vllm.distributedr
   r   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   vllm.sequencer   vllm.v1.attention.backendr   
interfacesr   r   llamar   rk   r   utilsr   r   r   Moduler   r]   ALL_DECODER_LAYER_TYPESr{   r    rB   rA   <module>r      s  0 K J $ $ $ $ $ $        # # # # # # * * * * * * = = = = = = / / / / / / / / O O O O O O O O 8 8 8 8 8 8 R R R R R R R R G G G G G G F F F F F F @ @ @ @ @ @ N N N N N N - - - - - - 3 3 3 3 3 3 0 0 0 0 0 0 0 0 & & & & & &       B B B B B B B B B BO O O O OBI O O OdD' D' D' D' D'ry D' D' D'P ! 
  !	   
 
 
 
 

 
 
 
G, G, G, G, G,biz G, G, G, G, G,rB   