
    .`ie              
          d Z ddlZddlmZ ddlmZmZ ddlmZm	Z	 ddl
Z
ddl
mZ ddlmZ ddlmZ dd	lmZ dd
lmZmZmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddl m!Z!m"Z" ddl#m$Z$ ddl%m&Z& ddl'm(Z(m)Z)m*Z*m+Z+m,Z, ddl-m.Z. ddl/m0Z0 ddl1m2Z2 ddl3m4Z4m5Z5m6Z6m7Z7 ddl8m9Z9 ddl:m;Z;m<Z<m=Z=m>Z>m?Z? ddl@mAZA ddlBmCZC ddlDmEZEmFZF ddlGmHZHmIZImJZJ ddlKmLZLmMZMmNZNmOZOmPZP  G d  d!eE          ZQ G d" d#ejR                  ZS G d$ d%ejR                  ZT G d& d'ejR                  ZU G d( d)ejR                  ZV G d* d+ejR                  ZW G d, d-ejR                  ZX G d. d/ejR                  ZY G d0 d1eI          ZZ G d2 d3          Z[ G d4 d5e=          Z\ G d6 d7e;e\                   Z] G d8 d9e<e\                   Z^ e2j_        e^e\e]:           G d; d<eHeOeMePeN                      Z`dS )=z<Inference-only CogAgent model compatible with THUDM weights.    N)	Namespace)MappingSequence)	AnnotatedLiteral)nn)	LayerNorm)
transforms)InterpolationMode)BatchFeaturePreTrainedTokenizer
TensorType)
ImageInput)	TextInput)
VllmConfig)BaseDummyOptions)$get_tensor_model_parallel_world_size)
SiluAndMul
get_act_fn)MMEncoderAttention)Conv2dLayer)ColumnParallelLinearMergedColumnParallelLinearQKVParallelLinearReplicatedLinearRowParallelLinear)QuantizationConfig)MultiModelKeys)MULTIMODAL_REGISTRY)MultiModalDataDictMultiModalFeatureSpecMultiModalFieldConfigMultiModalKwargsItems)MultiModalDataItems)BaseDummyInputsBuilderBaseMultiModalProcessorBaseProcessingInfoPromptReplacementPromptUpdate)IntermediateTensors)ChatGLMConfig)TensorSchemaTensorShape   )ChatGLMBaseModelChatGLMModelGLMTransformer)MultiModalEmbeddingsSupportsLoRASupportsMRoPESupportsMultiModal
SupportsPPc                   j    e Zd ZU dZdZed         ed<   eej	         e
dddd          f         ed<   d	S )
GLMVImagePixelInputsz
    Dimensions:
        - b: Batch size
        - c: Number of channels (3)
        - h: Height of image
        - w: Width of image
    pixel_valuestypeb   hwdataN)__name__
__module____qualname____doc__r:   r   __annotations__r   torchTensorr-        t/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/glm4v.pyr8   r8   B   s[           %3D'.
!222
EL++c1c3"?"??
@@@@@@rH   r8   c                   B     e Zd Z fdZdej        dej        fdZ xZS )EVA2CLIPPatchEmbeddingc                 J   t                                                       t          |j        |j        |j        |j                  | _        t          j        t          j
        d|j                            | _        t          j        |j        |j                  | _        d S )N)kernel_sizestrider.   )super__init__r   in_channelshidden_size
patch_sizeprojr   	ParameterrE   zeroscls_embedding	Embeddingnum_positionsposition_embedding)selfconfig	__class__s     rI   rP   zEVA2CLIPPatchEmbedding.__init__P   s    )$	
 
 
	  \%+a9K*L*LMM"$,v/CVEW"X"XrH   imagesreturnc                    |                     | j        j        j        | j        j        j                  }|                     |          }|                    d                              dd          }| j                            |j	        d         dd          }t          j        ||fd          }|| j        j                            d          z  }|S )
        Parameters:
        images : torch.Tensor
            Input image tensor with shape (B, C, H, W)

        Returns:
        torch.Tensor
            Transformed tensor with shape (B, L, D)
        )devicedtype   r.   r   dim)torT   weightrb   rc   flatten	transposerW   expandshaperE   catrZ   	unsqueeze)r[   r^   x	cls_tokens       rI   forwardzEVA2CLIPPatchEmbedding.forward[   s     $)"2"9AQAWXXIIfIIaLL""1a((&--agaj"bAA	Iy!n!,,,	T$+55a888rH   )r@   rA   rB   rP   rE   rF   rr   __classcell__r]   s   @rI   rK   rK   O   sc        	Y 	Y 	Y 	Y 	Yel u|        rH   rK   c                   X     e Zd Z	 	 d	dedz  def fdZdej        dej        fdZ xZ	S )
EVA2CLIPAttentionN quant_configprefixc                 0   t                                                       |j        | _        t                      | _        |j        | j        z  | _        |j        |j        z  | _        | j        dz  | _        t          |j        | j        |j        || d          | _
        t          |j        |j        || d          | _        t          | j        | j        | j                  | _        t          j                            |j                  | _        d S )Ng      z.query_key_valuerx   ry   z.dense)rO   rP   rR   r   tp_size	num_headsnum_heads_per_rankhead_dimscaler   query_key_valuer   denser   attnrE   r   Dropoutdropout_proboutput_dropoutr[   r\   rx   ry   r]   s       rI   rP   zEVA2CLIPAttention.__init__o   s    	!-;=="("2dl"B*f.>>]D(
0M%... 
  
  
 '%$$$	
 
 

 '#T]DJ
 
	 $h..v/BCCrH   rp   r_   c                     |                      |          \  }}|                    dd          \  }}}|                     |||          }|                     |          \  }}|                     |          }|S )Nr<   re   rf   )r   chunkr   r   r   )	r[   rp   qkv_qkvoutoutputs	            rI   rr   zEVA2CLIPAttention.forward   st    %%a((Q))A2)&&1aii1a  JJsOO	$$V,,rH   Nrw   
r@   rA   rB   r   strrP   rE   rF   rr   rs   rt   s   @rI   rv   rv   n   s         37	D D )4/D 	D D D D D D@ %,        rH   rv   c                   X     e Zd Z	 	 d	dedz  def fdZdej        dej        fdZ xZ	S )
EVA2CLIPMLPNrw   rx   ry   c                    t                                                       || _        t          |j                  | _        t          |j        |j        || d          | _	        t          |j        |j        || d          | _        d S )Nz.fc1r{   z.fc2)rO   rP   r\   r   
hidden_actactivation_fnr   rR   intermediate_sizefc1r   fc2r   s       rI   rP   zEVA2CLIPMLP.__init__   s     	'(9::'$%???	
 
 
 %$%???	
 
 
rH   rp   r_   c                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)r   r   r   r[   rp   r   s      rI   rr   zEVA2CLIPMLP.forward   s@    xx{{1q!!xx{{1rH   r   r   rt   s   @rI   r   r      s         37	
 
 )4/
 	
 
 
 
 
 
, %,        rH   r   c                   :     e Zd Z	 	 ddedz  def fdZd Z xZS )EVA2CLIPTransformerLayerNrw   rx   ry   c                 0   t                                                       t          |j        |j                  | _        t          ||| d          | _        t          ||| d          | _	        t          |j        |j                  | _
        d S )N)epsz
.attentionr{   z.mlp)rO   rP   r	   rR   layer_norm_epsinput_layernormrv   	attentionr   mlppost_attention_layernormr   s       rI   rP   z!EVA2CLIPTransformerLayer.__init__   s     	();AVWWW*6K6K6K
 
 
 ooo
 
 
 )2F$9)
 )
 )
%%%rH   c                     |}|                      |                     |                    }||z   }|}|                     |                     |                    }||z   }|S r   )r   r   r   r   )r[   hidden_statesattention_inputattention_output	mlp_input
mlp_outputr   s          rI   rr   z EVA2CLIPTransformerLayer.forward   se    '//0O0OPP'*::!	22488I3F3FGG
Z'rH   r   r@   rA   rB   r   r   rP   rr   rs   rt   s   @rI   r   r      sr         37	
 
 )4/
 	
 
 
 
 
 
$      rH   r   c                   :     e Zd Z	 	 ddedz  def fdZd Z xZS )EVA2CLIPTransformerNrw   rx   ry   c                     t                                                       t          j        fdt	          j                  D                       | _        d S )Nc           	      >    g | ]}t           d |           S )z.layers.r{   )r   ).0	layer_idxr\   ry   rx   s     rI   
<listcomp>z0EVA2CLIPTransformer.__init__.<locals>.<listcomp>   sQ         )!-$99i99    rH   )rO   rP   r   
ModuleListrangenum_hidden_layerslayersr   s    ```rI   rP   zEVA2CLIPTransformer.__init__   sr     	m      "'v'?!@!@  	
 	
rH   c                 0    | j         D ]} ||          }|S r   )r   )r[   r   layer_modules      rI   rr   zEVA2CLIPTransformer.forward   s*     K 	8 	8L(L77MMrH   r   r   rt   s   @rI   r   r      sr         37	
 
 )4/
 	
 
 
 
 
 
$      rH   r   c                   :     e Zd Z	 	 ddedz  def fdZd Z xZS )EVA2CLIPGLUNrw   rx   ry   c                    t                                                       t          ||j        d|| d          | _        t          j        |j                  | _        t          j                    | _	        t                      | _        t          |j        |j        gdz  d|| d          | _        t          |j        |j        d|| d          | _        dS )a  
        The original implementation is the same as:
        ```python
        self.dense_h_to_4h = ColumnParallelLinear(
            config.hidden_size,
            config.ffn_hidden_size,
            bias=False,
            quant_config=quant_config,
        )

        self.gate_proj = ColumnParallelLinear(
            config.hidden_size,
            config.ffn_hidden_size,
            bias=False,
            quant_config=quant_config,
        )
        ```
        ```
        gate_proj_output, _ = self.gate_proj(x)
        dense_h_to_4h_output, _ = self.dense_h_to_4h(x)
        x = torch.cat([gate_proj_output, dense_h_to_4h_output], dim=-1)
        ```

        We merge two ColumnParallelLinear into one MergedColumnParallelLinear:
        ```
        self.merged_proj = MergedColumnParallelLinear(
            config.hidden_size,
            [config.ffn_hidden_size] * 2,
            bias=False,
            quant_config=quant_config,
        )
        ```
        ```
        x, _ = self.merged_proj(x)
        ```
        F.linear_proj)biasrx   ry   rd   z.merged_projz.dense_4h_to_hN)rO   rP   r   rR   linear_projr   r	   norm1GELUact1r   act2r   ffn_hidden_sizemerged_projr   dense_4h_to_h)r[   r\   in_featuresrx   ry   r]   s        rI   rP   zEVA2CLIPGLU.__init__   s    V 	+%***
 
 
 \&"455
GII	LL	5#$q(%***
 
 
 /"%,,,
 
 
rH   c                    |                      |          \  }}|                     |                     |                    }|                     |          \  }}|                     |          }|                     |          \  }}|S r   )r   r   r   r   r   r   r   s      rI   rr   zEVA2CLIPGLU.forward5  sv    ""1IIdjjmm$$""1IIaLL!!!$$1rH   r   r   rt   s   @rI   r   r      s|        
 37E
 E
 )4/	E

 E
 E
 E
 E
 E
 E
N      rH   r   c                   X     e Zd Z	 	 d	dedz  def fdZdej        dej        fdZ xZ	S )
EVA2CLIPModelNrw   rx   ry   c                 *   t                                                       t          di |j        }t	          |          | _        t          ||| d          | _        t          ||j	        || d          | _
        t          |j	        |j	        dd          | _        t          j        t          j        dd|j	                            | _        t          j        t          j        dd|j	                            | _        |j        | _        d S )	Nz.transformerr{   r   )r   rx   ry   rd   )rQ   out_channelsrM   rN   r.   rG   )rO   rP   r   vision_configrK   patch_embeddingr   transformerr   rR   r   r   convr   rU   rE   rV   boieoiscaling_factor)r[   r\   rx   ry   r   r]   s        rI   rP   zEVA2CLIPModel.__init__?  s    	!99F$8995mDD.=T=T=T
 
 
 '*%***	
 
 
  %1+	
 
 
	 <Aq&2D E EFF<Aq&2D E EFF+:rH   r^   r_   c                    |                      |          }|                     |          }|ddddf         }|j        \  }}}t          |dz            }|                    ||||                              dddd          }|                     |          }|                    d                              dd          }| 	                    |          }| j
                            |j        d         dd          }| j                            |j        d         dd          }t          j        |||fd          }|| j        z  }|S )	ra   Nr.   g      ?r   r<   rd   re   rf   )r   r   rm   intviewpermuter   rj   rk   r   r   rl   r   rE   rn   r   )	r[   r^   rp   r;   sr=   	grid_sizer   r   s	            rI   rr   zEVA2CLIPModel.forward[  s-      ((QaaaeH'1a3KK	FF1iA..66q!QBBIIaLLIIaLL""1a((Qhooagaj"b11hooagaj"b11IsAsm+++##rH   r   r   rt   s   @rI   r   r   >  s         37	; ; )4/; 	; ; ; ; ; ;8el u|        rH   r   c                   .     e Zd Zdddedef fdZ xZS )
GLM4VModelrw   ry   vllm_configry   c                    t                                          ||           |j        }t          | j        || d          | _        d S )N)r   ry   z.visionr   )rO   rP   rx   r   r\   vision)r[   r   ry   rx   r]   s       rI   rP   zGLM4VModel.__init__x  sT    [@@@"/#K.@.@.@
 
 
rH   )r@   rA   rB   r   r   rP   rs   rt   s   @rI   r   r   w  sX        AC 
 
 
z 
3 
 
 
 
 
 
 
 
 
 
rH   r   c            	            e Zd ZdZdededdf fdZ	 	 	 ddeee         z  dz  de	ee	         z  dz  d	e
ez  dz  defd
Z xZS )GLM4VProcessorz_
    This model doesn't define its own HF processor,
    so we implement our own one here.
    r\   	tokenizerr_   Nc                 D   t                                                       || _        || _        |j        }|d         }t          j        t          j        ||ft          j	                  t          j
                    t          j        dd          g          | _        d S )N
image_size)interpolation)g3<4'?gwgM?gy{ ?)gB91?gwt.?g	U?)meanstd)rO   rP   r\   r   r   r
   ComposeResizer   BICUBICToTensor	Normalizeimage_transform)r[   r\   r   r   r   r]   s        rI   rP   zGLM4VProcessor.__init__  s    
 	","<0
)1!,"3";   #%%$<<  
 
  
rH   textr^   return_tensorsc                 <    |g }t          |t                    s|g}|g }t          |t                    s|g}                     |          }t          |          dk    ri }n$ fd|D             }dt	          j        |          i}t          i |||          S )Nr   c                 :    g | ]}                     |          S rG   )r   )r   imager[   s     rI   r   z+GLM4VProcessor.__call__.<locals>.<listcomp>  s'    LLLED0077LLLrH   r9   )tensor_type)
isinstancelistr   lenrE   stackr   )r[   r   r^   r   text_inputsimage_inputsr9   s   `      rI   __call__zGLM4VProcessor.__call__  s     <D$%% 	6D>F&$'' 	XFnnT**v;;!LLLLLLVLLLL*EK,E,EFL '
 
 
 	
rH   )NNN)r@   rA   rB   rC   r+   r   rP   r   r   r   r   r   r   r   rs   rt   s   @rI   r   r     s         


 '
 
	
 
 
 
 
 
: 487;26	
 
$y/)D0
 T*--4
 j(4/	

 

 
 
 
 
 
 
 
rH   r   c                   ^    e Zd Zd ZdedefdZdeee	dz  f         fdZ
de	fdZde	fdZdS )	GLM4VProcessingInfoc                 @    | j                             t                    S r   )ctxget_hf_configr+   r[   s    rI   r  z!GLM4VProcessingInfo.get_hf_config  s    x%%m444rH   kwargsr_   c                      | j         j        t          f|                                 |                                 d|S )N)r\   r   )r   init_processorr   r  get_tokenizer)r[   r  s     rI   get_hf_processorz$GLM4VProcessingInfo.get_hf_processor  sP    &tx&
%%''((**
 
 	
 
 	
rH   Nc                 
    ddiS )Nr   r.   rG   r  s    rI   get_supported_mm_limitsz+GLM4VProcessingInfo.get_supported_mm_limits  s    |rH   c                 r    |                                  }|j        }|d         }|d         }||z  dz  }||z  S )Nr   rS   rd   )r  r   )r[   	hf_configr   r   rS   grid_lengths         rI   get_num_image_tokensz(GLM4VProcessingInfo.get_num_image_tokens  sI    &&((	!/"<0
"<0
 J.!3[((rH   c                 0    |                                  dz   S )Nrd   )r  r  s    rI   get_num_image_feature_tokensz0GLM4VProcessingInfo.get_num_image_feature_tokens  s    ((**Q..rH   )r@   rA   rB   r  objectr   r  r   r   r   r	  r  r  rG   rH   rI   r   r     s        5 5 5
 
N 
 
 
 
cDj)A    )c ) ) ) )/c / / / / / /rH   r   c            	       p    e Zd Zdeeef         defdZ	 ddedeeef         deeef         dz  defdZ	dS )	GLM4VDummyInputsBuilder	mm_countsr_   c                 <    |                     dd          }d}||z  S )Nr   r   /<|begin_of_image|><|endoftext|><|end_of_image|>)get)r[   r  
num_images	base_texts       rI   get_dummy_textz&GLM4VDummyInputsBuilder.get_dummy_text  s%    ]]7A..
E	:%%rH   Nseq_len
mm_optionsc                     | j                                         }|j        }|d         x}}|                    dd          }|r|                    d          nd }	d|                     ||||	          iS )Nr   r   r   )widthheightr  	overrides)infor  r   r  _get_dummy_images)
r[   r  r  r  r  r   target_widthtarget_heightr  image_overridess
             rI   get_dummy_mm_dataz)GLM4VDummyInputsBuilder.get_dummy_mm_data  s     I++--	!/'4\'BB}]]7A..
5?I*..111T T++"$%)	 ,  
 	
rH   r   )
r@   rA   rB   r   r   r   r  r   r    r%  rG   rH   rI   r  r    s        &S(9 &c & & & & =A	
 

 38$
 C!112T9	

 

 
 
 
 
 
rH   r  c            
           e Zd Zdededeeef         deeef         def
dZde	deeef         deee
f         fdZdedeeef         d	edee         fd
ZdS )GLM4VMultiModalProcessorprompt_textmm_itemshf_processor_mm_kwargstokenization_kwargsr_   c                     dS )NFrG   )r[   r(  r)  r*  r+  s        rI   _hf_processor_applies_updatesz6GLM4VMultiModalProcessor._hf_processor_applies_updates  s	     urH   	hf_inputsc                 F    t          t          j        d                    S )Nr   )r9   )dictr"   batched)r[   r.  r*  s      rI   _get_mm_fields_configz.GLM4VMultiModalProcessor._get_mm_fields_config
  s!    
 !6!>w!G!GHHHHrH   out_mm_kwargsc                       j                                         }|j        |j        |j        dt
          f fd}t          dg|          gS )Nitem_idxc                 V    j                                         }g|z  }g|z   gz   S r   )r   r  )r5  num_image_tokensimage_tokensboi_token_ideoi_token_idimage_token_idr[   s      rI   get_replacementzEGLM4VMultiModalProcessor._get_prompt_updates.<locals>.get_replacement  s9    #y==??*+.>>L >L0L>AArH   r   )modalitytargetreplacement)r   r  r9  pad_token_idr:  r   r(   )	r[   r)  r*  r3  r  r<  r9  r:  r;  s	   `     @@@rI   _get_prompt_updatesz,GLM4VMultiModalProcessor._get_prompt_updates  s     I++--	 -"/ -	Bc 	B 	B 	B 	B 	B 	B 	B 	B 	B  $nlC+  
 	
rH   N)r@   rA   rB   r   r$   r   r  boolr-  r   r"   r2  r#   r   r)   rA  rG   rH   rI   r'  r'     s         & !(V 4	
 %S&[1 
   II !(V 4I 
++	,	I I I I
%
 !(V 4
 -	

 
,	
 
 
 
 
 
rH   r'  )r   dummy_inputsc                       e Zd ZdgdgddgdZdefdZedededed	z  fd
            Z	de
ddededee
         dd	f fdZdeded	z  fdZdedej        fdZdee         dee         deej        ef         fdZej        ZdedefdZ	 	 ddej        dej        ded	z  dej        d	z  dedej        ez  fdZ xZS )GLM4VForCausalLMr   dense_h_to_4h	gate_proj)r   rF  r   r_   c                 0    t          j        ddd          S )z<
        Get the module prefix in multimodal models
        ztransformer.encoderztransformer.vision.linear_projztransformer.vision.transformer)language_model	connectortower_model)r   from_string_fieldr  s    rI   get_mm_mappingzGLM4VForCausalLM.get_mm_mapping:  s'     /068
 
 
 	
rH   r=  iNc                 N    |                     d          rdS t          d          )Nr   r  z Only image modality is supported)
startswith
ValueError)clsr=  rN  s      rI   get_placeholder_strz$GLM4VForCausalLM.get_placeholder_strD  s.    w'' 	EDD;<<<rH   rw   )ry   transformer_typer   ry   rT  c                    |                      |t          dt          i          5  t                                          |||           d d d            n# 1 swxY w Y   |  d S )Nr   )language_targetstower_targets)r   ry   rT  )_mark_composite_modelr1   r   rO   rP   )r[   r   ry   rT  r]   s       rI   rP   zGLM4VForCausalLM.__init__K  s     ''+"M2 ( 
 
 		 		
 GG'!1    		 		 		 		 		 		 		 		 		 		 		 		 		 		 		 	$$$s   %AAAr  c                     |                     dd           }|)| j        j        d         x}}t          d|||d          S d S )Nr9   r   )r=   r>   )r:   r?   resolve_bindings)popr\   r   r8   )r[   r  r9   
expected_h
expected_ws        rI   _parse_and_validate_image_inputz0GLM4VForCausalLM._parse_and_validate_image_input_  s`     zz.$77#&*k&?&MMJ'#!'1
!C!C    trH   image_inputc                     |d                              | j        j                  }| j                            |          S )Nr?   )rc   )rh   r\   rc   r   r   )r[   r_  r9   s      rI   _process_image_inputz%GLM4VForCausalLM._process_image_inputn  s9    "6*--DK4E-FF&&|444rH   input_tokensmm_featuresc           	      
   t          j        |ddh          }d |                    dg           D             }d |                    dg           D             }| j        }|j        }|j        }|j        }	|j        j        }
g }|s|r(g }d}|D ]h}||k    rd}n||	k    rd}||k    r|du r|	                    d           3||k    r|du r|	                    d           S|	                    d	           ig }t          j        t          |          d
           D ]K\  }}t          |          }|d         d         }|d         d         dz   }|	                    |||f           Ld}d}|D ]?\  }}}t          |          dk    r|d                                         dz   nd}|dk    r=||         \  }}}|||
z  ||
z  } }}t!          j        |                              dd                              d|| z                                            }!t!          j        |                              ddd                              |d|                                           }"t!          j        |                               ddd                              ||d                                          }#|	                    t!          j        |!|"|#g          |z              |dz  }}|dk    rb|g||         dd          R \  }}}|||
z  ||
z  } }}t-          |          D ]!}$t!          j        |$                              dd                              d|| z                                            }!t!          j        |                              ddd                              dd|                                           }"t!          j        |                               ddd                              d|d                                          }#|	                    t!          j        |!|"|#g          |z              #|dz  }|dz  }||z
  }%|	                    t!          j        |%                              dd                              dd          |z              d}An^t          |          }%|	                    t!          j        |%                              dd                              dd                     t!          j        |d                              dd          }&|&                                dz   t          |          z
                                  }'|&|'fS )Nimage_grid_thwvideo_grid_thwc                 6    g | ]}|                                 S rG   tolistr   items     rI   r   z>GLM4VForCausalLM.get_mrope_input_positions.<locals>.<listcomp>|       UUUD$++--UUUrH   c                 6    g | ]}|                                 S rG   rh  rj  s     rI   r   z>GLM4VForCausalLM.get_mrope_input_positions.<locals>.<listcomp>}  rl  rH   FTr   videor   c                     | d         S )Nr.   rG   )rp   s    rI   <lambda>z<GLM4VForCausalLM.get_mrope_input_positions.<locals>.<lambda>  s
    qt rH   r   re   r.   r<   rf   )r!   gather_kwargsr  r\   r;  video_start_token_idvideo_end_token_idr   spatial_merge_sizeappend	itertoolsgroupby	enumerater   r   maxrE   aranger   rl   rj   r   r   tensorrn   reshaperk  )(r[   rb  rc  r  re  rf  r  r;  rr  rs  rt  llm_pos_ids_listinput_token_typevideo_check_flgtokeninput_type_groupkey
group_iter
group_liststart_index	end_indexvideo_frame_nummm_data_idxmodality_type	start_idxend_idxst_idxtr=   r>   
llm_grid_t
llm_grid_h
llm_grid_wt_indexh_indexw_indext_idxtext_lenllm_positionsmrope_position_deltas(                                           rI   get_mrope_input_positionsz*GLM4VForCausalLM.get_mrope_input_positionss  s   
 '4/0
 
 VUFJJ?OQS4T4TUUUUUFJJ?OQS4T4TUUUK	"1(=&9&4G!# l	V^ l	V*,#O% 4 4000&*OO000&+O^++/U2J2J$++G4444~--Ot4K4K$++G4444$++F3333;=#,#4*++^^$ $ G GZ "*--
(mA.&rN1-1	 ''k9(EFFFFOK5E M( M(1y'69:J6K6Ka6O6O$R(,,..22UV  !G++,[9GAq!//// -7
J Z00b!J$;<< 	  Z00aQ
B
;; 	  Z00aB
J;; 	  %++Wgw$?@@6I    1$KK"g--''4QRR8 GAq!
 //// -7
J "'z!2!2  !L//!T"a[[#VB
Z(?@@$WYY	   "L44!T!R^^#VAr:66$WYY	   "L44!T!Q^^#VAz266$WYY	   )//!K'7(CDDvM     1$K#q(OO  '2H$++X..33Ar::AA!RHH6Q   '(OO[M(` <((H##EL$:$:$?$?2$F$F$M$MaQS$T$TUUU	"2:::BB1bII - 1 1 3 3a 7#l:K:K KQQSS222rH   c                 R     | j         di |}|g S |                     |          }|S )NrG   )r^  ra  )r[   r  r_  vision_embeddingss       rI   embed_multimodalz!GLM4VForCausalLM.embed_multimodal  s?    :d:DDVDDI 55kBB  rH   	input_ids	positionsintermediate_tensorsinputs_embedsc                 >    |d }|                      ||||          }|S r   )r   )r[   r  r  r  r  r  r   s          rI   rr   zGLM4VForCausalLM.forward  s6      + M((y"6
 
 rH   )NN)r@   rA   rB   packed_modules_mappingr   rM  classmethodr   r   rS  r   r   r:   rP   r  r8   r^  rE   rF   ra  r   r!   tupler  r5   embed_input_idsr2   r  r*   rr   rs   rt   s   @rI   rE  rE  ,  s0        ..)*#_5 
 
 
 
 
 =3 =3 =3: = = = [= -7% % %  % 	%
 z*% 
% % % % % %(		$   50D 5 5 5 5 5
C33iC3 /0C3 
u|S 	!	C3 C3 C3 C3J )8O! !4H ! ! ! ! <@-1 < < 2D8	
 |d*  
+	+       rH   rE  )arC   rv  argparser   collections.abcr   r   typingr   r   rE   r   torch.nnr	   torchvisionr
   torchvision.transformsr   transformersr   r   r   transformers.image_utilsr   $transformers.tokenization_utils_baser   vllm.configr   vllm.config.multimodalr   vllm.distributedr   %vllm.model_executor.layers.activationr   r   9vllm.model_executor.layers.attention.mm_encoder_attentionr   vllm.model_executor.layers.convr   !vllm.model_executor.layers.linearr   r   r   r   r   'vllm.model_executor.layers.quantizationr   )vllm.model_executor.models.module_mappingr   vllm.multimodalr   vllm.multimodal.inputsr    r!   r"   r#   vllm.multimodal.parser$   vllm.multimodal.processingr%   r&   r'   r(   r)   vllm.sequencer*   vllm.transformers_utils.configsr+   vllm.utils.tensor_schemar,   r-   chatglmr/   r0   r1   
interfacesr2   r3   r4   r5   r6   r8   ModulerK   rv   r   r   r   r   r   r   r   r   r  r'  register_processorrE  rG   rH   rI   <module>r     s   C B           - - - - - - - - % % % % % % % %              " " " " " " 4 4 4 4 4 4 F F F F F F F F F F / / / / / / : : : : : : " " " " " " 3 3 3 3 3 3 A A A A A A H H H H H H H H X X X X X X 7 7 7 7 7 7              G F F F F F D D D D D D / / / / / /            6 5 5 5 5 5              . - - - - - 9 9 9 9 9 9 > > > > > > > > C C C C C C C C C C             
A 
A 
A 
A 
A< 
A 
A 
A    RY   >( ( ( ( (	 ( ( (V    ")   <    ry   :    ")   2N N N N N") N N Nb6 6 6 6 6BI 6 6 6r
 
 
 
 
 
 
 
>
 >
 >
 >
 >
 >
 >
 >
B/ / / / /, / / /:
 
 
 
 
45HI 
 
 
@)
 )
 )
 )
 )
67JK )
 )
 )
X ('	(  
` ` ` ` `(,
M` ` 
` ` `rH   