
    .`i"                     L   d dl Z d dlmZmZ d dlmZ d dlmZmZ d dl	Z	d dl	m
Z
 d dlmZmZmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZ d dlmZmZ d dlmZ d dlmZ d dlm Z  d dl!m"Z" d dl#m$Z$ d dl%m&Z&m'Z'm(Z(m)Z) d dl*m+Z+ d dl,m-Z- d dl.m/Z/ d dl0m1Z1 d dl2m3Z3 d dl4m5Z5 d dl6m7Z7 d dl8m9Z9m:Z:m;Z; d dl<m=Z=m>Z>m?Z? d dl@mAZAmBZBmCZCmDZDmEZEmFZFmGZG d dlHmIZI d dlJmKZKmLZL ddlMmNZNmOZOmPZPmQZQmRZRmSZS dd lTmUZU dd!lVmWZWmXZXmYZY dd"lZm[Z[  G d# d$eK          Z\ G d% d&e
j]                  Z^ G d' d(e
j]                  Z_d) Z` G d* d+e
j]                  Za G d, d-e
j]                  Zb G d. d/e
j]                  Zc G d0 d1e
j]                  Zd G d2 d3e
j]                  Ze ed4d ie55           G d6 d7e
j]                              Zf G d8 d9eC          Zg G d: d;eBeg                   Zh G d< d=eAeg                   Zi e7jj        ehegei>           G d? d@e
j]        eReSeNePeQ                      ZkdS )A    N)IterableMapping)tee)	AnnotatedLiteral)nn)BatchFeatureLlama4ConfigLlama4VisionConfig)SizeDict)Llama4Processor)find_supported_resolutionsget_best_fit)support_torch_compile)
VllmConfigset_current_vllm_config)BaseDummyOptions)$get_tensor_model_parallel_world_size)set_forward_context)MMEncoderAttention)FusedMoE)ColumnParallelLinearQKVParallelLinearReplicatedLinearRowParallelLinear)QuantizationConfig)get_rope)initialize_model)default_weight_loader)MultiModelKeys)should_torch_compile_mm_vit)MULTIMODAL_REGISTRY)MultiModalDataDictMultiModalFieldConfigMultiModalKwargsItems)ImageProcessorItems	ImageSizeMultiModalDataItems)BaseDummyInputsBuilderBaseMultiModalProcessorBaseProcessingInfoInputProcessingContextPromptReplacementPromptUpdatePromptUpdateDetails)IntermediateTensors)TensorSchemaTensorShape   )MixtureOfExpertsMultiModalEmbeddingsSupportsEagle3SupportsLoRASupportsMultiModal
SupportsPP)Llama4ForCausalLM)AutoWeightsLoaderStageMissingLayermaybe_prefix)run_dp_sharded_vision_modelc                       e Zd ZU dZdZed         ed<   eej	         e
dddd          f         ed<   eej	         e
d          f         ed<   	 eej	         e
dd	          f         ed
<   dS )Llama4ImagePatchInputsz
    Dimensions:
        - batch_size: Batch size
        - total_num_chunks: Batch size * number of chunks
        - num_channels: Number of channels
        - image_size: Size of each image
    pixel_valuestypetotal_num_chunksnum_channels
image_size
batch_sizepatches_per_image   aspect_ratiosN)__name__
__module____qualname____doc__rB   r   __annotations__r   torchTensorr2        v/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/mllama4.pyr@   r@   U   s           %3D'.
!222&lSS	U   
 !{{</H/H!HIIII U\;;|Q+G+GGHHHH rR   r@   c                   r     e Zd Z	 	 	 ddededededed	edz  d
edef fdZdej	        dej	        fdZ
 xZS )Llama4VisionMLPN F
input_sizeintermediate_sizeoutput_sizebiasoutput_activationquant_configprefixuse_data_parallelc	                     t                                                       t          ||||| d|          | _        t	          ||||| d|          | _        t          j                    | _        || _	        d S )Nz.fc1)rW   rY   rZ   r\   r]   
disable_tpz.fc2)
super__init__r   fc1r   fc2r   GELUactivation_fnr[   )
selfrW   rX   rY   rZ   r[   r\   r]   r^   	__class__s
            rS   rb   zLlama4VisionMLP.__init__v   s     	'!)%???(
 
 
 %(#%???(
 
 
  WYY!2rR   hidden_statesreturnc                     |                      |          \  }}|                     |          }|                     |          \  }}| j        r|                     |          S |S N)rc   rf   rd   r[   rg   ri   _s      rS   forwardzLlama4VisionMLP.forward   sf    88M22q**=9988M22q! 	5%%m444rR   NrV   F)rJ   rK   rL   intboolr   strrb   rO   rP   ro   __classcell__rh   s   @rS   rU   rU   u   s         37"'3 33 3 	3
 3  3 )4/3 3  3 3 3 3 3 3>U\ el        rR   rU   c                   :     e Zd Z	 	 ddedz  def fdZd Z xZS )Llama4MultiModalProjectorNrV   r\   r]   c           	          t                                                       t          |j        j        |j        j        d|d| d          | _        d S )NFTz	.linear_1)rW   rY   rZ   r\   gather_outputr]   )ra   rb   r   vision_configvision_output_dimtext_confighidden_sizelinear_1)rg   configr\   r]   rh   s       rS   rb   z"Llama4MultiModalProjector.__init__   s\     	,+=*6%'''
 
 
rR   c                 6    |                      |          \  }}|S rl   )r~   )rg   image_featuresri   rn   s       rS   ro   z!Llama4MultiModalProjector.forward   s    ==88qrR   )NrV   )rJ   rK   rL   r   rs   rb   ro   rt   ru   s   @rS   rw   rw      sr         37	
 
 )4/
 	
 
 
 
 
 
       rR   rw   c           
         | j         \  }}}t          t          j        |                    }|                     |||d          } |                                 \  }}}}|                     ||t          ||z            t          ||z                      }|                    dddd                                          }|                    |t          ||z            t          ||z            t          ||dz  z                      }|                    dddd                                          }|                    |d|j         d                   }	|	S )Nr   rH   r3      )shaperq   mathsqrtviewsizepermute
contiguous)
input_tensorshuffle_ratiorF   num_patcheschannels
patch_sizeheightwidthreshaped_tensoroutput_tensors
             rS   pixel_shuffler      sS   (4(:%JXTY{++,,J$$ZZLLL*6*;*;*=*='Jx"''FC 566H}<T8U8U O &--aAq99DDFFO%**F]"##EM!""Hq()**	 O &--aAq99DDFFO#((R9Nr9RSSMrR   c                   ^     e Zd Z	 	 	 ddedz  dedef fdZdej        d	ej        fd
Z	 xZ
S )Llama4VisionPixelShuffleMLPNrV   Fr\   r]   r^   c           
      &   t                                                       |j        | _        t          |j        | j        dz  z            | _        |j        | _        t          |j	        |j        |j        |j
        d|| d|          | _        d S )NrH   T.mlprW   rX   rY   rZ   r[   r\   r]   r^   )ra   rb   pixel_shuffle_ratiorq   projector_input_dim	inner_dimprojector_output_dim
output_dimrU   rX   multi_modal_projector_biasmlprg   r   r\   r]   r^   rh   s        rS   rb   z$Llama4VisionPixelShuffleMLP.__init__   s     	#)#= &4+CQ+FG
 
 !5"/$832"%???/	
 	
 	
rR   encoded_patchesrj   c                 V    t          || j                  }|                     |          S rl   )r   r   r   )rg   r   s     rS   ro   z#Llama4VisionPixelShuffleMLP.forward   s&    '9QRRxx(((rR   rp   )rJ   rK   rL   r   rs   rr   rb   rO   rP   ro   rt   ru   s   @rS   r   r      s         37"'
 
 )4/
 	

  
 
 
 
 
 
0)u| ) ) ) ) ) ) ) ) )rR   r   c            	       `     e Zd Z	 	 ddededz  dedef fdZd	ej	        d
ej	        fdZ
 xZS )Llama4VisionAttentionrV   Fr   r\   Nr]   r^   c           	      
   t                                                       || _        |rdnt                      | _        |j        | _        |j        | _        |j        | j        z  | _	        | j        | j        z  dk    sJ | j        | j        z  | _
        | j
        | j	        z  | _        | j
        | j	        z  | _        |j        | _        | j	        dz  | _        t          | j
        | j	        | j                  | _        |r`t#          | j        | j        d| j        z  z   d|| d          | _        t#          | j        | j	        z  | j        d|| d          | _        n[t)          | j        | j	        | j        d|| d          | _        t+          | j        | j	        z  | j        dd|| d	          | _        d
|j        d         dd}t/          | j	        |j        |j        z  dz  |dt4          j                  | _        d S )Nr3   r         rH   Tz	.qkv_proj)rZ   r\   r]   z.o_proj)rZ   input_is_parallelr\   r]   mllama4
rope_thetag      ?)	rope_typer   partial_rotary_factorF)	head_sizemax_positionrope_parametersis_neox_styledtype)ra   rb   r   r   tp_sizer}   	embed_dimnum_attention_heads	num_headshead_dimnum_local_headsq_sizekv_sizeattention_dropoutscalingr   attnr   qkv_projo_projr   r   r   r   rE   r   rO   	complex64
rotary_emb)rg   r   r\   r]   r^   r   rh   s         rS   rb   zLlama4VisionAttention.__init__   sB    	"NAA(L(N(N 	  +3*dn<~,1111#~=*T]:+dm;!'!9}d*& $-
 
	  	,a$,..) +++  DM +.) )))  DKK .) +++  DM ,."&) )))  DK # 0>%(
 
 #m +v/@@QF+/
 
 
rR   ri   rj   c                    |j         d d         }|                     |          \  }}|                    | j        | j        | j        gd          \  }}}|                    |j         d         |j         d         | j        | j                  }|                    |j         d         |j         d         | j        | j                  }|                     ||          \  }}|                    |j         d         |j         d         d          }|                    |j         d         |j         d         d          }| 	                    |||          } |j
        g |dR                                  }|                     |          \  }}|S )Nr   dimr   r3   )r   r   splitr   r   r   r   r   r   r   reshaper   r   )	rg   ri   input_shapeqkvrn   qkvattn_outputs	            rS   ro   zLlama4VisionAttention.forward6  sT    $)#2#.}--Q))T[$,E2)NN1aFF171:qwqz4+?OOFF171:qwqz4+?OOq!$$1FF171:qwqz2..FF171:qwqz2..ii1a(()k);;;;;;FFHH[11QrR   rV   FrJ   rK   rL   r   r   rs   rr   rb   rO   rP   ro   rt   ru   s   @rS   r   r      s        
 "'H
 H
"H
 )4/H
 	H

  H
 H
 H
 H
 H
 H
T| 
       rR   r   c            	       R     e Zd Z	 	 ddededz  dedef fdZd	ej	        fd
Z
 xZS )Llama4VisionEncoderLayerrV   Fr   r\   Nr]   r^   c           
         t                                                       |j        | _        |j        | _        |j        | _        t          ||| d|          | _        t          |j        |j        |j        dd|| d|          | _        t          j
        |j                  | _        t          j
        |j                  | _        d S )Nz
.self_attnr\   r]   r^   TFr   r   )ra   rb   r}   r   rX   r   	self_attnrU   r   r   	LayerNorminput_layernormpost_attention_layernormr   s        rS   rb   z!Llama4VisionEncoderLayer.__init__N  s     	!-#)#= !'!9.%(((/	
 
 
 #)$6*#%???/	
 	
 	
  "|F,>??(*V5G(H(H%%%rR   hidden_statec                     |}|                      |          }|                     |          }||z   }|}|                     |          }|                     |          }||z   }|f}|S rl   )r   r   r   r   )rg   r   residualoutputss       rS   ro   z Llama4VisionEncoderLayer.forwardn  sx    
  ++L99~~l33,.  44\BBxx--,./rR   r   r   ru   s   @rS   r   r   M  s        
 "'I I"I )4/I 	I
  I I I I I I@l       rR   r   c            	       `     e Zd Z	 	 ddededz  dedef fdZd	ej	        d
ej	        fdZ
 xZS )Llama4VisionEncoderrV   Fr   r\   Nr]   r^   c                     t                                                       | _        t          j        fdt          j                  D                       | _        d S )Nc           	      @    g | ]}t           d |           S )z.layers.r   )r   ).0	layer_idxr   r]   r\   r^   s     rS   
<listcomp>z0Llama4VisionEncoder.__init__.<locals>.<listcomp>  sT         )!-$99i99&7	    rR   )ra   rb   r   r   
ModuleListrangenum_hidden_layerslayersr   s    ````rS   rb   zLlama4VisionEncoder.__init__  s     	m       "'v'?!@!@  

 

rR   ri   rj   c                 @    | j         D ]} ||          }|d         }|S )aR  
        Args:
            hidden_states: Input tensor of shape
                (batch_size, sequence_length, hidden_size).
                Hidden states from the model embeddings, representing
                the input tokens.
                associated vectors than the model's internal embedding
                lookup matrix.
        r   )r   )rg   ri   encoder_layerlayer_outputss       rS   ro   zLlama4VisionEncoder.forward  s6     "[ 	- 	-M)M-88M)!,MMrR   r   r   ru   s   @rS   r   r     s        
 "'
 
"
 )4/
 	

  
 
 
 
 
 
*| 
       rR   r   c            	       b     e Zd Z	 	 	 ddededz  dedef fdZd	ej	        d
ej	        fdZ
 xZS )Llama4UnfoldConvolutionNrV   Fr   r\   r]   r^   c           	      R   t                                                       |j        }t          |t                    r||f}t
          j                            ||j                  | _        t          |j
        |d         z  |d         z  |j        dd|| d|          | _        d S )N)kernel_sizestrider   r3   FTz.linear)rW   rY   rZ   ry   r\   r]   r`   )ra   rb   r   
isinstancerq   rO   r   Unfoldunfoldr   rD   r}   linear)rg   r   r\   r]   r^   r   rh   s         rS   rb   z Llama4UnfoldConvolution.__init__  s     	'k3'' 	5&4Khoo+fFWoXX**[^;k!nL*%%%%(
 
 
rR   ri   rj   c                     |                      |          }|                    ddd          }|                     |          \  }}|S )Nr   rH   r3   )r   r   r   rm   s      rS   ro   zLlama4UnfoldConvolution.forward  sF    M22%--aA66;;}55qrR   rp   r   ru   s   @rS   r   r     s         37"'
 
"
 )4/
 	

  
 
 
 
 
 
,U\ el        rR   r   images_flattened)dynamic_arg_dims	enable_ifc            	       b     e Zd Z	 	 	 ddededz  dedef fdZd	ej	        d
ej	        fdZ
 xZS )Llama4VisionModelNrV   Fr   r\   r]   r^   c                    t                                                       || _        |j        | _        |j        | _        |j        | _        |j        | _        | j        | j        z  dz  dz   | _        |j        dz  | _        t          ||| d|          | _
        t          j        | j        t          j        | j                  z            | _        t          j        | j        t          j        | j        | j                  z            | _        t          j        | j        d          | _        t          j        | j        d          | _        t)          ||| d|          | _        t-          ||| d	|
          | _        d S )NrH   r3   r   z.patch_embeddingr   gh㈵>)epsz.modelz.vision_adapter)r]   r^   )ra   rb   r   rE   r   r}   rD   r   scaler   patch_embeddingr   	ParameterrO   randnclass_embeddingpositional_embedding_vlmr   layernorm_prelayernorm_postr   modelr   vision_adapterr   s        rS   rb   zLlama4VisionModel.__init__  s    	 + +!-"/ Ot>1DqH'-
6%.../	 
  
  
  "|DJTEU9V9V,VWW(*JT%5t7GHHH)
 )
%
  \$*:EEE l4+;FFF )%$$$/	
 
 

 :---/	
 
 
rR   r   rj   c                 b   |                      |          }|j        \  }}}| j                            |j        d         d|j        d                   }t	          j        ||gd          }|dz  }|                    |d||          }| j                            |j	        |j
                  }||z   }|                     |          }|                    |d|          }|                     |          }|                     |          }|d d d dd d f         }|                     |          }|S )Nr   r3   r   r   )r   device)r   r   r   expandrO   catr   r   tor   r  r   r   r  r  r  )rg   r   r   	num_tilesr   
hidden_dimr   positional_embeddings           rS   ro   zLlama4VisionModel.forward  sb   
 ++,<==-9-?*	;
 .55q!1l&8&<
 
 y,!@aHHHq $++	
 
  $<??$\-@  @  
  
 $&::)),77#((B
CC zz,//**<88 $AAAssAAAI. **<88rR   rp   r   ru   s   @rS   r   r     s         37"'.
 .
".
 )4/.
 	.

  .
 .
 .
 .
 .
 .
`',' 
' ' ' ' ' ' ' 'rR   r   c                        e Zd Zdeddf fdZdefdZdedefdZ	de
eedz  f         fdZed	edefd
            ZdefdZdefdZ xZS )Mllama4ProcessingInfoctxrj   Nc                 J    t                                          |           d S rl   )ra   rb   )rg   r  rh   s     rS   rb   zMllama4ProcessingInfo.__init__*  s!    rR   c                 @    | j                             t                    S rl   )r  get_hf_configr
   rg   s    rS   r  z#Mllama4ProcessingInfo.get_hf_config-  s    x%%l333rR   kwargsc                 ^     | j         j        t          fd|                    dd          i|S )Nuse_fastT)r  get_hf_processorr   pop)rg   r  s     rS   r  z&Mllama4ProcessingInfo.get_hf_processor0  sA    (tx(
 
&,jjT&B&B
FL
 
 	
rR   c                 
    dd iS )NimagerQ   r  s    rS   get_supported_mm_limitsz-Mllama4ProcessingInfo.get_supported_mm_limits5  s     rR   rz   c                     | j         }| j        }||z  dk    sJ d| d            d|  t          t          d| j        dz  z                      }||z  dz  |z  S )Nr   zchunk size z should be multiple of zpatch_size g      ?rH   )rE   r   rq   roundr   )rz   rE   r   ds_ratios       rS   get_patch_per_chunkz)Mllama4ProcessingInfo.get_patch_per_chunk:  s    "-
"-
J&!+++=*=== ,++ 	#j"""uSM$Eq$HIJJKKj(Q.(::rR   c                 B    |                                  j        }|j        S rl   )r  image_processormax_patches)rg   r   s     rS   get_max_num_tilesz'Mllama4ProcessingInfo.get_max_num_tilesG  s    //11A**rR   c                     |                                  j        }|j        }t          |                                 |z  |          S )Nr   r   )r  rz   rE   r'   r"  )rg   rz   rE   s      rS   !get_image_size_with_most_featuresz7Mllama4ProcessingInfo.get_image_size_with_most_featuresK  sB    **,,:"-
 6 6 8 8: EZXXXXrR   )rJ   rK   rL   r,   rb   r
   r  objectr   r  r   rs   rq   r  staticmethodr   r  r"  r'   r%  rt   ru   s   @rS   r  r  )  s       2 t      4| 4 4 4 4
 
O 
 
 
 

cDj)A    
 
;+= 
;# 
; 
; 
; \
;+3 + + + +Y9 Y Y Y Y Y Y Y YrR   r  c            
            e Zd Zdedeeef         deeef         deeef         def
 fdZdedeeef         deeef         fd	Z	d
e
deeef         dedee         fdZ xZS )Mllama4MultiModalProcessorpromptmm_data	mm_kwargs
tok_kwargsrj   c                    | j                                         }| ||d          S t                                          ||||          } | j         j        di |}|j        | j                                         j        }|                    d          d|v s
J d            |d         }	| 	                                
                    d|	i                              dt                    }
|j        t          | j                                         t!                    	          fd
|
D             }fd|D             }d |D             }t#          j        |          |d<   t#          j        |          |d<   |S )NF)add_special_tokens)r*  r+  r,  r-  rA   imagesz=images expected to be in mm_data when pixel_values is presentr  r$  )max_num_chunksr   c                     g | ]C}t          |j        d          |j        d         ft          j                  j                  DS )r3   r   )resize_to_max_canvas)r   r   rO   tensorr3  )r   r  r   possible_resolutionss     rS   r   zAMllama4MultiModalProcessor._call_hf_processor.<locals>.<listcomp>z  sb         Z]EJqM2L!566)8)M    rR   c                 <    g | ]}|d          z  |d         z  fS r   r3   rQ   )r   rE   	tile_sizes     rS   r   zAMllama4MultiModalProcessor._call_hf_processor.<locals>.<listcomp>  s@        A)+Z]i-GH  rR   c                 :    g | ]\  }}||z  d k    rd nd ||z  z   S )r3   rQ   )r   r_hr_ws      rS   r   zAMllama4MultiModalProcessor._call_hf_processor.<locals>.<listcomp>  sA     ! ! !;ECS3Y!^^S3Y! ! !rR   rI   rG   rQ   )infoget_tokenizerra   _call_hf_processorr  r   r  rz   get_get_data_parserparse_mm_data	get_itemsr&   rE   r   r"  r   rO   r4  )rg   r*  r+  r,  r-  	tokenizerprocessed_outputs	processorrz   r0  parsed_imagesbest_fit_sizesrI   rG   r   r5  r8  rh   s                 @@@rS   r>  z-Mllama4MultiModalProcessor._call_hf_processorS  s    I++--	?9V>>>>!GG66!	 7 
 
 /DI.;;;;	#3	//11?  00<w&&&O '&& X&F%%''0117$788  &0I#=#y::<<#9IFFF$ $ $      +  N   "0  M! !IV! ! ! 27m1L1Lo.5:\BS5T5T12  rR   	hf_inputshf_processor_mm_kwargsc                     |                     dt          j        d                    }t          t	          j        d|          t	          j        d          t	          j        d                    S )NrG   r   r  )rA   rG   rI   )r?  rO   emptydictr$   flat_from_sizesbatched)rg   rH  rI  rG   s       rS   _get_mm_fields_configz0Mllama4MultiModalProcessor._get_mm_fields_config  sl    
 &MM*=u{1~~NN.>*  4;GDD/7@@
 
 
 	
rR   mm_itemsout_mm_kwargsc                   	
 | j                                         }|j        }| j                             |          
 | j         j        di |j        }j        	dt          f	
fd}t          d||          gS )Nitem_idxc                     d         |          }|d         j         }                    |          }t          j        |          S )Nr  rI   )aspect_rationum_patches_per_chunk)data_prompt_split_imager/   select_text)rS  out_itemrU  replhf_processorimg_patch_tokenrV  rQ  s       rS   get_replacementzGMllama4MultiModalProcessor._get_prompt_updates.<locals>.get_replacement  sT    $W-h7H#O49L33)&; 4  D
 '24IIIrR   r  )modalitytargetreplacementrQ   )	r<  r  rz   r  r  image_tokenr]  rq   r-   )rg   rP  rI  rQ  r   rz   rb  r^  r\  r]  rV  s      `    @@@rS   _get_prompt_updatesz.Mllama4MultiModalProcessor._get_prompt_updates  s     ((**, $	 = =m L L1ty1KK4JKK".&6		Jc 		J 		J 		J 		J 		J 		J 		J 		J 		J  "+  
 	
rR   )rJ   rK   rL   rs   r   r&  r	   r>  r$   rO  r(   r%   listr.   rc  rt   ru   s   @rS   r)  r)  R  s       ;!;! f%;! 3;'	;!
 CK(;! 
;! ;! ;! ;! ;! ;!z

 !(V 4
 
++	,	
 
 
 

%
 !(V 4
 -	

 
l	
 
 
 
 
 
 
 
rR   r)  c            	       p    e Zd Zdeeef         defdZ	 ddedeeef         deeef         dz  defdZ	dS )	Mllama4DummyInputsBuilder	mm_countsrj   c                 x    |                     dd          }| j                                        }|j        }||z  S )Nr  r   )r?  r<  r  fake_image_token)rg   rg  
num_imagesrE  rb  s        rS   get_dummy_textz(Mllama4DummyInputsBuilder.get_dummy_text  s;    ]]7A..
I..00	0Z''rR   Nseq_len
mm_optionsc                     |                     dd          }| j                                        \  }}|r|                     d          nd }d|                     ||||          iS )Nr  r   )r   r   rj  	overrides)r?  r<  r%  _get_dummy_images)rg   rl  rg  rm  rj  target_widthtarget_heightimage_overridess           rS   get_dummy_mm_dataz+Mllama4DummyInputsBuilder.get_dummy_mm_data  s|     ]]7A..
(,	(S(S(U(U%}5?I*..111T T++"$%)	 ,  
 	
rR   rl   )
rJ   rK   rL   r   rs   rq   rk  r   r#   rt  rQ   rR   rS   rf  rf    s        (S(9 (c ( ( ( ( =A	
 

 38$
 C!112T9	

 

 
 
 
 
 
rR   rf  )r<  dummy_inputsc                   |    e Zd Zg dddgdZdZedededed	z  fd
            Zddde	def fdZ
deedf         dd	fdZdeedf         fdZdej        dej        dej        fdZdedefdZdeded	z  fdZdedefdZdefdZ	 	 d5d ej        d!ej        d"ed	z  d#ej        d	z  dedej        ez  fd$Zd%ej        dej        d	z  fd&Zd'eeeej        f                  dedeeeeej        f                  eeeej        f                  f         fd(Zd'eeeej        f                  deeeej        f                  fd)Zd*edefd+Zd'eeeej        f                  deeeeej        f                  eeeej        f                  f         fd,Z d'eeeej        f                  d-e!deeeeej        f                  e"e         f         fd.Z#d/eeeej        f                  d-e!d0ede"e         fd1Z$deeeeeef                  fd2Z%d'eeeej        f                  de"e         fd3Z&de'fd4Z( xZ)S )6Llama4ForConditionalGeneration)q_projk_projv_proj	gate_projup_proj)r   gate_up_projTr_  irj   Nc                 N    |                     d          rdS t          d          )Nr  z	<|image|>z Only image modality is supported)
startswith
ValueError)clsr_  r~  s      rS   get_placeholder_strz2Llama4ForConditionalGeneration.get_placeholder_str  s,    w'' 	;;<<<rR   rV   )r]   vllm_configr]   c                   t                                                       |j        j        }|j        }|j        j        }|j        dk    | _        || _        || _	        || _        || _        | 
                    |d          5  ddlm} t          |          5   |dd          5  t          |j        d t!          |d          | j        	          | _        d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   t%          | j	        d t!          |d
                    | _        d d d            n# 1 swxY w Y   |                     |          5  t+          |                    |j        dg          t!          |d          t0                    | _        d d d            n# 1 swxY w Y   | j        j        | _        d| _        | j        j        | _        | j        j        | _        | j        j        | _        | j        j        | _        | j        j         | _         | j        j!        | _!        | j        j"        | _"        tG          | j"                  | _$        d S )NrW  r  r   )set_model_tagr   T)
is_encodervision_model)r   r\   r]   r^   multi_modal_projector)r   r\   r]   LlamaForCausalLMlanguage_model)r  r]   model_classr3   )%ra   rb   model_config	hf_configr\   multimodal_configmm_encoder_tp_moder^   r  r   _mark_tower_modelvllm.compilation.backendsr  r   r   rz   r=   r  rw   r  _mark_language_modelr   with_hf_configr|   r:   r  make_empty_intermediate_tensorsnum_expert_groupsnum_logical_expertsnum_physical_expertsnum_local_physical_expertsnum_routed_expertsnum_shared_expertsnum_redundant_experts
moe_layerslennum_moe_layers)rg   r  r]   r   r\   r  r  rh   s          rS   rb   z'Llama4ForConditionalGeneration.__init__  sc   )3"/'4F!2!E!O&(!2##K99 	 	?????? (44	 	1dCCC	 	 %6!/!%'??&*&<	% % %!		 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 *C{!#F,CDD* * *D&	 	 	 	 	 	 	 	 	 	 	 	 	 	 	( &&{33 	 	"2'66&);(<  $F,<==-# # #D	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ? 	,
 "##'#6#J $($7$L!*.*=*X'"&"5"H"&"5"H%)%8%N"-8!$/22sm   D<C;'1C$C;$C((C;+C(,C;/D<;C?	?D<C?	-D<<E E AF--F14F1r   .c                 h    t          | j        d          sJ | j                            |           dS )zBSet which layers should output auxiliary hidden states for EAGLE3.set_aux_hidden_state_layersN)hasattrr  r  )rg   r   s     rS   r  z:Llama4ForConditionalGeneration.set_aux_hidden_state_layers2  s;     t*,IJJJJJ77?????rR   c                 b    t          | j        d          sJ | j                                        S )zGet the layer indices for auxiliary hidden state outputs.

        Note: The GPU model runner will override this with layers from
        the speculative config if available, providing dynamic configuration.
        "get_eagle3_aux_hidden_state_layers)r  r  r  r  s    rS   r  zALlama4ForConditionalGeneration.get_eagle3_aux_hidden_state_layers8  s3     t*,PQQQQQ"EEGGGrR   expert_load_viewlogical_to_physical_maplogical_replica_countc                 `    | j                             |||           | j         j        | _        d S rl   )r  set_eplb_stateexpert_weights)rg   r  r  r  s       rS   r  z-Llama4ForConditionalGeneration.set_eplb_stateB  s>     	**57L	
 	
 	
 #1@rR   r  r  c                 <    | j                             ||           d S rl   )r   update_physical_experts_metadata)rg   r  r  s      rS   r  z?Llama4ForConditionalGeneration.update_physical_experts_metadataM  s/     	<< "<	
 	
 	
 	
 	
rR   r  c                     |                     dd           }|d S |                     d          }|                     d          }t          d|||          S )NrA   rG   rI   )rB   rA   rG   rI   )r  r@   )rg   r  rA   rG   rI   s        rS   _parse_and_validate_image_inputz>Llama4ForConditionalGeneration._parse_and_validate_image_inputT  si     zz.$774"JJ':;;

?33%%/'	
 
 
 	
rR   image_inputc                 6   | j         r| j        sJ |d         }|d                                         }| j        rt	          || j                   }n|                      |          }|                     |          }d |                    |d          D             S )NrA   rG   c                 :    g | ]}|                     d d          S r7  )flatten)r   imgs     rS   r   zGLlama4ForConditionalGeneration._process_image_input.<locals>.<listcomp>w  s6     
 
 
 KK1
 
 
rR   r   r   )r  r  tolistr^   r>   r   )rg   r  rA   rG   vision_embeddings_flats        rS   _process_image_inputz3Llama4ForConditionalGeneration._process_image_inputf  s      ?T%????">2'(;<CCEE ! 	E%@d/& &"" &*%6%6|%D%D"!%!;!;<R!S!S
 
-334E13MM
 
 
 	
rR   c                      | j         di |}|g S t          d | j                  5  |                     |          cd d d            S # 1 swxY w Y   d S )NrQ   )r  r   r  r  )rg   r  r  s      rS   embed_multimodalz/Llama4ForConditionalGeneration.embed_multimodal|  s    :d:DDVDDI  d&677	: 	: ,,[99	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	:s   A		AA	input_ids	positionsintermediate_tensorsinputs_embedsc                 :    |d }|                      ||||          S rl   )r  )rg   r  r  r  r  r  s         rS   ro   z&Llama4ForConditionalGeneration.forward  s1      + M""y"6
 
 	
rR   ri   c                 6    | j                             |          S rl   )r  compute_logits)rg   ri   s     rS   r  z-Llama4ForConditionalGeneration.compute_logits  s     "11-@@@rR   weightsc                    t          |d          \  dt          t          t          t          j        f                  ffd}dt          t          t          t          j        f                  ffd} |             |            fS )NrH   rj   c               3   R   K   D ] \  } }|                                r| |fV  !d S rl   r  )namerW  r]   weights1s     rS   get_prefix_weightszKLlama4ForConditionalGeneration.separate_weights.<locals>.get_prefix_weights  sI      & ' '
d??6** ',&&&' 'rR   c               3   R   K   D ] \  } }|                                s| |fV  !d S rl   r  )r  rW  r]   weights2s     rS   get_other_weightszJLlama4ForConditionalGeneration.separate_weights.<locals>.get_other_weights  sI      & ' '
dv.. ',&&&' 'rR   )r   r   tuplers   rO   rP   )rg   r  r]   r  r  r  r  s     `  @@rS   separate_weightsz/Llama4ForConditionalGeneration.separate_weights  s    
 !!__(	'HU33D-E$F 	' 	' 	' 	' 	' 	' 	'
	'8E#u|2C,D#E 	' 	' 	' 	' 	' 	' 	'
 "!##%6%6%8%888rR   c              #   >  K   dddd}i }|D ]Y\  }}|                                 D ]9\  }}||vr
|                    |d          }||vr	d gdz  ||<   |||         |<    n||fV  Z|                                 D ]!\  }	}
t          j        |
d          }|	|fV  "d S )Nr   r3   rH   ).self_attn.q_proj.self_attn.k_proj.self_attn.v_proj.self_attn.qkv_projr   r   )itemsreplacerO   r  )rg   r  qkv_idx_mappingsqkv_weightsr  loaded_weightweight_nameidxnew_namekeyweight
qkv_weights               rS   _consolidate_qkv_weightsz7Llama4ForConditionalGeneration._consolidate_qkv_weights  s      "#!"!"
 

 #* 
	* 
	*D-$4$:$:$<$< 	* 	* Sd**<<5JKK;..-1FQJK)-:H%c*M))))&,,.. 	" 	"KC6q111Jz/!!!!	" 	"rR   r  c                    |                     d          s|                     d          r|                     d          r|                    ddd          n|}d|v rrd|v sd|v rjd|v r|                    dd          S d	|v r|                    d	d
          S d|v r|                    dd          S d|v r|                    dd          S |S d|v r>d|v sd|v r6d|v r|                    dd          S d|v r|                    dd          S |S |S |                     d          r|                    dd          S |S )zKRename weights from ModelOpt llama4 fp8 checkpoints to vLLM
        format.zmodel.zlanguage_model.model.r3   feed_forward.experts._input_scale_weight_scaledown_proj_input_scalew2_input_scaledown_proj_weight_scalew2_weight_scalegate_up_proj_input_scalew13_input_scalegate_up_proj_weight_scalew13_weight_scalez
self_attn.z.k_scalez.v_scalez.k_proj.k_scalez.attn.k_scalez.v_proj.v_scalez.attn.v_scalezlm_head.weightzlanguage_model.lm_head.weight)r  r  )rg   r  renameds      rS   &_rename_weight_for_modelopt_checkpointzELlama4ForConditionalGeneration._rename_weight_for_modelopt_checkpoint  s    ??8$$ %	S8O(P(P %	S ??8,,X'>BBB  '$..$&&/T*A*A +g55"??+BDTUUU-88"??+CEVWWW/7::"??24E   1G;;"??35G    %%:+=+=tASAS$//"??+<oNNN&'11"??+<oNNN N__-.. 	S<< 02QRRRrR   c                 T   g }g }|D ]\  }}|                      |          }|                    dd          d         }t          t          | |          t                    rZ|                    d          r|                    ||f           |                    ||f           ||fS )zORename weights and separate them into language_model and other
        weights..r3   r   zlanguage_model.)r  r   r   getattrr<   r  append)rg   r  language_model_weightsother_weightsr  r  r  attrs           rS   _separate_and_rename_weightsz;Llama4ForConditionalGeneration._separate_and_rename_weights  s    
 "$# 
	8 
	8LD&AA$GGG==a((+D'$--/@AA !!"344 8&--w.?@@@@$$gv%67777%}44rR   params_dictc                    g }g }t                      }|D ]\  }}d|v rd|v rd|vr||v r||         }t          |d          rw|j                                        dk    rZ|                                dk    rB|j                            |                                           |                    |           |                    ||f           |                    ||f           |||fS )zHandle expert scale parameters that need broadcasting.

        ModelOpt checkpoints use a single value tensor scalar for BMM style
        experts, vLLM expects the scale to be broadcasted across all experts.
        r  r   z.shared_expertrW  r3   )setr  rW  numelfill_itemaddr  )	rg   r  r  regular_weightsexpert_scale_weightsupdated_paramsr  r  params	            rS   !_handle_expert_scale_broadcastingz@Llama4ForConditionalGeneration._handle_expert_scale_broadcasting  s    !# 	7 	7LD& (4//tOO$D00;&&'-Ev..!!J,,..22"LLNNa// 
((777&**4000 $++T6N;;;;&&f~6666 4nDDrR   r  stacked_params_mappingc                    t                      }| j        r|                     |          }|D ]\  }}|D ]Z\  }}}	||vs| j        r|                    ||          }||         }
|                    |           |
j        } ||
||	            n?||         }
t          |
dt                    } ||
|           |                    |           |S )z6Load non-language-model weights with stacking support.weight_loader)r  r^   r  r  r  r  r  r   )rg   r  r  r  r  r  r  
param_namer  shard_idr  r  s               rS   _load_other_weightsz2Llama4ForConditionalGeneration._load_other_weights-  s	    ! 	I 99-HHM#0 	) 	)D-5K ) )1
Kd**d.D*||K<<#D)""4((( % 3e]H=== $D) '@U V Ve]333""4(((rR   c                 ^    t          j        | ddd| j        j        j        | j                  S )Nr{  	down_projr|  )ckpt_gate_proj_nameckpt_down_proj_nameckpt_up_proj_namenum_expertsr  )r   make_expert_params_mappingr   r|   num_local_expertsr  r  s    rS   get_expert_mappingz1Llama4ForConditionalGeneration.get_expert_mappingM  s;     2 + +'/A"&"<
 
 
 	
rR   c                 $   g d}t          |                                           }t                      }|                     |          \  }}|                     ||          \  }}}	|                    |	           t          |           }
|
                    |          }|J |                    |           |r,|
                    |          }|r|                    |           |                    |                     |||                     |S )N))r  r  r   )r  r  r   )r  r  r   ).shared_expert.gate_up_projz.shared_expert.gate_projr   )r  z.shared_expert.up_projr3   ).feed_forward.gate_up_projz.feed_forward.gate_projr   )r  z.feed_forward.up_projr3   )	rL  named_parametersr  r  r  updater;   load_weightsr  )rg   r  r  r  r  r  r  r  r  updated_params_from_expertsloaderloaded_language_model_paramsloaded_expert_scale_paramss                rS   r  z+Llama4ForConditionalGeneration.load_weightsY  sG   "
 "
 "
 4002233#&55 150Q0Q1
 1
- 223I;WW 	K-/J 	9:::"4(('-':':?'K'K$+777:;;; 	B)/)<)<=Q)R)R&) B%%&@AAA$$]KAWXX	
 	
 	
 rR   c                 0    t          j        ddd          S )z<
        Get the module prefix in multimodal models
        r  zmulti_modal_projector.zvision_model.)r  	connectortower_model)r    from_string_fieldr  s    rS   get_mm_mappingz-Llama4ForConditionalGeneration.get_mm_mapping  s'     /+.'
 
 
 	
rR   )NN)*rJ   rK   rL   packed_modules_mappingsupports_encoder_tp_dataclassmethodrs   rq   r  r   rb   r  r  r  rO   rP   r  r  r&  r@   r  r5   r  r  r0   ro   r  r   r  r  r  rd  r  rL  r  r  r  r  r  r    r  rt   ru   s   @rS   rw  rw    s        322$i0 
  $=3 =3 =3: = = = [= BD 63 63 63z 633 63 63 63 63 63 63p@%S/ @d @ @ @ @HE#s(O H H H H	A,	A "'	A  %|		A 	A 	A 	A
$'
EH
 
 
 


	$	&
 
 
 
$
1
	
 
 
 
,:,@ : : : : <@-1
 
<
 <
 2D8	

 |d*
 
 
+	+
 
 
 
A|A 
	A A A A9%U\ 1239 9 
xc5</018E#u|BS<T3UU	V	9 9 9 9&"c5<&7 89"	%U\)*	+" " " "0*3 *3 * * * *X5c5<&7 895	tE#u|+,-tE#u|:K4L/MM	N5 5 5 5,#EE#u|"345#EDH#E	tE#u|+,-s3x7	8#E #E #E #EJc5<&7 89  !%	
 
S   @

DsCc/A)B$C 

 

 

 

)HU33D-E$F )3s8 ) ) ) )V
 
 
 
 
 
 
 
 
rR   rw  )lr   collections.abcr   r   	itertoolsr   typingr   r   rO   r   transformersr	   r
   r   transformers.image_utilsr   transformers.models.llama4r   7transformers.models.llama4.image_processing_llama4_fastr   r   vllm.compilation.decoratorsr   vllm.configr   r   vllm.config.multimodalr   vllm.distributedr   vllm.forward_contextr   9vllm.model_executor.layers.attention.mm_encoder_attentionr   $vllm.model_executor.layers.fused_moer   !vllm.model_executor.layers.linearr   r   r   r   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   &vllm.model_executor.model_loader.utilsr   -vllm.model_executor.model_loader.weight_utilsr   )vllm.model_executor.models.module_mappingr    !vllm.model_executor.models.visionr!   vllm.multimodalr"   vllm.multimodal.inputsr#   r$   r%   vllm.multimodal.parser&   r'   r(   vllm.multimodal.processingr)   r*   r+   r,   r-   r.   r/   vllm.sequencer0   vllm.utils.tensor_schemar1   r2   
interfacesr4   r5   r6   r7   r8   r9   llama4r:   utilsr;   r<   r=   visionr>   r@   ModulerU   rw   r   r   r   r   r   r   r   r  r)  rf  register_processorrw  rQ   rR   rS   <module>rA     s~  &  - - - - - - - -       % % % % % % % %        G G G G G G G G G G - - - - - - 6 6 6 6 6 6       
 > = = = = = ; ; ; ; ; ; ; ; 3 3 3 3 3 3 A A A A A A 4 4 4 4 4 4 X X X X X X 9 9 9 9 9 9            G F F F F F @ @ @ @ @ @ C C C C C C O O O O O O D D D D D D I I I I I I / / / / / /         
 V U U U U U U U U U                  . - - - - - > > > > > > > >                & % % % % % E E E E E E E E E E / / / / / /    \   @& & & & &bi & & &R    	   ,  2) ) ) ) )") ) ) )<_ _ _ _ _BI _ _ _D2 2 2 2 2ry 2 2 2j( ( ( ( (") ( ( (V    bi   < (!,8S  X X X X X	 X X Xv&Y &Y &Y &Y &Y. &Y &Y &YRk
 k
 k
 k
 k
!89N!O k
 k
 k
\
 
 
 
 
 67L M 
 
 
> ('	*  
h
 h
 h
 h
 h
Ih
 h
 
h
 h
 h
rR   