
    .`iԞ              	          d dl mZmZmZ d dlmZ d dlmZ d dlm	Z	m
Z
mZ d dlZd dlmZ d dlmc mZ d dlmZmZmZmZ d dlmZ d dlmZmZ d d	lmZ d d
lmZm Z  d dl!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z*m+Z+m,Z, d dl-m.Z. d dl/m0Z0 d dl1m2Z2 d dl3m4Z4m5Z5 d dl6m7Z7m8Z8 d dl9m:Z: d dl;m<Z< d dl=m>Z>m?Z?m@Z@ d dlAmBZB d dlCmDZDmEZEmFZFmGZGmHZHmIZI d dlJmKZK d dlLmMZMmNZN ddlOmPZPmQZQmRZRmSZS ddlTmUZUmVZVmWZWmXZX  e"eY          ZZ G d d eM          Z[ G d! d"eF          Z\ G d# d$eDe\                   Z] G d% d&eEe\                   Z^ G d' d(ej_                  Z` G d) d*eja                  Zb G d+ d,eja                  Zc G d- d.eja                  Zd G d/ d0eja                  Ze G d1 d2eja                  Zf G d3 d4eja                  Zg G d5 d6eja                  Zh G d7 d8eja                  Zi G d9 d:eja                  Zj G d; d<eja                  Zk G d= d>          Zl G d? d@eja                  Zm e<jn        e^e\e]A           G dB dCeja        eQeReS                      ZodS )D    )IterableMappingSequence)cached_property)islice)	AnnotatedAnyLiteralN)BatchFeatureChameleonConfigChameleonProcessorChameleonVQVAEConfig)	Attention)CacheConfig
VllmConfig)BaseDummyOptions)get_pp_group$get_tensor_model_parallel_world_size)init_logger)
SiluAndMul)Conv2dLayer)RMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loaderrow_parallel_weight_loader)set_weight_attrs)MULTIMODAL_REGISTRY)MultiModalDataDictMultiModalFieldConfigMultiModalKwargsItems)MultiModalDataItems)BaseDummyInputsBuilderBaseMultiModalProcessorBaseProcessingInfoPromptReplacementPromptUpdatePromptUpdateDetails)IntermediateTensors)TensorSchemaTensorShape   )MultiModalEmbeddingsSupportsMultiModal
SupportsPPSupportsQuant)is_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                   f    e Zd ZU dZed         ed<   eej         e	dddd          f         ed<   d	S )
ChameleonImagePixelInputsz
    Dimensions:
        - bn: Batch size * number of images
        - c: Number of channels (3)
        - h: Height of each image
        - w: Width of each image
    pixel_valuestypebn   hwdataN)
__name__
__module____qualname____doc__r
   __annotations__r   torchTensorr1        x/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/chameleon.pyr<   r<   N   sW           .
!!!!
EL++dAsC"@"@@
AAAAAArL   r<   c                   N    e Zd Zd ZdefdZdeeedz  f         fdZ	defdZ
dS )ChameleonProcessingInfoc                 @    | j                             t                    S N)ctxget_hf_configr   selfs    rM   rS   z%ChameleonProcessingInfo.get_hf_config\   s    x%%o666rL   kwargsc                 2     | j         j        t          fi |S rQ   )rR   get_hf_processorr   )rU   rV   s     rM   rX   z(ChameleonProcessingInfo.get_hf_processor_   s     (tx();FFvFFFrL   returnNc                 
    ddiS )Nimager2   rK   rT   s    rM   get_supported_mm_limitsz/ChameleonProcessingInfo.get_supported_mm_limitsb   s    |rL   c                 8    |                                  }|j        S rQ   )rX   image_seq_length)rU   	processors     rM   get_num_image_tokensz,ChameleonProcessingInfo.get_num_image_tokense   s    ))++	))rL   )rD   rE   rF   rS   objectrX   r   strintr\   r`   rK   rL   rM   rO   rO   [   s        7 7 7G G G G GcDj)A    *c * * * * * *rL   rO   c            	       p    e Zd Zdeeef         defdZ	 ddedeeef         deeef         dz  defdZ	dS )	ChameleonDummyInputsBuilder	mm_countsrY   c                 x    |                     dd          }| j                                        }|j        }||z  S )Nr[   r   )getinforX   image_token)rU   rf   
num_imagesr_   rj   s        rM   get_dummy_textz*ChameleonDummyInputsBuilder.get_dummy_textk   s;    ]]7A..
I..00	+Z''rL   Nseq_len
mm_optionsc                     | j                                         }|j        j        x}}|                    dd          }|r|                    d          nd }d|                     ||||          iS )Nr[   r   )widthheightrk   	overrides)ri   rS   	vq_config
resolutionrh   _get_dummy_images)	rU   rm   rf   rn   configrp   rq   rk   image_overridess	            rM   get_dummy_mm_dataz-ChameleonDummyInputsBuilder.get_dummy_mm_datas   s     ((**)44]]7A..
5?I*..111T T++%)	 ,  
 	
rL   rQ   )
rD   rE   rF   r   rb   rc   rl   r   r%   rx   rK   rL   rM   re   re   j   s        (S(9 (c ( ( ( ( =A	
 

 38$
 C!112T9	

 

 
 
 
 
 
rL   re   c            
            e Zd Zdedeeef         deeef         deeef         def
 fdZdee	         dee	         fdZ
d	ed
eeef         deeef         fdZded
eeef         dedee         fdZ xZS )ChameleonMultiModalProcessorpromptmm_data	mm_kwargs
tok_kwargsrY   c                    |sa| j                                                             |          }|                     |          }t	          t          |g          d          S t                                          ||||          S )N)	input_idspt)tensor_type)r{   r|   r}   r~   )ri   get_tokenizerencode_apply_hf_processor_tokens_onlyr   dictsuper_call_hf_processor)rU   r{   r|   r}   r~   
prompt_ids	__class__s         rM   r   z/ChameleonMultiModalProcessor._call_hf_processor   s      	P002299&AAJ==jIIJ
| < < <$OOOOww))!	 * 
 
 	
rL   prompt_tokensc                     | j                                         }|                                }||j                 }||gz   S rQ   )ri   r   	get_vocab	sep_token)rU   r   	tokenizervocabsep_token_ids        rM   r   z<ChameleonMultiModalProcessor._apply_hf_processor_tokens_only   sB    
 I++--	##%%Y01~--rL   	hf_inputshf_processor_mm_kwargsc                 F    t          t          j        d                    S )Nr[   )r=   )r   r&   batched)rU   r   r   s      rM   _get_mm_fields_configz2ChameleonMultiModalProcessor._get_mm_fields_config   s!    
 !6!>w!G!GHHHHrL   mm_itemsout_mm_kwargsc                 l    | j         j        di |}| j                                         }|                                }||j                 }||j                 }||j                 }	| j                                         }
|g|
z  }t          d|gt          j
        |g|z   |	gz   |                    gS )Nr[   )embed_token_id)modalitytargetreplacementrK   )ri   rX   r   r   image_start_tokenrj   image_end_tokenr`   r,   r.   select_token_id)rU   r   r   r   r_   r   r   image_start_idimage_token_idimage_end_idnum_image_tokensimage_tokenss               rM   _get_prompt_updatesz0ChameleonMultiModalProcessor._get_prompt_updates   s     /DI.HH1GHH	I++--	##%%y:;y45Y67999;;&'*::  &'/?#$|3|nD#1    	
 		
rL   )rD   rE   rF   rb   r   ra   r   r   listrc   r   r&   r   r(   r'   r   r-   r   __classcell__r   s   @rM   rz   rz      sI       

 f%
 3;'	

 CK(
 

 
 
 
 
 
&
.Cy
. 
c
. 
. 
. 
.II !(V 4I 
++	,	I I I I
%
 !(V 4
 -	

 
,	
 
 
 
 
 
 
 
rL   rz   c                   $     e Zd Z fdZd Z xZS )ChameleonLayerNormc                      t                      j        |g|R i | |d         f| _        t          | j        dt
          i           t          | j        dt
          i           d S )Nweight_loader)r   __init__normalized_shaper#   weightr"   bias)rU   hidden_sizeargsrV   r   s       rM   r   zChameleonLayerNorm.__init__   so    6t666v666!,R 28R&STTT_6P$QRRRRRrL   c                 f    t          j        || j        d d d          }|| j        z  | j        z   }|S )Ngh㈵>eps)F
layer_normr   r   r   rU   hidden_statess     rM   forwardzChameleonLayerNorm.forward   sA    40$$
 
 
 &3di?rL   )rD   rE   rF   r   r   r   r   s   @rM   r   r      sL        S S S S S      rL   r   c                   P     e Zd Z	 	 	 ddededededz  ded	ed
df fdZd Z xZ	S )ChameleonMLPNF r   intermediate_size
hidden_actquant_configr   prefixrY   c                    t                                                       t          ||gdz  ||| d          | _        t	          ||||| d          | _        |dk    rt          d| d          t                      | _        d S )	N   .gate_up_proj)
input_sizeoutput_sizesr   r   r   z
.down_projr   output_sizer   r   r   siluzUnsupported activation: z!. Only silu is supported for now.)	r   r   r   gate_up_projr   	down_proj
ValueErrorr   act_fn)rU   r   r   r   r   r   r   r   s          rM   r   zChameleonMLP.__init__   s     	6"+,q0%+++
 
 
 +(#%(((
 
 
 X:XXX   !llrL   c                     |                      |          \  }}|                     |          }|                     |          \  }}|S rQ   )r   r   r   )rU   xgate_up_s       rM   r   zChameleonMLP.forward   sD    &&q))
KK  ~~a  1rL   )NFr   )
rD   rE   rF   rc   rb   r   boolr   r   r   r   s   @rM   r   r      s         37# ## # 	#
 )4/# # # 
# # # # # #<      rL   r   c                        e Zd Z	 	 	 	 	 ddedededeeef         d	ed
edz  dede	dz  deddf fdZ
dej        dej        deej        ej        f         fdZdej        dej        dej        fdZ xZS )ChameleonAttention   NFr   r   	num_headsnum_kv_headsrope_parametersmax_position_embeddingsr   r   cache_configr   rY   c
           
         t                                                       || _        t                      }
|| _        | j        |
z  dk    sJ | j        |
z  | _        || _        | j        |
k    r| j        |
z  dk    sJ n|
| j        z  dk    sJ t          d| j        |
z            | _        || j        z  | _	        | j        | j	        z  | _
        | j        | j	        z  | _        | j	        dz  | _        || _        t          || j	        | j        | j        |||	 d          | _        t!          | j        | j	        z  ||||	 d          | _        t%          | j        | j	        f          | _        t%          | j        | j	        f          | _        t+          | j	        ||          | _        t/          | j        | j	        | j        | j        |||	 d	
          | _        d S )Nr   r2         	.qkv_proj)r   	head_sizetotal_num_headstotal_num_kv_headsr   r   r   z.o_projr   )max_positionr   z.attn)r   r   r   r   )r   r   r   r   r   r   r   maxr   head_dimq_sizekv_sizescalingr   r   qkv_projr   o_projr   q_normk_normr   
rotary_embr   attn)rU   r   r   r   r   r   r   r   r   r   tp_sizer   s              rM   r   zChameleonAttention.__init__  s    	&688(#g-2222-8"."g-- *W499999 T4499994#:g#EFF#t';;nt}4(4=8}d*'>$)#m 0#6%'''
 
 
 (+dm;#%%%%
 
 
 )$.$-)HII($*;T])KLL"M0+
 
 
 NML*%%###
 
 
			rL   qkc                 R   |                     d| j        | j                  }|                     d| j        | j                  }|                     |          }|                     |          } |j        g |j        d d         dR  } |j        g |j        d d         dR  }||fS )Nr   )reshaper   r   r   r   r   viewshape)rU   r   r   s      rM   _apply_qk_normz!ChameleonAttention._apply_qk_normK  s     IIb$.$-88IIb$+T];;KKNNKKNNAF%AGCRCL%"%%%AF%AGCRCL%"%%%!trL   	positionsr   c                 T   |                      |          \  }}|                    | j        | j        | j        gd          \  }}}|                     ||          \  }}|                     |||          \  }}|                     |||          }|                     |          \  }	}|	S )Nr   dim)r   splitr   r   r   r   r   r   )
rU   r   r   qkvr   r   r   vattn_outputoutputs
             rM   r   zChameleonAttention.forwardW  s    
 }--Q))T[$,E2)NN1a""1a((1y!Q//1ii1a((KK,,	rL   )r   NFNr   )rD   rE   rF   rc   r   rb   r	   r   r   r   r   rI   rJ   tupler   r   r   r   s   @rM   r   r     sM        (,26+/B
 B
B
 B
 	B

 c3hB
 "%B
 )4/B
 B
 "D(B
 B
 
B
 B
 B
 B
 B
 B
H

"',
	u|U\)	*
 
 
 
< | 
	       rL   r   c                        e Zd Z	 	 	 ddededz  dedz  deddf
 fdZd	ej	        d
ej	        dej	        dz  de
ej	        ej	        dz  f         fdZ xZS )ChameleonDecoderLayerNr   rv   r   r   r   rY   c                    t                                                       |j        | _        t          |dd          }t	          | j        |j        t          |d|j                  |j        ||d|| d	  	        | _        t          | j        |j	        |j
        |t          |dd          | d	          | _        t          |j        |j        
          | _        t          |j        |j        
          | _        d S Nr   r   num_key_value_headsFz
.self_attn)	r   r   r   r   r   r   r   r   r   mlp_biasz.mlp)r   r   r   r   r   r   r   r   r   r   getattrr   num_attention_headsr   	self_attnr   r   r   mlpr   rms_norm_epsinput_layernormpost_attention_layernormrU   rv   r   r   r   r   r   s         rM   r   zChameleonDecoderLayer.__init__g      	!-")&2KT"R"R+(0 -v/I  #2$;%%(((
 
 
  ($6(%U33???
 
 
  'v'9v?RSSS(/F$7)
 )
 )
%%%rL   r   r   residualc                     ||}|                      |          }n|                      ||          \  }}|                     ||          }|                     ||          \  }}|                     |          }||fS N)r   r   )r  r  r  r  rU   r   r   r  s       rM   r   zChameleonDecoderLayer.forward  s     $H 00??MM&*&:&:=(&S&S#M8' ' 
 
 #'"?"?x"X"Xx//h&&rL   NNr   rD   rE   rF   r   r   r   rb   r   rI   rJ   r  r   r   r   s   @rM   r  r  f  s         ,026#
 #
#
 "D(#
 )4/	#

 #
 
#
 #
 #
 #
 #
 #
J'<' |' ,%	'
 
u|U\D00	1' ' ' ' ' ' ' 'rL   r  c                        e Zd Z	 	 	 ddededz  dedz  deddf
 fdZd	ej	        d
ej	        dej	        dz  de
ej	        ej	        f         fdZ xZS )ChameleonSwinDecoderLayerNr   rv   r   r   r   rY   c                    t                                                       |j        | _        t          |dd          }t	          | j        |j        t          |d|j                  |j        ||d|| d	  	        | _        t          | j        |j	        |j
        |t          |dd          | d	          | _        t          |j        |j        
          | _        t          |j        |j        
          | _        d S r  r  r  s         rM   r   z"ChameleonSwinDecoderLayer.__init__  r  rL   r   r   r  c                     |}|                      ||          }|                     |          }||z   }|}|                     |          }|                     |          }||z   }||fS r  )r  r  r  r  r  s       rM   r   z!ChameleonSwinDecoderLayer.forward  s     !' ' 
 

 ,,];;%0 !//55mDD =0h&&rL   r  r  r   s   @rM   r  r    s         ,026#
 #
#
 "D(#
 )4/	#

 #
 
#
 #
 #
 #
 #
 #
J'<' |' ,%	'
 
u|U\)	*' ' ' ' ' ' ' 'rL   r  c                   :     e Zd Zdef fdZdej        fdZ xZS )ChameleonVQVAEVectorQuantizerrv   c                    t                                                       |j        | _        |j        | _        t          |dd          | _        t          j        | j        | j                  | _	        | j        | _
        d S )Nbetag      ?)r   r   num_embeddings	embed_dimembedding_dimr	  r  nn	Embedding	embeddingre_embedrU   rv   r   s     rM   r   z&ChameleonVQVAEVectorQuantizer.__init__  si    $3#-FFD11	d&94;MNN+rL   hidden_statec           
      R   |                     dddd                                          }|                    d| j                  }t	          j        |dz  dd          t	          j        | j        j        dz  d          z   dt	          j        d	|| j        j        	                    dd                    z  z
  }t	          j
        |d          }|                     |                              |j                  }t	          j        |                                |z
  dz            | j        t	          j        ||                                z
  dz            z  z   }|||z
                                  z   }|                     dddd                                          }|||fS )
Nr   r   r@   r2   r   T)r   keepdimr   z	bd,dn->bn)permute
contiguousr   r"  rI   sumr%  r   einsum	transposeargminr   meandetachr  )rU   r(  hidden_state_flattened	distancesmin_encoding_indiceshidden_state_quantlosss          rM   r   z%ChameleonVQVAEVectorQuantizer.forward  s   #++Aq!Q77BBDD!-!2!22t7I!J!J I,a/QEEEi-q0a8889l&%//155  	  %|I1===!^^,@AAFF
 

 z&&((<7A=
 
I
$69L9L9N9N$NST#TUUUV
 *-?,-N,V,V,X,XX 0771aCCNNPP!4)===rL   	rD   rE   rF   r   r   rI   rJ   r   r   r   s   @rM   r  r    sb        ,3 , , , , , , >EL  >  >  >  >  >  >  >  >rL   r  c                   :     e Zd Zdef fdZdej        fdZ xZS )#ChameleonVQVAEEncoderConvDownsamplein_channelsc                 z    t                                                       t          ||ddd          | _        d S )Nr@   r   r   kernel_sizestridepadding)r   r   r   convrU   r;  r   s     rM   r   z,ChameleonVQVAEEncoderConvDownsample.__init__  s>    !Aq
 
 
			rL   r   c                 `    t          j        |ddd          }|                     |          }|S )N)r   r2   r   r2   constantr   )padmodevalue)r   rE  rA  r   s     rM   r   z+ChameleonVQVAEEncoderConvDownsample.forward  s2    mJVWXXX		-00rL   	rD   rE   rF   rc   r   rI   rJ   r   r   r   s   @rM   r:  r:    sa        
C 
 
 
 
 
 
U\        rL   r:  c                   D     e Zd Z	 	 ddedef fdZdej        fdZ xZ	S )	 ChameleonVQVAEEncoderResnetBlockNFrv   r;  c                 j   t                                                       || _        ||n|| _        || _        t
          j                            d|dd          | _        t          ||ddd          | _
        t
          j                            d|dd          | _        t
          j                            |j                  | _        t          ||ddd          | _        | j        | j        k    r=| j        rt          ||ddd          | _        d S t          ||ddd          | _        d S d S )	N    ư>T
num_groupsnum_channelsr   affiner@   r2   r=  r   )r   r   r;  out_channelsuse_conv_shortcutrI   r#  	GroupNormnorm1r   conv1norm2Dropoutdropoutconv2conv_shortcutnin_shortcut)rU   rv   r;  rR  r[  r   s        rM   r   z)ChameleonVQVAEEncoderResnetBlock.__init__   se    	&+7+?KK\!.X''d ( 
 

 !1Q
 
 

 X''$t ( 
 

 x''77 ,Aa
 
 

 t000% %01QPQ& & &""" %01QPQ% % %!!! 10rL   r   c                    |}|                      |          }|t          j        |          z  }|                     |          }|                     |          }|t          j        |          z  }|                     |          }|                     |          }| j        | j        k    r2| j	        r| 
                    |          }n|                     |          }||z   S rQ   )rU  rI   sigmoidrV  rW  rY  rZ  r;  rR  rS  r[  r\  )rU   r   r  s      rM   r   z(ChameleonVQVAEEncoderResnetBlock.forwardC  s     

=11}555

=11

=11}555]33

=11t000% 7--h77,,X66-''rL   )NF)
rD   rE   rF   r   rc   r   rI   rJ   r   r   r   s   @rM   rJ  rJ    sz        
 ! !$! ! ! ! ! ! !F(U\ ( ( ( ( ( ( ( (rL   rJ  c                   :     e Zd Zdef fdZdej        fdZ xZS )ChameleonVQVAEEncoderAttnBlockr;  c                 n   t                                                       || _        t          j                            d|dd          | _        t          ||ddd          | _        t          ||ddd          | _	        t          ||ddd          | _
        t          ||ddd          | _        d S )NrL  rM  TrN  r2   r   r=  )r   r   r;  rI   r#  rT  normr   r   r   r   proj_outrB  s     rM   r   z'ChameleonVQVAEEncoderAttnBlock.__init__Y  s    &H&&d ' 
 
	 !Aq
 
 
 !Aq
 
 
 !Aq
 
 
 $!Aq
 
 
rL   r   c                    |}|                      |          }|                     |          }|                     |          }|                     |          }|j        \  }}}}	|                    ||||	z                                ddd          }|                    ||||	z            }t          j        ||          }
|
t          |          dz  z  }
t          j        |
d          }
|                    ||||	z            }|
                    ddd          }
t          j        ||
                              ||||	          }|                     |          }||z   S )Nr   r   r2   r   r   )rb  r   r   r   r   r   r+  rI   bmmrc   r   softmaxrc  )rU   r   r  query_states
key_statesvalue_states
batch_sizechannelsrq   rp   attn_weightsr   s               rM   r   z&ChameleonVQVAEEncoderAttnBlock.forwardm  se    		-00vvm,,VVM**
vvm,, /;.@+
Hfe#++&5.
 

'!Q

 	  ''
HfunMM
yz::#s8}}'>?y1555 $++J&5.QQ#++Aq!44il;;CC&%
 
 mmK00+%%rL   rH  r   s   @rM   r`  r`  X  sa        
C 
 
 
 
 
 
(&U\ & & & & & & & &rL   r`  c                   :     e Zd Zdef fdZdej        fdZ xZS )ChameleonVQVAEEncoderrv   c           	         t                                                       t          |j                  | _        |j        | _        |j        }|j        }|j        }|j	        }|j
        }|j        }t          ||ddd          | _        |}dt          |          z   }	|	| _        t          j                    | _        t%          | j                  D ]}
t          j                    }t          j                    }||	|
         z  }|||
         z  }t%          | j                  D ]f}|                    t)          |||                     |}|j        6||j        v r-|j        dk    r"|                    t/          |                     gt          j                    }||_        ||_        |
| j        dz
  k    rt7          |          |_        |dz  }| j                            |           t          j                    | _        t)          |||          | j        _        |j        dk    rt/          |          nt          j                    | j        _         t)          |||          | j        _!        tD          j        #                    d|d	d
          | _$        t          ||rd|z  n|ddd          | _%        d S )Nr@   r2   r=  )r2   )rv   r;  rR  vanillar   rL  rM  TrN  )&r   r   lenchannel_multipliernum_resolutionsnum_res_blocksbase_channelsrt   r;  double_latentlatent_channelsr   conv_inr  in_channel_multiplierr#  
ModuleListdownrangeappendrJ  attn_resolutions	attn_typer`  Moduleblockr   r:  
downsamplemidblock_1Identityattn_1block_2rI   rT  norm_outconv_out)rU   rv   ru  rt   r;  rv  rw  rr  curr_resry  i_levelr  r   block_in	block_outi_blockr{  r   s                    rM   r   zChameleonVQVAEEncoder.__init__  s   "6#<==$3,&
(, 0#6"Aa
 
 
  $u-?'@'@ @%:"MOO	T122 	# 	#GMOOE=??D$'<W'EEH%(:7(CCI !455 J J4%$,%.     %+7 F$;;;(I55KK >x H HIII9;;DDJDI$.222"Eh"O"O#q=IT""""9;;; !
 
 
 9,, +8444 	
 < !
 
 
 **d4 + 
 
 $#0EAo
 
 
rL   r=   c                    |                     | j        j        j                  }|                     |          g}t	          | j                  D ]}t	          | j                  D ]} | j        |         j        |         |d                   }t          | j        |         j
                  dk    r! | j        |         j
        |         |          }|                    |           || j        dz
  k    r9|                    | j        |                             |d                              |d         }| j                            |          }| j                            |          }| j                            |          }|                     |          }|t%          j        |          z  }|                     |          }|S )Nr   r   r2   )torx  r   dtyper|  rs  rt  r{  r  rq  r   r}  r  r  r  r  r  r  rI   r^  r  )rU   r=   r   r  r  r(  last_hidden_states          rM   r   zChameleonVQVAEEncoder.forward  s   #t|':'@AA l334T122 	W 	WG !455 3 3@ty17@rARSSty).//!33#C49W#5#:7#CL#Q#QL$$\2222$.222$$TYw%7%B%B=QSCT%U%UVVV *"- H,,->?? HOO,=>> H,,->?? !MM*;<<U]+<=== MM*;<<  rL   r8  r   s   @rM   rn  rn    sj        K
3 K
 K
 K
 K
 K
 K
Z!EL ! ! ! ! ! ! ! !rL   rn  c                   n     e Zd Zdef fdZdej        deej        ej        ej        f         fdZ xZ	S )ChameleonVQVAErv   c                 @   t                                                       t          |          | _        t	          |          | _        t          |j        |j        d          | _	        t          |j        |j        d          | _
        |                                  d S )Nr2   )r   r   rn  encoderr  quantizer   rw  r!  
quant_convpost_quant_convevalr'  s     rM   r   zChameleonVQVAE.__init__  s}    ,V445f==%f&<f>NPQRR*6+;V=SUVWW		rL   r=   rY   c                     |                      |          }|                     |          }|                     |          \  }}}|||fS rQ   )r  r  r  )rU   r=   r   quantemb_lossindicess         rM   r   zChameleonVQVAE.encode  sK     \2266#'==#?#? xh''rL   )
rD   rE   rF   r   r   rI   rJ   r  r   r   r   s   @rM   r  r    s}        3      (!L(	u|U\5<7	8( ( ( ( ( ( ( (rL   r  c                       e Zd ZdZdeeef         fdZed             Z	ed             Z
ed             Zed             Zed             Zed	             Zd
ej        dej        fdZdS )ChameleonImageVocabularyMappingzM
    A class for mapping discrete image tokens from VQGAN to BPE tokens.
    	vocab_mapc                 H    || _         |                    d          | _        d S )N<image>)r  rh   r   )rU   r  s     rM   r   z(ChameleonImageVocabularyMapping.__init__  s#    "'mmI66rL   c                 H    d | j                                         D             S )Nc                     i | ]\  }}||	S rK   rK   .0r   r   s      rM   
<dictcomp>z<ChameleonImageVocabularyMapping.val2name.<locals>.<dictcomp>  s    888A1888rL   )r  itemsrT   s    rM   val2namez(ChameleonImageVocabularyMapping.val2name  s$    88!5!5!7!78888rL   c                 b    t          d | j                                        D                       S )Nc                 B    g | ]\  }}|                     d           |S )IMGIMG)
startswith)r  namevals      rM   
<listcomp>z@ChameleonImageVocabularyMapping.image_tokens.<locals>.<listcomp>  s-    VVVYT3DOOH<U<UVSVVVrL   )sortedr  r  rT   s    rM   r   z,ChameleonImageVocabularyMapping.image_tokens  s3    VV$."6"6"8"8VVV
 
 	
rL   c                      d t          d          D             dt          dt          ffd fd j        D             S )Nc                 h    i | ]/}t          t          d           |z             t          |          0S )A)chrordrb   )r  is     rM   r  z;ChameleonImageVocabularyMapping.bpe2img.<locals>.<dictcomp>  s2    LLLQs3s88a<00#a&&LLLrL   
   old_namerY   c                 p    d                     fd| t          d          d         D                       S )Nr   c              3   D   K   | ]}                     ||          V  d S rQ   )rh   )r  cimg_tkn_chr_mappings     rM   	<genexpr>zIChameleonImageVocabularyMapping.bpe2img.<locals>.remap.<locals>.<genexpr>  sE        23#''1--     rL   r  r   )joinrq  )r  r  s    rM   remapz6ChameleonImageVocabularyMapping.bpe2img.<locals>.remap  sP    77    7?HPR@R7S     rL   c           	      X    i | ]&}|t           j        |                             'S rK   )rc   r  )r  tokr  rU   s     rM   r  z;ChameleonImageVocabularyMapping.bpe2img.<locals>.<dictcomp>#  s4    QQQSt}S12233QQQrL   )r|  rb   r   )rU   r  r  s   `@@rM   bpe2imgz'ChameleonImageVocabularyMapping.bpe2img  sn    LL%))LLL	C 	C 	 	 	 	 	 	
 RQQQQt?PQQQQrL   c                 H    d | j                                         D             S )Nc                     i | ]\  }}||	S rK   rK   r  s      rM   r  z;ChameleonImageVocabularyMapping.img2bpe.<locals>.<dictcomp>'  s    666A1666rL   )r  r  rT   s    rM   img2bpez'ChameleonImageVocabularyMapping.img2bpe%  s$    66!3!3!5!56666rL   c                     t          j        t          | j                                                            t          j        t          | j                                                            fS rQ   )rI   tensorr  r  keysvaluesrT   s    rM   bpe2img_search_tensorsz6ChameleonImageVocabularyMapping.bpe2img_search_tensors)  sV    |F4<#4#4#6#67788%,4<&&(());
 ;
 
 	
rL   c                     t          j        t          | j                                                  dz   t           j                  }| j                                        D ]
\  }}|||<   |S )Nr2   )r  )rI   zerosr   r  r  rc   r  )rU   mappingr   r   s       rM   img2bpe_mapping_tensorz6ChameleonImageVocabularyMapping.img2bpe_mapping_tensor/  sd    +c$,"3"3"5"566:%)LLLL&&(( 	 	DAqGAJJrL   	img_batchrY   c                 z    |j         }| j        |                    d                   }|                    |          S )Ncpu)devicer  r  )rU   r  r  
img_tokenss       rM   convert_img2bpez/ChameleonImageVocabularyMapping.convert_img2bpe6  s5    !0e1D1DE
}}V$$$rL   N)rD   rE   rF   rG   r   rb   rc   r   r   r  r   r  r  r  r  rI   rJ   r  rK   rL   rM   r  r    s        7$sCx. 7 7 7 7 9 9 _9 
 
 _

 R R _R 7 7 _7 
 
 _

   _% %%, % % % % % %rL   r  c                        e Zd Zdddedef fdZdej        dej        fdZd	ej        dej        fd
Z		 ddej        dz  dej        de
dz  dej        dz  dej        e
z  f
dZ xZS )ChameleonModelr   r   vllm_configr   c                ^   t                                                       |j        j        |j        |j        | _        j        | _        t          | j        j	                  | _
        t          j                  | _        | j        j        st          nt           t#          j        fd| d          \  | _        | _        | _        t-          j	        j                  | _        t3          j                  | _        t9          ddgj	                  | _        d S )Nc                 "     |           S )N)rv   r   r   r   rK   )r   r   rv   decoder_layerr   s    rM   <lambda>z)ChameleonModel.__init__.<locals>.<lambda>S  s$    ==))	   rL   z.layersr  r   r   r  )r   r   model_config	hf_configr   r   rv   
vocab_sizer    r   embed_tokensr  vocabulary_mapvocabulary_mapping	swin_normr  r  r9   num_hidden_layersstart_layer	end_layerlayersr   r  rb  r  rs   vqmodelr8   make_empty_intermediate_tensors)rU   r  r   r   rv   r  r   r   s      @@@@rM   r   zChameleonModel.__init__=  sH   )3"/"/ +2O
 
 #B&BW"X"X ;(+!!* 	 9D$       %%%	9
 	9
 	9
5$.$+ F.F4GHHH	%f&677/Vj)6+=0
 0
,,,rL   r   rY   c                 ,    |                      |          S rQ   )r  )rU   r   s     rM   embed_input_idszChameleonModel.embed_input_idsb  s      +++rL   r=   c                     |j         d         }| j                            |          \  }}}| j                            |          }|                    |d          }|S )z
        Tokenizes images into discrete tokens with VQGAN module. Converts
        obtained image tokens into BPE tokens and wraps with "boi" and "eoi"
        special tokens.
        r   r   )r   r  r   r  r  r   )rU   r=   rj  r   
image_toksbpe_tokss         rM   get_image_tokenszChameleonModel.get_image_tokense  s[     "'*
<..|<<1j*:::FF==R00rL   Nr   intermediate_tensorsinputs_embedsc                 p   t                      j        r||}n|                     |          }d }n|J |d         }|d         }t          | j        | j        | j                  D ]} ||||          \  }}t                      j        st          ||d          S | 	                    ||          \  }}|S )Nr   r  )r   r  )
r   is_first_rankr  r   r  r  r  is_last_rankr/   rb  )	rU   r   r   r  r  r   r  layerr   s	            rM   r   zChameleonModel.forwardq  s     >>' 		8( - $ 4 4Y ? ?HH'3330AM+J7HDK)94>JJ 	 	E&+e' '#M88
 ~~* 	&"/XFF    99]H==qrL   rQ   )rD   rE   rF   r   rb   r   rI   rJ   r  r  r/   r   r   r   s   @rM   r  r  <  s       AC #
 #
 #
z #
3 #
 #
 #
 #
 #
 #
J, ,%, , , , ,
U\ 
el 
 
 
 
" .2 <$& < 2D8	
 |d* 
+	+       rL   r  )ri   dummy_inputsc                   X    e Zd Zg dddgdZededededz  fd	            Zd
ddedef fdZ	de
dedz  fdZde
defdZ	 	 ddej        dej        dedz  dej        dz  dej        ez  f
dZdej        dej        dz  fdZdeeeej        f                  dee         fdZ xZS )!ChameleonForConditionalGeneration)q_projk_projv_proj	gate_projup_proj)r   r   r   r  rY   Nc                 N    |                     d          rdS t          d          )Nr[   r  z Only image modality is supported)r  r   )clsr   r  s      rM   get_placeholder_strz5ChameleonForConditionalGeneration.get_placeholder_str  s,    w'' 	9;<<<rL   r   r  r  r   c                   t                                                       |j        j        }|j        j        }|| _        || _        |                     || j        j        st          nt          dt          i          5  t          |t          |d                    | _        d d d            n# 1 swxY w Y   t          |j        |j        t          |d                    | _        |j        r| j        j        j        | j        _        t+          |dd          }t-          |j        |	          | _        | j        j        | _        d S )
Nr[   )language_targetstower_targetsmodel)r  r   lm_headr  logit_scaleg      ?)scale)r   r   r  r  multimodal_configrv   _mark_composite_modelr  r  r  r  r  r:   r  r   r  r   r  tie_word_embeddingsr  r   r	  r   logits_processorr  )rU   r  r   rv   r
  r  r   s         rM   r   z*ChameleonForConditionalGeneration.__init__  s   )3'4F!2'' {,/%%."N3 ( 
 
 	 	 ('#FG44  DJ	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 &	22
 
 

 % 	A"&*"9"@DLfmS99 /0A U U UJ6 	,,,s   ?%B00B47B4rV   c                     |                     dd           }|d S | j        j        }|j        x}}t	          d|||d          S )Nr=   )rA   rB   )r>   rC   resolve_bindings)poprv   rs   rt   r<   )rU   rV   r=   rs   
expected_h
expected_ws         rM   _parse_and_validate_image_inputzAChameleonForConditionalGeneration._parse_and_validate_image_input  sa     zz.$774*.+*?	"+"66
Z(#-J??
 
 
 	
rL   c                      | j         di |}|g S | j        j        J | j                            |d                             | j        j                            }| j                            |          }|S )NrC   rK   )r  r  r  r  r  rv   r  r  )rU   rV   image_inputr   vision_embeddingss        rM   embed_multimodalz2ChameleonForConditionalGeneration.embed_multimodal  s    :d:DDVDDIz!---z22""4;#455
 
 !J66|DD  rL   r   r   r  r  c                 @    |d }|                      ||||          }|S )N)r  )r  )rU   r   r   r  r  rV   r   s          rM   r   z)ChameleonForConditionalGeneration.forward  s9      + M

y"6m # 
 
 rL   r   c                     |                      | j        |          }|6| j        j        j        }t          j        |j                  j        |d d |f<   |S rQ   )	r  r  r  r  r   rI   finfor  min)rU   r   logitsr   s       rM   compute_logitsz0ChameleonForConditionalGeneration.compute_logits  sV     &&t|]CC :8EL&+k&,&?&?&CF111l?#rL   weightsc                    g d}t          |                                           }t                      }|D ]\  }}d|v rd|v sd|v r| j        j        rd|v r%d}d|v r| j        j        d}n|D ]i\  }}	}
|	|vr|                    |	|          }|                    d	          r||vr;t          ||           rL||         }|j
        } ||||
            n|                    d	          r||vr|                    d
          r:|                    dd          }||vrt                              d||           |}t          ||           r"||         }t          |dt                    } |||           |r@||v r<t          ||           rd||         }t          |dt                    } |||           |                    |           |S )N))r   z.q_projr   )r   z.k_projr   )r   z.v_projr   )r   z
.gate_projr   )r   z.up_projr2   zrotary_emb.inv_freqzrotary_emb.cos_cachedzrotary_emb.sin_cachedzlm_head.weightFr  Tz.biaskv_scalez	.kv_scalez.attn.kv_scalez{Found kv scale in the checkpoint (e.g. %s), but not found the expected name in the model (e.g. %s). kv-scale is not loaded.r   )r   named_parameterssetrv   r  r  r  replaceendswithr7   r   loggerwarning_oncer	  r!   add)rU   r  stacked_params_mappingparams_dictloaded_paramsr  loaded_weightuse_default_weight_loading
param_nameweight_nameshard_idparamr   remapped_kv_scale_names                 rM   load_weightsz.ChameleonForConditionalGeneration.load_weights  s   "
 "
 "
 4002233"%%%#* B	$ B	$D-$,,&$..2IT2Q2Q 
 {. 3Ct3K3K).&D  :%1 26.9O %8 %85JX"$.. <<Z@@D}}W-- !$k2I2I .tT:: ! '-E$)$7M!M%AAAE }}W-- !$k2I2I }}Z00 :15')92 2. 2DD"// !^ $ 6  
 %#9D.tT:: ! '-E$+0E% %M "M%777) 4dk.A.A*466 #D) '@U V Ve]333d####rL   )NN)rD   rE   rF   packed_modules_mappingclassmethodrb   rc   r  r   r   ra   r<   r  r3   r  rI   rJ   r/   r   r  r   r  r"  r2  r   r   s   @rM   r  r    s        322$i0 
 =3 =3 =3: = = = [= BD !
 !
 !
z !
3 !
 !
 !
 !
 !
 !
F

	"T	)
 
 
 
"	! 	!4H 	! 	! 	! 	! <@-1 < < 2D8	
 |d* 
+	+    | 
	   NHU33D-E$F N3s8 N N N N N N N NrL   r  )pcollections.abcr   r   r   	functoolsr   	itertoolsr   typingr   r	   r
   rI   torch.nnr#  torch.nn.functional
functionalr   transformersr   r   r   r   vllm.attention.layerr   vllm.configr   r   vllm.config.multimodalr   vllm.distributedr   r   vllm.loggerr   %vllm.model_executor.layers.activationr   vllm.model_executor.layers.convr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r    -vllm.model_executor.model_loader.weight_utilsr!   r"   vllm.model_executor.utilsr#   vllm.multimodalr$   vllm.multimodal.inputsr%   r&   r'   vllm.multimodal.parser(   vllm.multimodal.processingr)   r*   r+   r,   r-   r.   vllm.sequencer/   vllm.utils.tensor_schemar0   r1   
interfacesr3   r4   r5   r6   utilsr7   r8   r9   r:   rD   r%  r<   rO   re   rz   	LayerNormr   r  r   r   r  r  r  r:  rJ  r`  rn  r  r  r  register_processorr  rK   rL   rM   <module>rV     s;   8 7 7 7 7 7 7 7 7 7 % % % % % %       * * * * * * * * * *                            + * * * * * / / / / / / / / 3 3 3 3 3 3 O O O O O O O O # # # # # # < < < < < < 7 7 7 7 7 7 8 8 8 8 8 8         
 H G G G G G F F F F F F @ @ @ @ @ @               7 6 6 6 6 6 / / / / / /         
 6 5 5 5 5 5                . - - - - - > > > > > > > >                       
X		
B 
B 
B 
B 
B 
B 
B 
B* * * * *0 * * *
 
 
 
 
"89P"Q 
 
 
@A
 A
 A
 A
 A
#:;R#S A
 A
 A
H       "# # # # #29 # # #N] ] ] ] ] ] ] ]@:' :' :' :' :'BI :' :' :'z;' ;' ;' ;' ;'	 ;' ;' ;'~*> *> *> *> *>BI *> *> *>\    ")   5( 5( 5( 5( 5(ry 5( 5( 5(r.& .& .& .& .&RY .& .& .&df! f! f! f! f!BI f! f! f!T( ( ( ( (RY ( ( (&2% 2% 2% 2% 2% 2% 2% 2%jQ Q Q Q QRY Q Q Qh (' 	 ,  
z z z z zI!:}z z 
z z zrL   