
    .`iz                        U d dl Z d dlmZmZmZ d dlmZ d dlmZm	Z	 d dl
mZmZmZmZ d dlZd dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZ d dlm Z  d dl!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z, d dl-m.Z.m/Z/ d dl0m1Z1m2Z2  ee3          Z4ee5e5dz  f         Z6	 e G d d                      Z7 G d d          Z8dddddede5dedz  de9e5         dz  dej:        f
d Z;ed!ej<        dej<        fd"            Z=ed!e9ej<                 de9ej<                 fd#            Z=ed!e9ej<                 ej<        z  d$ed%         dej<        fd&            Z=ed'd(d!e9ej<                 ej<        z  d$e>de9ej<                 ej<        z  fd)            Z=d'd(d!e9ej<                 ej<        z  d$e>de9ej<                 ej<        z  fd*Z=d+e(dej<        fd,Z?d+e(de5fd-Z@d.ej<        d/eAde9e9eA                  fd0ZBd1ej<        d2e(d3ej<        dej<        fd4ZCd5ej<        d6e9eA         dej<        fd7ZD G d8 d9ej:                  ZEedd:d;ej:        d<eFej:                 eGeFej:                 d=f         z  dz  fd>            ZHedd:d;ej:        d?eej:        gej:        f         d<eFej:                 eGeFej:                 d=f         z  dz  fd@            ZI G dA dBe          ZJ G dC dDej        jK                  ZLd aMd aNdEeAddfdFZOd;ej        j:        dej        j:        fdGZPdHeAdIeJde5deGeAeAej        jQ        f         fdJZRi ZSeTeAe9e5         f         eUdK<   dLej        j:        de9e5         fdMZVdNe5dLej        j:        de>fdOZWdPe9e5         dQeAfdRZXde5dNe5de5fdSZYdede dz  fdTZZdidVe5dWeAdeAfdXZ[	 djdZej<        d[e\dej<        fd\Z]d]ej<        d^eAd_eAdeGej<        ej<        f         fd`Z^d!ej<        dej<        fdaZ_d!ej<        dej<        fdbZ`d!ej<        dej<        fdcZa e1dde`eaejb        jc        fe           dLej:        dNe5ddfdfZddgeAdHeAdeAfdhZedS )k    N)CallableIterableMapping)contextmanager)	dataclassfield)AnyLiteralProtocoloverload)functional_call)(register_module_module_registration_hook)PretrainedConfig)
VllmConfig)get_tensor_model_parallel_rank$get_tensor_model_parallel_world_size)init_logger)QuantizationConfig).support_quantized_model_reload_from_hp_weights)default_weight_loader)supports_any_eagle)NestedTensorsIntermediateTensors)cdiv)is_pin_memory_availableis_uva_available)direct_register_custom_opget_cuda_view_from_cpu_tensorc                   L   e Zd ZU dZ ee          Zeed<    ee          Z	eed<    ee          Z
eed<   ddZd	eded
z  fdZdeeeej        f                  deeeej        f                  fdZdee         dee         fdZdeeef         deeef         fdZd
S )WeightsMapperzBMaps the name of each weight if they match the following patterns.)default_factoryorig_to_new_substrorig_to_new_prefixorig_to_new_suffixotherreturnc                 z    t          i | j        |j        i | j        |j        i | j        |j                  S )z7Combine two `WeightsMapper`s by merging their mappings.)r#   r$   r%   )r!   r#   r$   r%   )selfr&   s     t/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/utils.py__or__zWeightsMapper.__or__8   sR    V$"9VU=UVV$"9VU=UVV$"9VU=UV
 
 
 	
    keyNc                    | j                                         D ]%\  }}||v r| d S |                    ||d          }&| j                                        D ]6\  }}|                    |          r| d S |                    ||d          }7| j                                        D ]H\  }}|                    |          r.| d S |                    |                    |d                    }I|S )N   )	r#   itemsreplacer$   
startswithr%   endswithjoinrsplit)r)   r-   substrnew_keyprefixsuffixs         r*   	_map_namezWeightsMapper._map_name@   s   #6<<>> 	6 	6OFG}}?44kk&'155#6<<>> 	6 	6OFG~~f%% 6?44kk&'155#6<<>> 	: 	:OFG||F## :?44ll3::fa#8#899
r,   weightsc                 $      fd|D             S )Nc              3   T   K   | ]"\  }}                     |          x|fV  #d S Nr:   ).0namedataout_namer)   s      r*   	<genexpr>z&WeightsMapper.apply.<locals>.<genexpr>[   sO       
 
d NN4000= t====
 
r,    )r)   r;   rC   s   ` @r*   applyzWeightsMapper.applyX   s4    
 
 
 
 
%
 
 
 	
r,   valuesc                 $      fd|D             S )Nc                 B    g | ]}                     |          xS r>   r?   )r@   rA   rC   r)   s     r*   
<listcomp>z,WeightsMapper.apply_list.<locals>.<listcomp>b   s:     
 
 
 NN4000= ===r,   rE   r)   rG   rC   s   ` @r*   
apply_listzWeightsMapper.apply_lista   s4    
 
 
 
 

 
 
 	
r,   c                 H      fd|                                 D             S )Nc                 J    i | ]\  }}                     |          x| S r>   r?   )r@   rA   valuerC   r)   s      r*   
<dictcomp>z,WeightsMapper.apply_dict.<locals>.<dictcomp>i   s@     
 
 
e NN4000= e===r,   )r0   rK   s   ` @r*   
apply_dictzWeightsMapper.apply_dicth   s<    
 
 
 
 
%||~~
 
 
 	
r,   )r&   r!   r'   r!   )__name__
__module____qualname____doc__r   dictr#   WeightsMapping__annotations__r$   r%   r+   strr:   r   tupletorchTensorrF   listrL   r	   rQ   rE   r,   r*   r!   r!   0   sR        LL).t)D)D)DDDD).t)D)D)DDDD).t)D)D)DDDD
 
 
 
S S4Z    0
c5<&7 89
	%U\)*	+
 
 
 

c 
tCy 
 
 
 

c3h 
DcN 
 
 
 
 
 
r,   r!   c                       e Zd ZdZg dZddddddej        dee         dz  dee         dz  dee         dz  d	ee         dz  d
df fdZ	de
eeej        f                  d
e
eee
eeej        f                  f                  fdZdeded
efdZded
efdZded
efdZdedej        de
eeej        f                  d
e
e         fdZdej        deeej        f         fdZdedej        de
eeej        f                  d
e
e         fdZeddde
eeej        f                  dedz  d
ee         fd            Z xZS )AutoWeightsLoadera"  
    Helper class to load weights into a [`torch.nn.Module`][]. It is able
    to automatically detect child modules and parameters while iterating over
    the weights only once.

    The weight loading logic for individual modules can be overridden
    by defining a `load_weights` method.

    Similarly, the weight loading logic for individual parameters can be
    overridden by defining a `weight_loader` method.

    Detailed weight loading information can be viewed by setting the
    environment variable `VLLM_LOGGING_LEVEL=DEBUG`.
    )zrotary_pos_emb.inv_freqzrotary_emb.inv_freqzrotary_emb.cos_cachedzrotary_emb.sin_cachedN)skip_prefixesskip_substrsignore_unexpected_prefixesignore_unexpected_suffixesmoduler`   ra   rb   rc   r'   c                    t                                                       || _        |pg | _        |pg | _        |pg | _        |pg | _        | xj        | j        z  c_        d S r>   )super__init__rd   r`   ra   rb   rc   ROTARY_EMBEDS_UNUSED_WEIGHTS)r)   rd   r`   ra   rb   rc   	__class__s         r*   rg   zAutoWeightsLoader.__init__   ss     	*0b(.B*D*J'*D*J'T>>r,   r;   c              #   |   K   d |D             }t          j        |d           D ]\  }}|d |D             fV  d S )Nc              3   L   K   | ]\  }}|                     d d          |fV   dS ).r/   N)split)r@   weight_nameweight_datas      r*   rD   z4AutoWeightsLoader._groupby_prefix.<locals>.<genexpr>   sO       
 
([ sA&&4
 
 
 
 
 
r,   c                     | d         d         S Nr   rE   xs    r*   <lambda>z3AutoWeightsLoader._groupby_prefix.<locals>.<lambda>   s    qQRtTUw r,   )r-   c              3   Z   K   | ]&\  }}t          |          d k    rdn|d          |fV  'dS )r/    N)len)r@   partsweights_datas      r*   rD   z4AutoWeightsLoader._groupby_prefix.<locals>.<genexpr>   sU        +| u::??RRa,G     r,   )	itertoolsgroupby)r)   r;   weights_by_partsr8   groups        r*   _groupby_prefixz!AutoWeightsLoader._groupby_prefix   s      
 
,3
 
 

 './?EVEVWWW 		 		MFE /4  	    		 		r,   r8   restc                 P    |dk    r|S |dk    r|S d                     ||f          S )Nrv   rl   )r4   )r)   r8   r   s      r*   _get_qualnamezAutoWeightsLoader._get_qualname   s4    R<<K2::Mxx'''r,   qualnamec                     t          fd| j        D                       pt          fd| j        D                       S )Nc              3   B   K   | ]}                     |          V  d S r>   r2   r@   pr   s     r*   rD   z.AutoWeightsLoader._can_skip.<locals>.<genexpr>   s1      FFa8&&q))FFFFFFr,   c              3       K   | ]}|v V  	d S r>   rE   )r@   r6   r   s     r*   rD   z.AutoWeightsLoader._can_skip.<locals>.<genexpr>   sA       N
 N
#)FhN
 N
 N
 N
 N
 N
r,   )anyr`   ra   )r)   r   s    `r*   	_can_skipzAutoWeightsLoader._can_skip   sn    FFFF43EFFFFF 
# N
 N
 N
 N
-1->N
 N
 N
 K
 K
 	
r,   c                     fd| j         D             }fd| j        D             }t          |          pt          |          S )Nc              3   B   K   | ]}                     |          V  d S r>   r   r   s     r*   rD   z;AutoWeightsLoader._can_ignore_unexpected.<locals>.<genexpr>   s1      OO!x""1%%OOOOOOr,   c              3   B   K   | ]}                     |          V  d S r>   )r3   )r@   sr   s     r*   rD   z;AutoWeightsLoader._can_ignore_unexpected.<locals>.<genexpr>   s1      MMx  ##MMMMMMr,   )rb   rc   r   )r)   r   iupiuss    `  r*   _can_ignore_unexpectedz(AutoWeightsLoader._can_ignore_unexpected   sR    OOOOt/NOOOMMMMT-LMMM3xx#3s88#r,   base_prefixparamc              #     K   |D ]\  }}|                      ||          }|                     |          rt                              d|           L|dk    rF|                     |          rt                              d|           t          d|d|          t          |dt                    } |||           t                              d||j                   |V  d S )NzSkipping weight %srv   zIgnoring weight %sz Attempted to load nested weight z into a single parameter weight_loaderzLoaded weight %s with shape %s)	r   r   loggerdebugr   
ValueErrorgetattrr   shape)r)   r   r   r;   rn   ro   weight_qualnamer   s           r*   _load_paramzAutoWeightsLoader._load_param   s      )0 	" 	"$K"00kJJO~~o.. 1?CCCb  ..?? LL!5GGG ? ? ?/:? ?  
 $E?<QRRMM%---LL9?EKXXX!!!!!1	" 	"r,   child_paramsc           
         t          |t          j        t          j        t          j        t          j        t          j        t          j        t          j        f          r$|	                                }dD ]}||         ||<   dS dS )z
        Add tensor names that are not in the model params that may be in the
        safetensors, e.g., batch normalization stats.
        )running_meanrunning_varnum_batches_trackedN)

isinstancennBatchNorm1dBatchNorm2dBatchNorm3dLazyBatchNorm1dLazyBatchNorm2dLazyBatchNorm3dSyncBatchNorm
state_dict)r)   rd   r   module_state_dict	stat_names        r*   _add_loadable_non_param_tensorsz1AutoWeightsLoader._add_loadable_non_param_tensors   s     """ 
 
 	G !' 1 1 3 3S G G	*;I*FY''	G 	GG Gr,   c              #      K   t          |t          t          f          rd S | j        k    rct	          |dd           }t          |          rC ||          }|t                              d|           nt           fd|          E d {V  t          |
                                          }t          |                    d                    }                     ||                                |          D ]\  }}	                     |          }
||v rX                     |
dz             rt                              d|
           T                     |
||         |	          E d {V  x||v rU                     |
          rt                              d|
                                |
||         |	          E d {V  щ                     |
dz             }                     |
          }|s|rt                              d	|
                                |
dz             }                     |
          }|s|rt                              d
|
           mfd|                    d          D             }d|
d j                                         d d|                                 d| 
}t+          |          d S )Nload_weightsz1Unable to collect loaded parameters for module %sc                 0                         |           S r>   )r   )rs   r   r)   s    r*   rt   z0AutoWeightsLoader._load_module.<locals>.<lambda>  s    $"4"4[!"D"D r,   F)recurserl   zSkipping module %szSkipping param %szSkipping missing %szIgnoring missing %sc                      h | ]
\  }}|z   S rE   rE   )r@   k_r   s      r*   	<setcomp>z1AutoWeightsLoader._load_module.<locals>.<setcomp>=  s.     # # #(,1K!O# # #r,   Tz&There is no module or parameter named z in z(. The available parameters belonging to z (z) are: )r   StageMissingLayerPPMissingLayerrd   r   callabler   warningmaprV   named_childrennamed_parametersr   r~   r   r   r   _load_moduler   r   	_get_namer   )r)   r   rd   r;   module_load_weightsloaded_paramschild_modulesr   child_prefixchild_weightsr8   can_skip_modulecan_skip_paramcan_ignore_modulecan_ignore_paramdesc_param_keysmsgs   ``               r*   r   zAutoWeightsLoader._load_module   s      f0.ABB 	F T[  ")&.$"G"G+,, 
 3 3G < < (NNKV     #DDDDD%          
 V224455F33E3BBCC 	,,V\BBB+/+?+?+H+H -	& -	&'L-''\BBF},,>>&3,// LL!5v>>>,,M,7          -->>&)) LL!4f===++L6          #'..#">">!%!7!7" n LL!6???$($?$?$M$M!#'#>#>v#F#F $ (8 LL!6???# # # #060G0GPT0G0U0U# # #EV E E+//11E E=HE E ((**E E 4CE E  !oo%[-	& -	&r,   )mapperr   c                     ||                     |          } fd|D             }t                               d j        |                    }|S )Nc              3   P   K   | ] \  }}                     |          ||fV  !d S r>   )r   )r@   rA   weightr)   s      r*   rD   z1AutoWeightsLoader.load_weights.<locals>.<genexpr>R  sP       
 
+tVdnnT>R>R
6N
 
 
 
 
 
r,   rv   )rF   setr   rd   )r)   r;   r   autoloaded_weightss   `   r*   r   zAutoWeightsLoader.load_weightsH  sn     ll7++G
 
 
 
/6
 
 
 !!2!22t{G!L!LMM!!r,   )rR   rS   rT   rU   rh   r   Moduler]   rY   rg   r   rZ   r[   r\   r~   r   boolr   r   	Parameterr   rV   r   r   r   r!   r   r   __classcell__ri   s   @r*   r_   r_   p   s        "$ $ $  +/)-7;7;? ? ?	? Cy4'	?
 3i$&? %)I$4? %)I$4? 
? ? ? ? ? ?&%U\ 123 
%XeC,=&>??@	A   ((C (s (s ( ( ( (
# 
$ 
 
 
 

$s $t $ $ $ $
"" |" %U\ 123	"
 
#" " " "@GiG/3C4E/FG G G G.M&M& 	M& %U\ 123	M&
 
#M& M& M& M&^ 4
 (,	" " "%U\ 123" $	"
 
S" " " 43" " " " "r,   r_   rv   )r8   	hf_configarchitecturesvllm_configr8   r   r   r'   c                z    ddl m} ||| j        j        }||                     ||          }  || |          S )z
    Helper function to initialize an inner model registered to vLLM,
    based on the arguments passed to the outer vLLM model.
    r   )initialize_modelN)r   )r   r8   )&vllm.model_executor.model_loader.utilsr   model_configr   with_hf_config)r   r8   r   r   r   s        r*   init_vllm_registered_modelr   Z  sa     HGGGGG]6,6	!00-0XXFCCCCr,   rs   c                     d S r>   rE   rr   s    r*   
flatten_bnr   q  s    14r,   c                     d S r>   rE   rr   s    r*   r   r   u  s    =@Sr,   concatTc                    d S r>   rE   rs   r   s     r*   r   r   y  s	    
 3r,   F)r   c                    d S r>   rE   r   s     r*   r   r     s	    
 ),r,   c                    t          | t          j                  r|                     dd          S |rt          j        |           S d | D             S )z
    Flatten the `B` and `N` dimensions of batched multimodal inputs.

    The input tensor should have shape `(B, N, ...)`.
    r   r/   c                     g | ]	}|D ]}|
S rE   rE   )r@   x_bx_ns      r*   rJ   zflatten_bn.<locals>.<listcomp>  s%    ,,,C,,C,,,,r,   )r   r[   r\   flattencatr   s     r*   r   r     sT     !U\"" yyA y||,,1,,,,r,   
embeddingsc                     t          | t          j                  r|                     dd          S t          j        t          d | D                                 S )z`
    Recursively flattens and concatenates NestedTensors on all but the last
    dimension.
    r   c              3   4   K   | ]}t          |          V  d S r>   )_flatten_embeddings)r@   ts     r*   rD   z&_flatten_embeddings.<locals>.<genexpr>  s+      FFa.q11FFFFFFr,   )r   r[   r\   r   r   rZ   r   s    r*   r   r     sU     *el++ )!!!R(((9UFF:FFFFFGGGr,   c                     t          | t          j                  r,d                    d | j        dd         D                       S d                    d | D                       S )ze
    Constructs a debugging representation of the number of embeddings in the
    NestedTensors.
    z x c                 ,    g | ]}t          |          S rE   )rY   )r@   dims     r*   rJ   z/_embedding_count_expression.<locals>.<listcomp>  s    EEE3s88EEEr,   Nz + c              3   4   K   | ]}t          |          V  d S r>   )_embedding_count_expression)r@   inners     r*   rD   z._embedding_count_expression.<locals>.<genexpr>  s+      QQU1%88QQQQQQr,   )r   r[   r\   r4   r   r   s    r*   r   r     sg     *el++ GzzEEz/?/DEEEFFF::QQjQQQQQQr,   lstintervalc                     d t          t          |           |z  dz             D             }| D ]"}||z  }||                             |           #|S )Nc                     g | ]}g S rE   rE   r@   r   s     r*   rJ   z*split_list_into_ranges.<locals>.<listcomp>  s    MMMarMMMr,   r/   )rangemaxappend)r   r   rangesnumindexs        r*   split_list_into_rangesr    se    MM5#c((h2F!1K+L+LMMMF " "xuS!!!!Mr,   inputs_embedsmultimodal_embeddingsis_multimodalc           
         t          |          dk    r| S t          |          }| j        }	 |                     |                    d          |                    |                     n# t          $ ry}t          |          }|                                                                }||k    r)t          |          }t          d| d| d| d          |t          d          |d	}~ww xY w| S )
z
    Merge `multimodal_embeddings` into `inputs_embeds` by overwriting the
    positions in `inputs_embeds` corresponding to placeholder tokens in
    `input_ids`.

    Note:
        This updates `inputs_embeds` in place.
    r   r   )dtypezAttempted to assign z = z multimodal tokens to z placeholdersz%Error during masked scatter operationN)rw   r   r
  masked_scatter_	unsqueezetoRuntimeErrorsumitemr   r   )	r  r  r  mm_embeds_flatinput_dtypeenum_actual_tokensnum_expected_tokensexprs	            r*   _merge_multimodal_embeddingsr    sR     !!Q&&()>??N%KI 	%%##B''):):):)M)M	
 	
 	
 	
  I I I//+//116688 333./DEEDKt K K0A K K(;K K K  
 @AAqHI s   =A+ +
C.5A4C))C.elementstest_elements_listc                     t          j        |t                                                    | j        d          }t          j        | |          S )N)
pin_memoryT)devicenon_blocking)r[   tensorr   r  r  isin)r  r  test_elementss      r*   	isin_listr!    sR     L*,,   	bdb33 
 :h...r,   c                   \     e Zd Zd
dedej        dz  ddf fdZdefdZd Zdefd	Z	 xZ
S )r   N
stage_namerd   r'   c                 j    t                                                       || _        || j        d<   d S Nrd   )rf   rg   r#  __dict__)r)   r#  rd   ri   s      r*   rg   zStageMissingLayer.__init__  s4    $ #)hr,   rA   c                 8    t          | j        d         |          S r%  )r   r&  )r)   rA   s     r*   __getattr__zStageMissingLayer.__getattr__   s    t}X.555r,   c                 &    t          |  d          )Nz should not be called)r  r)   argskwargss      r*   __call__zStageMissingLayer.__call__  s    d999:::r,   c                     d| j         S )Nzstage_name=)r#  )r)   s    r*   
extra_reprzStageMissingLayer.extra_repr  s    0T_000r,   r>   )rR   rS   rT   rY   r   r   rg   r(  r-  r/  r   r   s   @r*   r   r     s        ) )3 )	D0@ )D ) ) ) ) ) )6 6 6 6 6; ; ;1C 1 1 1 1 1 1 1 1r,   r   )targetsrd   r0  .c             #   r   K   t          t                               |Tdt          j        dt          dt          j        f fd}t	          |          5  V  ddd           dS # 1 swxY w Y   dS V                                   D ]*\  }}t          ||          r                    |           +dS )aA  
    Within this context, collect all direct child assignments to `module`,
    returning a list of children names that is internally updated until the
    context is exited.

    If `targets` is set, instead collect descendents of `module`
    that are an instance of `targets`, even if they aren't direct children.
    Nmodule_rA   	submodulec                 >    | u r                     |           d S d S r>   )r  )r2  rA   r3  children_namesrd   s      r*   hookzcollect_children.<locals>.hook  s0    &  %%d+++++ ! r,   )r]   rY   r   r   r   named_modulesr   r  )rd   r0  r6  rA   r2  r5  s   `    @r*   collect_childrenr8  
  s6      #Y[[N	,") 	,3 	,29 	, 	, 	, 	, 	, 	, 	, 6d;; 	! 	!    	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! #1133 	, 	,MD''7++ ,%%d+++	, 	,s   A''A+.A+placeholderc             #      K   dt           j        dt          dt           j        f fd}t          |          5  t	          j        d          5  dV  ddd           n# 1 swxY w Y   ddd           dS # 1 swxY w Y   dS dt           j        dt          dt           j        ffd}t          |          5  dV  ddd           dS # 1 swxY w Y   dS )al  
    Within this context, prevent weight initialization from using device memory and
    replace direct child assignments to `module` with the result of `placeholder()`.

    If `targets` is set, instead prevent weight initialization and
    replace assignments where the child is an instance of `targets`,
    even if they aren't direct children of `module`.
    Nr2  rA   r3  c                 &    | u r |          S |S r>   rE   )r2  rA   r3  rd   r9  s      r*   r6  zno_init_weights.<locals>.hook;  s$    &  "{9---r,   metac                     t          |           r|                    d           t          |          r |                    d            |          S |S )Nr<  )r   r  )r2  rA   r3  r9  r0  s      r*   r6  zno_init_weights.<locals>.hookE  sa    '7++ %V$$$)W-- .V$$$"{9---r,   )r   r   rY   r   r[   r  )rd   r9  r0  r6  s   ``` r*   no_init_weightsr>  *  s      	") 	3 	29 	 	 	 	 	 	 	 6d;; 	 	U\&=Q=Q 	 	EEE	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 		") 	3 	29 	 	 	 	 	 	 	 6d;; 	 	EEE	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	sG   A;A#A;#A'	'A;*A'	+A;;A?A?=CCCc                   2    e Zd Zdedej        j        fdZdS )LayerFnr8   r'   c                     d S r>   rE   )r)   r8   s     r*   r-  zLayerFn.__call__U  s      r,   N)rR   rS   rT   rY   r[   r   r   r-  rE   r,   r*   r@  r@  T  s*        ;s;ux;;;;;;r,   r@  c                   (     e Zd ZdZ fdZd Z xZS )r   zN
    A placeholder layer for missing layers in a pipeline parallel model.
    c                 H    t                                                       d S r>   )rf   rg   )r)   r+  r,  ri   s      r*   rg   zPPMissingLayer.__init__]  s    r,   c                 r    |r|d         n-t          t          |                                                    S )z>Return the first arg from args or the first value from kwargs.r   )nextiterrG   r*  s      r*   forwardzPPMissingLayer.forward`  s-    ?tAwwDfmmoo)>)>$?$??r,   )rR   rS   rT   rU   rg   rG  r   r   s   @r*   r   r   X  sX             @ @ @ @ @ @ @r,   r   	max_bytesc                     da | ad S rq   )_CPU_OFFLOAD_BYTES_CPU_OFFLOAD_MAX_BYTES)rH  s    r*   set_cpu_offload_max_bytesrL  i  s    &r,   c           	      N   	
 t                                           d           x} S |j        t          j        d          k    r S t          t
          k    r S t                      }t                      }|s
J d            d}d}                                 D ]}t          t
          k    r nt          j        |j	        
                                |j	                                        |j	        j        |j	        j        d|          }|                    |j	                   |s||_	        n||_        t!          |          |_	        t          |j	                                        |j	                                        z  z  ad}|r|s j        
	 
fd		 _         S )Ncpuz3V1 CPU offloading requires uva (pin memory) supportTF)sizestrider
  layoutr  r  c                      _         fd                                                                D             }t          || |          }_         |S )Nc                 F    i | ]\  }}||                     d           S )T)r  )r  )r@   r   vr  s      r*   rP   z9maybe_offload_to_cpu.<locals>.forward.<locals>.<dictcomp>  sA        Aq 144T422  r,   )r+  r,  )rG  r   r0   r   )r+  r,  device_stateoutputr  rG  rd   original_forwards       r*   rG  z%maybe_offload_to_cpu.<locals>.forward  sp    -FN    #--//5577	  L %V\VTTTF$FNMr,   )rE  
parametersr  r[   rJ  rK  r   r   empty_stridedrB   rO  rP  r
  rQ  copy__vllm_offloaded_cpu_datar   numelelement_sizerG  )rd   paramsr  uva_availableuva_offloadingoffloaded_parametersr   cpu_datar  rG  rW  s   `       @@@r*   maybe_offload_to_cpurc  o  s   v((**D111:]Fe$$$$ 333(**J$&&MOOOOO=N !   $ $!777 E &6==??&,6=!
 
 
 	qv 	=AFF *2A&28<<AFafllnnqv/B/B/D/DDD# !N !!>
	 
	 
	 
	 
	 
	 
	 
	 !Mr,   num_hidden_layerslayer_fnc                 Z   ddl m} ddlm}  ||  |            j         |            j                  \  }}t          j                            d t          |          D             fdt          ||          D             z   d t          ||           D             z             }|||fS )zgMake a list of layers with the given layer function, taking
    pipeline parallelism into account.
    r   )get_pp_group)get_pp_indicesc                 *    g | ]}t                      S rE   r   r   s     r*   rJ   zmake_layers.<locals>.<listcomp>  s    666a		666r,   c           	      L    g | ] }t            d |                     !S )rl   )r8   )rc  )r@   idxre  r8   s     r*   rJ   zmake_layers.<locals>.<listcomp>  sL     
 
 
 !F1B1BS1B1B!C!C!CDD
 
 
r,   c                 *    g | ]}t                      S rE   rj  r   s     r*   rJ   zmake_layers.<locals>.<listcomp>  s    
I
I
I>
I
I
Ir,   )
vllm.distributed.parallel_staterg  vllm.distributed.utilsrh  rank_in_group
world_sizer[   r   
ModuleListr   )rd  re  r8   rg  rh  start_layer	end_layermoduless    ``     r*   make_layersrv    s     =<<<<<555555+^<<>>79R K h!!665#5#5666
 
 
 
 
[)44
 
 
	

 J
IU96G%H%H
I
I
I	J G 	7**r,    _model_to_pp_missing_layer_namesmodelc                    t          |           }|t          v rt          |         S g }|                                 D ]9\  }}t          |t          t
          f          r|                    |dz              :|t          |<   |S )zAGet the names of the missing layers in a pipeline parallel model.rl   )idrw  r7  r   r   r   r  )rx  model_idmissing_layer_namesrA   rd   s        r*   get_pp_missing_layer_namesr}    s    %yyH333/99++-- 3 3ff0.ABB 	3  &&tcz2221D$X.r,   rA   c                      t          |t          t          f          rdS t           fdt	          |          D                       S )z=Check if a parameter is missing in a pipeline parallel model.Tc              3   B   K   | ]}                     |          V  d S r>   r   )r@   missing_layer_namerA   s     r*   rD   z*is_pp_missing_parameter.<locals>.<genexpr>  sD         	*++     r,   )r   r   r   r   r}  )rA   rx  s   ` r*   is_pp_missing_parameterr    s`    %+^<== t    "<U"C"C     r,   keyshidden_sizec                 d     dt           dt          j        dt          j        dt          f fd}|S )N
batch_sizer
  r  r'   c                 F     t           fdD                       S )Nc                 D    i | ]}|t          j        f           S )r
  r  )r[   zeros)r@   r-   r  r  r
  r  s     r*   rP   zdmake_empty_intermediate_tensors_factory.<locals>.make_empty_intermediate_tensors.<locals>.<dictcomp>  sB        U[*k!:%PVWWW  r,   r   )r  r
  r  r  r  s   ```r*   make_empty_intermediate_tensorszPmake_empty_intermediate_tensors_factory.<locals>.make_empty_intermediate_tensors  sO    
 #        
 
 	
r,   )intr[   r
  r  r   )r  r  r  s   `` r*   'make_empty_intermediate_tensors_factoryr    sW    



{

 

 
	

 

 

 

 

 

 

 +*r,   c                     | s|n|  d| S )a  Add a prefix to a name if the prefix is non-empty.

    Args:
        prefix: The prefix to add. If empty, no prefix will be added.
        name: The name to potentially prefix.

    Returns:
        The string "prefix.name" if prefix was non-empty, otherwise just "name".
    rl   rE   )r8   rA   s     r*   maybe_prefixr    s!     544f#5#5t#5#55r,   c                 Z    | j         j        }| j        }|rt          j        ||          ndS )ae  Get quantization config for Draft models.

    Draft models should use their own quantization config instead of the verifier/target
    model's config. This helper retrieves the draft model's quantization config.

    Args:
        vllm_config: The vLLM configuration object.

    Returns:
        The draft model's config if available, None otherwise.
    N)speculative_configdraft_model_configload_configr   get_quantization_config)r   r  draft_load_configs      r*   get_draft_quant_configr  	  s?     %7J#/ 	
*+=?PQQQr,   r/   
layer_namenum_attn_modulec                    |                      d          }g }|D ]5}	 |                    t          |                     &# t          $ r Y 2w xY w|dk    sd| vr)t	          |          dk    sJ d|  d            |d         S t	          |          dk    sJ d|  d            t	          |          dk    r|d         |z  |d         z   n|d         }|S )	z
    Extract the layer index from the module name.
    Examples:
    - "encoder.layers.0" -> 0
    - "encoder.layers.1.self_attn" -> 1
    - "2.self_attn" -> 2
    - "model.encoder.layers.0.sub.1" -> ValueError if num_attn_module == 1
    rl   r/   attnzlayer name z  should only contain one integerr      z! should contain most two integers)rm   r  r  r   rw   )r  r  subnamesint_valssubnamelayer_indexs         r*   extract_layer_indexr  !  s'    $$HH  	OOCLL)))) 	 	 	H	!vZ778}}!!!F*FFF "!! {8}}!!!G*GGG "!!
 8}}!! QK/)HQK77! 	
 s   "A  
AA  tensorsoffsetc                    |                                                                  s&|                                                                 r9t          j        | j                  j        |z
  }t          j        | | |          } | S )N)minr   )isinfr   isnanr[   finfor
  r   clamp)r  r  clamp_values      r*   cast_overflow_tensorsr  C  ss     }} J 3 3 5 5 Jk'-004v=+gK<[IIINr,   rG   topkr   c                 j    |dk    rt          j        | |d          S t          j        | ||          S )a!  
    Optimized topk implementation that uses torch.max for k=1 case.

    This function provides better performance for the common case of k=1
    by using torch.max instead of the more general torch.topk.

    Args:
        values: Input tensor to find top-k values from
        topk: Number of top values to return (k). Must be > 0.
        dim: Dimension along which to compute topk

    Returns:
        Tuple of (values, indices) where values are the top-k values
        and indices are their corresponding indices in the input tensor
    r/   T)r   keepdim)r   )r[   r   r  )rG   r  r   s      r*   	fast_topkr  M  s=    $ qyyyS$7777 z&$C0000r,   c                 J    t           j        j                            |           S r>   )r[   opsvllmsequence_parallel_chunk_implrr   s    r*   sequence_parallel_chunkr  k  s    9>66q999r,   c                 *   t                      }t                      }|                     d          }||z  }|dk    r*||z
  }t          j                            | ddd|f          }n| }|j        d         |z  }||z  }t          j        |d||          S rq   )	r   r   rO  r   
functionalpadr   r[   narrow)	rs   tp_sizetp_rankseq_len	remainderpad_lenychunkstarts	            r*   r  r  o  s    244G,..G ffQiiG'!IA~~I%Ma!Q7!344GAJ'!EeOE<1eU+++r,   c                     t                      }t          |                     d          |          }t          | j                  }||d<   t          j        || j        | j                  }|S )Nr   r  )	r   r   rO  r]   r   r[   emptyr
  r  )rs   r  r  r   outs        r*   !sequence_parallel_chunk_impl_faker    s[    244G166!99g&&GMMEE!H
+e1718
<
<
<CJr,   r  )op_nameop_func	fake_impltagsc                 X    t          |           sdS d|v rd| _        d|v r	d| _        dS dS )a%  
    Update EAGLE model flags based on loaded weight name.
    This should be called during weight loading to detect if a model
    has its own lm_head or embed_tokens weight.
    Args:
        model: The model instance (must support EAGLE)
        name: The name of the weight to process
    Nlm_headTembed_tokens)r   has_own_lm_headhas_own_embed_tokens)rx  rA   s     r*   process_eagle_weightr    sO     e$$  D $%)""" r,   feature_layer_indexc                 "    | dk     r|| z   dz   S | S )a  Given a signed vision feature layer, get the number of hidden layers
       needed to leverage it.

    Args:
        feature_layer_index: Index of a required layer in the visual encoder.
        num_hidden_layers: The total number of hidden layers in the visual encoder.
    r   r/   rE   )r  rd  s     r*   get_layer_indexr    s&     Q #66::r,   )r/   )r  )frz   collections.abcr   r   r   
contextlibr   dataclassesr   r   typingr	   r
   r   r   r[   torch.nnr   
torch.funcr   torch.nn.modules.moduler   transformersr   vllm.configr   vllm.distributedr   r   vllm.loggerr   3vllm.model_executor.layers.quantization.base_configr   4vllm.model_executor.model_loader.online_quantizationr   -vllm.model_executor.model_loader.weight_utilsr   %vllm.model_executor.models.interfacesr   vllm.multimodalr   vllm.sequencer   vllm.utils.math_utilsr   vllm.utils.platform_utilsr   r   vllm.utils.torch_utilsr   r   rR   r   rY   rW   r!   r_   r]   r   r   r\   r   r   r   r   r  r  r  r!  r   typerZ   r8  r>  r@  Identityr   rJ  rK  rL  rc  rr  rv  rw  rV   rX   r}  r  r  r  r  r  floatr  r  r  r  r  Tagneeds_fixed_stride_orderr  r  rE   r,   r*   <module>r     s)
        7 7 7 7 7 7 7 7 7 7 % % % % % % ( ( ( ( ( ( ( ( 3 3 3 3 3 3 3 3 3 3 3 3        & & & & & & L L L L L L ) ) ) ) ) ) " " " " " "        $ # # # # #           P O O O O O D D D D D D ) ) ) ) ) ) - - - - - - & & & & & &              
 
X		cDj) N <
 <
 <
 <
 <
 <
 <
 <
~g" g" g" g" g" g" g" g"Z )-&*D D DD D  $&	D
 9t#D YD D D D. 
 4%, 45< 4 4 4 
 4 
 @$u|$ @el); @ @ @ 
 @ 
ELEL( DM \	   
 
 , , ,ELEL(, , 
%,%,&	, , , 
, - - -ELEL(- - 
%,%,&	- - - -&
HM 
Hel 
H 
H 
H 
H	RM 	Rc 	R 	R 	R 	R  T#Y    *<*(* <* \	* * * *Z	/l	/S		/ \	/ 	/ 	/ 	/1 1 1 1 1	 1 1 1(  EI, , ,I, ")_uT")_c%9::TA, , , ,> 
 EI	& & &I&29+ry01& ")_uT")_c%9::TA	& & & &R< < < < <h < < <
@ 
@ 
@ 
@ 
@UX& 
@ 
@ 
@   ' ' ' ' ' '@ @UX_ @ @ @ @F+++ + 3UX(()	+ + + +4 :<  $sDI~"6 ; ; ;eho $s)    $# eho $    +$s) +# + + + + 
6 
6C 
6C 
6 
6 
6 
6$   0 C # c    H  \ \   1L1 #1*-1
5<%&1 1 1 1<:u| : : : : :,EL ,U\ , , , ,$ %,      *(/
)
,	.	   *9*
* 
* * * *,
 
 
 
 
 
 
 
 
r,   