
    -`i                        d dl Z d dlmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
mZmZmZ d dlmZmZmZmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZmZmZmZ d dlm Z  d dl!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z( e
rd dl)m*Z* ne+Z* e e,          Z-dgZ. G d de j/                  Z0 G d de j1                  Z2e e ed                     G d d                                  Z3 G d de4e j1                  Z5e e ed                     G d d                                  Z6e e ed                     G d d                                   Z7dS )!    N)Counter)Callable)field)Path)TYPE_CHECKINGAnyClassVarLiteral)
ConfigDictFieldTypeAdapterfield_validator)	dataclass)CallableInductorPassInductorPass)Rangeconfigget_hash_factorshash_factors)init_loggercurrent_platform)resolve_obj_by_qualname)round_up)is_torch_equal_or_newer)
VllmConfigr   c                   (    e Zd ZdZdZ	 dZ	 dZ	 dZdS )CompilationModezSThe compilation approach used for torch.compile-based compilation of the
    model.r            N)__name__
__module____qualname____doc__NONESTOCK_TORCH_COMPILEDYNAMO_TRACE_ONCEVLLM_COMPILE     k/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/config/compilation.pyr   r   '   sA          D<HL0 0r+   r   c                       e Zd ZdZdZdZdZeefZeefZddZ	ddZ
dd defd	Zdefd
ZddZdefdZdefdZdefdZdefdZdefdZdS )CUDAGraphModezConstants for the cudagraph mode in CompilationConfig.
    Meanwhile, the subset enum `NONE`, `PIECEWISE` and `FULL` are also
    treated as concrete runtime mode for cudagraph runtime dispatching.
    r   r   r    returnc                 b    |                                  rt          | j        d                   n| S )Nr   separate_routiner.   valueselfs    r,   decode_modezCUDAGraphMode.decode_modeC   ,    /3/D/D/F/FP}TZ]+++DPr+   c                 b    |                                  rt          | j        d                   n| S )Nr   r1   r4   s    r,   
mixed_modezCUDAGraphMode.mixed_modeF   r7   r+   modec                 ~    |                                 rJ |                                  r|j        | j        v S | |k    S N)r2   r3   )r5   r:   s     r,   has_modezCUDAGraphMode.has_modeI   sG    ((*****  "" 	,:++t|r+   c                 @    |                      t          j                  S r<   )r=   r.   	PIECEWISEr4   s    r,   requires_piecewise_compilationz,CUDAGraphMode.requires_piecewise_compilationO   s    }}]4555r+   c                 p    |                                  r!t          t          | j                            n| S r<   )r2   r.   maxr3   r4   s    r,   max_cudagraph_modez CUDAGraphMode.max_cudagraph_modeR   s.    151F1F1H1HR}S__---dRr+   c                 F    |                                  t          j        k    S r<   )rC   r.   FULLr4   s    r,   has_full_cudagraphsz!CUDAGraphMode.has_full_cudagraphsU   s    &&((M,>>>r+   c                 *    |                                  S r<   )r@   r4   s    r,   has_piecewise_cudagraphsz&CUDAGraphMode.has_piecewise_cudagraphsX   s    22444r+   c                 6    t          | j        t                    S r<   )
isinstancer3   tupler4   s    r,   r2   zCUDAGraphMode.separate_routine[   s    $*e,,,r+   c                 L    | t           j        t           j        t           j        fv S r<   )r.   r&   r?   rE   r4   s    r,   valid_runtime_modesz!CUDAGraphMode.valid_runtime_modes^   s    *M,C]EWXXXr+   c                     | j         S r<   )namer4   s    r,   __str__zCUDAGraphMode.__str__a   s
    yr+   N)r/   r.   )r"   r#   r$   r%   r&   r?   rE   FULL_DECODE_ONLYFULL_AND_PIECEWISEr6   r9   boolr=   r@   rC   rF   rH   r2   rM   strrP   r*   r+   r,   r.   r.   7   sU        
 DIDd|	*Q Q Q QQ Q Q Q_     6 6 6 6 6S S S S?T ? ? ? ?5$ 5 5 5 5-$ - - - -YT Y Y Y Y      r+   r.   forbid)extra)r   c            
          e Zd ZU dZ ed          Zeed<   	  ed          Zeed<   	  ed          Z	eed<   	  ed          Z
eed<   	  ed          Zeed<   	  ed          Zeed	<   	  ed          Zeed
<   	 dZedz  ed<   	 dZeed<   	 dededz  fdZedeeef         fd            ZdefdZ edddddd	d
d          edededefd                        ZddZdS )
PassConfiga  Configuration for custom Inductor passes.

    This is separate from general `CompilationConfig` so that inductor passes
    don't all have access to full configuration - that would create a cycle as
    the `PassManager` is set as a property of config.

    You must pass PassConfig to VLLMConfig constructor via the CompilationConfig
    constructor. VLLMConfig's post_init does further initialization.
    If used outside of the VLLMConfig, some fields may be left in an
    improper state.
    Ndefaultfuse_norm_quantfuse_act_quantfuse_attn_quanteliminate_noops	enable_spfuse_gemm_commsfuse_allreduce_rmsfi_allreduce_fusion_max_size_mbFenable_qk_norm_rope_fusion
world_sizer/   c                     d}g d}||vrdS | j         }|'|                                                     |          }|t          ||z            ndS )z
        Returns the max communication size in bytes for flashinfer
        allreduce fusion for the given world size. Returns None if world size
        is not supported by configs as it's not supported by flashinfer.
        i   )r          N)rb   'default_fi_allreduce_fusion_max_size_mbgetint)r5   rd   MiBFI_SUPPORTED_WORLD_SIZESmax_size_mbs        r,   flashinfer_max_sizezPassConfig.flashinfer_max_size   sn     #,99 5554:FFHHLLZXXK)4)@s;$%%%dJr+   c                      ddl m}  ddlm}  |j                    si S |                      |j                                                    i           S )Nr   )FI_ALLREDUCE_FUSION_MAX_SIZE_MBr   )"vllm.compilation.collective_fusionrp   vllm.platformsr   is_cudari   get_device_capabilityto_int)rp   r   s     r,   rh   z2PassConfig.default_fi_allreduce_fusion_max_size_mb   st    VVVVVV333333'')) 	I.222244;;==r
 
 	
r+   c                 T    t          t          | t                                          S )z
        Produces a hash unique to the pass configuration.
        Any new fields that affect compilation should be added to the hash.
        Any future fields that don't affect compilation should be excluded.
        )r   r   setr4   s    r,   compute_hashzPassConfig.compute_hash   s!     ,T35599:::r+   wrapr:   r3   handlerc                      ||S  ||          S zFSkip validation if the value is `None` when initialisation is delayed.r*   clsr3   r{   s      r,   _skip_none_validationz PassConfig._skip_none_validation   s     =Lwu~~r+   c                 f   | j         sj| j        s| j        rt                              d           | j        rt                              d           | j        rt                              d           | j        r6t          j	                    s%t                              d           d| _        d S d S d S )NzdFusion enabled but reshape elimination disabled. RMSNorm/SiluMul + quant (fp8) fusion might not workz^Fusion enabled but reshape elimination disabled. Attention + quant (fp8) fusion might not workziFusion enabled but reshape elimination disabled. Allreduce + rms norm + quant (fp8) fusion might not workzhQK Norm + RoPE fusion enabled but the current platform is not CUDA or ROCm. The fusion will be disabled.F)
r^   r[   r\   loggerwarning_oncer]   ra   rc   r   is_cuda_aliker4   s    r,   __post_init__zPassConfig.__post_init__   s     # 	# t': ##J   # ##D   & ##O   * 	43C3Q3S3S 	4=   /4D+++	4 	4 	4 	4r+   r/   N)r"   r#   r$   r%   r   r[   rS   __annotations__r\   r]   r^   r_   r`   ra   rb   floatrc   rj   rn   staticmethoddictrh   rT   rx   r   classmethodr   r   r   r   r*   r+   r,   rX   rX   e   s+        
 
 "E$///OT///. 5...ND....!E$///OT///0!E$///OT///eD)))It)))&!E$///OT///$uT222222-48#UT\8881" (-,,,/Kc KcDj K K K K" 
T#u*5E 
 
 
 \
;c ; ; ; ; _	 	 	 #  S    [	 	4 4 4 4 4 4r+   rX   c                   "    e Zd ZdZdZ	 dZ	 dZdS )DynamicShapesTypezTypes of dynamic shapes handling in torch.compile().
    see  Dynamic shapes and vllm guard dropping in torch_compile.md
    for more details.backedunbackedbacked_size_obliviousN)r"   r#   r$   r%   BACKEDUNBACKEDBACKED_SIZE_OBLIVIOUSr*   r+   r,   r   r      s>          F7 H7 4% %r+   r   c                   Z    e Zd ZU dZej        Zeed<   	 dZe	ed<   	 dZ
e	ed<   	 defdZd	S )
DynamicShapesConfigz<Configuration to control/debug torch compile dynamic shapes.typeFevaluate_guardsTassume_32_bit_indexingr/   c                 @    ddl m}m}  || i           } ||          S )z8
        Provide a hash for DynamicShapesConfig
        r   r   r   )vllm.config.utilsr   r   )r5   r   r   factorss       r,   rx   z DynamicShapesConfig.compute_hash   s?    
 	EDDDDDDD""4,,|G$$$r+   N)r"   r#   r$   r%   r   r   r   r   r   rS   r   rT   rx   r*   r+   r,   r   r      s          GF/6D
666 "OT!!!	 $(D'''%c % % % % % %r+   r   c                   ,   e Zd ZU dZ ed          Zeed<   	  ed          Ze	ed<   	 dZ
edz  ed<   	 dZeed<   	  ed	 
          Zed         ed<   	 dZeed<   	  ee
          Zee         ed<   	 dZee         dz  ed<   	 dZeed<   	 dZeeez           dz  ed<   	 dZee         dz  ed<   	  ee
          Zeed<   	  ee
          Zeeef         ed<   	  ed          Zeed<   	 dZeed<   	 dZee         dz  ed<   	 dZ eed<   	 dZ!eed<   	  ed          Z"eed<   	  ee#
          Z$e#ed<   	  ed          Z%edz  ed<   	  ee&
          Z'e&ed <   	  edd!          Z(eed"<   	  ee)d#          Z*e)e         ed$<   	  ee)d#          Z+e)e         ed%<   	  ee,d#          Z-e,e         ed&<   	  ed'd!          Z.e/ed(<   	  eed#          Z0eee1f         ed)<   	 g d*Z2e3ee                  ed+<   d,efd-Z4d,efd.Z5e5Z6 e7dd/0          e8d1e1d,e1fd2                        Z9 e7dd/0          e8d1e1d,e1fd3                        Z: e7dd/0          e8d1e1d,e1fd4                        Z; e7d          e8d1ed,efd5                        Z< e7ddddd60          e8d1e1d7e=d,e1fd8                        Z>dLd9Z?d:d;d,ee=z  fd<Z@dLd=ZA	 dMd?ed@efdAZBdB ZCd,efdCZDd,efdDZEdE ZFdFed,efdGZGdHedIefdJZHd,eeI         fdKZJdS )NCompilationConfigaN	  Configuration for compilation.

    You must pass CompilationConfig to VLLMConfig constructor.
    VLLMConfig's post_init does further initialization. If used outside of the
    VLLMConfig, some fields will be left in an improper state.

    It has three parts:

    - Top-level Compilation control:
        - [`mode`][vllm.config.CompilationConfig.mode]
        - [`debug_dump_path`][vllm.config.CompilationConfig.debug_dump_path]
        - [`cache_dir`][vllm.config.CompilationConfig.cache_dir]
        - [`backend`][vllm.config.CompilationConfig.backend]
        - [`custom_ops`][vllm.config.CompilationConfig.custom_ops]
        - [`splitting_ops`][vllm.config.CompilationConfig.splitting_ops]
        - [`compile_mm_encoder`][vllm.config.CompilationConfig.compile_mm_encoder]
    - CudaGraph capture:
        - [`cudagraph_mode`][vllm.config.CompilationConfig.cudagraph_mode]
        - [`cudagraph_capture_sizes`]
        [vllm.config.CompilationConfig.cudagraph_capture_sizes]
        - [`max_cudagraph_capture_size`]
        [vllm.config.CompilationConfig.max_cudagraph_capture_size]
        - [`cudagraph_num_of_warmups`]
        [vllm.config.CompilationConfig.cudagraph_num_of_warmups]
        - [`cudagraph_copy_inputs`]
        [vllm.config.CompilationConfig.cudagraph_copy_inputs]
    - Inductor compilation:
        - [`compile_sizes`][vllm.config.CompilationConfig.compile_sizes]
        - [`compile_ranges_split_points`]
            [vllm.config.CompilationConfig.compile_ranges_split_points]
        - [`inductor_compile_config`]
        [vllm.config.CompilationConfig.inductor_compile_config]
        - [`inductor_passes`][vllm.config.CompilationConfig.inductor_passes]
        - custom inductor passes

    Why we have different sizes for cudagraph and inductor:
    - cudagraph: a cudagraph captured for a specific size can only be used
        for the same size. We need to capture all the sizes we want to use.
    - inductor: a graph compiled by inductor for a general shape can be used
        for different sizes. Inductor can also compile for specific sizes,
        where it can have more information to optimize the graph with fully
        static shapes. However, we find the general shape compilation is
        sufficient for most cases. It might be beneficial to compile for
        certain small batchsizes, where inductor is good at optimizing.
    NrY   levelr:   debug_dump_path 	cache_dirc                      t           j        S r<   )envsVLLM_COMPILE_CACHE_SAVE_FORMATr*   r+   r,   <lambda>zCompilationConfig.<lambda>y  s	     C r+   )default_factorybinaryunpackedcompile_cache_save_formatbackend
custom_opssplitting_opsFcompile_mm_encodercompile_sizescompile_ranges_split_pointsinductor_compile_configinductor_passescudagraph_moder   cudagraph_num_of_warmupscudagraph_capture_sizescudagraph_copy_inputsTcudagraph_specialize_lorause_inductor_graph_partitionpass_configmax_cudagraph_capture_sizedynamic_shapes_config)rZ   initlocal_cache_dir)r   r   enabled_custom_opsdisabled_custom_opstraced_filesg        compilation_timestatic_forward_context)zvllm::unified_attentionz#vllm::unified_attention_with_outputzvllm::unified_mla_attentionz'vllm::unified_mla_attention_with_outputzvllm::mamba_mixer2zvllm::mamba_mixerzvllm::short_convzvllm::linear_attentionzvllm::plamo2_mamba_mixerzvllm::gdn_attention_corezvllm::kda_attentionzvllm::sparse_attn_indexerz$vllm::rocm_aiter_sparse_attn_indexer_attention_opsr/   c                     h d}ddl m}m}  || |          }| j                                        |d<   | j                                        |d<    ||          S )a$  
        Provide a hash that uniquely identifies all the configs
        that affect the structure of the computation
        graph from input ids/embeddings to the final hidden states,
        excluding anything before input ids/embeddings and after
        the final hidden states.
        >   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   r   rx   r   )r5   ignored_factorsr   r   r   s        r,   rx   zCompilationConfig.compute_hashi  s    

 

 

 	EDDDDDDD""499!%!1!>!>!@!@+/+E+R+R+T+T'(|G$$$r+   c                 >   dddddddid}i }t          t                                                                D ]#\  }}t          | j        |          |k    rd||<   $|r||d<   t          t                                        | |d          }t          |          S )NTpost_grad_custom_post_pass)r   r   r   r   r   r   r   )excludeexclude_unset)	varsrX   itemsgetattrr   r   r   dump_pythonrT   )r5   r   pass_config_excludeattrdefault_valr   s         r,   __repr__zCompilationConfig.__repr__  s    &*"&#' $ ,d(	
 	
 !!%jll!3!3!9!9!;!; 	1 	1D+t'..+==,0#D) 	9%8GM".//;;' < 
 
 6{{r+   beforerz   r3   c           	         t          |t                    rs|                                }|t          j        vrDt          d| dd                    t          j                                                             t          |         S |S )z
        Enable parsing the `mode` field from string mode names.
        Accepts both integers (0-3) and string names, like NONE, STOCK_TORCH_COMPILE,
        DYNAMO_TRACE_ONCE, VLLM_COMPILE.
        zInvalid compilation mode: z. Valid modes are: z, )rJ   rT   upperr   __members__
ValueErrorjoinkeys)r   r3   	mode_names      r,   validate_mode_beforez&CompilationConfig.validate_mode_before  s     eS!! 
	.I ;;; X X X(,		/2M2R2R2T2T(U(UX X  
 #9--r+   c                 n    t          |t                    rt          |                                         S |S )z=Enable parsing of the `cudagraph_mode` enum type from string.)rJ   rT   r.   r   r   r3   s     r,   validate_cudagraph_mode_beforez0CompilationConfig.validate_cudagraph_mode_before  s.     eS!! 	0 //r+   c                 H    t          |t                    rt          di |S |S )z<Enable parsing of the `pass_config` field from a dictionary.r*   )rJ   r   rX   r   s     r,   validate_pass_config_beforez-CompilationConfig.validate_pass_config_before  s/     eT"" 	'&&&&&r+   c                 2    |dvrt          d|           |S )Nr   z?compile_cache_save_format must be 'binary' or 'unpacked', got: )r   r   s     r,   "validate_compile_cache_save_formatz4CompilationConfig.validate_compile_cache_save_format  s;     ...       r+   ry   r{   c                      ||S  ||          S r}   r*   r~   s      r,   r   z'CompilationConfig._skip_none_validation  s     =Lwu~~r+   c                 x   | j         -t                              d           | j        | j         | _        | j                            d          }| j                            d          }||z   dk    s
J d            t          d          rd}|| j        vr
d| j        |<   | j        	                                D ]\  }}t          |t                    sLt          |          sJ d	| d
            t          |t                    r|nt          |          | j        |<   f|                    d          }d                    |d d                   }|d         }t#          |          j        |         }	t          |	t                    r|	nt          |	          | j        |<   | j        j        r| j                            d           t          d          r9d| j        vr0d| j        vr't-          j                    sd| j        d<   d| j        d<   | j        rt          d          st3          d          | j        D ]#}
|
d         dvr|
dvrt3          d|
 d          $| j        t4          j        k    r | j        dvrt3          d| j                   | j        dk    rt-          j                    | _        d S d S )NzLevel is deprecated and will be removed in the next release,either 0.12.0 or 0.11.2 whichever is soonest.Use mode instead.If both level and mode are given,only mode will be used.noneallr   z Can only specify 'none' or 'all'z2.6enable_auto_functionalized_v2Fzpass z' should be callable or a qualified name.z+rotary_embeddingz	2.9.0.devcombo_kernelsbenchmark_combo_kernelTzuuse_inductor_graph_partition is only supported with torch>=2.9.0.dev. Set use_inductor_graph_partition=False instead.r      +->   r   r   zInvalid syntax 'z]' for custom op, must be 'all', 'none', '+op' or '-op' (where 'op' is the registered op name))r   eagerinductorz+Invalid backend for piecewise compilation: r   )r   r   warningr:   r   countr   r   r   r   rJ   rT   callabler   r   splitr   
__import____dict__r   rc   appendr   is_cpur   r   r   r)   r   get_compile_backend)r5   
count_none	count_allKEYkvnamesmodule	func_namefuncops              r,   r   zCompilationConfig.__post_init__  s=   :!NN*   y  J	_**622
O))%00	I%***,N*** #5)) 	:1C$66649,S1(..00 	 	DAqa%% {{VV$VA$V$V$VVV{#A|44QAA:Nq:Q:Q ,Q/  GGCLLEXXeCRCj))Fb	If%%.y9D"466V<PQU<V<V (++ 6 	8 O""#6777 $K00
	Jt'CCC(0LLL$+-- M =AD(9EID()AB, 	5L6
 6
 	 >   / 	 	B!uJ&&2_+D+D =r = = =   9444 N
 :
 :

 LdlLL   <2+?AADLLL r+   vllm_configr   c                    | j         t          d          | j         t          j        k    rt          d          ddlm}  |t                                }| j         t          j        t          j        fv r$| j	        |v r| j	        S t          | j	                  S | j         t          j        k    sJ | j	        dvrt                              d           dd	lm}  ||          S )
z
        Initialize the backend for the compilation config from a vllm config.
        Arguments:
            vllm_config: The vllm config to initialize the backend from.
        Returns:
            The backend for the compilation config.
        NzyNo compilation mode is set. This method should only be called via vllm config where the level is set if none is provided.zNo compilation mode is set.r   )list_backends)exclude_tags)r   r   z)Using OOT custom backend for compilation.)VllmBackend)r:   r   r   r&   torch._dynamo.backends.registryr  rK   r'   r(   r   r   r)   r   infovllm.compilation.backendsr  )r5   r  r  torch_backendsr  s        r,   init_backendzCompilationConfig.init_backendA  s    9  
 9,,,:;;;AAAAAA&EGG<<<9/-
 
 
 |~--|#*4<888yO88888<444KKCDDD999999 {;'''r+   c                    g }| j         t          t          | j                             | _         | j         D ]q}t          |t                    r.|dk    sJ d|             |                    | j                   Et          |t                    sJ |                    |           r|| _         | j        	                                 | j        r| j        d         | j
        k    sJ dS dS )zTo complete the initialization after cudagraph related
        configs are set. This includes:
        - initialize compile_sizes
        Nr   zOUnrecognized size type in compile_sizes, expect 'cudagraph_capture_sizes', got r   )r   listrw   rJ   rT   extendr   rj   r   sortr   )r5   computed_compile_sizesxs      r,   post_init_cudagraph_sizesz+CompilationConfig.post_init_cudagraph_sizesg  s#    "$)!%c$*<&=&=!>!>D' 	5 	5a%% 5 9999EABE E :99 +11$2NOOOO%a-----*11!44443 	$))+++' 	W/3t7VVVVV	W 	WVVr+   r   all2all_backenddata_parallel_sizec                    | j         t          j        k    r| j        g | _        d S | j         t          j        k    s
J d            | j        j        r| j        s|                                  n| j        t          | j	                  | _        nt          | j                  dk    r| j        t          j        k    s| j        t          j        k    rt                              d           | j        t          j        k    r,t                              d           t          j        | _        n@| j        t          j        k    r+t                              d           t          j        | _        g | _        |dk    rH|dk    rD| j        t          j        k    r1t                              d           t          j        | _        d S d S d S d S )	NzXset_splitting_ops_for_v1 should only be called when mode is CompilationMode.VLLM_COMPILEr   z2Using piecewise cudagraph with empty splitting_opsa  Piecewise compilation with empty splitting_ops does not contain piecewise cudagraph. Setting cudagraph_mode to NONE. Hint: If you are using attention backends that support cudagraph, consider manually setting cudagraph_mode to FULL or FULL_DECODE_ONLY to enable full cudagraphs.ztPiecewise compilation with empty splitting_ops does not contain piecewise cudagraph. Setting cudagraph_mode to FULL.deepep_high_throughputr   a(  DeepEP: Disabling CUDA Graphs since DeepEP high-throughput kernels are optimized for prefill and are incompatible with CUDA Graphs. In order to use CUDA Graphs for decode-optimized workloads, use --all2all-backend with another option, such as deepep_low_latency, pplx, or allgather_reducescatter.)r:   r   r)   r   r   r]   r   !set_splitting_ops_for_attn_fusionr  r   lenr   r.   r?   rR   r   r   r&   rE   r
  )r5   r  r  s      r,   set_splitting_ops_for_v1z*CompilationConfig.set_splitting_ops_for_v1  s   
 9444!)%'"F yO88883 988
 + &	(D4U &	(224444!) &*$*=%>%>""T'((A--'=+BBB*m.NNN''L   &-*AAA''5   +8*<D''(M,LLL''2  
 +8*<D'%'" 777"Q&&#}'999
 KKH   #0"4D 87&&99r+   c                    | j         j        sJ | j        Kg | _        | j                                        r+t
                              d           t          j        | _        | 	                                r
J d            d S )NaN  fuse_attn_quant is incompatible with piecewise cudagraph when use_inductor_graph_partition is off. In this case, splitting_ops will be set to empty list, and cudagraph_mode will be set to FULL. Please ensure you are using attention backends that support cudagraph or set cudagraph_mode to NONE explicitly if encountering any problems.zIattention ops should not be in splitting_ops when fuse_attn_quant is True)
r   r]   r   r   rH   r   r   r.   rE   splitting_ops_contain_attentionr4   s    r,   r  z3CompilationConfig.set_splitting_ops_for_attn_fusion  s    ////%!#D";;== 
9##?   '4&8#7799 	
 	
W	
 	
9 	
 	
r+   c                 V      j         d uot           fd j        D                       S )Nc              3   *   K   | ]}|j         v V  d S r<   )r   ).0r  r5   s     r,   	<genexpr>zDCompilationConfig.splitting_ops_contain_attention.<locals>.<genexpr>  s<       6
 6
)+B$$$6
 6
 6
 6
 6
 6
r+   )r   r   r   r4   s   `r,   r  z1CompilationConfig.splitting_ops_contain_attention  sM    !- 
# 6
 6
 6
 6
/3/B6
 6
 6
 3
 3
 	
r+   c                     |                                  sdS | j        s| j        t          j        k    S | j        dk    o| j        t          j        k    S )NFr   )r  r   r:   r   r)   r   r&   r4   s    r,   is_attention_compiled_piecewisez1CompilationConfig.is_attention_compiled_piecewise  sT    3355 	50 	=9 <<< |z)Odi?;O.OOr+   c                    t          | j                  t          | j                  z   dk    rt                              d           dS t                              d| j                   t                              d| j                   | j        | j        z  }| j        D ]e}|dv r|d         dv s
J d            |d	d         }||vr<dd
lm} ||vrdnd}|d         dk    rdnd}t                              d||||           fdS )a  
        This method logs the enabled/disabled custom ops and checks that the
        passed custom_ops field only contains relevant ops.
        It is called at the end of set_current_vllm_config,
        after the custom ops have been instantiated.
        r   zNo custom ops found in model.Nzenabled custom ops: %szdisabled custom ops: %s>   r   r   r   z8Invalid custom op syntax (should be checked during init)r   )op_registryz-doesn't exist (or wasn't imported/registered)znot present in modelr   enabling	disablingz&Op '%s' %s, %s with '%s' has no effect)	r  r   r   r   debugr   vllm.model_executor.custom_opr%  r   )r5   all_ops_in_modelr  op_namer%  missing_str
enable_strs          r,   custom_op_log_checkz%CompilationConfig.custom_op_log_check  sV    t&''#d.F*G*GG1LLLL8999F-t/FGGG.0HIII2T5MM/ 	 	B_$$a5J&&&J '&&
 fG...EEEEEE k11 DC/  ,.a5C<<ZZ[
##<  -	 	r+   r  c                 Z    d| j         v rd| | j         vS d| j         v sJ d| | j         v S )Nr   r   r   r   )r   )r5   r  s     r,   is_custom_op_enabledz&CompilationConfig.is_custom_op_enabled  sH    DO##r884?22((((2xx4?**r+   uniform_decode_query_lentensor_parallel_sizec                     ||dk    rD j         j        r8t          ||          |z  dk    s	|z  dk    rt          d| d| d           j        rdk    rd S  j        J t          t           fd j        D                                 }t          |          dk    r j        k    rg}t          |          dk    r)t          d d|dz
   d	 j         d
 j         d	          |d          _        | _        d S )Nr   r   z=Can't determine cudagraph shapes that are both a multiple of z: (num_speculative_tokens + 1) required by spec-decode and z} (tensor_parallel_size) required by sequence parallelism please adjust num_speculative_tokens or disable sequence parallelismc              3   j   K   | ]-}t          |          j        k    t          |          V  .d S r<   )r   r   )r   sizemultiple_ofr5   s     r,   r!  zKCompilationConfig.adjust_cudagraph_sizes_for_spec_decode.<locals>.<genexpr>>  sQ        D+..$2QQQ {++QQQQ r+   z7No valid cudagraph sizes after rounding to multiple of zm (num_speculative_tokens + 1 or tp if sequence parallelism is enabled) please adjust num_speculative_tokens (z!) or max_cudagraph_capture_size (z) or cudagraph_capture_sizes ()r   )	r   r_   rB   r   r   r   sortedrw   r  )r5   r1  r2  rounded_sizesr6  s   `   @r,   &adjust_cudagraph_sizes_for_spec_decodez8CompilationConfig.adjust_cudagraph_sizes_for_spec_decode&  s    /!##(8(B#68LMMK66!;;!55:: N#;N N 0N N N   + 	{a/?/?F.:::      8    
 
 }""{d6U'U'U(MM}""P+ P P:RUV:VP P 594SP P 150L	P P P   +8*;''4$$$r+   c                     | j         g S t          t          | j                             }d t          dg|dd         z   |          D             S )z2Get the compile ranges for the compilation config.Nc                 <    g | ]\  }}t          |d z   |          S )r   )startend)r   )r   ses      r,   
<listcomp>z8CompilationConfig.get_compile_ranges.<locals>.<listcomp>Z  s=     
 
 
1 A1%%%
 
 
r+   r   r   )r   r8  rw   zip)r5   split_pointss     r,   get_compile_rangesz$CompilationConfig.get_compile_rangesU  sc    +3Ic$"BCCDD
 
QC,ss"33\BB
 
 
 	
r+   r   )r   )Kr"   r#   r$   r%   r   r   rj   r   r:   r   r   r   r   rT   r   r   r
   r   r  r   r   r   rS   r   r   r   r   r   r   r.   r   r   r   r   r   rX   r   r   r   r   r   r   r   r   rw   r   r   r   r   r   r   r	   rx   r   rP   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r#  r.  r0  r:  r   rD  r*   r+   r,   r   r   +  s:        , ,^ t$$$E3$$$ "E$///D////L $(OTD['''1Is @EuCC@ @ @w';<    GS$ "E$777JS	777J '+M49t#***M"  %$$$% -1M4c	?T)0000 59cT!1888 %*E$$?$?$?T???+ ',eD&A&A&AOT#s(^AAAK %*E$$7$7$7NM777"F %&c%%%3 15T#Y-444: #(4''' '+t*** */t)<)<)< $<<<& $eJ???K???A-2U4-@-@-@d
@@@  27+2 2 2.    3 5E:::OS:::' (-uW5'Q'Q'QQQQ%(-gE(R(R(RRRR&"U3UCCCL#c(CCC/#eCe<<<e<<<$-2U4e-T-T-TDcNTTT=+ + +NHT#Y'    %c % % % %@#    4 G_V(+++     [ ,+& _%H5553 3    [ 65 _]222     [ 32 _011s s    [ 21 _&   #  S    [ ]B ]B ]B ]B~$( $(x $( $( $( $(LW W W W8 ?@I5 I5"I58;I5 I5 I5 I5V
 
 
(
 
 
 
 

	P 	P 	P 	P 	P, , ,\+s +t + + + +-5(+-5CF-5 -5 -5 -5^
DK 
 
 
 
 
 
r+   r   )8enumcollectionsr   collections.abcr   dataclassesr   pathlibr   typingr   r   r	   r
   pydanticr   r   r   r   pydantic.dataclassesr   	vllm.envsr   vllm.compilation.inductor_passr   r   r   r   r   r   r   vllm.loggerr   rr   r   vllm.utils.import_utilsr   vllm.utils.math_utilsr   vllm.utils.torch_utilsr   vllm.configr   objectr"   r   __all__IntEnumr   Enumr.   rX   rT   r   r   r   r*   r+   r,   <module>rX     s^          $ $ $ $ $ $             8 8 8 8 8 8 8 8 8 8 8 8 D D D D D D D D D D D D * * * * * *       M M M M M M M M            $ # # # # # + + + + + + ; ; ; ; ; ; * * * * * * : : : : : : &&&&&&&J	X		 )0 0 0 0 0dl 0 0 0 + + + + +DI + + +\ 
**8,,,---B4 B4 B4 B4 B4 B4 B4 .- B4J% % % % %TY % % %( 
**8,,,---&% &% &% &% &% &% &% .- &%R 
**8,,,---p
 p
 p
 p
 p
 p
 p
 .- p
 p
 p
r+   