
    -`i&                        U d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	m
Z
mZmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlZd dlmZ d dlmZ d dlmZ d dlmZ d d	lmZm Z  d d
l!m"Z"m#Z#m$Z$ d dl%m&Z& d dl'm(Z(m)Z) d dl*m+Z+ d dl,m-Z- d dl.m/Z/ d dl0m1Z1 d dl2m3Z3 ddl4m5Z5m6Z6m7Z7m8Z8m9Z9 ddl:m;Z; ddl<m=Z= ddl>m?Z?  e+e@          ZAdeBeC         deBejD        dz           de
def         de
def         fdZEde"de5fdZF G d d           ZGejH         G d! d"                      ZId#ejJ        d$eBeK         deLejJ        eBeI         f         fd%ZMd&aNd'ed(e$de"d)eOd*eOdefd+ZP G d, d-ej        jQ                  ZRd.aSeKeTd/<   d0aUeOeTd1<    ejV        d2d3          ZWejV        e
g df         dz           eTd4<   ed5e
g df         ded6         fd7            ZXed=d8eKd9eOded6         fd:            ZY G d; d<          ZZdS )>    N)Callable	GeneratorSequence)contextmanager)deepcopy)partialAny)enable_python_dispatcher)pass_context)inductor_partition_rule_contextshould_split)CompilationConfigCUDAGraphMode
VllmConfig)DynamicShapesType)Rangehash_factors)init_logger)lazy)current_platform)resolve_obj_by_qualname)is_torch_equal_or_newer   )CompilerInterfaceEagerAdaptorInductorAdaptorInductorStandaloneAdaptoris_compile_cache_enabled)compilation_counter)InductorPass)PostGradPassManagersym_tensor_indicesinput_bufferscallable_fn.returnc                 8     dt           dt           f fd}|S )a  Create a wrapper that copies inputs to static buffers before calling.

    This is used for cudagraph input copying where we need to copy dynamic
    tensors to static buffers before invoking the compiled graph.

    Args:
        sym_tensor_indices: Indices of tensors with symbolic shapes
        input_buffers: List of static buffers (can contain None for lazy init)
        callable_fn: The compiled function to call

    Returns:
        A wrapper function that copies inputs and calls the compiled function
    argsr&   c                     t          |           }t          	          D ]c\  }}||         }|j        d         }|         |                                |<   |         d |         }|                    |           |||<   d | S )Nr   )list	enumerateshapeclonecopy_)
r(   	list_argsiindexruntime_tensorruntime_shapestatic_tensorr%   r$   r#   s
          m/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/compilation/backends.pycopy_and_callz)make_copy_and_call.<locals>.copy_and_callG   s    JJ	!"455 
	- 
	-HAu&u-N*03M Q'#1#7#7#9#9a )!,^m^<M///,Ie{I&&    r	   )r#   r$   r%   r6   s   ``` r5   make_copy_and_callr8   4   sC    &'S 'S ' ' ' ' ' ' ' ' r7   compilation_configc                    t           j        rt           j        s
J d            | j        dk    rt           j        rWt	          d          rHt          t          j        d          r.t          	                    d           t          | j                  S t          	                    d           t                      S | j        dk    r(t          	                    d           t                      S t          	                    d	| j                    t          t          j                                          }t#          |t$                    sJ |S )
NzCVLLM_USE_MEGA_AOT_ARTIFACT=1 requires VLLM_USE_STANDALONE_COMPILE=1inductorz	2.8.0.devstandalone_compilezUsing InductorStandaloneAdaptorzUsing InductorAdaptoreagerzUsing EagerAdaptorzUsing custom backend: %s)envsVLLM_USE_MEGA_AOT_ARTIFACTVLLM_USE_STANDALONE_COMPILEbackendr   hasattrtorch	_inductorloggerdebugr   compile_cache_save_formatr   r   r   r   get_compile_backend
isinstancer   )r9   compilers     r5   make_compilerrK   Y   s>   . $2R  M R !Z// ,	%'44	% )=>>	%
 LL:;;;,"<   LL0111"$$$		#w	.	.)***~~/1C1KLLLR*+;+O+Q+QRRTT($566666r7   c                      e Zd ZdZdeddfdZdedefdZe	de
ded	         fd
            Z	 ddedededdfdZddZdej        dee         dede
dedef         dz  f
dZ	 	 ddej        dee         deeef         dede
dededefdZdS )CompilerManagera  
    A manager to manage the compilation process, including
    caching the compiled graph, loading the compiled graph,
    and compiling the graph.

    The cache is a dict mapping
    `(runtime_shape, graph_index, backend_name)`
    to `any_data` returned from the compiler.

    When serializing the cache, we save it to a Python file
    for readability. We don't use json here because json doesn't
    support int as key.
    r9   r&   Nc                 p    t                      | _        d| _        || _        t	          |          | _        d S )NF)dictcacheis_cache_updatedr9   rK   rJ   )selfr9   s     r5   __init__zCompilerManager.__init__   s1    8<
 %"4%&899r7   vllm_configc                 6    | j                             |          S N)rJ   compute_hash)rR   rT   s     r5   rW   zCompilerManager.compute_hash   s    }))+666r7   compile_rangeNNNc              #      K   t          |          5  | j        j        r6t          | j        j                  5  dV  ddd           n# 1 swxY w Y   ndV  ddd           dS # 1 swxY w Y   dS )zProvide compilation context for the duration of compilation to set
        any torch global properties we want to scope to a single Inductor
        compilation (e.g. partition rules, pass context).N)r   r9   use_inductor_graph_partitionr   splitting_ops)rR   rX   s     r5   compile_contextzCompilerManager.compile_context   s     
 -(( 	 	&C 4+9    EEE              
 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s3   &A&A	A&	A	A&A	A&&A*-A*F 	cache_dirdisable_cacheprefixc                 d   || _         || _        t          j                            |d          | _        |st          j                            | j                  rt          | j                  5 }t          j	        |
                                          }ddd           n# 1 swxY w Y   dt          dt          ddfddt          dt          t          t          t           f         ffdfd	|                                D             | _        | j                            |||
           dS )a  
        Initialize the cache directory for the compiler.

        The organization of the cache directory is as follows:
        cache_dir=/path/to/hash_str/rank_i_j/prefix/
        inside cache_dir, there will be:
        - vllm_compile_cache.py
        - computation_graph.py
        - transformed_code.py

        for multiple prefixes, they can share the same
        base cache dir of /path/to/hash_str/rank_i_j/ ,
        to store some common compilation artifacts.
        zvllm_compile_cache.pyNvaluetyr&   c                 p    t          | |          s%t          d| dt          |            d|            d S )Nz	Expected z	 but got z for )rI   	TypeErrortype)rc   rd   s     r5   
check_typez4CompilerManager.initialize_cache.<locals>.check_type   sN    !%,, X#$V$V$VT%[[$V$Vu$V$VWWWX Xr7   keyc                    | \  }}} |t                       |t                     t          |t                    r8|\  }} |t                       |t                      t	          ||          } |t                     |||fS )N)startend)intstrrI   tupler   )ri   range_tuplegraph_indexcompiler_namerk   rl   rh   s         r5   	parse_keyz3CompilerManager.initialize_cache.<locals>.parse_key   s    :=7[-
;,,,
=#...k511 >!,JE3Juc***JsC((("'e"="="=K
;..."K>>r7   c                 .    i | ]\  }} |          |S  ru   ).0ri   rc   rs   s      r5   
<dictcomp>z4CompilerManager.initialize_cache.<locals>.<dictcomp>   s'    PPPJC))C..%PPPr7   )r_   r`   ra   )r`   r_   ospathjoincache_file_pathexistsopenastliteral_evalreadr
   rg   ro   r   rm   rn   itemsrP   rJ   initialize_cache)rR   r_   r`   ra   frP   rh   rs   s         @@r5   r   z CompilerManager.initialize_cache   s   $ +"!w||I7NOO 	Q0D!E!E 	Qd*++ 3q (22	3 3 3 3 3 3 3 3 3 3 3 3 3 3 3X# X4 XD X X X X
?s 
?uUC_'= 
? 
? 
? 
? 
? 
? QPPP%++--PPPDJ&&}V 	' 	
 	
 	
 	
 	
s   0'B##B'*B'c                 
   | j         s| j        sd S t          j        d          }|                    | j                  }t          | j        d          5 }|                    |           d d d            d S # 1 swxY w Y   d S )N   )indentw)	r`   rQ   pprintPrettyPrinterpformatrP   r}   r{   write)rR   printerdatar   s       r5   save_to_filezCompilerManager.save_to_file   s     	T%: 	F&a000tz**$&,, 	GGDMMM	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   A88A<?A<graphexample_inputsrq   .c                    ||| j         j        f| j        vrd S | j        ||| j         j        f         }| j                             |||||          }t                              d|t          |          | j         j        |           |S )NzGDirectly load the %s-th graph for compile range %sfrom %s via handle %s)rJ   namerP   loadrE   rF   rn   )rR   r   r   rq   rX   handlecompiled_graphs          r5   r   zCompilerManager.load   s     ;(:;4:MM4]K9KLM++E>;
 
 	UM	
 	
 	
 r7   r   r   additional_inductor_config
num_graphsc                    |dk    rt          j                     at          xj        dz  c_        d }|                     ||||          }|a||dz
  k    rVt          j                     }	|	t          z
  }
|xj        |
z  c_        t                              dt          |          |
           |S t          | j
        t                    rd }nd}||j         d|j         z  }|d| z  }|                     |          5  | j
                            |||||          \  }}d d d            n# 1 swxY w Y   |
J d            t!          |          r||| j        ||| j
        j        f<   t          xj        dz  c_        d| _        |dk    r(t                              d	t          |                     t                              d
|t          |          | j
        j        |           ||dz
  k    rXt          j                     }	|	t          z
  }
|xj        |
z  c_        t                              dt          |          |
d           |S )Nr   r   zTDirectly load the compiled graph(s) for compile range %s from the cache, took %.3f sartifact_compile_range__
_subgraph_zFailed to compile the graphTz1Cache the graph of compile range %s for later usez?Store the %s-th graph for compile range%s from %s via handle %sz3Compiling a graph for compile range %s takes %.2f slocalscope)timecompilation_start_timer    num_backend_compilationsr   compilation_timerE   inforn   rI   rJ   r   rk   rl   r]   compiler   rP   r   num_cache_entries_updatedrQ   	info_oncerF   )rR   r   r   r   r9   rX   rq   r   r   nowelapsed	maybe_keyr   s                r5   r   zCompilerManager.compile   s    ! &*Y[["44944 5.+}UU%j1n,, ikk 66"33w>332&&	   "! dm_55 	4II1IM/EE-2CEEEI3k333I!!-00 	 	%)]%:%:*& &"NF	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ))+H))) $$>?? 	FDVKQDJ{DM4FGH99Q>99$(D!a  G&&   LLQM"""   *q.(()++C22G//7://EM""	     s   <"D**D.1D.)Fr^   r&   N)r   r   )__name__
__module____qualname____doc__r   rS   r   rn   rW   r   r   r   r]   boolr   r   fxGraphModuler*   r
   rm   r   r   rO   r   ru   r7   r5   rM   rM   w   s        :+< : : : : :7
 7s 7 7 7 7 U yAQ7R    ^ JL2
 2
2
-12
CF2
	2
 2
 2
 2
h   ~ S	 	
  
#s(	d	"   : W W~W S	W %)cN	W
 .W W W W 
W W W W W Wr7   rM   c                   B    e Zd ZU eed<   eed<   eed<   ej        ed<   dS )	SplitItemsubmod_namegraph_idis_splitting_graphr   N)	r   r   r   rn   __annotations__rm   r   r   r   ru   r7   r5   r   r   I  sA         MMM>r7   r   r   r\   c           	         d}i g }| j         j        D ]}|j        dv r|j        dk    r?|j        t          j        k    r*|j        d         }|j        dk    r|v sJ |         |<   Vt          ||          r%|dz  }||<   |                    |           |dz  }||<   t          j
        j        j                            | d fdd          }g }d	 |                                D             }|D ]g}	d
|	v s|	dk    rt          ||	          }
t          |	                    dd                    }|                    t#          |	|||v |
                     h|                    d            ||fS )Nr   )outputplaceholdercall_functionr   r   c                     |          S rV   ru   )nodenode_to_subgraph_ids    r5   <lambda>zsplit_graph.<locals>.<lambda>u  s    "5d"; r7   T)keep_original_orderc                     g | ]\  }}|S ru   ru   )rv   r   modules      r5   
<listcomp>zsplit_graph.<locals>.<listcomp>z  s    AAAntVTAAAr7   .r^   submod_c                     | j         S rV   )r   )xs    r5   r   zsplit_graph.<locals>.<lambda>  s    qz r7   )ri   )r   nodesoptargetoperatorgetitemr(   r   appendrC   r   passessplit_modulenamed_modulesgetattrrm   replacer   sort)r   r\   subgraph_idsplit_op_graphsr   
input_nodesplit_gmoutputsnamesr   r   r   r   s               @r5   split_graphr   Q  s    K.0!#O! 4 47/// 7o%%$+9I*I*I1J}--!%88888,?
,K#D)m,, 	41K(3%"";///1KK(3%% x+88t;;;;QU 9  H GAA(>(>(@(@AAAE Y Y$;;$"**4((t||Ir2233yx(o2MPVWWXXXX LL))L***Wr7   g        piecewise_backendrT   is_first_graphis_last_graphc           
          |j                                         r|j        r| S ddlm} t          t          j                              } || |t          j	         ||| |                    S )a?  
    Wrap a piecewise backend with CUDA graph wrapper if needed.
    This function is shared between VllmBackend and
    construct_serializable_fn_from_inductor_cache.

    Args:
        piecewise_backend: The backend to wrap
        vllm_config: The vLLM configuration
        compilation_config: The compilation configuration
        is_first_graph: Whether this is the first graph in the sequence
        is_last_graph: Whether this is the last graph in the sequence

    Returns:
        The wrapped backend if CUDA graphs are enabled, otherwise the original backend
    r   )CUDAGraphOptions)debug_log_enable
gc_disableweak_ref_output)runnablerT   runtime_modecudagraph_options)
cudagraph_modehas_piecewise_cudagraphsr[   
cuda_graphr   r   r   get_static_graph_wrapper_clsr   	PIECEWISE)r   rT   r9   r   r   r   static_graph_wrapper_classs          r5   wrap_with_cudagraph_if_neededr     s    . -FFHH!:! !  -,,,,, "9577" " &%"",**+)))
 
 
		 	 	 	r7   c            
            e Zd ZdZdej        j        dee         de	ddddf
 fd	Z
d
edef fdZdej        j        j        d
eej        j        j        df         deeef         def fdZ xZS )PiecewiseCompileInterpretera  Code adapted from `torch.fx.passes.shape_prop.ShapeProp`.
    It runs the given graph with fake inputs, and compile some
    submodules specified by `compile_submod_names` with the given
    compilation configs.

    NOTE: the order in `compile_submod_names` matters, because
    it will be used to determine the order of the compiled piecewise
    graphs. The first graph will handle logging, and the last graph
    has some special cudagraph output handling.

    Note: This class shares similar logic with
    reconstruct_serializable_fn_from_mega_artifact in caching.py.
    Both create PiecewiseBackend instances and wrap them with cudagraph.
    The key difference is:
    - reconstruct_serializable_fn_from_mega_artifact: PiecewiseBackend receives
      pre-compiled runnables (compiled_runnables is set, graph is None)
    - this class: PiecewiseBackend receives the FX graph to compile
      (graph is set, compiled_runnables is None)


    If modifying the backend creation/wrapping logic, consider updating both.
    r   compile_submod_namesrT   vllm_backendVllmBackendr&   Nc                     t                                          |           ddlm}  |            | _        || _        |j        | _        || _        || _        d| _	        d S )Nr   detect_fake_modeF)
superrS   torch._guardsr   	fake_moder   r9   rT   r   extra_traceback)rR   r   r   rT   r   r   	__class__s         r5   rS   z$PiecewiseCompileInterpreter.__init__  sp     	   222222))++$8!"-"@&($r7   r(   c                       fd|D             } j         5  t                      5   t                      j        | cd d d            cd d d            S # 1 swxY w Y   d d d            d S # 1 swxY w Y   d S )Nc                 |    g | ]8}t          |t          j                  rj                            |          n|9S ru   )rI   rC   Tensorr   from_tensor)rv   trR   s     r5   r   z3PiecewiseCompileInterpreter.run.<locals>.<listcomp>  sO     
 
 
 .85<-H-HODN&&q)))a
 
 
r7   )r   r   r   run)rR   r(   	fake_argsr   s   `  r5   r   zPiecewiseCompileInterpreter.run  s3   
 
 
 

 
 
	 ^ 	+ 	+577 	+ 	+577;	*	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+s3   A.AA.A	A.A	A..A25A2r   .kwargsc                 4   t          |t                    sJ t                                          |||          }|| j        v r| j                            |          }|                     |          }d t          |          D             }ddlm	} ddl
m}	  |	|| j        |t          | j                  || j         ||                    }
t          |
| j        | j        |
j        |
j                  | j        j        |<   t*          xj        dz  c_        |S )Nc                 L    g | ]!\  }}t          |t          j                  |"S ru   )rI   rC   SymInt)rv   r0   r   s      r5   r   z;PiecewiseCompileInterpreter.call_module.<locals>.<listcomp>  s=     ! ! !aAu|1L1L!! ! !r7   r   )graph_returns_tupler   PiecewiseBackend)rI   rn   r   call_moduler   r1   
fetch_attrr+   torch._inductor.compile_fxr  r   r  rT   lenr   r   r9   r   r   r   __dict__r    $num_piecewise_capturable_graphs_seen)rR   r   r(   r  r   r1   submodsym_shape_indicesr  r  r   r   s              r5   r  z'PiecewiseCompileInterpreter.call_module  sM    &#&&&&&$$VT6::T...-33F;;E__V,,F! !'oo! ! !
 GFFFFF;;;;;; 0 0 D-..!!##F++! ! ,I! '!0!/, ,DK (  DDIDDr7   )r   r   r   r   rC   r   r   r*   rn   r   rS   r
   r   r   Targetro   ArgumentrO   r  __classcell__)r   s   @r5   r   r     s        .%$% #3i%  	%
 $% 
% % % % % %$+ + + + + + + ++$+ EHM*C/0+ S#X	+
 
+ + + + + + + + + +r7   r   backbone	model_tagFmodel_is_encoder on_compilation_complete_callback)default!_on_compilation_complete_callbackcallbackrY   c              #      K   t                               |           }	 d V  t                               |           d S # t                               |           w xY wrV   )r  setreset)r  tokens     r5   set_on_compilation_completer  0  s^       .11(;;E7)//66666)//6666s	   > Atag
is_encoderc              #      K   | t           k    sJ d|  dt            d            t           }t          }| a |a	 dV  |a |adS # |a |aw xY w)z%Context manager to set the model tag.z
Model tag z  is the same as the current tag r   N)r  r  )r  r   old_tagold_is_encoders       r5   set_model_tagr$  ;  s      
 )FSFF)FFF  G%NI!*	) 	)))))s   A Ac            	       ~   e Zd ZU dZeed<   eed<   dZeed<   e	j
        ed<   e	j
        ed<   ee         ed<   ed	ef         ed
<   eed	ef                  ed<   eed<   eeef         ed<   	 	 ddedededdfdZdeeeeee         f         dz  eeef         dz  f         fdZddZde	j
        dee         defdZdS )r   a  The compilation backend for `torch.compile` with vLLM.
    It is used for compilation mode of `CompilationMode.VLLM_COMPILE`,
    where we customize the compilation.

    The major work of this backend is to split the graph into
    piecewise graphs, and pass them to the piecewise backend.

    This backend also adds the PostGradPassManager to Inductor config,
    which handles the post-grad passes.
    rT   r9   F_calledr   r   piecewise_graphs.returned_callablepost_grad_passescompiler_managerinductor_configr^   ra   r   r&   Nc                 N   |pt           | _        |pt          | _         t	          t          j                                          | _        t
          j        | _        || _	        |j
        | _
        t          | j
                  | _        t          | j
        j                  | _        d S rV   )r  ra   r  r   r   r   get_pass_manager_clspass_managerpass_keyrT   r9   rM   r*  r   inductor_compile_configr+  )rR   rT   ra   r   s       r5   rS   zVllmBackend.__init__k  s     )	 %8(8
3133
 
   )1&"-"@1@#2
 2
  ((?(WXXr7   c           
      B   t           j        sdS ddlm} ddlm}  |            }i }i }| j                                        D ]\  }}t          | j        |          }t          |d          r|j
        n|}	t          |	|          sD|}
|	j        ||
<   |	j        ||
<   |	                                                                D ]F\  }}|                    |
||           t"                              d|
|t'          |                     Gt"                              d|                                |                                |                                           t"                              dt1          |j                                                             |||fS )	a  Collect inductor cache artifacts from all piecewise backends.

        Returns:
            tuple: (standalone_compile_artifacts, sym_shape_indices_map,
                    returns_tuple_map)
                - standalone_compile_artifacts: StandaloneCompiledArtifacts
                  with compiled artifacts
                - sym_shape_indices_map: dict mapping submod_name to
                  sym_shape_indices
                - returns_tuple_map: dict mapping submod_name to
                  returns_tuple
        rY   r   )StandaloneCompiledArtifactsr  r   z-collected artifact for %s shape %s (%d bytes)z=collected artifacts: %d entries, %d artifacts, %d bytes totalz$standalone compile artifact keys: %s)r>   r?   cachingr2  r   r  r   named_childrenr   rB   r   rI   r  returns_tupleto_bytesr   insertrE   rF   r  r   num_entriesnum_artifacts
size_bytesr*   submodule_byteskeys)rR   r2  r  standalone_compile_artifactssym_shape_indices_mapreturns_tuple_mapr   r   childr   r   	shape_str
bytes_datas                r5   $collect_standalone_compile_artifactsz0VllmBackend.collect_standalone_compile_artifacts  s     . 	$##888888777777'B'B'D'D$ "}3355 	 	GD!DM400E29%2L2L WRW/1ABB K1B1T!+.->-Lk*):)C)C)E)E)K)K)M)M  %	:,33KJWWWC
OO	    	K(4466(6688(3355		
 	
 	
 	2-=BBDDEE	
 	
 	

 ,-BDUUUr7   c                    | j                             | j                   | j        | j        v rt          | j        | j                 t                    rt          d          t          | j        j	        | j                 t                    sJ | j                             | j        j	        | j                            | j         | j        | j        <   d S )Nz9PostGradPassManager can not be kept in CompilationConfig.)r.  	configurerT   r/  r+  rI   r"   
ValueErrorr9   r0  r!   add)rR   s    r5   configure_post_passzVllmBackend.configure_post_pass  s    ##D$4555 =D000$.t}=?RSS  O  
 "+CDMR      !%%+CDMR   /3.?T]+++r7   r   c           
        '() ddl m} | j        }t          j                    }t          |          }|                                }| j                            |          }t          t          | j
        j                            (t                              dt          (fd                     g }	(D ]}
|	                    |
           |
dk    r	 t!          |
          5 }|	                    |                                           d d d            n# 1 swxY w Y   n# t$          t&          f$ r t                              d|
           Y w xY wt+          j        d                    |	                                                                                    }| j
        j                                         | j
        j        s||||g}t+          j        t9          |                                                                                    d d         }t:          j                            t          j        d	|          }|| j
        _        | j
        j        }t;          j         |d
           || j
        _        |j!        j"        }|j!        j#        }t:          j                            |d| d| | j$                  }t;          j         |d
           || j
        _%        tM          | j'                   }|rt          (                    dd           nt          (                    d|d           | j        )                    ||| j$                   t                              d|||||           	 t                              dt          tU          tV          j,        |d                    |           t:          j                            |d          }t:          j        -                    |          sEt!          |d          5 }t]          j/        ||||d|dd
           d d d            n# 1 swxY w Y   n-# t`          $ r  t                              d|d
           Y nw xY wtb          xj2        dz  c_2        ddl3m4} tk          j5                    |z
  }t          (                    d|d           | j
        xj6        |z  c_6        | j7        r
J d            || _8        | 9                                 | j
        j:        rg }n| j
        j;        pg }ty          ||          \  | _=        | _>        d }t          j?        rt          | j=                  }d d!lAmB}  |d"| j8                    |d#| j=                   tb          xjC        t          | j>                  z  c_C        d$ | j>        D             }g '|j8        E                    d%&          D ]"}'                    |jF        d'                    #'fd(t                    D             } t          | j=        || j        |           jI        |  d d)lJmK}  |            }| j
        jL        jM        rr| j
        jL        jN        t          jP        k    rSd d*lQmR} |jS        jT        U                                D ].\  } }!|!jV        dk    r |d |!jW                  |jS        jT        | <   /t:          j                            |d+          }"t:          j        -                    |"          sd,| j=        X                    d-.          z   }#|#Y                    d/d0          }#t!          |"d          5 }|Z                    |#           d d d            n# 1 swxY w Y   t          [                    d1|"d           d
| _7        t          j?        r|n| j8        }$| j
        j\        t          j^        k    s| j
        j_        s  ||$| j$        | j=        | j`        | 2          S d d3lamb) )fd4t          |          D             }%t          |%fd5|%D             | j=                  }& ||$| j$        |&| j`        | |%6          S )7Nr   )VllmSerializableFunctionz9Traced files (to be considered for compilation cache):
%sc                  .    d                                S )N
)rz   )forward_code_filess   r5   r   z&VllmBackend.__call__.<locals>.<lambda>  s    #566 r7   z<string>zFailed to read file %srL  
   torch_compile_cacheT)exist_okrank_r   z'vLLM's torch.compile cache is disabled.r   r   z2Using cache directory: %s for vLLM's torch.compilezAtorch.compile cache factors: env=%s cfg=%s comp=%s code=%s dir=%sz2Compile env factors (raw):
%s
Vllm config hash: %sx   )widthzcache_key_factors.jsonr   )envconfig_hash	code_hashcompiler_hash   )r   	sort_keyszCould not write compile cache metadata at %s; continuing without metadata. Compiled cache remains valid; diagnostics may be limited.)exc_info)torch_compile_start_timez&Dynamo bytecode transform time: %.2f sz#VllmBackend can only be called oncer   )lazy_format_graph_codezbefore splitzafter splitc                 *    g | ]}|j         	|j        S ru   )r   r   )rv   items     r5   r   z(VllmBackend.__call__.<locals>.<listcomp>  s4     #
 #
 #
*#
#
 #
 #
r7   r   )r   example_valuec                 ^    g | ])\  }}t          |t          j                  r|         n|*S ru   )rI   rC   r   )rv   r0   r   all_fake_valuess      r5   r   z(VllmBackend.__call__.<locals>.<listcomp>  sH     
 
 
1 #-Q"="=DOA1
 
 
r7   r   )ValueRangeszcomputation_graph.pyz0from __future__ import annotations
import torch
F)print_outputz<lambda>r   zComputation graph saved to %s)r   r   )is_symbolicc                     g | ]X\  }}t          |t          j        j        j                  )t          fd |                                D                       V|YS )c              3   .   K   | ]} |          V  d S rV   ru   )rv   drd  s     r5   	<genexpr>z2VllmBackend.__call__.<locals>.<listcomp>.<genexpr>  s+      55qKKNN555555r7   )rI   rC   _subclassesfake_tensor
FakeTensoranysize)rv   r0   r   rd  s      r5   r   z(VllmBackend.__call__.<locals>.<listcomp>  su     
 
 
1!U.:EFF
 5555AFFHH55555	

 
 
r7   c                 D    g | ]}|                                          S ru   )r-   )rv   r   r   s     r5   r   z(VllmBackend.__call__.<locals>.<listcomp>  s*    CCC1^A$$&&CCCr7   )r   r   r#   )dr3  rJ  rT   r>   compile_factorsr   rW   r*  r*   sortedr9   traced_filesrE   rF   r   r   r}   r   OSErrorUnicodeDecodeErrorwarninghashlibsha256rz   encode	hexdigestclearr_   rn   rx   ry   VLLM_CACHE_ROOTmakedirsparallel_configrankdata_parallel_indexra   local_cache_dirr   r+  r   r   r   r   r   r|   jsondump	Exceptionr    num_graphs_seenmonitorr[  r   r   r&  r   rH  r[   r\   r   r   r'  r?   r   torch._dynamo.utilsr\  num_piecewise_graphs_seenr  
find_nodesmetar+   r   r   r   r   dynamic_shapes_configevaluate_guardsrg   r   BACKEDtorch.utils._sympy.value_rangesrb  	shape_envvar_to_ranger   lowerupperprint_readabler   r   
debug_oncer   r   NONEcudagraph_copy_inputsr   %torch.fx.experimental.symbolic_shapesrd  r8   )*rR   r   r   rJ  rT   env_factorsenv_hashrU  rW  hash_contentfilepathr   rV  factorshash_keyr_   r}  dp_rankr  r`   	meta_pathr[  dynamo_timefx_split_opsoriginal_split_gmr\  submod_names_to_compiler0   r   r   r   rb  sr
graph_pathsrcgraph_to_serializer#   r6   ra  rM  rd  s*     `                                    @@@r5   __call__zVllmBackend.__call__  s"
   	
 	
 	
 	
 	
 	
 & *,,,,!..00-::;GG!&)@)M"N"NOOH666677	
 	
 	
 * 	 	H))):%% (^^ 2q ''1112 2 2 2 2 2 2 2 2 2 2 2 2 2 2/0   7BBB N499\#:#:#A#A#C#CDDNNPP	,22444&0 	:
  iGG ~c'll&9&9&;&;<<FFHH"MH$&;X I 1:D#-+5	
I----,5)*/-A',,y2J$2J2J2J2JDKXX
Od33332A/ 5T5IJJJ 	FgVVVVD     	..]DK	
 	
 	
 	O	
 	
 	
	LLFWV^[DDDEE  
 _6NOOI7>>),, )S)) QI#.+6)2-:	   "&
 
 
 
                
	 
	 
	NN        
	 	++q0++555555ikk$<<4k 	 	
 	
 	
 	00K?00 <FF!FFF
  """"? 	G&(LL2@FBL/:5,/O/O,t, !* 	8 ( 7 7>>>>>> 	~tz:::}dm<<<55T=R9S9SS55#
 #
-#
 #
 #
 ''='99 	< 	<A""16/#:;;;;
 
 
 
!.11
 
 
		#M2D4Dd	
 	

y	 	 	322222$$&&	 #9I	R'=B '( ( DCCCCC "+8>>@@ R R17a<<:E+a:Q:QI'4Q7W\\/3IJJ
w~~j)) 	
 E-..E.BBC  ++j-88Cj#&& !               /7     !%!@Pdj 	
 #2m6HHH*@ I ,+"?!    	FEEEEE
 
 
 
!),,
 
 
 +CCCC0BCCCM
 
 ('K1
 
 
 	
s   D%%(DD%D	D% D	!D%%,EE<BQ P8,Q 8P<<Q ?P< Q 'Q.-Q.]33]7:]7)r^   Fr   )r   r   r   r   r   r   r   r&  r   r   r   r*   r   r   r
   r   rM   rO   rn   rS   ro   rm   rC  rH  r  ru   r7   r5   r   r   O  s        	 	 ))))GT>n9o%%%S))))xS12222%%%% #s(^###
  	#Y #Y#Y #Y 	#Y
 
#Y #Y #Y #YN<V	sDd3i(4/c4i41GG	H<V <V <V <V|@ @ @ @*K
bn K
hsm K
PS K
 K
 K
 K
 K
 K
r7   r   )F)[r~   contextvarsdataclassesru  r  r   rx   r   r   collections.abcr   r   r   
contextlibr   copyr   	functoolsr   typingr
   rC   torch.fxr   torch._dispatch.pythonr   	vllm.envsr>   vllm.compilation.inductor_passr    vllm.compilation.partition_rulesr   r   vllm.configr   r   r   vllm.config.compilationr   vllm.config.utilsr   r   vllm.loggerr   vllm.logging_utilsr   vllm.platformsr   vllm.utils.import_utilsr   vllm.utils.torch_utilsr   compiler_interfacer   r   r   r   r   counterr    inductor_passr!   r.  r"   r   rE   r*   rm   r   r8   rK   rM   	dataclassr   r   rn   ro   r   r   r   r   Interpreterr   r  r   r  
ContextVarr  r  r$  r   ru   r7   r5   <module>r     sK   



            				   9 9 9 9 9 9 9 9 9 9 % % % % % %                          ; ; ; ; ; ;       7 7 7 7 7 7        E D D D D D D D D D 5 5 5 5 5 5 1 1 1 1 1 1 1 1 # # # # # # # # # # # # + + + + + + ; ; ; ; ; ; : : : : : :              ) ( ( ( ( ( ' ' ' ' ' ' - - - - - -	X		"S	"t+," #s(#" c3h	" " " "J&7 <M    <O O O O O O O Od        8>8*.s)8
2>4	?*+8 8 8 8v  333 *3 	3
 3 	3 3 3 3l^ ^ ^ ^ ^%("6 ^ ^ ^F 	3    $    K=tLLL ";#9(2t8:Lt:S#T   
 7r4x 7 7 7 7 7 * *s * *CS9T * * * *&a
 a
 a
 a
 a
 a
 a
 a
 a
 a
r7   