
    -`iM                        d dl Z d dlZd dlZd dlZd dlmZmZ d dlmZm	Z	 d dl
mZ d dlZd dlmZ d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZ 	 d dlmZ n# e$ r e ZY nw xY w e!ee"          sJ  ee#          Z$ G d d          Z% G d de          Z&de'e(ef         dddede'e(e)e*         f         de'e(e+f         ddfdZ,dede)e(         fdZ-de'e(e(f         de(fdZ.de/e(         de(fdZ0dS )    N)CallableSequence)AnyLiteral)patch)_pytree)get_inductor_factors)
VllmConfigget_current_vllm_config)hash_factors)init_logger)	safe_hash)SerializableCallablec                      e Zd ZdZddZdedededdfdZdededefd	Zdedede	fd
Z
defdZdefdZdefdZdee         fdZddZdeeeeef         eeef         z  f         fdZdeeeee	f         f         ddfdZdS )StandaloneCompiledArtifactsaK  Storage for standalone compiled artifacts with content-based deduplication.

    Deduplication works via a two-level indirection:
    1. `submodule_bytes` maps "{submod_name}_{shape}" -> SHA256 hash
    2. `submodule_bytes_store` maps SHA256 hash -> actual bytes

    When inserting, we compute the SHA256 hash of the bytes. If the hash
    already exists in `submodule_bytes_store`, we reuse the existing entry
    rather than storing duplicate bytes. This is common because submodules
    often compile to identical artifacts (e.g., identical transformer layers
    split on attn)
    returnNc                 0    i | _         i | _        i | _        d S Nsubmodule_bytessubmodule_bytes_storeloaded_submodule_storeselfs    l/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/compilation/caching.py__init__z$StandaloneCompiledArtifacts.__init__.   s     /179"68###    submod_nameshapeentryc                 r   t          j                    }|                    |           |                                }|| j        | d| <   || j        vr7|| j        |<   t                              d||t          |          |           d S t                              d||t          |          |           d S )N_zHinserting new artifact for submod %s with shape %s (%s bytes) at hash %szQreusing existing cache artifact for submod %s with shape %s (%s bytes) at hash %s)	hashlibsha256update	hexdigestr   r   loggerdebuglen)r   r   r   r    hasher
hex_digests         r   insertz"StandaloneCompiledArtifacts.insert6   s    !!e%%''
9C55e556T7775:D&z2LL(E

     LL6E

    r   c                 t    t                               d||           | j        | j        | d|                   S Nz,getting artifact for submod %s with shape %sr"   )r'   r(   r   r   r   r   r   s      r   getzStandaloneCompiledArtifacts.getO   sL    :	
 	
 	

 ) K!9!9%!9!9:
 	
r   c                 t    t                               d||           | j        | j        | d|                   S r.   )r'   r(   r   r   r/   s      r   
get_loadedz&StandaloneCompiledArtifacts.get_loadedY   sL    :	
 	
 	

 * K!9!9%!9!9:
 	
r   c                 b    t          d | j                                        D                       S )Nc              3   4   K   | ]}t          |          V  d S r   )r)   ).0r    s     r   	<genexpr>z9StandaloneCompiledArtifacts.size_bytes.<locals>.<genexpr>d   s(      OO%3u::OOOOOOr   )sumr   valuesr   s    r   
size_bytesz&StandaloneCompiledArtifacts.size_bytesc   s.    OO4+E+L+L+N+NOOOOOOr   c                 *    t          | j                  S r   )r)   r   r   s    r   num_artifactsz)StandaloneCompiledArtifacts.num_artifactsf   s    4-...r   c                 *    t          | j                  S r   )r)   r   r   s    r   num_entriesz'StandaloneCompiledArtifacts.num_entriesi   s    4'(((r   c                 r    d | j         D             }t          t                              |                    S )Nc                 F    g | ]}|                     d d          d         S )r"      r   )rsplit)r5   	cache_keys     r   
<listcomp>z?StandaloneCompiledArtifacts.submodule_names.<locals>.<listcomp>n   s-    SSS!!#q))!,SSSr   )r   listdictfromkeys)r   namess     r   submodule_namesz+StandaloneCompiledArtifacts.submodule_namesl   s2    SSd>RSSSDMM%(()))r   c                 X   dd l }t          | j                  t          | j                  k    rd S ddlm dt          dffd}|j                                        5 }t          | j        
                                          }t          |                    ||                    }d d d            n# 1 swxY w Y   t          | j                                                  D ]\  }}||         | j        |<   t                              d|                                            d S )Nr   )AOTCompiledArtifactentry_bytesr   c                 V    t          j        |           }                    |          S r   )pickleloadsdeserialize)rK   r    rJ   s     r   _load_entryz9StandaloneCompiledArtifacts.load_all.<locals>._load_entryz   s&    L--E&225999r   zloaded all %s submodules)concurrent.futuresr)   r   r   "torch._inductor.standalone_compilerJ   bytesfuturesThreadPoolExecutorrD   r8   map	enumeratekeysr'   r(   r;   )	r   
concurrentrP   executorentriesloaded_entriesikrJ   s	           @r   load_allz$StandaloneCompiledArtifacts.load_allq   s   !!!! t*++s43M/N/NNNFJJJJJJ	:U 	:/B 	: 	: 	: 	: 	: 	: 2244 	F45<<>>??G!(,,{G"D"DEEN	F 	F 	F 	F 	F 	F 	F 	F 	F 	F 	F 	F 	F 	F 	F d8==??@@ 	? 	?DAq-;A->D'**/1C1C1E1EFFFFFs    A
B66B:=B:c                      | j         | j        dS )Nr   r   ra   r   s    r   __getstate__z(StandaloneCompiledArtifacts.__getstate__   s    #3%)%?
 
 	
r   statec                 H    |d         | _         |d         | _        i | _        d S )Nr   r   r   )r   rc   s     r   __setstate__z(StandaloneCompiledArtifacts.__setstate__   s,    $%67%*+B%C"&(###r   )r   N)__name__
__module____qualname____doc__r   strrS   r,   r0   r   r2   intr9   r;   r=   rD   rH   r_   rE   rb   re    r   r   r   r       s        9 9 9 9# c % D    2
s 
3 
5 
 
 
 

c 
# 
# 
 
 
 
PC P P P P/s / / / /)S ) ) ) )*c * * * *
G G G G,
d3S#Xc5j9I(I#IJ 
 
 
 
)$sDcN':"; ) ) ) ) ) ) )r   r   c                      e Zd ZdZ	 	 	 ddej        j        dee         de	de
def         d	ed
edz  dee         dz  ddfdZdededefdZedd defd            Zededd fd            Zeded          fd            ZdS )VllmSerializableFunctiona   
    A wrapper around a compiled function by vllm. It will forward the tensor
    inputs to the compiled function and return the result.
    It also implements a serialization interface to support PyTorch's precompile
    with custom backend, so that we can save and load the compiled function on
    disk. There's no need to wrap around the compiled function if we don't want
    to serialize them in particular cases.
    Right now serialization for the custom backend is done via
    serializing the Dynamo fx graph plus example inputs.
    FNgraph_moduleexample_inputsprefixoptimized_call.
is_encodervllm_backendsym_tensor_indicesr   c                     t          |t          j        j                  sJ || _        || _        || _        || _        || _        d | _	        || _
        || _        t          d | j        D             d           }||j        j	        | _	        d S d S )Nc              3   N   K   | ] }t          |t          j                  |V  !d S r   )
isinstancetorchSymInt)r5   r]   s     r   r6   z4VllmSerializableFunction.__init__.<locals>.<genexpr>   s3      KK1z!U\/J/JKQKKKKKKr   )rx   ry   fxGraphModulero   rp   rq   rr   rs   	shape_envrt   ru   nextnode)	r   ro   rp   rq   rr   rs   rt   ru   	sym_inputs	            r   r   z!VllmSerializableFunction.__init__   s     ,(<=====(,,$("4KK+KKKT
 
	  &^5DNNN ! r   argskwargsc                      | j         |i |S r   )rr   )r   r   r   s      r   __call__z!VllmSerializableFunction.__call__   s    "t"D3F333r   compiled_fnc           
      P   dd l ddlm ddlm}m} |j                                        }|                    d           |                    d           |                    dd            |d         j	        j
        D ]8}|j                            dd            |j                            d	d            9|d                                         D ]Z\  }}t          |d
          rE|j	        j
        D ]8}|j                            dd            |j                            d	d            9[|j        d|dt          dt           t"          dt          f         t           t          df         f         t          z  ffd}|                    d          r+t'          j        t*          j        d |d                   |d<   n*t'          j        t*          j        d |d                   |d<   t/          j        |d|          5  |                    |d          |d                     |d<   |                    |d                   |d<   d d d            n# 1 swxY w Y   |j        r,|j                                        \  }	}
}|	|d<   |
|d<   ||d<   t9          j        |          S )Nr   FakeTensorMode)GraphPicklerOptionsrr   r}   rt   ro   source_fn_stacknn_module_stackgraphr   objr   .c                     t          j        |          r4t          |j                  rt	          |d          r|j        |j        ffS t          |          rt          d           dfS  | |          S )N_torch_unpicklerrl   )	inspectisclass
issubclassFunctionhasattrr   _torch_handler_namerx   type)r   r   r   graph_reducer_overridesympys     r   _graph_reducer_overridezUVllmSerializableFunction.serialize_compile_artifacts.<locals>._graph_reducer_override   s     $$HsEN33H C!344H
 +c.E-GGG#~.. &Dzz2~%))$444r   ru   c                 .    t          j        | d          S Nmetadevicery   
empty_likeinps    r   <lambda>zFVllmSerializableFunction.serialize_compile_artifacts.<locals>.<lambda>       E,S@@@ r   rp   c                 .    t          j        | d          S r   r   r   s    r   r   zFVllmSerializableFunction.serialize_compile_artifacts.<locals>.<lambda>   r   r   reducer_override)
ops_filterstandalone_compile_artifactssym_shape_indices_mapreturns_tuple_map)r   torch._subclassesr   torch.fx._graph_picklerr   r   __dict__copypopr   nodesr   named_childrenr   r   r   tupler   r0   pytreetree_map_onlyry   Tensorr   objectdumpsrt   $collect_standalone_compile_artifactsrM   )clsr   r   r   rc   r   namesubmodr   r   r   r   r   r   r   s               @@@r   serialize_compile_artifactsz4VllmSerializableFunction.serialize_compile_artifacts   s]    	444444AAAAAAAA$))++		"###		+		.$'''.)/5 	3 	3DIMM+T222IMM+T2222!.1@@BB 	; 	;LD&vw'' ;"L. ; ;DIMM"3T:::IMM"3T::::!-!>	5	5%(	58CH%uS#X67#=	5 	5 	5 	5 	5 	5 	5 	5 99)** 	 '-&:@@&'' 'E"## '-&:@@&'' 'E"#
 \,(:<STT 	R 	R$0$6$6n%ww$'?'?'?% %E.! '3&8&8?O9P&Q&QE"#		R 	R 	R 	R 	R 	R 	R 	R 	R 	R 	R 	R 	R 	R 	R # 	;
 (MMOO	,%!4PE01-BE)*):E%&|E"""s   AIIIdatac                    ddl mm ddlm} ddlm} ddlm} t          j
        |           | |                      |
                    d                   d<   d                                          |
                    d                   d<                       d	d           }                    d
i           }                    di           }t          j        r|J |                                }t!          |          }	|                                }
t$                              d|
|	           t)          |t+                      ||          t$                              d           S ddlm}                     dd          } |t+                      d         |          dt2          dt2          ffd} | di d|iS )Nr   )TracingContexttracingr   )r   )ShapeEnv)r}   ro   rp   r   r   r   zareconstructing serializable fn from standalone compile artifacts. num_artifacts=%d num_submods=%d)rc   r   vllm_configr   r   z?reconstructed serializable fn from standalone compile artifacts)VllmBackendrs   Frq   r   c                        fdt          j                  D             }                      5   d         |          j        _        ddd           n# 1 swxY w Y    j          S )a^  
            On the first run of the optimized call, we rerun the compiler
            backend which should result in a cache hit. After the backend
            call returns, we just do a one-time replacement of the optimized
            call with the compiled function, so that subsequent calls are on
            the AOT compiled path.
            c                 .    g | ]\  }}||n|         S r   rl   )r5   r]   r   rp   s      r   rC   zbVllmSerializableFunction.deserialize_compile_artifacts.<locals>.optimized_call.<locals>.<listcomp>;  s:       As N1,=  r   ro   N)rW   rp   rr   )rp   compile_inputsr   	fake_modefnrc   r   rt   s   ` r   rr   zNVllmSerializableFunction.deserialize_compile_artifacts.<locals>.optimized_call3  s       '(9::  N 	2233 ! !$0L.)>% %  !! ! ! ! ! ! ! ! ! ! ! ! ! ! ! %2$n55s   A  A$'A$rr   rl   )torch._guardsr   r   r   r   r   r   %torch.fx.experimental.symbolic_shapesr   rM   rN   	recompiler   envsVLLM_USE_MEGA_AOT_ARTIFACTrH   r)   r;   r'   info.reconstruct_serializable_fn_from_mega_artifactr   vllm.compilation.backendsr   r0   r   )r   r   r   r   r   r   r   r   submod_namesnum_submodsr;   r   rs   rr   r   r   r   rc   r   rt   s                 @@@@@@r   deserialize_compile_artifactsz6VllmSerializableFunction.deserialize_compile_artifacts   s~   99999999444444888888BBBBBBT"""NXXZZ888	 , 2 253H) T Tnn'')))"."4"4U;K5Li"X"X',yy1OQU'V'V$ %		*A2 F F!II&92>>* 	/;;;7GGIILl++K8FFHHMKK=	   @-I355&;"3  B KKQ   I 	:99999YY|U33
$/K#%%uX
%
 %
	6C 	6C 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6$ S88588888	r   c                     dS )z+
        Used for depyf debugging.
        rn   rl   r   s    r   co_namez VllmSerializableFunction.co_nameH  s
    
 *)r   )FNN)rf   rg   rh   ri   ry   r{   r|   r   r   rj   r   boolrD   rk   r   r   classmethodrS   r   r   propertyr   r   rl   r   r   rn   rn      sr       	 	" !#'/36 6h*6 !6 	6
 !c*6 6 Dj6 !I,6 
6 6 6 624c 4S 4S 4 4 4 4 A#4A#	A# A# A# [A#F F F;U F F F [FP *!;< * * * X* * *r   rn   rc   r   r   r   r   r   c                     ddl m}m}m} ddlm}  d         }	                     dd          }
 d         }j        }|                                 |	                                }i }|j
        D ]H}|                    dd	          \  }}|                    ||          |                    |i           |<   I ||	|
          }t          j                            t"          j        d
          }t          j        |d           |j                            |d|	           d |                                D             }t/          |          |z
  }|rJ d| dt1          |                       t3          |          D ]\  }}||v r||v sJ ||         }||         }||         } |d|t5          |          ||||          }|dk    }|t5          |          d	z
  k    } |||||          }||j        |<   t8                              d|           |j        r% d         } fd|D             } ||||          } n|} t?          di  | dd}!|!S )a  Construct a VllmSerializableFunction from cached inductor artifacts.

    This function reconstructs a callable model from pre-compiled inductor
    artifacts without re-running the compilation. It:
    1. Loads all cached artifacts
    2. Builds compiled callables for each submodule/shape
    3. Creates PiecewiseBackend instances that dispatch to cached artifacts
    4. Wraps with cudagraph if needed
    5. Returns the final VllmSerializableFunction

    Note: This function shares similar logic with PiecewiseCompileInterpreter
    in backends.py. Both create PiecewiseBackend instances and wrap them with
    cudagraph. The key difference is:
    - this function: PiecewiseBackend receives pre-compiled runnables
      (compiled_runnables is set, graph is None)
    - PiecewiseCompileInterpreter: PiecewiseBackend receives the FX graph
      to compile (graph is set, compiled_runnables is None)

    If modifying the backend creation/wrapping logic, consider updating both.

    Args:
        state: Deserialized state dict containing graph_module, example_inputs,
            prefix, sym_tensor_indices, is_encoder, etc.
        standalone_compile_artifacts: The StandaloneCompiledArtifacts containing
            pre-compiled artifacts for each submodule/shape combination.
        vllm_config: The vLLM configuration.
        sym_shape_indices_map: Mapping from submod_name to sym_shape_indices.
        returns_tuple_map: Mapping from submod_name to returns_tuple.

    Returns:
        A VllmSerializableFunction that can be called directly.
    r   )r   make_copy_and_callwrap_with_cudagraph_if_needed)PiecewiseBackendrq   rs   Fro   r"   r@   dummy_cacheT)exist_ok)	cache_dirdisable_cacherq   c                     h | ]\  }}|S rl   rl   )r5   r   r"   s      r   	<setcomp>zAreconstruct_serializable_fn_from_mega_artifact.<locals>.<setcomp>  s    DDDwtQdDDDr   z-artifacts reference submodules not in graph: z. graph has: N)r   r   piecewise_compile_indextotal_piecewise_compilessym_shape_indicesrt   returns_tuplecompiled_runnablesz7Replaced submodule %s with piecewise backend from cacheru   c                 h    g | ].}t          j        d          |         j        j                  /S )rp   r   )ry   r   device_configr   )r5   idxrc   r   s     r   rC   zBreconstruct_serializable_fn_from_mega_artifact.<locals>.<listcomp>  sS     
 
 
  &',[5N5U  
 
 
r   )rr   rt   rl   ) r   r   r   r   "vllm.compilation.piecewise_backendr   r0   compilation_configr_   rH   r   rA   r2   
setdefaultospathjoinr   VLLM_CACHE_ROOTmakedirscompiler_managerinitialize_cacher   setsortedrW   r)   r   r'   r(   cudagraph_copy_inputsrn   )"rc   r   r   r   r   r   r   r   r   rq   rs   split_gmr   r   compiled_callablesrB   r   	shape_strrt   dummy_cache_dirgraph_childrenmissingr]   r   r   	runnablespiecewise_backendis_firstis_lastwrapped_backendru   input_buffersrr   r   s"   ` `                               r   r   r   P  s   N         
 DCCCCC8_F<//J^$H$7 ))+++/??AALCE1A 
 
	!*!1!1#q!9!9Y(33KKK 	%%k266yAA ;{FJ??Lgll4#7GGOK$////!22! 3    ED(*A*A*C*CDDDN,.0G  	/ 	/ 	/^,,	/ 	/ ;
 $L11  
  
;333GX8X8X8XX1+>)+6&{3	,,#$%%(%6%6/%'(	
 	
 	
 6s<((1,,77
 
 *9+&E	
 	
 	
 	

 / 
""#78
 
 
 
 
 *	
 
 
 ,+,>xXX!	! 
 


%
 
 
 
B
 Ir   c                     g }t          t          j                              }|                    |           |                                 }|                    |           t          j        r!|                    t                                 |S r   )r   r   compile_factorsappendcompute_hashr   extendr	   )r   factorsenv_hashconfig_hashs       r   aot_compile_hash_factorsr    s    G D02233HNN8 **,,KNN; & /+--...Nr   file_contentsc                 n   t          t          |                                 d                     }g }|D ]6\  }}|                    |           |dk    r!|                    |           7t	          d                    |                                          d                                          }|S )Nc                     | d         S )Nr   rl   )xs    r   r   z1_compute_code_hash_with_content.<locals>.<lambda>  s
    QqT r   )keyz<string>
F)usedforsecurity)rD   r   itemsr  r   r   encoder&   )r  r  hash_contentfilepathcontentresults         r   _compute_code_hash_with_contentr    s    ++-->>BBBCCEL" % %'H%%%z!! G$$$$		,&&((%  ikk  Mr   filesc                 P   t                               dd                    |                      i }| D ]e}t          j                            |          sd||<   't          |          5 }|                                ||<   d d d            n# 1 swxY w Y   ft          |          S )Nz9Traced files (to be considered for compilation cache):
%sr   )	r'   r(   r   r   r   isfileopenreadr  )r  r  r  fs       r   _compute_code_hashr#    s    
LLDdiiPUFVFV   M 3 3w~~h'' 	3&(M(##h 31*+&&((h'3 3 3 3 3 3 3 3 3 3 3 3 3 3 3*=999s   )BB	B	)1r#   r   r   rM   collections.abcr   r   typingr   r   unittest.mockr   ry   torch.utilsr   r   	vllm.envsr   #vllm.compilation.compiler_interfacer	   vllm.configr
   r   vllm.config.utilsr   vllm.loggerr   vllm.utils.hashingr   torch._dynamo.aot_compiler   ImportErrorr   rx   r   rf   r'   r   rn   rE   rj   rD   rk   r   r   r  r  r   r#  rl   r   r   <module>r0     s     				  . . . . . . . .                ) ) ) ) ) )       D D D D D D ; ; ; ; ; ; ; ; * * * * * * # # # # # # ( ( ( ( ( (">>>>>>> " " "!" z&-- - --	X		p) p) p) p) p) p) p) p)fz* z* z* z* z*3 z* z* z*zBS>B"?B B  T#Y/	B
 CIB  B B B BJ* c    &4S> c     :c#h :3 : : : : : :s   A A)(A)