
    `i                   6
   d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlZd dlZd dlmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZmZmZm Z m!Z! d dl"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z( d dl)m*Z* d dl+Z,d dl-Z,d dl.m/c m0Z1 d dl2m3Z3 d dl,m4Z4 d dl5m6Z6 d dl7m8Z8m9Z:m	Z;m/Z< d dl=m>Z> d dl?m@Z@ d dlAmBZBmCZCmDZDmEZEmFZFmGZGmHZHmIZImJZJ d dlKm9ZL d dlMmNZN d dlOmPZPmQZQmRZRmSZS d dlTmUZUmVZVmWZW d dlXmYZYmZZZm[Z[m\Z\ d dl]m^Z^ d dl_m`Z`maZa d dlbmcZcmdZdmeZemfZfmgZgmhZh d dlimjZj d dlkmlZlmmZmmnZnmoZompZpmqZqmrZrmsZsmtZt d dlumvZv d dlwmxZx d d lymzZz d d!l-m{Z{ d d"l|m}Z}m~Z~ d d#lmZ d d$lmZ d d%lmZ d&d'lmZ d&d(lmZmZ d&d)lmZ d&d*lmZ d&d+lmZ d,d-lm9Z9mZ d,d.lmZmZ d,d/lmZ d,d0lmZ d,d1lmZ d,d2lmZ d,d3lmZmZ d,d4lmZ d,d5lmZ d,d6lmZmZ d,d7lmZ d,d8lmZ d,d9l/mZmZmZmZmZmZmZmZmZ d,d:lmZ er d d;lmZmZ d d<lbmZ d d=lmZ d d>lmZ d,d?lmZ  e%d@          Z e dA          Zes e9j                    s	ddFZddKZnd dLlmZmZ erd dlZd dMlmZmZmZ  G dN dOej                  Ze G dP dQ                      ZddRZddTZ eͦ            Zej        Zej        Zej        Z e	j        eצ          Ze,j                            edU          Ze,j                            edV          Ze,j                            edW          Ze,j                            edX          Ze,j                            edY          Zdd]Zdd`ZddaZddbZ ej        d          ddd            Zej        dde            ZddgZd djZ	 dddoZddrZ	 dddsZddduZ	 	 	 ddd}Zdd~Zd	dZd
dZ	 dddZ	 dddZej        dd            Z G d de'dk          Z G d de&          ZddZ ed          dd            Z G d d          Z G d de          Z G d de          ZddZddZ	 dddddddZddZ ddZ	 dddZedfddĄZ ed           ZddτZddфZddԄZdd؄ZddلZ	 edvڦ           G dۄ dܦ                      Z
dd߄ZedkfddZefd dZd!dZedddkfd"dZddZd#dZd#dZd$dZ	 dddd%dZdS (&      )annotationsN)ABCabstractmethod)defaultdict)AbstractContextManager)	dataclass)currentframe)count)
attrgetter)AnyCallableOptionalTYPE_CHECKINGTypeVarUnion)Neveroverride	ParamSpecProtocol	TypedDictUnpack)mock)#min_cut_rematerialization_partition)fx)enable_python_dispatcher)compiled_autogradconfigloggingutils)get_interface_for_device)wrap_compiler_debug)	chromium_event_timedCompileEventLoggercountersdetect_fake_modedynamo_timedflatten_graph_inputsget_metrics_contextlazy_format_graph_codeset_feature_use)r   )!unwrap_tensor_subclass_parameters)aot_export_moduleGraphOutputNamemake_boxed_funcSerializableAOTDispatchCompiler)	code_hashFxGraphCacheoutput_code_log)BoxedDeviceIndexformat_default_skip_message#log_cudagraph_skip_and_bump_counterPlaceholderInfo)CustomPartitionerFn)"create_mapping_pre_post_grad_nodessave_args_for_compile_fx_inner)CompiledAOTICompiledFxGraphCompiledFxGraphConstantsWithGmget_expanded_dimsindex_expanded_dims
OutputCode)	cache_dir)		BoxedBoolcount_tangentsfresh_cacheget_all_devices	InputTypeis_gpushould_assume_input_aligned should_use_remote_fx_graph_cachetensor_is_aligned)FakeScriptObject)trace_structured)compile_time_strobelight_meta)GraphModule)free_unbacked_symbolsSymExprPrinter)FakeTensorProp)_WaitCounter)
OrderedSet   )aot_autograd)ShortenTraceback	SkipFrame)_use_lazy_graph_module)_PyTreeCodeGen)
has_triton   )r   metrics)get_wrapper_codegen_for_deviceinit_backend_registration)DebugContext)select_decomp_table)InductorError)joint_graph_passes)post_grad_passesview_to_reshape)pre_grad_passes)GraphLowering)get_device_typeIRNode)complex_memory_overlap)TritonBundler)	align_inputs_from_check_idxsclone_preserve_stridescopy_misaligned_inputs get_cloned_parameter_buffer_name%get_first_incompatible_cudagraph_node#maybe_get_suppress_shape_guards_ctxoutput_noderemove_unaligned_input_idxsshape_env_from_inputs)V)	GeneratorSequence)_StrideExprStr)
OpOverload)Weights)ExternKernelNode_P_Tattrstrreturn.Callable[[Callable[_P, _T]], Callable[_P, _T]]c                    t           j        S N)dynamo_utilsidentityr|   s    n/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torch/_inductor/compile_fx.pytime_and_logr      s    $$    argsobjectkwargsNonec                     d S r    )r   r   s     r   log_optimus_to_scubar      s    r   )r   r   )FQNGraphInputNameGraphSignaturec                      e Zd ZdZdZdZdS )FxCompileModer   rZ   rS   N)__name__
__module____qualname__NORMAL	SERIALIZE
SUBPROCESSr   r   r   r   r      s        F IJJJr   r   c                  .    e Zd ZU ded<   ded<   ded<   dS )FxCompileConfigr   modebool	use_asyncuse_progressiveNr   r   r   __annotations__r   r   r   r   r      s3         OOOr   r   c                 6   d} t           j                            |           }|t          t          j        dd          S d}d}|                                                    d          rd}|dd          }|                                                    d          rd}|dd          }	 |                                }t          t          |         ||          S # t          $ r dd l
} |j        t                    }|                    d	|| d
                    t          d t          j                                        D                                            t           j                            |            t          t          j        dd          cY S w xY w)NTORCHINDUCTOR_FX_COMPILE_MODEFzprogressive+T   zasync+   r   z>Invalid value of %s for %s. Expected one of %s. Using default.z, c              3  4   K   | ]}t          |          V  d S r   )repr.0xs     r   	<genexpr>z+_fx_compile_mode_default.<locals>.<genexpr>   s(      OOT!WWOOOOOOr   )osenvirongetr   r   r   lower
startswithupperKeyErrorr   	getLoggerr   errorjoinsorted__members__keyspop)namevaluer   r   r   logs         r   _fx_compile_mode_defaultr      s   *DJNN4  E}}3UEBBBIO{{}}// bcc
{{}})) 	abb	C}U3YPPP C C Cg))		LIIfOOm.G.L.L.N.NOOOOOPP		
 	
 	
 	
t}3UEBBBBBCs   */C B;FFlist[dict[str, Any]]c                     ddigS )Nmax_autotuneTr   r   r   r   _get_progression_configsr      s     
 r   
perf_hintspre_grad_graphspost_grad_graphscudagraph_static_inputsinductor_metrics	num_fixedint	list[int]c                    t           j        j                                        }t	          t          |                     }|r|j        s|S |j        j        S r   )torch_guardsTracingContexttry_getlistrangefw_metadatastatic_input_indices)r   contextfixeds      r   get_static_input_idxsr      sR    
 m*2244Gy!!""E '- 33r   gmrM   c                   | j                             d          d         }g }t          |j        d         t          j        j                  s|j        d         }n|j        }|D ]}t          |t          j        j                  r^|j                            d          x}Bt          |t          j	                  r(|
                    |                                           |
                    d            ||j        d<   d S )Noutputopr   valoriginal_output_strides)graph
find_nodes
isinstancer   r   r   Nodemetar   Tensorappendstride)r   rp   output_stridesoutput_node_argsr   r   s         r   record_original_output_stridesr      s    (%%%2215KNk&q)58=99 ,&+A.&+" ( (vux}--	(...;3-- < !!#**,,////!!$''''2@K.///r   c                    | j                             dt          j        j        j                  D ]1}t          | |j        d         j                  }t          |           2t          |            d S )Ncall_functionr   targetr   )r   r   r   opshigher_orderinvoke_subgraphgetattrr   r   )_recursive_record_original_output_stridesr   )r   nodesubgraphs      r   r   r     st    ##59#9#I $   < < 2ty|2331(;;;;"2&&&&&r   c           	     ~   | j                             dt          j        j        j                  D ]t          | j        d         j                  }|j                             d          D ]=fdt          t          j        d                             D             j        d<   >t          |           d S )Nr   r   r   r   r   c                t    g | ]4}t          j        d          |         t          j        j                  2|5S )r   )r   r   r   r   r   )r   idxr   s     r   
<listcomp>z>_recursive_record_user_visible_output_idxs.<locals>.<listcomp>$  sH     5 5 5dil3/??55 5 5r   user_visible_output_idxs)r   r   r   r   r   r   r   r   r   r   lenr   *_recursive_record_user_visible_output_idxs)r   r   r   s     @r   r   r     s    ##59#9#I $   = = 2ty|233N---:: 	 	D5 5 5 5 TYq\!2!2335 5 5DI011
 	38<<<<= =r   Callable[..., None]c                 4    t          j        t                    S r   )dynamo_loggingget_step_loggerr   r   r   r   _step_loggerr   ,  s    )#...r   c                     t           j                                        rSt           j        j        j        j        s:t           j                                        dk    rt          j        d           d S d S d S d S )N)   r   zTensorFloat32 tensor cores for float32 matrix multiplication available but not enabled. Consider setting `torch.set_float32_matmul_precision('high')` for better performance.)	r   cudais_availablebackendsmatmul
allow_tf32get_device_capabilitywarningswarnr   r   r   _warn_tf32_disabledr	  1  s     	
!!
#*5
 J,,..&88d	
 	
 	
 	
 	

 
 
 
 98r   modc                  
 t          d |                     d          D                       

                    t          d |                     d          D                                  d
fd}|j        j        D ]?}|j        dk    r0|j        }|                    d          s|                    d          sAt          | |          sR t          |          |          } t          |          |           }t          |t                    r%t          |t                    r|j        |j        u rn&t          j        ||          r|j        |j        k    r|                    d          rdnd} ||j        |          }| | }	|	|_        t#          ||	|           
                    |	           AdS )a  
    In aot_export_module (make_fx), we create get_attr nodes with name prefix
    "_tensor_constant" and "_torchbind_obj". See Tracer.create_arg() in
    torch/fx/_symbolic_trace.py

    However, this might result in name collision if the original mod already
    has a different buffer with the same name.

    We resolve this potential name collision here by changing the target name
    with a new number post fix.
    c                    g | ]\  }}|S r   r   r   r   r   s      r   r   z+_resolve_name_collision.<locals>.<listcomp>L  s    LLL)$LLLr   Fremove_duplicatec                    g | ]\  }}|S r   r   r  s      r   r   z+_resolve_name_collision.<locals>.<listcomp>O  s    TTTYT3DTTTr   r   fx.Graphprefixr}   r~   r   c                h   d}| j         D ]}|j        dk    r|j                            |          rvt	          |j                  t	          |          k    rQ|j                            |          d         }|                                rt          |t          |                    }D ]}|                    |          rlt	          |          t	          |          k    rL|                    |          d         }|                                rt          |t          |                    }|dz   S )Nr   get_attrrZ   )	nodesr   r   r   r   splitisdigitmaxr   )r   r  ir   post_fixkeyexisting_keyss         r   find_smallest_iz0_resolve_name_collision.<locals>.find_smallest_iR  s   K 	2 	2Dw*$$)?)?)G)G$t{##c&kk11#{0088<H'')) 23x==11  	2 	2C~~f%% 2s88c&kk))"yy004H'')) 23x==111ur   r  _tensor_constant_torchbind_objN)r   r  r  r}   r~   r   )rR   named_parametersupdatenamed_buffersr   r  r   r   r   hasattrr   r   rJ   real_objr   equaldtypesetattradd)r
  r   r  r   target_name	gm_targetmodel_targetr  new_idnew_target_namer  s             @r   _resolve_name_collisionr/  >  s#    LLs33U3KKLLL M TT#*;*;U*;*S*STTTUU          !/ !/7j  +K))"  !,,-=>> 3,, /
;//33I2:k22377L)%566 
|-=>>!*l.CCCI|44O|'999 ))*<==&""% 
 %_RXv66F!'111O)DKB333o...C!/ !/r   graph_signaturer   c                   ddl m}m} t          | |           i }|                     d          D ]\  }}|||<    |||||j                   |                     d          D ]\  }}|||<    |||||j                   |j        	                    d          }	g }
|	D ]}|j
        }||j        v r#|j        |         }|
                    |           5||j        v rM|j        |         }|
                    |           t          ||                   |j        t!          |          <   ||j        v sJ |
                    d            ddlm} t)          |j                                        j        d                   }g }|j        }|j        }|j        }t5          |          D ]|\  }}d }|t7          |          t7          |          z   t7          |          z   k     r-t9          |j
                  }||v r	||         }n||v r||         }|                    |           } |||
|t;          j                    d           }|S )	Nr   )_assign_attr	_AttrKindFr  )	attr_kindplaceholderr   )_unlift)torch.export.unflattenr2  r3  r/  r!  	PARAMETERr#  BUFFERr   r   r   inputs_to_parametersr   inputs_to_buffersrk   r   rm   user_inputstorch.export._unliftr6  tuplerp   r   buffers_to_mutateuser_inputs_to_mutateoutput_tokens	enumerater   r-   pytreeLeafSpec)r
  r   r0  r2  r3  
state_dictr   parambufferplaceholder_nodeslifted_inputsr   	node_nameparameter_namebuffer_namer6  outputsmutated_outputsbuffer_mutationsuser_input_mutationsrA  r   outr   unlifted_gms                            r   _unlift_graphrS    s    ?>>>>>>>C$$$OQJ++U+CC 
 
e 
4)		
 	
 	
 	
 	
 ))5)AA 
 
f!
4&		
 	
 	
 	
 	
 ++}+==)+M " ' 'I	<<<,A)LN  0000/;;;);IFK  ---&z+'>?? G4[AABB  ;;;;;  &&&&,,,,,,).rx/C/C/E/E/J1/M)N)NGO&8*@#1Mg&& 
& 
&S6:%&&-A)B)BBSEWEWWWW"38,,D'''(.---,T2u%%%%'
 K r   Fskip_invoke_subgraphr   Generator[str, None, None]c              #    K   t          d | j                            d          D                       }t                      }|                                 D ]=\  }}||v r4t	          |t
          j        j                  r|                    |           >|rX| j                            dt
          j	        j
        j                  D ]'}|                    |j        d         j                   (|E d {V  d S )Nc              3  $   K   | ]}|j         V  d S r   )r   r   s     r   r   z&_get_subgraph_names.<locals>.<genexpr>  s5       5 55 5 5 5 5 5r   r  r   r   r   r   )rR   r   r   named_childrenr   r   r   rM   r)  r   r   r   discardr   r   )r   rT  all_subgraph_namesfx_subgraph_names
child_namechild_moduler   s          r   _get_subgraph_namesr^    s,      +5 5 5(---<<5 5 5 + + *4$&$5$5$7$7 . . 
L +++
%(.1
 1
+ !!*--- ;H''uy'='M ( 
 
 	; 	;D %%dil&9::::          r   example_inputsSequence[InputType]c                4   t          ddd          5  t          j        }t          j        }t	          |           D ]3}t          | |          }t          |d          }t          | ||           4t          | |||          cd d d            S # 1 swxY w Y   d S )N_recursive_pre_grad_passesTpre_grad_pass_time_uslog_pt2_compile_eventdynamo_compile_column_usr   )	r&   r   add_pre_grad_passesremove_pre_grad_passesr^  r   rb  r(  rd   )r   r_  
add_passesremove_passessubgraph_namer   new_subgraphs          r   rb  rb    s    
$"!8
 
 
 N N
 /
5044 	5 	5Mr=11H5hCCLB|4444r>:}MMN N N N N N N N N N N N N N N N N Ns   A-BBBc                    t          ddd          5  t          | |          D ]"}t          | |          }t          ||           #t	          |            d d d            d S # 1 swxY w Y   d S )N_recursive_joint_graph_passesTjoint_graph_pass_time_usrd  )r&   r^  r   rn  ra   )r   rT  rk  r   s       r   rn  rn    s     
'"!;
 
 
   15IJJ 	J 	JMr=11H)(4HIIII2                    AA##A'*A'is_inferencec                    t          ddd          5  t          |           D ]"}t          | |          }t          ||           #t	          | |           d d d            d S # 1 swxY w Y   d S )N_recursive_post_grad_passesTpost_grad_pass_time_usrd  )r&   r^  r   rs  rb   )r   rq  rk  r   s       r   rs  rs    s    	%"!9
 
 
 + +
 144 	@ 	@Mr=11H',????\***+ + + + + + + + + + + + + + + + + +rp  Tskip_constructorlifted_constant_namesOptional[list[str]]skip_folding_node_fn)Optional[Callable[[torch.fx.Node], bool]]"tuple[GraphModule, dict[str, int]]c                   ddl m}m}m}m}m}  || |||          }	|
 |	            nd}
d t          t          |	j        j	                  d         j
        d                   D             }g }g }i }| j        j	        D ]R}|j        |v r|                    |           !|j        |         |k    r |j        dk    r|                    |           S|D ]?}d|j        z   } || |||
||j                          nd|           ||j                 ||<   @|ddd         D ]M}|j        r*|j        D ]!}|j        |         |k    sJ d| d	            "3| j                            |           N|                                  |	|fS )
a  
    This function takes an GraphModule input "gm".
    The gm will be split into 2 components,
      1) const_gm, which consists the subgraph of gm that can be constant folded.
      2) gm (being inplace modified,) which returns the graph after constant folding.

    If an additional "lifted_constants" argument is passed in, we will assume the gm has
    been lifted and run the transformation accordingly.

    When a "skip_folding_node_fn" callback is passed, we will skip constant folding on
    the nodes for which the callback returns True.

    const_output_index is a mapping of corresponding node name from gm to the
    output index of const_gm.
    Returns (const_gm, const_output_index)
    r   )CONST_MODULE_TAGMETA_TAG
MODULE_TAGreplace_node_with_constantrun_and_get_constant_graphNc                $    i | ]\  }}|j         |S r   )r   )r   r   r   s      r   
<dictcomp>z"split_const_gm.<locals>.<dictcomp>A  s-       Q  r   r  r5  _FOLDED_CONST_znode: z user not empty.) torch._inductor.constant_foldingr|  r}  r~  r  r  rB  r>  r   r  r   r   r   r   r   users
erase_node	recompile)r   ru  rv  rx  r|  r}  r~  r  r  const_gmconst_resultconst_outputsto_erase_nodeto_replace_nodeconst_output_indexr   new_const_namens                     r   split_const_gmr    sM   ,              *)
35I H "7!>88:::DL "+E(.2F,G,G,K,PQR,S"T"T  M MO ' '9%%""4((((Yx $444M9Q9Q  &&& F F)DI5"" )0 ]49566		
 		
 		
 .;49-E>**ddd# & &: 	&Z W Wvh':5557V7V7V7V5555W H%%%%LLNNN'''r   c                   t           j        j        }t          |j        j        |j        j        |j        j        |j        j        g          }|D ]}| j	        
                    d|          D ]t}t          |j                            dd           t           j                  r?|j        d         j        t           j        k    r|j        d         j        j        dk    r  dS udS )Nr   r   r   r  TF)r   r   atenrR   mmdefaultaddmmbmmbaddbmmr   r   r   r   r   r   r'  float32devicetype)r   r  tf32_opsr   r   s        r   is_tf32_warning_applicabler  f  s    9>DGOJHL 		
 H   H''?6'JJ 	 	D49==55u|DDIe$*em;;Ie$+0F::ttt	 5r   "AbstractContextManager[None, None]c                l   t          d | D                       }t          j        r=t          j        r1|s/t                              d           t          j        d          S t          j        j        r/t                              d           t          j        d          S t          j
                    S )z
    For CPU backend, enable comprehensive padding causes some unit tests
    fail due to changing number of generated kernels. Skip for now.
    c              3  |   K   | ]7}t          |t          j                  t          |j        j                  V  8d S r   )r   r   r   rF   r  r  )r   ts     r   r   z6maybe_disable_comprehensive_padding.<locals>.<genexpr>  sS        "#Au|9T9Tqx}     r   z!Skip comprehensive padding on CPUF)comprehensive_paddingz;Skip comprehensive padding for use_runtime_constant_folding)anyr   disable_padding_cpur  perf_hint_loginfopatchaot_inductoruse_runtime_constant_folding
contextlibnullcontext)r_  has_gpus     r   #maybe_disable_comprehensive_paddingr  {  s       '5    G ! 	(f&B 	(7 	(>???|%8888			9 (I	
 	
 	
 |%8888%'''r   cpp_wrapperaot_modec                Z    | s|rt          j        d          S t          j                    S )zH
    graph partition does not support cpp_wrapper and aot_mode yet.
    F)graph_partition)r   r  r  r  )r  r  s     r   maybe_disable_graph_partitionr    s5      (h (|E2222%'''r   force_allow_non_fake_inputs torch._subclasses.FakeTensorModec                   t                      5  t          |          }|s:t          j                            d          } t          | |          j        |  ni|st          j                    n t          j
                            |dd          }|5   t          | |          j        |  ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   |S )z}
    If we can not detect fake mode from the context of inputs, create one.

    The created fake mode will be returned.
    Tallow_non_fake_inputs)r   r  N)r   r%   r   _subclassesFakeTensorModerP   	propagater  r  r   r  r   propagate_dont_convert_inputs)r   r_  r  	fake_modectxs        r   fake_tensor_propr    sp    
"	#	#  $^44	 	)88t8TTI8N2I...8.III 3Q
&(((Z&&y2I4PP 
   Pr	222P#                               s6   BCB9-C9B=	=C B=	CCCconfig_patches$Optional[Union[str, dict[str, Any]]]dict[str, Any]c                    t          j        |           5  t          j                    cd d d            S # 1 swxY w Y   d S r   )r   r  get_config_copy)r  s    r   get_patched_config_dictr    s     
n	%	% ( (%''( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( (s   599Generator[None, None, None]c               #     K   t           j        r<t          t                      d          5  d V  d d d            d S # 1 swxY w Y   d S d V  d S )NF)dirdelete)r   force_disable_cachesrC   r@   r   r   r   with_fresh_cache_if_configr    s      "  Y[[777 	 	EEE	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   >AAc                  ~    e Zd ZU ded<   ded<   ded<   ded<   ded	<   ded
<   ded<   ded<   ded<   ded<   ded<   dS )_CompileFxKwargszOptional[BoxedBool]
cudagraphsSequence[int]static_input_idxsr   is_backwardzOptional[int]graph_idr  r  rq  zOptional[bool]
layout_optz1Optional[Callable[[list[ExternKernelNode]], Any]]extern_node_serializerzOptional[BoxedDeviceIndex]boxed_forward_device_index
fx_wrapperNr   r   r   r   r  r    s         ####$$$$NNNMMMM::::r   r  )totalc                      e Zd Zdd	Zd
S )_CompileFxCallabler   rM   r_  r`  r   Unpack[_CompileFxKwargs]r~   r?   c                    d S r   r   )selfr   r_  r   s       r   __call__z_CompileFxCallable.__call__  s	    
 Sr   Nr   rM   r_  r`  r   r  r~   r?   )r   r   r   r  r   r   r   r  r    s(             r   r  r  r?   c                *   |                     dd            |                     dd           |                     dd           |                     dd            |                     dd           |                     dd           |                     d	d           |                     d
d            |                     dd            |                     dd            t          j                    5 }|                    t          j        j                                                   |                    t          t          j
                             |                    t          j        dddddd                     |                    t                                 |                    t                                 t          j        d|d                     t#          t$          d          | |fi |cd d d            S # 1 swxY w Y   d S )Nr  r  r   r  Fr  r  r  rq  r  r  r  compile_fx_innerinductor_compileTcompile_inductor#inductor_cumulative_compile_time_us)
phase_namere  log_waitcounterwaitcounter_name_overriderf  )r  inductor)compiler_name)
setdefaultr  	ExitStackenter_contextr   r   _python_dispatch_disable_current_modesrW   dynamo_configuse_lazy_graph_moduler   r&   r  r^   r#   pt2_compiler!   _compile_fx_inner)r   r_  r   stacks       r   r  r    si   
 lD)))
)2...
mU+++
j$'''
mU+++
lE***
ne,,,
2D999
lD)))
.555 
			 
5EK8OOQQRRR2=3VWWXXX%"-&* $*<)N  		
 		
 		
 	688999LNN+++&}-	
 	
 	
 	
 P"#4JOOO
 
 
 
'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
s   0DHHHzcompilation time (in seconds)r   graph_kwargsc                \  '( t           j        }t          j        j        j                                         t          j        | j	                  dk    rx|svddl
m} ddlm} |                    |            t          j        j                                        }t%          j        dd|i|j                   t+          | j                  S |                    dd          }t0                              d	|           t5          ||          }t7          t9          t;          t=          | j	        j                                      j         d         tB          tD          f          sJ d
| j	                     |#                    d          !tI          tJ          j&        j'                  |d<   tJ          j(        rtS          | |fi | tU          j*                    }	tW                      }
tY                       t[          d d t]          |           D             D                       }t_          ddd          5  tJ          j0         o)tJ          j1        p|
o| o|ot          j2        j%        j3         }tJ          j1        }|
}ti          d|           tj                              d||||tJ          j0                   tm          |          D ]C\  }}t7          |t          j7                  r$tq          |j9        j:                  r||v rd|_;        Dd}d}d'd}ty          |           }tU          j=                    }|rt}          j?        | ||||          \  }'||\  }}tj                              d|           |r-t}          j@                    }tj                              d           t}          jA        ||||||#                    dd          |          \  }'ntj                              d           t          j2        j%        j3        r|J 'J t          jC                     	 t          | ||fi |}|J t          jE                    \  }}|F                    |           nX# t          t          f$ r  t          $ r:}t          |t                                L                    |jM                  dd}~ww xY w	 t          jN                     n9# t          jN                     w xY w''d         dk    rH|J tj                              d''#                    dd          nd           t          | ||fi |}nˉ'd         d k    rz|J |J tj                              d!           t          jC                     	 t          | ||fi |}|J tU          j=                    |z
  |_O        |\  }}||_P        ||_Q        t          jE                    \  }}|F                    |           nX# t          t          f$ r  t          $ r:}t          |t                                L                    |jM                  dd}~ww xY w	 t          jN                     n# t          jN                     w xY w|t          |          'd"<   |jO        'd#<   tj                              d$|           t}          jS        |||||           nD'd         d%k    sJ |J |J |\  }}tj                              d&|           ||_P        ||_Q        |J |}''d         nd'(t%          jT        d(( 'pi |)           t%          jU        d*(|'r'#                    d+          nd'r'#                    d,          nd'r'#                    d          nd-||.           't          d/(fd0'fd12           |W                    |||           ddd           n# 1 swxY w Y   tj                              d3tU          j*                    |	z
             tj          X                    t          jZ                  rg }t          d4         \                                D ]P\  }}|]                    d5          }t          |          d6k     r|_                    |d7d8d8d8|g           Jt          |          d9k    rd5`                    |dd:                   nd5`                    |dd;                   } | a                    d<          }!|!r\t          |          d9k    rI|d:d         \  }"}#}$}%d5`                    |dd:                   } |_                    | |"|#|$|%|g           
|d;d         \  }#}$}%d5`                    |dd;                   } |_                    | d7|#|$|%|g           Rtj          b                    d=           tj          b                    d>c                    d?d@dAdBdCdD                     tj          b                    dE           |D ]>}&tj          b                     d>jc        |&            tj          b                    dE           ?t          j        j        j                                          t                      t          jZ        dF|d         rdGndH dI|dJ                     |S )Kz
    Inductor API that compiles a single graph.

    If you change the argument list for this function, make sure you
    also update the call to save_args_for_compile_fx_inner below accordingly.
    r   )CompileEventLogLevel)_LazyGraphModulezbackward no-op
compile_id)metadata	log_levelr  r   z&static input idxs compile_fx_inner: %szGinductor can only compile FX graphs which return a tuple/list, but got r  Nc              3  (   K   | ]}||j         V  d S r   )supports_caching)r   backends     r   r   z$_compile_fx_inner.<locals>.<genexpr>N  s;       	# 	#  	  	# 	#r   c              3  j   K   | ].}t          |j        t          j        t          j                  V  /d S r   )r\   r  r   r  r  r   r  s     r   r   z$_compile_fx_inner.<locals>.<genexpr>P  sQ       
 
  +V/1B 
 
 
 
 
 
r   fx_codegen_and_compileT)re  r  fx_cachezXFX cache status: use_cache=%s, local=%s, remote=%s, aot_mode=%s, force_disable_caches=%szFX cache key generated: %szUsing remote FX cacher  F)r  	constantszFailed to generate FX cache keycache_statebypasszFX cache bypass reason: %scache_bypass_reasonunknownz*FX cache disabled or key generation failedmissz,FX cache miss, compiling and saving to cachetriton_bundler_metatime_taken_nsz.Saving compiled graph to FX cache with key: %shitzFX cache hit with key: %sdisabledfx_graph_cache_)r  time_nsr  r  
componentszcache not enabled)r   cache_event_timer  r  r  remote_cache_enabledlocal_cache_enabledartifactc                     d  ddS )Nr	  jsonr   encodingr   )r   s   r   <lambda>z#_compile_fx_inner.<locals>.<lambda>#  s    ;k;; &% % r   c                 ,    t          j                   S r   )r  dumps)
cache_infos   r   r  z#_compile_fx_inner.<locals>.<lambda>'  s    4:j#9#9 r   metadata_fn
payload_fnz%FX codegen and compilation took %.3fsaten_mm_info_   -?   )r  r  z$Overview info of inductor aten mms: z3{:<30} | {:<20} | {:<20} | {:<20} | {:<20} | {:<20}NameBMNKCountz----------------------------------------------------------------------------------------------------------------------------------ztorchinductor done compiling 	BACKWARDSFORWARDS graph r  )ers   aot_compilationr   	_inductorasync_compileCompiledTritonKernelscache_clearr   count_callsr   torch._dynamo.utilsr  torch.fx._lazy_graph_moduler  force_recompiler   CompileContextcurrent_compile_idr#   log_instant_eventPT2_COMPILEr.   forwardr  static_inputs_logdebugget_input_idxs_to_checkr   nextiterreversedr  r   r>  r   r   rA   r   tritonr  	save_argsr9   timerH   r]   allrD   r&   r  fx_graph_cache
_functorchbundled_autograd_cacher*   r   rB  r   rF   r  r  _is_inductor_staticr<   r
  r1   prepare_keyget_remote_cacheload_with_keyri   begin_compiler  collectset_triton_bundlerU   rV   	Exceptionr`   r	   with_traceback__traceback__end_compile_time_taken_ns_fx_graph_cache_key_fx_graph_cache_debug_linesr}   _save_graphinstantr  rK   post_compileisEnabledForr   INFOr$   itemsr  r   r   r   endswithr  formatr   ))r   r_  r  r  r  r  r  r  inputs_to_checkstartfx_graph_remote_cachebackends_support_caching	use_cachelocalremoter  inputmb_compiled_graphkey_inforemote_cacher  
start_timer  debug_linestriton_bundler  e	cache_keycompiled_graphmm_table_datar   partsr   
is_batchedbatchmr  krowr  r   s)                                          @@r   r  r    s    &H 
O!7CCEEE))Q..x. 	=<<<<<@@@@@@((,,,]1DDFF
,"J/*6	
 	
 	
 	
 rz***'3'>'>?RTV'W'WDFWXXX-n>OPPOd4 8 899::?BUDMRR  \RTRZ\\ R %%-%.v}/G%H%H\" 
&	
 	
 	
 	
 	
 IKKE<>> " 	# 	#
 
 *"--	
 
 
	# 	# 	# 	  	  
 d
 
 
 PM PM ++ C&?*?CC )C $+BB 	 %&
I...		f'	
 	
 	
 ".11 	1 	1HAu5%,//15<,--1 ***,0)26
2266	 \^^
 	=%1%=NL/6& &"Xz
 ##+ [		6<<< 7#/#@#B#BLII56660<0J"  , 0 0 F F'1 1 1-!:: 		;<<<"9 ]	H$,,,%%%
 '))),$:% %;G% %! )444 ")++!'!33MBBBB$i0      #A|~~66EEO  C )++++)++++
 :m#<#H#H$,,,II, "- NN#8)DDDE   !7NO! !7C! !
 &&00$,,,'''IIDEEE'))),$:% %;G% %! )44437<>>J3N!0)1&	;8A!5@K!= ")++!'!33MBBBB$i0      #A|~~66EEO  C )++++)++++".478K4L4L
01*;*JJ'IIF	RRR$!    m,5555$000''''/$YII19===4=1<G9 ,,,* *4)?J}%%Z 	 	"+k++%2	
 	
 	
 	
 	&#')3=
u%%%7AKz~~l333t )
4555(!' %	
 	
 	
 	
  !    :999    	##NI|LLLaPM PM PM PM PM PM PM PM PM PM PM PM PM PM PMd II5ty{{U7JKKK %% ! ">288:: 	B 	BJCIIcNNE5zzA~~$$c3S#u%EFFF ,/u::??388E#2#J'''sPRs@T@TD'9::J 	Bc%jjAoo!&rssq!Qxxcrc
++$$dE1aE%BCCCC  *1axxcrc
++$$dCAq%%@AAAA7888AHHS#sG 	
 	
 	

 	  	  	 CHHQJQSVWXXXHHY 
O!7CCEEELNN	,&}5E;;:	, 	,j)	, 	,   s   8G(_!>Q S R5;5R00R55S9_S##B_?A,W,+Y,Y5X<<YY_Y..E___c                  &    e Zd ZU dZded<   ddZdS )	_FxCompileStatr   r   codegen_and_compiler~   r}   c                    d| j          S )Nzcodegen_and_compile: )rw  )r  s    r   __repr__z_FxCompileStat.__repr__b  s    At'?AAAr   N)r~   r}   )r   r   r   rw  r   ry  r   r   r   rv  rv  ^  sB              B B B B B Br   rv  c                  d    e Zd ZU dZ ee          Zded<   edd            Z	e
dd            ZdS )	FxCompileza
    An FxCompile represents a mechanism that can turn a GraphModule into an
    OutputCode.
    z%dict[type[FxCompile], _FxCompileStat]_compile_statsr   rM   r_  r`  r]  r  r  r  r~   r?   c                    d S r   r   )r  r   r_  r]  r  s        r   rw  zFxCompile.codegen_and_compiler  s	     Sr   r   c                8    | j                                          d S r   )r|  clear)clss    r   _reset_statszFxCompile._reset_stats{  s      """""r   N
r   rM   r_  r`  r]  r  r  r  r~   r?   r~   r   )r   r   r   __doc__r   rv  r|  r   r   rw  classmethodr  r   r   r   r{  r{  f  s~           =HK<W<WNWWWW
    ^ # # # [# # #r   r{  c                  &    e Zd Zedd            ZdS )_InProcessFxCompiler   rM   r_  r`  r]  r  r  r  r~   r?   c                  34567 d|v r|d         J |d         }|                     dd          }|                     dd          }|                     dd          }|                     dd          }	|                     d	d          }
t          j        }|                     d
d          }|                     dd          }t          d                                          5  t          j                    5  t          j        x}/ddl	}t                              d|            |j        |           t                    rt                       t          d                                         }t#          j        t'          t#          j                    d                      t+                      t,          j        d|rdnd d|            t1          j                    }t4          j        j        j                            ||dd           |                                7tA          dd 7fd           t          j!        "                    |           tG          |          }tI                     tK          dd          5  t5          j&                    5  tO          |          }ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   tQ                     tA          dd fd           t          j)        |          5  tU                    }|5  tW          |           ddd           n# 1 swxY w Y   t          j!        ,                    |           tZ          !                    d t]          d!ddd"                     /                    dddd#          4tA          dd$ 4fd%           t          j0        j1        dk    rat4          j2        j3        4                    j5                  }tm          t4          j7        j!        j8        |          t4          j7        j!        _9        tu                      }|;                                rrt"          j<        d&k     r-t{          t          d'         >                                          }nt          d'         ?                                }t          jA        d|(           t          jB                    rW	 t          d)t          t                                i*           n*# t          $ r t                              d+           Y nw xY wddd           n# 1 swxY w Y   t          j)        |          5  t          |          5  t          |	|          5  d}d}d}d}|rt          jI        jJ        rt          d, -          \  }}t          |g |||	||||d|
.          }t          jM        |          5  t          jN        g           5  |	s
J d/            |O                                 |P                                \  }}ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   t          ||||	||||||r|jQ        nd|r|jQ        nd|||
0          }t          jS                    }|T                                 t          jM        |          5  t          jN        g           5   |jO        |  g }|jU        t                      6|jU        D ]} t          | t                    r| Y                                rxt          t          | \                                                    dk    rF|]                    t          6fd1| _                                j`        D                                  |]                    d           t          |           d}!tK          d2d          5  |jb        rZ|jc        rS|jd        rJ |e                                d         jf        }"t          j!        d3|"/                    d4                     nh|jb        r4d5d6lhmi}# |jd        s
J d/            |P                                \  }$}%t          j!        d7|$jQ                   |%jQ        rt          j!        d8|%jQ                   d}&t          jj        r4|k                    t          jj                  }&t          j!        d9|&           tK          d:d          5  |#l                    ||$jQ        |%jQ        |&|jm        g t          o                    |jp        jq        |r|jp        jq        ng z             ;          }"ddd           n# 1 swxY w Y   n,|r                                }'|'js        }"t          |'d<d          }!ddd           n# 1 swxY w Y   d5d3t          j0        j1        dk    rt          jv        t4          j7        j!        w                                          5t          jv        t4          j7        j!        jx                  3tA          dd= 5fd>           tA          dd? 3fd@           d}(t          z                    t,          j                  rv|{                                \  })}*}(t          xj|        |)z  c_|        t          xj}        |(z  c_}        t          xj~        |*z  c_~        t                              dA|)|*|(dB           t          j        r<|{                                \  }+}+}(t4          j7        j!                            |(           t4          j7        j!                            |j        j                   |rt          j        j        rt          j5        j        st5          j7        j        j        | rd},j5        j        D ]}-|-j                             dCd          }.|-j        dDk    s>t          |.t4          j                  r$t4          j7        j                            |.          sg|-j                             dEd          x},r ndF}/|,r	|/ dG|, dH}/n|/ dH}/|/t          j5        _        |rct          j5        j        sRt                    }0|0r@dI|0j         }/|0j                             dEd          x},r|/ dG|, dH}/|/t          j5        _        t          j        rt          |"t          t          t4          j2        j        f          sJ t#          |"                      t%          |"          cddd           cddd           cddd           cddd           cddd           cddd           cddd           S |r@t          j5        j        s/ddJlm}1  |1t          j5        j                  t          j5        _        | j        t#          |                    xj        d5z  c_        t4          j7        j!        j        rvt4          j7        j!        j        `t          t4          j        j                                                  }2|                     d          }||2t4          j7        j!        j        |<   t;          |"||t          j5        j        |                                t          d         |z
  |||||74|!53          cddd           cddd           cddd           cddd           cddd           cddd           cddd           S # 1 swxY w Y   ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   ddd           dS # 1 swxY w Y   dS )KzS
        Generates the OutputCode from the GraphModule and example_inputs.
        r  Nr  r   r  Fr  r  r  rq  r  z/pytorch.wait_counter.actual_codegen_and_compiler   z3Sleeping for %s since sleep_sec_TESTING_ONLY is setr  i  ztorchinductor compiling r)  r*  r+  )save_dirr  c                     dddS )Nfx_graph_runnablestringr  r   r   r   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    / (% % r   c                      S r   r   )runnable_graph_strs   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    #5 r   r  additional_fake_tensor_propTre  c                     dddS )Nbefore_post_grad_graphr  r  r   r   r   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    4 (% % r   c                 4                          ddd          S NFTprint_outputinclude_strideinclude_deviceprint_readabler   s   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s#    2#4#4!&tD $5 $ $ r   rq  %szAFTER POST GRADr  r  colored)r  r  r  fast_sympy_printc                     dddS )Nafter_post_grad_graphr  r  r   r   r   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s     7$,) ) r   c                      S r   r   )inductor_post_grad_graph_strs   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    'C r   )r  
   graph_break)	overwritenum_graph_breakspt2_configs)extra_loggingzfailed to log pt2_configsc                    | j         dk    oat          | j        t                    oG| j                            d          p-t          | j                            dd           t                    S )Nr  r   r   )r   r   r   r}   r   r   r   rJ   )r   s    r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>O  sc    $'Z:O ;&t{C88; !K223CDD X)$)--t*D*DFVWW	 r   )rx  )
r_  	shape_envr  r  r  r  rq  r  is_const_graphr  z"AOT mode only supports C++ wrapper)r_  r  r  r  r  r  rq  r  r  const_wrapper_codeconst_kernel_codeconst_moduler]  r  c              3  B   K   | ]}                     |          V  d S r   )doprint)r   sps     r   r   z:_InProcessFxCompile.codegen_and_compile.<locals>.<genexpr>  s-      )X)X1!))A,,)X)X)X)X)X)Xr   zGraphLowering.compile_to_fnzOutput graph module: 
%s)r  rZ   )AotCodeCompilerzOutput wrapper code: 
%szOutput kernel code:
%sz#Serialized Extern Kernel Nodes: 
%szAotCodeCompiler.compile)device_typeadditional_filesrunnerc                     dddS )N*inductor_provenance_tracking_node_mappingsr  r  r   r   r   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    (T,21 1 r   c                      S r   r   )r  s   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    /Y r   c                     dddS )N0inductor_provenance_tracking_kernel_stack_tracesr  r  r   r   r   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    (Z,21 1 r   c                      S r   r   )inductor_kernel_stack_trace_strs   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    /N r   zGraph Metrics:
%s)num_bytes_accessednodes_num_elemnode_runtimesr   r5  stack_tracezWgraph with symbolic shapes inputs and config.triton.cudagraph_skip_dynamic_graphs=True.z Found from 
z,disabling cudagraphs due to incompatible op ) check_lowering_disable_cudagraph)r   rs   r,  rQ   guardr   preserve_rng_stater   sleep_sec_TESTING_ONLYrB  r   warningsleepr  r	  r$   copysyssetrecursionlimitr  getrecursionlimitr   r   rY  ioStringIOr   _dynamorepro	after_aotsave_graph_reprogetvaluerK   r;  fx_graphrr   rc   r&   no_gradr  r   set_fake_modeget_cuda_device_contextrs  fx_graph_transformedpost_grad_graphs_logr)   r  traceprovenance_tracking_levelr   	tracebackget_graph_provenance_jsonr   r8   r-  _pre_grad_graph_id _inductor_post_to_pre_grad_nodesr(   in_progressversion_infosumvaluesr  r#   compilation_metric	is_fbcoder   r}   r  rN  r  r  r  r  r  re   set_graph_handlerset_extern_kernel_nodesruncodegen_with_cpp_wrapperr   r[   CachedMetricsHelperfreeze_runtime_assertsgraph_outputsrO   r   rg   has_tensor_outputr   rN   
get_strider   r>  
get_layoutr   _check_triton_bf16_supportr  r  r  codegenr   r2   	codecacher  extern_kernel_nodesr  compiler  dictfromkeyswrapper_coder  compile_to_modulecallr   r  r  dump_inductor_provenance_info_inductor_kernel_stack_traceinductor_metrics_logrX  count_bytesr  r  r  r  log_tlparselog_runtime_and_tensor_metalog_collective_schedule	schedulerr  r@  cudagraph_skip_dynamic_graphsdisable_cudagraphs_reasonr   any_is_symbolicr   r   r   rn   r   r   rM   r  r:   torch._inductor.cudagraph_utilsr  device_node_mappingr|  rw  RECORD_GRAPH_EXECUTIONGRAPH_COMPILE_IDSr   r5  r6  r;   
get_deltas)8r  r   r_  r]  r  r  r  r  r  r  r  r  rq  r  	sleep_secrB  inductor_countersfdr  r  cuda_contextprovenance_tracking_jsonmetrics_contextr  r  const_graphr  r  r  r   metrics_helperr   rQ  compiled_fn_runnercompiled_fnr  r  kernel_codeserialized_extern_kernel_nodescompiled_moduler  	num_bytesr  r  r  r   meta_valdisablemaybe_incompat_noder  r  r  r  r  r  r  s8    `                                                 @@@@@r   rw  z'_InProcessFxCompile.codegen_and_compile  s     |++\0J0V0VV ,\ :
+7+;+;<OQS+T+T(,,]EBB"."2"2:t"D"D(,,]EBB'++L%@@
*)--neDD5t<< 	
 JKKQQSSQ	 Q	+--Q	 Q	 $::	GI9   
9%%%)"-- &#%%% ( 4 9 9 ; ; !#c&;&=&=t"D"DEEELNN$"-=;;:$ $!$ $   BM)::B
T ;    "$  6555    GR000
 .n==I$ B-T   E E ]__ E E 0^ D DIE E E E E E E E E E E E E E EE E E E E E E E E E E E E E E 6b999
     	 	 	 	 ++ CA CA6r::! O O/NNNNO O O O O O O O O O O O O O O,,R@@@$***)'+'+ $  	 	 	 02/@/@!&#'#'%)	 0A 0 0, !! !  DCCC    <9Q>>*DDRXNN - ;!O1D4  O)J #6"7"7"..00 ''11+.x/F/M/M/O/O+P+P((+3M+B+H+H+J+J(&9"&9I    #%% 
A	A, -s3J3L3L/M/M+    
 % A A A $?@@@@@AACA CA CA CA CA CA CA CA CA CA CA CA CA CA CAL 	**o o3NCCo o .k8DDo o
 &*""%)"$(! " 3 P " 4B. .4 4 40H0 #0 ')"+!)$/!)/E%1$/'+#-# # #K +K88 1"55   +PP,PPP{#)))'@@BB >*,=                              & $2'% +%+A!- +'94FP*00D 4EN)//$!,$3)-  0 ")!<!>!> ,,...(// d d1J21N1N d dEI~..QSN*6 +,,#(#6 < <C *3 7 7
<$'$9$9$;$;
< %((=cnn>N>N(O(O$P$PTU$U$U !/ 5 5$))X)X)X)X@P@P@W)X)X)X$X$X!" !" !" !" !/ 5 5d ; ; ; ;.u555 *.&%5T   > > !> ;e.> ;','888#8*/--//!*<*?K+1 ; + : : : N N   
 #^ 3BBBBBB#(#4   D #4 9>8V8V8X8X5L++1 ;\=O    +0 " / 5$={?P!" !" !" >B: 4 "$)$@$@AV$W$W !? !0 5$J$B!" !" !"
 ". 9QU" " " " " />.E.E$)$0$6$/$5$B050A	6&)-,1,>,O 4?180H0Y0Y57	-.** **	6& /F /" /"	" " " " " " " " " " " " " " ", /4.E.E.G.GO*9*>K18 /42 2.y> > > > > > > > > > > > > > >B BF>6:3|=BBEIZ!O1OOQQF FB ;?*!O1N; ;7 )&) ) (Z'Y'Y'Y    )&) ) (O'N'N'N    %)M+88FF CHCTCTCVCV@	>=22i?22-->--...@..,1106?2@1>    ) Y.3.?.?.A.A+1m-II-XXX O)AA%/BWXXX #D"MGD !" AD "O1A>R	D '+$&HN 
& 
&D'+y}}UD'A'AH $= 8 8'1(EL'I'I !9','<'L'LX'V'V !9 !).2immM4.P.PP{ & %&"{& 5)0&M&Mk&M&M&MGG)0nnnG<C9! H!'*K H.STV.W.W+. H&qUhUo&q&qG.A.F.J.J -t/ /  { R .5*Q*Q+*Q*Q*Q@GAG=( 9)'#tUX5I)J    - -,,- -    ,K88kd d d d d d d d d d d d d dWo o o o o o o o o o o o o o o o o o o o oEQ	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	J " 	!'*K 	     
 =< ! ;  9 'T

3GG1LGG -D!O1CO%(!M8KKMM& &
 $0#3#3J#?#?#/ * "O1CHM +#&9&1133 ,/@@"&)$'*4*B7# ed d d d d d d d d d d d d dWo o o o o o o o o o o o o o o o o o o o oEQ	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	Zd d d d d d d d d d d d d d d d d d d d d d dWo o o o o o o o o o o o o o o o o o o o o o o o o o o o o o o o o o o o o o o o o o o o oEQ	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	 Q	s  $~48F~KJ<	0K<K  KK K~K~K<~U)M	;UMUMFU,+TU$T?	<U>T?	?U~U~U~-~=}.	A}-Y8Y:YY

YY
Y}Y!!}$Y!%A.}} (D|),D&gA e>2g>fgf0g6|)g|)	g
M0|):} }}.	~*~6~4E|)}  },}.	8~~~4)|--} 0|-1} 4} }}}}}.	}}.	}}.	"~.}22~5}26~9~~		~~	~~4~ 	 ~4#~ 	$~44~8;~8Nr  )r   r   r   r   rw  r   r   r   r  r    s8        m m m Xm m mr   r  r]  r  c                ,   t           t          j        k    rt                      }nKt           t          j        k    rddlm}  |            }n%t           t          j        k    rddlm	}  |            }t          r1ddlm} ddlm} t          ||          s
J d             ||          }t          rOddlm}	 ddlm} t          ||          s
J d            t#                      }
t                      } |	|||
          }|                    | |||          S )	NrZ   )_DebugSerdeFxCompile)_SubprocessFxCompile)_AsyncFxCompile)_OutOfProcessFxCompilez7async is only valid with an out-of-process compile mode)_ProgressiveFxCompilez=progressive is only valid with an out-of-process compile mode)fx_compile_moder   r   r  r   compile_fx_extr  r   compile_fx_subprocr  fx_compile_asynccompile_fx_asyncr   r!  r   fx_compile_progressiver"  r   rw  )r   r_  r]  r  schemer  r  r   r!  r"  progression_configsfast_schemes               r   r  r  r  s    -...$&&	M3	3	3888888%%''	M4	4	4<<<<<<%%'' )555555::::::&"899 	
 	
E	
 	
9 !(( Q;;;;;;::::::&"899 	
 	
K	
 	
9 788 *++&&{F<OPP%%b./<XXXr   inputsr  c                   g }t          |           D ]\  }}t          |t          j                  s t	          |j        j                  s:t                      5  ||v rt          |          r	 ddd           it          |          s	 ddd           	 ddd           n# 1 swxY w Y   |
                    |           |S )z
    This function runs at compile time, and generates a list of indices for which we
    might need to do a copy to preserve alignment requirements.
    N)rB  r   r   r   rF   r  r  ro   rI   rG   r   )r,  r  ids_to_checkr  rd  s        r   r<  r<    sR    Lf%%  5%.. 	el'(( 	022 	 	 %%%*;E*B*B%		 	 	 	 	 	 	
 /u55 	 	 	 	 	 	 	
	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	As   B$;B$$B(	+B(	r   )r  placeholdersmutated_input_idxsmodelCallable[..., Any]device_indexstack_traceslist[Optional[str]]r  r  tuple[torch.Tensor, ...]r/  Sequence[PlaceholderInfo]r0  tuple[int, ...]c                    ddl m}	 t          j        j        r?t          j        |	|||||||t          j        j	        
                                	  	        nt          d d	 fd}
|
S )
Nr   )cudagraphify_impl)r3  r4  r  rq  r  r/  r0  r  
new_inputsr`  r~   r   c                    8t          j                    5   |           d d d            n# 1 swxY w Y    |           S r   )r   r  )r;  r  cudagraphify_fnr1  r  s    r   r  zcudagraphify.<locals>.run  s    022 T T-oeZARSST T T T T T T T T T T T T T T{:&&&s   155)r;  r`  r~   r   )torch._inductor.cudagraph_treesr:  r   r@  cudagraph_trees	functoolspartialr   r   r5  r6  )r1  r  r3  r4  r  rq  r  r/  r0  new_cudagraphify_implr  r  r=  s   ``         @@r   cudagraphifyrC    s         
 }$ ,#+!%%#%%1}3FFHH

 

 

 ,K' ' ' ' ' ' ' ' ' Jr   r   torch.Tensorc                    t          j        |                                 |                                 | j        | j                  S )z1
    Copy and input while preserving strides
    )r'  r  )r   empty_stridedsizer   r'  r  )r   s    r   static_inputrH    s2     qvvxx1718TTTTr   dstsrcexpanded_dimsc                p    t          | |          } t          ||          }|                     |           dS )z=Index into expanded dimensions of both dst and src then copy_N)r>   copy_)rI  rJ  rK  s      r   index_expanded_dims_and_copy_rN    s5     c=
1
1C
c=
1
1CIIcNNNNNr   list[torch.Tensor] Callable[[list[InputType]], Any]c                   	
 t          |          }t          t          |                    t          ||           t	          |t
                    sJ fdt          |          D             fdt          |          D             t          t          |                    D ]=\  }\  }}t	          |t          j	                  r|vrt          |         ||           >t          j                                         t          j                                        }|                    t          j                                                   t          j                            |          5   | t                               ddd           n# 1 swxY w Y   |                                 t          j                                                            |           t          j                                         t          j                                        
t          j                            
|d          5   | t                              ddd           n# 1 swxY w Y   t	          t
          t&          f          sft(          j        rd
fd
}n2fdt-          t/                              D             	d	
fd}t1          ||t                                S )zQ
    Assumes inputs[static_input_idxs[i]] are always the same memory address
    c                @    g | ]\  }}|vrt          |          ng S r   )r=   r   r   r   r  s      r   r   z%cudagraphify_impl.<locals>.<listcomp>  sF       C !$+< < <!"  r   c                    g | ]H\  }}t          |t          j                  s|n&|vrt          |          n|                                IS r   )r   r   r   rH  detachrS  s      r   r   z%cudagraphify_impl.<locals>.<listcomp>  sj     	 	 	 C a..AA +++ a	 	 	r   Nthread_local)streamcapture_error_moder;  list[InputType]r~   rP  c                   t                    t          |           k    sJ t          t          |                     D ]\  }\  }}}t          |t          j                  s$t          |t          j                  sJ |v r-|                                |                                k    sJ qt          |||           |                                  	                                 	S r   )
r   rB  zipr   r   r   data_ptrrN  r  replay)
r;  r   rI  rJ  rK  r   inps_expanded_dimsr  static_inputsstatic_outputss
        r   r  zcudagraphify_impl.<locals>.run=  s    }%%Z88882;M:/ABB3 3 K K..c3 "#u|44 !#u|44444+++<<>>S\\^^;;;;;
 2#sMJJJJLLNNN!!r   c                    g | ]}|v|	S r   r   )r   r   r  s     r   r   z%cudagraphify_impl.<locals>.<listcomp>Q  s+     
 
 
CT8T8TC8T8T8Tr   c                    D ]E}|         }| |         }t          |t          j                  sJ t          |         ||           F|                                                                   S r   )r   r   r   rN  r  r]  )	r;  r   rK  rJ  copy_indicesr   r^  r_  r`  s	       r   r  zcudagraphify_impl.<locals>.runU  s~    # V V 23 7 o!#u|44444-mC.@#}UUUULLNNN!!r   )r;  rY  r~   rP  )r<  rR   rq   rl   r   r   rB  r[  r   r   rN  r  synchronizeStreamwait_streamcurrent_streamrW  	CUDAGraphr   r>  r   size_assertsr   r   rj   )r1  r,  r  check_input_idxsr   r   rK  rW  r  rc  r   r^  r_  r`  s     `      @@@@@r   r:  r:    s    /v7HII)3#F,=>>* * 6#3444fd#####   ''  	 	 	 	  ''	 	 	M $-S9K-L-L#M#M P Paa&& 	P36G+G+G)-*<aOOO 
JZ  F
uz0022333			6	"	" # #d=!!"""# # # # # # # # # # # # # # #
	J++F333	J J  ""E			%>		R	R 4 4tM22334 4 4 4 4 4 4 4 4 4 4 4 4 4 4ntUm44 +(* ""	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	"(
 
 
 
 ]!3!344
 
 
	" 	" 	" 	" 	" 	" 	" 	" 	" 	" (-=z||LLLs$   8FF!$F!I00I47I4model_example_inputs_rY  inner_compileOptional[dict[str, Any]]2Union[list[Union[str, Weights]], str, GraphModule]c                   t          | t                    s
J |             t          |            t          j        |pi           }|                    dd          st          j        sd|d<   |                    dt          j        j	                  }|r |
                    d          r
J d            ni |dt          | j                  i}dd	lm}  ||          }|                    d
d           }| j                            dd           }t"          j                            |          }t)          j        d          5  t"          j                            |          5  t/          ddd          5  t1                      5  t3          | |t5          j        ||          |          }	t          |	t8                    sJ |	j        cd d d            cd d d            cd d d            cd d d            S # 1 swxY w Y   d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   d d d            d S # 1 swxY w Y   d S )Nr  FTr  zaot_inductor.output_pathz.pt2a
  The output path for aot_compile should not have an extension with .pt2 this is for specifying the output path for the .so in AOTInductor. If you would like to package the AOTInductor generated files into a pt2, please call `torch._inductor.aoti_compile_and_package`.rZ   )maybe_aoti_standalone_configr  dynamo_compile_idcompile_fx_aot)re  reset_event_log_on_exit)r  )rm  r  )r   rM   r+   r  deepcopyr   r   r  r  output_pathr[  r0   coder   rq  r   r   r   r   r5  rs   set_aot_compilationcompile_contextr"   r(   
compile_fxr@  rA  r:   filename)
rk  rl  rm  r  rv  rq  r  saved_compile_idsaved_compile_contextcompiled_artifactss
             r   rs  rs  b  s    fk**22F22* &f---%)]>3GR%H%HN|U33 -v7H -(,}% $$"F$7$C K  
''// 	
 	
R	
 	
/ 	


&	&+(>(>
 

 43333311.AAN+//0H$OO{':DAA!M889IJJ	d##+ +%%&;<<+ + 	"&$(	
 	
 	
+ + 	+ + (#+'=   *
 
 
 ,l;;;;;!*-+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +s    I "H(5HAG:		HH(!I :G>>HG>HH(HH(HH(I (H,	,I /H,	0I  IIaot_autograd_modelaot_example_inputsdynamo_modelnum_example_inputsr  rA   r  forward_devicer3   0Callable[[list[object]], Sequence[torch.Tensor]]c                   ddl m}m}	 t          |            t	          j        | d          }
|
rt          | d            ||             |	||           \  }fdD             t                    }|j        j	        ^ }}|j
        d         }d t          |          D             |j        d<   g }t          j        j                                        }dgd|2|j        J |j        }t%          dt'          |          dz
            t)          t*                               }|j        }|J d}t'          |          dk    rg t/          t'          |                    D ]\}|vr&d ||<   |dk    r||         ||dz
           k    r|dz  }n|                    ||                                        |           ]|j        J t/          t'          |j                            D ]}||vr
d |j        |<   |j        r|j        j        }t:          j                            |d	d          5   |||||d||

          d d d            n# 1 swxY w Y   t@          j!        rS dfd}d|_"        |S )Nr   )%convert_conv_weights_to_channels_lastfreezeTr  c                     g | ]
}|         S r   r   )r   indr  s     r   r   z(fw_compiler_freezing.<locals>.<listcomp>  s    SSSc,S1SSSr   c                V    g | ]&\  }}t          |t          j        j                  $|'S r   r   r   r   r   r   r   r  s      r   r   z(fw_compiler_freezing.<locals>.<listcomp>  s?     ; ; ;QjEHM6R6R;; ; ;r   r   rZ   r  )r  r  r  rq  r  r  r   list[object]r~   Sequence[torch.Tensor]c                d      fdD             }                                    |          S )Nc           	     N    g | ]!}|t          |                   z
           "S r   )min)r   r  r   max_offset_idxunwrapped_args_offsetss     r   r   z9fw_compiler_freezing.<locals>.wrapper.<locals>.<listcomp>  sC     
 
 
 +C>,B,BCCD
 
 
r   )r  )r   args_newr  optimized_functionpreserved_arg_indicesr  s   ` r   wrapperz%fw_compiler_freezing.<locals>.wrapper   sV    
 
 
 
 
 
*
 
 
 	

!!(+++r   )r   r  r~   r  )#torch._inductor.freezingr  r  rn  re   decide_layout_optr  r%   r   r  r   rB  r   r   r   r   r   params_flat_unwrap_subclassesr  r   rR   r   params_unwrapped_to_flat_indexr   r)  r   params_flatr   r   r   r  r   rs   r,  _boxed_call)r  r  r  r  rm  r  r  r  r  r  r  	opt_modelr  r  model_outputs_nodemodel_outputsr  tracing_contextparams_flat_unwrappreserved_indices_params_flatunwrapped_idxscurrent_offsetr  r  r  r  r  r  s    `                      @@@@r   fw_compiler_freezingr    s    WVVVVVVV ""455501CRVWWWJ B+-?FFF--.@AAA'-v( ($I$ TSSS=RSSS !344I '_2Q&+A.M; ;#M22; ; ;67 $&m2::<<OSN"<HHH,JQ$6 7 7! ;<<(23(9(9%(G)))!""Q&&%'"s-..// 	: 	:A---(,"1%q55^A..Q2GGG"a'N-11.2CDDD")).9999 *666s?67788 	6 	6A55515+A.& 	Q / ; P			9&=t	D	D 

 

*]/!'5!	
 	
 	


 

 

 

 

 

 

 

 

 

 

 

 

 

 

 	 "!!, , , , , , , , , GNs   ;II"Idict[str, object]c                     t           j        j        rt          t	          d                     t           j        j        t           j        j        nt                      ddddS )Nzcpp wrapper enabledFT)ztriton.autotune_at_compile_timeztriton.autotune_cublasLtztriton.cudagraphsztriton.store_cubin)r   r@  r  r5   r4   autotune_at_compile_timerY   r   r   r   get_cpp_wrapper_configr    sf    } 
+'(=>>	
 	
 	
 }5A M22$)""
 
 
r   torch.fx.GraphModuleAbstractContextManager[None]c                n   t           j                                        st          j                    S t          d t          |           D                       }t          |          dk    r9t           j                            t          t          |                              nt          j                    S )zX
    Returns a cuda device context manager if there is a single device in the graph
    c              3  0   K   | ]}|j         d k    |V  dS )r  N)r  r  s     r   r   z*get_cuda_device_context.<locals>.<genexpr>'  s7       8 8FK64I4I4I4I4I4I8 8r   rZ   )r   r  r  r  r  rR   rD   r   r  r=  r>  )r   cuda_devicess     r   r  r     s     :""$$ (%'''-7 8 8,R008 8 8 . .L |!! 	
$tL1122333#%%r   joint_inputsSequence[object]tuple[GraphModule, GraphModule]c                2   t          |           }|5  t          | d           d d d            n# 1 swxY w Y   |                    dd           }t          j        At          j        dd          5  t          | |fd|d|cd d d            S # 1 swxY w Y   d S t          t          j        t                    sJ t          j        t          j        j
        j        d          5  t          j        | |fd|d|cd d d            S # 1 swxY w Y   d S )NT)rT  static_lifetime_input_indicesr   r  r  )compilerr  )r  rn  r   r   custom_partitioner_fnr   r&   r   r   r7   	__class__r   )r   r  r   r  r  s        r   partition_fnr  2  sF   
 +2..L	 E E 	&btDDDD	E E E E E E E E E E E E E E E :@': :! #+&1
 
 
 		 		 7 $.K	 
  		 		 		 		 		 		 		 		 		 		 		 		 		 		 		 		 		 		 &68KLLLLL&(2;"&
 
 
 
	 
	 / $.K	 
  	
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	s0   0443BBB)DDDc                d    t          |           }t          j        |j         }t	          |          S r   )rp   rC  arg_tree_leavesr   r   )r1  r  r  s      r   get_num_model_outputsr  \  s/    $U++*,>,CDM}r   )frozenc                  .    e Zd ZU ded<   ded<   ded<   dS )CompilerConfigExtrarA   r  r   r  r3   r  Nr   r   r   r   r  r  b  s3         MMM$$$$$$r   r  r   types.ModuleTypec                    t          | j        j                  }t          t                    }t          d           }t          |||          S )N)r  r  r  )rA   r@  r  r=  _graph_counterr3   r  )r   r  r  r  s       r   create_compiler_config_extrar  i  sT    
 6=344J N##H &d++N%   r   num_orig_model_outputscompiler_config_extraCallable[..., OutputCode]c           	         |r;t          dd  fd           t                      t          dd  fd           t          j        j                            |t          |                    }t                     }t          j	        rt          j        |j         t                    }	t          j        j                                        }
|
|
j        r|s|
j        j        }nd}||	k    sJ ||z   }||	k    sJ fd	t%          ||          D             |j        d
<   n
g |j        d
<   t)                       | |t+          |          |j        |j        ||j                  S )a#  
    Compile the forward graph of the given graph module.

    Args:
        gm: The graph module to compile.
        example_inputs: The example inputs to use for compilation.
        num_orig_model_outputs: The number of model outputs from the original dynamo graph.
        num_example_inputs: The number of example inputs from the original dynamo graph.
        compiler_config_extra: Extra configuration for the compiler.
        inner_compile: The inner compile function to use.
        is_inference: Whether this is an inference graph.
    r  c                     dddS )Nbefore_joint_graphr  r  r   r   r   r   r  z$compile_fx_forward.<locals>.<lambda>  s    ,$! ! r   c                 4                          ddd          S r  r  r  s   r   r  z$compile_fx_forward.<locals>.<lambda>  #    r00"4  1     r   r  c                     dddS )Nafter_joint_graphr  r  r   r   r   r   r  z$compile_fx_forward.<locals>.<lambda>  s    +$! ! r   c                 4                          ddd          S r  r  r  s   r   r  z$compile_fx_forward.<locals>.<lambda>  r  r   Nr   c                ^    g | ])}t          |         t          j        j                  '|*S r   r  )r   r   r  s     r   r   z&compile_fx_forward.<locals>.<listcomp>  sB     ?
 ?
 ?
-,ehm<<?
?
 ?
 ?
r   r   )r  r  r  rq  r  )rK   rn  r   r-  r   num_fw_fixed_argumentsr   rp   r   keep_output_striderC  r  r   r   r   r   r   num_mutated_inp_runtime_indicesr   r   r   r   r  r  r  )r   r_  r  r  r  rm  rq  r   r  num_model_outputsr   original_output_start_indexorig_output_end_idxr  s   `            @r   compile_fx_forwardr    s   ,  
    		
 		
 		
 		
 	&b)))    		
 		
 		
 		
 O!88C// E %R  'A.0B0GH..-.66887#6|#C (' +,'%)::::: :<RR #&77777?
 ?
 ?
 ?
8:MNN?
 ?
 ?
 :;; ?A :;
 /r222=
/66(3&/!#8#G   r   c                X   ddl m} |5  t          |           }t          j        r5t          j        |j         }d t          |          D             |j	        d<   n
g |j	        d<   t          |           }t          j        r t          j        t                                nt          j                    5   || |t!          t#          |                    |j        d|j        |j                  cddd           cddd           S # 1 swxY w Y   	 ddd           dS # 1 swxY w Y   dS )a5  
    Compile the backward graph of the given graph module.

    Args:
        gm: The graph module to compile.
        example_inputs: The example inputs to use for compilation.
        compiler_config_extra: Extra configuration for the compiler.
        inner_compile: The inner compile function to use.
    r   )compile_lockc                V    g | ]&\  }}t          |t          j        j                  $|'S r   r  r  s      r   r   z'compile_fx_backward.<locals>.<listcomp>	  sG     C C CCa//CC C Cr   r   T)r  r  r  r  r  N)torch._dynamo.convert_framer  rp   r   bw_outputs_user_visiblerC  r  r   rB  r   rB   r  r  r  r  r  r   r   r  r  r  )r   r_  r  rm  r  r  r  r   s           r   compile_fx_backwardr    s    988888	  (__) 	E"24F4KLMC C'66C C C#$>?? CE#$>?r"" !*FL/11222'))	 	
 !="&uU||"4"40; .7+@+O  	 	 	 	 	 	 	       	 	 	 	 	 	 	 	 	                 s6   B)D2;D-DD
	
DD
	DD#&D#c           
         t          dd  fd           t                              dt          d ddd                     t	           j                  t          j        j        _        t          j
        j        d	k    r9 j        j        D ],}|j        r#|j        t          j        j        j        |j        <   -t!           |           t          dd
  fd            S )Nr  c                     dddS )Nbefore_pre_grad_graphr  r  r   r   r   r   r  z%run_pre_grad_passes.<locals>.<lambda>#	  s    + 
 
 r   c                 d                          ddd          dt           j                   z   S NFTr  z

 # graph id: r  idr   rk  s   r   r  z%run_pre_grad_passes.<locals>.<lambda>'	  ?    600tD 1 
 
 1b..
0
01 r   r  r  zBEFORE PRE GRADTr  rZ   c                     dddS )Nafter_pre_grad_graphr  r  r   r   r   r   r  z%run_pre_grad_passes.<locals>.<lambda>B	  s    * 
 
 r   c                 d                          ddd          dt           j                   z   S r  r  r  s   r   r  z%run_pre_grad_passes.<locals>.<lambda>F	  r  r   )rK   pre_grad_graphs_logr;  r)   r  r   r   r-  r  r   r  r  r  r  #_inductor_pre_grad_node_stack_tracer   rb  )rk  rl  r   s   `  r   run_pre_grad_passesr  	  s.   
 
 
1 1 1 1
 
 
 
 	
 	
 	
	 	 	 02&,/?/?EO,|-22L& 	 	D $ %I$)T (@@F
 
1 1 1 1
 
 
 
 Mr   decompositions.Optional[dict[OpOverload, Callable[..., Any]]]ignore_shape_envPUnion[Callable[[list[object]], Sequence[torch.Tensor]], str, list[str], Weights]c                0   "# t          d |D                       r(t          j        j        j                                         |r[t          j        |          5  t           | t          j        |                    ||          cddd           S # 1 swxY w Y   t          j	        st          j
        rt          j	        }t          j
        }t          j        dddt                                5  t          j        |          5  |}t           t                    rd  j        j        D             }	d |	D             }	t          d |	D                       rwt%          t'                      |	|          D ]W\  }
}}|Ot          |t          j                  sJ |j        |j        k    r#t-          d	|
 d
|j         d|j         d          X|	}ddlm} t3          |          } | |i |          5 \  }}}}}t          ||t5          j        ||          ||          cddd           cddd           cddd           S # 1 swxY w Y   	 ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   t5          j        t          ||          }t9                     st;           ||          S t           t                    r0t           j        j        t>                    rtA           ||          S tC          tD          j#                  5  tI                      5  t          j%        j&        '                    t          j(        j)        dk              5  t          j        j*        +                                5  t           t                    rtY           |           t          d |D                       rAt[           ||          cddd           cddd           cddd           cddd           S t          j.        rJ t_          |          #ta          t                    "||ntc                      }d+" #fd}t5          j        |d          }te          tf          |          }t          j4        rCt          j5                    s0t5          j        tl           #"j7        "j8        "j9                  }n+t5          j        |d          }te          tf          |          }tu          d          d,"fd            }te          tf          |          }t3          |          pt          j;        <                    d           }t          j=        j>        ?                                pt          j=        >                    |          }t          j@        rdd!lAmB}  |             t          j        d"          5  t           |d|#          \  }}dd$lEmF}  ||          }|j        j        D ]}|jG        d%k    rd&|jH        vr t          |jJ                  |          }t          |t          j                  r$|J |K                    |d'          |jH        d&<   qt          |t          jL                  r.t          jM        jN        O                    ||          |jH        d&<   t          |t                    r
||jH        d&<   	 ddd           n# 1 swxY w Y   t           ||          }d( jH        v r jH        d(         |jH        d(<   d) jH        v r jH        d)         |jH        d)<   t          jR        S                                }|rt          jR        jT        nt          jV        } t          jW        |          5  t          jY                    5   |             5   |||          cddd           cddd           cddd           cddd           cddd           cddd           cddd           S # 1 swxY w Y   ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   t          jW        |          5  t          j=        Z                    |          5  t          jY                    5  t          j        d"          5  	  t          ||||t          d"j7        "j9        |*	  	         |          cddd           cddd           cddd           cddd           cddd           cddd           cddd           cddd           S # t          $ r}!|!^                                dd}!~!ww xY w# 1 swxY w Y   ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   ddd           dS # 1 swxY w Y   dS )-a@  
    Main entry point for compiling given FX graph.  Despite the fact that this
    lives in :mod:`torch._inductor`, this function is responsible for calling
    into AOT Autograd (and we will eventually get a callback to
    ``inner_compile`` to perform actual compilation.  In other words, this
    function orchestrates end-to-end compilation for the inductor backend when
    you use :func:`torch.compile`.

    NB: This function TAKES OWNERSHIP of the input ``model_`` and can potentially
    mutate it!  Make a copy if you need to preserve the original GraphModule.
    c              3  f   K   | ],}t          |t          j                  o|j        j        d v V  -dS ))r  xpuN)r   r   r   r  r  )r   rk  s     r   r   zcompile_fx.<locals>.<genexpr>b	  sP         	1el##H(H     r   )rm  r  r  NF)r  r  c                X    g | ]'}|j         d k    |j                            d          (S )r5  r   )r   r   r   )r   r   s     r   r   zcompile_fx.<locals>.<listcomp>	  s=       w-// IMM%((///r   c                J    g | ] }t          |t          j                  r|nd !S r   r   r   r   )r   inps     r   r   zcompile_fx.<locals>.<listcomp>	  s=        &c5<88BCCd  r   c              3     K   | ]}|d uV  	d S r   r   )r   vs     r   r   zcompile_fx.<locals>.<genexpr>	  s&      ::q}::::::r   zBDevice mismatch between fake input and example input at position #z: z vs zx. If the model was exported via torch.export(), make sure torch.export() and torch.aot_compile() run on the same device.r   )_fakify_script_objectsrZ   c              3  Z   K   | ]&}t          |t          t          t          f          V  'd S r   )r   r   r>  r  r   s     r   r   zcompile_fx.<locals>.<genexpr>	  s3      KKaz!dE4011KKKKKKr   r   rM   r_  r`  rq  r   r~   r?   c           
         t          j        d          5  t          t                    rt	                    }nt	          |           }t          | |||          cd d d            S # 1 swxY w Y   d S )Nz$compile_fx.<locals>.fw_compiler_base)r  r  r  rm  rq  )r   r&   r   rM   r  r  )r   r_  rq  r  r  rm  rk  r  s       r   fw_compiler_basez$compile_fx.<locals>.fw_compiler_base	  s    
 *+QRR  fk22 G-B6-J-J**-B2-F-F*)"+A'9*?"/!-                   s   A
A--A14A1r  )r  r  rm  r  r  r  Tbackward)r  c                    t          j        d          5  t          | |          cd d d            S # 1 swxY w Y   d S )Nzcompile_fx.<locals>.bw_compiler)r  rm  )r   r&   r  )r   r_  r  rm  s     r   bw_compilerzcompile_fx.<locals>.bw_compiler
  s    
 )*KLL  +"*?"/	                   s   6::r  )is_valid_aoti_model_name)unlift_effect_tokens)trace_jointr  )_detect_fake_mode_from_gmr  r   )static_shapes dynamo_flat_name_to_original_fqnrr  )	fw_compilerr  inference_compilerr  r  keep_inference_input_mutationsr  r  r  )r   rM   r_  r`  rq  r   r~   r?   )r   rM   r_  r`  r~   r?   )_r  r   r-  r.  AsyncCompilewakeupr   r  rz  r  r  r  rs   set_real_inputsr   rM   r   r  r[  r
   r   r  
ValueErrortorch._export.non_strict_utilsr  r%   r@  rA  graph_returns_tuplemake_graph_return_tuple_codegenrX   handle_dynamo_export_graphrW   r  r  r   r   r  preserve_node_metar  r  r;  reset_provenance_globalsr  r'   _raise_error_for_testingr   r  r_   r/   r?   freezingis_grad_enabledr  r  r  r  rL   r  r  r   r   r   r,  r   r  functorch_configr,   torch._export.utilsr  r   r   r   r   from_tensorScriptObject_libraryfake_class_registrymaybe_to_fake_objrJ   rS  _C_is_any_autocast_enabled_DisableAutocastr  r  r  r   _disabletracingrT   r  rU   remove_dynamo_frames)$rk  rl  rm  r  r  r  cpp_wrapper_configfx_wrapper_configinputs_fake_inputsr   fir  r  r  patched_mod	fake_argsr  recursive_compile_fxr  r  r  r  r  r  r   r0  r  r   r   rR  disable_ampr   rk  r  r  s$   ` `                               @@r   rz  rz  N	  s   (         < 	%299;;;
  	\.)) 	 	:fl>::=II-!1  	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	  =V. =#/"- L#("'  -.. 9	 9	 o..9	 9	 ,;G&+.. *  & 2   *  
 ::k::::: *&)%'';&H&H " "
R>#-a#>#>>>#>!yAH44&0%oil %o %o')y%o %o67h%o %o %o'" '" !"
 *GMMMMMM(11I''YGG  L!"+"3%$6#4# # #
 $2%5
 
 
      Q9	 9	 9	 9	 9	 9	 9	 9	 9	 9	 9	 9	 9	 9	 9	P        Q9	 9	 9	 9	 9	 9	 9	 9	 9	 9	 9	 9	 9	 9	 9	 9	 9	 9	 9	 9	 9	 9	 9	 9	 9	 9	 9	 9	 9	 9	v %,#%)	   v&& 
& 
 
 	
 &+&& 
:~, , 
 * 
 
 	
 	}BCCw9 w9 ""w9 w9 	--L2a7	
 	
w9 w9 	6688w9 w9 fk** 	B(AAF
 KK?KKKKK 	'$ )w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w94 2222 11 <V D D -8NN>Q>S>S 		 	 	 	 	 	 	 	 	* .UCCC 	 6j+NN? 	5#8#:#: 	5>5F$##5+0;.74C6 6 6 "+!23CRV!W!W!W!@." " 
'*	=	=	=	 	 	 	 	 	 
>	=	 6j+NN$
 
 J--D-II 	 M(0022 7}++I66 	
  ;	H777777$$&&&!'TBBB  6  6&7# %#1	' ' '#O JIIIII55b99	 HN 6 6Dw*,,di1G1G!8DK!8!8!<!<%fel;; 6#,#8#8#8/8/D/D &d 0E 0 0DIe,, (0BCC 6 % B T T$-v!" !" !Ie,,
 (0@AA 6/5DIe,6# 6  6  6  6  6  6  6  6  6  6  6  6  6  6  6D (ODDK1V[@@GM{6H !CD #fk118>DW8X !45  (;;==K-8T))j>T  ++ H H->-G-I-I H H7799 H H))+GGH H H H H H H H H H H H H H H H H H H H Hw9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9~H H H H H H H H H H H H H H H H H H H H H H H H H H H H H H H H H H H H H H OI&&	9 	9M!!/22	9 	9 &((	9 	9 "===		9 	99
| + +'9#1!-374?/D/S%5
 
 
 /
+ 
+	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9Ew9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9h $ 9 9 9 ,,..D89%	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9Ew9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9 w9s  /BBB9I>C-I';/I*I'6I>II'II'I>'I+	+I>.I+	/I>>JJ$h38g3+$gAg	g*g36hGg	D[/#g	/[33g	6[37B!g	a,`/7``/ag	'g3g3?h``/` `/#a/`33a6`37a:g	a

g	a
g	% f.ff /e)10e!f -f9f.g	gg3)h
e&e!!e&&e))e--f 0e-1f 4f fffff.ff.ff."g	.f22g	5f26g	9gg		gg	gg3g  g3#g $g3'h3g7	7h:g7	;hhhc                   t          | t                    sdS t          |           j        \  }t          |t          t
          f          rdS t          |t          j        j        j	                  rat          |j        d          rLt          |j        j        j                  dk    r*t          d |j        j        j        D                       rdS dS )z"True if a FX graph returns a tupleT_schemarZ   c              3  F   K   | ]}t          |j                  d k    V  dS )r   N)r}   r  )r   rets     r   r   z&graph_returns_tuple.<locals>.<genexpr>
  s/      OOcCH)OOOOOOr   F)r   rM   rp   r   r   r>  r   r   r   r   r$  r   r   r-  returnsrC  )r   rvs     r   r  r  
  s    b+&& tOO ER"tUm$$ t2ux})**BIy)) 	!)**Q..OORY5F5NOOOOO / t5r   
compile_gmc                   t          |           }|j        \  }t          j        |          \  }| j                            |          5  | j                            |           ddd           n# 1 swxY w Y   | j                            |           t          |           sJ  || |          t          j
                  dfd            }|S )z
    Mutate gm so it returns a tuple.  This is only needed for graphs
    not created by torchdynamo that return non-tuples.
    Nr   r   r   r~   c                 :    t          j         | i |          S r   )rC  tree_unflatten)r   r   r  specs     r   r  z(make_graph_return_tuple.<locals>.wrapper
  s%    $[[$%A&%A%A4HHHr   )r   r   r   r   r~   r   )rp   r   rC  tree_flattenr   inserting_beforer   r  r  r@  wraps)r   r,  r2  r   r1  r  r  r6  s         @@r   r  r  
  s.    r??DIER"2&&HB		"	"4	(	(  
              Hr"""""*R((K_[!!I I I I I I "!I Ns   A33A7:A7c                   | j         j        t          j        j                                         | j         _        |                                   ||  j        |           t          j                  dfd            }|S )z
    `torch._dynamo.export` embeds pytrees in the FX graph codegen object,
    convert that to a normal FX graph so inductor can compile it.
    r   r   r~   c                 D                           j        |             S r   )process_outputsprocess_inputs)r   r  r  s    r   r  z+handle_dynamo_export_graph.<locals>.wrapper
  s)    &&{{4JG4JD4Q'RSSSr   )r   r   r~   r   )	r   r  r   r   CodeGenr  r=  r@  r9  )r   r,  r2  r  r  r  s       @@r   r  r  
  s     hG..00BHLLNNN*R!7!7!@AAK_[!!T T T T T T "!T Nr   r   re   c                   dd}t          j        | j                                        | j                  D ]}t          |t                    st          |          }|r1t          |          r"|	                                t          j        k    r[t          |          }|                    d          r d S  ||                                           d S )	Nr  Optional[torch.device]r~   r   c                    ddl m} | J t          | j                  }|                    |           }t          j        |j         d            |d          )Nr   )rV   z9 does not support bfloat16 compilation natively, skippingzBF16 is not supported)torch._dynamo.excrV   r    r  get_device_propertiesr  r  r   )r  rV   device_interfacedevice_propss       r   warn_and_skipz1_check_triton_bf16_support.<locals>.warn_and_skip
  sy    //////!!!3FK@@'==fEE [[[	
 	
 	
 i/000r   F)including_emulation)r  r@  r~   r   )	itertoolschaingraph_inputsr  r  r   rg   rf   rF   	get_dtyper   bfloat16r    is_bf16_supported
get_device)r   rF  r   r  rD  s        r   r  r  
  s    
1 
1 
1 
1  2 9 9 ; ;U=PQQ ) )$'' 	%d++	+&&	 ~~5>11 4K@@--%-HH 	FFdoo''(((() )r   )options!Union[list[Any], tuple[Any, ...]]rO   tuple[list[Any], dict[str, Any]]c               h   ddl m}  ||           s
J d            d}d}t          | j        j        t
          j        j        j                  r~| j        j        }t
          j        j                                        | j        _        | 	                                 |j
        j        |j
        j        }|j
        j        |j
        j        }n.t          | d          r| j        }t          | d          r| j        }|t!          j        |          nd}|t!          j        |          nd}	t!          j        ||pi f          \  }
}t'          d |
D                       rd	d
lm}m}  ||j        d          d |
D             }|||k    rt1          d| d|           |||	dni |||	d}||fS )z
    Flatten the inputs to the graph module and return the flat inputs and options.
    Add "aot_inductor.serialized_in_spec" and "aot_inductor.serialized_out_spec" to the options.
    rZ   )r  zGraph output must be a tuple(). This is so that we can avoid pytree processing of the outputs. Please change the module to have tuple outputs.N_in_spec	_out_spec c              3  V   K   | ]$}t          |d          t          j                  V  %dS rZ   N)r   r   r  r   s     r   r   z'_aoti_flatten_inputs.<locals>.<genexpr>  s3      
M
MA:adE.//
M
M
M
M
M
Mr   r   )	UserErrorUserErrorTypezTorchBind objects found in inputs. TorchBind object inputs are not supported in AOTInductor. TorchBind objects can only be attributes.c                b    g | ],}t          |d          t          j                  r|d          nd-S rW  r  r   s     r   r   z(_aoti_flatten_inputs.<locals>.<listcomp>%  sB       =>
1Q4..8!D  r   z>Trying to flatten user inputs with exported input tree spec: 
z-
but actually got inputs with tree spec of: 
)zaot_inductor.serialized_in_specz aot_inductor.serialized_out_spec)rz  r  r   r   r  r   r   rX   r>  r  pytree_infoin_specout_specr$  rS  rT  rC  treespec_dumpstree_flatten_with_pathr  rB  rX  rY  INVALID_INPUTr  )r   r   r   rO  r  r\  r]  r  serialized_in_specserialized_out_specflat_args_with_pathreceived_specrX  rY  flat_example_inputss                  r   _aoti_flatten_inputsrf  
  sm    0/////r""  	 " GH"(#UX^%BCC $(#!HN2244
&2)1G'3*3H 2z"" 	"kG2{## 	$|H;B;N.w777TV+3+?h'''R  *0)F	v|* *& 
M
M9L
M
M
MMM 
>>>>>>>>i'8
 
 	
 BU   }77   
 
 	
 ? 0B0C	
 	
 	



/A0C
 
 
  ''r   )r|   r}   r~   r   )r   r   r   r   r~   r   )r~   r   )r~   r   )r   r   r~   r   )r   rM   r~   r   )r~   r   r  )r
  rM   r   rM   r~   r   )r
  rM   r   rM   r0  r   r~   rM   )F)r   rM   rT  r   r~   rU  )r   rM   r_  r`  r~   rM   )r   rM   rT  r   r~   r   )r   rM   rq  r   r~   r   )TNN)
r   rM   ru  r   rv  rw  rx  ry  r~   rz  )r   rM   r~   r   )r_  r`  r~   r  )r  r   r  r   r~   r  )r   rM   r_  r`  r  r   r~   r  r   )r  r  r~   r  )r~   r  r  )r   rM   r_  r`  r  r  r~   r?   )
r   rM   r_  r`  r]  r  r  r  r~   r?   )r,  r`  r  r  r~   r  )r   )r1  r2  r  r  r3  r   r4  r5  r  r   rq  r   r  r6  r/  r7  r0  r8  r~   r2  )r   rD  r~   rD  )rI  rD  rJ  rD  rK  r   r~   r   )r1  r2  r,  rO  r  r  r~   rP  )
rk  rM   rl  rY  rm  r  r  rn  r~   ro  )r  rM   r  r`  r  rM   r  r   rm  r2  r  rA   r  r   r  r3   r~   r  )r~   r  )r   r  r~   r  )r   rM   r  r  r   r   r~   r  )r1  rM   r~   r   )r   r  r~   r  )r   rM   r_  r`  r  r   r  r   r  r  rm  r  rq  r   r~   r?   )
r   rM   r_  r`  r  r  rm  r  r~   r?   )rk  rM   rl  r`  r~   rM   )rk  rM   rl  r`  rm  r  r  rn  r  r  r  r   r~   r  )r   rM   r,  r`  r2  r2  r~   r2  )r   re   r~   r   )
r   r  r   rP  r   rn  rO  rn  r~   rQ  (  
__future__r   r  r  enumr@  r  rH  r  r   r   r  rB  r  abcr   r   collectionsr   r   dataclassesr   inspectr	   r
   operatorr   typingr   r   r   r   r   r   typing_extensionsr   r   r   r   r   r   unittestr   torch._inductor.async_compiler   torch.fxtorch.utils._pytreer   _pytreerC  functorch.compiler   r   torch._dispatch.pythonr   torch._dynamor   r   r  r   r   torch._dynamo.device_interfacer    torch._dynamo.repro.after_aotr!   r2  r"   r#   r$   r%   r&   r'   r(   r)   r*   torch._functorchr  7torch._functorch._aot_autograd.subclass_parametrizationr+   torch._functorch.aot_autogradr,   r-   r.   r/   torch._inductor.codecacher0   r1   r2   r  r3   r4   r5   r6   !torch._inductor.custom_graph_passr7   torch._inductor.debugr8   r9   torch._inductor.output_coder:   r;   r<   r=   r>   r?   'torch._inductor.runtime.cache_dir_utilsr@   torch._inductor.utilsrA   rB   rC   rD   rE   rF   rG   rH   rI   "torch._library.fake_class_registryrJ   torch._loggingrK   torch._utils_internalrL   rM   %torch.fx.experimental.symbolic_shapesrN   rO    torch.fx.passes.fake_tensor_proprP   torch.monitorrQ   torch.utils._ordered_setrR   _dynamo.backends.commonrT   _dynamo.excrU   rV   fx._lazy_graph_modulerW   fx.graphrX   utils._tritonrY   rU  r[   codegen.commonr\   r]   r;  r^   decompositionr_   excr`   fx_passes.joint_graphra   fx_passes.post_gradrb   rc   fx_passes.pre_gradrd   r   re   irrf   rg   output_coderh   triton_bundlerri   rj   rk   rl   rm   rn   ro   rp   rq   rr   virtualizedrs   collections.abcrt   ru   rv   
torch._opsrw   )torch.export.pt2_archive._package_weightsrx   ry   rz   r{   r  r   r   torch._inductor.fb.utilstypes&torch._functorch._aot_autograd.schemasr   r   r   Enumr   r   r   r   _fx_compile_configr   r#  r   r&  r   r(  r   r   r   _logginggetArtifactLoggerr  r  r  r:  r  r   r   r   r   	lru_cacher   cacher	  r/  rS  r^  rb  rn  rs  r  r  r  r  r  r  contextmanagerr  r  r  r  r  rv  r{  r  r  r<  rC  rH  rN  r:  rs  r  r  r  r  r  r  r  r  r  r  r  rz  r  r  r  r  rf  r   r   r   <module>r     sx   " " " " " "           				       				 



   # # # # # # # # # # # # # # - - - - - - ! ! ! ! ! !                         I I I I I I I I I I I I I I I I U U U U U U U U U U U U U U U U       $ $ $ $  $ $ $ $ $ $ $ $ $ A A A A A A       ; ; ; ; ; ;            D C C C C C = = = = = =
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 8 7 7 7 7 7                 O N N N N N N N N N            B A A A A A                       > = = = = =
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 @ ? ? ? ? ? + + + + + + ? ? ? ? ? ?             W W W W W W W W ; ; ; ; ; ; & & & & & & / / / / / / 2 2 2 2 2 2 5 5 5 5 5 5 5 5 : : : : : : % % % % % % & & & & & &         U U U U U U U U       . . . . . .       5 5 5 5 5 5 B B B B B B B B / / / / / /             ' ' ' ' ' ' ' ' I I I I I I ) ) ) ) ) )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
        %33333333::::::%%%%%%AAAAAA$$$$$$ Yt__WT]] 	L((** 	L% % % %     LKKKKKKK LLL             DI           C C C CD    .-// $)%/ +; g!!00<HHn66xARSS ~77BTUU N44'   ~77BTUU 
4 
4 
4 
4A A A A*' ' ' '= = = =  T/ / / / 	
 	
 	
 	
E/ E/ E/ E/PJ J J J\ 38! ! ! ! !0N N N N( 38    (	+ 	+ 	+ 	+ 	+ "15FJ	E( E( E( E( E(P   *( ( ( (.	( 	( 	( 	( ).    @ <@( ( ( ( (        y           +
 +
 +
 +
\ 2333E E E 43EP
B B B B B B B B# # # # # # # #4o o o o o) o o od-Y -Y -Y -Y`       J (*) +-.0*,) ) ) ) ) )XU U U U    (*\M \M \M \M \MD )9/3	?+ ?+ ?+ ?+ ?+D qc c c cL   &   $' ' ' 'T    $% % % % % % % %   : 0@k k k k kd 0@	+ + + + +\/ / / /j 0@/3EI"y9 y9 y9 y9 y9x	   $   4   ,) ) ) )D (,R(
 )-R( R( R( R( R( R( R( R(r   