
    `i;%                      U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlmZmZmZmZmZ d dlmZmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z" d dl#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z* d d	l+m,Z, d dl-Z-d d
l-m.Z.m/Z/m0Z0 d dl1m2c m3c m4Z5 d dl6m7c m8Z9 d dl:Z;d dl<Z;d dl=m8c m>Z? d dl@mAZA d dlBmCZC d dlDmEZE d dlFmGZG d dlHmIZI d dlJmKZKmLZLmMZMmNZNmOZO d dlPmQZQ d dlRmSZSmTZTmUZUmVZVmWZWmXZXmYZYmZZZm[Z[ d dl\m]Z] d dl^m_Z_ d dl`maZambZbmcZc d dldmeZe ddlfmgZgmhZh ddlimjZjmkZkmlZlmmZmmnZn ddlhmoZompZpmqZqmrZrmsZs ddltmuZu ddlvmwZwmxZxmyZymzZz ddl{m|Z| ddl}m~Z~mZ ddl8mZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ dd lmZmZmZ e r+d d!lmZ d d"lRmZ d d#l\mZ dd$lmZ dd%lmZ dd&lmZ dd'l8mZ neZd(ed)<   	 d dlZej        Zd*Zn# e$ r dZd+ZY nw xY w e'd,          Z e!d-          Z e!d.          Z e!d/          Ze"ee.f         Zd(ed0<   e"eee.f         Zd(ed1<   e"e;j        j        e;j        j        f         Zd(ed2<    ej        e          Z ej        e	j        d34          Ze;j        j        Z e ej        d5d6                    Z e ej        d7d8                    Z	 e"ed9eed9f         d:d;eee"eeed9f         d9d:d;f                           f         Zd(ed<<   dzdAZ ej        d*B           G dC dD                      ZŐd{dHZƐd|dLZǐd}dPZȐd}dQZɐd~dWZg dXZg dYZ	 ddd^Z͐dd`Z	 dddaZedddd            Zedddf            Z	 dddiZАddmZѐddpZҐddrZӐddsZԐddwZՐdd|Z֐ddZאddZؐddZ G d d;          Z ed+B           G d d                      Ze G d deڦ                      ZܐddZe G d deܦ                      Ze G d deަ                      Z ed           ed           ed           ed           ed           ed          dZded<   	 dddZe G d deܦ                      Zd e/d           fddZeee.         ee.         gef         Zd(ed<    G d de          Z G d de          Z G d de          Ze G d deܦ                      Ze G d de                      Ze G d deܦ                      ZddZddZ	 	 	 	 	 dddÄZddĄZddƄZe G dǄ deڦ                      Ze G dɄ de                      Ze G d˄ de                      Ze G d̈́ de                      Ze G dτ de                      Ze G dф de                      Ze G dӄ de                      Ze G dՄ de                      Z G dׄ de          Ze G dل deڦ                      Ze G dۄ de                      Ze G d݄ de                      Zdd߄ZddZ G d d          Ze G d de                      Z G d de          Z  G d de          Z G d de          Z G d de          Z G d de           Ze G d de                      Z G d de          Z ed+B           G d deek                      Z ed+B           G d deeۦ                      Z G d de          Z	 G d de	          Z
 G d de	          Ze G d deڦ                      Ze G d d eڦ                      Z ed+B           G d de                      Z G d de          Z G d de          Ze"eeeeee"eeeef                  f         Z G d d          Z G d	 d
e          Z G d de          Z G d de          Z G d de          Z G d de          ZddZ ed+B           G d de                      Z G d de          Z G d de          Z ed+B           G d de                      Z ed+B           G d d e                      Z G d! d"e          Z  G d# d$e          Z! G d% d&e          Z" G d' d(e          Z# G d) d*e#          Z$ G d+ d,e#          Z% G d- d.e          Z& G d/ d0e          Z' G d1 d2e          Z( G d3 d4e          Z) G d5 d6e          Z* G d7 d8e*          Z+ G d9 d:e!          Z, G d; d<e          Z- G d= d>e          Z. G d? d@e          Z/ G dA dBe          Z0 G dC dDe          Z1 G dE dFe          Z2 ed+B           G dG dH                      Z3 G dI dJe!          Z4 ed+B           G dK dLe4                      Z5 G dM dNe4          Z6e G dO dPe                      Z7 G dQ dRe          Z8ej         G dS dTeڦ                      Z9 G dU d9e9          Z: G dV dWe9          Z; ed+B           G dX dYeڦ                      Z<dd[Z= ed+B           G d\ d]e                      Z> ed+B           G d^ d_e                      Z?ddcZ@ ed+B           G dd dee                      ZA G df dge4          ZB G dh dieڦ          ZCe G dj dkeC                      ZDe G dl dmeC                      ZE G dn doe4          ZF G dp dqeF          ZG G dr dseF          ZH G dt dueF          ZIddxZJddyZKdS (      )annotationsN)	Container	GeneratorIterableIteratorSequence)AbstractContextManagernullcontext)Enum)partial)AnyCallablecastClassVarLiteralOptionaloverloadSupportsFloatSupportsIntTYPE_CHECKINGTypeVarUnion)assert_neverNeveroverride	ParamSpecSelf	TypeAliasTypeIs)patch)ExprIntegerSymbol)identity)GraphModuleSerializer)can_auto_functionalize)metricsget_free_symbols)compute_required_storage_lengthis_boolean_dtypeis_float_dtypemake_channels_last_strides_for
StrideType)get_schema_info)	&_remove_effect_token_unbacked_bindingscompute_unbacked_bindingsfree_symbolsfree_unbacked_symbolsIterateExprsrebind_unbackedresolve_unbacked_bindingsShapeEnvSymTypes)Node
OrderedSet)CleanDivFloorDivModularIndexing)SymT   )configdependencies)BackendFeatureCodegenSymbolget_scheduling_for_deviceindex_prevent_reorderingKernel)Depextract_free_symbols#extract_input_node_reduction_rangesextract_read_writesvar_builder)LoopBody)OpCounterCSEOpCountResultReductionType	StoreMode)benchmarker)DevicePropertiesReductionHint)argsortargsort_symcache_on_selfcache_on_self_and_argsceildivconvert_shape_to_inductorconvert_shape_to_symintdeveloper_warningdo_bench_using_profilingdtype_from_sizeget_dtype_sizeget_kernel_metadataGPU_ALIGN_BYTESir_dataclass
is_dynamicis_gpu	sympy_dotsympy_index_symbolsympy_index_symbol_with_prefixsympy_product
sympy_substensor_is_aligned)opsOpsValueV)FakeScriptObject)SympyBoolean)Argument)CUDATemplate)PythonWrapperCodegen)GraphLowering)IndentedBufferr   rq   TF_P_T_U_V_IntLike_NumLike_OpOverloadsz  prefixTORCH_AUTOTUNE_WARMUP   TORCH_AUTOTUNE_REPd   	TensorBoxr#   IRNode_NodeOrNodesxobjectreturnboolc                :    t          | t          t          f          S N)
isinstanceintr"   r   s    f/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torch/_inductor/ir.py
_is_staticr      s    a#w(((    )frozenc                  L    e Zd ZU ded<   ded<   ded<   ded<   d	ed
<   ded<   dS )GraphPartitionSignatureOrderedSet[sympy.Symbol]symbol_inputsz5dict[str, Union[IRNode, sympy.Expr, TorchBindObject]]input_nodeslist[IRNode]output_nodeszdict[str, bool]input_deallocationr   skip_cudagraph	list[str]constant_namesN__name__
__module____qualname____annotations__ r   r   r   r      sc          ,+++ GFFF (''' r   r   node_or_nodesOptional[_NodeOrNodes]Nonec                *    dfd |            d S )Nnodesr   r   r   c                   | d S t          | t          t          f          r| D ]} |           d S t          | t                    r$|                                 D ]} |           d S t          | t
          t          t          t          t          j
        j        j        t          t          t          t           f	          sJ dt#          |            d            d S )NzFound zE, which is not a supported top level IR node. See [Note: Inductor IR])r   listtupledictvalues
ExpandViewDynamicScalarAssertScalarr   sympylogicboolalgBooleanr!   r   EffectfulKernelShapeAsConstantBuffertype)r   node_check_tensorboxs     r   r   z%validate_ir.<locals>._check_tensorbox   s    =De}-- 	 ' '  &&&&' 't$$ 	 ' '  &&&&' ' ! K'/#)
    lekkk    r   )r   r   r   r   r   )r   r   s    @r   validate_irr      s:         < ]#####r   namestrCallable[..., OpsValue]c                l     t           t                    sJ t                                 d fd}|S )Nargsr   kwargsr   rl   c                 :     t          t                    | i |S r   )getattrrk   )r   r   r   s     r   fnzops_wrapper.<locals>.fn  s"    !wsD!!426222r   )r   r   r   r   r   rl   )r   r   r   )r   r   s   ` r   ops_wrapperr     sL    dC  ,,$t**,, 3 3 3 3 3 3 Ir   orderSequence[int]&Callable[[Sequence[_T]], Sequence[_T]]c           
         t          t          | t          t          |                                         dfd}|S )NindexSequence[_T]r   c                     t                     t                    k    sJ  fdt          t                               D             S )Nc                ,    g | ]}|                  S r   r   ).0ir   	inv_orders     r   
<listcomp>z4inverse_reorder.<locals>.reindex.<locals>.<listcomp>"  s"    ???il#???r   lenrange)r   r   s   `r   reindexz inverse_reorder.<locals>.reindex   sL    5zzS^^++++?????U3u::->->????r   r   r   r   r   )r   zipr   r   )r   r   r   s     @r   inverse_reorderr     sS    Sc%jj 1 12233I@ @ @ @ @ @ Nr   c                     d fd}|S )Nr   r   r   c                     t                     t                    k    sJ  fdt          t                               D             S )Nc                ,    g | ]}|                  S r   r   )r   r   r   r   s     r   r   z1same_reorder.<locals>.reindex.<locals>.<listcomp>*  s!    ;;;AeAh;;;r   r   )r   r   s   `r   r   zsame_reorder.<locals>.reindex(  sL    5zzSZZ'''';;;;;s5zz):):;;;;r   r   r   )r   r   s   ` r   same_reorderr   '  s(    < < < < < < Nr   reindex1&Callable[[Sequence[_U]], Sequence[_V]]reindex2&Callable[[Sequence[_T]], Sequence[_U]]&Callable[[Sequence[_T]], Sequence[_V]]c                     d fd}|S )Nr   r   r   Sequence[_V]c                ,      |                     S r   r   )r   r   r   s    r   r   z fuse_reindexing.<locals>.reindex3  s    x(((r   )r   r   r   r   r   )r   r   r   s   `` r   fuse_reindexingr   /  s.    ) ) ) ) ) ) ) Nr   )   r      r@   )   r   r   r   r@   seq(Sequence[Union[int, torch.SymInt, Expr]]	shape_envOptional[ShapeEnv]c                |    |t          d | D                       rt          |           }nt          ||           }|S )z1
    Convert strides to fill order (argsort)
    Nc              3  X   K   | ]%}t          |t          t          j        f          V  &d S r   )r   r   r   r"   r   ss     r   	<genexpr>z!get_fill_order.<locals>.<genexpr>C  s3      QQ
1sEM.B C CQQQQQQr   )allrU   rV   )r   r   
sorted_idxs      r   get_fill_orderr   =  sJ     CQQSQQQQQ$+CLL

 !C00
r   Sequence[Union[int, Integer]]c                    d t          |           D             fdt          t          |                     D             }|S )z
    Convert stride order to fill order
    For channel last format,

    stride order = [3, 0, 2, 1] and fill order = [1, 3, 2, 0]
    c                    i | ]\  }}||	S r   r   r   idxposs      r   
<dictcomp>z+stride_order2fill_order.<locals>.<dictcomp>R  s    88883c3888r   c                     g | ]
}|         S r   r   r   r   lookups     r   r   z+stride_order2fill_order.<locals>.<listcomp>S  s    777&)777r   )	enumerater   r   )r   
fill_orderr   s     @r   stride_order2fill_orderr   K  sM     98y'7'7888F7777U3u::%6%6777Jr   c                    t          | |          }d t          t          |                     D             }t          |          D ]
\  }}|||<   |S )z)
    Convert strides to stride order
    c                    g | ]}d S r   r   r   _s     r   r   z$get_stride_order.<locals>.<listcomp>^  s    
&
&
&1
&
&
&r   )r   r   r   r   )r   r   r   outr   elems         r   get_stride_orderr  W  s]     !/sI > >J
&
&eCHHoo
&
&
&CZ((  4D		Jr   Literal[None]guard_shapec                    d S r   r   r   r	  s     r   ir_node_to_tensorr  d  s    KN3r   torch.Tensorc                    d S r   r   r  s     r   r  r  h  s    LOCr   Optional[IRNode]Optional[torch.Tensor]c                   | d S |st           j        j        j        nt          fd|                                 D             }t          |           r&fd|                                 j        D             }nt          
                    |          }|                                 }|                                 }t          |          }t          |          }t           j        j        j                                        5  t!          j        ||||                                          }d d d            n# 1 swxY w Y   |S )Nc                &    g | ]} |          S r   r   r   r   shape_fns     r   r   z%ir_node_to_tensor.<locals>.<listcomp>w  s!    ...AHHQKK...r   c                &    g | ]} |          S r   r   r  s     r   r   z%ir_node_to_tensor.<locals>.<listcomp>z  s!    ===!((1++===r   )sizestridedtypedevice)rm   graphsizevars	size_hintr$   get_sizeis_storage_and_layout
get_layoutr  FlexibleLayoutcontiguous_strides	get_dtype
get_devicer[   r   suppress_guardstorchempty_stridedzero_)r   r	  r  r  r  r  tr  s          @r   r  r  l  sm    	yt  7#-.......DQ 9====q||~~'<===22488KKMME\\^^F"4((D$V,,F	
		#	3	3	5	5  fE&
 
 

%'' 	
               Hs   +D::D>D>valueOptional[Sequence[_T]] Optional[Sequence[Optional[_T]]]c                :    t          | t                    r| sd gS | S r   )r   r   r)  s    r   may_convert_to_optionalr.    s+     % u  vLr   2Union[IRNode, OutputSpec, torch.device, None, str]Optional[str]c                D   t          | t                    s| | S t          | t          j                  r| j        S t          | t
          t          f          r!t          |                                           S t          d|  dt	          |           j
         d           d S )Nzget_device_type(: ))r   r   r%  r  r   r   
OutputSpecget_device_typer#  r   r   r   s    r   r5  r5    s     !S /QY	Au|	$	$ /v	A
+	,	, /q||~~...<A<<a)9<<<=====r   &Union[IRNode, torch.device, None, str]c                   t          |           }|dv r t          t          | d          dk    rdS dS |t          |          x}dS ddlm} t          |t                    sJ t          |                      t          ||          S )N)cpucuda_backendtritonTFr@   )TritonScheduling)	r5  r   rA   rE   codegen.tritonr<  r   r   
issubclass)r   r  device_schedulingr<  s       r   	is_tritonr@    s    QF   6f...//8;;4u!:6!B!BBKu000000'..GG5F0G0GGG.')9:::r   c                (    t          |           dk    S )Nr8  )r5  r   s    r   is_cpurB    s    1&&r   Union[Buffer, TensorBox]	alignmentr   c           	     ~    t           t                    rV                                 Bt                                                     s!t                                                     rdS t           fdt          t                                                     dz
            D                       }t          j
        j                                                             d                   dk    p?t          j
        j                                                             d                   dk    }|o|S )NFc              3     K   | ]G}t           j        j                                                            |                   z  d k    V  HdS )r   N)rm   r  r  size_hint_or_throw
get_stride)r   r   rD  r   s     r   r   z2is_aligned_realized_tensor_hint.<locals>.<genexpr>  sb         
		,	,Q\\^^A->	?	?)	KPQQ     r   r@   )r   r   maybe_get_strider3   rH  r  r   r   r   rm   r  r  rG  )r   rD  aligned_stridesaligned_last_dims   ``  r   is_aligned_realized_tensor_hintrM    s"   
 q&!!' 00 ( .. ( u     s1<<>>**Q.//    O 	
++ALLNN2,>??1D 	F7..qzz||B/?@@AE  //r   strides1Sequence[_IntLike]strides2shapec                   t          |          t          |           k    r t          |           t          |          k    sJ t          || |          D ]\  }}}t          j        j                            |d          r,t          j        j                            ||          sMt          j        j                            |          t          j        j                            |          k    s dS dS )zP
    Returns true if the strides are equal, ignoring dimensions of size 1 .
    r@   FT)r   r   rm   r  r  statically_known_leqstatically_known_equalssymbolic_hint)rN  rP  rQ  dims1s2s         r   significant_strides_equalrY    s     u::X&&3x==CMM+I+I+II5(H55 	 	R700a88 	w77
 
 	'"00448H8V8V9
 9
 
 
 554r   tensorstrides"Sequence[Union[int, torch.SymInt]]c                   t          |           s| S t          d t          ||                                           D                       r| S t	          ||                                 |                                           s| S t          |           \  }}g |j        }t          |                                           D ]5\  }}t          j
        j                            |d          r||         ||<   6t          |j        |j        |j        ||j        |j                  }t'          t)          ||                    S )a  
    Tries to match the strides of the tensor to those in the meta_strides. Strides of insignificant
    dimensions - size 0 or 1 - will be updated.

    If there are real stride differences (NHWC vs NCHW), or the tensor is not realized, then the input will be returned
    c              3  f   K   | ],\  }}t           j        j                            ||          V  -d S r   rm   r  r  rT  r   rW  rX  s      r   r   z2try_match_insignificant_strides.<locals>.<genexpr>  sO        B 	
00R88     r   r@   datalayout)r  r   r   rH  rY  r  as_storage_and_layoutr  r   rm   r  r  rS  FixedLayoutr  r  r  offset	is_pinnedr   ReinterpretView)rZ  r[  storage
old_layout
new_strider   r   
new_layouts           r   try_match_insignificant_stridesrm    sN    !(( 
  '6#4#4#6#677      $Wf.?.?.A.A6??CTCTUU /77GZ%:$%J&//++,, ' '1700A66 	'#AJJqM J _'*EEEFFFr   gmtorch.fx.GraphModulec                    | j                             d          d         }d t          |j                  D             |j        d<   ddlm}  ||            d S )Noutput)opr   c                    g | ]\  }}|S r   r   )r   r   r  s      r   r   z.gm_original_output_strides.<locals>.<listcomp>  s)     4 4 4Q4 4 4r   user_visible_output_idxs)record_original_output_strides)r  
find_nodesr   r   metatorch._inductor.compile_fxru  )rn  output_noderu  s      r   gm_original_output_stridesrz    s}    (%%%2215K4 4#K$4554 4 4K/0 JIIIII""2&&&&&r   inputsSequence[IRNode]
list[Expr]c                    t                      }| D ]N}|t          |                                d          z  }|t          |                                d          z  }Ot	          |          S )NFunbacked_only)r;   r)   r  rH  r   )r{  sym_varsinps      r   get_symbolic_inputsr    sl    !+H L L$S\\^^5IIII$S^^%5%5UKKKK>>r   c                     e Zd ZU dZ e            Zded<    ej        d          Z	ded<    ej        d          Z
ded	<    ej        d          Zd
ed<   eej        dzd                        Zed{d            Zd|dZd}dZd~dZddZddZddZd~d Zddd$Z	 ddd(Zdd*Zdd,Zdd.Zdd0Zdd2Zdd4Zdd5Z dd7Z!dd9Z"e#dd;            Z$dd=Z%dd>Z&dd@Z'dddDZ(ddFZ)ddHZ*ddIZ+ddKZ,ddMZ-ddOZ.ddPZ/ddQZ0ddRZ1ddSZ2dddVZ3ddYZ4d}dZZ5dd[Z6d}d\Z7	 ddd`Z8ddaZ9ddcZ:	 dddeZ;ddgZ<ddiZ=ddjZ>ddlZ?	 dddoZ@ddpZAddqZBddrZCddsZDdduZEddvZFddwZGddxZHeIre#ddy            ZJdAS dAS )r   zBase class for all intermediate representation (IR) nodes in TorchInductor.

    Note:
        This is an abstract base class. Most methods raise NotImplementedError
        and must be overridden by concrete subclasses.
    zClassVar[OrderedSet[Any]]_current_originsF)initOrderedSet[Any]originsOptional[list[str]]	tracebackOptional[torch.fx.Node]origin_nodeOrderedSet[Node]r   Generator[None, None, None]c              #     K   t           j        }|| z  t           _        	 d V  |t           _        d S # |t           _        w xY wr   )r   r  )r  olds     r   current_originszIRNode.current_origins/  sL       %"%-	*EEE&)F###cF#))))s   1 ?r   r   c                ^    t          | t          t          t          t          t
          f          S r   )r   ComputedBufferInputsKernelInputBufferrh  TemplateBuffer)r   s    r   is_realized_nodezIRNode.is_realized_node9  s+    	
 	
 		
r   attrr   r)  r   r   c                >    t                               | ||           d S r   )r   __setattr__)selfr  r)  s      r   _post_init_setattrzIRNode._post_init_setattrF  s"     	4u-----r   c                    t          | j                  }|                     d|           |                     dt          j        rt          j                    nd            |                     dd            d S )Nr  r  r  )r;   r  r  rA   debug_ir_tracebackr  format_stack)r  r  s     r   __post_init__zIRNode.__post_init__L  sz    T233	7333V5NX/111TX	
 	
 	
 	t44444r   OrderedSet[str]c                X    t          d |                                 D                       S )Nc              3  $   K   | ]}|j         V  d S r   r   r   deps     r   r   z(IRNode.get_read_names.<locals>.<genexpr>U  $      ??s#(??????r   r;   	get_readsr  s    r   get_read_nameszIRNode.get_read_namesT  )    ??dnn.>.>??????r   c                    | j         S r   )r  r  s    r   get_tracebackzIRNode.get_tracebackW  s
    ~r   c                    | j         S r   r  r  s    r   get_origin_nodezIRNode.get_origin_nodeZ      r   Optional[Operation]c                    d S r   r   r  s    r   get_defining_opzIRNode.get_defining_op]      tr   c                \   t                      }| j        }t          | t                    r+|                                 }| j        rt          |g          }|D ]}t          |d          r"|j        r|                    |j                   4t          j
        j        j                            di                               |j        g           }t          |t                    s|D ]C}t          j
        j        j                            |d           }|r|                    |           D|S )Nstack_trace	postToPre)r;   r  r   ExternKernelr  r  hasattrr  addr%  	_inductordebug _inductor_post_to_pre_grad_nodesgetr   r   #_inductor_pre_grad_node_stack_trace)r  stack_tracesr  r  r   pre_grad_nodes	node_namer  s           r   get_stack_traceszIRNode.get_stack_traces`  sE    )3,dL)) 	4..00K 4$k]33 	6 	6Dt]++ 60@ 6  !12222 O)JNN#R c$)R(( 
 ".$77 !/ 6 6I-QUU%t   
 # 6$((5556 r   TshortenSequence[str]c                ^   dt          | dd           }|r t          |          dk    r|d d          d}|                                 s|gS g }|                                 D ]D}|                    d           ||                    d          z  }|                    d	           E|g|z   S )
Nzorigins=r   @   =   z...zstack_traces = {
})r   r   r  appendsplit)r  r  r  stack_trace_strr  s        r   common_reprzIRNode.common_repr  s    ;WT9b99;; 	+s7||b(( "***G$$&& 	90022 	( 	(K""#5666{00666O""3''''y?**r   linesSequence[object]	multilinec                Z   t          |          t          |                     |                    z   }t          t          t          |                    }|r<t	          d                    |                    }t          |           j         d| dS t          |           j         d| dS )Nz,
z(
z
)(r3  )r   r  mapr   indentjoinr   r   )r  r  r  r  	new_liness        r   
str_helperzIRNode.str_helper  s     Ud4#3#3G#<#<===Se__%% 	5uzz%0011I4jj)<<i<<<<4jj)44E4444r   torch.dtypec                    | j         S r   r  r  s    r   r"  zIRNode.get_dtype  
    zr   Optional[torch.dtype]c                N    	 |                                  S # t          $ r Y d S w xY wr   )r"  NotImplementedErrorr  s    r   maybe_get_dtypezIRNode.maybe_get_dtype  s9    	>>###" 	 	 	44	    
$$Layoutc                B    t          dt          |            d          )Nz#get_layout() is not implemented by !r  r   r  s    r   r  zIRNode.get_layout  s"    !"UT

"U"U"UVVVr   Optional[Layout]c                N    	 |                                  S # t          $ r Y d S w xY wr   )r  r  r  s    r   maybe_get_layoutzIRNode.maybe_get_layout  9    	??$$$" 	 	 	44	r  r4  c                *    |                                  S r   )r  r  s    r   get_output_speczIRNode.get_output_spec  s       r   Optional[OutputSpec]c                N    	 |                                  S # t          $ r Y d S w xY wr   )r  r  r  s    r   maybe_get_output_speczIRNode.maybe_get_output_spec  s;    	'')))" 	 	 	44	r  c                P    t          |                                 t                    S )z4True for single tensor output (excludes MultiOutput))r   r  r  r  s    r   has_tensor_outputzIRNode.has_tensor_output  s    $4466???r   Sequence[Expr]c                B    t          dt          |            d          )Nz!get_size() is not implemented by r  r  r  s    r   r  zIRNode.get_size  s"    !"Sd4jj"S"S"STTTr   Optional[Sequence[_IntLike]]c                N    	 |                                  S # t          $ r Y d S w xY wr   )r  r  r  s    r   maybe_get_sizezIRNode.maybe_get_size  7    	==??"" 	 	 	44	r  .Union[_IntLike, sympy.Rel, Sequence[_IntLike]]c                *    |                                  S r   r  r  s    r   rQ  zIRNode.shape  s    }}r   r!   c                D    t          |                                           S r   )rh   r  r  s    r   	get_numelzIRNode.get_numel  s    T]]__---r   c                    t           j        j                            t	          j        |                                 d                    S Nr   rm   r  r  statically_known_truer   Eqr  r  s    r   is_zero_elementszIRNode.is_zero_elements  2    w55eht~~?O?OQR6S6STTTr   r0  c                @    t          dt          |                      )a)  
        If the IRNode refers to data which has not been materialized (e.g.,
        it is a Pointwise/Reduction that could potentially have more
        compute fused into it), realize the IRNode into physical memory,
        ending the possibility of fusing into it, but allowing, e.g., multiple
        users to access the data without having to recompute.

        Check StorageBox.realize for a particularly notable implementation.

        TODO(ezyang): I think, in principle, every IRNode should have an
        implementation of this, and most of the time no-op is OK, but you
        really do have to audit each IRNode for this, so for now, raise
        an error if it's not implemented.  Note that some code in graph.py
        will catch this thrown error and suppress it with a warning.
        zrealize NYI on r  r  s    r   realizezIRNode.realize  s!      ""@DJJ"@"@AAAr   NwriterOptional[IndentedBuffer]c                @    t          dt          |                      )Nzcodegen_reference NYI on r  r  r  s     r   codegen_referencezIRNode.codegen_reference  s    !"Jd4jj"J"JKKKr   Optional[torch.device]c                    d S r   r   r  s    r   r#  zIRNode.get_device  r  r   torch.devicec                6    |                                  }|J |S r   r#  r  r  s     r   get_device_or_errorzIRNode.get_device_or_error  s"    ""!!!r   c                    dS NFr   r  s    r   has_exceeded_max_readszIRNode.has_exceeded_max_reads      ur   $Callable[[Sequence[Expr]], OpsValue]c                D    t          t          |           j                  r   r  r   r   r  s    r   make_loaderzIRNode.make_loader      !$t**"5666r    Callable[[Sequence[Expr]], Expr]c                D    t          t          |           j                  r   r  r  s    r   make_indexerzIRNode.make_indexer  r  r   rO  c                D    t          t          |           j                  r   r  r  s    r   rH  zIRNode.get_stride  r  r   c                N    	 |                                  S # t          $ r Y d S w xY wr   )rH  r  r  s    r   rJ  zIRNode.maybe_get_stride  r  r  c                D    t          t          |           j                  r   r  r  s    r   get_namezIRNode.get_name  r  r   c                N    	 |                                  S # t          $ r Y d S w xY wr   )r#  r  r  s    r   maybe_get_namezIRNode.maybe_get_name  r  r  c                p    	 |                                  t          j        j        v S # t          $ r Y dS w xY wr  )r#  rm   r  graph_inputsr  r  s    r   is_input_bufferzIRNode.is_input_buffer  sA    	==??ag&:::" 	 	 	55	s   $' 
55	thresholdOptional[int]c                    dS r  r   r  r)  s     r   has_large_inner_fnzIRNode.has_large_inner_fn	  r  r   usersr   c                    d S r   r   r  r.  s     r   
mark_reusezIRNode.mark_reuse      r   c                    d S r   r   r  s    r   realize_hintzIRNode.realize_hint  r2  r   c                D    t          t          |           j                  r   r  r  s    r   unwrap_viewzIRNode.unwrap_view  r  r   c                D    t          t          |           j                  r   r  r  s    r   freeze_layoutzIRNode.freeze_layout  r  r   r   r   allow_paddingc                D    t          t          |           j                  r   r  r  r   r9  s      r   freeze_layout_with_stride_orderz&IRNode.freeze_layout_with_stride_order       "$t**"5666r   c                D    t          t          |           j                  r   r  r  r   s     r   freeze_layout_with_fill_orderz$IRNode.freeze_layout_with_fill_order  r  r   r  c                D    t          t          |           j                  r   r  r  r  s     r   freeze_layout_with_same_orderz$IRNode.freeze_layout_with_same_order   r  r   exact_stridesc                D    t          t          |           j                  r   r  r  rD  r9  s      r    freeze_layout_with_exact_stridesz'IRNode.freeze_layout_with_exact_strides#  r=  r   dependencies.ReadWritesc                D    t          t          |           j                  r   r  r  s    r   get_read_writeszIRNode.get_read_writes(  r  r   OrderedSet[Dep]c                4    |                                  j        S r   rJ  readsr  s    r   r  zIRNode.get_reads+      ##%%++r   c                D    t          |                                           S r   )r   r  r  s    r   	num_readszIRNode.num_reads.  s    4>>##$$$r   ry   c                D    t          t          |           j                  r   r  r  s    r   get_storage_numelzIRNode.get_storage_numel1  r  r   r  r   c                D    t          t          |           j                  r   r  r  r  s     r   get_free_symbol_useszIRNode.get_free_symbol_uses4  r=  r   c                D    t          t          |           j                  r   r  r  s    r   get_reduction_typezIRNode.get_reduction_type9  r  r   c                D    t          t          |           j                  r   r  r  s    r   get_reduction_sizezIRNode.get_reduction_size<  r  r   c                    dS r  r   r  s    r   	is_externzIRNode.is_extern?  r  r   c                    dS r  r   r  s    r   is_no_opzIRNode.is_no_opB  r  r   r  c                D    t          t          |           j                  r   r  r  s     r   constant_to_devicezIRNode.constant_to_deviceE  r  r   c                D    t          t          |           j                  r   r  r  s    r   get_mutation_nameszIRNode.get_mutation_namesH  r  r   c                D    t          t          |           j                  r   r  r  s    r   get_operation_namezIRNode.get_operation_nameK  r  r   c                D    t          t          |           j                  r   r  r  s    r   get_inputs_that_alias_outputz#IRNode.get_inputs_that_alias_outputN  r  r   c                    d S r   r   r  s    r   r  zIRNode.dtypeS  s    (+r   )r  r  r   r  r   r   r   r   )r  r   r)  r   r   r   r   r   r   r  )r   r  r   r  r   r  T)r  r   r   r  )TT)r  r  r  r   r  r   r   r   r   r  )r   r  r   r  )r   r  r   r4  )r   r  r   r   r   r  )r   r  )r   r  r   r!   r   r0  r   r  r	  r   r   r   r  r   r  r   r  r   r  r   rO  r   r   r)  r*  r   r   r.  r   r   r   r   r   Fr   r   r9  r   r   r   r   r   r   r   r  rO  r   r   rD  rO  r9  r   r   r   r   rH  r   rK  r   r   r   ry   r  r   r   r   r  r  r   r   r   r  )Kr   r   r   __doc__r;   r  r   dataclassesfieldr  r  r  staticmethod
contextlibcontextmanagerr  r  r  r  r  r  r  r  r  r  r  r"  r  r  r  r  r  r  r  r  propertyrQ  r  r  r  r  r#  r  r  r  r  rH  rJ  r#  r%  r(  r-  r1  r4  r6  r8  r<  r@  rC  rG  rJ  r  rQ  rS  rV  rX  rZ  r\  r^  r`  rb  rd  rf  r   r  r   r   r   r   r     s          3=*,,>>>>  1{0e<<<G<<<<%6[%6E%B%B%BIBBBB+<;+<%+H+H+HKHHHH* * *  \* 

 

 

 \

. . . .5 5 5 5@ @ @ @                >+ + + + +  PT	5 	5 	5 	5 	5      W W W W   ! ! ! !   @ @ @ @U U U U       X. . . .U U U UB B B B$L L L L L      
   7 7 7 77 7 7 77 7 7 7   7 7 7 7                7 7 7 77 7 7 7 ;@7 7 7 7 7
7 7 7 77 7 7 7 HM7 7 7 7 7
7 7 7 7, , , ,% % % %7 7 7 7 %*7 7 7 7 7
7 7 7 77 7 7 7      7 7 7 77 7 7 77 7 7 77 7 7 7  ,	+++ 
+++, ,r   c                      e Zd Zd!dZd"dZd#dZd$d	Zd%dZd&dZd&dZ	d'dZ
d(dZd)dZd*dZd+dZd,dZ	 d-d.dZd/dZd S )0	Operationr   r   c                    d | _         d S r   operation_namer  s    r   r  zOperation.__post_init__Y  s    -1r   r  c                    t           r   r  r  s    r   r#  zOperation.get_device\      !!r   r  c                4    t          | d          sJ | j        S Nr  )r  r  r  s    r   r  zOperation.get_origin_node_  s!    t]+++++r   r  c                4    t          | d          sJ | j        S )Nr  )r  r  r  s    r   get_originszOperation.get_originsc  s     tY'''''|r   r   c                "    | j         J | j         S r   r  r  s    r   rd  zOperation.get_operation_nameg  s    "...""r   r   c                    dS r  r   r  s    r   r\  zOperation.is_externk  r  r   c                    dS r  r   r  s    r   r^  zOperation.is_no_opn  r  r   rH  c                    t           r   r  r  s    r   rJ  zOperation.get_read_writesq  r  r   r   c                .    ||                                  v S r   )r  )r  r   s     r   
is_user_ofzOperation.is_user_oft  s    t**,,,,r   r  c                X    t          d |                                 D                       S )Nc              3  $   K   | ]}|j         V  d S r   r  r  s     r   r   z+Operation.get_read_names.<locals>.<genexpr>x  r  r   r  r  s    r   r  zOperation.get_read_namesw  r  r   rK  c                4    |                                  j        S r   rM  r  s    r   r  zOperation.get_readsz  rO  r   list[Buffer]c                    t           r   r  r  s    r   get_outputszOperation.get_outputs}  r  r   r   c                    t                      S r   r:   r  s    r   get_unbacked_symbol_defsz"Operation.get_unbacked_symbol_defs      ||r   Fr  c                    t                      S )a  
        When unbacked_only=True:
        Returns the unbacked symbols which are required to be in scope in
        order to successfully perform codegen for this buffer.  For example,
        a buffer that corresponds to an extern kernel call that takes i0 as
        an argument would return {i0} here.  This is used to generate necessary
        dependencies that ensure we actually bind i0 in codegen before you
        try to use it.

        Note that this is NOT transitive; in particular, if this buffer takes
        in as input another buffer with dynamic shape (e.g., (i0,)), we will
        not report it here, because you will already have a dependency
        on that buffer, which will eventually have a dependency on i0 if
        necessary.

        When unbacked_only=False:
        Similar to `unbacked_only=True` but including all free symbols
        instead of only free unbacked symbols.
        r:   rU  s     r   rV  zOperation.get_free_symbol_uses  s    , ||r   r   c                    dS )z
        Gets extra global memory size needed by this buffer.
        Some algorithms (e.g. group gemm) may require extra global memory in the generated code.
        r   r   r  s    r   get_workspace_sizezOperation.get_workspace_size  s	    
 qr   Nri  rv  rk  )r   r  r{  rq  r  )r   r   r   r   rj  r  r   r  r   r   r  r  r  )r   r   r   r  r#  r  r  rd  r\  r^  rJ  r  r  r  r  r  rV  r  r   r   r   r  r  W  sN       2 2 2 2" " " "          # # # #      " " " "- - - -@ @ @ @, , , ," " " "    %*    0     r   r  c                  f    e Zd ZU ded<   ded<   ded<   ded<    ed           	 d;d<d            Zd=dZd> fdZd?dZeZ	d@dZ
dAdZdBdZdBdZedCd!            Zeej        fdDd$            ZedEd&            ZdFd(Zed?d)            ZdGdHd-Zd;dId/ZdJd1ZdKd3ZdLd5ZdBd6ZdMd8ZdNd:Z xZ S )OLoopsr  r  r  r  Callable[..., Any]inner_fnrO  rangesFr  r   r   r   c                     t                      j        g fd| j        D             |                               R  S )Nc              3  8   K   | ]}t          |          V  d S r   r(   r   er  s     r   r   z-Loops.get_free_symbol_uses.<locals>.<genexpr>  s.      FFQq-00FFFFFFr   )r;   unionr  inner_fn_free_symbolsrU  s    `r   rV  zLoops.get_free_symbol_uses  sY     "z||! 
FFFF$+FFF
&&}55
 
 
 	
r   namesr  r   c                                           d j        j         dt           j                                                   g fd|D             z   d j        gz             S )N'c                :    g | ]}| d t          |           S =)r   )r   r   r  s     r   r   z!Loops._to_str.<locals>.<listcomp>  s1    AAA4$..t,,..AAAr   origin_node=)r  r  r   r   r  inner_fn_strr  )r  r  s   ` r   _to_strzLoops._to_str  s    'DK$'''DJ!!##
 BAAA5AAAB 3d.2234
 
 	
r   r   c                H    t                                                       d S r   )superr  r  	__class__s    r   r  zLoops.__post_init__  s    r   c                ,    |                      d          S )Nr  r  r  s    r   __str__zLoops.__str__  s    ||K(((r   r  c                    | j         S r   r  r  s    r   r#  zLoops.get_device  
    {r   r  c                    | j         S r   r  r  s    r   r  zLoops.get_origin_node  r  r   r  c                    | j         S r   r  r  s    r   r  zLoops.get_size  r  r   c                    | j         S r   r  r  s    r   get_pointwise_sizezLoops.get_pointwise_size  r  r   r   r   r   'Union[TensorBox, ShapeAsConstantBuffer]c                   |                     dd           }|                     dd           } | |i |}|                    d|           |                    d|p|j                   t                              |          S )Nr  r  )popr  r  r   create)clsr   r   r  tbrs         r   r  zLoops.create  s     jj55ZZT**C    	
]K888	["*;<<<"""r   r}   r?   c                :    fdt          |           D             S )Nc                d    g | ],\  }}|d k    rt           j        j        nt          |          -S r@   )r   SZerorg   )r   nr   r}   s      r   r   z Loops._index.<locals>.<listcomp>  sH     
 
 
1 FFEGLL(Fvq(Q(Q
 
 
r   )r   )r  r}   s    `r   _indexzLoops._index  s6    
 
 
 
!&))
 
 
 	
r   rO   c                ~   t          t          j                              }t          j        |          5  t	          j        t          dd          5   | j        |                                   |	                                cd d d            cd d d            S # 1 swxY w Y   d d d            d S # 1 swxY w Y   d S Nallow_indexingT)
rN   rm   MockHandlerset_ops_handlerr    r   r   r  inner_fn_argsgetvalue)r  	opcounters     r   inner_fn_opcountzLoops.inner_fn_opcount  sH    11	i((	( 	(L)94@@	( 	( DM4--//00%%''	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	(s5   B20BB2B	B2!B	"B22B69B6Sequence[Sequence[_IntLike]]c                8    |                      | j                  fS r   )r  r  r  s    r   r  zLoops.inner_fn_args  s    DK((**r   c                ^    t          j        j        | j        g|                                 R  S r   )rm   KernelFormatterHandlerir_to_stringr  r  r  s    r   r  zLoops.inner_fn_str  s8    '4M
 ..00
 
 
 	
r   Nr)  r*  c                x    |d}t          |t          j                  }|                                 j        |k    S r   )maxrA   realize_opcount_thresholdr  num_opsr,  s     r   r-  zLoops.has_large_inner_fn  s9    I	6#CDD	$$&&.::r   OrderedSet[Symbol]c                d    |                      | j                  }t          | j        ||          S Nr  )r  r  rI   r  )r  r  r   s      r   r  zLoops.inner_fn_free_symbols   s,    DK((#DM5VVVVr   rK  c                   t          j        t          dd          5  |                                 rXt	          |                                 |                                 |                                           j        cd d d            S t	          |                                 |                                           j        cd d d            S # 1 swxY w Y   d S r  )	r    r   r   rX  rK   r  r  rZ  rN  r  s    r   r  zLoops.get_reads  s    \.*:DAA 	 	&&(( 
*$$&&MMOO++--  	 	 	 	 	 	 	 	 +$$&&MMOO  	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   A C	8CCCr  c                N    t          |                                 j                  S r   )r;   r  read_buffersr  s    r   r  zLoops.get_read_names  s    $//11>???r   r   c                N    t          |                                 j                  S r   )r   r  r  r  s    r   rQ  zLoops.num_reads  s    4((**7888r   c                B    t          dt          |            d          )Nz+get_reduction_size() is not implemented by r  r  r  s    r   rZ  zLoops.get_reduction_size  (    !G$t**GGG
 
 	
r   r0  c                B    t          dt          |            d          )Nz+get_reduction_type() is not implemented by r  r  r  s    r   rX  zLoops.get_reduction_type  r  r   r   c                B    t          dt          |            d          )Nz+constant_to_device() is not implemented by r  r  r  s     r   r`  zLoops.constant_to_device"  r  r   r  r  )r  r  r   r   ri  r{  rv  rk  rr  )r   r   r   r   r   r  )r  rO  r}   r?   r   r  )r   rO   r   r  r   r|  r  r   r   r  r  rj  r  rt  r  )!r   r   r   r   rX   rV  r  r  r  __repr__r#  r  r  r  classmethodr  r  r?   INDEXr  rW   r  r  r  r-  r  r  r  rQ  rZ  rX  r`  __classcell__r  s   @r   r  r    sq            G$$$)
 
 
 
 %$
	
 	
 	
 	
           ) ) ) ) H                 # # # [# :>* 
 
 
 
 \
 ( ( ( ](+ + + + 
 
 
 ]

; ; ; ; ;W W W W W   @ @ @ @9 9 9 9
 
 
 


 
 
 


 
 
 
 
 
 
 
r   r  r   Union[Expr, Sequence[Expr]]r  r  rl   c               ~    |j         r"t          j        t          d          |          S t          j        d|          S )Nnanr   )is_floating_pointrk   constantfloat)r   r  s     r   nop_loader_fnr  (  s7     &|E%LL%000|Au%%%r   c                  6    e Zd ZddZddZddZddZddZdS )	Pointwiser   r  c                n    |                                  rt          t          | j                  S | j        S Nr  )r  r   r  r  r  r  s    r   r  zPointwise.make_loader1  s3      "" 	<=
;;;;}r   Sequence[sympy.Expr]c                    g S r   r   r  s    r   rZ  zPointwise.get_reduction_size8  s    	r   r0  c                    d S r   r   r  s    r   rX  zPointwise.get_reduction_type;  r  r   output_nameindexer!Callable[[Sequence[Expr]], Never]varsr  r   c                ~    |                                  }t          j        |pd ||           ||                    S Nunnamed)r  rk   storer  r  r  r  loaders        r   store_outputzPointwise.store_output>  s>     !!##y1	774==&&,,OOOr   r  r  r   c                    |                                  } t          j        t          d|          |          }t	          || j        || j                  S FMove this to a given device. Requires that all reads are to constants.override_devicer  r  r  r  )r  r    r   ConstantBufferr  r  r  r  r  r  s      r   r`  zPointwise.constant_to_deviceG  sX    !!##Hn.?HHPP*;	
 
 
 	
r   Nrx  )r   r  rt  )r  r0  r  r  r  r  r   r   r  )r   r   r   r  rZ  rX  r  r`  r   r   r   r  r  /  s|                 P P P P	
 	
 	
 	
 	
 	
r   r  c                  8    e Zd ZU ded<   dZded<   dd
ZddZdS )Scatterr  output_indexerNrQ   scatter_moder  r  r   r   c                    |                                  } t          j        t          d|          |          }t	          || j        || j        | j        | j                  S )r"  r#  )r  r  r  r  r)  r*  )	r  r    r   r%  r(  r  r  r)  r*  r&  s      r   r`  zScatter.constant_to_deviceX  sd    !!##Hn.?HHPP*;.*
 
 
 	
r   r  r0  r  r  r  r  r   c                    |                                  }|d}t          j        | ||                     |                     ||          | j                  S )Nr  )mode)r  rk   r  r)  r*  r  s        r   r  zScatter.store_outpute  sh     !!###KyGD''--..F4LL"	
 
 
 	
r   r  )r  r0  r  r  r  r  r   r   )r   r   r   r   r*  r`  r  r   r   r   r(  r(  S  s[         4444"L""""
 
 
 

 
 
 
 
 
r   r(  
logical_ormaximumminimummulr  bitwise_xor)anyr  minprodsumxor_sumz"dict[str, Callable[..., OpsValue]]REDUCTION_COMBINE_FNreduction_typearg_break_ties_leftCallable[..., object]c                      t           v rt                     S  dv r
d fd}|S  dk    rdd
}|S t          d            )Nargmaxargminatuple[object, object]br   tuple[OpsValue, OpsValue]c                   | \  }}|\  }}dk    rt          j        ||          }nt          j        ||          }t          j        ||          }t	                    rzt          j        ||          }t          j        ||          }	t          j        |t          j        ||	                    }t          j        |t          j        ||	                    }rt          j        ||          nt          j        ||          }
t          j        |t          j        ||
                    }t          j        |||          t          j        |||          fS )Nr?  )	rk   ltgteqr,   ner.  logical_andwhere)r@  rB  a_valuea_indexb_valueb_indexmaskequala_isnanb_isnantier:  r  r9  s              r   argmax_combine_fnz3get_reduction_combine_fn.<locals>.argmax_combine_fn  s@     !GW GW))vgw//vgw//F7G,,Ee$$ Q&'22&'22~dCF7G,D,DEEucogw.O.OPP '.w(((VGW-- 
 >$s(C(CDDD	$11	$11 r   welford_combine#tuple[OpsValue, OpsValue, OpsValue]c                l    | \  }}}|\  }}}||z
  }||z   }	||	z  }
|||
z  z   ||z   ||z  |z  |
z  z   |	fS r   r   )r@  rB  a_meana_m2a_weightb_meanb_m2b_weightdelta
new_weight	w2_over_ws              r   welford_combine_fnz4get_reduction_combine_fn.<locals>.welford_combine_fn  sm     &'"FD(%&"FD(VOE!H,J :-I**teemh6BB r   zunknown reduction_type=)r@  rA  rB  rA  r   rC  )r@  rV  rB  rV  r   rV  )r8  r  )r9  r  r:  rT  ra  s   ```  r   get_reduction_combine_fnrb    s     ---#N33	/	/	/	 	 	 	 	 	 	 	: ! 	,	,	,	 	 	 	  "! ""LN"L"LMMMr   c                      e Zd ZU ded<   ded<   ded<   ded<   dVdZeZ ed           dWdX fd            ZdYdZdZdZ	d[dZ
d\dZd]d ZdWdXd!Zd^d%Ze	 d_d`d1            Zedad4            Zeej        d&fdbd7            Zedcd:            Zedcd;            Zeddd?            ZededA            Ze	 d_dfdG            ZedgdO            ZedhdS            Ze	 d_didT            ZedjdU            Z xZS )k	ReductionrO  reduction_rangesrP   r9  r  	src_dtyperT   reduction_hintr   r   c                ,    |                      d          S )N)r  re  r9  r  r  s    r   r  zReduction.__str__  s    ||LMMMr   Fr  r   r  c                    t                                                     t                      j        fd| j        D              z  S )Nc              3  8   K   | ]}t          |          V  d S r   r(   r  s     r   r   z1Reduction.get_free_symbol_uses.<locals>.<genexpr>  s.      PPQq-00PPPPPPr   )r  rV  r;   r  re  r  r  r  s    `r   rV  zReduction.get_free_symbol_uses  sN    ww++M::=OZ\\=OPPPP$:OPPP>
 
 	
r   r  c                    | j         S r   )re  r  s    r   rZ  zReduction.get_reduction_size  s    $$r   r0  c                    | j         S r   )r9  r  s    r   rX  zReduction.get_reduction_type      ""r   r  r  r  r  reduction_varsSequence[Symbol]r   c           	         t          j        | j        | j        | j        |                     ||                    }t          j        |pd ||          |           d S r  )rk   	reductionr  rf  r9  r  store_reduction)r  r  r  r  ro  r)  s         r   rs  zReduction.store_reduction  s`     JNMM$//	
 
 	K49ggdmmUKKKKKr   r   c                T    t          | j                  t          | j                  z   S r   )r   r  re  r  s    r   index_lengthzReduction.index_length  s"    4;#d&;"<"<<<r   Sequence[Sequence[Expr]]c                    |                      | j                  }|                      | j        t          j                  }||fS r   )r  r  re  r?   R0_INDEX)r  r   rindexs      r   r  zReduction.inner_fn_args  s7    DK((T2DMBBvr   c                    |                      | j                  }|                      | j        t          j                  }t          | j        |||          S r  )r  r  re  r?   rx  rI   r  )r  r  r   ry  s       r   r  zReduction.inner_fn_free_symbols  sO    DK((T2DMBB#M5&
 
 
 	
r   r  r  r   c           
         |                                  } t          j        t          d|          |          }t	          || j        || j        | j        | j        | j	        t          j                  S )r"  r#  r  r  r  r  re  r9  rf  rg  )r  r    r   r%  rd  r  r  re  r9  rf  rT   DEFAULTr&  s      r   r`  zReduction.constant_to_device  so    !!##Hn.?HHPP*;!2.n(0	
 	
 	
 		
r   N	dst_dtyper  Callable[_P, OpsValue]r  %Union[ReductionType, Literal['scan']]reduction_numelr!   
input_noder  tuple[ReductionHint, _IntLike]c	           
        t           j        j                            |          }	t           j        j                            t	          |                    }
|dk    p:t           j                            | t          j                   o|dvot          j	        }t          |	          rt          |
          st          j        dfS t          j        |           }|j        }d}|rMt!          j        t           j        j        | d          }t!          j        t           j        j        | d          }ndd}|}|
dk    r ||	|
          }|dk    rt          j        |fS |t+          |t,                    rt/          j        t2          dd          5  t5          |          \  }}d d d            n# 1 swxY w Y   |i|gt           j        j                            t	          ||z                       }|	|k    r-t6                              d|||||           t          j        dfS t          j        |fS |	|k    s|
|dz  dz  k    rt          j        dfS t;          | |||||dk    r|nd|t          j                  }dd} ||          \  }}|r ||          \  }}t=          |          dk    rt          j        dfS t?          j         |!                                |"                                          \  \  }}}d}d}|D ]}t           j        j        #                    ||          }t           j        j        $                    ||tK          |&                                                    } tO          d | D                       }!|!r|dz  }|dz  }||k    rt          j         ||	|
          fS t          j(         ||	|
          fS )Nscanr=  r@       T)inner_reductionFreduction_numel_hintr   
numel_hintr   c                    dS Nr@   r   )r  r  s     r   inner_reduction_splitsz4Reduction.num_splits.<locals>.inner_reduction_splits-  s	     qr   r  zUse previous IRNode's range and reduction_ranges instead of split. current ranges: %s, current reduction ranges: %s, current split: %d, new ranges: %s, new reduction ranges: %srI  r   r6  r|  r  rd  tuple[Sequence[Expr], bool]c           	       	 |                                  }|J t          d t          ||                                 |                                           |           }|                                }|j        J d |j        D             }g }d}t          |j        d           D ]	t          	fd|D                       r|
                    	j                   	j        t          j        j        v rbt          j        j        	j                 }t!          |j        dd           }|                                 t!          |j        dd           |k    rd	}||fS )
Nr  r  r  r   rc  rb  c                p    g | ]3}t          |t                    t          |t          j                  1|4S r   )r   r!   r   Numberr   r  s     r   r   zBReduction.num_splits.<locals>.get_read_indices.<locals>.<listcomp>v  sN       a&& 0:!U\/J/J  r   Fc                    | j         S r   r  r   s    r   <lambda>z@Reduction.num_splits.<locals>.get_read_indices.<locals>.<lambda>}  s    af r   keyc              3  4   K   | ]}|j         j        v V  d S r   )r   r2   )r   r  mds     r   r   zAReduction.num_splits.<locals>.get_read_indices.<locals>.<genexpr>~  s-      FFaqBH11FFFFFFr   r  T)r#  r  r   r"  r  rJ  
range_varssortedrN  r   r  r   r   rm   r  name_to_bufferr   rc  decide_layout)
r  r  cbread_writesr  indiceschangedbuforiginal_strider  s
            @r   get_read_indicesz.Reduction.num_splits.<locals>.get_read_indicese  s   \\^^F%%%%!++--  
   B ,,..K )555 $/  J
 GG[.4D4DEEE + +FFFF:FFFFF +NN28,,,w!'"888g4RW=*1#*h*M*M))+++"3:x>>/QQ&*GG##r   r   c              3  "   K   | ]
}|d k    V  dS r@   Nr   r   s     r   r   z'Reduction.num_splits.<locals>.<genexpr>  s&      //!A//////r   )r  r   r  r   r   r   )r  rd  r   r  ))rm   r  r  rU  rh   has_featurerC   REDUCE_TO_SINGLE_ELEMENTrA   split_reductionsr   rT   r}  rS   r  multi_processor_count	functoolsr   choicesreduction_split_factorINNERr   r   r    r   r   rJ   logr  rd  r   rB   index_vars_squeezer  rZ  simplify_with_rangesstride_hintsr   keysr   OUTER)"r  r~  rf  r  r  re  r9  r  r  r  r  should_splitpropsnum_smmin_elements_per_threadr  outer_reduction_splitsr  
new_rangesnew_reduction_rangesextracted_numel_hintr  r  r  r  r  ro  ranges1	num_outer	num_innerr   jr[  outers"                                     r   
num_splitszReduction.num_splits  s     !w/==oNNW%33M&4I4IJJ
%/ 
##FN,STTT (( ' 	 /00 	,Z
5K5K 	, (!++ '//,"$ 	<@I@Q	0&$A A A" AJ@Q	0&%A A A""
    &<" ??**+?LLEzz$*E11%*Z*K*K%\.2BDII H H <JGG",H H H H H H H H H H H H H H H
 ).B.N+,7+;+I+I%j3G&GHH, ,( ,/CCC		G #,!&0	 	 	  -2B66 &-- $;;;VaZ"_,, (!++--;v-E-E>>5(0	
 	
 	
!	$ !	$ !	$ !	$F ,+A.. 	-))!,,JGQw<<1 (!++'3'FJJLL!..00(
 (
$NW 		 		 		A 55aAAAg&33>4#7#7 G //w/////E Q		Q		y   &(>(>$j) )   !&(>(>$j) )  s   F,,F03F0<Callable[[Sequence[_IntLike], Sequence[_IntLike]], OpsValue](Callable[[Sequence[_IntLike]], OpsValue]c                     t           j        j                                      t	          ||          dfd|dv r4t          t                                                  d fd	fd
S  S )z1Convert inner_fn from a reduction to an pointwiser   rO  r   r   c                v     t          j         fdt          j        d D              D                       S )Nc              3  0   K   | ]} |          V  d S r   r   )r   ry  r   value_fns     r   r   z=Reduction._unroll_reduction_fn.<locals>.fn.<locals>.<genexpr>  sC         HUF++     r   c                ,    g | ]}t          |          S r   )r   r   r   s     r   r   z>Reduction._unroll_reduction_fn.<locals>.fn.<locals>.<listcomp>  s    ===q%((===r   )r  reduce	itertoolsproduct)r   
combine_fnre  r  s   `r   r   z*Reduction._unroll_reduction_fn.<locals>.fn  sa    #    "+"3==,<===#    r   r?  r>  ry  rC  c                    d |D             } | |          t          j         |          t          j                  fS )Nc                6    g | ]}t          j        |          S r   r   expandr   r   s     r   r   zDReduction._unroll_reduction_fn.<locals>.value_fn.<locals>.<listcomp>  s     :::a%,q//:::r   )rk   
index_exprr%  int64)r   ry  flatten_indexr  s     r   r  z0Reduction._unroll_reduction_fn.<locals>.value_fn  sL     ;:6:::HUF++N==#8#8%+FF r   c                &     |           d         S r  r   )r   r   s    r   r  z0Reduction._unroll_reduction_fn.<locals>.<lambda>  s    E1 r   N)r   rO  r   r   )r   rO  ry  rO  r   rC  )rm   r  r  guard_int_seqrb  _fixed_indexerr   r!  )r  re  r9  rf  r  r  r   r  s   ``  @@@@r   _unroll_reduction_fnzReduction._unroll_reduction_fn  s     7+99:JKK-niHH
		 		 		 		 		 		 		 		 111* 112BCC M
       .----HIr   r  r  c
                P   t           j        j                            t	                              dk    rdfd}
 |
d           |
d           |
d           |
d          d                                v sJ  d	            dfd}t                              |||t          |                    S dk    r0dv rdfd}ndfd}t                              |||          S t          t                    rt           j        j                                      t          j        k     r[t	          |          dk    st          |j                  r4t                              ||                     |          |          S |                     ||||		  	        \  }}dfd} ||          }|t$          j        k    r|}|dk    ro|	J t)          j        t,          dd          5  t/          |	          \  }}d d d            n# 1 swxY w Y   |J |J |                     ||||||
  
        S |dk    r|                     ||||||	
  
        S t4                              t7          ||||                    S )Nr   valr   r   Union[bool, float, int]c                <   t           j        k    rt          |           S j        r;t          | t                    sJ t          |                       t          |           S t          | t                    sJ t          |                       t          |           S r   )	r%  r   r
  r   r   r   r  r   r   )r  r~  s    r   py_cnstz!Reduction.create.<locals>.py_cnst  s    
**99$0 $%c=99DD499DD9 ::%%c;77BBcBB7s88Or   r@   )r6  r7  r5  r3  z* not supported for zero-dimension tensors!r   r   rl   c                :    t          j                           S r   rk   r  )r   r~  r9  rtypes_to_initss    r   const_fnz"Reduction.create.<locals>.const_fn  s    |ON$CYOOOr   r$  r  c                .    t          j        d          S r   r  )r   r~  s    r   r   zReduction.create.<locals>.fn  s    <9555r   c                4    d D             } | |          S )Nc                0    g | ]}t           j        j        S r   r   r  r  r  s     r   r   z0Reduction.create.<locals>.fn.<locals>.<listcomp>      &N&N&Nuw|&N&N&Nr   r   )r   reduction_indexr  re  s     r   r   zReduction.create.<locals>.fn  s)    &N&N=M&N&N&NO#8E?;;;r   r  c                j    t                    r| S | dk    rt          | t          j                  S | S r  )r   r  rA   min_num_split)r  r  s    r   _maybe_increase_splitz/Reduction.create.<locals>._maybe_increase_split6  s9    /** qyy5&"6777r   rI  r  Tr|  )r  r   r   r  )r   r   r   rl   )r  r   r   r   )rm   r  r  simplifyrh   r  r  r  r   r   r"   rG  rA   unroll_reductions_thresholdrd   r   r  r  rT   r}  r    r   r   rJ   !create_multilayer_existing_rangescreate_multilayerr   rd  )r  r  r~  rf  r  r  re  r9  rg  r  r  r  r   hintr  r  r  r  r  r  s     ` ` ``          @@r   r  zReduction.create  s1    '*33MBR4S4STTa$ $ $ $ $ $ wqzz"71::

wqzz	 O "_%9%9%;%;;;;!MMM <;;P P P P P P P P ##!F||	 $    a!5556 6 6 6 6 6 6
< < < < < < < ##YF $   
 00	 33ODD01 1v&&!++vfk/B/B+ ##11.	   $    nn

 

e	 	 	 	 	 	 &%e,,
 ]222!NB;;)))n.>EE  3V4 40
0               )))'33388 $   QYY((    !!1-#-	 	 	
 
 	
s   H$$H(+H(r  #Union[_NumLike, Sequence[_NumLike]]c           	        | dv rHt          |          rt          d          S t          |          rdS t          j        |          j        S | dv rHt          |          rt          d          S t          |          rdS t          j        |          j        S t          |          rdnd}t          |          rdnd}|||||||f|||ft          d          |fd	|          S )
N)r  r>  z-infF)r4  r?  infTr   r@   )r6  r5  r7  r3  welford_reducerU  online_softmax_reduce)r,   r  r+   r%  iinfor4  r  )r9  r  zeroones       r   default_accumulatorzReduction.default_accumulatorv  s    ...e$$ .V}}$!%(( .u{5))--...e$$ .U||#!%(( .t{5))--(//6uuQ&u--4dd1#T40 $dD1&+FmmT%:
 
  	r   c                H    | dk    rdS t                               | |          S )Nr  r   )rd  r  r9  r  s     r   default_valuezReduction.default_value  s+     ---1,,^UCCCr   r  ry   r  c                    | dk    r|S | dk    r"|dk    r|t           j        k    rt           j        S | dk    r"|dk    r|t           j        k    rt           j        S |S )NrI     i      )rT   r  
OUTER_TINY)r  r  rg  s      r   _multilayer_second_step_hintz&Reduction._multilayer_second_step_hint  sm     B;;!!C<<J#--.MDW2W2W ++TMMc!!-"555 ++r   r*  c                   |dS t           j        j                            |                                |          sdS |                                 	 t          |           n# t          $ r Y dS w xY w|                                }t          |dd                   D ].\  }}t           j        j                            |d          r|c S /dS )z
        If we are reducing over the full tensor, and it is non-dense in the last dimension,
        reindex so we reduce over the dense dimension. initially just handle complete
        reduction case
        NrI  r@   )
rm   r  r  rT  r  r  rd  r  rH  r   )r  r  r  r[  r   r   s         r   $check_for_split_dense_dim_reindexingz.Reduction.check_for_split_dense_dim_reindexing  s     4w77  ""O
 
 	 4	!*----" 	 	 	44	 ''))gcrcl++ 	 	DAqw771==  ts   A# #
A10A1r  r   
block_sizedefaultr;  c                  
 |                      |          }t                              |g|          t          j        j                            t          j        |z  d                     
d
fd}	|	S )	Nr   r   rp  r  r   rl   c                   |\  }| ^ }|z  |z   d
fd}r`t                    }t          j        t          j        |          t          j        |                    }t          j        ||	          S  |            S )Nr   rl   c                 0       g                    S r   r   )r  r  	new_indexr   s   r   bodyzCReduction._multilayer_wrap_loader.<locals>.wrapper_fn.<locals>.body  s!    vi'););<<<r   )r   rl   )r^   rk   rE  r  masked)r   r  reduction_blockr  index_dtyperO  r  r  r  r  r  	need_maskr  r   s         @@r   
wrapper_fnz5Reduction._multilayer_wrap_loader.<locals>.wrapper_fn  s     "1_*/'Y ?2_DG= = = = = = = = =  -o>>vN7K88N?K@@  z$g666tvvr   )r   rp  r  rp  r   rl   )	r  Viewdynamic_reshape_indexerrm   r  r  r  r   r  )r  r  re  r  r  r  r  r  dense_indexr  r  r   s    ` ` ``   @@r   _multilayer_wrap_loaderz!Reduction._multilayer_wrap_loader  s     >>Z
 
 ../
 
 (>>H_u,a00
 
 
		 	 	 	 	 	 	 	 	 	 	( r   4Callable[[Sequence[Expr], Sequence[Expr]], OpsValue]original_rangesoriginal_reduction_rangesr  Sequence[Integer]r  @Callable[[Sequence[sympy.Expr], Sequence[sympy.Expr]], OpsValue]c                    t          d D                       sJ d            t                              |t          |          t          |          z             d	fd}|S )
Nc              3  "   K   | ]
}|d k    V  dS r  r   r  s     r   r   zDReduction._multilayer_wrap_loader_existing_ranges.<locals>.<genexpr>  s&      33a16333333r   z8Only enabled for numel_hint == 1, found original_ranges=merged_indexr  new_reduction_indexr   rl   c           	         | d t                             }| t                    d          } | t          |          t          |          z                       S r   )r   r   )r  r  original_idxr  r  r  r   s       r   r  zEReduction._multilayer_wrap_loader_existing_ranges.<locals>.wrapper_fn	  so     ((>#o*>*>(>?L$S%9%9%;%;<I6i((51D+E+EEFF  r   )r  r  r  r  r   rl   )r   r  r  r   )r  r  r  r  r  r  r  r   s    ``    @r   '_multilayer_wrap_loader_existing_rangesz1Reduction._multilayer_wrap_loader_existing_ranges  s     33?33333 	
 	
III	
 	
3 ..%uZ'8'85AU;V;V'V
 
		 		 		 		 		 		 		 		 r   r  r}  list[Integer]c                D   |t           j        t           j        fvr|nt           j        }t                              |||||||	|          }|                                 |                                d	fd}t          j	        j
                            t          |                    }|                     |
||          }||dt          |                   k    sJ t                              t	          |||||t          |          d         |	||                    S )
a
        Break a large reduction up into multiple smaller reductions
        recursively
        r   rO  r  r   rl   c                "     g | |          S r   r   )r   r  intermediate_loaders     r   intermediate_fnz;Reduction.create_multilayer_helper.<locals>.intermediate_fn>  s!     '&'A'A'ABBBr   Nr|  )r   rO  r  rO  r   rl   )r%  float16bfloat16r  rd  r  r  r  rm   r  r  r  rh   r  r   r   )r  r  r~  rf  r  r  r  r  r  r9  r  rg  intermediate_dtypeintermediater'  r  r&  s                   @r   create_multilayer_helperz"Reduction.create_multilayer_helper  s\   0  ??? I 	
 !'' 	
 	
 	*6688	C 	C 	C 	C 	C 	C
 W%//o0N0NOO
99:~
 
 *-Cs?/C/C-C"DDDDD(&!+C,@,@,B,B!C-#-	 	 	
 
 	
r   c                    t          |          }t          ||dz
  z   |          }|                     ||          }|                     |||||||
          }|                     ||||||g |||g|||	          S )r$  r@   )rh   r=   r  r  r,  )r  r  r~  rf  r  r  re  r9  r  rg  r  r  r  r  r  s                  r   r  zReduction.create_multilayerV  s    & ((899o;UCC
##NI>>00
 

 ++feL
 
 	
r   c                z    |                      |||||          }|                     ||||||g ||||	d|
          S )r$  rI  )r!  r,  )r  r  r~  rf  r  r  r  r  r  r9  rg  r  s               r   r  z+Reduction.create_multilayer_existing_ranges  sm    $ @@% 
 

 ++%+o+
+ 
 
 	
r   r{  r  r  rr  rt  )
r  r0  r  r  r  r  ro  rp  r   r   r  r   rv  r  r   )r  r  r~  r  rf  r  r  r  r  rO  re  rO  r9  r  r  r!   r  r  r   r  )
r  r  re  rO  r9  r   rf  r  r   r  )r  r  r~  r  rf  r  r  r  r  r  re  r  r9  rP   rg  rT   r  r  r   r  r9  r   r  r  r   r  )r  ry   r  r   rg  rT   r   rT   )r  ry   r  r  r   r*  )r  r   re  rO  r  ry   r  ry   r  ry   r  r  r  r  r   r;  )r  r  r  r  r  r  r  r  r  r  r   r  )r  r  r~  r  rf  r  r  r  r  r  r  r  r  r}  r  r"  r9  rP   r  ry   rg  rT   r   r  )r  r  r~  r  rf  r  r  r  r  r  re  r  r9  rP   r  ry   rg  rT   r  r  r   r  )r  r  r~  r  rf  r  r  r  r  r  r  r  r  r"  r  r"  r9  rP   rg  rT   r   r  ) r   r   r   r   r  r  rX   rV  rZ  rX  rs  ru  r  r  r`  r  r  r  r  rT   r}  r  r  r  r  r  r  r!  r,  r  r  r  r  s   @r   rd  rd    s         ((((!!!!!!!!N N N N HK((
 
 
 
 
 
 )(

% % % %# # # #L L L L= = = =   

 
 
 
 

 
 
 
  (,` ` ` ` \`D ) ) ) \)V  )6(='+_
 _
 _
 _
 [_
B    \< D D D \D    \     [>  (,( ( ( ( [(T    [8 =
 =
 =
 [=
~  (,+
 +
 +
 +
 [+
Z $
 $
 $
 [$
 $
 $
 $
 $
r   rd  r  r  Optional[Sequence[int]]rf  r!   r  c                     d fd}|S )1A closure containing math to read a given elementr   r   r   r   c                     t          |           t                    k    sJ t          |           t                    k    sJ }t          |           D ]\  }}}|dk    r|||z  z   }|S r  )r   r   )r   resultr   stszrf  r  r  s        r   r  z_fixed_indexer.<locals>.indexer  s    !c%jjCKK&?&?&??5zzSYY&&&&ufd33 	+ 	+KCRQww#(*r   )r   r   r   r   r   )r  r  rf  r  s   ``` r   r  r    s4            Nr   INNER_FN_TYc                  4     e Zd ZU ded<   d fdZddZ xZS )MultiOutputReductionr   output_indexr  r  r~  r  	inner_fns)Union[INNER_FN_TY, Sequence[INNER_FN_TY]]r  r  re  r9  rP   rf  rg  rT   c
           
         t                    rft                    dk    r	d         }
nd
fd}
t                                          |||
|||||	           |	| _        d S )Nr@   r   r   r  reduction_idxr   tuple[OpsValue, ...]c                @     t           fdD                       S )Nc              3  0   K   | ]} |          V  d S r   r   )r   r   r   r?  s     r   r   z@MultiOutputReduction.__init__.<locals>.loader.<locals>.<genexpr>  s/      HHRR]33HHHHHHr   )r   )r   r?  r<  s   ``r   r  z-MultiOutputReduction.__init__.<locals>.loader  s-     HHHHHiHHHHHHr   r|  )r   r  r?  r  r   r@  )callabler   r  __init__r;  )r  r  r~  r<  r  re  r9  rf  rg  r;  r  r  s      `       r   rD  zMultiOutputReduction.__init__  s     I 	%"I y>>Qq\FFI I I I I I
 	-)) 	 		
 		
 		
 )r   r  r0  r  r  r  r  ro  rp  r   r   c           	     8   t          j        | j        | j        | j        |                     ||                    }t          |t          t          f          sJ t          |                      || j
                 }t          j        |pd ||          |          S r  )rk   rr  r  rf  r9  r  r   r   r   r   r;  rs  )r  r  r  r  ro  r   r)  s          r   rs  z$MultiOutputReduction.store_reduction  s     JNMM$//	
 
 &5$-00>>$v,,>>0t()";#;)WWT]]ERRRr   )r  r  r~  r  r<  r=  r  r  re  r  r9  rP   rf  r  rg  rT   r;  r   )
r  r0  r  r  r  r  ro  rp  r   r   )r   r   r   r   rD  rs  r  r  s   @r   r:  r:    sh         #) #) #) #) #) #)JS S S S S S S Sr   r:  c                  6    e Zd Zeej        dfdd            ZdS )OnlineSoftmaxReductionNr  r  r~  r  rf  r  r  r  r  re  
num_outputr   rg  rT   r  r  r   1Sequence[Union[TensorBox, ShapeAsConstantBuffer]]c
           	         t          fdt          |          D                       }
|
D ]}|                                 |
S )z>
        Create the reduction disregarding splitting.
        c              3  v   K   | ]3}t                               t          d |	  	                  V  4dS )r  N)r   r  r:  )	r   
output_idxr  r~  r  r  rg  re  rf  s	     r   r   z0OnlineSoftmaxReduction.create.<locals>.<genexpr>  sq       
 
  $$+"
 
 
 
 
 
 
 
r   )r   r   r  )r  r  r~  rf  r  r  re  rH  rg  r  resultsr(  s    `````` `   r   r  zOnlineSoftmaxReduction.create  s       
 
 
 
 
 
 
 
 
 
 $J//
 
 
 
 
   	 	AIIKKKKr   )r  r  r~  r  rf  r  r  r  r  r  re  r  rH  r   rg  rT   r  r  r   rI  )r   r   r   r  rT   r}  r  r   r   r   rG  rG    sB         )6(='+! ! ! ! [! ! !r   rG  c                  d    e Zd Zeej        fdd            Zedd            Zedd            Z	dS )WelfordReductionr  r  r  r  r<  Sequence[Callable[..., Any]]r  r"  re  r9  rP   rg  rT   r   rI  c           
        dv sJ t           j        j                            t	                              }dfd}	|dk    r& |	d          }
 |	d          } |	d          }|
||fS |dk    rPdfddk    r& d                    |	d           |	d          fS t          fdD                       S t                              d         |          \  }}t          j	        k    r||dk    r| 
                    |          S fdt          d          D             }|D ]}|                                 |S )N)r  rU  r  r   r   r  c                j     d fd}t                               |t                              S )Nr   r  r   rl   c                .    t          j                  S r   r  )r   r  r  s    r   r  z8WelfordReduction.create.<locals>.const.<locals>.inner_fn3  s    |  r   r$  r   r  r   rl   r  r  r   )r  r  r  r  r  s   ` r   constz&WelfordReduction.create.<locals>.const2  sX           ##!F||	 $   r   r   r@   r  r  c                j     d fd}t                               |t                              S )Nr   r  r   rl   c                4    d D             } | |          S )Nc                0    g | ]}t           j        j        S r   r  r  s     r   r   zKWelfordReduction.create.<locals>.copy.<locals>.inner_fn.<locals>.<listcomp>L  r  r   r   )r   r  r  re  s     r   r  z7WelfordReduction.create.<locals>.copy.<locals>.inner_fnK  s)    &N&N=M&N&N&NO!6#777r   r$  rT  rU  )r  r  r  r  r  re  s   ` r   copyz%WelfordReduction.create.<locals>.copyH  sX    8 8 8 8 8 8 8 !''!%<<	 (   r   r  c              3  .   K   | ]} |          V  d S r   r   )r   r   rZ  s     r   r   z*WelfordReduction.create.<locals>.<genexpr>Y  s+      ::"TT"XX::::::r   )r9  r  c                n    g | ]1}t                               t          |	  	                  2S r   )r   r  rO  )	r   rL  r  r  r<  r  rg  re  r9  s	     r   r   z+WelfordReduction.create.<locals>.<listcomp>  sc     
 
 
   $""
 
 
 
 
r   r   )r  r   r   r  )r  r  r   r  )rm   r  r  r  rh   r   rd  r  rT   r}  r  r   r  )r  r  r  r<  r  re  r9  rg  r  rV  meanm2weightr  r  rM  r(  rZ  s    ```````         @r   r  zWelfordReduction.create#  s9    !FFFFF'*33MBR4S4STT	 	 	 	 	 	 	 	 a588DqBU1XXFV##a         !111tIaL))5588UU1XX==::::	::::::&  **aL)+ + 	
 	
e ]222!N199(( 	 	 	
 
 
 
 
 
 
 
 
 
 $Ahh
 
 
   	 	AIIKKKKr   r   r  c                    dS )N)r   r   r   r   r  s     r   r  zWelfordReduction.default_value  s	     yr   r  ry   c	                6    t                    t          j        j                            t          j        z  d                     }	|	rP|dk    rJdfd
}
                     ||d         t          |
d          t          |
d          f|d|          S t          dz
  z             t                              |t           fd|D                       g |g||          }|D ]}|                                 ddt          j        j                            t          |                    }                     ||          }t                              |t          fd|D                       |gd|          S )r$  r   rU  r   r  r?  r)  r   r   rl   c                .    t          j        |          S r   r  )r   r?  r)  r  s      r   r  z4WelfordReduction.create_multilayer.<locals>.constant  s     |E5111r   r-  r@   )r  r  r<  r  re  r9  r  rg  c           	   3  N   K   | ]}                     |d           V   dS )r   )r  N)r  )r   r  r  r  r  re  r  s     r   r   z5WelfordReduction.create_multilayer.<locals>.<genexpr>  s`       
 
  ++$# ,  
 
 
 
 
 
r   r   r  r  r  c                      |g | |          S r   r   )r   r  r  s      r   intermediate_loader_fnzBWelfordReduction.create_multilayer.<locals>.intermediate_loader_fn  s    
 64E4O4555r   c              3  ^   K   | ]'}t          |                                           V  (dS ))r  N)r   r  )r   r   re  s     r   r   z5WelfordReduction.create_multilayer.<locals>.<genexpr>  sM         .q}}GGG     r   )r   r  r?  r  r)  r   r   rl   )r   r  r  r  r  r  r   rl   )rh   rm   r  r  r  r   r  r  r   r=   rO  r  r   r  r  r  )r  r  r  r<  r  re  r9  r  rg  r  r  intermediatesr   r  r  re  r  s   ` `  ` `      @@@r   r  z"WelfordReduction.create_multilayer  sN     ((899(>>H_u,a00
 
 
	  	+<<<2 2 2 2 2 2
 ((aLHA...HA...
 !10- )    o;UCC
(// 
 
 
 
 
 
 
 
 (
 
 
 
 
 feL#
 
&  	 	AIIKKKK	6 	6 	6 	6 W%//f0E0EFF
99:~
 
  &&    &     G
 
 	
r   N)r  r  r  r  r<  rP  r  r"  re  r"  r9  rP   rg  rT   r   rI  r0  )r  r  r  r  r<  rP  r  r"  re  r"  r9  rP   r  ry   rg  rT   r   rI  )
r   r   r   r  rT   r}  r  r  r  r  r   r   r   rO  rO  "  s         )6(=v v v v [vp    \
 Z
 Z
 Z
 [Z
 Z
 Z
r   rO  c                  (    e Zd ZU ded<   ded<   ded<   ded<   ded	<   d
ed<   ded<   ded<    ed           d=d> fd            Zd? fdZd@d!ZdAd"ZdBd#Z	dBd$Z
dBd%ZdCd&ZdDd(Zd=d>d)Zeej        fd*d+dEd3            ZedFd<            Z xZS )GScanr"  scan_rangesr  =Callable[[tuple[Any, ...], tuple[Any, ...]], tuple[Any, ...]]r  zFCallable[[Sequence[_IntLike], Sequence[_IntLike]], Sequence[_IntLike]]r   rT   rg  r   r;  tuple[torch.dtype, ...]dtypestuple[Callable[..., Any], ...]r<  Fr  r   r   r  c                    t                                                     t                      j        fd| j        D              z   t                      j        fd| j        D              z  S )Nc              3  8   K   | ]}t          |          V  d S r   r(   r  s     r   r   z,Scan.get_free_symbol_uses.<locals>.<genexpr>	  .      OO"1m44OOOOOOr   c              3  8   K   | ]}t          |          V  d S r   r(   r  s     r   r   z,Scan.get_free_symbol_uses.<locals>.<genexpr>	  .      HH"1m44HHHHHHr   )r  rV  r;   r  rj  r  rk  s    `r   rV  zScan.get_free_symbol_uses	  s     GG((77 jll OOOOd>NOOO !jll HHHHdiHHH		
r   r   c                    t          | j                  t          | j                  z   t          | j                  k    sJ t	                                                       d S r   )r   r  rj  r  r  r  r  s    r   r  zScan.__post_init__	  R    4;#d&6"7"773ty>>IIIIr   r  r0  r  %Callable[[Sequence[_IntLike]], Never]r  r  	scan_varsrp  r   c                   |                      ||          t          fd| j        D                       }t          j        | j        | j        |          }t          j        |pd |          || j                           S )Nc              3  .   K   | ]} |          V  d S r   r   r   r  r   s     r   r   z'Scan.store_reduction.<locals>.<genexpr>)	  +      DDxx}}DDDDDDr   r  )	r   r   r<  rk   r  rm  r  r  r;  )r  r  r  r  rw  r   r5  r   s          @r   rs  zScan.store_reduction!	  s     ll4++DDDDT^DDDDD$+t??y$9ggcllF4;L4M
 
 	
r   c                    dS )Ncustomr   r  s    r   rX  zScan.get_reduction_type/	  s    xr   c                    | j         S r   )rj  r  s    r   rZ  zScan.get_reduction_size3	  r  r   c                    | j         S r   r  r  s    r   r  zScan.get_size6	  
    yr   c                    | j         S r   r  r  s    r   r  zScan.get_pointwise_size9	  r  r   c                T    t          | j                  t          | j                  z   S r   )r   r  rj  r  s    r   ru  zScan.index_length<	  "    4;#d&6"7"777r   r  c                    |                      | j                  }|                      | j        t          j                  }|                     ||          }|fS r   )r  r  rj  r?   rx  r   r  r   ry  r   s       r   r  zScan.inner_fn_args?	  G    DK((T-t}==ll5&))vr   c                    |                      | j                  }|                      | j        t          j                  }|                     ||          }t          | j        ||          S r  )r  r  rj  r?   rx  r   rI   r  r  r  r   ry  r   s        r   r  zScan.inner_fn_free_symbolsE	  W    DK((T-t}==ll5&))#DM3mTTTTr   T)can_fallback_to_atenr  r  +tuple[Callable[[Sequence[Expr]], Any], ...]axisr  r   ;Sequence[Optional[Union[TensorBox, ShapeAsConstantBuffer]]]c                 	 g d          dz   d                   gt           j                            t          j                  sd gt                    z  S t                    dk    r=t           j                            t          j                  sd gt                    z  S t           j        j        }
|
                    t                              }t                    t                    k    sJ |

                    t          j        |d                    r+fdt          t                              D             S |                     d         d         |          \  }t          |dk    rYt           j        j        d u pt&          o
t(          dk    ot                    dk    }|s|rd gt                    z  S d}nt*          dfd	fdt          t                              D             }|D ]}|                                 |S )Nr@   c                d    g | ],}t                               |         |                    -S r$  r  r  r   r;  r  rm  r<  r  s     r   r   zScan.create.<locals>.<listcomp>l	  V        !   ! .&|4	 !    r   r   )r  r  r  r  pointwise_rangesrj  r  
scan_numelz3.3.0r   r  
scan_indexr   r}  c                    t          |          t                    k    sJ t          |           t                    k    sJ g | d          || d          S r   r   )r   r  r  r  rj  s     r   r   zScan.create.<locals>.reindex	  g    z??c+&6&66666u::%5!6!66666>U5D5\>J>tuu>>r   c                    g | ];}t                                d|         |         
	|d           <S ))r  r  rm  r  r<  r  r  rj  r  r   rg  r;  r   )r   r  )r   r;  r  r  rm  r<  r   r  rg  r   rj  	scan_typer  s     r   r   zScan.create.<locals>.<listcomp>	  s     
 
 
$ # 	 ! .!&|4'+ +)##1!-    
 
 
r   )r   r  r  r  r   r}  )rm   r  r  rC   SCANr   TUPLE_REDUCTIONr  r  rh   r  r   Ler   r  ri  r%  versionhip
has_tritontriton_version	SplitScanr  )r  r  rm  r<  r  r  r  rg  r  r   r  r  r  supports_splitrM  r5  r  r   rj  r  s    ``````` `      @@@@r   r  zScan.createK	  s    =T%4%[<4q

+;<Dzlw""6>+>?? 	(6CKK''v;;??17#6#6N2$
 $
? 6CKK''7#&&}['A'ABB
6{{c)nn,,,, ))%(:q*A*ABB 		       %*#f++$6$6    &)^^)q\-#!! &4 	&
 	&
"
 	>>!T)Wj.V^w=V%v;;!#  " &' # 6CKK//!"JJ%		? 	? 	? 	? 	? 	? 	? 	?

 
 
 
 
 
 
 
 
 
 
 
 
 
$ !&c&kk 2 2%
 
 
*  	 	FNNr   r  r  r  r  r  r  r!   r  c	           
     X    d	fd}	t                               ||||	||d|          S )
Nr   r  r?  r   rl   c                F     g | d          || d                    S r   r   )r   r?  r  r  s     r   r  z#Scan.num_splits.<locals>.wrapper_fn	  s3    8Fc%4%jF=F3tuu:FGGGr   r  )r  r~  rf  r  r  re  r9  r  )r   r  r?  r  r   rl   )rd  r  )
r  r  r  r  r  r  rj  r  r  r  s
      ``     r   r  zScan.num_splits	  se    	H 	H 	H 	H 	H 	H 	H ###(!& $ 	
 	
 		
r   r  r  ri  )
r  r0  r  rv  r  r  rw  rp  r   r   rt  rr  r  r   )r  r  rm  rl  r<  r  r  r"  r  r   r  rk  rg  rT   r  r   r   r   r   r  )r  r  r  r  r  r  r  r   r  r"  rj  r"  r  rk  r  r!   r   r  )r   r   r   r   rX   rV  r  rs  rX  rZ  r  r  ru  r  r  r  rT   r}  r  r  r  r  s   @r   ri  ri   	  s        MMMMSSSS!!!!####---- F##
 
 
 
 
 
 $#
           
 
 
 
                8 8 8 8   U U U U U  )6(=_ &*_ _ _ _ _ [_B 
 
 
 [
 
 
 
 
r   ri  c                      e Zd ZdS )r  N)r   r   r   r   r   r   r  r  	  s        Dr   r  c                      e Zd ZU ded<   ded<   ded<   ded<   ded	<   d
ed<   ded<   ded<   ded<    ed           d0d1 fd            Zd2 fdZd3d Zd4d!Zd5d"Z	d5d#Z
d5d$Zd6d%Zd7d'Zd0d1d(Zeej        fd8d/            Z xZS )9Sortr"  sort_rangesr  z:Callable[[Sequence[Expr], Sequence[Expr]], Sequence[Expr]]r   rT   rg  r   r;  rl  rm  rn  r<  r   stable
descendingFr  r   r  c                    t                                                     t                      j        fd| j        D              z   t                      j        fd| j        D              z  S )Nc              3  8   K   | ]}t          |          V  d S r   r(   r  s     r   r   z,Sort.get_free_symbol_uses.<locals>.<genexpr>	  rq  r   c              3  8   K   | ]}t          |          V  d S r   r(   r  s     r   r   z,Sort.get_free_symbol_uses.<locals>.<genexpr>	  rs  r   )r  rV  r;   r  r  r  rk  s    `r   rV  zSort.get_free_symbol_uses	  s     GG((77 jll OOOOd>NOOO !jll HHHHdiHHH		
r   r   c                    t          | j                  t          | j                  z   t          | j                  k    sJ t	                                                       d S r   )r   r  r  r  r  r  r  s    r   r  zSort.__post_init__	  ru  r   r  r0  r  r  r  r  ro  r   c                   |                      ||          t          fd| j        D                       }t          j        | j        || j        | j                  }t          j        |pd |          || j	                           S )Nc              3  .   K   | ]} |          V  d S r   r   rz  s     r   r   z'Sort.store_reduction.<locals>.<genexpr>	  r{  r   r  )
r   r   r<  rk   sortrm  r  r  r  r;  )r  r  r  r  ro  r   r5  r   s          @r   rs  zSort.store_reduction	  s     ll400DDDDT^DDDDD$+vt{DOLLy$9ggcllF4;L4M
 
 	
r   c                    dS )Nr  r   r  s    r   rX  zSort.get_reduction_type	  s    vr   c                    | j         S r   )r  r  s    r   rZ  zSort.get_reduction_size
  r  r   c                    | j         S r   r  r  s    r   r  zSort.get_size
  r  r   c                    | j         S r   r  r  s    r   r  zSort.get_pointwise_size
  r  r   c                T    t          | j                  t          | j                  z   S r   )r   r  r  r  s    r   ru  zSort.index_length

  r  r   rv  c                    |                      | j                  }|                      | j        t          j                  }|                     ||          }|fS r   )r  r  r  r?   rx  r   r  s       r   r  zSort.inner_fn_args
  r  r   c                    |                      | j                  }|                      | j        t          j                  }|                     ||          }t          | j        ||          S r  )r  r  r  r?   rx  r   rI   r  r  s        r   r  zSort.inner_fn_free_symbols
  r  r   r  r  'tuple[Callable[[list[Expr]], Any], ...]r  r   r  c	                r  	 g d          dz   d                   gt           j                            t          j                  sd gt                    z  S t           j        j        }
|
                    t                              }d}t          j
        j        o'|
                    t          j        ||                    }|sd gt                    z  S t                    t                    k    sJ |
                    t          j        |d                    r+fdt          t                              D             S dfd		fd
t          t                              D             }|D ]}|                                 |S )Nr@   r   c                d    g | ],}t                               |         |                    -S r  r  r  s     r   r   zSort.create.<locals>.<listcomp>>
  r  r   r   r  
sort_indexr   r}  c                    t          |          t                    k    sJ t          |           t                    k    sJ g | d          || d          S r   r  )r   r  r  r  r  s     r   r   zSort.create.<locals>.reindexH
  r  r   c                    g | ]@}t                               t          d|         |         
	|d           AS ))r  r  rm  r  r<  r  r  r  r   rg  r;  r  r  r   )r   r  r  )r   r;  r  r  rm  r<  r   r  rg  r   r  r  r  s     r   r   zSort.create.<locals>.<listcomp>M
  s     
 
 
& %  ! .!&|4'+ +##1!-!)    
 
 
r   )r   r  r  r  r   r}  )rm   r  r  rC   SORTr   r  r  rh   rA   r;  persistent_reductionsr  r   r  r   r  )r  r  rm  r<  r  r  r  r  rg  r   r  
sort_numel
max_rblockis_persistent_kernelrM  r5  r  r   r  s    `````````      @@@r   r  zSort.create
  s/    =T%4%[<4q

+;<Dzlw""6>+>?? 	(6CKK''7#&&}['A'ABB
 
M/ Q..ux
J/O/OPP 	 $ 	(6CKK''6{{c)nn,,,, ))%(:q*A*ABB 		       %*#f++$6$6   	? 	? 	? 	? 	? 	? 	? 	?

 
 
 
 
 
 
 
 
 
 
 
 
 
& !&c&kk 2 2'
 
 
,  	 	FNNr   r  r  ri  )
r  r0  r  r  r  r  ro  r  r   r   rt  rr  r  r/  )r  r  rm  rl  r<  r  r  r"  r  r   r  r   r  r   rg  rT   r   r   r   r  )r   r   r   r   rX   rV  r  rs  rX  rZ  r  r  ru  r  r  r  rT   r}  r  r  r  s   @r   r  r  	  s         GGGG!!!!####----LLL F##	
 	
 	
 	
 	
 	
 $#	
           
 
 
 
                8 8 8 8   U U U U U  )6(=L L L L [L L L L Lr   r  c                L    	 t          | d           dS # t          $ r Y dS w xY w)NFfreezeT)rd  r  r   s    r   r  r  i
  sA    a....t   uus    
##c                    	 t          | d          \  }}|                                r|                                 |                                S # t          $ r Y dS w xY wNFr  )rd  should_pad_stridespad_stridesis_contiguousr  )r   _bufferrc  s      r    is_contiguous_storage_and_layoutr  q
  sy    /%@@@ $$&& 	!   ##%%%   uus   AA 
A A r  want_contiguousstride_order'Optional[Sequence[Union[int, Integer]]]r9  rD  tuple[StorageBox, Layout]c                   t          | t                    rt          | j        |||||          S t          | t                    r8t          | j        |||||          \  }}| | j                                        fS t          | t                    r|r|r=|                                  |                                                                 sJ nH|| 	                    ||           n.|| 
                    ||           n|                                  t	          |           |                                 fS t          | t                    r"t          | j        |          \  }}|| j        fS t          )z
    Try to simplify x into a StorageBox and a Layout.

    allow_padding only affect how we apply stride_order. When allow_padding
    is True, we have the freedom to add padding when applying the stride_order.
    r  r  r  r9  rD  Nr9  r  )r   r   rd  rb  
StorageBoxr  Bufferr8  r  r<  rG  r  rh  rc  r  )	r   r  r  r  r9  rD  r  rc  buffers	            r   rd  rd  }
  s    !Y 
$F+%''
 
 
 	
 !Z   	&)F+%''
 
 
	6 !&##%%%%!V - 	" "!!!||~~33555555)11  2     *22! 3     !!!!}}allnn,,!_%%   *F
 
 
	 qx
r   c                x    	 t          | d          \  }}|                    |          S # t          $ r Y dS w xY wr  )rd  is_stride_orderedr  )r   r  r  rc  s       r   "is_stride_order_storage_and_layoutr  
  sS    /%@@@''555   uus   (+ 
99r   c                   t          | t          t          f          rt          | j                  S t          | t
                    rb| j        }t          j        j	        
                    |j        t          |j                  z  t                     }t          | j                  p|S t          | t                    r%|                                 t          j        j        v S dS r  )r   r   r  is_unalignedrb  rh  rc  rm   r  r  statically_known_multiple_ofrf  r_   r  ra   r  r#  unaligned_buffers)r   rc  has_unaligned_layouts      r   r  r  
  s    $J/00 'DI&&&$(( ?#$7#3#P#PMN6<888/$
 $
  
 DI&&>*>>$ <}}!'";;; 5r   c                      e Zd ZU ded<    ed           d0d1d            Zd2d
Zd3dZd4dZe	d5d            Z
d6dZd7dZd8dZd9dZd:dZd;dZd<dZd=d!Zd>d"Zd?d$Zd<d%Zd<d&Zd@d(ZdAd*ZdBd+ZdCd.Zd/S )DBaseViewr   rb  Fr  r   r   r  c                6    | j                             |          S r   rb  rV  rU  s     r   rV  zBaseView.get_free_symbol_uses
  s    y--m<<<r   *Callable[[Sequence[Expr]], Sequence[Expr]]c                &    t          d|            )Nzmake_reindexer NYI on r  r  s    r   make_reindexerzBaseView.make_reindexer
  s    !"A4"A"ABBBr   r  c                r    | j                                         |                                 dfd}|S )Nr   r  r   r!   c                ,      |                     S r   r   r   innerr   s    r   r  z&BaseView.make_indexer.<locals>.indexer
      5&&&r   )r   r  r   r!   )rb  r  r  )r  r  r  r   s     @@r   r  zBaseView.make_indexer
  sR    	&&((%%''	' 	' 	' 	' 	' 	' 	' r   r  c                r    | j                                         |                                 dfd}|S )Nr   r  r   rl   c                ,      |                     S r   r   r  s    r   r  z$BaseView.make_loader.<locals>.loader
  r  r   rT  )rb  r  r  )r  r  r  r   s     @@r   r  zBaseView.make_loader
  sR    	%%''%%''	' 	' 	' 	' 	' 	' 	' r   r  c                4    | j                                         S r   )rb  r"  r  s    r   r  zBaseView.dtype
  s    y""$$$r   r  c                4    | j                                         S r   rb  r  r  s    r   r  zBaseView.get_layout
      y##%%%r   r  c                4    | j                                         S r   rb  r#  r  s    r   r#  zBaseView.get_device
  r  r   r  c                    d S r   r   r  s    r   r  zBaseView.get_origin_node
  r  r   r   c                4    | j                                         S r   rb  r#  r  s    r   r#  zBaseView.get_name
      y!!###r   r  c                *    |                                  S r   r  r  s    r   r  zBaseView.get_pointwise_size      }}r   r.  r   r   c                6    | j                             |          S r   rb  r1  r0  s     r   r1  zBaseView.mark_reuse      y##E***r   c                4    | j                                         S r   rb  r  r  s    r   r  zBaseView.has_exceeded_max_reads      y//111r   r0  c                4    | j                                         S r   rb  r  r  s    r   r  zBaseView.realize      y  """r   c                8    | j                                          d S r   rb  r4  r  s    r   r4  zBaseView.realize_hint  s    	     r   ry   c                4    | j                                         S r   rb  rS  r  s    r   rS  zBaseView.get_storage_numel      y**,,,r   c                4    | j                                         S r   rb  r\  r  s    r   r\  zBaseView.is_extern      y""$$$r   c                    t          | j        t                    sJ t          | j                              | j                                        S r   )r   rb  r  r   is_module_bufferr  s    r   r  zBaseView.is_module_buffer  s>    $)X..??TY??.y))+++r   r  c                4    | j                                         S r   rb  r  r  s    r   r  zBaseView.get_read_names      y'')))r   rK  c                    t          j        t          dd          5  t          |                                 |                                           j        cd d d            S # 1 swxY w Y   d S r  )r    r   r   rK   r  r  rN  r  s    r   r  zBaseView.get_reads  s    \.*:DAA 	 	&  ""  		 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   9A""A&)A&c                l    | }t          |t                    r|j        }t          |t                    |S r   )r   r  rb  )r  r   s     r   r6  zBaseView.unwrap_view%  s9    H%% 	A H%% 	r   r  r  c                    |                                  } t          j        t          d|          |          }t	          ||                                 ||                                           S r!  )r  r    r   r%  r  r"  r  r&  s      r   r`  zBaseView.constant_to_device+  sf    !!##Hn.?HHPP..""==??	
 
 
 	
r   Nr  r  r   r  ry  rx  rn  ro  rv  rk  r{  rr  r}  rq  rt  ri  r  rj  r  r~  r  )r   r   r   r   rX   rV  r  r  r  r  r  r  r#  r  r#  r  r1  r  r  r4  rS  r\  r  r  r  r6  r`  r   r   r   r  r  
  s        LLLJ''= = = = ('=C C C C       % % % X%& & & && & & &   $ $ $ $   + + + +2 2 2 2# # # #! ! ! !- - - -% % % %, , , ,* * * *      	
 	
 	
 	
 	
 	
r   r  c                  Z    e Zd ZU ded<   edd            Zedd
            ZddZddZ	dS )r   r  r  r   r   new_sizerO  r   c                b   t           j        j        }d |D             }|                                 }dgt	          |          t	          |          z
  z  t          |          z   }t	          |          t	          |          k    sJ t          t	          |                    D ]}||         dk    r||         J ||         ||<   $||         *t           j        j                            ||                   rW|                    ||         ||         z
  d          dk    s
J d            |S )zReplace `-1` with correct sizesc                6    g | ]}t          j        |          S r   r  r   s     r   r   z.ExpandView._normalize_size.<locals>.<listcomp>?  s     666ELOO666r   NrI  r   fallbackzKBroadcast failed in ExpandView({x.get_size()}, {new_size}) on dimension {i})	rm   r  r  r  r   r   r   is_size_one_or_falser  )r   r  r  old_sizer   s        r   _normalize_sizezExpandView._normalize_size;  s8    7#66X666::<<6S]]S]]:;d8nnL8}}H----s8}}%% 	 	A{b  {...&qk!$(8(M(M) )$   ))(1+*Ca)PPTUUUUa VUUU r   r  c                v   |                      ||          }t          |          rt          |          \  }}t          |          t          |j                  z
  }|dk    sJ t
          j        j        g|z  }t          |j	        |j                  D ]O\  }}|
                    t          j        j                            |          s|nt
          j        j                   Pt          |j        |j        t%          |          ||j        |j                  }	t+          ||	          S t-          ||          S )Nr   ra  )rb  r  )r  r  rd  r   r  r   r  r  r   r  r  rm   r  r  r  re  r  r  r   rf  rg  rh  r   )
r  r   r  ri  rj  skiprk  r  r  rl  s
             r   r  zExpandView.createV  s3   &&q(33 ## 	D"7":":GZx==3z#7#77D19999',$.J #J$5z G G  !!7+@@FF&FF   
 %! X!$ J #
CCCCqx0000r   c                    | j         S r   r  r  s    r   r  zExpandView.get_sizeq  r  r   r  c                    |                                  }| j                                         t          |          t                    z
  dfd}|S )Nr   r  r   c                    t          | d                    } t          |           t                    k    sJ t          t                              D ]"}|         dk    rt          j        j        | |<   #| S r  )r   r   r   r   r  r  )r   r   actualr!  s     r   r   z*ExpandView.make_reindexer.<locals>.reindex{  st     tuu&&Eu::V,,,,3v;;'' , ,!9>>$w|E!HLr   r   r  r   r  )r  rb  r   )r  targetr   r%  r!  s      @@r   r  zExpandView.make_reindexert  sf     ##%%6{{S[[(		 		 		 		 		 		 		 r   N)r   r   r  rO  r   rO  )r   r   r  rO  r   r  rr  r  )
r   r   r   r   r  r  r  r  r  r  r   r   r   r   r   7  s            \4 1 1 1 [14        r   r   c                  Z    e Zd ZU ded<   edd            Zedd
            ZddZddZdS )PermuteViewr}  dimsr   r   r   r   r  c                   |                      |          }t          |          t          t          t          |                              k    sJ t	          |          rct          |          \  }t          j        j        fd|D             fd|D             j	        j
                  }t          ||          S t          ||          S )Nc                *    g | ]}j         |         S r   r  r   r   rj  s     r   r   z&PermuteView.create.<locals>.<listcomp>  s     222#222r   c                *    g | ]}j         |         S r   r  r-  s     r   r   z&PermuteView.create.<locals>.<listcomp>  s!    444!"1%444r   ra  )rb  r*  )_map_neg_dimsr;   r   r   r  rd  re  r  r  rf  rg  rh  r)  )r  r   r*  ri  rl  rj  s        @r   r  zPermuteView.create  s      &&$:eCII.>.>#?#????? ## 
	D"7":":GZ$! 2222T2224444t444!$ J #
CCCC----r   	list[int]c                     fdD             S )Nc                D    g | ]}|d k    r|nt                    |z   S r  r  )r   rV  r*  s     r   r   z-PermuteView._map_neg_dims.<locals>.<listcomp>  s/    EEEsaxxSYY_EEEr   r   )r  r*  s    `r   r0  zPermuteView._map_neg_dims  s    EEEEEEEEr   r  c                   t          |                     | j                            t          t          t	          | j                                      k    sJ | j                                        fd| j        D             S )Nc                     g | ]
}|         S r   r   )r   r   r  s     r   r   z(PermuteView.get_size.<locals>.<listcomp>  s    +++AQ+++r   )r;   r0  r*  r   r   rb  r  )r  r  s    @r   r  zPermuteView.get_size  s    $,,TY7788J#di..!!=
 =
 
 
 
 
 y!!##++++++++r   r  c                ,   d t          | j                  D             fdt          t          | j                            D             t	                    t	          t          t          | j                                      k    sJ dfd}|S )Nc                    i | ]\  }}||	S r   r   )r   r   r  s      r   r   z.PermuteView.make_reindexer.<locals>.<dictcomp>  s    5551q!555r   c                     g | ]
}|         S r   r   )r   r   invs     r   r   z.PermuteView.make_reindexer.<locals>.<listcomp>  s    555!s1v555r   r   r  r   c                "      fdD             S )Nc                     g | ]
}|         S r   r   )r   r   r   s     r   r   z?PermuteView.make_reindexer.<locals>.reindex.<locals>.<listcomp>  s    ***E!H***r   r   )r   r9  s   `r   r   z+PermuteView.make_reindexer.<locals>.reindex  s      +***c****r   r&  )r   r*  r   r   r;   )r  r   r9  s     @r   r  zPermuteView.make_reindexer  s     65	$) 4 45555555uS^^44555#*U3ty>>-B-B"C"CCCCC	+ 	+ 	+ 	+ 	+ 	+
 r   N)r   r   r*  r   r   r  )r*  r   r   r1  rr  r  )	r   r   r   r   r  r  r0  r  r  r   r   r   r)  r)    s         . . . [.$ F F F [F, , , ,     r   r)  c                  L    e Zd Zedddd            Zedd            ZddZdS )SqueezeViewN)rV  r   r   rV  r*  r   c                  t          |          rAt          |          \  }}g }g }Lt          t                    sJ t	                                dk    rt          |j                  k     sJ t          t          |j        |j	                            D ]|\  }\  }}	1|dk    r*|
                    |           |
                    |	           ;|k    r+|
                    |           |
                    |	           l|dk    s
J d            }t          |j        |j        |||j        |j                  }
t!          ||
          S 7t"                              |d |                                D                       S |                                         dk    sJ t"                              |fdt          |                                          D                       S )Nr   r@   zexpected squeezed size to be 1ra  c                    g | ]
}|d k    |S r  r   r   s     r   r   z&SqueezeView.create.<locals>.<listcomp>  s    "E"E"Ea1ff1fffr   c                &    g | ]\  }}|k    |S r   r   )r   r   r   rV  s      r   r   z&SqueezeView.create.<locals>.<listcomp>  s#    "U"U"UAAQTHH1HHHr   )r  rd  r   r   r   r   r  r   r   r  r  re  r  r  rf  rg  rh  r  r  r  )r  r   rV  ri  rj  r  rk  r   r  r  rl  s     `        r   r  zSqueezeView.create  s    ## 	D"7":":GZHJ!#s++66T#YY66+CxxC#jo*>*>$>$>$>>%.s:?JDU/V/V%W%W 
K 
K!>D&;qyy ---"))&111Cxx ---"))&1111#qyyy*Jyyyy$! !$ J #
CCCC;;;q"E"Eajjll"E"E"EFFF::<<$))));;q"U"U"U"U1::<<1H1H"U"U"UVVVr   r  r  9tuple[list[int], Callable[[Sequence[Expr]], tuple[Expr]]]c                    d | D             }d t          |           D             t          |           dfd}||fS )	Nc                    g | ]
}|d k    |S r  r   r   s     r   r   z(SqueezeView.squeezer.<locals>.<listcomp>  s    ...!qAvvAvvvr   c                $    g | ]\  }}|d k    |S r  r   )r   r   r   s      r   r   z(SqueezeView.squeezer.<locals>.<listcomp>  s!    ;;;AAFF1FFFr   r   r  r   tuple[Expr]c                    t          |           t                    k    sJ |  d             t          j        j        gz  }t	          |           D ]
\  }}|||<   t          |          S )N )r   r   r  r  r   r   )r   r  r   r   lengthnot_ones       r   r   z%SqueezeView.squeezer.<locals>.reindex  s|    u::W---%/C/C'/C/C---/Igu-- # #Q!"	####r   )r   r  r   rE  )r   r   )r  r  r   rH  rI  s      @@r   squeezerzSqueezeView.squeezer  sm     /.t...;;4;;;T	$ 	$ 	$ 	$ 	$ 	$ 	$   r   rb  r   r   c                     t          d          )Nzuse SqueezeView.create())AssertionError)r  rb  s     r   rD  zSqueezeView.__init__  s    7888r   )r   r   rV  r*  r   r   )r  r  r   rA  )rb  r   r   r   )r   r   r   r  r  r  rJ  rD  r   r   r   r=  r=    sz        7; $W $W $W $W $W [$WL ! ! ! \! 9 9 9 9 9 9r   r=  c                  `    e Zd ZU ded<   ded<   ddZddZdd	ZeZedd            Z	ddZ
dS )GenericViewr  r  r  r   r   c                    | j         S r   )r   r  s    r   r  zGenericView.make_reindexer  s     |r   r   c                    d t          t          | j                            D             }t          |                     |                    }dd                    t          t          |                     d| S )Nc                B    g | ]}t          t          j        |          S r   )rg   r?   r  r   r  s     r   r   z+GenericView.reindex_str.<locals>.<listcomp>  s2     
 
 
>?*4:q99
 
 
r   zlambda , r2  )r   r   r  r   r   r  r  r   )r  	index_old	index_news      r   reindex_strzGenericView.reindex_str  sw    
 
CHTYCXCX
 
 
	 i0011	F3sI#6#677FF9FFFr   c                v    |                      | j        d| j         d|                                  g          S )Nsize=zreindex=)r  rb  r  rV  r  s    r   r  zGenericView.__str__	  sD    Y+	++-L8H8H8J8J-L-LM
 
 	
r   r   r   r  r  c                8     | |t          |          |          S )Nrb  r  r   )r   )r  r   r  r   s       r   r  zGenericView.create  s!     sX@@@@r   c                    | j         S r   r  r  s    r   r  zGenericView.get_size  r  r   Nr  r{  )r   r   r  r  r   r  r   r  rr  )r   r   r   r   r  rV  r  r  r  r  r  r   r   r   rN  rN    s         7777   
G G G G
 
 
 

 HA A A [A     r   rN  c                      e Zd Zedd            Zedd
            Zedd            Ze	 ddd            Ze	 ddd            Z	dS )r  r   r!   r  r   c                    t          j        |           } t          j        |          }t          j        j        j        j        } |t          j        | d                    r| |z   } | S r   )r   r  rm   r  r  r   evaluate_exprLt)r   r  r^  s      r   handle_negative_indexzView.handle_negative_index  s\    l3|D!!(2@=#q))** 	*C
r   r   r   r  r  c                v  	 t          |t                    sJ t          |                      |                     |                                |          \  	}t
          j        j                            	|          r|S d}t          t          	                    dk    s t          t          |                    dk    rd}d|v r!d	fd} | |t          |          |	          S t          |          s|r|r)t          |          st                              |          }t          |d
          \  }}t!          |j        |j        |t&                              |          |j        |j                  }t/          ||          S |                     	|          } | |t          |          |	          S )NFr   Tr   r   r   tuple[int, ...]c                D    t          dgt                    z            S r   )r   r   )r   r  s    r   fake_reindexz!View.create.<locals>.fake_reindex:  s    aS3x==0111r   rZ  )r  ra  )r   r   r   rb  )r   r   r   resolve_negative_sizer  rm   r  r  statically_known_list_equalsr   r3   r   r  r  require_contiguousrd  re  r  r  r   r!  rf  rg  rh  r  )
r  r   r  unbacked_symbols_in_sizesrd  ri  rj  rl  r   r  s
            @r   r  zView.create(  s   (H--==tH~~==- 66qzz||XNN( 7888LL 	H$)!%h//00144(2233a77(,%==2 2 2 2 2 2 3ADNNLIIII-a00 	D4M 	D( 72RST2U2U 7 !33A66"74"P"P"PGZ$! 11(;;!$ J #
CCCC--hAAsX@@@@r   r  tuple[list[Expr], list[Expr]]c                   d |D             }d | D             } t          |          }t          t          |                    D ]Q}||         dk    rCt          j        j        ||<   t          t          |           t          |                    ||<    nRt          j	        j
                            t          |           t          |                     | |fS )Nc                V    g | ]&}t           j        j                            |          'S r   rm   r  r  r  r  s     r   r   z.View.resolve_negative_size.<locals>.<listcomp>Z  +    CCCQAG$--a00CCCr   c                V    g | ]&}t           j        j                            |          'S r   rl  r  s     r   r   z.View.resolve_negative_size.<locals>.<listcomp>[  rm  r   rI  )r   r   r   r   r  Oner<   rh   rm   r  r  check_equals)r  r  r   s      r   re  zView.resolve_negative_sizeV  s     DC(CCCCC(CCC>>s8}}%% 	 	A{b  #gk&}X'>'>h@W@WXX !
 	
%%mH&=&=}X?V?VWWW!!r   NrO  	dense_dimr*  r   c                    	 |                      |||          }nc# t          t          f$ rO t          |          g}|                      ||          }|                      ||          }t	          ||          }Y nw xY w|S r   )_dynamic_reshape_indexerrL  
IndexErrorrh   r   )r  r  r  rq  r   flatr   r   s           r   r  zView.dynamic_reshape_indexerg  s    	:228XyQQGG
+ 	: 	: 	:!(++,D33HdCCH33D(CCH%h99GGG	: s    AA:9A:r  c                	   t           j        j        j        }d t	          t          |                    D             t          t          |                    }t          |           }|duo(|t          |          dz
  k    ot          |          dk    }|r.|J |                    |          }|	                    |           g |r|r~|                                }|                                \  }	}
|dk    r=	                    t          j        j                   |	                    |	|
f           n|
dk    r|	                    |           n ||
           ||          k    r<	                    |	           t           j        j                            |
|           n ||
           ||          k     r ||
           ||          k     r<|                                \  }}||
z  |	z   }	|
|z  }
 ||
           ||          k     <	                    |	           t           j        j                            |
|           n ||
           ||          k    rt          j        j        }|}	                    t          |	||                     ||z  } ||
           ||          k    rZ|                                }	                    t          |	||                     ||z  }||z  } ||
           ||          k    Zt           j        j                            |
|           nt           |r|~|r_|                                }t           j        j                            |d           	                    t          j        j                   |_|r>|                                \  }	}
t           j        j                            |
d           |>|Rt          |          dk    r?                                                                 }                    ||           n                                 t                    t          |           k    sJ dfd}|S )	zG
        Perform a reshape entirely by modifying indexing math
        c                B    g | ]}t          t          j        |          S r   )rg   r?   VIEWr  s     r   r   z1View._dynamic_reshape_indexer.<locals>.<listcomp>  s2     
 
 
=>*49a88
 
 
r   Nr@   r   r  r   c                   t          |           t                    k    s&J t          |           t                    f            t          t          |                     t          fdD                       S )Nc              3  8   K   | ]}t          |          V  d S r   )ri   )r   r   replacementss     r   r   zAView._dynamic_reshape_indexer.<locals>.reindex.<locals>.<genexpr>  s-      HHA|44HHHHHHr   )r   r   r   r   )r   r{  r  	view_exprs    @r   r   z.View._dynamic_reshape_indexer.<locals>.reindex  sv     u::T***SZZT,C***D% 0 011LHHHHiHHHHHHr   r&  )rm   r  r  r  r   r   r   r   r  r  r   r  r  rp  ro  r>   rL  reverseinsert)r  r  rq  r  	stack_new	stack_oldreordering_dense_dimold_dimsize_oldvarsize_newvar2	size_new2divisormodulus
dense_exprr   r  r|  s                    @@r   rs  zView._dynamic_reshape_indexerx  s    G$.	
 
BGHBVBV
 
 
 T8,,--	NN	 T! #S^^a//#H" 	
   	&(((mmI..GW%%%	 	%I 	% }}H%MMOOMC1}}  ...  #x1111Q  ****8$$		((;(;;;  %%% --hAAAA8$$yy':':::i))IIh,?,???&/mmooOD)/C/C')3H  i))IIh,?,???   %%% --hAAAA8$$yy':':::'+"  gw!G!GHHH!G+i))IIh,?,???'mmooG$$_S'7%K%KLLL%/G''1H	  i))IIh,?,???
  --hAAAA$$=  	%I 	%@  	+ }}HG))(A666UW\***  	+
  	7%MMOOMCG))(A666  	7  S]]a%7%7"JY
33339~~X....	I 	I 	I 	I 	I 	I 	I r   )r   r!   r  r!   r   r!   )r   r   r  r  r   r   )r  r  r  r  r   ri  r   )r  rO  r  rO  rq  r*  r   r   )r  r  r  r  rq  r*  r   r  )
r   r   r   r  r`  r  r  re  r  rs  r   r   r   r  r    s           \ +A +A +A [+AZ " " " \"  
 $(	    [   $(X X X X \X X Xr   r  c                       e Zd ZU dZded<   d$ fdZd%dZeZd%d	Zd&dZ	d'dZ
ed(d            Zd)dZd)dZd*dZd+dZd,dZd$dZ ed           	 d-d.d            Zd/d0d!Zd1d#Z xZS )2rh  z*Pretend our storage has a different layoutr  rc  r   r   c                    t                                                       t          | j        t                    r5t
                              | d| j                                                   d S d S )Nrb  )r  r  r   rb  r  r   r  r6  r  s    r   r  zReinterpretView.__post_init__  sf    di** 	FtVTY-B-B-D-DEEEEE	F 	Fr   r   c                D    |                      | j        | j        g          S r   )r  rb  rc  r  s    r   r  zReinterpretView.__str__  s'    	
 
 	
r   c                4    | j                                         S r   r  r  s    r   r#  zReinterpretView.get_name  r  r   r  c                    | j         j        S r   )rc  r  r  s    r   r#  zReinterpretView.get_device  s    {!!r   r  c                    d S r   r   r  s    r   r  zReinterpretView.get_origin_node  r  r   r  c                    | j         j        S r   )rc  r  r  s    r   r  zReinterpretView.dtype  s    {  r   r  c                4    t          | j        j                  S r   )r   rc  r  r  s    r   r  zReinterpretView.get_size  s    DK$%%%r   c                4    t          | j        j                  S r   )r   rc  r  r  s    r   rH  zReinterpretView.get_stride  s    DK&'''r   r  c                     d fd}|S )Nr   r  r   rl   c                   j                                         }t          j                                         ||                     }j         j        j        j        k    r%t          j        |j        j        j                  S |S r   )rc  r  rk   loadr#  r  rb  to_dtype_bitcast)r   r  
tmp_loaderr  s      r   r  z+ReinterpretView.make_loader.<locals>.loader  sl    k..00G$--//775>>BBJ{ DIO33+J
DIOTTT!!r   r   r  r   rl   r   r  r  s   ` r   r  zReinterpretView.make_loader  s(    	" 	" 	" 	" 	" 	" r   r  c                4    | j                                         S r   )rc  r  r  s    r   r  zReinterpretView.make_indexer      {'')))r   c                    | j         S r   rc  r  s    r   r  zReinterpretView.get_layout
  r  r   c                    d S r   r   r  s    r   r8  zReinterpretView.freeze_layout  r2  r   Fr  r   r   c                    t          | j        j        |          t          | j        j        |          z  t          | j        j        |          z  S r   )r)   rc  r  r  rf  rU  s     r   rV  z$ReinterpretView.get_free_symbol_uses  sK    
 T[-}==t{1=AABt{1=AAB	
r   Nr  r	  c                    t           j        j                            | j        | j        j        | j        j        | j        j        ||j	        nt           j        j        j	        | j        j
                  S r  )rm   r  wrapper_codecodegen_reinterpret_viewrb  rc  r  r  rf  	writeliner  r  s     r   r  z!ReinterpretView.codegen_reference  sc     w#<<IKKK & 2F8L8V+# = 
 
 	
r   r   c                    dS r  r   r  s    r   rQ  zReinterpretView.num_reads'      qr   ri  r{  rv  rk  rn  rr  rx  ry  ro  r  r  r   ru  r  )r   r   r   r  r   r  r  r  r#  r#  r  r  r  r  rH  r  r  r  r8  rX   rV  r  rQ  r  r  s   @r   rh  rh    s        44NNNF F F F F F

 
 
 
 H$ $ $ $" " " "    ! ! ! X!& & & &( ( ( (	 	 	 	* * * *       -..$)
 
 
 
 /.

 
 
 
 
       r   rh  c                  j    e Zd ZU dZded<   edd	            ZddZeZe	dd            Z
ddZddZdS )	DtypeViewz(Pretend our storage has a different typer  target_dtyper   r   	new_dtyper   r  c                    t          |          rPt          |          \  }}t          |j        ||j        |j        |j        |j                  }t          ||          S t          ||          S )Nra  )rb  r  )
r  rd  re  r  r  r  rf  rg  rh  r  )r  r   r  ri  rj  rl  s         r   r  zDtypeView.create1  s|     ## 
	D"7":":GZ$!!!$ J #
CCCCai8888r   r   c                D    |                      | j        | j        g          S r   )r  rb  r  r  s    r   r  zDtypeView.__str__@  s    	4+<=>>>r   c                    | j         S r   )r  r  s    r   r  zDtypeView.dtypeE  s      r   r  c                4    | j                                         S r   rb  r  r  s    r   r  zDtypeView.get_sizeI  r  r   r  c                J      j                                         d fd}|S )Nr   r  r   rl   c                `    t          j         |           j        j        j                  S r   )rk   r  r  rb  r  )r   r  r  s    r   r  z%DtypeView.make_loader.<locals>.loaderO  s'    'c

D4EtyWWWr   rT  rb  r  )r  r  r  s   ` @r   r  zDtypeView.make_loaderL  sH    	%%''	X 	X 	X 	X 	X 	X 	X r   N)r   r   r  r  r   r  r{  rn  rr  rx  )r   r   r   r  r   r  r  r  r  r  r  r  r  r   r   r   r  r  +  s         229 9 9 [9? ? ? ? H! ! ! X!$ $ $ $     r   r  c                  D    e Zd Zedd	            Ze	 	 ddd            ZdS )	SliceViewr   r   rV  r   startendr   tuple[int, int]c                J   	
 t           j        j        
|                                |         t	          d ||fD                       rt
          j        	t
          j        n
j        	
j	        d	
fdd fd} ||dd          } |||          }||fS )zz
        Normalize start and end such that both are in the range
        [0, x.get_size()[dim]] and start <= end.
        c              3  4   K   | ]}t          |          V  d S r   )r3   r  s     r   r   z0SliceView.normalize_start_end.<locals>.<genexpr>a  s+      HHA$Q''HHHHHHr   r   r!   lowerr   upperr   c                                         | |          r| n | |          }                    ||          r|n ||          }|S r   )statically_known_geqrS  )r   r  r  clamped_lowerclamped_fullmax_funcmin_funcr  s        r   clampz,SliceView.normalize_start_end.<locals>.clamph  sj    221e<<T((1eBTBT 
 00FF4XmU33 
  r   r  Union[int, None]r  Union[Expr, int]c                R    | |S                      |           }  | ||          S r   )r`  )r  r  r  r  r  r  dim_sizes       r   
clamp_wrapz1SliceView.normalize_start_end.<locals>.clamp_wraps  s8     {++C::C5eU+++r   r   )r   r!   r  r   r  r   r   r!   )
r  r  r  r   r  r   r  r  r   r  )
rm   r  r  r  r3  r   MinMaxevaluate_minevaluate_max)r  r   rV  r  r  r  r  r  r  r  r  s   `     @@@@@r   normalize_start_endzSliceView.normalize_start_endV  s     7#::<<$HH%h1GHHHHH 	-yHyHH,H,H		  		  		  		  		  		  		  		 	, 	, 	, 	, 	, 	, 	, 	, 
5!Xq11jeXx88czr   r@   Tstepr  r   c           	        t          j                  t          t                    sdk    s
J             	 dk    r|dk    rdk    r|S n# t          $ r Y nw xY wt          |                                          |r|                     ||          \  }t          |z
  dz
  z             <   t          |          r~t          |          \  }}t          |j                  }	|	         z  |	<   t          |j        |j        |	|j        |j                 z  z   |j                  }
t#          ||
          S d
fd}t%          ||	          S )Nr   l    r@   ra  r   r  r   c                    t          |           t                    k    sJ d|  d             t          |           } |          z  z   | <   | S )Nzwrong ndim rG  )r   r   )r   rV  r  r  r  s    r   r   z!SliceView.create.<locals>.reindex  sb     u::X...0Pe0P0Ph0P0P...KKEsd*U2E#JLr   rZ  r&  )r   r  r   r!   	TypeErrorr   r  r  r=   r  rd  r  re  r  r  rf  rg  rh  r  )r  r   rV  r  r  r  r  ri  rj  rk  rl  r   r  s     `` `      @r   r  zSliceView.create  s    |D!!$%%741	zzcY..4199 	 	 	D	 

%%
  	E00CDDJE3 uq!94@@ ## 	D"7":":GZj/00J(o4JsO$! !J$5c$:U$BB$ J #
CCCC	 	 	 	 	 	 	 	 	 ah@@@@s   A 
A! A!N)
r   r   rV  r   r  r   r  r   r   r  )r@   T)r   r   rV  r   r  r   r  r   r  r   r  r   r   r   )r   r   r   r  r  r  r   r   r   r  r  U  sg        ' ' ' ['R  3A 3A 3A 3A [3A 3A 3Ar   r  c                  D    e Zd ZU ded<   ded<   ddZdd	ZddZddZdS )BaseConstantr  r  r  r  r   r  c                    dS Nr   r   r  s    r   r  zBaseConstant.get_size  s    rr   r  c                    | j         S r   r  r  s    r   r#  zBaseConstant.get_device  r  r   r  c                    d S r   r   r  s    r   r  zBaseConstant.get_origin_node  r  r   rK  c                    t                      S r   r:   r  s    r   r  zBaseConstant.get_reads  r  r   Nrr  rv  rk  r  )r   r   r   r   r  r#  r  r  r   r   r   r  r    s~                       r   r  c                  F    e Zd ZU ded<   ded<   ded<   dd	ZddZddZdS )Constantr   r)  r  r  r  r  r   r  c                     d fd}|S )Nr   r  r   rl   c                B    t          j        j        j                  S r   )rk   r  r)  r  r   r  s    r   r  z$Constant.make_loader.<locals>.loader  s    <
DJ777r   r  r   r  s   ` r   r  zConstant.make_loader  s(    	8 	8 	8 	8 	8 	8 r   r0  c                    d S r   r   r  s    r   r  zConstant.realize  r2  r   r   c                :    t          | j        | j        |          S )N)r)  r  r  )r  r)  r  r  s     r   r`  zConstant.constant_to_device  s    dj
6JJJJr   Nrx  rt  r  )r   r   r   r   r  r  r`  r   r   r   r  r    sy         JJJ      K K K K K Kr   r  c                  >    e Zd ZU ded<   ded<   ded<   dd	ZddZdS )IndexingConstantr   r   r  r  r  r  r   r  c                     d fd}|S )Nr   r  r   rl   c                B    t          j        j        j                  S r   )rk   r  r   r  r  s    r   r  z,IndexingConstant.make_loader.<locals>.loader  s    >$*dj999r   r  r   r  s   ` r   r  zIndexingConstant.make_loader  s(    	: 	: 	: 	: 	: 	: r   r   c                :    t          | j        | j        |          S )N)r   r  r  )r  r   r  r  s     r   r`  z#IndexingConstant.constant_to_device  s    dj
6RRRRr   Nrx  r  )r   r   r   r   r  r`  r   r   r   r  r    se         JJJ   S S S S S Sr   r  c                P   d}d}t          t          t          ||                               D ]v\  }}|dk    rt          j        j                            ||          s(t          j        j                            ||          s dS |t          j        d|          z  }||z  }wdS Nr@   FT)	reversedr   r   rm   r  r  rT  r   r  )r  rQ  expected_strideexpected_stride_maxr   ys         r   is_contiguous_strides_for_shaper    s     Os5&112233 
 
166w77
 
 	'"::1>QRR	 55uyA.14r   c                *    t           j        | j        z  S r   )rA   padding_alignment_bytesitemsizer  s    r   get_align_for_dtyper     s    )U^;;r   c                  .    e Zd ZdZddZddZ	 dddZdS )r4  zxAbstract base for Layout, MultiOutputLayout, NoneLayout.
    Represents the memory layout of the output of an Operation.r   r  c                D    t          t          |           j                  r   r  r  s    r   r#  zOutputSpec.get_device  r  r   r   c                D    t          t          |           j                  r   r  r  s    r   storage_sizezOutputSpec.storage_size  r  r   Fr  r   r   c                D    t          t          |           j                  r   r  rU  s     r   rV  zOutputSpec.get_free_symbol_uses  r=  r   Nrv  r  r  r  )r   r   r   r  r#  r  rV  r   r   r   r4  r4    sf        C C7 7 7 77 7 7 7 %*7 7 7 7 7 7 7r   r4  c                     e Zd ZdZd ed          dfd9dZed:d            Zej        d;d            Zed:d            Z	e	j        d;d            Z	ed<d            Z
e
j        d=d            Z
d>dZeZd?dZd@dZdAd ZedBd$            ZdAd%ZdCd(ZdAd)ZedDd+            ZdEd,ZdAd-ZdFd/ZdGd1ZdHd4Zd<d5Z ed           	 dIdJd8            ZdS )Kr  zo
    Layout base class

    Carries tensor meta-information including offset and
    whether it is pinned.
    Nr   Fr  r  r  r  r  r  r  Optional[Sequence[Expr]]rf  r!   rg  r   r   r   c                ^   |t                               |          }|| _        || _        t	          |          t	          |          k    sJ d| d|             t          d |D                       sJ || _        || _        || _        || _	        | j	        r| j        j
        dk    sJ d S d S )NrX  	, stride=c              3  N   K   | ] }t          |t          t          f          V  !d S r   )r   r!   r   r   s     r   r   z"Layout.__init__.<locals>.<genexpr>+  s0      <<!:a$--<<<<<<r   r8  )r   r!  r  r  r   r   _size_stride_offsetrg  r   )r  r  r  r  r  rf  rg  s          r   rD  zLayout.__init__  s     >#66t<<F
4yyCKK''')H)H)H)H)H'''<<t<<<<<<<<
"NB(8E(A(A(ABBB(A(Ar   c                    | j         S r   r  r  s    r   r  zLayout.size3  
    zr   r)  c                    || _         d S r   r  r  r)  s     r   r  zLayout.size7  s    


r   c                    | j         S r   r  r  s    r   r  zLayout.stride;  
    |r   c                    || _         d S r   r  r   s     r   r  zLayout.stride?      r   c                    | j         S r   r  r  s    r   rf  zLayout.offsetC  r  r   c                    || _         d S r   r  r   s     r   rf  zLayout.offsetG  r  r   r   c                   d}| j         dk    r
d| j          }| j        j        dnd| j        j         }d}| j        r
d| j         }t	          |           j         d| j        j         | d| j         d| j         d	| j         | | d
S )Nr  r   z	, offset=:z, is_pinned=z('z', z, size=r  r3  )	rf  r  r   rg  r   r   r  r  r  )r  rf  device_index_stris_pinned_strs       r   r  zLayout.__str__K  s    ;!...F!%!2!:22@WDKDU@W@W> 	<;4>;;MDzz" N Ndk&6 N8H N NTZ N NIN N(,N5;N=JN N N	
r   c                    | j         S r   r  r  s    r   r#  zLayout.get_device[  r  r   r  c                    t           j        5  t          j        t	          | j                  t	          | j                  | j        | j        | j	                  cd d d            S # 1 swxY w Y   d S )N)r  r  
pin_memory)
rm   	fake_moder%  r&  r[   r  r  r  r  rg  r  s    r   get_examplezLayout.get_example^  s    [ 	 	&'	22'44j{>  	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   AA&&A*-A*c                6    t          | j        | j                  S r   )r  r  r  r  s    r   r  zLayout.is_contiguoush  s    .t{DIFFFr   rQ  rO  r[  c                    t          |           }|dvs| d         dk    rdS t          |t          |           |           D ]\  }}}|dk    r	||k    r dS dS )N)r      r@   FT)r   r   r-   )rQ  r[  ndimleftrightr  s         r   is_channels_last_contiguousz"Layout.is_channels_last_contiguousk  s{     5zzvqQ5!$3E::E"
 "
 	 	D% qyyTU]]uutr   c                    t          | j        t          t                              t          t          | j                                                | j                  D ]\  }}}|dk    r	||k    r dS dS r  )r   r  r  r   r!  r   r  )r  r  r  r  s       r   is_transposedzLayout.is_transposedy  sw    !$K^66tHTY<O<O7P7PQQRRI"
 "
 	 	D%
 qyyTU]]uutr   r   r   c                    t           j                  t                    k    sJ d t           j                  D             } fd|D             }fd|D             dd} |          dgt                    z  }t	          t                              D ]}||         ||         <   t	          t                    d	z
            D ]j}||         ||d	z            k    }t          |t                    s9t          j        j	        
                    ||         ||d	z            k    d
          }|r dS kd
S )Nc                l    g | ]1\  }}t           j        j                            |d           dk    /|2S )r   r  r@   )rm   r  r  r  )r   r   rV  s      r   r   z,Layout.is_stride_ordered.<locals>.<listcomp>  sI     
 
 
3w))#)::a?? ???r   c                *    g | ]}j         |         S r   r/  )r   r   r  s     r   r   z,Layout.is_stride_ordered.<locals>.<listcomp>  s    888Q$+a.888r   c                     g | ]
}|         S r   r   )r   r   r   s     r   r   z,Layout.is_stride_ordered.<locals>.<listcomp>  s    @@@Qa@@@r   arrr   r   c                >    t          |           fd| D             S )Nc                :    g | ]}                     |          S r   )r   )r   element
sorted_arrs     r   r   zDLayout.is_stride_ordered.<locals>.sorted_indices.<locals>.<listcomp>  s'    AAA'J$$W--AAAr   )r  )r  r#  s    @r   sorted_indicesz0Layout.is_stride_ordered.<locals>.sorted_indices  s(    JAAAASAAAAr   rI  r@   T)size_obliviousF)r  r   r   r   )r   r  r   r  r   r   r   rm   r  
_shape_envr^  )r  r   non_1_indicesr  r$  stride_orderedr   exprs   ``      r   r  zLayout.is_stride_ordered  s   4;3u::----
 
#DI..
 
 
 9888-888@@@@-@@@	B 	B 	B 	B
 u%% E

*s5zz"" 	1 	1A'-ayN58$$s5zzA~&& 	 	A!!$~a!e'<<DdD)) w)77"1%q1u(==d 8    uutr   c                    dgt          t          t          dt          | j                  dz
                                z   }t          |          g|z   }|                     |          S Nr   r@   )r   r  r   r   r  r  r?  s     r   is_channels_last_stride_orderedz&Layout.is_channels_last_stride_ordered  s]    d8E!S-=-=-A$B$BCCDDDUu$%%e,,,r   
in_stridesc                   t          |          }t          |           dk    r| S t          j        st                              ||           r| S t          j                    }t          |d          r|j	        
                    dd          r| S t          t          j        d          rt          j        j        nddfdrt          fd| D                       r| S t          |           }t          |          }d t!          t          |                     D             }d||d         <   d}t#          |dd         d          D ]\  }	}
||	dz
           }||         ||         z  }t%          |t&          t(          j        f          o|t          j        k    o||z  dk    p%t%          |t(          j                  ot          j        }|||
<   |rt3          ||          |z  ||
<   d}|s| S t4          xj        dz  c_        |S )z
        The padding does not change stride order but makes sure all strides larger
        than the threshold are multiple of align.
        r   rw  dislike_paddingFr&  Nr)  sympy.Expr | intr   r   c                    dS t          | t          j                  sdS t          fd| j        D                       S )NFc              3  B   K   | ]}                     |          V  d S r   )is_unbacked_symint)r   r   r   s     r   r   zILayout._pad_strides.<locals>.contains_unbacked_symints.<locals>.<genexpr>  s1      RR1y33A66RRRRRRr   )r   r   r!   r3  r2   )r)  r   s    r   contains_unbacked_symintsz6Layout._pad_strides.<locals>.contains_unbacked_symints  sM     udEJ// uRRRR@QRRRRRRr   c              3  .   K   | ]} |          V  d S r   r   )r   r   r4  s     r   r   z&Layout._pad_strides.<locals>.<genexpr>  s/      NNa66q99NNNNNNr   c                    g | ]}d S r  r   r  s     r   r   z'Layout._pad_strides.<locals>.<listcomp>  s    999Qq999r   r@   )r  T)r)  r0  r   r   )r  r   rA   pad_channels_lastr  r  rm   get_current_noder  rw  r  r  r&  r3  r  r   r   r   r   r   r   r"   padding_stride_thresholdr!   pad_dynamic_shapesrY   r'   num_comprehensive_padding)r-  r  r  aligncurrent_fx_noder  r   new_stridespaddedrankr   prev_idxr  require_paddingr4  r   s                 @@r   _pad_strideszLayout._pad_strides  sk    $E**z??a' 	F,N,N*-
 -
 	 ,..?F++ 	0D0H0Hu1
 1
 	 *1!'<*H*HRAG&&d		S 	S 	S 	S 	S 	S  	NNNN:NNNNN 	'
I>>,\::
99%J"8"8999 &'JqM"":abb>;;; 	 	ID#!$(+H *T(^;F 6C#788 (V<<(UNa'P VUZ00NV5N	 
  &K #*65#9#9E#AC  	 ))Q.))r   c                    t          | t                    sJ t          |                       | j        J |                     | j        | j        | j                  | _        d S r   )r   r   r   r  rC  r  r  r  s    r   r  zLayout.pad_strides  sW    $//;;d;;/{&&&''TY
KKr   c                D    t           j        ot          | t                    S r   )rA   comprehensive_paddingr   r   r  s    r   r  zLayout.should_pad_strides  s    +P
40P0PPr   re  c                    t          | t                    r| S |                                 r|                                  t          | j        | j        | j        | j        | j        | j	                  S r   )
r   re  r  r  r  r  r  r  rf  rg  r  s    r   as_fixedzLayout.as_fixed  so    dK(( 	K""$$ 	KJIKKN
 
 	
r   r  c                    t           j        s J dt          |           j         d            |                                                                 S )Nzconvert z to FixedLayout first)r   r  r   r   rH  r  r  s    r   r  zLayout.make_indexer  sR    , 	
 	
AtDzz*AAA	
 	
, }}++---r   otherr   c                    t          |t                    o_| j        |j        k    oO| j        |j        k    o?| j        |j        k    o/| j        |j        k    o| j        |j        k    o| j        |j        k    S r   )r   r  r  r  r  r  rf  rg  )r  rJ  s     r   __eq__zLayout.__eq__  s    uf%% 2u|+2
ek)2 	UZ'2 u|+	2
 u|+2 %/1	
r   c                B    t          | j        | j        | j                  S r   )r*   r  r  rf  r  s    r   r  zLayout.storage_size  s    .ty$+t{SSSr   r  r   c                    t          | j        |          t          | j        |          z  t          | j        |          z  S r   )r)   r  r  rf  rU  s     r   rV  zLayout.get_free_symbol_uses  s@    
 TY66t{M::;t{M::;	
r   )r  r  r  r  r  r  r  r  rf  r!   rg  r   r   r   rr  r)  r  r   r   rs  r)  r!   r   r   r{  rw  )r   r  rq  )rQ  rO  r[  rO  r   r   )r   r   r   r   )r-  r   r  r  r  r  r   r   ri  )r   re  ry  )rJ  r   r   r   r  r  )r   r   r   r  r"   rD  r  r  setterr  rf  r  r  r#  r  r  r  r  r  r  r,  rC  r  r  rH  r  rL  r  rX   rV  r   r   r   r  r    s         ,0wqzzC C C C C,    X 
[   [    X ]   ]    X ]   ]
 
 
 
 H      G G G G    \   ! ! ! !F- - - - B B B \BHL L L L
Q Q Q Q
 
 
 
. . . .	
 	
 	
 	
T T T T H%%$)
 
 
 
 &%
 
 
r   r  c                      e Zd ZdZddZdS )re  z A Tensor layout we cannot changer   r  c                B    t          | j        | j        | j                  S )r3  )r  r  r  rf  r  s    r   r  zFixedLayout.make_indexer*  s    didkBBBr   Nry  )r   r   r   r  r  r   r   r   re  re  '  s4        **C C C C C Cr   re  c                      e Zd ZdZdZed3d            Zed4d	            Zed5d            Zed6d            Z	ed7d            Z
ed8d            Zej        d9d            Zed8d            Zej        d9d            Zed:d            Zej        d;d            Z	 d<d=dZ	 d<d>d Zd?d!Zd@d"ZdAd$ZdBd(Z	 	 dCdD fd2Z xZS )Er   z|
    A Tensor layout that we are allowed to change

    Assumption: layout change should NOT add or remove free symbols
    Fsizesr   r   r}  c                    t          |           dk    rg S t          j        j        g}t	          | dd                    D ] }|                    ||d         z             !t          t	          |                    S )Nr   r@   rI  )r   r   r  ro  r  r  r   )rU  reversed_stridesr  s      r   r!  z!FlexibleLayout.contiguous_strides9  s|    u::??I!GK=U122Y'' 	A 	AD##D+;B+?$?@@@@H-..///r   r   c                   t          t          t          |                               t          |          k    sJ | |f            t          j        j        }dgt          |          z  }|D ]}|||<   || |         z  }|S )z
        Create a stride based on the order the dimensions should be filled in.

        In this format, channels last would be:
            [1, 3, 2, 0]
        N)r;   r   r   r   r  ro  )rU  r   next_strider[  r   s        r   fill_orderedzFlexibleLayout.fill_orderedB  s     %E

++,,
50A0AAAAE5>AAAgk&3u::% 	1 	1A$GAJ%a0KKr   r  c                    t          t          t          |                               t          |          k    sJ t          |          }t                              | |          S )z
        Create a stride based on the sorted order of a permuted range.

        In this format, channels last would be:
            [3, 0, 2, 1]
        )r;   r   r   r   r   rZ  )rU  r   r   s      r   r(  zFlexibleLayout.stride_orderedS  sV     %E

++,,
50A0AAAAA,U33
**5*===r   memory_formattorch.memory_formatc                Z   |t           j        k    r t                              | t                    S |t           j        k    r t                              | t                    S |t           j        k    rt                              |           S t          
                    d|           t          )aq  
        Create a stride based on a memory format.

        Memory format is translasted into a stride order,
        so channels_last is the same as:
            FlexibleLayout.stride_ordered(sizes, [3, 0, 2, 1])

        This interface does not support memory_format `torch.preserve_format`
        which should be used to deduce a format from another source
        z>stride_ordered_for_memory_format, unsuppored memory_format: %s)r%  channels_lastr   r(  NHWC_STRIDE_ORDERchannels_last_3dNHWDC_STRIDE_ORDERcontiguous_formatr!  r  r  r  )rU  r\  s     r    stride_ordered_for_memory_formatz/FlexibleLayout.stride_ordered_for_memory_format_  s     E///!008IJJJe444!008JKKKe555!44U;;;IIP   &%r   r  rO  c                    t          |           t          |          k    sJ d |D             }t          t          t          |                    |j                  }t                              | |          S )z
        Create a stride that has the same stride order as given stride

        For example, if given stride is [1000, 1, 100, 10],
        the fill order should be [1, 3, 2, 0]
        c                V    g | ]&}t           j        j                            |          'S r   )rm   r  r  rG  r  s     r   r   z/FlexibleLayout.same_ordered.<locals>.<listcomp>  s+    IIIQ!'"55a88IIIr   r  )r   r  r   __getitem__r   rZ  )rU  r  r   s      r   same_orderedzFlexibleLayout.same_orderedz  sm     5zzS[[((((II&IIIE#f++..F4FGGG
**5*===r   c                    | j         S r   r  r  s    r   r  zFlexibleLayout.size  r  r   r)  r   c                @    |                      d|           || _        d S )Nr  )!assert_free_symbol_uses_unchangedr  r   s     r   r  zFlexibleLayout.size  s#    ..vu===


r   c                    | j         S r   r  r  s    r   r  zFlexibleLayout.stride  r  r   c                @    |                      d|           || _        d S )Nr  )rk  r  r   s     r   r  zFlexibleLayout.stride  #    ..x???r   r!   c                    | j         S r   r  r  s    r   rf  zFlexibleLayout.offset  r  r   c                @    |                      d|           || _        d S )Nrf  )rk  r  r   s     r   rf  zFlexibleLayout.offset  rn  r   r9  r   re  c                    |                      | j        |          }|                                 r#|r!|                     || j        | j                  }t          | j        | j        | j        || j        | j                  S r   )	r(  r  r  rC  r  re  r  rf  rg  )r  r   r9  rk  s       r   as_stride_orderzFlexibleLayout.as_stride_order  s     ((E::
""$$ 	N 	N**:ty$*MMJKJIKN
 
 	
r   rD  c                    |}|                                  r#|r!|                     || j        | j                  }t	          | j        | j        | j        || j        | j                  S r   )r  rC  r  r  re  r  rf  rg  )r  rD  r9  rk  s       r   as_exact_strideszFlexibleLayout.as_exact_strides  so     #
""$$ 	N 	N**:ty$*MMJKJIKN
 
 	
r   c                    |                      | j        |          }|                                 r!|                     || j        | j                  }t          | j        | j        | j        || j        | j                  S r   )	rZ  r  r  rC  r  re  r  rf  rg  )r  r   rk  s      r   as_fill_orderzFlexibleLayout.as_fill_order  sw    $($5$5di$G$G
""$$ 	N**:ty$*MMJKJIKN
 
 	
r   c                    |                      | j        |          }|                                 r!|                     || j        | j                  }t          | j        | j        | j        || j        | j                  S r   )	rh  r  r  rC  r  re  r  rf  rg  )r  r  rk  s      r   as_same_orderzFlexibleLayout.as_same_order  sw    &&ty&99
""$$ 	N**:ty$*MMJKJIKN
 
 	
r   $dict[tuple[str, bool], sympy.Symbol]c           
         i }dD ]9}dD ]4}||f}t          t          t          | |          |                    ||<   5:|S )N)r  r  rf  TF)r;   r)   r   )r  initial_free_symbolsr   r  r  s        r   get_initial_free_symbol_usesz+FlexibleLayout.get_initial_free_symbol_uses  sl    !0 	 	D!.  ]+,6$WT4%8%8-HH- -$S)) $#r   r   r   r4   c                    dD ]D}| j         ||f         }t          t          ||                    }||k    sJ d| d|             Ed S )Nr{  z)Expected free symbols unchanged, but got z vs )r|  r;   r)   )r  r   r)  r  old_free_symbolsnew_free_symbolss         r   rk  z0FlexibleLayout.assert_free_symbol_uses_unchanged  sy    * 	 	M#8$9NO)*:5-*P*PQQ#'7777d<LddRbdd 8777	 	r   Nr  r  r  r  r  r  r  rg  c                    |rt                               ||          }nt                               |          }t                                          |||||           |                                 | _        d S )Nrg  )r   rZ  r!  r  rD  r}  r|  )r  r  r  r  r  rg  r[  r  s          r   rD  zFlexibleLayout.__init__  sv      	>$11$EEGG$77==GgKKK %)$E$E$G$G!!!r   )rU  r   r   r}  )rU  r   r   r   r   r}  )rU  r   r   r   r   r  )rU  r   r\  r]  r   r  )rU  r   r  rO  r   r  rr  rO  rs  rP  r  )r   r   r9  r   r   re  )rD  rO  r9  r   r   re  )r   r   r   re  )r  rO  r   re  )r   ry  )r   r   r)  r4   r   r   r  )r  r  r  r  r  r  r  r  rg  r   r   r   )r   r   r   r  r  r  r!  rZ  r(  rd  rh  r  r  rQ  r  rf  rr  rt  rv  rx  r}  rk  rD  r  r  s   @r   r   r   /  s]         N 0 0 0 \0    \  	> 	> 	> \	> & & & \&4 > > > \>    X 
[   [    X ]   ]    X ]   ]
 ;@
 
 
 
 
" HM
 
 
 
 
 
 
 
 

 
 
 
	$ 	$ 	$ 	$    AEH H H H H H H H H H Hr   r   c                  b     e Zd ZdZd fdZddZdd
Z ed           	 ddd            Z xZ	S )NonOwningLayoutz,Is a view into the storage of another tensorviewUnion[BaseView, TensorBox]r   r   c                    |                                 }t                                          |j        |j        |j        |j                   || _        d S r   )r  r  rD  r  r  r  r  r  )r  r  rc  r  s      r   rD  zNonOwningLayout.__init__  sO    ""MLKM		
 	
 	
 			r   r  c                N    |                                                                  S r   )rH  r  r  s    r   r  zNonOwningLayout.make_indexer  s    }}++---r   r   c                    | j                                         j        }|dk    rdS ddlm} t
          j        j                            ||          S )Nr   Tr@   )	ALIGNMENT)	r  r  rf  utilsr  rm   r  r  r  )r  rf  r  s      r   maybe_guard_alignedz#NonOwningLayout.maybe_guard_aligned  sS    %%''.Q;;4$$$$$$w<<VYOOOr   Fr  r   c                D   t          | j        t                    sJ | j        j        }t          |t                    sJ t          |                      |j        }t          |t                    sJ t          |                      |j                            |          S r   )	r   r  rh  rb  r  r   r  rc  rV  )r  r  boxinput_buffers       r   rV  z$NonOwningLayout.get_free_symbol_uses  s     $)_55555in#z**55DII55*x,//::c::/"77FFFr   )r  r  r   r   ry  rq  r  r  )
r   r   r   r  rD  r  r  rX   rV  r  r  s   @r   r  r    s        66     . . . .P P P P -..$)G G G G /.G G G G Gr   r  c                      e Zd ZdZdS )CommBufferTypesymm_memN)r   r   r   SYMM_MEMr   r   r   r  r  (  s        HHHr   r  c                  :     e Zd ZU dZded<   ded<   d	 fdZ xZS )
CommBufferLayoutax  
    A layout that signifies the buffer is a comm buffer.
    In terms of striding, the layout is identical to `FixedLayout`.

    Buffers with this layout do not participate in in-place reuse - it can be
    neither the source nor the target for in-place reuse.

    For detailed motivation and usage of this layout, see
    NOTE [lowering-time collective optimization].
    r  comm_buffer_typer   
group_namerc  r   c                &   t          |t                    st          d| d          |                                }t	                                          |j        |j        |j        |j	        |j
        |j                   || _        || _        d S )NzJA `CommBufferLayout` can only be initialized with a `FlexibleLayout` (got z).r  r  r  r  rf  rg  )r   r   rL  rH  r  rD  r  r  r  r  rf  rg  r  r  )r  rc  r  r  fixedr  s        r   rD  zCommBufferLayout.__init__;  s     &.11 	 6+16 6 6  
 !!<+<<o 	 	
 	
 	
 !1$r   )rc  r   r  r  r  r   )r   r   r   r  r   rD  r  r  s   @r   r  r  ,  s`         	 	 %$$$OOO% % % % % % % % % %r   r  c                      e Zd ZU ded<    ej        d           Zded<    ej        d           Zded<   ddZddZ	ddZ
dS )
NoneLayoutr  r  c                     dgS r   r   r   r   r   r  zNoneLayout.<lambda>_  s     r   default_factoryr1  r  c                     dgS r   r   r   r   r   r  zNoneLayout.<lambda>`  s    1# r   r  r   r   c                    dS r   r   r  s    r   r  zNoneLayout.storage_sizeb  r  r   r4  c                    | S r   r   r  s    r   rH  zNoneLayout.as_fixede      r   c                    | j         S r   r  r  s    r   r#  zNoneLayout.get_deviceh  r  r   Nr  rp  rv  )r   r   r   r   r  r  r  r  r  rH  r#  r   r   r   r  r  T  s          #"""'k'DDDDDDDD))++FFFFFFFF           r   r  c                       e Zd Zd fdZedd            Zej        dd
            ZddZddZd dZ	e
	 d!d"d            Zd#dZd$dZ xZS )%MutationLayoutSHOULDREMOVEr'  r   r   r   c                T   t                                          |                                |                                |                                d            || _        |                                                                 }t          j	        
                    |           d S r   )r  rD  r  r"  r  r'  
get_bufferr#  rm   r  mark_buffer_mutated)r  r'  r   r  s      r   rD  z#MutationLayoutSHOULDREMOVE.__init__m  s    &&((OO		
 	
 	
   ))++	##D)))))r   r  c                4    |                                  j        S r   )real_layoutr  r  s    r   r  z!MutationLayoutSHOULDREMOVE.stridex  s    !!((r   r)  r   c                    d S r   r   r   s     r   r  z!MutationLayoutSHOULDREMOVE.stride|  s    r   r!   c                N    |                                                                  S r   )r  r  r  s    r   r  z'MutationLayoutSHOULDREMOVE.storage_size  s     !!..000r   r  c                    dfd | j                   }t          |t                    sJ t          |                      |S )Nr'  r   r   c                    t          | t                    r | j                  S t          | t                    r |                                           S t          | t
                    r | j                  S | S r   )r   r  r'  r  r6  
MutableBoxrb  )r'  unwrap_viewss    r   r  z;MutationLayoutSHOULDREMOVE.get_buffer.<locals>.unwrap_views  s    &"<== 3#|FM222&(++ :#|F$6$6$8$8999&*-- 1#|FK000Mr   )r'  r   r   r   )r'  r   r  r   )r  r5  r  s     @r   r  z%MutationLayoutSHOULDREMOVE.get_buffer  s]    	 	 	 	 	 	 dk**&&))774<<77)r   r  c                f    |                                  j        }t          |t                    sJ |S r   )r  rc  r   r  )r  rc  s     r   r  z&MutationLayoutSHOULDREMOVE.real_layout  s/    "")&&)))))r   Fsrcdstunsafe_aliasr   c                v   |                                  t          j                            |                                           t          |t                    r|j        }|                                 |st          
                    |                                |                                |                                d t          |                                |                                          D                       }t          |t           t"          f          sJ |j        }|                                  t%          |d          s
J |            t          |j        j        t(                    s!J t+          |j        j                              t-          |          |j        _        |j        S )Nc                ^    g | ]*\  }}t           j        j                            ||          +S r   rm   r  r  check_equals_and_simplifyr   r@  rB  s      r   r   z;MutationLayoutSHOULDREMOVE.realize_into.<locals>.<listcomp>  sA       1 G$>>q!DD  r   r$  rb  )r  rm   r  r  r#  r   r   rb  r4  r  r  r#  r"  r  r   r  r  r  r  rc  r   r   r  )r  r  r  r  r   s        r   realize_intoz'MutationLayoutSHOULDREMOVE.realize_into  sp    	 	
##CLLNN333c9%% 	(C 	 	##~~''mmoo**  #CLLNNCLLNN C C  	 $  D dXz$:;;;;;)CsF##((S((##(/>::QQD<Q<QQQ:4S99xr   r   c                    | S r   r   r  s    r   rH  z#MutationLayoutSHOULDREMOVE.as_fixed  r  r   r  c                4    | j                                         S r   )r'  r  r  s    r   r  z'MutationLayoutSHOULDREMOVE.make_indexer  r  r   )r'  r   r   r   rr  )r)  r   r   r   rs  )r   r  ro  r  )r  r   r  r   r  r   r   r   )r   r   ry  )r   r   r   rD  r  r  rQ  r  r  r  r  r  rH  r  r  r  s   @r   r  r  l  s       	* 	* 	* 	* 	* 	* ) ) ) X) ]   ]1 1 1 1      
 <A% % % % [%N   * * * * * * * *r   r  c                  ^    e Zd ZU ded<   ded<   d? fdZd@d	ZdAdZdBdZdCdZdDdZ	e
dEd            ZdFdZdGdZdHdZdIdZdJdZdKdZdLd Zd?d!Z	 dMdNd&ZdOd'ZdPd)Z	 dMdQd+ZdLd,ZdRd.ZdSdTd2Zd?d3ZdUd5ZdUd6ZdVd8Z ed           	 dMdWd;            Z dXd<Z!dYd=Z"dLd>Z# xZ$S )Zr  r0  r   r4  rc  r   r   c                t    t                                                       |                     dd            d S r  )r  r  r  r  s    r   r  zBuffer.__post_init__  s5    t44444r   r  c                N    |                                                                  S r   )r  r  r  s    r   r  zBuffer.make_indexer  s      --///r   r   c                2    | j         s
J |             | j         S r   r  r  s    r   r#  zBuffer.get_name  s    y$yyr   !Union[torch.Tensor, sympy.Symbol]c                    t          | j        t                    r| j                                        S t	          t          | j                  j                  r   )r   rc  r  r  r  r   r   r  s    r   r  zBuffer.get_example  sE    dk6** 	-;**,,,!$t{"3"3"<===r   r  c                N    |                                                                  S r   )r  r#  r  s    r   r#  zBuffer.get_device  s     ##%%00222r   r  c                    d S r   r   r  s    r   r  zBuffer.get_defining_op  r  r   r  c                4    |                                  j        S r   )r  r  r  s    r   r  zBuffer.dtype  s      &&r   r  c                8    g |                                  j        S r   )r  r  r  s    r   r  zBuffer.get_size  s    (""'((r   r}  c                8    g |                                  j        S r   )r  r  r  s    r   rH  zBuffer.get_stride  s    *"")**r   r!   c                4    |                                  j        S r   )r  rf  r  s    r   
get_offsetzBuffer.get_offset  s      ''r   r  c                    t          | j        t                    r| j        S t          t	          | j                  j                  r   )r   rc  r  r  r   r   r  s    r   r  zBuffer.get_layout  s9    dk6** 	;!$t{"3"3"<===r   c                    | j         S r   r  r  s    r   r  zBuffer.get_output_spec  r  r   r   c                *    |                                  S r   )r  r  s    r   rS  zBuffer.get_storage_numel  s    ~~r   r   c                4    |                                  j        S r   )r  rg  r  s    r   get_is_pinnedzBuffer.get_is_pinned  s      **r   c                    t          | j        t                    r:t          | j        t                    s"| j                                        | _        d S d S d S r   )r   rc  r  r  rH  r  s    r   r8  zBuffer.freeze_layout   s]    dk6** 	1:K4
 4
 	1 +..00DKKK	1 	1 	1 	1r   Fr   r   r9  c                    t          | j        t                    sJ t          | j                              | j                            ||          | _        d S Nr  )r   rc  r   r   rr  r;  s      r   r<  z&Buffer.freeze_layout_with_stride_order  sN     $+~66IIT[8I8III6k11%}1UUr   c                    t          | j        t                    sJ t          | j                              | j                            |          | _        d S r   )r   rc  r   r   rv  r?  s     r   r@  z$Buffer.freeze_layout_with_fill_order  sG    $+~66IIT[8I8III6k//66r   r  c                    t          | j        t                    sJ t          | j                              | j                            |          | _        d S r   )r   rc  r   r   rx  rB  s     r   rC  z$Buffer.freeze_layout_with_same_order  sG    $+~66IIT[8I8III6k//77r   rD  c                    t          | j        t                    sJ t          | j                              | j                            ||          | _        d S r  )r   rc  r   r   rt  rF  s      r   rG  z'Buffer.freeze_layout_with_exact_strides  sU     $+~66IIT[8I8III6k22 3 
 
r   c                    t           j        j                            t	          j        |                                 d                    S r   r  r  s    r   r  zBuffer.is_zero_elements  r  r   r  c                                                       r(t          t                                                     S d fd}|S )Nr  r   r  r   rl   c                v                                     }t          j        j        pd ||                     S r  )r  rk   r  r   r   r  r  s     r   r  z"Buffer.make_loader.<locals>.loader$  s5    ''))G8DI2GGENNCCCr   r  )r  r   r  r"  r  s   ` r   r  zBuffer.make_loader  s`      "" 	B=0@0@AAAA	D 	D 	D 	D 	D 	D r   Nr  r	  c                *    |                                  S r   r#  r  s     r   r  zBuffer.codegen_reference*  r  r   c                    d S r   r   r  s    r   r  zBuffer.decide_layout-  r2  r   r  c                x    t          | j        t                    r| j        j                                        gS dS r  )r   rc  r  r  r#  r  s    r   rf  z#Buffer.get_inputs_that_alias_output0  s5    dk?33 	1K$--//00rr   c                x    t          | j        t                    r| j        j                                        gS dS r  )r   rc  r  r'  r#  r  s    r   rb  zBuffer.get_mutation_names5  s6    dk#=>> 	3K&//1122rr   r  c                F    t          |                                 g          S r   )r;   r#  r  s    r   r  zBuffer.get_read_names:  s    4==??+,,,r   r  r   c                    t                      S r   r:   rU  s     r   rV  zBuffer.get_free_symbol_uses=       ||r   c                    t                      S r   r:   r  s    r   r  zBuffer.get_unbacked_symbol_defsC  r  r   c                    d S r   r   r  s    r   r  zBuffer.realizeF  r2  r   c                    dS r  r   r  s    r   should_allocatezBuffer.should_allocateI  s    ur   ri  ry  r{  )r   r  rv  rl  rn  rr  )r   r}  rs  ro  rp  r  rq  r  r  r  )r  r   r   r   )rD  r   r9  r   r   r   rx  r   ru  r  rj  r  r  rt  )%r   r   r   r   r  r  r#  r  r#  r  r  r  r  rH  r  r  r  rS  r  r8  r<  r@  rC  rG  r  r  r  r  rf  rb  r  rX   rV  r  r  r  r  r  s   @r   r  r    s         
5 5 5 5 5 50 0 0 0   > > > >
3 3 3 3    ' ' ' X') ) ) )+ + + +( ( ( (> > > >
          + + + +1 1 1 1 ;@V V V V V7 7 7 78 8 8 8
 CH
 
 
 
 
U U U U	 	 	 	          
   
- - - - H%%$)    &%
             r   r  c                  4    e Zd Zd	dZd
dZej        ZddZdS )OperationBufferr   r  c                    | gS r   r   r  s    r   r  zOperationBuffer.get_outputsQ  s	    vr   r  c                    | S r   r   r  s    r   r  zOperationBuffer.get_defining_opT  r  r   r   c                n    t                               |            t                              |            d S r   )r  r  r  r  s    r   r  zOperationBuffer.__post_init__Z  s0    T"""%%%%%r   Nr  r   r  ri  )r   r   r   r  r  r  rd  r  r   r   r   r  r  N  s[               #5& & & & & &r   r  c                      e Zd ZddZdS )r  r   r   c                    dS r  r   r  s    r   rQ  zInputBuffer.num_reads`  r  r   Nr  )r   r   r   rQ  r   r   r   r  r  _  s(             r   r  c                      e Zd ZdZdS )DonatedBufferaY  
    Represents a donated buffer which is a saved tensor that is not alias to any
    fwd inputs, fwd user outputs, and bwd outputs. We generally cannot inplace
    reuse the input tensor memory during backward since it might be used in another
    function. However, donated buffer can be inplace reused during backward
    to save memory.
    N)r   r   r   r  r   r   r   r  r  d  s           r   r  c                  .    e Zd ZU dZded<   ddZdd
ZdS )r%  Nr  r#  r   r  c                     d fd}|S )Nr   r  r   rl   c                                                                                     }t          j        t          j                                                            j                   ||                     S r   )	r  r  rk   r  rm   r  constant_namer#  r#  r  s     r   r  z*ConstantBuffer.make_loader.<locals>.loaderr  s]    oo''4466G8%%dmmoot7KLL  r   r  r   r  s   ` r   r  zConstantBuffer.make_loaderq  s(    	 	 	 	 	 	 r   r  r  r   c                    t          t          j                            |                                 |          | j                  S N)r   rc  )r%  rm   r  r  r#  rc  r  s     r   r`  z!ConstantBuffer.constant_to_device{  s:    &&t}}??
 
 
 	
r   rx  r  )r   r   r   r#  r   r  r`  r   r   r   r%  r%  n  sO         .2O2222   
 
 
 
 
 
r   r%  c                  ^    e Zd ZddZ ed           	 ddd            ZdddZddZddZd	S )NoneAsConstantBufferr   rK  c                    t                      S r   r:   r  s    r   r  zNoneAsConstantBuffer.get_reads  r  r   Fr  r   r   c                    t                      S r   r:   rU  s     r   rV  z)NoneAsConstantBuffer.get_free_symbol_uses  r  r   Nr  r	  r   c                .    t           j        j        j        S r   )rm   r  r  none_strr  s     r   r  z&NoneAsConstantBuffer.codegen_reference  s    w#,,r   r4  c                "    t          d           S Nr  )r  r  s    r   r  z$NoneAsConstantBuffer.get_output_spec  s    &&&&r   c                    dS r  r   r  s    r   r  z&NoneAsConstantBuffer.has_tensor_output  r  r   r  r  r  r   ru  rp  rq  )	r   r   r   r  rX   rV  r  r  r  r   r   r   r  r    s            233$)    43
- - - - -' ' ' '     r   r  c                  Z    e Zd ZU ded<    ed           	 ddd            ZdddZddZd	S )r   r!   r)  Fr  r   r   r   c                ,    t          | j        |          S r   )r)   r)  rU  s     r   rV  z*ShapeAsConstantBuffer.get_free_symbol_uses  s      	=999r   Nr  r	  r   c                T    t           j        j                            | j                  S r   )rm   r  r  codegen_sizevarr)  r  s     r   r  z'ShapeAsConstantBuffer.codegen_reference  s    w#33DI>>>r   c                    dS r  r   r  s    r   r  z'ShapeAsConstantBuffer.has_tensor_output  r  r   r  r  r   ru  rq  )r   r   r   r   rX   rV  r  r  r   r   r   r   r     s         JJJ344$): : : : 54:
? ? ? ? ?     r   r   c                  P    e Zd ZU dZded<   dZded<   eej        d=d	                        Z	d>dZ
d?dZd@dZdAdZdBdZ ed           	 dCdDd            ZdE fdZdFdZdGdZdHdZdId ZedJd"            Z	 	 dKdLd)Ze	 dMdNd3            ZdOd5Zd>d6ZdFd7ZdFd8ZdPd<Z xZS )Qr  zb
    Represents a buffer that is computed during kernel execution rather than being an input.
    r  rb  FzClassVar[bool]_force_realizer   Iterator[None]c               #  ~   K   t           j        } 	 dt           _        d V  | t           _        d S # | t           _        w xY wNT)r  r  )	old_values    r   force_realizezComputedBuffer.force_realize  sH       #1		6,0N)EEE,5N)))IN)5555s   . <r0  c                d    | j         | j         S t          | j        d          r| j        j         S dS )z
        Returns self.name if it exists, otherwise returns the name of the data node if that exists.
        If neither exist, returns None.
        Nr   )r   r  rb  r  s    r   get_computed_buffer_namez'ComputedBuffer.get_computed_buffer_name  s7    
 9 949f%% 	"9>!tr   r   c                4    | j                                         S r   rb  rQ  r  s    r   rQ  zComputedBuffer.num_reads  r  r   rK  c                4    | j                                         S r   rb  r  r  s    r   r  zComputedBuffer.get_reads  r  r   r  c                4    | j                                         S r   r  r  s    r   r  zComputedBuffer.get_read_names  r  r   rH  c                   t          | j        t          t          t          t
          f          s;t          j        t                      t                      t                                S t          j
        t          dd          5  | j                                        r]t          |                                 | j                                        | j                                                  cd d d            S t          |                                 | j                                                  cd d d            S # 1 swxY w Y   d S )NrN  writesindex_exprsr  T)r   rb  rd  ri  r  r  rB   
ReadWritesr;   r    r   r   rX  rK   get_store_functionr  rZ  r  r  s    r   rJ  zComputedBuffer.get_read_writes  st   $)itY%GHH 	* ll!||&LL    \.*:DAA 	 	y++-- 
*++--I0022I0022 	 	 	 	 	 	 	 	 +++--I&&(( 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   A*E ;8E  EEr  r   r   c                :   | j                             |          | j                            |          z  }|                                 rQt	          |                                 t                    r*||                                                     |          z  }|S r   )rc  rV  rb  has_store_functionr   r  rM   rJ  )r  r  r5  s      r   rV  z#ComputedBuffer.get_free_symbol_uses  s    $ 11
 
I**=99: ""$$ 	Q##%%x*
 *
 	Q d**,,AA-PPPFr   r  c                   |                                  sP| j        t          j        j        vr8|                                 dk    r | j        s| j                                        S t                                                      S r   )
rX  r   rm   r  mutated_buffersrQ  r  rb  r  r  r  s    r   r  zComputedBuffer.make_loader  su    ''))	+	!888  A%%' & 9((***ww""$$$r   c                \    t          | j        t          t          t          t
          f          S r   )r   rb  rd  ri  r  r  r  s    r   r  z!ComputedBuffer.has_store_function
  s    $)itY%GHHHr   Callable[..., None]c                   |                                                                                                  }t          | j        t
          t          t          f          r t          | j        j	        | j
        |          S t          | j        t                    sJ t          | j                              t          | j        j        | j
        |          S r   )r  rH  r  r   rb  rd  ri  r  r   rs  r   r  r   r  )r  r  s     r   r  z!ComputedBuffer.get_store_function  s    //##,,..;;==di)T4!899 	G494diIIIdi33DDT$)__DD349149gFFFr   Optional[list[int]]c                :   t          | j        t                    rt          j        | j                                        | j                                                  \  \  }}|                                 j	        }t          d |D                       sJ fd|D             }|rqt          | j        t          t          f          r| j                            |          n|fd|D             }ddlm}  |||                                           S dS )al  
        If our layout is still flexible, try to determine the stride order based on stride orders of reads.

        TODO(jansel): A better algorithm here would look at downstream consumers of this
                      value and try to do global graph-level layout optimization.
                      This is also something just begging to be autotuned.
        c              3  b   K   | ]*}t          |t          j        t          j        f          V  +d S r   )r   rB   StarDep	MemoryDepr  s     r   r   z0ComputedBuffer.get_fill_order.<locals>.<genexpr>$  sK         1|3\5KLMM     r   c                    g | ];}t          |t          j                  t          |j        d  D                       <S )c                >    i | ]}|d k    |t           j        j        S r  r  )r   vs     r   r   z<ComputedBuffer.get_fill_order.<locals>.<listcomp>.<dictcomp>)  s*    $W$W$WPQUVPVPVQPVPVPVr   )r   rB   r#  ri   r   )r   r  ro  s     r   r   z1ComputedBuffer.get_fill_order.<locals>.<listcomp>(  sW       a!78817$W$Wn$W$W$WXX  r   c                Z    g | ]'}t           j        j                            |          (S r   rm   r  r  r  )r   r)  r  s     r   r   z1ComputedBuffer.get_fill_order.<locals>.<listcomp>3  s;     " " "EIAG$11$@@" " "r   r@   pick_loop_orderN)r   rc  r   rB   r  rb  r  rZ  rJ  rN  r   ri  r  r   	schedulerr*  r  )r  
index_varsr  rN  stride_lengthsr*  r  ro  s         @@r   r   zComputedBuffer.get_fill_order  sl    dk>22 	H.:.M	,,..	0L0L0N0N/ /+(Z! ((**0E               E  
Hdi$66 )"i//
NKKGG(G" " " "MR" " " 766666&~t}}GGGtr   r   c                    t          | j        t                    rC|                                 }|r|                     |           d S |                                  d S d S r   )r   rc  r   r   r@  r8  r?  s     r   r  zComputedBuffer.decide_layout<  sk    dk>22 	%''))E %22599999""$$$$$	% 	%r   Mtuple[tuple[list[Expr], list[Expr]], LoopBody, tuple[list[Expr], list[Expr]]]c                   t          j        | j                                        | j                                        d          \  }}t          j        t          d|                                           5  t          | 
                                |                                 r|n	|d d         |g|R  }d d d            n# 1 swxY w Y   g }g }g }g }|                                D ]t\  }}	||d         v r/|rJ |                    |           |                    |	           >||d         v sJ |                    |           |                    |	           u||f|||ffS )Nqr|   r#  r@   r   )rB   r  rb  r  rZ  r    r   r%  r#  rM   r  rX  itemsr  )
r  r   
var_rangesr  r,  reduce_vars
index_sizereduce_sizer&  r   s
             r   get_default_sizes_bodyz%ComputedBuffer.get_default_sizes_bodyD  s    (:I((**DI,H,H,J,JSV
 
 
j \.*;T__=N=NOO 	 	''))0022@RaR 	  D	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 
!#
$$&& 	& 	&DAqDG||&&&!!!$$$!!!$$$$DG||||""1%%%""1%%%%K($[0IIIs   6A CC	CNextra_indexing_constraints*Optional[tuple[dict[Any, Any], list[Any]]]recompute_sizes_body_funcOptional[Callable[..., Any]]8tuple[tuple[list[Expr], list[Expr]], Optional[LoopBody]]c                l                                      \  \  }}}\  }}|r |||f|||f          \  \  }}}\  }}g |j                                        |t          |t                    rt          |          dk    sJ |\  }}	t          |t                    sJ t          |                      t          |	t                    sJ t          |	                      t          d |	D                       sJ |j
        }
|
|k    sJ |
|f            fd|	D             }	|	z  g |                                t          j                             t          j                  s'                    |                                           d fd}||z   }t'          t)                                pt*          j         } |||||          \  }}} |||||          \  }}}t/          j        ||d          \  \  }}}t3          | ||           ||          g|||          }||f|fS )an  
        This is a main place where we do loop transformations in a
        backend-agnostic way.

        Here we:
            1) Remove any 1 dimensions
            2) Fuse contiguous dimensions together
            3) Reorder dimensions based on stride orders

        Optional argument extra_indexing_constraints can be used to append additional
        indexing expressions to existing ones derived from buffer's body. This can be useful
        to fuse scheduler nodes with compatible ranges, e.g. (s0*s1*...,) and (s0, s1, s2, ...)
        on CPU by preventing indexing simplifications and obtaining index/reduce ranges for
        the scheduler node compatible with other nodes.
        Optional argument recompute_sizes_body_func can be used to recompute sizes and body
        on the default body. This can be useful to append additional loop transformations.
        Nr   c              3  @   K   | ]}t          |t                    V  d S r   )r   r!   )r   fs     r   r   z6ComputedBuffer.simplify_and_reorder.<locals>.<genexpr>  s,      HHqz!T**HHHHHHr   c                    g | ]}|v|	S r   r   )r   r  index_formulass     r   r   z7ComputedBuffer.simplify_and_reorder.<locals>.<listcomp>  s*     # # #!>2I2I2I2I2Ir   x_varsSequence[sympy.Symbol]support_varsrU  r   simplify_loopsr   r   dtuple[list[int], Callable[[Sequence[int]], Sequence[int]], Callable[[Sequence[int]], Sequence[int]]]c           	                              | ||
          \  }}} ||           } |rJt          j        j                            | |t          	| |                    \  }}}t          ||          }n|}|||fS r   )_apply_loop_reorderingrm   r  r  _simplify_loopsrF   r   )rB  rD  rU  rE  reindex0r   r   _pruner   rA  memory_addrsr  s            r   simplify_and_reorderzAComputedBuffer.simplify_and_reorder.<locals>.simplify_and_reorder  s     )-(C(Ce\) )%E8X Xf%%F #*+'*:*J*J,^VUKK+ +'x
 *(H=="'8++r   pr|   )
rB  rC  rD  rC  rU  r   rE  r   r   rF  )r7  indexing_exprsr   r   r   r   r   r   r   r   r3  get_write_exprsrm   r  r  rC   PREFER_STORE_LOOP_ORDERextendget_read_exprsrd   r5  rA   loop_ordering_after_fusionrB   index_vars_no_squeezerM   )r  r8  r:  r5  r6  r  r,  r4  extra_indexing_rangesextra_indexing_exprexpected_var_rangesrM  rD  should_merge_loopsiter_rangesiter_reindexr  reduce_rangesreduce_reindex	iter_varsr3  rA  rL  s   `                    @@r   rM  z#ComputedBuffer.simplify_and_reordere  s   4 ''))		
%Z%Z % 	
 *)[)4*k1J 	)[)[
 94.55778%15u==233q8889 :T6!#63T::WWDAV<W<WWW:1488SS$?R:S:SSS8HH4GHHHHHHHH"&/&*????#%B???
# # # #.# # # 11N0--//0w""4)OPP 	7 3 3 5 5666	, 	, 	, 	, 	, 	, 	, 	,6 "K/t,,---VV5V1V 	 (<';	(
 (
$\1 ,@+?{4F,
 ,
(~q
 0</Q0
 0
 0
, K*
 \)$$nn[&A&AB
 
 ]+T11r   r,  rC  rD  rU  r   rL  list[sympy.Expr]priority_idxrF  c           
     n    ddl m} |g }	  fd|D             }t          |          t          |          k    r&t          |d                   t                     k    sJ t          t	           |||                              }n|# t
          $ ro t          j        r7t          	                    dt          t                               |           t          t          t                                        }Y nw xY wfd|D             t          |          t          |          fS )zU
        Shuffle the order of loops around to hopefully improve performance.
        r@   r)  Nc                \    g | ](}t           j        j                            |          )S r   r(  )r   r)  r,  rD  s     r   r   z9ComputedBuffer._apply_loop_reordering.<locals>.<listcomp>  s@         --dJMM  r   r   z%Did not simplify complex index:
%s
%sc                     g | ]
}|         S r   r   )r   r   rU  s     r   r   z9ComputedBuffer._apply_loop_reordering.<locals>.<listcomp>  s    )))aq)))r   )r+  r*  r   r   r  	ExceptionrA   r  r  warningr   r   r   r   r   )r,  rD  rU  rL  r`  r*  r[  r   s   ```     r   rH  z%ComputedBuffer._apply_loop_reordering  sx    	/.....L	,    (  G w<<3|#4#444WQZCM M : : :  //'5,"O"OPPQQEE 	, 	, 	,| =Z//00   
 s5zz**++EEE	, *)))5)))l5))?5+A+AAAs   A>B A6DDr  c                4    | j                                         S r   rb  rZ  r  s    r   rZ  z!ComputedBuffer.get_reduction_size	      y++---r   c                4    | j                                         S r   rb  rX  r  s    r   rX  z!ComputedBuffer.get_reduction_type  rh  r   c                4    | j                                         S r   )rb  r  r  s    r   r^  zComputedBuffer.is_no_op  s    y))+++r   c                    dS r  r   r  s    r   r  zComputedBuffer.should_allocate  r  r   r  r  r   c                6    | j                             |          S )r"  rb  r`  r  s     r   r`  z!ComputedBuffer.constant_to_device  s    y++F333r   )r   r  rt  r  r  rj  r  r  r  rx  rq  )r   r  )r   r  ri  )r   r/  NN)r8  r9  r:  r;  r   r<  r   )r,  rC  rD  rC  rU  r   rL  r_  r`  r  r   rF  rr  r  ) r   r   r   r  r   r  r  r  r  r
  r  rQ  r  r  rJ  rX   rV  r  r  r  r   r  rW   r7  rM  rH  rZ  rX  r^  r  r`  r  r  s   @r   r  r    se          KKK%*N****6 6 6  \6	 	 	 	% % % %% % % %* * * *   * ,--$)    .-6	% 	% 	% 	% 	% 	%I I I IG G G G% % % %N% % % % J J J ]JD RVBFz2 z2 z2 z2 z2x  -1%B %B %B %B \%BN. . . .. . . ., , , ,   4 4 4 4 4 4 4 4r   r  c                  \     e Zd ZdZd fd
ZddZdddZd dZd!dZd"dZ		 	 d#d$dZ
 xZS )%r  zt
    Represents a Triton (in the future other type) of template operator
    that we can fuse an epilogue onto.
    rc  r4  r{  r|  make_kernel_renderr;  r   r   c                    t                                          d |           t                              |          | _        || _        t          j                            |           | _	        t          j        
                    |            d S r  )r  rD  r  unwrap_storager{  rq  rm   r  register_bufferr   register_operation)r  rc  r{  rq  r  s       r   rD  zTemplateBuffer.__init__   sr     	d6222"11&99"4G++D11		""4(((((r   rH  c                .    |                      d          S )NT	normalize)rK   r  s    r   rJ  zTemplateBuffer.get_read_writes,  s    ''$'777r   Frx  r   c           	        |                                  |                                                                 d
fd}t          j        ||                                 d|          }| j        D ]Ȋt          t          t          f          sJ t                                t          j        t                    sJ t          j                              j                                        d
fd	}|xj        t          j        |                                d|          j        z  c_        |S )Nr   Sequence[Any]ry  r   r   c                l    t          |          dk    sJ t          j         |           d          S )Nr   fake)r   rk   r  )r   ry  r  r   s     r   dummyz1TemplateBuffer.extract_read_writes.<locals>.dummy3  s6    v;;!####9T775>>6:::r   r   rw  c                    t          |          dk    sJ t          j                                         |                     S r   )r   rk   r  r#  )r   ry  r  r  s     r   r}  z1TemplateBuffer.extract_read_writes.<locals>.dummyA  s<    6{{a''''x???r   )r   rz  ry  rz  r   r   )r#  r  r  rB   rK   r  r{  r   rh  r  r   rc  r  rN  )r  rx  r}  depsr  r  r   s       @@@r   rK   z"TemplateBuffer.extract_read_writes/  s`   }}//##0022	; 	; 	; 	; 	; 	; 	; /4==??B)
 
 
 ; 	 	CcOV#<==HHtCyyHH=cj&11CC4
3C3CCC1j--//G@ @ @ @ @ @ @ JJ,:s||~~rY  JJJ r   r  c                $    t           j        j        S r   )r   r  ro  r  s    r   rZ  z!TemplateBuffer.get_reduction_sizeK  s    w{r   r0  c                    d S r   r   r  s    r   rX  z!TemplateBuffer.get_reduction_typeN  r  r   c                    dS r  r   r  s    r   r  zTemplateBuffer.should_allocateQ  r  r   Nr8  r9  r:  <tuple[tuple[Sequence[Expr], list[Expr]], Optional[LoopBody]]c                2    |                                  g fd fS r   r  )r  r8  r:  s      r   rM  z#TemplateBuffer.simplify_and_reorderT  s&      
 	
r   )rc  r4  r{  r|  rq  r;  r   r   r  r  )rx  r   r   rH  rr  rt  rq  ro  )r8  r9  r:  r;  r   r  )r   r   r   r  rD  rJ  rK   rZ  rX  r  rM  r  r  s   @r   r  r    s         

) 
) 
) 
) 
) 
)8 8 8 8    8         
 RVBF
 
 
 
 
 
 
 
 
r   r  c                  p     e Zd Z	 	 dd fdZ ed           	 dd fd            ZddZddZd dZ xZ	S )!TritonTemplateBufferNrc  r  r{  r|  rq  Optional[Callable[_P, _T]]mutated_inputsOptional[Iterable[IRNode]]allowed_prologue_inpsOptional[OrderedSet[str]]r   r   c                f    t                                          |||           | _         g _        |t          j        j        j        t          j        j        j        f}t          j
        j        j        }||v sJ d| d|             t           j        d         t                    s"J t!           j        d                                j        d                                          xj         fd|D             z  c_        |r|nt%                       _        d _        d _        dS )a  
        NOTE:[TritonTemplates with multiple outputs]
        We want the ability for TritonTemplates to output multiple tensors. Triton
        kernels have no notion of outputs and this is done by creating tensors that
        are then mutated by the kernel. Currently our STORE_OUTPUT codegen doesn't
        support creating multinode outputs for triton templates.
        We work around this by creating an extra input buffer during the lowering
        and we mark them as mutated inputs.
        Nz$Mutated inputs are only allowed for z	 but got r   c                N    g | ]!}t          t                     |          "S r  MutationOutputr  r   r  r  r  s     r   r   z1TritonTemplateBuffer.__init__.<locals>.<listcomp>  @        z888#tDD  r   )r  rD  r  outputsr%  rk   higher_orderflex_attentionflex_attention_backwardrm   r  current_noder'  r   r{  r   r   r#  r;   r  subgraph_inpssubgraph_outs)
r  rc  r{  rq  r  r  allowed_setr  r  r  s
   `       @r   rD  zTritonTemplateBuffer.__init__c  sQ   " 	);<<<,&*V% 	&5	&>K 7/6L;...[{[[\[[ /.. dk!nf55KKtDKN7K7KKK5[^..00FLL     )   LL &;L!!
 	" SW?Cr   Fr  r   r   c                "   t                                          |          }| j        r| j        ng }| j        r| j        ng }|D ]}t	          |t
          j                  r$|                    t          ||                     @t	          |t                    r)|                    |                    |                     ~|J |D ]D}t	          |t                    r)|                    |                    |                     @|J E|S r   )
r  rV  r  r  r   r   r!   updater)   r   )r  r  resr  r  r  r  r  s          r   rV  z)TritonTemplateBuffer.get_free_symbol_uses  s    gg**=99.2.@H**b.2.@H**b  	# 	#C#uz** #

+C??@@@@C(( #

333MBBCCCC{{{{  	# 	#C#v&& #

333MBBCCCC{{{{
r   r  c                    | j         S r   r  r  s    r   r  z TritonTemplateBuffer.get_outputs  
    |r   r  c                    | j         S r   )r  r  s    r   get_allowed_prologue_inpsz.TritonTemplateBuffer.get_allowed_prologue_inps  s    ))r   r   c                    d| j          d}|S )NzTritonTemplateBuffer(layout=r3  r  )r  r  s     r   r  zTritonTemplateBuffer.__str__  s    ;T[;;;
r   ro  )rc  r  r{  r|  rq  r  r  r  r  r  r   r   r  r  r  rj  r{  )
r   r   r   rD  rX   rV  r  r  r  r  r  s   @r   r  r  b  s         6:;?*D *D *D *D *D *D *DX 233$)      43.   * * * *       r   r  c                  d     e Zd ZdZd fdZddZddZddZddZddZ	d dZ
d!dZddZ xZS )"ChoiceCallera.  
    Represents a possible choice used in autotune_process.py.
    During autotuning, self.benchmark() is first called to get benchmark result,
    and if this choice is selected, self.output_node() is called to get the output_node.

    Children classes: TritonTemplateCaller, CUDATemplateCaller.
    r   r   r   r  rc  r  descriptionr   r   c                    t                                                       || _        || _        || _        || _        d S r   )r  rD  r   rc  r   r  )r  r   r   rc  r  r  s        r   rD  zChoiceCaller.__init__  sA     		& 'r   r   r   r  r  r  c                   |                                  t          t          d}t          j        rt          fdfi |S t          j        d|ifi |S )N)warmuprepc                        S r   r   )algor   s   r   r  z(ChoiceCaller.benchmark.<locals>.<lambda>  s    DD$K r   r  )to_callableautotune_warmupautotune_reprA   /profile_bandwidth_with_do_bench_using_profilingr]   rR   	benchmark)r  r  r   benchmark_configsr  s     ` @r   r  zChoiceCaller.benchmark  s|    !!%
 
 A 	V+,?,?,?,?,?UUCTUUU$T4%SSARSSSr   c                    t           r   r  r  s    r   	call_namezChoiceCaller.call_name  r  r   r  c                    t           r   r  r  s    r   r  zChoiceCaller.to_callable  r  r   c                *    |                                  S )z
        Hash key for the underlying kernel. By default, we assume there are no
        runtime params, so kernel hash key defaults to choice caller's hash key.
        )hash_keyr  s    r   kernel_hash_keyzChoiceCaller.kernel_hash_key  s    
 }}r   c                    t           r   r  r  s    r   r  zChoiceCaller.hash_key  r  r   r  c                    t           r   r  r  s    r   ry  zChoiceCaller.output_node  r  r   <dict[str, Union[PrimitiveInfoType, list[PrimitiveInfoType]]]c                    i S )zRInformation returned here is logged to the autotune log file when that is enabled.r   r  s    r   	info_dictzChoiceCaller.info_dict  s    	r   c                    dS )Nunsupported_choicer   r  s    r   autoheuristic_idzChoiceCaller.autoheuristic_id  s    ##r   )
r   r   r   r  rc  r  r  r   r   r   )r   r   r  r  r   r  r{  )r   r  )r   r  )r   r  )r   r   r   r  rD  r  r  r  r  r  ry  r  r  r  r  s   @r   r  r    s         ' ' ' ' ' 'T T T T" " " "" " " "   " " " "" " " "   $ $ $ $ $ $ $ $r   r  c                      e Zd ZddZdS )TritonTemplateCallerBaser   r   c                    t           r   r  r  s    r   get_make_kernel_renderz/TritonTemplateCallerBase.get_make_kernel_render  r  r   N)r   r   )r   r   r   r  r   r   r   r  r    s(        " " " " " "r   r  c                       e Zd ZdZd  fdZed!d            Z	 d"d#dZej	        d$d            Z
d%dZ	 d"d&dZd'dZ xZS )(MultiTemplateBufferaG  
    Represents a Buffer with multiple backing implementation choices.

    Choices can be TritonTemplates or ExternKernels. During scheduling if there is a potential
    epilogue we will benchmark each of the choices with the epilogue to determine an implementation.
    Otherwise, the fastest base choice will be chosen.
    rc  r  r{  r|  choice_timings_fn4Callable[[Optional[int]], dict[ChoiceCaller, float]]unfiltered_choiceslist[ChoiceCaller]r  r  r   r   c                    t                                          ||d |           || _        i | _        || _        t          d |D                       | _        i | _        d S )N)rc  r{  rq  r  c              3     K   | ]D}t          |t                    p*t          |t          j        j        j                  o|j        V  Ed S r   )r   r  r%  r  select_algorithmExternKernelCallerhas_out_variant)r   choices     r   r   z/MultiTemplateBuffer.__init__.<locals>.<genexpr>  sh       %
 %
  v788 65?#C#VWW +*	%
 %
 %
 %
 %
 %
r   )r  rD  _choice_timings_fn_choice_timingsoriginal_inputsr   _output_plannable_make_kernel_renders)r  rc  r{  r  r  r  r  s         r   rD  zMultiTemplateBuffer.__init__   s     	#"7	 	 	
 	
 	
 #4OQ%!$ %
 %
 -%
 %
 %
 "
 "
 ?A!!!r   r   c                    | j         S )z^
        Are all possible choices TritonTemplates or Extern Kernels with out variants
        )r  r  s    r   output_plannablez$MultiTemplateBuffer.output_plannable  s    
 %%r   Nhint_overrider*  dict[ChoiceCaller, float]c                h    || j         vr|                     |          | j         |<   | j         |         S r   )r  r  )r  r  s     r   choice_timingsz"MultiTemplateBuffer.choice_timings"  s;      444262I2I-2X2XD /#M22r   callerr  r  c              #    K   t          |t          j        j        j                  sJ t          |                      | j        |j        k    sJ | j        }|                                | _        	 d V  || _        d S # || _        w xY wr   )	r   r%  r  r  TritonTemplateCallerr   rc  rq  r  )r  r  renders      r   swap_as_triton_callerz)MultiTemplateBuffer.swap_as_triton_caller)  s      EO4I
 
 	 	<<	 	 
 {fm++++("("?"?"A"A	-EEE&,D###fD#,,,,s   1A> >	Bc                >   t          |t          j        j        j                  sJ t          |                      |                                 |j        j        k    sJ | 	                                |j        j
        k    sJ |                                | _        d S r   )r   r%  r  r  r  r   r  rc  r  rH  r  r  rq  )r  r  s     r   finalize_as_triton_callerz-MultiTemplateBuffer.finalize_as_triton_caller7  s    EO4I
 
 	 	<<	 	 
 }}&-"44444  FM$88888"("?"?"A"Ar   tuple[ChoiceCaller, float]c                n    |                      |          }t          ||j                  }|||         fS )N)r  r  )r  r4  r  )r  r  timings
min_choices       r   get_min_choicez"MultiTemplateBuffer.get_min_choice?  s>     %%M%BBgk222
GJ/00r   callers-dict[Optional[int], TritonTemplateCallerBase]c                    |                                 D ]!\  }}|                                | j        |<   "| j        d         | _        dS )z;Finalize with multiple callers for different hint overridesN)r2  r  r  rq  )r  r  r  r  s       r   finalize_as_triton_callersz.MultiTemplateBuffer.finalize_as_triton_callersF  sV     &-]]__ 	W 	W!M67=7T7T7V7VD%m44 #'";D"Ar   )rc  r  r{  r|  r  r  r  r  r  r  r   r   rq  r   )r  r*  r   r  )r  r  r   r  )r  r  r   r   )r  r*  r   r  )r  r  r   r   )r   r   r   r  rD  r  r  r  r  r  r  r  r  r  r  r  s   @r   r  r    s        A A A A A A6 & & & X& .23 3 3 3 3 - - - -B B B B .21 1 1 1 1B B B B B B B Br   r  c                  0     e Zd Zd fdZddZddZ xZS )CUDATemplateBufferrc  r  r{  r|  rq  Callable[_P, _T]workspace_sizer   templaterq   supports_epilogue_fusionr   r   r   c                x    t                                          |||           || _        || _        || _        d S r   )r  rD  r  r  r  )r  rc  r{  rq  r  r  r  r  s          r   rD  zCUDATemplateBuffer.__init__R  s?     	);<<<, (@%%%r   c                "    | j         | j         ndS r   )r  r  s    r   r  z%CUDATemplateBuffer.get_workspace_sizea  s    &*&9&Et""1Lr   c                    |                                  D ]*}t          j        |                                d d            +d S r   )r  rk   r  r#  )r  rq  s     r   emulate_store_fnz#CUDATemplateBuffer.emulate_store_fnd  sH    &&(( 	5 	5FIfoo''t4444	5 	5r   )rc  r  r{  r|  rq  r  r  r   r  rq   r  r   r   r   r  ri  )r   r   r   rD  r  r  r  r  s   @r   r  r  Q  so        A A A A A AM M M M5 5 5 5 5 5 5 5r   r  c                  ,     e Zd Zd fdZd fdZ xZS )CppTemplateBufferrc  r  r{  r|  rq  r  r  rq   r  r   r   r   c                x    t                                          |||           || _        || _        d | _        d S r   )r  rD  r  r  r  )r  rc  r{  rq  r  r  r  s         r   rD  zCppTemplateBuffer.__init__j  s;     	);<<< /3r   c                   t          | j        t                    rt          | j        t                    sJ t          | j                              | j        d         }t          |t                    sJ t          |                      |j        }t          |t                    sJ t          |                      |S t                      	                                S r   )
r   rc  MultiOutputLayoutr  r   r   r  r  r  r  )r  first_outputrc  r  s      r   r  zCppTemplateBuffer.get_layoutw  s    dk#455 	(dlH55IItDL7I7III5<?LlF33GGT,5G5GGG3!(Fff--;;tF||;;-M77%%'''r   )rc  r  r{  r|  rq  r  r  rq   r  r   r   r   ro  )r   r   r   rD  r  r  r  s   @r   r  r  i  s[        4 4 4 4 4 4	( 	( 	( 	( 	( 	( 	( 	( 	( 	(r   r  c                  0     e Zd ZdZ	 dd fdZddZ xZS )CuteDSLTemplateBufferz
    Buffer for CuteDSL (CUTLASS Python DSL) template kernels.
    Similar to other template buffers but specialized for CuteDSL operations.
    Nrc  r  r{  r|  rq  r  r  r   r  r  r   r   c                    t                                          |||           | _        | _         g _        |t           j        d         t                    s"J t           j        d                                j        d         	                                 xj         fd|D             z  c_        d S d S )Nr   c                N    g | ]!}t          t                     |          "S r  r  r  s     r   r   z2CuteDSLTemplateBuffer.__init__.<locals>.<listcomp>  r  r   )
r  rD  r  r  r  r   r{  r   r   r#  )r  rc  r{  rq  r  r  r  r  s   `     @r   rD  zCuteDSLTemplateBuffer.__init__  s     	);<<< ,&*V%dk!nf55KKtDKN7K7KKK5[^..00FLL     )   LLLL &%r   r  c                    | j         S r   r  r  s    r   r  z!CuteDSLTemplateBuffer.get_outputs  r  r   r   )rc  r  r{  r|  rq  r  r  r   r  r  r   r   r  )r   r   r   r  rD  r  r  r  s   @r   r  r    se          6:      *       r   r  r   )Sequence[Union[IRNode, Sequence[IRNode]]]TypeIs[Sequence[IRNode]]c                4    t          d | D                       S )Nc              3  @   K   | ]}t          |t                    V  d S r   r   r   rR  s     r   r   z#is_node_sequence.<locals>.<genexpr>  s,      44z!V$$444444r   )r   )r   s    r   is_node_sequencer    s!     44e444444r   c                      e Zd ZU ded<   ddZdd	ZddZedd            Ze	dd            Z
ddZddZ ed           	 d d!d            ZdS )"r  r  r{  r   r   r   r   c                r    | j         |         }t          |t                    sJ |                                S r   r{  r   r   r#  )r  r   inputs      r   
input_namezInputsKernel.input_name  s3    A%(((((~~r   rH  c                &   t          t          j                             }t          j        | j        D ]}t          |t                    r"|                    fd|D                        9t          |t                    rO|	                     |
                                                     t          t          j                 fd|                                 D                       }t          j        ||t                                S )Nc              3  R   K   | ]!} |                                           V  "d S r   r  )r   r   r"  s     r   r   z/InputsKernel.get_read_writes.<locals>.<genexpr>  s5      BBqWWQZZ\\22BBBBBBr   c              3  R   K   | ]!} |                                           V  "d S r   r  )r   r  r"  s     r   r   z/InputsKernel.get_read_writes.<locals>.<genexpr>  sF       .
 .
(+GGCLLNN##.
 .
 .
 .
 .
 .
r   r  )r;   rB   rH   r"  r{  r   r   r  r   r  r#  r  r  )r  rN  r
  r  r"  s       @r   rJ  zInputsKernel.get_read_writes  s   <+,..&[ 	5 	5E%** 5BBBBEBBBBBBBE#899 5		''%.."2"2334444L,- .
 .
 .
 .
/3/?/?/A/A.
 .
 .
 
 
 &"
 
 
 	
r   rK  c                4    |                                  j        S r   rM  r  s    r   r  zInputsKernel.get_reads  rO  r   r   r   c                   t          |t                    r|j        }t          |t                    r|j        }t          |t                    r/t          |t
                    st                              |          }t          |t                    r|                     |          S t          |t                    r|S t          |t          t
          f          sJ t          |                      |S r   )r   r   rb  r  r  rh  r  realize_inputunwrap_storage_for_inputTorchBindObjectr  r   r  r   s     r   r  z%InputsKernel.unwrap_storage_for_input  s    a## 	Aa$$ 	Aa"" 	.:a+I+I 	.**1--Aa## 	3
 //222a)) 	H!fo677@@a@@7r   %list[Union[IRNode, Sequence[IRNode]]]c                    g }| D ]S}t          |t                    rd |D             }nt                              |          }|                    |           T|S )Nc                B    g | ]}t                               |          S r   )r  r  r  s     r   r   z/InputsKernel.unwrap_storage.<locals>.<listcomp>  s&    III!\::1==IIIr   )r   r   r  r  r  )r{  
inputs_newr   s      r   rs  zInputsKernel.unwrap_storage  so     =?
 	! 	!A!X&& =IIqIII 99!<<a    r   r   c                    dS r  r   r  s    r   r\  zInputsKernel.is_extern  r  r   c                    dS r  r   r  s    r   rQ  zInputsKernel.num_reads  r  r   Fr  r   c                    t          t          j                             }| j        D ]M}t	          |t
                    r||                    |          z  }0|D ]}||                    |          z  }N|S r   )r;   r   r#   r{  r   r   rV  )r  r  r  r  	inner_inps        r   rV  z!InputsKernel.get_free_symbol_uses  s     u|$&&; 	G 	GC#v&& GS--m<<<!$ G GI77FFFAAGr   N)r   r   r   r   r  r  r   r   r   r   )r{  r  r   r  rq  r  r  r  )r   r   r   r   r  rJ  r  r  r  r  rs  r\  rQ  rX   rV  r   r   r   r  r    s         5555       

 
 
 
,, , , ,    [$ 
 
 
 \
       N++$)
 
 
 
 ,+
 
 
r   r  c                      e Zd ZddZddZdS )		NopKernelr   r   c                    dS r  r   r  s    r   r^  zNopKernel.is_no_op  r  r   rK  c                    t                      S r   r:   r  s    r   r  zNopKernel.get_reads  r  r   Nrq  r  )r   r   r   r^  r  r   r   r   r  r    s<                r   r  c                      e Zd ZdZedd            Ze	 ddd            Z ed           	 ddd            Zedd            Z	ddZ
d	S )ConcatKernelzn
    There isn't actually a real kernel for concat, we just change the
    storage for the upstream data.
    r{  r|  rV  r   r   r  c                $   |d                                          }|d                                         }t          |d                                                   }dg}||         g}d|cxk    rt	          |          k     sn J t          dt	          |                    D ]#}||                                         }	|                    ||                    t	          |	          t	          |          k    sJ ||                                         |k    sJ ||                                          |k    sJ t          t	          |                    D ]Q}
|
|k    r||
         |	|
         z   ||
<   t          j        j	        
                    ||
         |	|
                   ||
<   R|                    ||                    %t                              |          }t          j        r't                              |||d         j                  }t          t	          |                    D ]x}||         }t%          |          r_|                                }t)          |t*                    r6t                              |j        |j                  rt3          |          } nyt5          d |D                       }t          j        j        j        d         }t)          |t                    sJ t;          |                      |du r(t5          d |D                       rt3          |          }t=          d |D                       }|J t?          dt+          |||||          g 	          }tA          |          }g }tC          |          D ]\  }}t)          |tD          tF          f          sJ t;          |                      | $                    |tJ          &                    ||||         ||         d
                    }t)          |tN                    sJ t;          |                      t)          |j(        t                    sJ t;          |j(                              |j(                            |           t)          |j)        tD                    r|j)        *                                }n|j)        }t)          |t@                    rt|+                                r`|                                 x}JtY          |j                  r6t[          |          s'|                    |.                                           t	          |          dk    rIt          j        /                    |t`          j1                  rt          j        2                    |           t          j        3                    |          |_4        | 5                    |j(                  |_(        t          j        6                    |           |S )z6
        Create the concat kernel from inputs
        r   r@   c              3  4   K   | ]}t          |          V  d S r   )r  r  s     r   r   z&ConcatKernel.create.<locals>.<genexpr>8  s+      -W-W1.CA.F.F-W-W-W-W-W-Wr   Fc              3     K   | ]c}d |j         v oU|j         d                              t          j                  p*|j         d                              t          j                  V  ddS )r  r\  N)rw  r  r%  r_  ra  r   args     r   r   z&ConcatKernel.create.<locals>.<genexpr><  s       <
 <
  SX --E<O-PP W8E?00u?U0VV	<
 <
 <
 <
 <
 <
r   c              3  f   K   | ],}t          |          o|                                j        V  -d S r   )r  r  rg  r  s     r   r   z&ConcatKernel.create.<locals>.<genexpr>F  sL       
 
FG!!$$A)A
 
 
 
 
 
r   N)r  r  r  r  rg  r   rc  r{  )r  )7r#  r"  r   r  r   r   r  rm   r  r  r  r   r!  rA   rF  r  rC  r  r  r  r   re  r  r  r  r-   r3  r  r   r   r   r#  r  r   r  r  r  r  r  r  r{  rb  r6  r(  rd   rc   rd  r  rC   FOREACHregister_operation_listrt  r   rs  ru  )r  r{  rV  r  r  r  offsets_startoffsets_endr   
input_sizer  output_strider   rc  any_input_is_storage_and_layoutfx_node_argsrg  concat_kernelkernelop_namesr  r  input_unwrappeddevs                           r   r  zConcatKernel.create  s   
 %%''q	##%%q	**,,--}oC''''#h--''''''q#f++&& 	. 	.A++--J  #///z??c(mm3333!9&&((E1111!9''))V33333x==))  88"*1+
1"=HQKK"#'"2"L"L Z]# #HQKK x}----'5'H'H'R'R' 	"//x M
 s6{{## 		 		Aq	A$Q'' K  88fmTT %C8$L$LME*--W-WPV-W-W-W*W*W'w+03,--AAtL/A/AAA-*e33 <
 <
 $<
 <
 <
 9
 9
3 ;8DDM 
 
KQ
 
 
 
 
	 !!!$$#   

 

 

 M**'' 	C 	CFAscHj#9::EEDIIEE:++  Cq!1;q> !   L lF33GGT,5G5GGG3m2D99UU4@T;U;UUU9 ''555#(H-- +"%("6"6"8"8"%( ?J77C#3355C  NN,,,S938$$ :"<00 :  ? ? A ABBBx==1!4!4V^=S!T!TG++H555W44]CC"11-2FGG	""=111r   Nr  r   r  r  r   c                   t          |t                    r|                     |j        |          S t          |t          t
          f          sJ t          |                      t          |j        t                    rt          |j        j        t                    r|j        j
        sdS |dS t          |                                          t          |                                          k    sdS t          d t          |                                |                                          D                       S t          |j        d          o9t          |j        j        t                     ot          |j        t"                     S )NFTc              3  f   K   | ],\  }}t           j        j                            ||          V  -d S r   r_  r`  s      r   r   z=ConcatKernel.can_realize_into_without_copy.<locals>.<genexpr>  sO        B  88R@@     r   rc  )r   r   can_realize_into_without_copyrb  r  r  r   r  rc  re  r  r   rH  r   r   r  r   ExternKernelAlloc)r  r  r  s      r   r;  z*ConcatKernel.can_realize_into_without_copy{  si    c9%% 	D44SXsCCC#*566AAS		AA6ch 344 	sx<<x0 u {t s~~''((C0@0@,A,AAAu  !#.."2"2CNN4D4DEE      CHh'' <38?N;;<sx):;;;	
r   Fr  r   c                8    t                               | |          S r   )r  rV  rU  s     r   rV  z!ConcatKernel.get_free_symbol_uses  s     --dMBBBr   c                   t          |t                    s2t          |          r#t          |          \  }}t          ||          }t          |t                    sJ t	          |                      t          |t
                    r|                     |j        |          S t          |t                    ra|	                                 t          |j        d          sJ |                     ||          r t          |          |j        _        |j        S t                              |                                |                                |                                d t'          |                                |                                          D                       }|                     ||          S )Nra  rc  c                ^    g | ]*\  }}t           j        j                            ||          +S r   r  r  s      r   r   z-ConcatKernel.realize_into.<locals>.<listcomp>  sA       Aq  ::1a@@  r   r$  )r   rh  r  rd  r   r   r  rb  r  r  r  r;  r  rc  r  r  r#  r"  r  r   r  )r  r  r  ri  rc  pws         r   r  zConcatKernel.realize_into  s   
 #// 	C$S)) C"7"<"<%76BBB#//::c::/c9%% 	3##CHc222c:&& 	 KKMMM38X.....00c::  "1#"6"6x>>##--//__&& ??  	  
 
 C(((r   c                    dS r  r   r  s    r   r  zConcatKernel.should_allocate  r  r   )r{  r|  rV  r   r   r  r   )r  r   r  r  r   r   r  r  )r  r   r  r   r   r   rq  )r   r   r   r  r  r  r;  rX   rV  r  r  r   r   r   r#  r#    s         
 l l l [l\ 26!
 !
 !
 !
 [!
F N++$)C C C C ,+C
 ) ) ) [)@     r   r#  c                      e Zd ZU dZdZded<    ej        e          Z	ded<   dZ
d	ed
<   dZded<   dZded<    ej        e          Zded<   dZded<   dZded<    ej        e          Zded<   dZded<    ej        e          Zded<    ej        e          Zded<   	 	 	 	 	 	 	 dgdh fd$Zdid&Zdjd(Zdkd)Zdkd*Zdld-Zdld.Zdmdnd/Zdod0Zdpd2Zedqd6            Z e!drd<            Z"e!dsd>            Z#e!dtd?            Z$e!dtd@            Z%e!	 	 	 dudvdH            Z&e!	 dwdxdJ            Z'e!	 dwdydL            Z(e!dtdM            Z)e!dtdN            Z*e!dtdO            Z+e!dtdP            Z,dkdQZ-dzdRZ.dmd{dVZ/d|dWZ0d}dYZ1dwd~d[Z2dpd\Z3dld]Z4dld^Z5dld_Z6ddaZ7ddcZ8 e9d           	 dwdde            Z:dpdfZ;e;Z< xZ=S )r  z
    A class that represents Kernels which are not directly lowered to Inductor
    Loop Level IR, such as custom operators, or aten operators which we fallback to.
    r   rz  constant_argsr  dict[str, Any]r   NOptional[ReinterpretView]output_viewr0  python_kernel_namecpp_kernel_nameIterable[str]ordered_kwargs_for_cpp_kernelOptional[_OpOverloads]op_overloadzOptional[list[dict[str, Any]]]arg_propertieszdict[str, dict[str, Any]]allarg_propertiesz#Optional[dict[str, dict[str, Any]]]kwarg_propertiesz"dict[sympy.Symbol, pytree.KeyPath]unbacked_bindingszlist[MutationOutput]mutation_outputsr   rc  r4  r{  r  Optional[dict[str, Any]]r   r   c                b   t                                          |||           || _        |r|ni | _        || _        |
| _        |                     |           |                     |           |	| _        | 	                                 i | _
        g | _        t          j        j        | _        d S Nr+  )r  rD  rC  r   rF  rL  set_cpp_kernel_nameset_python_kernel_namerJ  collect_arg_kwarg_propertiesrP  rQ  rm   r  r  fx_node)r  r   rc  r{  rC  r   rF  rG  rH  rJ  rL  r  s              r   rD  zExternKernel.__init__  s     	 	 	
 	
 	

 + &.ffB&&  111##$6777-J*))+++!# "w+r   r  c                    | g| j         S r   )rQ  r  s    r   r  zExternKernel.get_outputs  s    -t,--r   r   c                    t                      S r   r:   r  s    r   r  z%ExternKernel.get_unbacked_symbol_defs  r  r   c                X   t          | j        t          j        j                  rd | j        j        j        D             n*d t          t          | j	                            D             | _
        t          | j        t          j        j                  rd | j        j        j        D             ni | _        t          | j        t          j        j                  rI| j        s d | j        j        j        D             | _        d | j        j        j        D             | _        d S g | _        d S )Nc                F    g | ]}|j         	|j        |j        |j        d S ))r   r   r  )
kwarg_onlyr   	real_typer  r  s     r   r   z=ExternKernel.collect_arg_kwarg_properties.<locals>.<listcomp>  sH        |FK%&_   r   c                    g | ]}i S r   r   r  s     r   r   z=ExternKernel.collect_arg_kwarg_properties.<locals>.<listcomp>  s    666"666r   c                8    i | ]}|j         |j        |j        d S ))r   r  )r   r^  r  r  s     r   r   z=ExternKernel.collect_arg_kwarg_properties.<locals>.<dictcomp>  s8        qOO  r   c                *    g | ]}|j         	|j        S r   r]  r   r  s     r   r   z=ExternKernel.collect_arg_kwarg_properties.<locals>.<listcomp>&  s1     6 6 6 !,6F6 6 6r   c                     g | ]}|j         	|S r   )r]  r  s     r   r   z=ExternKernel.collect_arg_kwarg_properties.<locals>.<listcomp>)  s/     " " """ " "r   )r   rL  r%  _ops
OpOverload_schema	argumentsr   r   r{  rM  rN  rJ  schema_kwargsr  s    r   rW  z)ExternKernel.collect_arg_kwarg_properties
  s[    $*EJ,ABB
7   )1;    76eC$4$455666 	$ $*EJ,ABB	  )1;   
  	 d&
(=>> 		$5 6 6$($4$<$F6 6 62" "+3=" " "D "$Dr   c                    t          | j        t                    r*|                                  |                                  d S d S r   )r   rc  r   apply_constraintr8  r  s    r   r  zExternKernel.decide_layout/  sJ    dk>22 	!!!###     	! 	!r   wrapperrr   c                ^    t          | |          \  }}|r|                    |           d S d S r   )r`   make_comment)r  rk  
origin_str_detailed_origin_strs       r   codegen_commentzExternKernel.codegen_comment4  sC    +>tW+M+M(
( 	-  ,,,,,	- 	-r   c                    t           r   r  r  rk  s     r   codegenzExternKernel.codegen9  r  r   c                   || _         t          j        j        r$t	          | j        t          j        j                  sd S | j        }| j         q|j	        dk    rS|j
        dk    r |j                            d          d         n|j                            dd          }d| d| _         d S |j        j        | _         d S d S )Natenr  .r   r  z
at::_ops::z::call)rH  rm   r  cpp_wrapperr   rL  r%  rd  re  	namespace_overloadnamer   r  replacerf  r   )r  rH  r5  opnames       r   rU  z ExternKernel.set_cpp_kernel_name<  s    .w" 	*ej3+
 +
 	 F!'6)) +y88 O))#..q1100c:: 
 (CF'B'B'B$$$'-~':$$$ ('r   c                    || _         |d S | j        }|d S t          |t          j        j                  rd|j         | _         d S |j                            dd           d|j         | _         d S )Nztorch.ops.higher_order.._ops..ops.rv  )	rG  rL  r   r%  rd  HigherOrderOperatorr   r   rz  )r  rG  r5  s      r   rV  z#ExternKernel.set_python_kernel_nameT  s    "4)F!>D
 >?? 	&Q&Q&QD### $,,Xw??SS&/SS ###r   r   c                   ddl m} |                                 x}r|j        nt          j        j        }t          j        j        r| j        J | j        S t          j        j	        rxt          t          j        j        |          s&J t          t          j        j                              | j        J t          j        j                            | j        |          S | j        J | j        S )Nr@   )CppWrapperCpu)codegen.cpp_wrapper_cpur  r#  r   rm   r  device_type
fx_wrapperrG  rw  r   r  rH  get_c_shim_func_name)r  r  dr  s       r   get_kernel_namezExternKernel.get_kernel_namec  s    ::::::!%!2!22AL9L7 	+*666**W  
	+ag2MBB  D$E E  B '3337'<<$f   *666**r   r   r   r  c           	     F   t                               |                                 |                                 |                                 |                                 |                                 |                                           }|                                 |S )N)r  r  r  r  r  r  )	r  r  r#  r"  r  r  r  r  r  )r   r@  s     r   
copy_inputzExternKernel.copy_inputv  sx    <<>>++--]]__::<<))++oo''  
 
 	

	r   r5  r{   r   r   ituple[Any, list[Any], list[Any], Callable[[Any, Any], Any], Optional[dict[sympy.Symbol, pytree.KeyPath]]]c                    ||d}t          j        |          \  }g g }g }|D ]}                    t          |t                    ot          |t
                                d         r|                    |           ^t          |t                    r+t          j        j	        j
                            |d           }|                    |           dfd	}	 fd
|D             }|D ]"}
t          |
          rt          |
d           #g }|D ]}
t          |
t                    sb|
                                t          j        j        v r=|                    t          j        j        |
                                                    zt          |
t                    sb|
                                t          j        j        v r=|                    t          j        j        |
                                                    t          |
t$                    r)|                    |
                                           /t          |
t(          j        j        j                  r^|
j        j        }|
j        j        dk    r|J |                    t(          j        j        |                                                    |                    t;          |
d                      |	||          \  }} ||i |}d }t          j        j
        x}rt          j        j         !                    d          }tE                      }t          j        j#        t(          j$        j%        j&        k    r!|d         }tO          t          j                  }|5  tQ          |t          j        |           d d d            n# 1 swxY w Y   tS          |||          }t          |tT          tV          f          s|gn|}|D ]i}t          |t(          j,                  rM|j-        rFd}t          j        j        j         !                    dd           x}r| d| }|t          j        _.        j||||	|fS )N)r   r   rI  )r  new_tensor_argsr   new_non_tensor_argsr   tuple[list[_T], dict[str, _T]]c                ^   g }t          |           }t          |          }D ]I}|r#|                    t          |                     '|                    t          |                     Jt          j        |          }|                    dg           |                    di           fS )Nr   r   )iterr  nextpytreetree_unflattenr  )	r  r  r5  
it_tensorsit_non_tensors	is_tensorr  	args_specis_arg_tensors	          r   unflatten_argsz3ExternKernel.process_kernel.<locals>.unflatten_args  s     Fo..J!"566N* 8 8	 8MM$z"2"23333MM$~"6"67777%fi88A55$$aeeHb&9&999r   c                :    g | ]}                     |          S r   r  r   r   r  s     r   r   z/ExternKernel.process_kernel.<locals>.<listcomp>  s'    AAAs((++AAAr   Tr  r9  )r	  r  r@   zEsparsity not handled. Please file issue for sparse inference weights.r  z Found from : 
 )r  r   r  r   r   r  )/r  tree_flattenr  r   r   GeneratorStater!   rm   r  r  r   create_symintnoder  rd  r  r#  	constantstorchbind_constantsr  	get_valuer%  r  irr  r   r   r9  default_generatorsclone_stater  r  r  rw  r  r
   r'  _higher_order_opseffectswith_effectsr0   r5   r1   r   r   Tensor	is_sparsedisable_cudagraphs_reason)r  r5  r   r   binded_args	args_flattensor_argsnon_tensor_argsr)  r  r   example_argsdevice_indexnew_args
new_kwargsexample_outputrP  r   node_meta_valctxexample_out_lir(  msgr  r  r  s   `                       @@r   process_kernelzExternKernel.process_kernel  s     $v66%2;??	9%' 		, 		,C  3''O
30O0O,O   R  ,""3''''c4(( W'*4FFsQUFVVC&&s++++	: 	: 	: 	: 	: 	: 	: BAAA[AAA  	6 	6A$Q'' 6%a5555 	 	  	L 	LA a** Lqzz||qw?P/P/P##AG$5ajjll$CDDDDq(++LJJLLAG$???##AG$?

$MNNNNA// 	L##AKKMM2222Au1@AA L x~x}..<3K3KK##J1,?KKMM    ##$5aT$J$J$JKKKK-~lOLL*8Z88JN--9 	N/33E::M0;C~$(?(G(TTT -a 0<Q^LL K K	1>>JJJK K K K K K K K K K K K K K K 9>=! ! ntUm<< ^ 	
   	8 	8A!U\** 8q{ 8]"#'"6";"?"?t"T"TT; A @@;@@C471 
 	
s   OO
Orh  c                   t          |t                    sJ t          |                      t          |t                    r|S |                                }t
          j                            |                                          }|J |	                                }|d|j
        v rt          |t          t          t          f          rt          |j        t                    r|j
        d                             t           j                  s+|j
        d                             t           j                  r5|                    t)          |                                                     n|                                 t/          j        |                                d          \  }}|d         } |                                |          }t
          j        j                            ||          }t
          j        j                            ||          }	t
          j        j                            ||          }
t=          ||	          |
z   }||k    r$t>                               d|	|
|           tB          t          |j"        tG          |$                                |%                                |                                |	|
d	          
          S )z
        In order to pass this to an extern kernel we need a
        ReinterpretView not a View.  This allows us to avoid some
        unneeded copies.
        Nr  r'  r  r|   r   z@convert_to_reinterpret_view failed: stride=%s offset=%s index=%sFr  ra  )&r   r  r   rh  r6  rm   r  r  r#  r  rw  r  r  rc  r   r  r%  r_  ra  rC  r-   r  r8  rB   r  r  r  r  stride_vars
offset_varre   r  r  r  rb  re  r  r"  )r  r   x_unwrap_viewr  x_unwrap_view_fx_node
index_argsr3  r  r   r[  rf  expecteds               r   convert_to_reinterpret_viewz(ExternKernel.convert_to_reinterpret_view  s    !X&&//Q//&a)) 	H g  !7!7!9!9:: # 3 3 5 5 "-.333=?FJ*OPP 4=/@@ 4 &*51??"'"5 @  	 4 )-e4BB"'"8 C   4 77.}/E/E/G/GHH    '')))!-!@JJLL"
 "
 "

J  ]
   ,, 55eZHH'"..ujAA!,,UJ??Z11F:HIIR	   &%,,..kkmmZZ\\  

 

 

 
	
r   c                P   |t                      S t          |t          t          j        j        j        t          f          rt          |          S t          |t                    r]t          j                            t          j        |j        |                                |                                                    S t          |t$                    r|S t          |t&                    r|                     |j                  S t          |t,                    r;t-          |                     |j                  |                                          S t          |t0                    r[|                                 t5          |                                          r&	 |                     |          S # t:          $ r Y nw xY wt          |t<                    r|                                 |S t          |t>          t          f          r|S |                      |          S )N)r)  )r  r  ra  )!r  r   r!   r   r   r   r   r   r   r  rm   r  add_tensor_constantr%  rZ  r)  r"  r#  r%  r   r  rb  rh  r  r  r  r  r6  r  r  r  NonTensorObjr  r  s     r   r  zExternKernel.realize_inputC  s   9')))a$ 3 ;SABB 	1(a0000a"" 	7..QWAKKMM!,,..QQQ   a(( 	Ha## 	-$$QV,,,a)) 	"&&qv..q||~~    a"" 	IIKKK$Q]]__55 ::1===*   Da$$ 	IIKKKHa,(=>?? 	H~~a   s   %F: :
GGc                    t          |          rHt          |                                          dk    r|S |                                D ]}|dk    r|c S |                     |          S r+  )r  r   rH  r  )r  r   r  s      r   require_stride1zExternKernel.require_stride1d  sr     ## 	1<<>>""a'',,..  Q;;HHH ~~a   r   Fr   r1  rD  r  r9  r   c                	   |J                                  dv rsS t                    r7t                                          t                    r|rt          |          o&t                                          j                   }t          dd|rHt          t          j        j                                                            j                            n||           S t          ddd |           S t                                          t          t          f          ry|r'                                                    |          s<rNt#                                          j                                                  rt'                    nS t                                          x}t(                    rt          |                                x}t                    rt-          d          t          |t                    rC|r|                    |          s*r*t#          |j                                                  rS t          t.                    rg|r'                                                    |          s<r<t#                                          j                                                  rS t          t0                    rt          j        t4                    rt          j        t6                    st                                          x}          rt;          |d          rt          |j        t<                    se	 |                     j                  _        |r|                      ||          S r| !                    |          S n# tD          $ r Y nw xY wd }	                                }
yt          j        j        fd	tG          tI                                                              D             }	|	D ])}tJ          j&        j'        (                    |d
d          *| )                              t          dd||           |rt          |          sJ n=|	r;|
J tJ          j&        j'        *                    |
          t'                    S S )N)r   r@   TF)r  r  r  r9  r  zHthe MutationLayoutSHOULDREMOVE's real layout shouldn't be FlexibleLayoutrb  r  c                    g | ]N}                     |         d                                                               |         d          L|OS )r   r   )rT  r  r  )r   r   rD  r  r   s     r   r   z0ExternKernel.require_strides.<locals>.<listcomp>  sj       33M!4DaHH 11!**,,q/1EE	  r   r   r@   )+r  r  r   r  r   r  r3   r  rd  r  rm   r  r  size_hints_or_throwre  r  r  rY  r  rm  r  r  rL  r  r   rb  r  rh  r6  r  r<  r  require_stride_orderrequire_exact_stridesr  r   r   r%  r  loweringslice_r  r  )r  r   r   rD  r9  use_current_stride_ordermutation_layoutr  r6  expanded_dims	orig_sizerV  r  s    ` `        @r   require_strideszExternKernel.require_stridesn  s     M$=$==;;==F""="H !## J	!,,...99 I ' 0R50 0 0K3ALLNN4IJJJ - *#(-  8', ! 0 D D$%LLNN$9!" !"   "'&3    H *#(-%)&3&3    HALLNN[/,JKK   <<>>;;EBB  "  2%q||~~'<ajjll 	  %0 4A}EEE
 $%LLNN25O   $3$?$?$A$AA[N   )b    [99 		*<<UCC	 &	 6);+=qzz|| 		 H a%% 				||~~77>>		 		 .!1<<>>#8!**,, 			 Hq)$$	168,,	 qv77	 &Q]]__&DkEE		
 V,,	 {/1BCC	88@@ 335 4    # 44= 5    '    .2JJLL	$w'H     s1::<<0011  M % B BO,33AsAqAA NN1!''	
 	
 	
 	
  	E5a?????? 	E(]-F-FF(//9==A21mDDDs   >8O 7O 
OOrO  c                2    |                      |||          S )N)rD  r9  r  )r  r   rD  r9  s       r   r  z"ExternKernel.require_exact_strides  s(     ""]- # 
 
 	
r   r   c                2    |                      |||          S )N)r   r9  r  )r  r   r   r9  s       r   r  z!ExternKernel.require_stride_order  s     ""1E"OOOr   c                8    |                      |t                    S r   )r  r`  r  s     r   require_channels_lastz"ExternKernel.require_channels_last   s    ''+<===r   c                8    |                      |t                    S r   )r  rb  r  s     r   require_channels_last_3dz%ExternKernel.require_channels_last_3d$  s    ''+=>>>r   c                    dd} ||          r|S |                      |t                              |                                                    S )Nr   r   r   r   c                    	 |                                  }n# t          t          f$ r Y dS w xY w|t          j        j        v ot          j        j        |         j        S r  )r#  AttributeErrorr  rm   r  r  	is_mkldnn)r   r   s     r   is_mkldnn_tensorz9ExternKernel.require_contiguous.<locals>.is_mkldnn_tensor*  sb    zz||"$78   uu 17,,R1B41H1RRs    ,,r   r   r   r   r  r   r!  r  )r  r   r  s      r   rg  zExternKernel.require_contiguous(  sg    	S 	S 	S 	S A 	H,,>44QZZ\\BB  r   c                    |                      |t                              |                                                    S r   r  r  s     r   require_contiguous_stridesz'ExternKernel.require_contiguous_strides:  s7     ((~00>>
 
 	
r   c                    d S r   r   r  s    r   rj  zExternKernel.apply_constraintB  r2  r   c                   t          |t                    sJ t          |                      t          |t                    st          |          }| j        s
J d            t          |          }t          | j                  }||k     r|t                              d| j        ||z
             t          ||          D ]G}| j        |         d         }|
                    ||v r||         n| j        |         d                    H|S )Nz/ExternKernel.arg_properties should not be emptyzv%s has %d unprovided positional arguments. Will check if they are in the keyword arguments or will use default values.r   r  )r   r   r   r   rM  r   r  r  rL  r   r  )r  r   r   n_args
n_pos_argsr   arg_names          r   fill_non_provided_argsz#ExternKernel.fill_non_provided_argsE  s&    $))554::55)$%% 	::D"UU$UUU"T,--
 JII^ V#	   6:..  .q1&96)) 8$$,Q/@   
 r   r  r  r   c                   t           j        j        r4g }d }|rG| j        r@t	          | j                  t	          |          k    s
J d            d | j        D             }t          | j                  D ]\  }}|9|J |                    ||                   }|r|                    d          nd }nXt	          | j                  |z   }| j        r8|t	          | j                  k     r | j        |                             d          nd }|	                    t           j        j
                            ||                     |S d | j        D             S )NzDnames passed to codegen_const_args does not match self.constant_argsc                :    i | ]}|                     d           |S r  )r  r(  s     r   r   z3ExternKernel.codegen_const_args.<locals>.<dictcomp>u  s1     * * *-0CGGFOOS* * *r   r   c                V    g | ]&}t           j        j                            |          'S r   rm   r  r  val_to_arg_strr   r@  s     r   r   z3ExternKernel.codegen_const_args.<locals>.<listcomp>  s+    WWWqAG(77::WWWr   )rm   r  rw  rM  r   rC  r   r  r{  r  r  r  )	r  r  r5  name_to_arg_propertiesr   r   proptype_r   s	            r   codegen_const_argszExternKernel.codegen_const_argsi  s   7 	XF
 &*" , 4-..#e**<<<Z =<<* *484G* * *& "$"455 M M1)5 ,,,155eAh??D04>DHHV,,,$EEdk**Q.C  ."36T=P9Q9Q3Q3Q +C044V<<<! 
 ag2AA!UKKLLLLMWWDDVWWWWr   c                   t           j        j        r3| j        ,|                     g | j        | j        | j                  }d}n	| j        }d}g }t          |          D ]\  }}t           j        j        r| j	        r|t          | j	                  k     s
J d            | j	        |                             d          }|                    t           j        j                            ||                     |                    t           j        j                            |                     |r'|                    |                                            |S )NFTz-Invalid access to ExternKernel.arg_propertiesr   )rm   r  rw  rL  r  r{  rC  r   r   rM  r   r  r  r  r  rR  r  )r  r{  need_codegen_constant_argsr   r   r   r  s          r   codegen_argszExternKernel.codegen_args  sV   7 		.4#3#?003$+3 23T[ F */&&[F)-&f%% 	D 	DDAqw" D* q3t7J3K3K/K/K/KC 0L/KK +A.226::AG0??5IIJJJJAG0??BBCCCC% 	3KK//11222r   r  c                    ||v r|                     |          S || j        v r| j                             |          S | j                             |          x}|                     d          S t          | d          )zGiven an argument name, queries for values in (in order):
        1. any provided kwargs for this function.
        2. the class self.kwargs member.
        3. any available default arguments in self.allarg_properties.Nr  z not in self.allarg_properties)r  r   rN  rL  )r  r  r   r)  s       r   get_kwargs_valuezExternKernel.get_kwargs_value  s    
 v::h'''t{"";??8,,,)--h777CD77?+++HHHIIIr   skip_outc                :   t           j        j        r| j        t	          | j                  dk    rg S g }| j        D ]}|r|dk    r|                     |          }t          |t                    r|
                    |           K| j        J | j                            |i                               d          }|
                    t           j        j                            ||                     n#d | j                                        D             }|S )Nr   r  r   c                f    g | ].\  }}| d t           j        j                            |           /S r  r  )r   kr&  s      r   r   z/ExternKernel.codegen_kwargs.<locals>.<listcomp>  sM       Aq ??qw+::1==??  r   )rm   r  rw  rL  r   rh  rJ  r  r   r!   r  rN  r  r  r  r   r2  )r  r  r   r  r&  r  s         r   codegen_kwargszExternKernel.codegen_kwargs  s3   7 	+D4F0G0G10L0L	F > Q Q E 1 1))(33a&& QMM!$$$$1=== 266xDDHHPPEMM!'"6"E"Ea"O"OPPPPQ  K--//  F r   c                    | j         W| j         j        }t          |dd          }|                    dd          }|                    dd          d         }| d| }nd}|S )	Nr   unknown_namespacer}  r~  rv  r@   r   
unknown_op)rX  r'  r   rz  rsplit)r  r'  op_namespaceop_names       r   get_op_namezExternKernel.get_op_name  st    <#\(F"6<9LMML'//'BBL'..sA66q9L%0000GG"Gr   c                   t           j        rt          j        j        st          |                                           dk    rd S t          j        j                            |                                           }t          j        j                            | 	                                          }| 
                                }|                    d|                                  d| d| d|d	           d S d S d S )Nr   zassert_size_stride(rS  r3  )rA   size_assertsrm   r  rw  rh   r  r  codegen_shape_tuplerH  r	  r  r#  )r  rk  r  r  r	  s        r   codegen_size_assertsz!ExternKernel.codegen_size_asserts  s     		qw': 		T]]__--227';;DMMOOLLDW)==doo>O>OPPF&&((GWdmmooWWWWWW7WWW    		 		 		 		r   c           	     F   t           j        rt          j        j        s|                                 }|t          j        j        v}|                                 }|r&|                    d| dt           d|d           d S |                    d| d| d           d S d S d S )Nzassert_alignment(rS  r3  z	# buffer z (op: z) is assumed to be not aligned)
rA   alignment_assertsrm   r  rw  r#  r  r	  r  ra   )r  rk  r   alignedr	  s        r   codegen_alignment_assertsz&ExternKernel.codegen_alignment_asserts  s    # 	AG,? 	==??D!'";;G&&((G !!OOOOO7OOO     !!SSSGSSS    	 	 	 	r   c                    t           j        j        rt          j        j        rdS |                                 |                                 }|                    d| d| d           dS )zc
        Track outputs of fallback operators if config.test_configs.track_memory_lifecycle
        Nztrack_tensor(z, 'z'))	rA   test_configstrack_memory_lifecyclerm   r  rw  "write_memory_track_allocation_oncer#  r  )r  rk  r   s      r   codegen_memory_trackingz$ExternKernel.codegen_memory_tracking  sp     "9 	QW=P 	F22444}};$;;4;;;<<<<<r   'tuple[list[Sequence[Expr]], list[Expr]]c                ^    |                                  }|                                 }|g g|fS )zD
        get output sizes and strides, for template_codegen
        )r  rH  )r  r  r  s      r   get_group_stridezExternKernel.get_group_stride  s/     //##r{G##r   tuple[Expr, Sequence[Expr]]c           	     f   t           j        j        |                                 }|                                 }fd|D             }d t          t          |                    D             t          t          t          |                    |j        d          }d t          |          D             fdt          t                              D             }fd|D             | 
                                } |          }t           j        j                            ||g          \  }}}	t          d          \  }
t          t           |fd	|D                                           }t          t!          j        |          |          }|t%          |          fS )
zC
        Manually get canonicalization of the output index
        c                :    g | ]}                     |          S r   )r  )r   r   r  s     r   r   z-ExternKernel.canonicalize.<locals>.<listcomp>  s'    :::Q8%%a((:::r   c                2    g | ]}t          d |           S )r  )rf   r  s     r   r   z-ExternKernel.canonicalize.<locals>.<listcomp>  s'    MMMa(Q11MMMr   T)r  r}  c                    i | ]\  }}||	S r   r   r   s      r   r   z-ExternKernel.canonicalize.<locals>.<dictcomp>  s    BBBxsC#sBBBr   c                     g | ]
}|         S r   r   r   s     r   r   z-ExternKernel.canonicalize.<locals>.<listcomp>  s    777q777r   c                     g | ]
}|         S r   r   )r   r   r,  s     r   r   z-ExternKernel.canonicalize.<locals>.<listcomp>  s    333jm333r   cc                &    g | ]} |          S r   r   )r   r   add_vars     r   r   z-ExternKernel.canonicalize.<locals>.<listcomp>  s!    3R3R3R1GGAJJ3R3R3Rr   )rm   r  r  r  rH  r   r   r  rg  r   r  rI  rL   r   r   ri   r   r  r   )r  rU  r[  index_orderr   r  r   	new_sizesr   rK  r  replacementr	  r,  r   r  s               @@@@r   canonicalizezExternKernel.canonicalize  s   
 7#//##::::':::MM5U;L;LMMM
U3w<<00g6ISWXXXBB9[+A+ABBB7777E#f++$6$67773333U333
##%%
##%&W%5%E%Ew&
 &
"	7F !%%
73z773R3R3R3R	3R3R3R+S+STTUU5<..<<eI&&&&r   r  c                    |rt           nt          }t                              | |          }| j        D ]}| ||          z  }| j                                        D ]}| ||          z  }|S r   )maybe_free_unbacked_symbolsmaybe_free_symbolsr  rV  rC  r   r   )r  r  maybe_get_symbolsr  r)  s        r   rV  z!ExternKernel.get_free_symbol_uses#  s     ,9P''>P 	 --dMBB% 	( 	(C""3'''AA;%%'' 	( 	(C""3'''AAr   c                     t           dd           }d|g}| fdt          j                   D             z  }|                    d j                                        |          S )NrG  zpython_kernel_name=c                N    g | ]!}|j          d t          |j                    "S r  )r   r   )r   r  r  s     r   r   z(ExternKernel.__str__.<locals>.<listcomp>8  sE     
 
 
 z77GD%*5577
 
 
r   r  )r   r  fieldsr  r  r  )r  kernel_namer  s   `  r   r  zExternKernel.__str__3  s    d$8$??1+11
 	 
 
 
 
$+D11
 
 
 	
 	8D$488999u%%%r   r   NNNNr   N)r   r0  rc  r4  r{  r  rC  rz  r   rR  rF  rE  rG  r0  rH  r0  rJ  rI  rL  rK  r   r   r  r  ri  rk  rr   r   r   r   rH  r0  r   r   )rG  r0  r   r   r{  )r   r   r   r  )r5  r{   r   r   r   r   r   r  )r   r   r   rh  r  )NNF)
r   r   r   r1  rD  r  r9  r   r   r   r  )r   r   rD  rO  r9  r   r   r   )r   r   r   r   r9  r   r   r   )r   rz  r   rD  r   rz  )r  r  r   r   r   r   )r  r   r   r   r   r   )r  r   r   r   )r   r	  )r   r	  r  )>r   r   r   r  rC  r   r  r  r   r   rF  rG  rH  r   rJ  rL  rM  rN  rO  rP  rQ  rD  r  r  rW  r  rp  rs  rU  rV  r  r  r  r  r  r  r  r  r  r  r  r  r  rg  r  rj  r  r  r  r  r  r	  r	  r	  r	  r	  r!	  rX   rV  r  r  r  r  s   @r   r  r    sx         
 $&M%%%%.[.tDDDFDDDD-1K1111(,,,,,%)O)))) 4E;3D4 4 4!     +/K....59N99993D;3D4 4 4     =A@@@@<MK<M= = =     .?[->t-T-T-TTTTT (*+/15,0)-79.2, , , , , , ,<. . . .   #$ #$ #$ #$J! ! ! !
- - - -
" " " "; ; ; ; ;0   + + + +& 
 
 
 \
 w
 w
 w
 [w
r C
 C
 C
 [C
J ! ! ! [!@ ! ! ! [!  *.6:#a a a a [aF QV
 
 
 
 [
 DIP P P P [P
 > > > [> ? ? ? [?    [" 
 
 
 [
   " " " "HX X X X XB   4J J J J    4	 	 	 	
 
 
 
   	= 	= 	= 	=$ $ $ $' ' ' '> N++$)    ,+
& 
& 
& 
& HHHHHr   r  c                  @     e Zd ZddZ	 	 	 	 	 	 	 dd fdZddZ xZS )ExternKernelOutrk  rr   r   r   c                0    |                     |            d S r   )generate_extern_kernel_outrr  s     r   rs  zExternKernelOut.codegenD  s    **400000r   r   Nrc  r  r{  r|  rC  rz  r   rR  rF  rE  rG  r0  rH  rJ  rL  rK  c
                h   |                      |          }
t          |
t                    sJ t          |
                      t	                                          d ||
||pi d ||||	
  
         t          j                            |           | _	        t          j        
                    |            d S r   )rs  r   r   r   r  rD  rm   r  rt  r   ru  )r  rc  r{  rC  r   rF  rG  rH  rJ  rL  unwrapped_inputsr  s              r   rD  zExternKernelOut.__init__G  s      ..v66*H55MMt<L7M7MMM5Lb)	
 	
 	
 G++D11		""4(((((r   r   c                    dS r  r   r  s    r   r  zExternKernelOut.should_allocated  r  r   r+	  r*	  )rc  r  r{  r|  rC  rz  r   rR  rF  rE  rG  r0  rH  r0  rJ  rz  rL  rK  r   r   rq  )r   r   r   rs  rD  r  r  r  s   @r   r/	  r/	  B  s        1 1 1 1 (*+/15,0)-79.2) ) ) ) ) ) ):       r   r/	  c                        e Zd Zd fdZ xZS )	RandomSeedscountr   r  r  r   r   c                   t          j        t           j                  }t                                          t          |t           j        |g          g |j        |j        |ggddt          j	        j
                   d S )Nr  zaten.randint.low_outzat::_ops::randint_low_out::call)rc  r{  rC  rG  rH  rL  )r%  r  r  r  rD  re  r4  r  ru  randintlow_out)r  r7	  r  limitsr  s       r   rD  zRandomSeeds.__init__i  s    U[))kW  
 !:vzE7;5 >, 	 	
 	
 	
 	
 	
r   )r7	  r   r  r  r   r   r   r   r   rD  r  r  s   @r   r6	  r6	  h  s=        
 
 
 
 
 
 
 
 
 
r   r6	  c                  F     e Zd ZddZ	 	 	 	 	 	 dd fdZddZddZ xZS )r<  rk  rr   r   r   c                0    |                     |            d S r   )generate_extern_kernel_allocrr  s     r   rs  zExternKernelAlloc.codegen}      ,,T22222r   r   Nrc  r4  r{  r|  rC  rz  r   rR  rG  r0  rH  rJ  rL  rK  c	                   |                      |          }	t          d |	D                       sJ t                                          d |t	          t
          t                   |	          ||pi d ||||
  
         g | _        t          j	        
                    |           | _        t          j	                            |            d S )Nc              3  @   K   | ]}t          |t                    V  d S r   r  r  s     r   r   z-ExternKernelAlloc.__init__.<locals>.<genexpr>  s,      CCQ:a((CCCCCCr   )rs  r   r  rD  r   r   r   r  rm   r  rt  r   ru  )r  rc  r{  rC  r   rG  rH  rJ  rL  r3	  r  s             r   rD  zExternKernelAlloc.__init__  s      ..v66CC2BCCCCCCCC&!#344Lb)	
 	
 	
 ')G++D11		""4(((((r   r   c                    dS r  r   r  s    r   r  z!ExternKernelAlloc.should_allocate  r  r   c                    t           r   r  r  s    r   rj  z"ExternKernelAlloc.apply_constraint  r  r   r+	  )r   NNNr   N)rc  r4  r{  r|  rC  rz  r   rR  rG  r0  rH  r0  rJ  rz  rL  rK  r   r   rq  ri  )r   r   r   rs  rD  r  rj  r  r  s   @r   r<  r<  |  s        3 3 3 3 (*+/,0)-79.2) ) ) ) ) ) )@   " " " " " " " "r   r<  c                  D     e Zd ZdZd fd
ZddZddZddZddZ xZ	S )r  zP
    An output buffer that represents the mutation of a pre-existing buffer
    rc  r4  mutated_noder   mutating_noder  r   r   c                   t                                          d |           |                                }t          j                            |           |g| _        || _        t          j                            |           | _	        d S r  )
r  rD  r#  rm   r  r  mutation_namesrG	  rt  r   )r  rc  rF	  rG	  mutated_node_namer  s        r   rD  zMutationOutput.__init__  sy     	d6222(1133	##$566601(5G++D11			r   c                    | j         S r   )rG	  r  s    r   r  zMutationOutput.get_defining_op  s    !!r   r  c                    | j         S r   )rI	  r  s    r   rb  z!MutationOutput.get_mutation_names  rn  r   r   c                    dS r  r   r  s    r   r  zMutationOutput.should_allocate  r  r   r|  c                V    |                                  }d d |D             D             S )Nc                    g | ]}||S r   r   )r   r  s     r   r   z7MutationOutput.get_mutation_buffers.<locals>.<listcomp>  s%     
 
 
 r   c              3  T   K   | ]#}t           j                            |          V  $d S r   )rm   r  try_get_buffer)r   r   s     r   r   z6MutationOutput.get_mutation_buffers.<locals>.<genexpr>  s2      PP..t44PPPPPPr   )rb  )r  rI	  s     r   get_mutation_buffersz#MutationOutput.get_mutation_buffers  sB    0022
 
PPPPP
 
 
 	
r   )rc  r4  rF	  r   rG	  r  r   r   r  r  rq  r   r|  )
r   r   r   r  rD  r  rb  r  rR	  r  r  s   @r   r  r    s         2 2 2 2 2 2" " " "# # # #   
 
 
 
 
 
 
 
r   r  c                  t     e Zd ZU dZi Zded<   edd	            Zedd
            Zd fdZ	ddZ
ddZ xZS )TMADescriptorad  
    An IR node representing a generic host-side TMA descriptor in the Triton API
    Mostly useful for user-defined Triton kernels relying on host-side TMA;
    but can, in principle, be used for Inductor's Triton templates, too.

    See TMADescriptorExperimental and TMADescriptorStable for the two implementations
    (the old API and the new API)
    zdict[Any, TMADescriptor]_CACHErZ  r   tma_metatuple[str, tuple[Any, ...]]r   c                    t          |          dk    sJ |d         dk    rt          |g|d         R  S |d         dk    sJ t          |g|d         R  S )Nr   r   experimentalr@   r  )r   TMADescriptorExperimentalTMADescriptorStable)r  rZ  rW	  s      r   _create_implzTMADescriptor._create_impl  ss     8}}!!!!A;.((,VBhqkBBBBA;(****&v<<<<<r   c                    t          |          |f}|| j        vr|                     ||          | j        |<   | j        |         S r   )idrV	  r]	  )r  rZ  rW	  r  s       r   r  zTMADescriptor.create  sH     &zz8$cj  !..vx@@CJsOz#r   r{  rz  rC  r   c           
        t                                          d t          t          ||                                                    t          t          t                   |          t          |          d            || _	        t          j                            |           | _        t          j                            |            d S )Nra  )r  rD  r  rh  r  r   r   r  r   rZ  rm   r  rt  r   ru  )r  rZ  r{  rC  r  s       r   rD  zTMADescriptor.__init__  s     	 !,,..    &!6**-  	
 	
 	
  G++D11		""4(((((r   rk  rr   c                0    |                     |            d S r   )generate_tma_descriptorrr  s     r   rs  zTMADescriptor.codegen      ''-----r   c                    | j         S r   )rZ  r  s    r   
get_tensorzTMADescriptor.get_tensor  r  r   )rZ  r   rW	  rX	  r   rU	  )rZ  r   r{  rz  rC  rz  r   r   r+	  r~  )r   r   r   r  rV	  r   r  r]	  r  rD  rs  re	  r  r  s   @r   rU	  rU	    s           (*F))))= = = [=    [) ) ) ) ) ).. . . .       r   rU	  c                  (     e Zd ZdZ	 dd fdZ xZS )r[	  z
    the new host-side TMA Descriptor API:
    (the ones obtained via create_{1d,2d}_tma_descriptor calls).

    See also TMADescriptorStable for the new API.
    NrZ  r   r*  list[Union[int, torch.SymInt]]
block_dimselement_sizer*  r   r   c                   t          |          dv sJ t          |          t          |          k    sJ ||                                j        }|| _        || _        || _        t          | j                  | _        |g}g | j        | j        | j        }t                                          |||           d S )N)r@   r   rZ  r{  rC  )	r   r"  r  r*  rh	  ri	  r@  r  rD  )r  rZ  r*  rh	  ri	  r{  rC  r  s          r   rD  z"TMADescriptorExperimental.__init__  s     4yyF""""4yyC
OO++++!++--6L	$(	NN	
Y
_
 
 	' 	 	
 	
 	
 	
 	
r   r   )
rZ  r   r*  rg	  rh	  rg	  ri	  r*  r   r   r   r   r   r  rD  r  r  s   @r   r[	  r[	    sQ          '+
 
 
 
 
 
 
 
 
 
 
r   r[	  c                  $     e Zd ZdZd fdZ xZS )r\	  z
    the new host-side TMA descriptor API
    (the ones obtained via TensorDescriptor.from_tensor).

    See also TMADescriptorExperimental for the old API.
    rZ  r   block_shaperg	  c                `    || _         t                                          ||g|           d S )Nrk	  )rn	  r  rD  )r  rZ  rn	  r  s      r   rD  zTMADescriptorStable.__init__8  sA    &8% 	 	
 	
 	
 	
 	
r   )rZ  r   rn	  rg	  rl	  r  s   @r   r\	  r\	  0  sG         
 
 
 
 
 
 
 
 
 
r   r\	  c                  (     e Zd Zd fdZddZ xZS )SubgraphBufferrc  r  r   r  rn  ro  example_inputs	list[Any]subgraph_namer   c                4   t                                          d ||           || _        || _        t          j                            |           | _        t          j                            |            t          j        	                    | j        ||          | _
        t          | j                  sJ t          | j                  }|D ]:}|| j
        j        |j        <   | j
        j                            |j                   ;d |D             | _        dd lmc m} t	          j        | j
                  5  |                    ddd          5   | j
        j        | j          d d d            n# 1 swxY w Y   d d d            d S # 1 swxY w Y   d S )Nc                    g | ]	}|j         
S r   r  )r   sym_vars     r   r   z+SubgraphBuffer.__init__.<locals>.<listcomp>Z  s    BBBG7<BBBr   r   FATEN)max_autotunemax_autotune_gemmmax_autotune_gemm_backends)r  rD  rn  rr	  rm   r  rt  r   ru  make_subgraphsubgraphr  r{  r  r'  graph_input_namesr  
sym_inputstorch._inductor.configr  rA   set_graph_handlerr    run)
r  rc  r   rn  rr	  rt	  r	  sym_inpinductor_configr  s
            r   rD  zSubgraphBuffer.__init__C  s    	v{333,G++D11		""4(((--dg~}UU,,,,,(55
! 	A 	AG7>DM&w|4M+227<@@@@BBzBBB888888888 // 	8 	8 &&""'+1 '   8 8
 "!4#6778 8 8 8 8 8 8 8 8 8 8 8 8 8 8	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8s6   ;FE5)F5E9	9F<E9	=FFFrk  rr   r   r   c                     G d d          }t          | j                  sJ d | j        D             }|                     || j                  g | j        || j        g           d S )Nc                      e Zd ZddZdS ),SubgraphBuffer.codegen.<locals>.CodegenGraphr  rs   c                ,    || _         |j        | _        d S r   )r  r   )r  r  s     r   rD  z5SubgraphBuffer.codegen.<locals>.CodegenGraph.__init__i  s    "
!J			r   N)r  rs   )r   r   r   rD  r   r   r   CodegenGraphr	  h  s(        ' ' ' ' ' 'r   r	  c                6    g | ]}|                                 S r   r  r   r(  s     r   r   z*SubgraphBuffer.codegen.<locals>.<listcomp>n  s$    CCC!++--CCCr   )r  r{  'codegen_subgraph_with_flattened_outputsr}	  r	  r   )r  rk  r	  outer_inputss       r   rs  zSubgraphBuffer.codegeng  s    	' 	' 	' 	' 	' 	' 	' 	'
  ,,,,,CCt{CCC77L''-do--YK	
 	
 	
 	
 	
r   )
rc  r  r   r  rn  ro  rr	  rs	  rt	  r   r+	  r   r   r   rD  rs  r  r  s   @r   rq	  rq	  B  sR        "8 "8 "8 "8 "8 "8H
 
 
 
 
 
 
 
r   rq	  c                       e Zd ZddZedd            Z ed           	 dd fd            ZddZd fdZ	d dZ
d!dZ xZS )"UserDefinedTritonKernelr   (tuple[Kernel, Any, list[str], list[str]]c                B   ddl m} ddlm} |                    | j                  g }g }g }t          |          rt          d          r'|                    fdj	        D                        n,t          d          sJ |                    j
                   t          d          r0j        D ]'}|                    j        j        |                    (n,t          d          sJ |                    j                   j        }j        |||fS )	Nr   )	Autotuner)kernel_side_tablerestore_idxc              3  <   K   | ]}j         j        |         V  d S r   )r   	arg_namesr   r   r5  s     r   r   zBUserDefinedTritonKernel.get_kernel_and_metadata.<locals>.<genexpr>  s>       * */0FI'** * * * * *r   restore_value	reset_idxreset_to_zero)triton.runtime.autotunerr	  *torch._higher_order_ops.triton_kernel_wrapr	  
get_kernel
kernel_idxr   r  rR  r	  r	  r	  r  r   r	  r	  configs)r  r	  r	  r	  restore_value_argsreset_to_zero_argsr   r5  s          @r   get_kernel_and_metadataz/UserDefinedTritonKernel.get_kernel_and_metadataw  s   666666PPPPPP"--do>>(*(*fi(( 	 v}-- @")) * * * *4:4F* * *     v77777"))&*>???v{++ @) F FA&--fi.A!.DEEEEF v77777"))&*>???nGYFw 24FFFr   rk  rr   r   c                8    ddl m}                                  \  }}}|                    | j        || j                  \  }}} fd j        D             }	t          d          rt          d          sJ t                                t          fdj
        D                       }
g }g }g }g }t          j        |	                                t          t          j        d          |                    D ]\  }}||
v r |            r|                    |           |                    |           t#          |t$                    rO|                    |                                           |                    |                                           t#          |t*          t,          t.          t0          j        f          r9|                    |           |                    t          |                     	||
v r1|                    d           |                    t*                     >|f	  |            r1|                    d           |                    t*                     ||                                 |                                 t7          d
t          |           d|                                |           |                    ||||||d                                  j        j         	  	         d	S )YOverrides the parent member.
        See https://github.com/pytorch/pytorch/issues/151692r   )triton_version_uses_attrs_dictc                <    i | ]}|                     |          S r   r  )r   r  r  s     r   r   z3UserDefinedTritonKernel.codegen.<locals>.<dictcomp>  s6     
 
 
,-At$$Q''
 
 
r   r	  
constexprsc              3  2   K   | ]}j         |         V  d S r   )r	  r	  s     r   r   z2UserDefinedTritonKernel.codegen.<locals>.<genexpr>  s+      $T$TQV%5a%8$T$T$T$T$T$Tr   r  rI  NzUnsupported arg type: r2  T)	arg_typesraw_argsraw_keystriton_metar;  r  original_fxnode_name)!torch._inductor.utilsr	  r	  !define_user_defined_triton_kernelr   gridrJ  r  r   r;   r	  r  chainr2  r   repeatr  r   r   r  r"  r   r  r   r   r!   r  r  rp  generate_kernel_callr#  rX  r   )r  rk  r	  r	  r	  r	  new_namer	  extra_launch_args
named_argsconstexpr_namesr   r	  raw_keys_filteredraw_args_filteredr   r)  r5  s   `                @r   rs  zUserDefinedTritonKernel.codegen  sx   
 	IHHHHH ((**	
 55KI
 
		

 
 
 
151S
 
 

 v{++ 	
0M0M 	
 	
tP
 P
 	
 	
M %$T$T$T$T&BS$T$T$TTT!	')')"I$4R$8$8:K L L
 
 $	W $	WID# &&+I+I+K+K&$$T***$$S)))#v&& WC1133444  1111C#udEJ!?@@ WC     c++++(( B  %%%% 2133 ,KKOOO$$S))))%))+++%))++++)*U499*U*UPS*U*UVVVW%%%$$&&#??$$!%!2 	% 
	
 
	
 
	
 
	
 
	
r   Fr  r   r   c                r    t                                          |          t          | j        |          z  S r   )r  rV  r)   r	  rk  s     r   rV  z,UserDefinedTritonKernel.get_free_symbol_uses  s8     ww++M::=MI}>
 >
 
 	
r   c                    t                      S r   r:   r  s    r   r  z0UserDefinedTritonKernel.get_unbacked_symbol_defs  r  r   r	  r   r	  r   tma_descriptor_metadatarD  kernel_argsc               r    g }i }g }                                 D ]\  }}	t          |	t                    rmt                                                   |	                    }
||v r!t                              |
||                   }
|                    |
           |
||<   |                    |	           |	||<   t          |          dk    sJ |d         
                                 _        t          |t                    sJ t          |                      t                                          d t!           j                  |t#          |          |           | _        | _                                         \  }}}}t+          |d          sJ fd|j        D              _        ddlm} t          |          dk    r|d         j        ni }fd ||i ||          D              _         fd j        D              _        t:          j                                        d S )Nr   r  r	  c                    g | ]}|v |	S r   r   )r   r)  r	  s     r   r   z4UserDefinedTritonKernel.__init__.<locals>.<listcomp>&  s*     .
 .
 .
sk/A/AC/A/A/Ar   )identify_mutated_tensorsc                     g | ]
}|         S r   r   )r   r  r	  s     r   r   z4UserDefinedTritonKernel.__init__.<locals>.<listcomp>-  s.     
 
 
 
 
 
r   c                X    g | ]&}t          t          j                   |          'S r  )r  r  r  )r   r  r  s     r   r   z4UserDefinedTritonKernel.__init__.<locals>.<listcomp>4  sB     !
 !
 !
 :T[9993EE!
 !
 !
r   ) r2  r   r   r  r  r  rU	  r  r  r   r#  r  r   r   r  rD  r  r   r	  r	  r	  r  r	  rJ  r	  r	  r   mutable_argsrQ  rm   r  ru  )r  r	  r	  r	  r	  r{  r   rC  r  r&  r(  r5  r	  r  r	  autotuned_kwargsr  s   `   `           r   rD  z UserDefinedTritonKernel.__init__  s     "$&&(%%'' 		 		DAq!Y''  99$:L:LQ:O:OPP///%,,Q0G0JKKAa   q		$$Q'''q		6{{aQi**,,&(++99T&\\99+dk***-  	
 	
 	
 %	 $ < < > >A v{+++++.
 .
 .
 .
!+.
 .
 .
* 	XWWWWW03Gq0@0@71:,,b
 
 
 
//;;;*:;=T 
 
 
!
 !
 !
 !
(!
 !
 !
 	
""4(((((r   r  c                *    t          | j                  S r   )r   rQ  r  s    r   r  z#UserDefinedTritonKernel.get_outputs:  s    D)***r   r  c                    | j         S r   r  r  s    r   r#  z"UserDefinedTritonKernel.get_device=  r  r   )r   r	  r+	  r  r  r  )
r	  r   r	  r   r	  rD  r	  rD  r   r   r  rv  )r   r   r   r	  r   rs  rX   rV  r  rD  r  r#  r  r  s   @r   r	  r	  v  s        G G G G> W
 W
 W
 XW
r 566$)
 
 
 
 
 
 76
   ;) ;) ;) ;) ;) ;)z+ + + +       r   r	  c                  D     e Zd ZdZddZddZdd
ZddZd fdZ xZ	S )InplaceBernoulliFallbackE
    This needs to be a custom class to handle mutation properly
    rk  rr   r   r   c                   t          d | j        D                       sJ d | j        D             \  }t          j        j        rd|                    |                                  d| dd                    t          t          | j
                             d|j                    d S |                    |                                  d| dd                    t          t          | j
                             d|j                    d S )Nc              3  @   K   | ]}t          |t                    V  d S r   r  r	  s     r   r   z3InplaceBernoulliFallback.codegen.<locals>.<genexpr>G  s,      >>Q:a((>>>>>>r   c              3  d   K   | ]+}t          t          |                                          V  ,d S r   )r   r   r  r	  s     r   r   z3InplaceBernoulliFallback.codegen.<locals>.<genexpr>H  s6      IIVQ1133IIIIIIr   r  rS  z, NULL)r3  )r   r{  rm   r  rw  r  r  r  r  reprrC  ending)r  rk  r   s      r   rs  z InplaceBernoulliFallback.codegenF  s'   >>$+>>>>>>>>IIT[III7 		 ''))rrArr3tTEW;X;X1Y1Yrrbibprr     ''))llAll3tTEW;X;X1Y1Yll\c\jll    r   r   c                    dS r  r   r  s    r   r  z(InplaceBernoulliFallback.should_allocateU  r  r   r  c                .    |                      d          gS r   r  r  s    r   rb  z+InplaceBernoulliFallback.get_mutation_namesX      ""##r   r   c                    t                      S r   r:   r  s    r   r  z1InplaceBernoulliFallback.get_unbacked_symbol_defs[  r  r   rL  r{   r   r   rC  r   c                   t                                          d t          |                                          |                     |g          ||           t
          j                            |                                           t
          j        	                    |           | _
        t
          j                            |            d S )Nr  rL  )r  rD  r  r#  rs  rm   r  r  r#  rt  r   ru  )r  rL  r   rC  r  s       r   rD  z!InplaceBernoulliFallback.__init__^  s     	allnn---$$# 	 	
 	
 	
 	
##AJJLL111G++D11		""4(((((r   r+	  rq  r  r  )rL  r{   r   r   rC  r   r   r   
r   r   r   r  rs  r  rb  r  rD  r  r  s   @r   r	  r	  A  s               $ $ $ $   ) ) ) ) ) ) ) ) ) )r   r	  c                  `     e Zd ZdZddZddZdd
ZddZd fdZe		 dd d            Z
 xZS )!InplaceCopyFallbackr	  rk  rr   r   r   c                d    |                                  \  }}}|                    |||           d S r   )r  codegen_device_copy)r  rk  r  r  non_blockings        r   rs  zInplaceCopyFallback.codegens  s8    #'#4#4#6#6 c<##Cl;;;;;r   r   c                    dS r  r   r  s    r   r  z#InplaceCopyFallback.should_allocatew  r  r   r  c                .    |                      d          gS r   r	  r  s    r   rb  z&InplaceCopyFallback.get_mutation_namesz  r	  r   r   c                    t                      S r   r:   r  s    r   r  z,InplaceCopyFallback.get_unbacked_symbol_defs}  r  r   rc  r4  r{  r|  rC  rz  c                J   t                                          d |||dd           t          j                            |d                                                    t          j                            |           | _        t          j                            |            d S )Nz
aten.copy_aoti_torch_copy_)rG  rH  r   )	r  rD  rm   r  r  r#  rt  r   ru  )r  rc  r{  rC  r  s       r   rD  zInplaceCopyFallback.__init__  s     	+. 	 	
 	
 	
 	
##F1I$6$6$8$8999G++D11		""4(((((r   Fr  r   r  r	  c                      fd||fD             }|f}t          t          |                                          ||          }|S )Nc                :    g | ]}                     |          S r   r  )r   r(  r  s     r   r   z.InplaceCopyFallback.create.<locals>.<listcomp>  s'    ;;;1###A&&;;;r   r  )r	  r  r#  )r  r  r  r	  r{  rC  r5  s   `      r   r  zInplaceCopyFallback.create  s]     <;;;c
;;;%$cnn..///
 

 r   r+	  rq  r  r  )rc  r4  r{  r|  rC  rz  r   r   r  )r  r   r  r   r	  r   r   r	  )r   r   r   r  rs  r  rb  r  rD  r  r  r  r  s   @r   r	  r	  n  s         < < < <   $ $ $ $   ) ) ) ) ) )$ <A
 
 
 
 [
 
 
 
 
r   r	  c                  :    e Zd ZdZddZddZdd
ZddZddZdS )MutatingFirstArgExternKernelr	  rk  rr   r   r   c                   t          | j                  sJ g d | j        D             t          t          | j                  }|                    |                                  dd                    |           d|j                    d S )Nc              3  >   K   | ]}|                                 V  d S r   r	  r	  s     r   r   z7MutatingFirstArgExternKernel.codegen.<locals>.<genexpr>  s.      99a!!##999999r   r  rS  r3  )	r  r{  r  r	  rC  r  r  r  r	  )r  rk  argrefss      r   rs  z$MutatingFirstArgExternKernel.codegen  s    ,,,,,
99T[999
t)**
 	##%%MM		'(:(:MMW^MM	
 	
 	
 	
 	
r   r   c                    dS r  r   r  s    r   r  z,MutatingFirstArgExternKernel.should_allocate  r  r   r  c                .    |                      d          gS r   r	  r  s    r   rb  z/MutatingFirstArgExternKernel.get_mutation_names  r	  r   r   c                    t                      S r   r:   r  s    r   r  z5MutatingFirstArgExternKernel.get_unbacked_symbol_defs  r  r   c                    dS r  r   r  s    r   has_side_effectsz-MutatingFirstArgExternKernel.has_side_effects  r  r   Nr+	  rq  r  r  )	r   r   r   r  rs  r  rb  r  r	  r   r   r   r	  r	    s         
 
 
 
   $ $ $ $        r   r	  c                        e Zd Zd fdZ xZS )	ResizeStorageBytesvariabler   r  r   r   r   c                   t          |t                    s
J d            t                                          d t	          |                                          |                     |g          |f           t          j        	                    |
                                           t          j                            |           | _        t          j                            |            d| _        d| _        t          |t           t"          t$          f          sJ t'          |                      t          j        j                            |j        
                                           d S )NzTODO: dynamic shapesr  )rC  z"inductor_ops.resize_storage_bytes_z&torch::inductor::resize_storage_bytes_)r   r   r  rD  r  r#  rs  rm   r  r  r#  rt  r   ru  rG  rH  r  r  r   r   never_reuse_buffersr  rb  )r  r	  r  r  s      r   rD  zResizeStorageBytes.__init__  s1   (C((@@*@@@(h1133444
++#+	 	 	
 	
 	
 	
##H$5$5$7$7888G++D11		""4((("FG(Xz9$EFFVVXVVF	#''(>(>(@(@AAAAAr   )r	  r   r  r   r   r   r<	  r  s   @r   r	  r	    sG        B B B B B B B B B Br   r	  c                  (     e Zd Zd	 fdZd
dZ xZS )SetSourceTensorKernelself_tensorr   storage_tensorr   r   c                B   |                                  t                                          |                                ||gdt          j        j        j        j                   t          |t          t          t          f          sJ t          |                      t          j        j                            |j                                                   t          j        j                            |                                           t          j        j                            |                                            |                                }t+          t-          |          ||           t+          t-          |          ||           g| _        d S )Nz!torch.ops.aten.set_.source_Tensor)rG  rL  r  )r8  r  rD  r  r%  rk   ru  set_source_Tensorr   r  r  r   r   rm   r  r	  r  rb  r#  r#  r  r  rQ  )r  r	  r	  r  r  s       r   rD  zSetSourceTensorKernel.__init__  s`   $$&&&%%''.)B	+9	 	 	
 	
 	
 +*i'HII 	
 	
4L
 L
 	
 	
I 	
#''(8(A(A(C(CDDD	#''(?(?(A(ABBB	#''888**,,:V444k4HH:V444ndKK!
r   r  c                V    |                      d          |                      d          gS r+  r	  r  s    r   rf  z2SetSourceTensorKernel.get_inputs_that_alias_output  s%    ""DOOA$6$677r   )r	  r   r	  r   r   r   r  )r   r   r   rD  rf  r  r  s   @r   r	  r	    sQ        
 
 
 
 
 
(8 8 8 8 8 8 8 8r   r	  c                  L     e Zd ZdZddZddZdd
ZddZdddd  fdZ xZ	S )!ScatterFallbackz
    This needs to be a custom class to handle mutation properly.
    This class handles both aten.scatter_ and aten.scatter_reduce_.
    It also handle the case `src` being a scalar properly.
    rk  rr   r   r   c           
        | j         d         }t          j        j        rddd}||v r||         }t	          | j                  sJ | j        rd | j        D             \  }}}n!d | j        D             \  }}| j        d         }|                    ||| j        d         ||g| j	        | j
        | j        ||                                            d S )	Nr  r6  r5  )r  multiplyc              3  >   K   | ]}|                                 V  d S r   r	  r	  s     r   r   z*ScatterFallback.codegen.<locals>.<genexpr>  s.      JJq2244JJJJJJr   c              3  >   K   | ]}|                                 V  d S r   r	  r	  s     r   r   z*ScatterFallback.codegen.<locals>.<genexpr>  s.      EEA!--//EEEEEEr   r@   r   )r   rm   r  rw  r  r{  src_is_tensorrC  generate_scatter_fallbackrH  rG  r  )r  rk  r  get_operator_enumr   r   r  s          r   rs  zScatterFallback.codegen  s    X&7 	3(-6 B B****62,,,,, 	(JJdkJJJOQssEEEEEJQ$Q'C))"1%uc2 #!!	
 	
 	
 	
 	
r   r   c                    dS r  r   r  s    r   r  zScatterFallback.should_allocate  r  r   r   c                t    | j         d         }t          |t                    sJ |                                gS r   r	  )r  r  s     r   rb  z"ScatterFallback.get_mutation_names	  s4    k!n#v&&&&&r   r   c                    t                      S r   r:   r  s    r   r  z(ScatterFallback.get_unbacked_symbol_defs  r  r   NTr  include_selfrL  r{   r   r   rV  r   r   r  r  r0  r

  c          
     b    t          |t                     _         j        r fd|||fD             }|f}	n fd||fD             }||f}	t                                          d t          |                                                               |          |	||dt          |          ddg|           t          j
                            |                                           t          j
                                        _        t          j
                                        d S )Nc                :    g | ]}                     |          S r   r  r   r(  r  s     r   r   z,ScatterFallback.__init__.<locals>.<listcomp>   s'    FFFt))!,,FFFr   c                :    g | ]}                     |          S r   r  r
  s     r   r   z,ScatterFallback.__init__.<locals>.<listcomp>#  s'    AAAt))!,,AAAr   r  r	
  r  r

  )rG  rJ  rL  )r   r   r
  r  rD  r  r#  rs  r   rm   r  r  r#  rt  r   ru  )r  rL  r   rV  r   r  r  r

  tensorsrC  r  s   `         r   rD  zScatterFallback.__init__  s5    (Y77  	'FFFFq%oFFFG FMMAAAAq%jAAAG #JMallnn---((|<<";//+3^*D# 	 		
 		
 		
 	
##AJJLL111G++D11		""4(((((r   r+	  rq  r-	  r  )rL  r{   r   r   rV  r   r   r   r  r   r  r0  r

  r   r   r   r	  r  s   @r   r	  r	    s         
 
 
 
0          
    !%!!) !) !) !) !) !) !) !) !) !) !) !)r   r	  c                  D     e Zd ZdZddZddZdd
ZddZd fdZ xZ	S )IndexPutFallbackzQ
    This needs to be a custom class to handle mutation and indices properly
    rk  rr   r   r   c                   t          | j                  sJ d | j        D             ^}}}g }t          |          }t          | j                  D ]^\  }}| j        |         #|                    t          |                     5|                    t          j        j	        j
                   _ |j        |                                 |||g|                                 R   d S )Nc              3  >   K   | ]}|                                 V  d S r   r	  r	  s     r   r   z+IndexPutFallback.codegen.<locals>.<genexpr><  s.      &R&Rq':':'<'<&R&R&R&R&R&Rr   )r  r{  r  r   r  r  r  rm   r  r  r  generate_index_put_fallbackr  r  )	r  rk  r   r   valid_indicesr  iter_valid_indicesr   r  s	            r   rs  zIndexPutFallback.codegen:  s    ,,,,,&R&Rdk&R&R&R#F]!-00dl++ 	> 	>DAq|A*t$6778888qw3<====++  ""Aw	
9=9P9P9R9R	
 	
 	
 	
 	
 	
r   r   c                    dS r  r   r  s    r   r  z IndexPutFallback.should_allocateI  r  r   r  c                .    |                      d          gS r   r	  r  s    r   rb  z#IndexPutFallback.get_mutation_namesL  r	  r   r   c                    t                      S r   r:   r  s    r   r  z)IndexPutFallback.get_unbacked_symbol_defsO  r  r   rL  torch._ops.OpOverloadr   r   r  rs	  r   rz  
accumulater   c           	         | _         d |D             } fd||g|D             }d}t                                          d t          |                                                               |          |fd||           t          j                             	                    d                     t          j        
                                _        t          j                                        d S )Nc                    g | ]}||S r   r   r  s     r   r   z-IndexPutFallback.__init__.<locals>.<listcomp>[  s    ===qq}}}}r   c                :    g | ]}                     |          S r   r  )r   r   r  s     r   r   z-IndexPutFallback.__init__.<locals>.<listcomp>\  s'    NNNQ4%%a((NNNr   aoti_torch_index_put_outr  zaten.index_put_)rG  rH  rL  r   )r  r  rD  r  r#  rs  rm   r  r  r  rt  r   ru  )
r  rL  r   r  r   r
  r
  r
  rH  r  s
   `        r   rD  zIndexPutFallback.__init__R  s     ==G===NNNN1f2M}2MNNN4allnn---((M0+# 	 	
 	
 	
 	
##DOOA$6$6777G++D11		""4(((((r   r+	  rq  r  r  )rL  r
  r   r   r  rs	  r   rz  r
  r   r   r   r	  r  s   @r   r
  r
  5  s         
 
 
 
   $ $ $ $   ) ) ) ) ) ) ) ) ) )r   r
  c                  .    e Zd Zedd            ZddZdS )
DeviceCopyr   r   r  r  r	  r   r   c           	        |                                 sQt          d |                                D                       r&t          j        j        s|                    |          S t          j        	                    |           |
                                }|J t          j        	                    |           t          d           |f}t                              |          }d }|                                r|                                }t!          |j                  o|j        dk    o|}|j        dk    ot!          |j                  o|}|r(t%          |          rd|                                _        t+          t-          ||                                |                                ||          |                     |          g|          S )Nc              3  <   K   | ]}|t           j        j        v V  d S r   )rm   r  r  r  s     r   r   z$DeviceCopy.create.<locals>.<genexpr>q  s,      GGqA**GGGGGGr   zDeviceCopy in input programr8  Tr  )r\  r   r  rA   aot_inductoruse_runtime_constant_foldingr`  rm   r  add_device_infor#  r\   r  rg  r  rH  rd   r   r  r  rg  r!
  re  r"  r  )	r  r   r  r	  x_devicerC  r  is_destination_pinnedis_source_pinneds	            r   r  zDeviceCopy.createm  s    	0GGA4D4D4F4FGGGGG	0 'D	0
 ''///	'''<<>>###	)))7888%++A..::<< 	$\\^^F8=!!KfkU&:K| 	 MU"Kvfk':':K| 	  	, 5a 8 8 	,'+ALLNN$

/   q!!"

 

 
	
r   rk  rr   r   c                H   |                                  }t          |          dk    sJ | j        r<|                    |d         | j                                        |d                    d S |                    |d         |                                 |d                    d S )Nr   r   r@   )r  r   rF  r	  r  )r  rk  r   s      r   rs  zDeviceCopy.codegen  s      ""4yyA~~~~ 	T''Q);;==tAw     ''Q1G1G1I1I4PQ7SSSSSr   N)r   r   r  r  r	  r   r   r   r+	  )r   r   r   r  r  rs  r   r   r   r!
  r!
  l  sM        '
 '
 '
 ['
RT T T T T Tr   r!
  c                  r     e Zd ZdZddZddZd fdZddZ ed           	 ddd            Z	ddZ
 xZS )DynamicSelectStorageOffseta  
    The result of computing a dynamic selection index is determined as follows: when the index in the
    select operation is unbacked, the actual index calculation is ambiguous for negative indices
    (index + size) versus non-negative indices (just index). To resolve this, we allocate an unbacked
    SymInt to represent the storage offset and decompose the select operation into a call to as_strided,
    computing the storage offset at runtime with this node.
    r   rK  c                    t                      S r   r:   r  s    r   r  z$DynamicSelectStorageOffset.get_reads  r  r   r   c                    dS r  r   r  s    r   r  z*DynamicSelectStorageOffset.should_allocate  r  r   unbacked_offset_symbolsympy.Symbolr   base_offsetUnion[sympy.Symbol, int]base_dim_strider  r   c                    t                                          d t          t          j        d                    g            || _        || _        || _        || _        || _	        d S Nr8  r  )
r  rD  r  r%  r  r/
  r   r1
  r3
  r  )r  r/
  r   r1
  r3
  r  r  s         r   rD  z#DynamicSelectStorageOffset.__init__  sb     	ze1D1DEEErJJJ '=#
&.			r   r   c                ,    t          | j        g          S r   )r;   r/
  r  s    r   r  z3DynamicSelectStorageOffset.get_unbacked_symbol_defs  s    467888r   Fr  c                ,    t          | j        |          S r   )r)   r   rU  s     r   rV  z/DynamicSelectStorageOffset.get_free_symbol_uses  s      
M:::r   rk  rr   c                0    |                     |            d S r   )codegen_dynamic_select_indexrr  s     r   rs  z"DynamicSelectStorageOffset.codegen  r@	  r   r  rq  )r/
  r0
  r   r0
  r1
  r2
  r3
  r2
  r  r2
  r   r   r  r  r  r+	  )r   r   r   r  r  r  rD  r  rX   rV  rs  r  r  s   @r   r,
  r,
    s                    "9 9 9 9 899$); ; ; ; :9;
3 3 3 3 3 3 3 3r   r,
  c                  D     e Zd ZdZddZddZd fdZddZddZ xZ	S )r   z;
    The result of a call to aten._local_scalar_dense.
    r   rK  c                    t                      S r   r:   r  s    r   r  zDynamicScalar.get_reads  r  r   r   c                    dS r  r   r  s    r   r  zDynamicScalar.should_allocate  r  r   symr0
  keypathpytree.KeyPathrb  r   r   c                    |                                  t                                          d t          t	          j        d                    |                     |g                     || _        || _        d S r5
  )	r  r  rD  r  r%  r  rs  r=
  r>
  )r  r=
  r>
  rb  r  s       r   rD  zDynamicScalar.__init__  sn     	*EL$7$7888$:M:Mtf:U:U	
 	
 	
 r   r   c                ,    t          | j        g          S r   )r;   r=
  r  s    r   r  z&DynamicScalar.get_unbacked_symbol_defs  s    48*%%%r   rk  rr   c                0    |                     |            d S r   )codegen_dynamic_scalarrr  s     r   rs  zDynamicScalar.codegen  s    &&t,,,,,r   r  rq  )r=
  r0
  r>
  r?
  rb  r   r   r   r  r+	  )
r   r   r   r  r  r  rD  r  rs  r  r  s   @r   r   r     s                    & & & &- - - - - - - -r   r   c                  r     e Zd ZdZddZddZd fdZddZ ed           	 ddd            Z	ddZ
 xZS )r   z5
    The result of a call to aten._assert_scalar
    r   rK  c                    t                      S r   r:   r  s    r   r  zAssertScalar.get_reads  r  r   r   c                    dS r  r   r  s    r   r  zAssertScalar.should_allocate  r  r   scalarro   r  r   r   c                    t                                          d t          t          j        d                    g            || _        || _        d S r5
  )r  rD  r  r%  r  rG
  r  )r  rG
  r  r  s      r   rD  zAssertScalar.__init__  sQ    el511222	
 	
 	
 r   c                    dS r  r   r  s    r   r	  zAssertScalar.has_side_effects  r  r   Fr  r   c                ,    t          | j        |          S r   )r)   rG
  rU  s     r   rV  z!AssertScalar.get_free_symbol_uses  s      ];;;r   rk  rr   c           	        t           j        sd S t          t          |                     d                              }t
          j        j        rd S t
          j        j        rWd| d}t
          j        j	        
                    | j        d          }|                    d| d| j         d| d	           d S t
          j        j	                            | j        d          }|                    d
| d           |                    dt          | j                   d           |                    |                                  d           d S )NFr  zstd::to_string(r3  )r  zif (!(z()) { throw std::runtime_error("Expected z but received " + z); }zif not (z):z    raise RuntimeError(z = None)rA   scalar_assertsr  r  rV  rm   r  r  rw  r  codegen_cpp_sizevarrG
  r  r  codegen_python_sizevarr	  r#  )r  rk  symbol
symbol_strsizevars        r   rs  zAssertScalar.codegen
  st   $ 	F d44454IIJJKK7 	;DW  	;46444Jg*>>e ?  G xxx48xxgqxxx     g*AAe B  G 4444555ITXIIIJJJ 999:::::r   r  rq  )rG
  ro   r  r   r   r   r  r  r+	  )r   r   r   r  r  r  rD  r	  rX   rV  rs  r  r  s   @r   r   r     s               	 	 	 	 	 	    N++$)< < < < ,+<
; ; ; ; ; ; ; ;r   r   c                  $    e Zd ZU ded<   ded<   dS )ExternKernelNoder   r   zexport_schema.Noder   Nr   r   r   r   rS
  rS
  *  s'         IIIr   rS
  c                       e Zd ZdZ	 d0ddd1 fdZd2 fdZd3dZd4dZd5dZe	d6d!            Z
d7d#Zd8d%Zd8d&Zd' Zed3d(            Ze	d9d,            Zed:d.            Zd; fd/Z xZS )<FallbackKernelz
    A class that represents a fallback kernel for handling operators that are not
    directly support by inductor. It currently supports functional ops, view ops,
    inplace aten ops, and mutating ops that are auto-functionalizable.
    NrP  rc  r4  r5  r{   r  r|  nontensor_argsrz  r  r  r   rR  rP  ,Optional[dict[sympy.Symbol, pytree.KeyPath]]r   r   c               V    t                                          |t          |          t          |          |           d _        |pi  _        t          |t          j        j        t          j        j	        f          sJ d| dt          |           d            | _        | _        |i n| _         j        J t          j                             j                   g  _        g  _        t           j        t          j        j	                  rd S d j                                        v rd S  j        j        }t          j        j                             j                  r4 j                            |d                                                    d S |j        r!t9          |          st;          d|                                 j         j                  \  }	}d fd}
t          j        j                             ||	|          D ]\  }} |
||           d S )Nr	  F#Fails to create FallbackKernel for r2   not supported_c10d_functionalr   z'NYI: Can't generate FallbackKernel for infotorch._C.Argumentr)  r   r   r   c                    t           j        t          j                  r3t          |t          t
          f          sJ t          |                      t          j         j                  rt          |t
          t          f          rJ |d S  j        d S d fd}t          j	         j                  r||D ]} ||           d S d S t          j         j                  sJ  ||           d S )Nr(  r   r   r   c                    j                             |                                            j        J j        j        rKj                            t          t          |                                           |                      d S d S r  )	alias_namesr  r#  
alias_infois_writerQ  r  r  r#  )r(  r]
  r  s    r   	add_aliaszPFallbackKernel.__init__.<locals>.handle_aliasing_and_mutation.<locals>.add_alias  s     ''

555222?+ )00&z'H'H'H!TRR     r   )r(  r   r   r   )
r   r   r%  ListTyper   r   library_utilsis_tensor_like_typerb
  is_tensorlist_like_type)r]
  r)  rd
  optional_tensor_argr  s   `   r   handle_aliasing_and_mutationz=FallbackKernel.__init__.<locals>.handle_aliasing_and_mutation  s+   $)U^44 A!#e}55@@tCyy@@50;; : &cE4=99999{&       4TY?? ?/2 7 7+!	"56666 #?7 7 %8CCCCC	#r   )r]
  r^
  r)  r   r   r   )!r  rD  r   use_runtime_dispatchrP  r   r%  rd  re  r  r   rL  r  r   rG  rm   r  warn_fallbackra
  rI	  r   rf  _libraryr  mutates_and_returns_first_argr  r#  
is_mutabler&   r  r{  rC  
zip_schema)r  rc  r5  r  rW
  r  r   rP  schemar   rj
  r]
  r)  r  s   `            r   rD  zFallbackKernel.__init__7  sj    	+.!!	 	 	
 	
 	
 %*!!2!8bUZ*EJ,JK
 
 	X 	XWWW4<<WWW	X 	X 
 ","Nbb&222	d5666 '))+d&
(FGG 	 F!1!6!6!8!888
 F!) >==d>NOO 	&&{1~'>'>'@'@AAAF 	%;F%C%C 	%B&BB   **4;8JKKf	 	 	 	 	 	< -88vNN 	4 	4ID#((s3333	4 	4r   rH  c                2   t                                                      }| j        t          j        j        j        u rX| j        D ]P}t          |t                    r9|
                    t          j        |                                                    }Q|S r   )r  rJ  rL  r%  _prims	rng_primsgraphsafe_run_with_rng_staterC  r   r  	with_readrB   r"  r#  )r  r  r)  r  s      r   rJ  zFallbackKernel.get_read_writes  s    gg--//u|5RRR)  c>22 "-"7"7$,S\\^^<<# #K r   rk  rr   c           	     |    |                     |                                 | j        t          | dd                     S NrP  )(codegen_unbacked_symbol_defs_for_outputsr#  r  r   rr  s     r   codegen_unbacked_symbol_defsz+FallbackKernel.codegen_unbacked_symbol_defs  s8    ??MMOOT\749Ld+S+S
 
 	
r   Container[sympy.Symbol]c                    t          | dd           x}r<t          t          j        j        j        |          }|J |                                S t                      S rx
  )r   r6   rm   r  r  r   r  r;   r  rP  resolveds      r   r  z'FallbackKernel.get_unbacked_symbol_defs  s`     '.A4 H HH 	 0 *,= H '''==??"<<r   r   c                
   t           j         G d d                      t          | j                  sJ fd| j        D             }|                     || j                  \  }}t          j        j        rdt          | j
        t          j        j                  r@|                     ||          }d t          | j
        j        j        |          D             }nd |D             }| j                            |           |S )Nc                  "    e Zd ZU ded<   ddZdS ))FallbackKernel.codegen_args.<locals>.Shimr   refr   r   c                    | j         S r   )r
  r  s    r   r  z2FallbackKernel.codegen_args.<locals>.Shim.__repr__  s	    xr   Nr{  )r   r   r   r   r  r   r   r   Shimr
    s3         HHH           r   r
  c                J    g | ]} |                                            S r   r	  )r   r   r
  s     r   r   z/FallbackKernel.codegen_args.<locals>.<listcomp>  s/    HHHqttA//1122HHHr   c                h    g | ]/\  }}t           j        j                            ||j                  0S r   )rm   r  r  r  r^  )r   paramr   s      r   r   z/FallbackKernel.codegen_args.<locals>.<listcomp>  sC       E1 $33AuGG  r   c                V    g | ]&}t           j        j                            |          'S r   r  r  s     r   r   z/FallbackKernel.codegen_args.<locals>.<listcomp>  s+    IIIqAG(77::IIIr   )r  	dataclassr  r{  r  rC  rm   r  rw  r   rL  r%  rd  re  r  r   rf  rg  r   r  )r  r  r   r   r
  s       @r   r  zFallbackKernel.codegen_args  s+   			  	  	  	  	  	  	  
		   ,,,,,HHHHDKHHH**;8JKKf7 	J:d.>
@U#V#V 	J..tV<<D  #D$4$<$F M M  DD
 JIDIIID 	6"""r    Optional[Sequence[torch.Tensor]]r  r   c                   | rd | D             nd }|r| sJ d | D             }|d         S t          |t          j                  r|j        S t          |t          t
          f          rt          d |D                       }d |D             }t          |          dk    r|d         S |D ]6}t          |t          j                  sJ t          |j	                  r|c S 7|d         S d S )Nc                <    g | ]}t          |t                    |S r   )r   r  r	  s     r   r   z.FallbackKernel.find_device.<locals>.<listcomp>  s'    JJJ1:a+I+IJQJJJr   c                ^    g | ]*}|                                 |                                 +S r   r  r(  s     r   r   z.FallbackKernel.find_device.<locals>.<listcomp>  s1    SSSC#..BRBRSs~~''SSSr   r   c              3  L   K   | ]}t                               d |          V   d S r   )rU
  find_devicer  s     r   r   z-FallbackKernel.find_device.<locals>.<genexpr>  sC       $ $89**433$ $ $ $ $ $r   c                    g | ]}||S r   r   )r   r  s     r   r   z.FallbackKernel.find_device.<locals>.<listcomp>  s    AAA&&AvAAAr   r@   )
r   r%  r  r  r   r   r;   r   rd   r   )r  r  non_torch_bind_tensor_argsdevices
device_setr  s         r   r
  zFallbackKernel.find_device  s?    JJJJJJ 	#
 & 	;SS;SSSG1:nel33 	)!((ntUm44 	# $ $=K$ $ $  J BAJAAAG7||q  qz!! " "!&%,77777&+&& "!MMM"1:tr   r   c                    t          | j        t          j        j                  rdS t          | j                                                  S r  )r   rL  r%  rd  r  r/   ro
  r  s    r   r	  zFallbackKernel.has_side_effects  s>    d&
(FGG 	5t/00;;===r   r  c                   t          | j        t          j        j        t          j        j        f          s(J d| j         dt          | j                   d            t          | j        t          j        j                  sBd| j                                        vr'| j        j        j	        rt          | j                  rg S | j        S )NrZ
  r2  r[
  r\
  )r   rL  r%  rd  re  r  r   r   rf  ro
  r&   ra
  r  s    r   rf  z+FallbackKernel.get_inputs_that_alias_output  s    uz4ej6TU
 
 	
 	
6$2B 6 6D$%%6 6 6	
 	
 
 4+UZ-KLL	$"$*:*?*?*A*AAA (3 B&t'788 B I##r   c                D    t          | j                  dk    sJ | j        S r  )r   rI	  r  s    r   rb  z!FallbackKernel.get_mutation_names  s'    4&''1,,,,""r   c           	     f    t                               d                                  j                   t	           t
                    sJ t                                                       j         j	                  \  } 
                    |          } fd j        D             } j        }t          j        j        sg ||S t          dg           }|                    ||          }dd
t	          |t"          j        j        j                  r(|                    |d         |d                   j        }n|j        j        }t1          |          dk    r0 j        r j        n j        }|d         j        } ||          g}	n!fdt9          | j                  D             }	 j        J t;                                           t=          j         j                                         ||	i                     }
t          j!        "                    |
           g ||S )a  
        ProxyExecutor Design Note
        We export the ExternFallbackNodes (for custom ops) into a serialized file
        and run it with a host side proxy executor to address the ABI problem
        This is currently only implemented for fbcode. Eventually, we will also make this work for OSS.
        Detailed design doc can be found at
        https://docs.google.com/document/d/1wC4DOZFaYym2t1Esz0X5yxlLI3RDnSiyRbUus3bkJ64/edit?usp=sharing
        z4Extern kernel node added for node %s with target %s.c                ,    g | ]} j         |fi S r   r	  )r   r  r   r  s     r   r   z<FallbackKernel.export_extern_kernel_node.<locals>.<listcomp>"  s>     
 
 
 "D!#0000
 
 
r   Nreturn_type6Union[torch.TensorType, torch.ListType, torch.JitType]rq  Union[IRNode, Sequence[IRNode]]r   export_schema.Argumentc                   t          | t          j        t          j        f          r|}t          |t          t
          f          rt          |          dk    sJ |d         }t          | t          j                  r\t          |t                    sJ t          j	        
                    t          j        |                                                    S |J t          j	        
                    d          S t          | t          j                  rt          |                                 t          j                  rVt          |t                    sJ t!          |                      t          j	        
                    d |D                       S t          | t          j                  rt          |                                 t          j                  r|>t          j	        
                    t          j        
                    d          	          S t          |t                    sJ t          j	        
                    t          j        
                    t          j        |                                                    	          S t          | t          j                  r t          j	        
                    |
          S t)          dt!          |                      )Nr@   r   r  )	as_tensorT)as_nonec                \    g | ])}t          j        |                                           *S )r  )export_schemaTensorArgumentr#  )r   r  s     r   r   zZFallbackKernel.export_extern_kernel_node.<locals>.handle_single_output.<locals>.<listcomp>H  s>           &4#,,..III     r   )
as_tensors)as_optional_tensor)as_intzUnsupported return type )r   r%  
TensorTypeNoneTyper   r   r   r   r
  rp   r  r
  r#  re
  getElementTyper   r   OptionalTypeOptionalTensorArgumentIntTypeRuntimeError)r
  rq  r  s      r   handle_single_outputzFFallbackKernel.export_extern_kernel_node.<locals>.handle_single_output0  s    +(8%.'IJJ /SftUm44 $v;;!++++ )Ck5+;<< G%c622222(188"/">CLLNN"S"S"S 9    ;;;(1888FFFK88 !SZ**,,e.>> > !S "&(33AAT&\\AA3$-44   #)      5    K);<< S**,,e.>B B S >(188+8+O+V+V$( ,W , , 9    &ff55555(188+8+O+V+V&3&B%+__%6%6' ' ' ,W , , 9    K77 S$-44F4CCC"#Qd;>O>O#Q#QRRRr   r   r@   c                8    g | ]\  }} |j         |          S r   )r^  )r   return_schemarq  r
  s      r   r   z<FallbackKernel.export_extern_kernel_node.<locals>.<listcomp>r  sE          
 *M6	 %$!+      r   )r'  r{  r  metadata)r   r   )r
  r
  rq  r
  r   r
  )#r  r  r#  rL  r   rU
  r   r  r{  rC  r  rJ  rm   r  aot_moder%   serialize_inputsr%  r  	torchbindCallTorchBindrq
  returnsrf  r   r  rQ  r^  r   rS
  r
  r9   r   extern_kernel_nodesr  )r  r   ordered_kwargsr'  
serializernamed_argumentsr
  r  r
  output_argumentsr   r
  r   s   `          @@r   export_extern_kernel_nodez(FallbackKernel.export_extern_kernel_node  st    			BMMOO	
 	
 	
 $//;;d;;/**4;8JKKf**488
 
 
 
 
9
 
 
 !w 	,+T+N++*444
$55fdFKK3	S 3	S 3	S 3	Sj fe5?MNN 	-mmDGT!W55=GGn,Gw<<1 '+lMdll8MG!!*.K 4 4[' J JK       
 .1$,-G-G      +++#',,..&(	  
 
 
 	
$$T***''''r   c                     j         }|J |j        dk    rt          |t          j        j                  sJ t          |                      t          j        j	        r9ddl
m} t          |          |vr"t                              d|           d _        nk|j        dk    r7t          |t          j        j                  sJ t          |                      n)t          j        j	        r|t           j        j        v _        t          j        j	        rt          |t          j        j                  r j        sdfd                      j         j                  \  }t-          j        | fd j        D                       }t3          fdt5          ||j        j                  D                        _                             |            j        rs                                 } j        J  j         J |                      !                                 j         fd j         | j"        r j"        n j#                   nn|$                                t           j%        tL                    r? '                    |            (                    |            )                    |            *                    |           dS )r	  Nru  r   )inductor_fallback_opszG%s is missing a c-shim implementation, using proxy executor as fallbackT
_quantizedr(  torch.JitTyper   r   c                    t          | t          j                  r |                                           S t          | t          j                  S r   )r   r%  r
  r
  
NumberType)r(  	is_numbers    r   r
  z)FallbackKernel.codegen.<locals>.is_number  sG    a!344 9$9Q%5%5%7%7888!!U%5666r   c              3  4   K   | ]} j         |fi V  d S r   r	  )r   r  r   r  s     r   r   z)FallbackKernel.codegen.<locals>.<genexpr>  sL         *D)!66v66     r   c              3  h   K   | ],\  }}t          |t                    o |j                  V  -d S r   )r   complexr^  )r   r&  r@  r
  s      r   r   z)FallbackKernel.codegen.<locals>.<genexpr>  sV       , ,Aq 1g&&A99Q[+A+A, , , , , ,r   c                 X    g                                                                    S r   )r  r  r  s   r   r  z(FallbackKernel.codegen.<locals>.<lambda>  s*    F$++--F0C0C0E0EF r   )r(  r
  r   r   )+rL  rx  r   r%  rd  re  r   rm   r  rw  torchgen.aoti.fallback_opsr
  r   r  re  rk
  rA   r$
  custom_ops_to_c_shimsr  r{  rC  r  r	  rJ  r3  r   rf  rg  rp  r
  rG  ,generate_fallback_kernel_with_runtime_lookupr#  r  rQ  generate_fallback_kernelrc  r  r	  r	  r	  rz
  )	r  rk  r5  r
  r   	args_iterexported_argsr
  r   s	   `      @@r   rs  zFallbackKernel.codegen  s/    !!!!v%%fej&;<<JJd6llJJ<w" 
5LLLLLLv;;&;;; KKa   15D---fej&;<<JJd6llJJ<JW  	 f1GG % G	65:#899	 -	7 7 7 7 7 7  ..t{D<NOOLD& "    !?   I ), , , , ,	6>+CDD, , , ) )D%
 	W%%%$ 	6 ::<<M*666#///@@'FFFF  $G$2G    ,,T222$+v.. 6))'222..w777,,W555))'22222r   rq  r  re  c           	     
   d}	 |                                  }n# t          $ r Y nw xY wt          | j        | j        t          |                                           t          |                                           |          S )NFr  )rg  r
  re  r  r  rZ   r  r  )rq  rg  s     r   tensor_to_layoutzFallbackKernel.tensor_to_layout  s    		((**II 	 	 	D	 ML%fkkmm44%fmmoo66
 
 
 	
s    
&&r   c                F    t           j        f}||vr+t          t          d         t          j        j                  }nt                      }|5    j        |g|R i |\  }}}}	}
ddd           n# 1 swxY w Y   t          d |D                        
                    ||          }|s8t          |t          j        j        j                  rt          j        d          }|   t#          |          ||||	|
          n+|s
J d              t%          |          ||||	|
          d fd |g           }t          |t&          t(          f          r|_        n2t          |t,                    rt)          |          _        n|g_        |S )z9Create an instance of FallbackKernel from an _OpOverloadsNc              3  4   K   | ]}t          |          V  d S r   )r  r(  s     r   r   z(FallbackKernel.create.<locals>.<genexpr>  s*      !K!K,s"3"3!K!K!K!K!K!Kr   r8  r  rV
  z"Not sure where to find device inforq  r   r  list[tuple[Any, int]]r   c                    t           t          t          f          r@ t                      fdt	          t                               D                       S t           t                    r" fd                                 D             S t           t          j	                  rlt                                                   }t          j        sst                     s)t          j        j                            |j                   |S t           t(                    r S t           t          j                  r j        j        S  J dt                      d            d S )Nc              3  b   K   | ])} |         t                    |fgz             V  *d S r   r   )r   r   generate_outputr  rq  s     r   r   zAFallbackKernel.create.<locals>.generate_output.<locals>.<genexpr>$  sZ       $ $ $OF1Iw4<<:K9L/LMM$ $ $ $ $ $r   c           
     V    i | ]%\  }}| |t                    |fgz             &S r   r
  )r   r  r  r
  r  rq  s      r   r   zBFallbackKernel.create.<locals>.generate_output.<locals>.<dictcomp>)  sN        S g$v,,9L8M.MNN  r   zFallbackKernel output type z is not supported)r   r   r   r   r   r   r   r2  r%  r  MultiOutputr
  rA    assume_unaligned_fallback_outputrj   rm   r  r  r  r   r   SymIntr   r)  )rq  r  r  r  r
  has_unaligned_inputpackeds   `` r   r
  z.FallbackKernel.create.<locals>.generate_output"  s   &4-00 #tF|| $ $ $ $ $ $"3v;;//$ $ $    FD))      $*LLNN    FEL11 !((00  ;<*< -V44<
 G-11#(;;;
FC(( FEL11 {''~~Q$v,,QQQ &~~ tr   )rq  r   r  r
  r   r   )ru  *_fused_moving_avg_obs_fq_helper_functionalr   r	   rm   r  r  r
   r  r3  r
  r   r%  r  r
  r
  r  r  r  r   r   r  r   )r  r5  r   r   fake_incorrect_kernelscontextr  r  r  r  rP  r  r  r
  r
  r
  s   `            @@@r   r  zFallbackKernel.create  sS    #'"Q!S///1$79JKKGG!mmG 	< 	< #"6;D;;;F;;!	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< "!K!K{!K!K!KKKn== 	)*E+5C
 
 	) \%((F!S&)))"3  FF ?????6S!000"3  F 	  	  	  	  	  	  	  	  	D "/."55ge}-- 	'$FNN&& 	'"7^^FNN%YFNs   A55A9<A9c                D    t                                                      S r   )r  rj  r  s    r   rj  zFallbackKernel.apply_constraintM  s    ww'')))r   r   rc  r4  r5  r{   r  r|  rW
  rz  r  r  r   rR  rP  rX
  r   r   r  r+	  )r   r{
  r-	  )r  r
  r  rz  r   r   rq  r  )rq  r  r   re  )r5  r{   r   r   r   r   r   rU
  ri  )r   r   r   r  rD  rJ  rz
  r  r  r  r
  r	  rf  rb  r
  r   rs  r
  r  r  rj  r  r  s   @r   rU
  rU
  0  s         ,0h4 KOh4 h4 h4 h4 h4 h4 h4 h4T
 
 
 
 
 

 
 
 

          0    \:> > > >
$ $ $ $*# # # #w( w( w(r S3 S3 S3 XS3j 
 
 
 \
 [ [ [ [[z* * * * * * * * * *r   rU
  c                  :     e Zd ZdZddZddZddd fdZ xZS )ComplexViewz9View a complex number as two dtyped numbers or vice versar   r   c                    dS r  r   r  s    r   r  zComplexView.should_allocateU  r  r   r  c                .    |                      d          gS r   r	  r  s    r   rf  z(ComplexView.get_inputs_that_alias_outputX  s    ""##r   NrV
  rc  r4  r5  r{   r  r|  rW
  rz  r  r  rP  rX
  r   c               V    t                                          ||||||           d S )NrV
  )r  rD  )r  rc  r5  r  rW
  r  rP  r  s          r   rD  zComplexView.__init__\  sB     	/ 	 	
 	
 	
 	
 	
r   rq  r  )rc  r4  r5  r{   r  r|  rW
  rz  r  r  rP  rX
  r   r   )r   r   r   r  r  rf  rD  r  r  s   @r   r
  r
  Q  s|        CC   $ $ $ $ KO
 
 
 
 
 
 
 
 
 
 
 
r   r
  c                      e Zd ZdZddZdS )	MemoryCheckKernelz
    Custom kernel for memory checking that generates direct function calls

    TODO - the custom op was erroring with str inputs. should be able to custom op directly.
    rk  rr   r   r   c                   |                                  | j        \  }}}t          |          }t          |          }|r"|                    d           d| d| d| d}n	d| d| d}|                    |           dS )z.Override codegen to write direct function callzV# note: dont currently distinguish between buffers returned and dealloc'd in last stepzcheck_memory_step(allocated=z, freed=z, is_final_step=r3  N)r	  rC  r	  r  )r  rk  
alive_list	dead_listis_final_step
alive_repr	dead_reprcalls           r   rs  zMemoryCheckKernel.codegenw  s     	22444/3/A,
I}*%%
OO	 	Sh   r*qqiqqanqqqDDR*RRiRRRD$r   Nr+	  )r   r   r   r  rs  r   r   r   r
  r
  p  s2                    r   r
  c                  "    e Zd ZU ded<   ddZdS )r  r  r  r   r  c                    | j         S r   r  r  s    r   r#  zMultiOutputLayout.get_device  r  r   Nrv  )r   r   r   r   r#  r   r   r   r  r    s6              r   r  c                  j     e Zd ZddZ	 dd fdZ ed           	 ddd            ZddZddZ xZ	S )r
  rk  rr   r   r   c                    |                     |            | j        s,|                     |           |                     |           d S d S r   )codegen_multi_output!skip_size_stride_alignment_checksr	  r	  rr  s     r   rs  zMultiOutput.codegen  sY    $$T***5 	4%%g...**733333	4 	4r   Frc  r4  r
  r   r  list[tuple[Any, ...]]r
  r   c                    t                                          d ||gd           t          j                            |           | _        t          j                            |            || _        || _        d S r  )	r  rD  rm   r  rt  r   ru  r  r
  )r  rc  r
  r  r
  r  s        r   rD  zMultiOutput.__init__  sg     	vw333G++D11		""4(((1R...r   r  r   c                    | j         d         }t          |t                    s
J |            |                    |          S r   )r{  r   r   rV  )r  r  r  s      r   rV  z MultiOutput.get_free_symbol_uses  sA     [^
*f--99z99-..}===r   c                r    t          | j                  dk    ot          | j        d         t                    S )Nr@   r   )r   r{  r   r  r  s    r   r  zMultiOutput.should_allocate  s2    4;1$ 
t{1~'899	
r   r  c                $    d | j         D             S )Nc                    g | ]P}t          |t                    r9t          |                                          d k    <|                                QS r  )r   rU
  r   rf  r#  r   r  s     r   r   z<MultiOutput.get_inputs_that_alias_output.<locals>.<listcomp>  s_     
 
 
#~..
 C446677!;; LLNN <;;r   )r{  r  s    r   rf  z(MultiOutput.get_inputs_that_alias_output  s%    
 
{
 
 
 	
r   r+	  r  )
rc  r4  r
  r   r  r
  r
  r   r   r   r  rq  r  )
r   r   r   rs  rD  rX   rV  r  rf  r  r  s   @r   r
  r
    s        4 4 4 4 38S S S S S S S M**$)> > > > +*>
 
 
 


 
 
 
 
 
 
 
r   r
  c                     e Zd ZU dZded<   dOdZdPdZdQd
ZdRdZdSdZ	dTdZ
dUdVdZdWdZdXdZdYdZdOdZdXdZ	 dZd[d!Zd\d"Zd]d$Z	 dZd^d&Zd_d(Zd`d*Zdad+Zdbd-Zdcd/Zddd1ZdOd2ZdOd3Zded6Zdfd8ZdTd9Zdfd:Z dcd;Z! e"d           	 dZdgd>            Z#dhd@Z$didBZ%dUdjdEZ&e'dkdG            Z(dldIZ)dkdJZ*dddKZ+e'dmdM            Z,dTdNZ-e-Z.dS )nr  zC
    TensorBox / StorageBox allow in-place mutation of Tensors
    r   rb  r   r   c                4    | j                                         S r   r  r  s    r   r  z!MutableBox.has_exceeded_max_reads  r  r   r  c                4    | j                                         S r   r  r  s    r   r#  zMutableBox.get_device  r  r   r  c                4    | j                                         S r   r  r  s    r   r  zMutableBox.make_loader      y$$&&&r   r  c                4    | j                                         S r   )rb  r  r  s    r   r  zMutableBox.make_indexer      y%%'''r   rO  c                4    | j                                         S r   )rb  rH  r  s    r   rH  zMutableBox.get_stride  r  r   r   c                4    | j                                         S r   r  r  s    r   r#  zMutableBox.get_name  r  r   Nr)  r*  c                6    | j                             |          S r   )rb  r-  r,  s     r   r-  zMutableBox.has_large_inner_fn  s    y++I666r   r.  r   r   c                6    | j                             |          S r   r  r0  s     r   r1  zMutableBox.mark_reuse  r  r   c                4    | j                                         S r   r  r  s    r   r4  zMutableBox.realize_hint  r  r   c                4    | j                                         S r   )rb  r6  r  s    r   r6  zMutableBox.unwrap_view  r   r   c                4    | j                                         S r   )rb  r(  r  s    r   r(  zMutableBox.is_input_buffer      y((***r   c                4    | j                                         S r   )rb  r8  r  s    r   r8  zMutableBox.freeze_layout  s    y&&(((r   Fr   r   r9  c                8    | j                             ||          S r   )rb  r<  r;  s      r   r<  z*MutableBox.freeze_layout_with_stride_order  s     y88NNNr   c                6    | j                             |          S r   )rb  r@  r?  s     r   r@  z(MutableBox.freeze_layout_with_fill_order  s    y66u===r   r  c                6    | j                             |          S r   )rb  rC  rB  s     r   rC  z(MutableBox.freeze_layout_with_same_order  s    y66v>>>r   rD  c                8    | j                             ||          S r   )rb  rG  rF  s      r   rG  z+MutableBox.freeze_layout_with_exact_strides  s     y99-WWWr   rH  c                4    | j                                         S r   )rb  rJ  r  s    r   rJ  zMutableBox.get_read_writes  r
  r   rK  c                4    | j                                         S r   r  r  s    r   r  zMutableBox.get_reads  r  r   c                4    | j                                         S r   r  r  s    r   rQ  zMutableBox.num_reads  r  r   ry   c                4    | j                                         S r   r	  r  s    r   rS  zMutableBox.get_storage_numel   r
  r   r0  c                4    | j                                         S r   rj  r  s    r   rX  zMutableBox.get_reduction_type   rh  r   r  c                4    | j                                         S r   rg  r  s    r   rZ  zMutableBox.get_reduction_size   rh  r   c                4    | j                                         S r   r  r  s    r   r\  zMutableBox.is_extern   r  r   c                4    | j                                         S r   )rb  r^  r  s    r   r^  zMutableBox.is_no_op   r  r   r  r  c                6    | j                             |          S r   rn  r  s     r   r`  zMutableBox.constant_to_device   s    y++F333r   r  c                4    | j                                         S r   )rb  rb  r  s    r   rb  zMutableBox.get_mutation_names   rh  r   c                4    | j                                         S r   )rb  rd  r  s    r   rd  zMutableBox.get_operation_name   rh  r   c                4    | j                                         S r   )rb  rf  r  s    r   rf  z'MutableBox.get_inputs_that_alias_output   s    y55777r   c                4    | j                                         S r   r  r  s    r   r  zMutableBox.realize   r  r   r  r   c                6    | j                             |          S r   r  rU  s     r   rV  zMutableBox.get_free_symbol_uses    s     y--m<<<r   r  c                4    | j                                         S r   r  r  s    r   r  zMutableBox.get_read_names&   r  r   r  c                4    | j                                         S r   )rb  r  r  s    r   r  zMutableBox.get_defining_op)   r
  r   r  r	  c                6    | j                             |          S r   )rb  r  r  s     r   r  zMutableBox.codegen_reference,   s    y**6222r   r4  c                4    | j                                         S r   rb  r  r  s    r   rc  zMutableBox.layout/   s     y((***r   r  c                4    | j                                         S r   r  r  s    r   r  zMutableBox.get_layout4   r  r   c                4    | j                                         S r   r"  r  s    r   r  zMutableBox.get_output_spec7   r
  r   c                4    | j                                         S r   r  r  s    r   r  zMutableBox.get_size:   r  r   r  c                    | j         j        S r   )rb  r  r  s    r   r  zMutableBox.dtype=   s    yr   c                ^   t          | j        t                    r@t          |           j         dt          | j                  j         d}d}| j        j        }n t          |           j         d}| j        }d}|t          t          |                    |g}d                    |          S )Nr  z))r3  r  )r   rb  r  r   r   r  r   r  )r  line0endlr  r  s        r   r  zMutableBox.__str__A   s    di,, 	Dzz*HHT$)__-EHHHEDINEEDzz*---EIED 3u::

 yyr   rq  rv  rx  ry  rz  r{  r   r|  r}  ri  r~  r  r  r  r  r  r  r  r  r  rt  rr  r  r  r  rj  rl  ru  rp  ro  rn  )/r   r   r   r  r   r  r#  r  r  rH  r#  r-  r1  r4  r6  r(  r8  r<  r@  rC  rG  rJ  r  rQ  rS  rX  rZ  r\  r^  r`  rb  rd  rf  r  rX   rV  r  r  r  r  rc  r  r  r  r  r  r  r   r   r   r  r    s          LLL2 2 2 2& & & &' ' ' '( ( ( (& & & &$ $ $ $7 7 7 7 7+ + + +( ( ( (' ' ' '+ + + +) ) ) ) ;@O O O O O
> > > >? ? ? ? HMX X X X X
+ + + +% % % %% % % %- - - -. . . .. . . .% % % %$ $ $ $4 4 4 4. . . .. . . .8 8 8 8# # # # L))$)= = = = *)=
* * * *+ + + +3 3 3 3 3 + + + X+& & & &+ + + +$ $ $ $    X       " HHHr   r  c                  &    e Zd Zedd            ZdS )r   rb  r   r   r  c                h    t          | t                    r| S t          t          |                     S r   )r   r   r   r  )rb  s    r   r  zTensorBox.createV   s0    d122 	KD))***r   N)rb  r   r   r  )r   r   r   r  r  r   r   r   r   r   U   s2        + + + \+ + +r   c                  Z    e Zd ZdZddZddZddZdd	ZddZddZ	ddZ
ddZddZdS )r  z7
    StorageBox allow in-place mutation of Tensors
    r   r   c                    t          | j        t          t          f          r*| j                                        t
          j        j        v S dS r  )r   rb  r  rh  r#  rm   r  r'  r  s    r   r(  zStorageBox.is_input_bufferb   s?    di+!?@@ 	@9%%''17+???ur   c                    t          | j        t                    o)| j                                        t          j        j        v S r   )r   rb  r%  r#  rm   r  r  r  s    r   r  zStorageBox.is_module_bufferg   s6    ty>33 :	""$$(99	
r   r0  c           	     f   t                               | j                  r| j                                        S t	          | j        t
          t          t          t          f          sJ t          | j                              | j        
                                }| j                                        }| j                                        }|J t          d t          || j                                        | j                                        d          | j                  | _        t"          j                            | j                  | j        _        t"          j                            | j                   | j        | j        _        || j        _        || j        _        | j        j        S )NF)r  r  r  rg  r  )r   r  rb  r#  r   r  rd  ri  r  r   r  r  r#  r  r   r"  r  rm   r  rt  r   ru  r  r  r  )r  r  r  r  s       r   r  zStorageBox.realizem   sp   ""49-- 	(9%%'''$)iD$%GHH 	
 	
$IK
 K
 	
 	
H i//11I++--	%%''!!!"!i))++Y''))	   	
 	
 	
	 00;;		""49--- L	 +	'	y~r   r   c                    t          | j        t          t          f          r8| j                                        j        dk    r|                                  dS dS dS )zL
        Called on buffers we expect to be forced to realize later.
        r@   N)r   rb  r  rd  r  nontrivial_read_countr  r  s    r   r4  zStorageBox.realize_hint   sZ    
 ty9i"899		**,,BQFFLLNNNNN		 	FFr   r)  r   c                `    t          d |                                 D                       |k    S )Nc              3  T   K   | ]#}t           j                            |          V  $d S r   )rm   r  get_dep_size_hintr  s     r   r   zBStorageBox.has_accumulated_enough_reads_by_size.<locals>.<genexpr>   s2      KK3))#..KKKKKKr   )r6  r  r,  s     r   $has_accumulated_enough_reads_by_sizez/StorageBox.has_accumulated_enough_reads_by_size   s0    KK$..:J:JKKKKKiW	
r   c                    t          | j        t                    ob|                                 t          j        k    p@|                                 p,t          j        d uo|                     t          j                  S r   )	r   rb  r  rQ  rA   realize_acc_reads_thresholdr-   realize_acc_reads_size_thresholdr5  r  s    r   r  z!StorageBox.has_exceeded_max_reads   st    $)Y// 	
NNvAA &&(( 7tC ==; 		
r   r.  c                ^   |dk    rt          | j        t          t          f          rt	          | j                  r:| j                                        ddg}t          fd|D                       rdS |                                 t          j	        k    p| 
                                S dS )zj
        A heuristic to decide if we should realize a tensor
        that is used multiple times.
        r@   expsigmoidc              3  *   K   | ]}|j         v V  d S r   )used_ops)r   r   opcounts     r   r   z5StorageBox.should_realize_on_reuse.<locals>.<genexpr>   s+      @@qG,,@@@@@@r   TF)r   rb  r  rd  rB  r  r3  rQ  rA   realize_reads_thresholdr-  )r  r.  	heavy_opsr>  s      @r   should_realize_on_reusez"StorageBox.should_realize_on_reuse   s    
 199DI	9/EFF9di    )4466"I.	@@@@i@@@@@  4  6#AA -**,, ur   c                \    |                      |          r|                                  d S d S r   )rA  r  r0  s     r   r1  zStorageBox.mark_reuse   s3    ''.. 	LLNNNNN	 	r   c                4    | j                                         S r   r  r  s    r   rQ  zStorageBox.num_reads   r  r   Nrq  rt  ri  )r)  r   r   r   )r.  r   r   r   r}  r  )r   r   r   r  r(  r  r  r4  r5  r  rA  r1  rQ  r   r   r   r  r  ]   s            

 
 
 
   :   
 
 
 



 

 

 

   $   % % % % % %r   r  c                  2    e Zd ZU ded<   ded<   dZded<   dS )Subgraphr   r   ro  graph_moduleNzOptional[GraphLowering]r  )r   r   r   r   r  r   r   r   rE  rE     s8         III&&&&%)E))))))r   rE  buffersc                    d | D             } t          t          d | D                                 t          |           k     S )Nc                d    g | ]-}t          |t                    r|                                n|.S r   r   rh  r6  r   r  s     r   r   z(_has_aliased_buffers.<locals>.<listcomp>   sH        !+6? C CO  r   c              3  4   K   | ]}t          |          V  d S r   )r_	  rK  s     r   r   z'_has_aliased_buffers.<locals>.<genexpr>   s(      ;;"V**;;;;;;r   )r   r;   )rG  s    r   _has_aliased_buffersrM     sR       G
 z;;7;;;;;<<s7||KKr   c                  p     e Zd ZU dZdZded<   dZded<   dZded<   d fdZe	dd            Z
ddZ xZS )InvokeSubgraphz.
    Ir node for the invoke_subgraph HOP.
    NOptional[Subgraph]r}	  Optional[Sequence[IRNode]]operandsr  rE  r|  rc  r  r   r   c                    t                                          d ||           || _        t          j                            |           | _        t          j                            |            d S rT  )r  rD  r}	  rm   r  rt  r   ru  )r  r}	  rR  rc  r  s       r   rD  zInvokeSubgraph.__init__   sk     	 	 	
 	
 	

 !G++D11		""4(((((r   r   Elist[Union[ShapeAsConstantBuffer, NoneAsConstantBuffer, MultiOutput]]c                    ddl m} t          j        j        }d}|j                            d          x}r|d         dd         }n|j        dd         }d |D             } fd|D             }g }t          |          D ]\\  }	}
t          |
t          t          f          r|                    |
           7|                     ||
||	                              ]|}|j        qt          j                            |j        ||j        	          |_        t          j        |j                  5   |j        j        |  ddd           n# 1 swxY w Y   |j        j        }d}|D ]-}
t          |
t                    s|
                                } n.|J t)          ||t+          |
                    dfdfdt          |          D             }|_        |S )zFor each operand, get a realized input, force it to have the same
        strides as the subgraph inputs, then use an InvokeSubgraphr@   )constrain_to_fake_tensorNeager_input_valsr   r   c                (    g | ]}|j         d          S r  rw  r  s     r   r   z)InvokeSubgraph.create.<locals>.<listcomp>   s    @@@qQVE]@@@r   c                :    g | ]}                     |          S r   r  r  s     r   r   z)InvokeSubgraph.create.<locals>.<listcomp>   s'    !I!I!I1#"3"3A"6"6!I!I!Ir   rn  rr	  rt	  r  )r}	  rR  rc  rq  r   indr   r   ?Union[ShapeAsConstantBuffer, NoneAsConstantBuffer, MultiOutput]c                   t          | t          t          f          r| S |                                 }|J t	          t          ||                                 |                                 |                                 | 	                                j
        | 	                                j                  t          |fgd          S )Nr  T)r
  )r   r   r  r#  r
  re  r"  r  rH  r  rf  rg  r   )rq  r]  r  invoke_subgraphs      r   create_outputz,InvokeSubgraph.create.<locals>.create_output$!  s     &#8:N"OPP **,,)))"%$..00#__..%0022%00229"("3"3"5"5"?   $C[M6:   r   c                .    g | ]\  }} ||          S r   r   )r   r   rq  ra  s      r   r   z)InvokeSubgraph.create.<locals>.<listcomp>;!  s)    MMMYQfa((MMMr   )rq  r   r]  r   r   r^  )r  rV  rm   r  r  rw  r  r   r   r   r   r  r  r|	  rF  r   r	  r	  graph_outputsr#  rO  r  r  )r  r}	  rR  rV  r  fake_operandsrW  fx_operandsnew_operandsr   operandr  r  outsra  r`  s   `             @@r   r  zInvokeSubgraph.create   s    	766666 w++0445GHHH 	A,Q/3MM '+ABB/K@@K@@@M
 "J!I!I!I!I!I!I%'%h// 	 	LC'$9>#JKK ##G,,,,##,,WmC6HII     >!W22(,&m 3  HN
 $X^44 3 3""M223 3 3 3 3 3 3 3 3 3 3 3 3 3 3 ..  	 	Gg'<==  ++-- !!!($F333
 
 
	 	 	 	 	 	. NMMM)G:L:LMMM"&s   4EEErk  rr   c                0    |                     |            d S r   )codegen_invoke_subgraphrr  s     r   rs  zInvokeSubgraph.codegen?!  rc	  r   )r}	  rE  rR  r|  rc  r  r   r   )r}	  rE  rR  r   r   rT  r+	  )r   r   r   r  r}	  r   rR  r  rD  r  r  rs  r  r  s   @r   rO  rO     s           $(H''''+/H////*.G....
) 
) 
) 
) 
) 
) W W W [Wr. . . . . . . .r   rO  c                       e Zd ZU dZded<   dZded<   dZded<   dZded<   dZd	ed
<   d# fdZ	e
d$d            Zed%d            Zd&d Zd'd"Z xZS )(ConditionalNr  	predicaterQ  rR  rP  true_subgraphfalse_subgraphOptional[Sequence[MultiOutput]]r  r   r|  rE  rc  r  rP  rX
  r   r   c                L   || _         || _        || _        || _        t	          |g|          \  }}t                                          d |||           ||| _        t          j	        
                    |           | _        t          j	                            |            d S N)r   rc  r{  rC  )rm  rR  rn  ro  _split_by_sym_typer  rD  rP  rm   r  rt  r   ru  )
r  rm  rR  rn  ro  rc  rP  sym_argsr  r  s
            r   rD  zConditional.__init__K!  s     # *, 2I3I3I J J+"	 	 	
 	
 	
 (%6D"G++D11		""4(((((r   r   Union[int, torch.SymInt]Union[int, sympy.Expr]c                H    t          | t                    r| S | j        j        S r   )r   r   r   r)  )r   s    r   _maybe_exprzConditional._maybe_exprg!  s#    a 	Hv{r   r   true_fnfalse_fn-list[Union[TensorBox, ShapeAsConstantBuffer]]c           	                               |          } fd|D             }t          j        j        j        d         }t          |t                    sJ t          |                      t          d |D                       sJ d |D             }||fD ]z}|j        qt          j        	                    |j
        ||j                  |_        t          j        |j                  5   |j        j        |  ddd           n# 1 swxY w Y   {|j        J |j        J |j        j        }|j        j        }	d|fd|	ffD ])\  }
}t          |          rt!          d	|
 d
|           *t#          |          t#          |	          k    sJ ||	f            t%          t'          ||	                    D ]\  }\  }}|                                |                                k    sJ |||f            |                                |                                k    sJ |||f            |                                j        |                                j        k    sJ |||f            t1          d |g|z   D                       t3          t          j        j        j        t          j        j        j                            dd                    }
J d            t=          ||||t?                    |          fdt%          t'          |t          j        j        j        d                             D             }|_         |S )zNCreate a Sequence of IRNodes from a conditional statement (see .lowering.cond)c                :    g | ]}                     |          S r   r  r  s     r   r   z&Conditional.create.<locals>.<listcomp>w!  s'    ;;;QC%%a((;;;r   rI  c              3  @   K   | ]}t          |t                    V  d S r   )r   r9   rR  s     r   r   z%Conditional.create.<locals>.<genexpr>{!  s,      <<1:a&&<<<<<<r   c                N    g | ]"}t          t          |          j        d          #S rY  )r   r9   rw  r  s     r   r   z&Conditional.create.<locals>.<listcomp>|!  s(    HHHqdA+E2HHHr   Nr\  ry  rz  zVOutput aliasing is currently not supported in compiled torch.cond. The outputs of the z% subgraph of torch.cond are aliased: c              3  h   K   | ]-}t          |t                    |                                V  .d S r   )r   r   r#  )r   os     r   r   z%Conditional.create.<locals>.<genexpr>!  sO       
 
a!677
LLNN
 
 
 
 
 
r   rP  zcannot determine devicer  )rm  rR  rn  ro  rc  rP  c                d   g | ]\  }\  }}t          t          |                                d  |                                D             d |                                D             |                                j        |                                j                  t          |fg          S )c                B    g | ]}t                               |          S r   rl  rx  r   r7  s     r   r   z1Conditional.create.<locals>.<listcomp>.<listcomp>!  s&    UUU"+11"55UUUr   c                B    g | ]}t                               |          S r   r  r  s     r   r   z1Conditional.create.<locals>.<listcomp>.<listcomp>!  s3       8://33  r   r  )	r
  re  r"  r  r  r  rf  rg  r   )r   r   rq  merged_outputconditionalr  s       r   r   z&Conditional.create.<locals>.<listcomp>!  s     
 
 
" +*FM! ! **,,UU@R@R@T@TUUU >K>R>R>T>T   ",,..5$//11;	 	 	  
 
 
r   r  )!r  rm   r  r  r   r   r   r   r   r|	  rF  r   r	  r	  rc  rM  rL  r   r   r   r#  r"  r  rf  r  r6   r  r   rw  r  rl  r  r  )r  rm  ry  rz  rR  re  rd  r}	  true_outputsfalse_outputsr   r  r   t_of_orP  r  r  s   `               @@r   r  zConditional.createm!  s    %%i00	;;;;(;;; ! 4 9" =+x00CC${2C2CCC0<<<<<<<<<<HHKHHH (+ 		7 		7H~%!"!6!6,#0"*- "7 " "
 (88 7 7&HN&667 7 7 7 7 7 7 7 7 7 7 7 7 7 7 }(((~)))}2 4(,7*m9TU 	 	MD'#L11 $_*._ _U\_ _   <  C$6$6666}8U666&s<'G'GHH 	U 	UMAzS>>##s~~'7'7777!S#777==??cmmoo5553}555>>##*cnn.>.>.EEEE3PS}EEEE 
 
[8+
 
 
 
 

 6G&G %))*=tDD
 
 !!#<!!!!!#$F333/
 
 

 
 
 
 
" /8L!'"6";E"BCC/ /#
 
 
, &s   -D		D	D	rk  rr   c           	         |                     |            |                    |                                 | j        t	          | di                      d S rx
  )codegen_conditionalry
  r#  r  r   rr  s     r   rs  zConditional.codegen!  sV    ##D)))88MMOOT\749Lb+Q+Q	
 	
 	
 	
 	
r   r   c                    t          | dd           x}rIt          t          j        j        j        |          }|J t          |                                          S t                      S rx
  r   r6   rm   r  r  r   r;   r  r}
  s      r   r  z$Conditional.get_unbacked_symbol_defs!  h     '.A4 H HH 	 0 *,= H '''hmmoo...<<r   )rm  r   rR  r|  rn  rE  ro  rE  rc  r  rP  rX
  r   r   )r   ru  r   rv  )
rm  r   ry  rE  rz  rE  rR  r{  r   r|  r+	  r  )r   r   r   rm  r   rR  rn  ro  r  rD  r  rx  r  r  rs  r  r  r  s   @r   rl  rl  C!  s        "&I&&&&+/H////(,M,,,,)-N----/3G3333) ) ) ) ) )8    \
 X X X [Xt
 
 
 
               r   rl  r   rs	  -tuple[list[ShapeAsConstantBuffer], list[Any]]c                    g }g }| D ]G}t          |t                    r|                    |j                   2|                    |           H||fS r   )r   r   r  r)  )r   non_sym_argsrt  r)  s       r   rs  rs  !  si     LH % %c011 	%OOCH%%%%$$$$\!!r   c                       e Zd ZU dZdZded<   dZded<   dZded<   dZded<   dZ	d	ed
<   d  fdZ
ed!d            Zed"d            Zd#dZd$dZ xZS )%	WhileLoopzSThe IR node for while_loop and while_loop_stack_output. It supports input mutation.NrQ  carried_inputsadditional_inputsrP  cond_subgraphbody_subgraphrp  r  r|  rE  rc  r  rP  rX
  stack_outputr   r   r   c                \   || _         || _        || _        || _        t	          g ||          \  }}	t                                          d ||	|           ||| _        || _        t          j
                            |           | _        t          j
                            |            d S rr  )r  r  r  r  rs  r  rD  rP  r  rm   r  rt  r   ru  )r  r  r  r  r  rc  rP  r  rt  r  r  s             r   rD  zWhileLoop.__init__!  s     -!2** 21n101!
 !
+ 	"	 	 	
 	
 	
 (%6D"(G++D11		""4(((((r   c                   t          |           s| S ddlm} d | D             }t                      }g }t	          t          | |                    D ]o\  }\  }}t          |          |v r|                     ||                     8|                    t          |                     |                    |           p|S )Nr@   )clonec                d    g | ]-}t          |t                    r|                                n|.S r   rJ  rK  s     r   r   z3WhileLoop._clone_aliased_inputs.<locals>.<listcomp>"  sH     
 
 
 %/v$G$GSF   V
 
 
r   )	rM  r  r  r;   r   r   r_	  r  r  )r  r  unwrapped_buffersseen_buffersr5  r   original_inputunwrapped_buffers           r   _clone_aliased_inputszWhileLoop._clone_aliased_inputs"  s    #N33 	"!! 	$#####
 
(
 
 
 )35> 1226
 6
 	. 	.1A1 0 "##|33eeN334444  $4!5!5666n----r   cond_fnbody_fnr
  c                4   & ddl m} d'd	}t          j        j        j        d
         }t          j        j        j        d         }	||	z   }
d |
D             }d |D             }d |	D             } fd|D             }t                              |          } |||          } fd|D             } |||          }||z   &||fD ]}|j        t          |
t                    sJ t          |
                      t          j                            |j        |
|j                  |_        t          j        |j                  5   |j        j        |  ||u rLt!          |j        j                  t!          |          k    sJ  ||j        j        |          |j        _        ddd           n# 1 swxY w Y   |j        r|j        sJ |j        j        }|j        j        }t%          |          rt'          d|           t!          |          dk    s
J |            |d         }t          |t(                    s[|                                t,          j        k    s
J |            t!          |                                          dk    s
J |            t!          &          dk    s
J d            &d                                         }|J t!          |          t!          |          k    sJ ||f            t5          t7          ||                    D ]\  }\  }}d(d} ||                                |                                            ||                                |                                           |                                |                                k    sJ ||||f            |                                |                                k    sJ |||f            |J t;          t          j        j        j        t          j        j        j         !                    dd                    }t          ||||tE          |          ||          }|j        )t          |j        j#        t,          j$        j%                  sJ  ||j        j#        |          d         }tM          |          }&fd|D             }tO          |          }g }g |_(        g |_)        |rt!          |          dk    s
J d             t5          t          j        j        j         d!                   D ]\  } }!tU          tW          |!j,        |!j-        d" |!.                                D             d# |!/                                D             $          |t`          | fg          }"|j(        1                    |"           |1                    |"           nDt5          |          D ]3\  } }!| |v rp| t!          |          k     s
J d%            te          |          }#|j)        1                    tg          |#j4        |#|                     |1                    |#           ztU          tW          |!                                |!                                |!                                |!                                |!5                                j6        &          |t`          | fg          }"|j(        1                    |"           |1                    |"           5t7          ||          D ]`\  }$}%|$7                                t          j        j8        v r6t          j        j9        :                    |%7                                           a|S ))zcreate the while_loop IR node. stack_output controls whether it stack
        each iterations' output, which is necessary for training.
        r   )check_input_alias_and_mutationtensor_boxesr|  fake_tensors,list[Union[int, torch.SymInt, torch.Tensor]]r   r   c                ^   t          |           t          |          k    sJ g }t          | |          D ]w\  }}t          |t          j                  rC|                    t                              ||                                d                     b|                    |           x|S )NFr  )	r   r   r   r%  r  r  r  r  r  )r  r  retr  fks        r   _require_exact_stridesz0WhileLoop.create.<locals>._require_exact_strides>"  s     |$$L(9(99999ClL99 # #Bb%,// #JJ$::		5 ;      JJrNNNNJr   rI  c                (    g | ]}|j         d          S rY  rZ  r  s     r   r   z$WhileLoop.create.<locals>.<listcomp>R"  s    @@@Q16%=@@@r   c                (    g | ]}|j         d          S rY  rZ  r  s     r   r   z$WhileLoop.create.<locals>.<listcomp>S"  s    HHHqve}HHHr   c                (    g | ]}|j         d          S rY  rZ  r  s     r   r   z$WhileLoop.create.<locals>.<listcomp>T"  s    !N!N!NA!&-!N!N!Nr   c                :    g | ]}                     |          S r   r  r  s     r   r   z$WhileLoop.create.<locals>.<listcomp>V"  s'    HHHA3,,Q//HHHr   c                :    g | ]}                     |          S r   r  r  s     r   r   z$WhileLoop.create.<locals>.<listcomp>Y"  s'    NNNqc//22NNNr   Nr\  zOutput aliasing is currently not supported in compiled torch.while_loop. The outputs of the body_fn subgraph of torch.while_loop are aliased: r@   z9torch.while_loop is assumed to have at least one operand.	lhs_exprs Sequence[Union[int, sympy.Expr]]	rhs_exprsr   c                    t          |           t          |          k    sJ t          | |          D ]*\  }}t          j        j                            ||           +d S r   )r   r   rm   r  r  rp  )r  r  lhsrhss       r   _guard_list_equalsz,WhileLoop.create.<locals>._guard_list_equals"  sf     9~~Y7777 #Iy 9 9 < <HCG$11#s;;;;< <r   rP  r  )r  r  r  r  rc  rP  r  r   c                     g | ]
}|         S r   r   )r   r   
all_inputss     r   r   z$WhileLoop.create.<locals>.<listcomp>"  s    EEEc*S/EEEr   z-NYI: while_loop_stack_output input mutations.r  c                B    g | ]}t                               |          S r   r  r  s     r   r   z$WhileLoop.create.<locals>.<listcomp>"  s&    RRRbk55b99RRRr   c                B    g | ]}t                               |          S r   r  )r   r6  s     r   r   z$WhileLoop.create.<locals>.<listcomp>"  s&    VVV 7 7 ; ;VVVr   )r  r  r  r  zonly carries can be mutated.)r  r  r  r  rf  )r  r|  r  r  r   r   )r  r  r  r  r   r   );torch._higher_order_ops.utilsr  rm   r  r  r   r  r  r   r   r   r|	  rF  r   r	  r	  r   rc  rM  rL  r   r"  r%  r   r  r#  r   r   rH  r6   r  r   rw  r  r  modulefxGraphModuler;   r  r  rQ  r
  re  r  r  r  r  r   r  r  r  rc  r  rf  r#  r'  r	  r  )'r  r  r  r  r  r  r  r  fx_carried_inputsfx_additional_inputsfx_all_inputsfake_all_inputsfake_carried_inputsfake_additional_inputscarried_inputs_additional_inputs_r}	  cond_outputsbody_outputsrN  r  r   rr  bor  rP  
while_loopmutated_idxsmutated_idx_setr  mutated_inputs_iterall_outputsr   rq  	multi_outmutated_inputr  r  r  s'   `                                     @r   r  zWhileLoop.create0"  s~    	QPPPPP	 	 	 	" G05b9 w38<),@@@@-@@@HH6GHHH!N!N9M!N!N!NHHHHHHH#99/JJ00BUVVNNNN<MNNN33 6
 
 %'99
 '* 	 	H~%!-::OOD<O<OOO:!"!6!6,#0"*- "7 " "
 (88  &HN&88  7**"8>#?@@C/E E         8N7M$N8/8 84              " }....}2}2-- 	 gXdg g   <  A%%%|%%%O!233 	-;;==EJ......qzz||$$)))1))):"""G #"" A))++!!!?##s<'8'8888;
888 %S,%G%GHH 	A 	AKAxB< < < < r{{}}bkkmm<<<r}}@@@ ==??bmmoo5552r67J555<<>>R\\^^333aR[3333!!!5G&G %))*=tDD
 

 *0!!$F333/%	
 	
 	

 }(ZM %("6.
 .
(( 

 65M /
 

 %\22EEEE_EEE #>22$&
&(
# )	2''1,,,? -,,  ))=)B5)IJJ . .V'%}$lRRFKKMMRRRVVfmmooVVV	   C[M	 		 ")))444""9----.  )66 2 2V/))^!4!44446T444$()<$=$=M/66&}';]JWW    &&}5555 +##)#4#4#6#6"("2"2"4"4!'!2!2#)#4#4#6#6#)#4#4#6#6#=   #
! 
!I &--i888&&y1111NK88 	@ 	@HC||~~!555 +//???s   A F00F4	7F4	rk  rr   c           	         |                     | | j                   |                    |                                 | j        t          | di                      d S rx
  )codegen_while_loopr  ry
  r#  r  r   rr  s     r   rs  zWhileLoop.codegen"  s[    ""4):;;;88MMOOT\749Lb+Q+Q	
 	
 	
 	
 	
r   r   c                    t          | dd           x}rIt          t          j        j        j        |          }|J t          |                                          S t                      S rx
  r  r}
  s      r   r  z"WhileLoop.get_unbacked_symbol_defs#  r  r   )r  r|  r  r|  r  rE  r  rE  rc  r  rP  rX
  r  r   r   r   )r  r|  r   r|  )r  rE  r  rE  r  r|  r  r|  r  r   r   r
  r+	  r  )r   r   r   r  r  r   r  r  r  r  rD  r  r  r  r  rs  r  r  r  s   @r   r  r  !  s	        ]]15N5555488888(,M,,,,(,M,,,,/3G3333) ) ) ) ) )D    \8 K K K [KZ
 
 
 
               r   r  c                  >     e Zd Z	 dddd fdZd fdZddZ xZS )r   NrV
  rc  r4  r5  r{   r  r|  rW
  rz  r  r  r   rR  rP  rX
  r   r   c          	     2   t                                          |||||d |           ddlm} d |D             }	 ||g ||	R |          }
|
J |
| _        t
          j        j                            |
d           | _	        | t
          j        j        |
<   d S )Nr   rP  r   )get_effect_keyc                J    g | ] }t          |t                    r|j        n|!S r   )r   r  r)  r  s     r   r   z,EffectfulKernel.__init__.<locals>.<listcomp>'#  s:     
 
 
ABz!_55<AGG1
 
 
r   )
r  rD  torch._higher_order_ops.effectsr  effect_typerm   r  effectful_opsr  prev_effect_buffer)r  rc  r5  r  rW
  r  r   rP  r  uncovered_argsr  r  s              r   rD  zEffectfulKernel.__init__#  s     	/ 	 	
 	
 	
 	CBBBBB
 
FQ
 
 
 %nV-O~-O-O-OQWXX&&&&"#'"7";";K"N"N-1k***r   rH  c                    t                                                      }| j        C|j                            t          j        | j                                                             |S r   )r  rJ  r  rN  r  rB   r"  r#  )r  r  r  s     r   rJ  zEffectfulKernel.get_read_writes0#  sa    gg--//".!!$T%<%E%E%G%GHH   r   r   c                    dS r  r   r  s    r   r	  z EffectfulKernel.has_side_effects:#  r  r   r   r
  r  rq  )r   r   r   rD  rJ  r	  r  r  s   @r   r   r   #  s         ,02 KO2 2 2 2 2 2 2 2@            r   r   c                  <    e Zd Z ed           	 dd	d            ZdS )
r  Fr  r   r   r   c                    t                      S r   r:   rU  s     r   rV  z!NonTensorObj.get_free_symbol_uses?#  r  r   Nr  r  )r   r   r   rX   rV  r   r   r   r  r  >#  sF        N++$)    ,+  r   r  c                  N    e Zd ZU ded<   ded<   ddZddd
ZddZddZddZdS )r  r   r   +Union[FakeScriptObject, torch.ScriptObject]r)  r   c                    | j         S r   r  r  s    r   r#  zTorchBindObject.get_nameK#  r  r   Nr  r	  c                    | j         S r   r  r  s     r   r  z!TorchBindObject.codegen_referenceN#  r  r   c                    | j         S r   r-  r  s    r   r  zTorchBindObject.get_valueQ#  r  r   torch.ScriptObjectc                f    t          | j        t          j                  r| j        S | j        j        S r   )r   r)  r%  ScriptObjectreal_objr  s    r   get_real_objzTorchBindObject.get_real_objT#  s,    dj%"455 	'::&&r   r   c                   |                                  }t          |d          sJ t          |                                          }t	          j        |          d         }d |D             }t          j        t          j	        |d          S )N__obj_flatten__r   c                    g | ]E}t          |t          j                  |                                |                                z  FS r   )r   r%  r  ri	  numelr  s     r   r   z1TorchBindObject.get_buf_bytes.<locals>.<listcomp>`#  sQ     
 
 
!U\**
NNqwwyy(
 
 
r   )
r  r  r   r  r  r  r  r  operatorr  )r  real_script_obj	flat_dict
flat_elems
flat_sizess        r   get_buf_byteszTorchBindObject.get_buf_bytesZ#  s    ++--(9:::::88::;;	(33A6

 

 
 


 j!<<<r   r{  r   ru  )r   r  )r   r  r  )	r   r   r   r   r#  r  r  r  r  r   r   r   r  r  F#  s         III6666          ' ' ' '= = = = = =r   r  c                  6    e Zd ZU ded<   ded<   ddZddd
ZdS )r  r   r   r  r  r   c                    | j         S r   r  r  s    r   r#  zGeneratorState.get_namem#  r  r   Nr  r	  c                    | j         S r   r  r  s     r   r  z GeneratorState.codegen_referencep#  r  r   r{  r   ru  )r   r   r   r   r#  r  r   r   r   r  r  h#  sX         III         r   r  c                  X    e Zd ZddZddZddd	Zedd            Zedd            ZdS )_CollectiveKernelr   r   c                    dS r  r   r  s    r   r  z!_CollectiveKernel.should_allocateu#  r  r   c                    dS r  r   r  s    r   r	  z"_CollectiveKernel.has_side_effectsx#  r  r   NrH  r0  r   c                    t          | j                  t          j        j        u s
J d            | j        }||| _        n|j        j        | _        d |j        j        D             | _	        d S )Nz,Setting cpp kernel needs a valid op_overloadc                *    g | ]}|j         	|j        S r   rb  r  s     r   r   z9_CollectiveKernel.set_cpp_kernel_name.<locals>.<listcomp>#  s1     .
 .
 .
.
F.
 .
 .
r   )
r   rL  r%  rd  re  rH  rf  r   rg  rJ  )r  rH  r5  s      r   rU  z%_CollectiveKernel.set_cpp_kernel_name}#  s    D$%%)>>>>: ?>> !&#2D  #)>#6D .
 .
"N4.
 .
 .
***r   r5  r{   r{  Union[IRNode, list[IRNode]]r   r   r   c                   t           j        j        5   | j        ||g|R i |\  }}}}}	d d d            n# 1 swxY w Y   |	rJ | d|	             |D ]}
|
                                 |d                                          | t                    ||||          t          j        |          }j	        
                    fd|D                        j        
                    d |D                        d|v rqj	                            t          t                    |d                              j                            |d                                                    d S d S )NrG  r   r  c                N    g | ]!}t          t                     |          "S r  r  )r   r  r  r
  s     r   r   z4_CollectiveKernel.create_inplace.<locals>.<listcomp>#  s0    TTT^Jf555sFCCTTTr   c                6    g | ]}|                                 S r   r  r
  s     r   r   z4_CollectiveKernel.create_inplace.<locals>.<listcomp>#  s     "B"B"Bc3<<>>"B"B"Br   r  )rm   r  r  r  r  r#  r  r  tree_leavesrQ  rR  ra
  r  r  r#  )r  r5  r{  r   r   _example_outputr  r  r  rP  
tensor_arginpsr  r
  s               @@r   create_inplacez _CollectiveKernel.create_inplace#  s    W 	D 	D #"66CDCCCFCC!	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D %EE&E&E2C&E&EEE$% 	! 	!J    Q**,,f%%%
 
 !&))&&TTTTTtTTT	
 	
 	

 	!!"B"BT"B"B"BCCCF??#**z888&-PP   %%fUm&<&<&>&>????? ?   9= =!Union[TensorBox, list[TensorBox]]+Union[list[MultiOutput], _CollectiveKernel]c                b    t           j        j        5    j        ||g|R i |\  }}}}}	d d d            n# 1 swxY w Y   |	rJ | d|	             |D ]}
|
                                 t          |t                    r                     ||          }|J   t          |          ||||           fdt          |          D             _
        t          j
        |          D ]I\  }}t          j        st          |          s)t           j        j                            |j                   Jj
        S                        |          ||||          t          j        st          |          s)t           j        j                            j                   g_
        S )NrS  r  c                n    g | ]1\  }}t                              |          t          |fg          2S r   )r
  r
  r   )r   r   rZ  r  r
  s      r   r   z9_CollectiveKernel.create_out_of_place.<locals>.<listcomp>#  sV        Av ((00AYK   r   )rm   r  r  r  r  r   r   r
  r  r   r  r   rA   r
  rj   r  r  r   r
  )r  r5  r{  r   r   r  r  r  r  rP  r  r  r  rZ  r
  s   `             @r   create_out_of_placez%_CollectiveKernel.create_out_of_place#  sq    W 	D 	D #"66CDCCCFCC!	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D %FF&F&F3D&F&FFF$% 	! 	!J    nd++ %	__[.AAF%%%S!000 F     "+>!:!:  FN  #6>>BB < <V: <BSC C < G-11#(;;;>!S$$^44 F 6 ;>O? ? ; )--fk:::$XFNMr  rq  r   r,	  )
r5  r{   r{  r  r   r   r   r   r   r   )
r5  r{   r{  r  r   r   r   r   r   r  )	r   r   r   r  r	  rU  r  r  r  r   r   r   r   r   t#  s              

 
 
 
 
( (@ (@ (@ [(@@ 8 8 8 [8 8 8r   r   c                  2     e Zd Z	 dddd fdZddZ xZS )_AllReduce_KernelNrV
  rc  r4  r5  r{   r  r|  rW
  rz  r  r  r   rR  rP  rX
  r   r   c          	         t                                          |||||d |           |                     d           d S )Nr  +aoti_torch_cpu__c10d_functional_all_reduce_r  rD  rU  	r  rc  r5  r  rW
  r  r   rP  r  s	           r   rD  z_AllReduce_Kernel.__init__$  X     	/ 	 	
 	
 	
 	  !NOOOOOr   rk  rr   c                    |                     d           |                    |            t          | j        t                    r|                     |           d S d S Nz+torch/csrc/inductor/aoti_torch/c/shim_cpu.hinclude_extra_headerr?	  r   rc  r  r	  rr  s     r   rs  z_AllReduce_Kernel.codegen%$  c    $$%RSSS,,T222dk6** 	/%%g.....	/ 	/r   r   r
  r+	  r	  r  s   @r   r  r  $  su         ,0P KOP P P P P P P P,/ / / / / / / /r   r  c                  2     e Zd Z	 dddd fdZddZ xZS )_AllReduceKernelNrV
  rc  r4  r5  r{   r  r|  rW
  rz  r  r  r   rR  rP  rX
  r   r   c          	         t                                          |||||d |           |                     d           d S )Nr  *aoti_torch_cpu__c10d_functional_all_reducer  r  s	           r   rD  z_AllReduceKernel.__init__.$  sX     	/ 	 	
 	
 	
 	  !MNNNNNr   rk  rr   c                    |                     d           |                    |            t          | j        t                    r|                     |           d S d S r  r  rr  s     r   rs  z_AllReduceKernel.codegenD$  r  r   r   r
  r+	  r	  r  s   @r   r!  r!  -$  su         ,0O KOO O O O O O O O,/ / / / / / / /r   r!  c                  ^     e Zd Z	 dddd fdZddZd dZed!d            Zd" fdZ xZ	S )#_WaitKernelNrV
  rc  r4  r5  r{   r  r|  rW
  rz  r  r  r   rR  rP  rX
  r   r   c          	         t                                          |||||d |           |                     d           d S )Nr  +aoti_torch_cpu__c10d_functional_wait_tensorr  r  s	           r   rD  z_WaitKernel.__init__M$  r  r   rk  rr   c                    |                     d           |                    |            t          | j        t                    r|                     |           d S d S r  r  rr  s     r   rs  z_WaitKernel.codegenc$  r  r   c                   | j         d         }t          |t                    sJ t          |t                    r<|j         d         }t          |t                    sJ t	          |                      |gS t          |t
                    rB|j         d         }t          |t                    r|j        d         \  }}|j         |         gS g S g S r   )r{  r   r   r   r   r
  r  )r  r  r   collr  r   s         r   get_volatile_readsz_WaitKernel.get_volatile_readsj$  s    k!n#v&&&&&c,-- 	
1Aa((11$q''11(3J[)) 	 :a=D$ 122 *Q3C())I Ir   r  r   c                   t           j        j        5  |                     ||          \  }}}}}d d d            n# 1 swxY w Y   |rJ | d|              | t	          |                                          ||||          }|j                            t          t	          |                                          ||                     d S )NrG  r  )	rm   r  r  r  r  r#  rQ  r  r  )	r  r5  r  r
  r  r  r  rP  r
  s	            r   create_waitz_WaitKernel.create_wait$  s0   W 	0 	0 ""63//!	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 %EE&E&E2C&E&EEE$cnn..///
 
 	&&:S^^-=-=>>>VLL	
 	
 	
 	
 	
s   ;??rH  c                    t                                                      }|                                 }|D ]@}|j                            t          j        |                                                     A|S r   )r  rJ  r,  rN  r  rB   r"  r#  )r  r  volatile_readsvrr  s       r   rJ  z_WaitKernel.get_read_writes$  sm    gg--//0022  	G 	GB!!,"6r{{}}"E"EFFFFr   r   r
  r+	  rS	  )r5  r{   r  r   r   r   r  )
r   r   r   rD  rs  r,  r  r.  rJ  r  r  s   @r   r&  r&  L$  s         ,0P KOP P P P P P P P,/ / / /   0 
 
 
 [
*         r   r&  r   r  c                l   t          | t          t          f          rt          |           S t          | t          t
          f          r7t          t          j                             }| D ]}|t          |          z  }|S t          | t          j                  rt          |           S t                      S r   )r   r8   r!   r3   r   r   r;   r   r#   r#	  r%  r  r   r  r(  s      r   r#	  r#	  $  s    !h%&& $Q'''	At}	%	% 	u|$&& 	0 	0A,Q///AA	Au|	$	$ $Q'''||r   c                l   t          | t          t          f          rt          |           S t          | t          t
          f          r7t          t          j                             }| D ]}|t          |          z  }|S t          | t          j                  rt          |           S t                      S r   )r   r8   r!   r2   r   r   r;   r   r#   r$	  r%  r  r3  s      r   r$	  r$	  $  s    !h%&& A	At}	%	% 	u|$&& 	' 	'A#A&&&AA	Au|	$	$ A||r   )r   r   r   r   )r   r   r   r   )r   r   r   r   )r   r   r   r   )r   r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   rm  )r   r  r	  r   r   r   )r   r   r	  r   r   r  )r   r  r	  r   r   r  )r)  r*  r   r+  )r   r/  r   r0  )r   r6  r   r   )r   rC  rD  r   r   r   )rN  rO  rP  rO  rQ  rO  r   r   )rZ  r   r[  r\  r   r   )rn  ro  r   r   )r{  r|  r   r}  )r   r  r  r  r   rl   )r9  r   r  r  r:  r   r   r;  )r  r   r  r1  rf  r!   r   r  r  )TFNFN)r   r   r  r   r  r   r  r  r9  r   rD  r  r   r  )r   r   r  r   r   r   rh  )r  rO  rQ  rO  r   r   )r  r  r   r   )r   r  r   r  )rG  r|  r   r   )r   rs	  r   r  )r   r   r   r  (L  
__future__r   r  r  r  r  loggingr  ostextwrapr  collections.abcr   r   r   r   r   r	   r
   enumr   r   typingr   r   r   r   r   r   r   r   r   r   r   r   typing_extensionsr   r   r   r   r   r   r   unittest.mockr    r   r!   r"   r#   torch._export.serde.schema_exportserderq
  r
  torch._library.utilsrm
  r  rf
  torch._loggingr%  torch.fxtorch.utils._pytree_pytreer  torch._dynamo.utilsr$   torch._export.serde.serializer%   *torch._higher_order_ops.auto_functionalizer&   torch._inductorr'   r	  r)   torch._prims_commonr*   r+   r,   r-   r.   torch._subclasses.fake_tensorr/   %torch.fx.experimental.symbolic_shapesr0   r1   r2   r3   r4   r5   r6   r7   r8   torch.fx.noder9   torch.utils._ordered_setr;   torch.utils._sympy.functionsr<   r=   r>   torch.utils._sympy.symbolr?   r  rA   rB   codegen.commonrC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   	loop_bodyrM   ops_handlerrN   rO   rP   rQ   runtime.benchmarkingrR   runtime.hintsrS   rT   rU   rV   rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   rd   re   rf   rg   rh   ri   rj   virtualizedrk   rl   rm   "torch._library.fake_class_registryrn   ro   rp   codegen.cuda.cuda_templaterq   codegen.wrapperrr   r  rs   rt   r   r   r;  __version__r  r  ImportErrorru   rv   rw   rx   r   ry   r  rz   rd  re  r  r{   	getLoggerr   r  r  ru  getenvr  r  r   r   r   r   r
  r   r   r   r   r   r   r`  rb  r   r   r  r  r.  r5  r@  rB  rM  rY  rm  rz  r  r   r  r  r  r  r(  r8  rb  rd  r  r8  r:  rG  rO  ri  r  r  r  r  rd  r  r  r  r   r)  r=  rN  r  rh  r  r  r  r  r  r  r  r4  r  re  r   r  r  r  r  r  r  r  r  r  r%  r  r   r  r  r  r   r   PrimitiveInfoTyper  r  r  r  r  r  r  r  r  r#  r  r/	  r6	  r<  r  rU	  r[	  r\	  rq	  r	  r	  r	  r	  r	  r	  r	  r
  r!
  r,
  r   r   rS
  rU
  r
  r
  r  r
  r  r   r  rE  rM  rO  rl  rs  r  r   r  r  r  r   r  r!  r&  r#	  r$	  r   r   r   <module>r_     sU    " " " " " " "                   				      N N N N N N N N N N N N N N : : : : : : : :                                                                 ' ' ' ' ' ' ' ' ' ' 2 2 2 2 2 2 2 2 2 2 2 2 , , , , , , , , ,      $ $ $ $ $ $ $ $ $ ( ( ( ( ( ( ? ? ? ? ? ? M M M M M M # # # # # # 2 2 2 2 2 2              : 9 9 9 9 9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
       / / / / / / L L L L L L L L L L * * * * * * " " " " " " " "                                  N N N N N N N N N N N N - - - - - - : : : : : : : :                                               0 * ) ) ) ) ) ) ) ) )  %CCCCCCBBBBBB&&&&&&888888555555$$$$$$%%%%%%% %L$$$$MMM'NJJ   NJJJ
 Yt__WT]]WT]]WT]]CI& & & & &C,- - - - -
 5uz7U UV V V V Vg!!		8?4	8	8	8y~#ibi 7<<==s929137788'T  k	sDk!12K8STU	 	 	 	 	) ) ) ) d###       $#$$ $ $ $D             !LL $__  TX    	 	 	 	 TX
 
 
 
 
 
 N N N N 
 N 
 O O O O 
 O .2    8   	> 	> 	> 	>; ; ; ;(' ' ' '0 0 0 00   .$G $G $G $GN' ' ' '   u, u, u, u, u, u, u, u,p	 UH H H H H H H HV A
 A
 A
 A
 A
F A
 A
 A
H& & & &  
  
  
  
  
  
  
  
F 
 
 
 
 
i 
 
 
F ;|$$;y!!;y!!K;u{=))< <      JN<N <N <N <N <N~ i
 i
 i
 i
 i
 i
 i
 i
\ '+71::    & "8D>8D>"BH"LM M M M M7S 7S 7S 7S 7S9 7S 7S 7St# # # # #1 # # #L[
 [
 [
 [
 [
+ [
 [
 [
| E
 E
 E
 E
 E
5 E
 E
 E
R 	 	 	 	 	 	 	 	 V V V V V5 V V Vr   	 	 	 	 !<@=A9 9 9 9 9x      $ ^
 ^
 ^
 ^
 ^
v ^
 ^
 ^
B N N N N N N N Nb - - - - -( - - -` :9 :9 :9 :9 :9( :9 :9 :9z ! ! ! ! !( ! ! !H s s s s s; s s sl S S S S Sh S S Sl & & & & & & & &R_A _A _A _A _A _A _A _AD     6   " K K K K K| K K K$ S S S S S| S S S   (< < < <7 7 7 7 7 7 7 7  O
 O
 O
 O
 O
Z O
 O
 O
dC C C C C& C C CRH RH RH RH RHV RH RH RHj!G !G !G !G !Gf !G !G !GH    T   %% %% %% %% %%{ %% %% %%P        .V* V* V* V* V* V* V* V*r UE E E E EV] E E EP U& & & & &fi & & &     &   
    K   
 
 
 
 
[ 
 
 
&     6   (     F     Uo4 o4 o4 o4 o4_ o4 o4 o4dE
 E
 E
 E
 E
_ E
 E
 E
PM M M M M> M M M` #udCeCeT<Q6R1SST :$ :$ :$ :$ :$ :$ :$ :$z" " " " "| " " "
WB WB WB WB WB. WB WB WBt5 5 5 5 5 5 5 50( ( ( ( ( ( ( (4    N   >5 5 5 5 UR R R R R? R R Rj       A A A A A9 A A AH Ut t t t t< t t tn U" " " " "l " " "J
 
 
 
 
/ 
 
 
((" (" (" (" (" (" (" ("V
 
 
 
 
V 
 
 
B= = = = =L = = =@%
 %
 %
 %
 %
 %
 %
 %
P
 
 
 
 
- 
 
 
$1
 1
 1
 1
 1
\ 1
 1
 1
hH H H H Hl H H HV)) )) )) )) ))| )) )) ))Z/ / / / /, / / /d    <   8B B B B B5 B B B$8 8 8 8 8- 8 8 82K) K) K) K) K)l K) K) K)\4) 4) 4) 4) 4)| 4) 4) 4)n3T 3T 3T 3T 3T 3T 3T 3Tl*3 *3 *3 *3 *3 *3 *3 *3Z- - - - -L - - -8<; <; <; <; <;< <; <; <;~ U       
^* ^* ^* ^* ^*& ^* ^* ^*B U
 
 
 
 
. 
 
 
<               2     
   '
 '
 '
 '
 '
, '
 '
 '
X T T T T T T T Tn+ + + + +
 + + +_% _% _% _% _% _% _% _%D U* * * * *v * * *L L L L Up. p. p. p. p.\ p. p. p.f UR  R  R  R  R , R  R  R j" " " " Ud  d  d  d  d  d  d  d N	, , , , ,n , , ,^    6    = = = = =l = = =B     \   W W W W W W W Wt/ / / / /) / / />/ / / / /( / / />R R R R R# R R Rp         s   G 	G'&G'