
    `i                      U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlZd dlZd dlmZmZ d dlmZmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZ d dlmZm Z  d dl!Z!d dl"Z"d dl#Z"d dl$m%Z% d d	l&m'Z( d d
l)m*Z* d dl+m,Z, d dl-m.Z. d dl/m0Z1 d dl2m3Z3m4Z4m5Z5 d dl6m7Z7m8Z8 ddl9m:Z:m;Z; ddl<m=Z= ddl>m?Z?m@Z@ ddlAmBZB ddlCmDZDmEZEmFZFmGZGmHZHmIZImJZJmKZKmLZLmMZMmNZNmOZO ddlPmQZQmRZRmSZSmTZTmUZUmVZV ersd dlWmXZXmYZYmZZZ d dl#m[Z[ ddl\m]Z] ddl^m_Z_m`Z`maZambZb ddlcmdZd ddlemfZfmgZgmhZh ddlAmiZi ddljmkZk  e d           Zleeeg         geff         Zmenek         Zoeepe!jq        f         ZrepZse"jt        u                    evd!          Zw ejx        ev          Zyd4d&Zzej{         G d' d(                      Z| G d) d*ej                  Z} G d+ d,e          Z~ eId-.           G d/ d0e~                      Z G d1 d2          Zej{         G d3 d4                      Zej{         G d5 d6                      Zej{         G d7 d8                      Zej{         G d9 d:                      Zej{         G d; d<                      Zeeeeeef         Zi Zd=ed><    G d? d@          Zi ZdAedB<   i ZdCedD<   i ZdEedF<   	 	 	 	 d5d6dSZ G dT dUe          Zd7dXZd8d[Zd9d]Z	 d:d;daZd<dbZd=dcZej        d>dd            Zd?djZd@dlZdAdmZe"j        e"j        e"j        e"j        idn e"j        e"j        e"j        e"j        e"j        e"j        e"j        e"j        e"j        e"j        e"j        fD             Zdoedp<   dBdvZdCd}Z G d~ d          Z G d de1          Z0 G d d          Z ej        dej                  ZdDdZ G d de?eeRe                   Zej{         G d d                      Z edEi d ee%j        d d          d ee%j        d d d          d ee%j        d d d          d ee%j        d d d          d ee%j        d d d          d ee%j        d d d          d ee%j        d d d          d ee%j        d d d d          d ee%j        d d          d ee%j        d d          d ee%j        d d          d ee%j        d d          d ee%j        d d d d          d ee%j        d d d          d ee%j        d dÄ dĬ          d ee%j        dƄ dǬ          d ee%j        dɄ dʬ          d ee%j        d̄ d̈́ dά          d ee%j        dЄ dф dҬ          d ee%j        dԄ dլ          d ee%j        dׄ dج          d ee%j        dڄ d۬          d ee%j        d݄ dެ          d ee%j        d d߬          d ee%j        d d㬏          d ee%j        d d欏          d ee%j        d d鬏          d ee%j        d d쬏          d ee%j        d d﬏          d ee%j        d d򬏦          d ee%j        d d          d ee%j        d d          d ee%j        d d          d ee%j        d d          d ee%j        d  d          d ee%j        d d          d ee%j        d d          d ee%j        d	 d
          d ee%j        d d          d ee%j        d d          Zded<   dFdZ G d deE          Z G d deH          Z G d de          Zej{         G d d                      Z G d d          Z e            Z G d d           Z G d! d"          Z e d#ep$          Z e dzee%          Zer!ee"j        eTeeeed&f         f         f         Z G d' d(eeef                   Z G d) d*          Z G d+ d,eee                   Zej{         G d- d.                      Zej        dGd/            Z G d0 d1          Z G d2 d3e@          ZdS (H      )annotationsN)ABCabstractmethod)autoEnum)chain)	AnyCallablecastClassVarGeneric
NamedTupleOptionalTYPE_CHECKINGUnion)SelfTypeVar)ELEMENTWISE_TYPE_PROMOTION_KIND)_pytree)ConfigModule)
OrderedSet)int_oo)PythonPrinter)free_symbol_is_typesymbol_is_typeSymT)bound_sympyValueRanges   )configmetrics)DtypePropagationOpsHandler)BasicMathOpsMixinDefaultHandler)ShapePropagationOpsHandler)boolean_opsDeferredLineBasegenerate_assertget_current_backendIndentedBufferir_dataclass
ScopedDict	sympy_dotsympy_index_symbol
sympy_substriton_typeunique)ops
OpsHandlerOpsValueReductionType	StoreModeV)IteratorMutableMappingSequence)GraphModule)CustomGraphModulePass)BufferChoiceCallerFixedLayoutIRNodeLoopBody)BaseScheduling	SchedulerSchedulerNode)BlockShapeType   PythonWrapperCodegen_TschedulemsgstrreturnNonec                    t                               t          j                  rt                               d|            d S d S )NzData type propagation: %s)schedule_logisEnabledForloggingDEBUGdebug)rL   s    r/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torch/_inductor/codegen/common.pydata_type_loggerrW   Z   sA      // =6<<<<<= =    c                  h    e Zd ZU dZded<   ded<   ddZedd
            ZddZedd            Z	dS )FileBackedGraphModulez
    Output of FX wrapper codegen. Exposes the same methods as ModuleType, but these
    map back to a GraphModule instead of Python source.
    r;   gmzCallable[..., Any]compiled_fnrN   rO   c                   t          j        ddd          | _         t          j        t          j        | j         j                   | j         5 }|                    | j                   d d d            d S # 1 swxY w Y   d S )Nzw+z.pyF)modesuffixdelete)	tempfileNamedTemporaryFileatexitregisterosremovenamewritevalue)selffs     rV   __post_init__z#FileBackedGraphModule.__post_init__i   s     !3eE
 
 
 		4=#5666] 	 aGGDJ	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	 s   A55A9<A9rM   c                    | j         j        S N)ra   rg   rj   s    rV   __file__zFileBackedGraphModule.__file__s   s    }!!rX   args	list[Any]r	   c                     | j         | S rn   )r\   rj   rq   s     rV   callzFileBackedGraphModule.callw   s    t&&rX   c                    | j         j        S rn   )r[   codero   s    rV   ri   zFileBackedGraphModule.valuez   s    w|rX   NrN   rO   rN   rM   )rq   rr   rN   r	   )
__name__
__module____qualname____doc____annotations__rl   propertyrp   ru   ri    rX   rV   rZ   rZ   _   s          
 OOO####        " " " X"' ' ' '    X  rX   rZ   c                  J    e Zd ZdZdZdZedd            Zedd
            ZdS )WorkspaceZeroModer   rG   r   abrN   c                    | |k    s|t           j        k    r| S | t           j        k    r|S t          d| d|d          )NzWorkspaceZeroMode.combine(, ))r   UNINITIALIZEDNotImplementedErrorr   r   s     rV   combinezWorkspaceZeroMode.combine   sT    66Q+999H!///H!"Lq"L"La"L"L"LMMMrX   	zero_fillboolc                6    | rt           j        S t           j        S rn   )r   ZERO_ON_CALLr   )r   s    rV   	from_boolzWorkspaceZeroMode.from_bool   s     	2$11 ..rX   N)r   r   r   r   rN   r   )r   r   rN   r   )	rz   r{   r|   r   r   ZERO_PER_GRAPHstaticmethodr   r   r   rX   rV   r   r      se        MLNN N N \N / / / \/ / /rX   r   c                  B    e Zd ZdZedd            Zed	d            ZdS )
CodegenSymbolzP
    An IR object possibly corresponding to a variable in the wrapper code.
    rN   rM   c                    d S rn   r   ro   s    rV   get_namezCodegenSymbol.get_name       rX   !Union[torch.Tensor, sympy.Symbol]c                    d S rn   r   ro   s    rV   get_examplezCodegenSymbol.get_example   r   rX   Nry   rN   r   )rz   r{   r|   r}   r   r   r   r   rX   rV   r   r      s^             ^    ^  rX   r   T)frozenc                  <   e Zd ZU dZded<   ded<   ded<   ded	<   d
Zded<   ej        Zded<   e	d(d)d            Z
e	d*d            Ze	d+d            Ze	d+d            Zd,dZeZd-dZd.dZd/dZed/d            ZeZeZeZd0dZd1d!Zd1d"Zd2d#Zd3d$Zd4d&Zd'S )5WorkspaceArga2  A temporary buffer used for a single kernel, then discarded.

    Not registered as a traditional buffer since there are no users,
    so it would be dead code eliminated.

    Args:
        nbytes: The size of the buffer in bytes.
        zero_fill: Whether the buffer should be initialized to zero.

    
sympy.Exprcountr   	zero_modetorch.devicedevicerM   
outer_namews_ptr
inner_nametorch.dtypedtype
workspace_prefixrN   c                F    |  t          t          j        j                   S rn   )nextr7   graphworkspace_id)r   s    rV   unique_namezWorkspaceArg.unique_name   s     6$qw344666rX   r   r   r   c                b    | j         |j         k    o| j        |j        k    o| j        |j        k    S rn   )r   r   r   r   s     rV   can_joinzWorkspaceArg.can_join   s3     LAL(XQW-?XAHPQPXDX	
rX   c                    t          | j        |j        z   t                              | j        |j                  | j        | j        | j        | j                  S N)r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   r   s     rV   joinzWorkspaceArg.join   sP    'AG#'//Q[II'8||
 
 
 	
rX   c                8   | j         |j         k    r | j        |j        k    r| j        |j        k    sJ t          t	          j        | j        |j                  t                              | j	        |j	                  | j         | j        | j        | j
                  S r   )r   r   r   r   sympyMaxr   r   r   r   r   r   s     rV   maximumzWorkspaceArg.maximum   s     Gqw18qx#7#7ALAL<X<X<XX)AGQW--'//Q[II'8||
 
 
 	
rX   c                    | j         S rn   r   ro   s    rV   
get_devicezWorkspaceArg.get_device   s
    {rX   c                    | j         S rn   )r   ro   s    rV   	get_dtypezWorkspaceArg.get_dtype   s
    zrX   r   c                N    |                                                                  S rn   )
get_layoutr   ro   s    rV   r   zWorkspaceArg.get_example   s      ,,...rX   r?   c                N    ddl m}  || j        | j        | j        gdg          S )Nr   )r?   rG   )r   r   sizestride)irr?   r   r   r   )rj   r?   s     rV   r   zWorkspaceArg.get_layout   sC    $$$$$${;**3	
 
 
 	
rX   c                *    |                                  S rn   )r   ro   s    rV   layoutzWorkspaceArg.layout   s       rX   c                $    t           j        j        S rn   )r   SZeroro   s    rV   
get_offsetzWorkspaceArg.get_offset   s    w|rX   list[sympy.Expr]c                    | j         gS rn   )r   ro   s    rV   get_sizezWorkspaceArg.get_size   s    
|rX   c                &    t           j        j        gS rn   )r   r   Onero   s    rV   
get_stridezWorkspaceArg.get_stride   s    }rX   c                    | j         S rn   )r   ro   s    rV   r   zWorkspaceArg.get_name   s
    rX   c                    dS )NFr   ro   s    rV   get_is_pinnedzWorkspaceArg.get_is_pinned  s    urX   	list[str]c                    g S rn   r   ro   s    rV   get_inputs_that_alias_outputz)WorkspaceArg.get_inputs_that_alias_output  s    	rX   N)r   )r   rM   rN   rM   )r   r   r   r   rN   r   )r   r   r   r   rN   r   )rN   r   )rN   r   r   )rN   r?   )rN   r   )rN   r   ry   )rN   r   )rN   r   )rz   r{   r|   r}   r~   r   torchuint8r   r   r   r   r   r   r   get_device_or_errorr   r   r   r   r   get_output_specmaybe_get_output_specmaybe_get_layoutr   r   r   r   r   r   r   rX   rV   r   r      s        	 	     OOOJE$$$$7 7 7 7 \7 
 
 
 \

 
 
 
 \
 
 
 
 \
    %   / / / /
 
 
 
 ! ! ! X! !O&!                    rX   r   c                      e Zd Zd
dZddZd	S )TritonScratchWorkspacer   intgenerate_dtype_strCallable[..., str]c                "    || _         || _        d S rn   )r   _generate_dtype_str)rj   r   r   s      rV   __init__zTritonScratchWorkspace.__init__
  s    	#5   rX   rN   rM   c                *    |                                  S rn   )r   ro   s    rV   r   z)TritonScratchWorkspace.generate_dtype_str  s    '')))rX   N)r   r   r   r   ry   )rz   r{   r|   r   r   r   rX   rV   r   r   	  s<        6 6 6 6* * * * * *rX   r   c                  ^    e Zd ZU ded<   ded<   ded<   ej        j        Zded<   dZd	ed
<   dS )	TensorArgrM   rg   bufferr   r   r   offsetNOptional[str]alias_of)	rz   r{   r|   r~   r   r   r   r   r   r   rX   rV   r   r     sV         IIIKKKF%%%%"H""""""rX   r   c                  <    e Zd ZU ded<   ded<   ed	d            ZdS )
SizeArgrM   rg   r   exprrN   r   c                    d S rn   r   ro   s    rV   r   zSizeArg.alias_of   s    trX   NrN   r   )rz   r{   r|   r~   r   r   r   rX   rV   r   r     sI         III   X  rX   r   c                      e Zd ZU ded<   dS )ConstexprArgrM   rg   Nrz   r{   r|   r~   r   rX   rV   r   r   %  s         IIIIIrX   r   c                  8    e Zd ZU ded<   ded<   ded<   ded<   dS )	TMADescriptorArgrM   rg   api_typezOptional[list[sympy.Expr]]block_shapeOptional[torch.dtype]r   Nr   r   rX   rV   r   r   *  s<         IIIMMM++++      rX   r   c                  @    e Zd ZU ded<   ded<   dZded<   dZded<   dS )	DeviceCodegenSchedulingConstructor
schedulingWrapperConstructorwrapper_codegenNOptional[WrapperConstructor]cpp_wrapper_codegenfx_wrapper_codegen)rz   r{   r|   r~   r   r   r   rX   rV   r   r   2  sN         %%%%''''8<<<<<7;;;;;;;rX   r   zdict[str, DeviceCodegen]device_codegensc                      e Zd ZddZddZd dZdd	Zd d
Zd dZd dZ	d dZ
d dZd dZd dZd dZd dZd dZd dZd dZ	 d!d"dZdS )#DeviceOpOverridesrg   rM   rN   c                    t           rn   r   rj   rg   s     rV   import_get_raw_stream_asz*DeviceOpOverrides.import_get_raw_stream_as@      !!rX   
device_idxr   c                    t           rn   r  rj   r	  s     rV   
set_devicezDeviceOpOverrides.set_deviceC  r  rX   c                    t           rn   r  ro   s    rV   synchronizezDeviceOpOverrides.synchronizeF  r  rX   c                    t           rn   r  r  s     rV   device_guardzDeviceOpOverrides.device_guardI  r  rX   c                    t           rn   r  ro   s    rV   cpp_device_guardz"DeviceOpOverrides.cpp_device_guardL  r  rX   c                    t           rn   r  ro   s    rV   cpp_aoti_device_guardz'DeviceOpOverrides.cpp_aoti_device_guardO  r  rX   c                    t           rn   r  ro   s    rV   cpp_stream_guardz"DeviceOpOverrides.cpp_stream_guardR  r  rX   c                    t           rn   r  ro   s    rV   cpp_aoti_stream_guardz'DeviceOpOverrides.cpp_aoti_stream_guardU  r  rX   c                    t           rn   r  ro   s    rV   cpp_getStreamFromExternalz+DeviceOpOverrides.cpp_getStreamFromExternalX  r  rX   c                    t           rn   r  ro   s    rV   kernel_headerzDeviceOpOverrides.kernel_header[  r  rX   c                    t           rn   r  ro   s    rV   kernel_driverzDeviceOpOverrides.kernel_driver^  r  rX   c                    t           rn   r  ro   s    rV   cpp_stream_typez!DeviceOpOverrides.cpp_stream_typea  r  rX   c                    t           rn   r  ro   s    rV   aoti_get_streamz!DeviceOpOverrides.aoti_get_streamd  r  rX   c                    t           rn   r  ro   s    rV   cpp_kernel_typez!DeviceOpOverrides.cpp_kernel_typeg  r  rX   c                    t           rn   r  ro   s    rV   cpp_device_ptrz DeviceOpOverrides.cpp_device_ptrj  r  rX   c                    t           rn   r  ro   s    rV   tma_descriptor_helpersz(DeviceOpOverrides.tma_descriptor_helpersm  r  rX   Nidx	workspacer   r   r   Optional[tuple[list[str], str]]c                    t           rn   r  )rj   r)  r*  r   s       rV   cpp_scratchzDeviceOpOverrides.cpp_scratchp  s
     "!rX   rg   rM   rN   rM   )r	  r   rN   rM   ry   rn   )r)  r   r*  r   r   r   rN   r+  )rz   r{   r|   r  r  r  r  r  r  r  r  r  r  r  r   r"  r$  r&  r(  r-  r   rX   rV   r  r  ?  st       " " " "" " " "" " " "" " " "" " " "" " " "" " " "" " " "" " " "" " " "" " " "" " " "" " " "" " " "" " " "" " " " TX" " " " " " "rX   r  zdict[str, DeviceOpOverrides]device_op_overrides_dictz*dict[str, Optional[CustomGraphModulePass]]custom_backend_passesz!dict[str, Optional[ConfigModule]]custom_backend_codegen_configsr   device_schedulingr   device_wrapper_codegenr   device_cpp_wrapper_codegenr   device_fx_wrapper_codegendevice_custom_passOptional[CustomGraphModulePass]device_custom_configOptional[ConfigModule]c                    t          ||||          t          | <   |t          | <   |r3t          |t                    r	|t
          usJ d|dt
                      |t          | <   d S )Nzdevice_custom_config=z: cannot be the same as the default inductor config config=)r   r  r0  
isinstancer   r    r1  )r   r2  r3  r4  r5  r6  r8  s          rV   register_backend_for_devicer<    s     ,"!	 OF %7&! 
+\::	
$F222b#bbY_bb 323
 .B"6***rX   c                      e Zd Z e            Z e            Z e            Z e            Z e            Z e            Z	 e            Z
 e            Z e            Z e            ZdS )BackendFeatureN)rz   r{   r|   r   FOREACH	BUCKETIZEINPLACE_BUFFERSMASKED_SCATTER_WITH_INDEXSCANSORTTUPLE_REDUCTIONPREFER_STORE_LOOP_ORDERTRITON_TEMPLATESREDUCE_TO_SINGLE_ELEMENTr   rX   rV   r>  r>    s        dffGIdffO $466D466DdffO"dfftvv#tvvrX   r>  Union[torch.device, str, None]OrderedSet[BackendFeature]c                l   | t                      S t                       t          | t          j                  r| j        }nBt          | t                    sJ t          |                       | }t          j        |          } t          |          }|sJ  |d           }|                    |           S rn   )	r   init_backend_registrationr;  r   r   typerM   get_scheduling_for_deviceget_backend_features)r   device_typescheduling_ctorr   s       rV   rO  rO    s     ~||&%,'' +k&#&&44V44&k**/<<O? &&J**6222rX   featurer   c                R    t          |t                    sJ |t          |           v S )zSee also V.graph.has_feature)r;  r>  rO  )r   rR  s     rV   has_backend_featurerT    s.     g~.....*62222rX   Optional[SchedulingConstructor]c                <    | t           v rt           |          j        nd S rn   )r  r   r   s    rV   rN  rN    s     17?1J1J?6"--PTTrX   Fcpp_wrapper
fx_wrapperc                d    | t           v r&t           |          }|r|j        S |r|j        S |j        S d S rn   )r  r   r   r   )r   rW  rX  wrapper_codegen_objs       rV   get_wrapper_codegen_for_devicer[    sJ       -<V-D 	7&99 	7&::&664rX   c                2    | t           v rt           |          nd S rn   )r0  r   s    rV   "get_custom_backend_pass_for_devicer]    s    ,26K,K,K ((QUUrX   c                2    | t           v rt           |          nd S rn   )r1  r   s    rV   $get_custom_backend_config_for_devicer_    s%     333 	'v..rX   c                 j   ddl m}  ddlm} ddlm} ddlm} ddlm	} ddl
m} ddlm} dd	lm} dd
lm} ddlm}	 ddlm}
 ddlm} t1          d          /| ||	dt3          dfd|
t4          j        j        r|n||           t1          d          ||dt3          dfd|
||           t1          d          t3          d|	|
||           t1          d          t3          d||
||           t1          d          t3          d|	|||           t:          j                                        }|dk    rtt1          |          gddl m!} 	  |d          } |d          } |d          } |d          }|r|r|rt3          |||||           dS dS dS dS # tD          $ r Y dS w xY wdS dS )z
    Register the backend for different devices, including the scheduling
    for kernel code generation and the host side wrapper code generation.
    rG   )CppScheduling)CppWrapperCpu)CppWrapperCpuArrayRef)CppWrapperGpu)CppWrapperMps)CUDACombinedScheduling)HalideScheduling)MetalScheduling)PythonWrapperMtia)TritonSchedulingrH   )WrapperFxCodegencpuN)cpphalidetritonc                :     t           j                 |           S rn   )r    cpu_backend)r   cpu_backendss    rV   <lambda>z+init_backend_registration.<locals>.<lambda>  s    ?|F,>?
KK rX   cuda)ro  rn  c                :     t           j                 |           S rn   )r    cuda_backend)r   cuda_backendss    rV   rs  z+init_backend_registration.<locals>.<lambda>  s    A}V-@A*MM rX   xpumpsmtiaprivateuseoner   )_get_custom_mod_func
SchedulingrI   CppWrapperCodegenrk  )#rm  ra  cpp_wrapper_cpurb  cpp_wrapper_cpu_array_refrc  cpp_wrapper_gpurd  cpp_wrapper_mpsre  cuda_combined_schedulingrf  rn  rg  ry  rh  python_wrapper_mtiari  ro  rj  wrapperrI   wrapper_fxirrk  rN  r<  r    aot_inductorallow_stack_allocationr   _C_get_privateuse1_backend_name torch.utils.backend_registrationr|  RuntimeError)ra  rb  rc  rd  re  rf  rg  rh  ri  rj  rI   rk  private_backendr|  r2  r   r   r   rr  rw  s                     @@rV   rL  rL    sA    #"""""......@@@@@@............@@@@@@(((((($$$$$$666666((((((------...... ''/ &&
 

 	$KKKK "9!!	
 	
 	
 !((0 -&
 
 	$MMMM 	
 	
 	
 !''/# 	
 	
 	
 !''/# 	
 	
 	
 !((0#	
 	
 	
 h<<>>O?**%o66>IIIIII	 4 4\ B B223IJJO"6"67J"K"K!5!56H!I!I  _ 9L +#%#'&           	 	 	DD	% 	+*>>s   AF   
F.-F.indexSequence[sympy.Expr]
index_varssizesr   c                \    ddl m} g | t          ||                    |                    S )Nr   )FlexibleLayout)r   r  r-   contiguous_strides)r  r  r  r  s       rV   index_prevent_reorderingr  T  sA    
 $##### UUTIj.*K*KE*R*RSSTTrX   device_op_overridesc                    |t           | <   d S rn   )r/  )r   r  s     rV   register_device_op_overridesr  _  s     (;V$$$rX   c                    t          | t                    sJ t          |                       t          sddlm}m} ddlm} ddl	m} ddl
m} t          |          S )NrG   )cpu_device_op_overridesmps_device_op_overrides)r  )r;  rM   rM  r/   r  r  rt  r  rz  rx  )r   r  r  r  mtia_op_overridesxpu_op_overridess         rV   get_device_op_overridesr  e  s    fc""00DLL00"# AFFFFFFFF------BBBBBB@@@@@@#F++rX   c                    i | ]}||S r   r   ).0r   s     rV   
<dictcomp>r  t  s*        	u  rX   zdict[torch.dtype, torch.dtype]DTYPE_TO_COMPUTATION_DTYPEop_namerq   r	   kwargsr   c                   | t                      v rt          j        S | dv rd|v r|d         n|d         S | dv rt          j        S | dv rt          j        S | dk    rd|v r|d         n|d         S | dk    rd|v r|d         n|d         S | d	v r'|d         }t
          j                            |          S | d
k    rd|v r|d         n|d         S dS )zK
    Given op name and a list of input dtypes, deduce the output dtype
    )to_dtype
index_exprr   )randrandn)	get_index	randint64	load_seed	reductionrG   constant)loadstorestore_reductionto_dtype_bitcastN)r&   r   r   floatint64r7   r   r   )r  rq   r  buf_names       rV   deduce_output_dtype_by_namer    s1    +--z	  
 
 #*V"3"3vgbA	  
 
 {	  
 

 {	K		")V"3"3vga@	J		")V"3"3vgbA	  
 

 7w  ***	&	&	&")V"3"3vgbA4rX   r   r*   varCSEVariableTyper   r   c                    t                      }t          j        j        r1|dk    r+|                     d| dt          |           d           d S t          j        j        r|dk    rddlm}m	} t          ||          sJ t          |                      |t          j        k    r|j        rd| d	}n,d
| d| d}n"d| d}|j        rd| d}d| d||          d}|                     d| d           d S d S d S )Nro  ztl.static_assert(z
.dtype == r   rm  rG   )CppCSEVariableDTYPE_TO_CPPzIsVecMaskType<decltype(z	)>::valuezstd::is_same_v<decltype(z$), bool> || std::is_same_v<decltype(z), int>z	decltype(z	typename z::value_typezstd::is_same_v<r   >zstatic_assert(z);)r)   r    test_configsruntime_triton_dtype_assert	writeliner0   static_cpp_dtype_assert	cpp_utilsr  r  r;  rM  r   r   is_vec)r   r  r   backendr  r  
is_same_dt
c_var_types           rV   check_dtyper    sw    "##G6 :7h;N;NQSQQK<N<NQQQRRRRR			4 :E9I9I;;;;;;;;#~..99S		99.EJz nEsEEE

 nmmadmmm

+S+++Jz BAAAA
O:OOe9LOOOJ8*88899999!: :9I9IrX   c                  n    e Zd ZddZdd	ZddZddZddZddZe	dd            Z
e	dd            ZdS )DataTypePropagationbodyrB   rN   rO   c                    || _         d|j        j        i| _        |j                                        D ]\  }}|j        | j        |<   d S Nroot)r  
root_blockr   graphs	subblocksitems)rj   r  kvs       rV   r   zDataTypePropagation.__init__  sY    	DO)B
 N((** 	% 	%DAqWDKNN	% 	%rX   nodetorch.fx.Noder   c                    |j         }d |D             }t          |          dk    rd S t          d |D                       }|sd S t          j        t
          j        d |D                       S )Nc                f    g | ].}t          |t          j        j                  r|j        d k    ,|/S )placeholder)r;  r   fxNodeopr  ns     rV   
<listcomp>zCDataTypePropagation.deduce_node_dtype_by_inputs.<locals>.<listcomp>  sA     
 
 
Aux}!=!=
BC$-BWBWABWBWBWrX   r   c              3  x   K   | ]5}t           j        |j        v o|j        t           j                 j        d uV  6d S rn   )OptimizationContextkeymetar   r  s     rV   	<genexpr>zBDataTypePropagation.deduce_node_dtype_by_inputs.<locals>.<genexpr>  s^       )
 )
   #qv- B*./5TA)
 )
 )
 )
 )
 )
rX   c                F    g | ]}|j         t          j                 j        S r   )r  r  r  r   r  s     rV   r  zCDataTypePropagation.deduce_node_dtype_by_inputs.<locals>.<listcomp>  s&    HHHqQV'+,2HHHrX   )all_input_nodeslenall	functoolsreducer   promote_types)rj   r  inputsinput_nodesall_input_nodes_propagateds        rV   deduce_node_dtype_by_inputsz/DataTypePropagation.deduce_node_dtype_by_inputs  s    %
 

 
 
 {q  4%( )
 )
 !)
 )
 )
 &
 &
"
 * 	4HHKHHH
 
 	
rX   r   c                \    | j         |j                 }|                     |          }|sJ |S rn   )r  targetpropagate_graph)rj   r  	sub_graphr   s       rV   deduce_node_dtype_by_subgraphz1DataTypePropagation.deduce_node_dtype_by_subgraph  s1    K,	$$Y//urX   c                |   |j         dk    rd S |j        dk    rt          |j                  dk    rd S |j        t          j        k    rX|j        d         }t          |t          j        j	                  sJ t          |                      |                     |          S t          |j        t                    sJ t          |j                              |j                            d          r|                     |          S t          |j        g|j        R i |j        x}	 |S |                     |          S )Nr  outputrG   r   masked_subblock)r  r  r  rq   operatorgetitemr;  r   r  r  rM  deduce_node_dtyperM   
startswithr  r  r  r  )rj   r  node_argoutput_dtypes       rV   r  z%DataTypePropagation.deduce_node_dtype  sG   7m##4;(""s49~~':':4;(***y|Hh66FFXFF6))(333$+s++>>T$+->->>>+;!!"344 	<55d;;; 8   +  L
   //555rX   r   torch.fx.Graphc                "   |j         sJ d }|j         D ]{}t          j        |j        v r|j        t          j                 }nt                      }|                     |          |_        ||j        t          j        <   |j        dk    r|j        }||S )Nr  )nodesr  r  r  r  r   r  )rj   r   graph_dtyper  opt_ctxs        rV   r  z#DataTypePropagation.propagate_graph  s    {{-1 K 		, 		,D"&$)33)$7$;<-// 22488GM18DI)-.{h&&%mrX   c                B    |                      | j        d                   S r  )r  r  ro   s    rV   	propagatezDataTypePropagation.propagate  s    ##DK$7888rX   c                <     | |                                           S rn   )r  )clsr  s     rV   propagate_loopbodyz&DataTypePropagation.propagate_loopbody   s    s4yy""$$$rX   rE   c                   ddl m} ddlm} t	          ||          sJ t          |                      t	          |j        |          sJ t          |j                              t                              |j                  S )Nr   rA   )rE   )		loop_bodyrB   	schedulerrE   r;  rM  _bodyr  r  )r  r  rB   rE   s       rV   propagate_scheduler_nodez,DataTypePropagation.propagate_scheduler_node$  s    ((((((------$..::T

::.$*h//AAdj1A1AAA/"55djAAArX   N)r  rB   rN   rO   )r  r  rN   r   )r  r  rN   r   )r   r  rN   r   )rN   r   )r  rB   rN   r   )r  rE   rN   r   )rz   r{   r|   r   r  r  r  r  r  classmethodr  r  r   rX   rV   r  r    s        % % % %
 
 
 
*   6 6 6 6:   $9 9 9 9 % % % [% B B B [B B BrX   r  c                  6     e Zd Zdddd fd
Zdd fdZ xZS )r   T)simplifypr   r   r  r   r  rN   rM   c                   |rXt          |t          j                  r>t          t          j        d          r$t          j        j                            |          }t                      	                    |          S )Nsizevars)
r;  r   Exprhasattrr7   r   r  r  superdoprint)rj   r   r  r  	__class__s       rV   r  zPythonPrinter.doprint/  sa      	3
444 	3*9U9U 	37#,,T22Dwwt$$$rX   Fitemlevelr   strictc                    t          |t          j                  rd|                     |           dS t	                                          |||          S N(r   )r;  r   Mod_printr  parenthesize)rj   r  r  r  r  s       rV   r!  zPythonPrinter.parenthesize7  sR    dEI&& 	= ,t{{4((++++77''eV<<<rX   )r   r   r  r   r  r   rN   rM   )F)r  r   r  r   r  r   rN   rM   )rz   r{   r|   r  r!  __classcell__r  s   @rV   r   r   .  so        48D% % % % % % % %= = = = = = = = = = =rX   r   c                     e Zd ZdZedd            Zed d            Zed d            Zed d	            Zed d
            Z	ed d            Z
ed d            Zed d            Zed d            Zed d            Zed d            Zed d            Zed!d            Zed"d            Zed"d            Zed"d            Zed#d            Zed"d            ZdS )$OpDecompositionsz!
    Decomposes inductor ops
    ri   OpVarTrN   c                    | S rn   r   )ri   s    rV   identityzOpDecompositions.identityE  s	     rX   xc                f    t          j        t          j        dt          j                  |           S NrG   )r2   truedivr  r   int32r)  s    rV   
reciprocalzOpDecompositions.reciprocalJ  s"    {3<5;77;;;rX   c                ,    t          j        | |           S rn   )r2   mulr.  s    rV   squarezOpDecompositions.squareN  s    wq!}}rX   c                    t          j        t          j        dt          j                  t          j        |                     S r+  )r2   subr  r   float32erfr.  s    rV   erfczOpDecompositions.erfcR  s*    ws|Au}55swqzzBBBrX   c                    t          j        t          j        t          j        |                     t          j        |                     S rn   )r2   r1  expr2  r7  r.  s    rV   erfcxzOpDecompositions.erfcxV  s.    wswsz!}}--sx{{;;;rX   c                    t          j        t          j        |           t          j        dt          j                            S r+  )r2   r4  r9  r  r   r5  r.  s    rV   expm1zOpDecompositions.expm1Z  s*    wswqzz3<5=#A#ABBBrX   c           	         t          j        t          j        |           t          j        dt	          j        d          z  t
          j                            S )NrG   
   r2   r1  logr  mathr   r5  r.  s    rV   log10zOpDecompositions.log10^  s7    wswqzz3<DHRLL0@%-#P#PQQQrX   c           	         t          j        t          j        |           t          j        dt	          j        d          z  t
          j                            S )NrG   r   r?  r.  s    rV   log2zOpDecompositions.log2b  s6    wswqzz3<DHQKK#O#OPPPrX   c           
         t          j        t          j        | t          j        t	          j        d          t          j                                      S )Nr   )r2   r9  r1  r  rA  r@  r   r5  r.  s    rV   exp2zOpDecompositions.exp2f  s4    wswq#,tx{{EM"J"JKKLLLrX   c           	         t          j        t          j        | t          j        dt          j                                      S r+  )r2   r@  addr  r   r-  r.  s    rV   log1pzOpDecompositions.log1pj  s,    wswq#,q%+">">??@@@rX   c                    t          j        dt          j                  }t          j        |t          j        |t          j        t          j        |                                         S r+  )r2   r  r   r-  r,  rH  r9  neg)r)  ones     rV   sigmoidzOpDecompositions.sigmoidn  sE    l1ek**{3SWSWQZZ-@-@ A ABBBrX   c                f    t          j        | t          j        dt          j                            S Nr   )r2   r   r  r   r-  r.  s    rV   reluzOpDecompositions.relus  s"    {1cl1ek::;;;rX   yzc                R    t          j        t          j        | |          |          S rn   )r2   rH  r1  r)  rQ  rR  s      rV   fmazOpDecompositions.fmaw  s      wswq!}}a(((rX   r   r   r   c                P    t          j        t          j        |           |          S rn   )r2   r  floorr   r   s     rV   floor_to_intzOpDecompositions.floor_to_int|      |CIaLL%000rX   c                P    t          j        t          j        |           |          S rn   )r2   r  ceilrX  s     rV   ceil_to_intzOpDecompositions.ceil_to_int  s    |CHQKK///rX   c                P    t          j        t          j        |           |          S rn   )r2   r  truncrX  s     rV   trunc_to_intzOpDecompositions.trunc_to_int  rZ  rX   r   c           	     v   t          j        | |          }t          j        t          j        |t          j        dt
          j                            t          j        t          j        |          t          j        |                              }t          j        |t          j	        ||          |          S rO  )
r2   modand_ner  r   r-  signbitwhererH  )r   r   rconds       rV   	remainderzOpDecompositions.remainder  s~    GAqMMxF1cl1ek2233F3;q>>3;q>>22
 
 yswq!}}a000rX   c                P    t          j        t          j        |           |          S rn   )r2   r  roundrX  s     rV   round_to_intzOpDecompositions.round_to_int  rZ  rX   N)ri   r&  rN   r&  r)  r&  rN   r&  )r)  r&  rQ  r&  rR  r&  rN   r&  )r   r&  r   r   rN   r&  r   r&  r   r&  rN   r&  )rz   r{   r|   r}   r   r(  r/  r2  r7  r:  r<  rB  rD  rF  rI  rM  rP  rU  rY  r]  r`  ri  rl  r   rX   rV   r%  r%  @  sz            \ < < < \<    \ C C C \C < < < \< C C C \C R R R \R Q Q Q \Q M M M \M A A A \A C C C \C < < < \< ) ) ) \) 1 1 1 \1 0 0 0 \0 1 1 1 \1 1 1 1 \1 1 1 1 \1 1 1rX   r%  z[a-z0-9_.]+|\([^)]*\)|)flagsstringc                   | d         dk    st          |           dk     rdS d}t          | dd                    D ];\  }}|dk    r|dz  }n|dk    r|dz  }|dk    r|t          |           dz
  k    r dS <|dk    sJ dS )Nr   r  r   FrG   r   T)r  	enumerate)rp  r   ichars       rV   _all_in_parensru    s    ayC3v;;??uEVABBZ((  43;;QJEES[[QJEA::!s6{{Q..55A::::4rX   c                     e Zd Zed\d            Zed]d	            Zed^d            Zed_d            Zed`d            Zed`d            Z	ed`d            Z
ed`d            Zed`d            Zedad            Zedbd            Z	 	 dcddd"Zded(Zdfd*Z	 dgdhd.Zdid/Zdjd4Zdkd;Zdld>Z	 	 dmdndHZdodIZd+ej        ddJdKdpdSZdqdUZdrdVZedsdX            ZedtdY            Z edud[            Z!d+S )vOpOverridesrp  r&  rN   c                    t          | t                    s)t                              |           st	          |           r| S d|  dS r  )r;  CSEVariable_RE_PAREN_NOT_NEEDED	fullmatchru  )rp  s    rV   parenzOpOverrides.paren  sS     v{++	#--f55	 f%%	 M6}}}rX   ri   Union[bool, float, int]r   r   c                     t          |           S rn   )repr)ri   r   s     rV   r  zOpOverrides.constant  s    E{{rX   r)  c                <    dt                               |            S )N~rw  r|  r.  s    rV   bitwise_notzOpOverrides.bitwise_not  s    );$$Q'')))rX   r   c                <    t                               |            dS )Nz == 0r  )r   s    rV   logical_notzOpOverrides.logical_not  s    ##A&&----rX   rQ  c                p    t                               |            dt                               |           S )Nz & r  r)  rQ  s     rV   bitwise_andzOpOverrides.bitwise_and  2    ##A&&AA;+<+<Q+?+?AAArX   c                p    t                               |            dt                               |           S )Nz | r  r  s     rV   
bitwise_orzOpOverrides.bitwise_or  r  rX   c                p    t                               |            dt                               |           S )Nz ^ r  r  s     rV   bitwise_xorzOpOverrides.bitwise_xor  r  rX   c                p    t                               |            dt                               |           S )Nz << r  r  s     rV   bitwise_left_shiftzOpOverrides.bitwise_left_shift  2    ##A&&BBK,=,=a,@,@BBBrX   c                p    t                               |            dt                               |           S )Nz >> r  r  s     rV   bitwise_right_shiftzOpOverrides.bitwise_right_shift  r  rX   r   c                ,    t          j        | |          S rn   )r2   r,  r   s     rV   int_truedivzOpOverrides.int_truediv  s    
 {1a   rX   rg   rM   r   c                P    t          j        | t          j        |                    S rn   )r2   r  r   Integer)rg   r   s     rV   r  zOpOverrides.load_seed  s    xemF33444rX   Tr  r   Union[sympy.Expr, int]checkr   wrap_negsympy.Symbolc                :    t          t          |                    S rn   )r.   rM   )rj   r  r   r  r  s        rV   indirect_indexingzOpOverrides.indirect_indexing  s     "#c((+++rX   r   r   lowerupperrO   c                J    t          t          |           j         d          )Nz,: check_bounds should be handled by CSEProxyr   rM  rz   rj   r   r   r  r  s        rV   check_boundszOpOverrides.check_bounds  s+     "Dzz"PPP
 
 	
rX   r  c                J    t          t          |           j         d          )Nz$: load should be handled by CSEProxyr  rj   rg   r  s      rV   r  zOpOverrides.load  s)    !Dzz"HHH
 
 	
rX   Nr^   r6   c                J    t          t          |           j         d          )Nz%: store should be handled by CSEProxyr  rj   rg   r  ri   r^   s        rV   r  zOpOverrides.store  s+     "Dzz"III
 
 	
rX   c                J    t          t          |           j         d          )Nz/: store_reduction should be handled by CSEProxyr  rj   rg   r  ri   s       rV   r  zOpOverrides.store_reduction  s)    !Dzz"SSS
 
 	
rX   	src_dtypereduction_typer5   !Union[OpVarT, tuple[OpVarT, ...]]c                J    t          t          |           j         d          )Nz): reduction should be handled by CSEProxyr  rj   r   r  r  ri   s        rV   r  zOpOverrides.reduction   s+     "Dzz"MMM
 
 	
rX   dtypestuple[torch.dtype, ...]
combine_fnFCallable[[tuple[OpVarT, ...], tuple[OpVarT, ...]], tuple[OpVarT, ...]]valuestuple[OpVarT, ...]c                J    t          t          |           j         d          )Nz$: scan should be handled by CSEProxyr  rj   r  r  r  s       rV   scanzOpOverrides.scan  s+     "Dzz"HHH
 
 	
rX   stable
descendingc                J    t          t          |           j         d          )Nz$: sort should be handled by CSEProxyr  rj   r  r  r  r  s        rV   sortzOpOverrides.sort  s+     "Dzz"HHH
 
 	
rX   
boundaries.tuple[str, sympy.Expr, sympy.Expr, sympy.Expr]boundary_indicesindexing_dtyperightsorter Optional[tuple[str, sympy.Expr]]sorter_indicesOptional[OpVarT]c                J    t          t          |           j         d          )Nz): bucketize should be handled by CSEProxyr  rj   r  r  r  r  r  r  r  s           rV   	bucketizezOpOverrides.bucketize#  s+     "Dzz"MMM
 
 	
rX   c                J    t          t          |           j         d          )Nz2: halide_clamp only implemented for Halide backendr  )rj   ri   r   r  s       rV   halide_clampzOpOverrides.halide_clamp1  s)    !Dzz"VVV
 
 	
rX   rG   )constraintsr   is_purepackr  asmr  r   r  r  r   c               J    t          t          |           j         d          )Nz<: inline_asm_elementwise only implemented for Triton backendr  )rj   r  r  r   r  r  r  s          rV   inline_asm_elementwisez"OpOverrides.inline_asm_elementwise6  s+     "Dzz"```
 
 	
rX   rq   c                J    t          t          |           j         d          )Nz.: ops.output should not appear at codegen timeAssertionErrorrM  rz   rt   s     rV   r  zOpOverrides.outputC  s)    Dzz"RRR
 
 	
rX   c                J    t          t          |           j         d          )Nz3: ops.placeholder should not appear at codegen timer  rj   r  s     rV   r  zOpOverrides.placeholderH  s)    Dzz"WWW
 
 	
rX   Callable[..., OpVarT]c                0     d
 fd} |_         d	|_        |S )Nrj   rw  rq   r	   r  rN   r&  c                P    t          t          |           j         d           )Nz does not implement ops.r  )rj   rq   r  rg   s      rV   unimplementedz1OpOverrides._unimplemented.<locals>.unimplementedO  s/    %::&FFFF  rX   T)rj   rw  rq   r	   r  r	   rN   r&  )rz   is_unimplemented)rg   r  s   ` rV   _unimplementedzOpOverrides._unimplementedM  s9    	 	 	 	 	 	
 "&)-&rX   c                    t          | |d           }t          t          |d           }| p||k    pt          |dd          S )Nr  F)getattrr3   )r  rg   fn
default_fns       rV   _is_unimplementedzOpOverrides._is_unimplementedX  sH    S$%%Zt44
vSz)SWR9KU-S-SSrX   r  c                ~   |dv s
J |            t                                           D ]\  }}t          ||          }|:|                     |          r$t	          | ||                     |                     Q|| j        vsJ d| d| j                     ||_        t	          | |t          |                     d S )N)ro  rm  cppvecrn  ry  zmultiple definitions of z on )	pointwise_overrides_datar  r  r  setattrr  __dict__rz   r   )r  r  funcnamedataimpls        rV   _initialize_pointwise_overridesz+OpOverrides._initialize_pointwise_overrides^  s    EEEEvEEE6<<>> 
	; 
	;NHd4((D|((22 IC3+=+=h+G+GHHHs|333KxKKS\KK 433 !)X|D'9'9::::
	; 
	;rX   )rp  r&  rN   r&  )ri   r}  r   r   rN   r&  rm  )r   r&  rN   r&  )r)  r&  rQ  r&  rN   r&  rn  )rg   rM   r   r&  rN   r&  TT)
r  r&  r   r  r  r   r  r   rN   r  
r   r   r   r   r  r   r  r   rN   rO   )rg   rM   r  r   rN   r&  rn   )
rg   rM   r  r   ri   r&  r^   r6   rN   rO   )rg   rM   r  r   ri   r&  rN   rO   )
r   r   r  r   r  r5   ri   r  rN   r  )r  r  r  r  r  r  rN   r  )
r  r  r  r  r  r   r  r   rN   r  NN)r  r&  r  r  r  r&  r  r   r  r   r  r  r  r  rN   r&  )ri   r&  r   r   r  r   rN   r&  )r  r&  r  rM   r  r   r   r   r  r   r  r   rN   r&  )rq   r&  rN   rO   )r  r   rN   r&  )rg   rM   rN   r  rg   rM   rN   r   )r  rM   rN   rO   )"rz   r{   r|   r   r|  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r5  r  r  r  r  r  r  r  r   rX   rV   rw  rw    s.          \    \ * * * \* . . . \. B B B \B B B B \B B B B \B C C C \C C C C \C ! ! ! \! 5 5 5 \5 , , , , ,
 
 
 

 
 
 
 NR
 
 
 
 

 
 
 

	
 	
 	
 	

 
 
 
	
 	
 	
 	
$ 48+/
 
 
 
 

 
 
 
 &*"]
 
 
 
 
 

 
 
 


 
 
 

    \ T T T [T
 ; ; ; [; ; ;rX   rw  c                  t    e Zd ZU ded<   ded<   dZded<   dZded<   ej        Zd	ed
<   dZ	ded<   dZ
ded<   dS )OverridesDatarM   rg   r   rm  NzOptional[Callable[..., str]]ro  r  r   type_promotion_kindrn  ry  )rz   r{   r|   r~   ro  r  r   DEFAULTr  rn  ry  r   rX   rV   r  r  o  s         III+/F////+/F////'/      ,0F////(,C,,,,,,rX   r  airy_aic                    d|  dS )Nzairy_ai_forward(r   r   r.  s    rV   rs  rs    s    ---- rX   special_airy_ai)r  rm  rg   	bessel_j0c                    d|  dS )Nzbessel_j0_forward(r   r   r.  s    rV   rs  rs        /1/// rX   c                    d|  dS )Nzlibdevice.j0(r   r   r.  s    rV   rs  rs        ---- rX   special_bessel_j0)r  rm  ro  rg   	bessel_j1c                    d|  dS )Nzbessel_j1_forward(r   r   r.  s    rV   rs  rs    r  rX   c                    d|  dS )Nzlibdevice.j1(r   r   r.  s    rV   rs  rs    r  rX   special_bessel_j1	bessel_y0c                    d|  dS )Nzbessel_y0_forward(r   r   r.  s    rV   rs  rs    r  rX   c                    d|  dS )Nzlibdevice.y0(r   r   r.  s    rV   rs  rs    r  rX   special_bessel_y0	bessel_y1c                    d|  dS )Nzbessel_y1_forward(r   r   r.  s    rV   rs  rs    r  rX   c                    d|  dS )Nzlibdevice.y1(r   r   r.  s    rV   rs  rs    r  rX   special_bessel_y1digammac                    d|  dS )Nzcalc_digamma(r   r   r.  s    rV   rs  rs    s    *a*** rX   c                    |  dS )Nz
.digamma()r   r.  s    rV   rs  rs    s    A))) rX   )r  rm  r  rg   r:  c                    d|  dS )Nzcalc_erfcx(r   r   r.  s    rV   rs  rs        (A((( rX   c                    d|  dS )Nzlibdevice.erfcx(r   r   r.  s    rV   rs  rs    s    0A000 rX   special_erfcxrU  c                    d|  d| d| dS )Nz	std::fma(r   r   r   rT  s      rV   rs  rs    s"    666Q66!666 rX   c                    d|  d| d| dS )Nzfmadd(r   r   r   rT  s      rV   rs  rs    s"    666Q66!666 rX   c                    d|  d| d| dS )Nzlibdevice.fma(r   r   r   rT  s      rV   rs  rs    s"    >>>Q>>!>>> rX   )r  rm  r  ro  rg   igammac                    d|  d| dS Nzcalc_igamma(r   r   r   r  s     rV   rs  rs        111Q111 rX   igammacc                    d|  d| dS Nzcalc_igammac(r   r   r   r  s     rV   rs  rs        222a222 rX   gammaincc                    d|  d| dS r  r   r  s     rV   rs  rs    r  rX   special_gammainc	gammainccc                    d|  d| dS r  r   r  s     rV   rs  rs    r  rX   special_gammaincci0c                    d|  dS )Nzcalc_i0(r   r   r.  s    rV   rs  rs        oooo rX   c                    d|  dS Nzlibdevice.cyl_bessel_i0(r   r   r.  s    rV   rs  rs        8A888 rX   c                    |  dS )Nz.i0()r   r.  s    rV   rs  rs    s    A rX   )r  rm  ro  r  rg   i0ec                    d|  dS )Nz	calc_i0e(r   r   r.  s    rV   rs  rs        &!&&& rX   c                    |  dS )Nz.i0e()r   r.  s    rV   rs  rs    s    A rX   special_i0ei1c                    d|  dS )Nzcalc_i1(r   r   r.  s    rV   rs  rs    r   rX   c                    d|  dS Nzlibdevice.cyl_bessel_i1(r   r   r.  s    rV   rs  rs    r#  rX   
special_i1i1ec                    d|  dS )Nz	calc_i1e(r   r   r.  s    rV   rs  rs    r'  rX   special_i1elog_ndtrc                    d|  dS )Nzcalc_log_ndtr(r   r   r.  s    rV   rs  rs    s    +q+++ rX   special_log_ndtrmodified_bessel_i0c                    d|  dS )Nzmodified_bessel_i0_forward(r   r   r.  s    rV   rs  rs        8A888 rX   c                    d|  dS r"  r   r.  s    rV   rs  rs    r#  rX   special_modified_bessel_i0modified_bessel_i1c                    d|  dS )Nzmodified_bessel_i1_forward(r   r   r.  s    rV   rs  rs    r7  rX   c                    d|  dS r-  r   r.  s    rV   rs  rs    r#  rX   special_modified_bessel_i1modified_bessel_k0c                    d|  dS )Nzmodified_bessel_k0_forward(r   r   r.  s    rV   rs  rs    r7  rX   special_modified_bessel_k0modified_bessel_k1c                    d|  dS )Nzmodified_bessel_k1_forward(r   r   r.  s    rV   rs  rs    r7  rX   special_modified_bessel_k1ndtrc                    d|  dS )Nz
calc_ndtr(r   r   r.  s    rV   rs  rs    s    '1''' rX   special_ndtrndtric                    d|  dS )Nzcalc_ndtri(r   r   r.  s    rV   rs  rs    r
  rX   special_ndtri	polygammac                *    |  d| d|  d| d| d|  dS )Nz == 0 ? calc_digamma(z) : (z == 1 ? trigamma(z) : calc_polygamma(r   z))r   r  s     rV   rs  rs  	  s=    bbabbabb!bbXYbb]^bbb rX   scaled_modified_bessel_k0c                    d|  dS )Nz"scaled_modified_bessel_k0_forward(r   r   r.  s    rV   rs  rs        ?1??? rX   !special_scaled_modified_bessel_k0scaled_modified_bessel_k1c                    d|  dS )Nz"scaled_modified_bessel_k1_forward(r   r   r.  s    rV   rs  rs    rN  rX   !special_scaled_modified_bessel_k1spherical_bessel_j0c                    d|  dS )Nzspherical_bessel_j0_forward(r   r   r.  s    rV   rs  rs    s    9Q999 rX   special_spherical_bessel_j0zetac                    d|  d| dS )Nzzeta(r   r   r   r  s     rV   rs  rs  !  s    ***a*** rX   special_zetachebyshev_polynomial_tc                    d|  d| dS )Nzchebyshev_polynomial_t_forward(r   r   r   r  s     rV   rs  rs  &      D1DDDDD rX   special_chebyshev_polynomial_tchebyshev_polynomial_uc                    d|  d| dS )Nzchebyshev_polynomial_u_forward(r   r   r   r  s     rV   rs  rs  +  r[  rX   special_chebyshev_polynomial_uchebyshev_polynomial_vc                    d|  d| dS )Nzchebyshev_polynomial_v_forward(r   r   r   r  s     rV   rs  rs  0  r[  rX   special_chebyshev_polynomial_vchebyshev_polynomial_wc                    d|  d| dS )Nzchebyshev_polynomial_w_forward(r   r   r   r  s     rV   rs  rs  5  r[  rX   special_chebyshev_polynomial_wlegendre_polynomial_pc                    d|  d| dS )Nzlegendre_polynomial_p_forward(r   r   r   r  s     rV   rs  rs  :      C!CCqCCC rX   special_legendre_polynomial_pshifted_chebyshev_polynomial_tc                    d|  d| dS )Nz'shifted_chebyshev_polynomial_t_forward(r   r   r   r  s     rV   rs  rs  ?      L1LLLLL rX   &special_shifted_chebyshev_polynomial_tshifted_chebyshev_polynomial_uc                    d|  d| dS )Nz'shifted_chebyshev_polynomial_u_forward(r   r   r   r  s     rV   rs  rs  D  rl  rX   &special_shifted_chebyshev_polynomial_ushifted_chebyshev_polynomial_vc                    d|  d| dS )Nz'shifted_chebyshev_polynomial_v_forward(r   r   r   r  s     rV   rs  rs  I  rl  rX   &special_shifted_chebyshev_polynomial_vshifted_chebyshev_polynomial_wc                    d|  d| dS )Nz'shifted_chebyshev_polynomial_w_forward(r   r   r   r  s     rV   rs  rs  N  rl  rX   &special_shifted_chebyshev_polynomial_whermite_polynomial_hc                    d|  d| dS )Nzhermite_polynomial_h_forward(r   r   r   r  s     rV   rs  rs  S  s    BBBaBBB rX   special_hermite_polynomial_hhermite_polynomial_hec                    d|  d| dS )Nzhermite_polynomial_he_forward(r   r   r   r  s     rV   rs  rs  X  rh  rX   special_hermite_polynomial_helaguerre_polynomial_lc                    d|  d| dS )Nzlaguerre_polynomial_l_forward(r   r   r   r  s     rV   rs  rs  ]  rh  rX   special_laguerre_polynomial_lzdict[str, OverridesData]r  rg   c                     t           fdt          j        j        t          j        j        t          j        j        t          j        j        fD                       S )Nc              3      K   | ]}|v V  	d S rn   r   )r  r)  rg   s     rV   r  z$is_buffer_removed.<locals>.<genexpr>d  s;         		     rX   )anyr7   r   removed_bufferskernelinplaced_to_removerg   s   `rV   is_buffer_removedr  c  s_         G#H$G&H'	
     rX   c                  4     e Zd ZdZd
 fdZddZdd	Z xZS )DeferredLinezHA line that can be 'unwritten' by adding name to V.graph.removed_buffersrg   rM   linec                    t                                          |           || _        t          |t                    rJ d S rn   )r  r   rg   r;  r'   )rj   rg   r  r  s      rV   r   zDeferredLine.__init__r  s?    	d$45555555rX   rN   r   c                <    t          | j                  s| j        S d S rn   )r  rg   r  ro   s    rV   __call__zDeferredLine.__call__w  s!     ++ 	9trX   c                ,    t          | j        |          S rn   )r  rg   )rj   r  s     rV   	_new_linezDeferredLine._new_line|  s    DIt,,,rX   )rg   rM   r  rM   r   )r  rM   rN   r  )rz   r{   r|   r}   r   r  r  r"  r#  s   @rV   r  r  o  sk        RR6 6 6 6 6 6
   
- - - - - - - -rX   r  c                      e Zd Zdd	dZdS )
BracesBufferrG   r   r   rN   'contextlib.AbstractContextManager[None]c                L     t           j        d fd            } |            S )NrN   Iterator[None]c               3    K   t                    D ]'}                     d           xj        dz  c_        (t                     D ]'} xj        dz  c_                            d           (d V  t                     D ]'}                     d           xj        dz  c_        (t                    D ]'} xj        dz  c_                            d           (d S )N{rG   })ranger  _indent)_r   rj   s    rV   ctxz BracesBuffer.indent.<locals>.ctx  s     6]] " "s###!F7^^ $ $!s####EEEF7^^ " "s###!6]] $ $!s####$ $rX   )rN   r  )
contextlibcontextmanager)rj   r   r  s   `` rV   indentzBracesBuffer.indent  sB    		"	$ 	$ 	$ 	$ 	$ 	$ 
#	"	$ suurX   N)rG   )r   r   rN   r  )rz   r{   r|   r  r   rX   rV   r  r    s-              rX   r  c                  $    e Zd ZU ded<   ded<   dS )InplacedBufferrM   r   r   other_namesNr   r   rX   rV   r  r    s'         OOOrX   r  c                  0    e Zd ZU ded<   dZded<   d	dZdS )
ArgNamerM   rg   Fr   is_constexprrN   c                *    | j          | j        rdnd S )Nz : tl.constexprr  )rg   r  ro   s    rV   	full_namezArgName.full_name  s#    )M$2CK..MMMrX   Nry   )rz   r{   r|   r~   r  r  r   rX   rV   r  r    sJ         IIILN N N N N NrX   r  c                      e Zd ZddZdS )
RemovedArgrN   rM   c                    dS )NREMOVEDr   ro   s    rV   __str__zRemovedArg.__str__  s    yrX   Nry   )rz   r{   r|   r  r   rX   rV   r  r    s(             rX   r  c                      e Zd Zed7d            Zd8d
Zd9dZed:d            Zd;dZd;dZ	d<dZ
d=dZd>dZd?dZd@dZdAd!ZdBd#ZdCd'ZdDd*Z	 dEdFd/ZdGd1ZdHd3ZdId4ZdJd6Zd+S )K
KernelArgsr   rM   odict6Union[dict[_T, Union[str, RemovedArg]], dict[_T, str]]rg   rJ   rN   c                    |                     |t                    }t          |t                    r|  t	          |           x||<   }|S |S rn   )getr  r;  r  r  )r   r  rg   result
new_results        rV   _lookupzKernelArgs._lookup  sS     */4)A)Afj)) 	*0'>#e**'>'>>E$K*rX   rO   c                L    i | _         i | _        i | _        i | _        g | _        d S rn   )input_buffersoutput_buffersinplace_buffersr  workspace_argsro   s    rV   r   zKernelArgs.__init__  s/    -/ACMO/124rX   c                    d                     d                    t          t          | j        | j        | j        | j        g                              S )NzKernelArgs({})r   )formatr   mapr  r  r  r  r  ro   s    rV   __repr__zKernelArgs.__repr__  sW    &&II*+,	 
 

 
 	
rX   r	   r   c                ,    t          | t                    S rn   r;  r  r  s    rV   _buffer_is_marked_removedz$KernelArgs._buffer_is_marked_removed  s     $
+++rX   c                   t           j        j        r*t           j        j        j                            ||          }|t           j        j        vs
J |            || j        v r t          t          | j        |                   S || j	        v r%t          t          | j	        |                   j        S |                    d          r|                     d| j        |          S |                     d| j        |          S )Nseedin_ptr)r7   r   r  mutation_real_namer  r  r  r   rM   r  r  r   r  r  r  r  s     rV   inputzKernelArgs.input  s    7 	H7$7;;D$GGD172222D2224&&&T067774'''(<T(BCCNN??6"" 	B<<(:DAAA||Hd&8$???rX   c                F   t           j        j        r*t           j        j        j                            ||          }|t           j        j        vs
J |            || j        v r%t          t          | j        |                   j	        S | 
                    d| j        |          S )Nout_ptr)r7   r   r  r  r  r  r  r   r  r   r  r  r  s     rV   r  zKernelArgs.output  s    7 	H7$7;;D$GGD172222D2224'''(<T(BCCNN||It':DAAArX   
input_nameoutput_namec                v   |t           j        j        v r$t           j        j                            |           || j        vs
J |            || j        v rJ| j        |         }t          |t                    rJ |j                            |           || j        |<   d S d | j        	                                D             }d | j        	                                D             }t          t          |                    t          |          z   }t          d| ||g          }|| j        |<   || j        |<   d S )Nc                <    g | ]}t          |t                    |S r   r  r  vals     rV   r  z+KernelArgs.make_inplace.<locals>.<listcomp>  s8       !#z22  rX   c                <    g | ]}t          |t                    |S r   r  r  s     rV   r  z+KernelArgs.make_inplace.<locals>.<listcomp>  s8       c:..  rX   
in_out_ptr)r7   r   unaligned_buffersrH  r  r;  r  r  appendr  r  r1   r  )rj   r  r  bufalive_buffersr  inplace_buffer_idxs          rV   make_inplacezKernelArgs.make_inplace  se   222G%))+666$"6666666---&z2C!#z22222O"";///03D --- /6688  M
 /6688  O
 "%VM%:%:!;!;c/>R>R!R 1/11[) C 03D ,03D ---rX   nbytesr   r   tuple[str, int]c                ,   t          |t                              |          t          j                                        t                                                     }t          | j                  D ]\  }}t           	                    ||          r5|j
        }t                               ||          | j        |<   |j        |fc S |j        |j        k    r|j        |j        k    s
J |            | j                            |           |j        dfS )a  
        Allocate or extend a workspace buffer of nbytes bytes.

        This function manages the allocation of a workspace buffer. It either creates
        a new WorkspaceArg or extends an existing one.

        Note:
        - Calling this function will in-place mutate the args by adding or updating
        a WorkspaceArg.
        - The codegen for generating the Python argdefs and call_defs will check
        this field and allocate the buffer accordingly.
        - A new argument "ws_ptr" will be present in the generated code.

        Args:
            nbytes (sympy.Expr): The number of bytes to allocate.
            zero_fill (bool): Whether to initialize the buffer to zero.

        Returns:
            Tuple[str, int]: A tuple containing:
                - "ws_ptr": A string identifier for the workspace pointer.
                - offset: An integer representing the byte offset in the workspace.
        )r   r   r   r   r   )r   r   r   r7   r   get_current_device_or_throwr   rr  r  r   r   r   r   r   r  )rj   r  r   argrs  existing_argr   s          rV   r*  zKernelArgs.workspace  s   . '11)<<76688#//11	
 
 
  ))<== 	 	OA|$$\377 7%+)5):):<)M)M#A&#.6666'3>99 +s~=== >=>=""3'''~q  rX   min_sizec           
     J   t           j                                        }t          |t          j        t          j        dd|j         d|j	         |          }| j
        D ]$}|j        |j        k    r||k    sJ ||f            %| j
                            |           |j        S )a  
        Lazily allocate a graph-wide semaphores buffer with at least min_size.  This is a single buffer shared by
        all kernels and zero initialized once at graph start.  Each kernel must leave the buffer zeroed on exit.

        Warning: multiple calls to this function will return the same buffer.

        Args:
            min_size: the number of int32 semaphores required

        Returns:
            name of the semaphores buffer
        sem_ptrsemaphores_r  )r   r   r   r   r   r   )r7   r   r  r   r   r   r   uint32rM  r  r  r   r  )rj   r  current_devicer  r  s        rV   
semaphoreszKernelArgs.semaphores-  s     <<>>'6, Q^%8QQ>;OQQ!
 
 
 !/ 	@ 	@L&#.88l***S,,?***""3'''~rX   ri   r   c                n   t          |t                    sJ t          |          |f            t          j        |          }|| j        v r| j        |         S | j                                        v r6 t          fd| j                                        D                        | j        |<   S )Nc              3  F   K   | ]}|                               d V  dS )rG   N)r  )r  r  rg   s     rV   r  z)KernelArgs.seed_offset.<locals>.<genexpr>Q  s4      UU1!,,tBTBTUQUUUUUUrX   )r;  r   rM  r   r  r  r  sum)rj   rg   ri   s    ` rV   seed_offsetzKernelArgs.seed_offsetI  s    %%%;;UU';;;%e$$DM!!=''4=''))))WUUUU(<(<(>(>UUUUUWW   $erX   r  c                    t          |t          j                  sJ t          |          |f            |j        dk    rd| j        |<   dS |                     d| j        |          S )Nr  ks)r;  r   SymbolrM  rg   r  r  r  s     rV   r   zKernelArgs.sizeV  sc    $--AAT

D/AAA-9"(DM$6||D$-666rX   Iterator[str]c                    t          | j                                        | j                                        | j                                                  S rn   )r   r  keysr  r  ro   s    rV   
call_nameszKernelArgs.call_names]  sG    ##%%t':'?'?'A'A4=CUCUCWCW
 
 	
rX   r   c                   | j                             |d          }|t          |t                    s|j        S | j                            |d          }|t          |t                    s|S | j                            |d          S )z;
        Returns inner name of a given outer name.
        N)r  r  r;  r  r   r  r  )rj   rg   inplacedr  s       rV   arg_namezKernelArgs.arg_nameb  s     '++D$77
8Z(H(H&&)--dD99":k:+N+N"!%%dD111rX   r  r   r   c                    |S rn   r   )rj   r  r   s      rV   wrap_ptr_argzKernelArgs.wrap_ptr_argn  s    
rX   r   
SymbolLikec                     t          |          S rn   )rM   )rj   r   s     rV   wrap_size_argzKernelArgs.wrap_size_argq  s    4yyrX   Ndtype_to_cpp_type Optional[dict[torch.dtype, str]]&tuple[list[str], list[str], list[str]]c                <   ddl m} |ddl m} |}g }g }g }t          | j                                                  D ]}t          |t                    r|j        d         }|j	        }	t          j                            |          }
||
         }|                    | d|	            |                    |                     ||
                     |                    | d           | j                                        D ]\  }}	|| j        v rt          j                            |          }
||
         }|                    d| d|	            |                    |                     ||
                     |                    d| d           | j                                        D ]\  }}|| j        v st          |t                    r$t          j                            |          }
||
         }|                    | d|            |                    |                     ||
                     |                    | d           | j                                        D ]\  }}	|                    d| d|	            |                    |                     |                     |                    d|            t          j        j        r$t          j        j                            |           | j        r
J d	            |||fS )
NrG   )
INDEX_TYPE)r  r  z* *zconst  zWorkspace not supported on CPU )r  r  r  r1   r  r  r;  r  r  r   r7   r   r   r  r  r  r  r  r  r  wrapper_codeensure_size_computedr  )rj   r  r  r  	call_argsarg_defs	arg_typesr  outerinnerr   	cpp_dtypemaybe_inners                rV   cpp_argdefszKernelArgs.cpp_argdefst  sd    	*)))))$////// ,		t3::<<== 		. 		.H(J// (,E'EG%%e,,E)%0IOOy33E33444T..ue<<===	___---- .4466 	4 	4LE5,,,G%%e,,E)%0IOO9Y99%99:::T..ue<<===2i2223333"&"5";";"="= 	. 	.E;,,,
;
0S0S,G%%e,,E)%0IOOy99K99:::T..ue<<===	___---- M//11 	A 	ALE5OO9Z99%99:::T//667772j22333w# A$99%@@@&II(III&I--rX   ?tuple[list[ArgName], list[str], list[KernelArgType], list[Any]]c                   g }g }g }g }t          | j                                                  D ]}t          |t                    r|                    t          |j                             |                    |j        d                    |                    t          j
                            |j        d                              |                    t          |j        |j        d         t          j
                            |j        d                                        t          | j                                        | j                                                  D ]\  }}|| j        v st          |t                    r$|                    t          |                     |                    |           |                    t          j
                            |                     |                    t          ||t          j
                            |                               | j                                        D ]\  }}|                    t          |                     |                    |           |                    t%          |                     |                    t'          ||                     t          j
        j        r$t          j
        j                            |           | j        D ]r}|                    t          |j                             |                    |j                   |                    |           |                    |j                   s||||fS )Nr  )rg   r   r   )r1   r  r  r;  r  r  r  r   r  r7   r   r   r   r   r  r  r  r  rM  r   r  r  r  r   r   )	rj   r   r  r  precompile_argsr  r  r  r  s	            rV   python_argdefszKernelArgs.python_argdefs  s@    #%!	!	/1t3::<<== 	 	H(J// OOGH$788999X1"5666QW..x/CB/GHHIII""!,#/3'++H,@,DEE      "$$&&(;(A(A(C(C
 
 	 	LE5 ,,,
5*0M0M,OOGENN+++U###QW..u55666"" '++E22      !M//11 	A 	ALE5OOGENN+++U###T%[[)))""75%#8#8999w# A$99%@@@& 	( 	(COOGCN33444S^,,,""3'''SY''''OY>>rX   Iterator[tuple[str, str]]c              #    K   t          | j                                                  D ]}t          |t                    r|j        D ]z}|t          j        j        v s|t          j	        j        v r)|| j
        v r| j
        |         |j        fV  || j        v r)t          t          | j        |                   |j        fV  {d S rn   )r1   r  r  r;  r  r  r7   r   r  r  r  r   r  r   rM   )rj   r  others      rV   aliaseszKernelArgs.aliases  s      t3::<<== 	U 	UH(J// !- 	U 	UQW777 ;;;D...,U3X5HHHHHD///sD$7$>??ATTTTT	U	U 	UrX   c                    t          | j                            |t                    t                    o2t          | j                            |t                    t                    S rn   )r;  r  r  r  r  r  r  s     rV   
is_removedzKernelArgs.is_removed  sS    ##D'22J
 
 N-11$@@*MM	NrX   OrderedSet[str]c                   t                      }t          | j                                                  D ]8}t	          |t
                    r|                    |j        d                    9| j        	                                D ]9\  }}|| j        v st	          |t
                    r$|                    |           :|S )Nr  )
r   r1   r  r  r;  r  rH  r  r  r  )rj   	live_outsr  r  r  s        rV   live_output_bufferszKernelArgs.live_output_buffers  s    %/\\	t3::<<== 	4 	4H(J// MM(.r23333 /5577 	! 	!LE5,,,
5*0M0M,MM%    rX   )r   rM   r  r  rg   rJ   rN   rM   rx   ry   )rg   r	   rN   r   r.  )r  rM   r  rM   rN   rO   )r  r   r   r   rN   r  )r  r   rN   rM   )rg   rM   ri   r   rN   rM   )rg   r  rN   rM   )rN   r  )rg   rM   rN   r   )r  rM   r   r   rN   rM   )r   r  rN   rM   rn   )r  r  rN   r  )rN   r  )rN   r  r  )rN   r  )rz   r{   r|   r   r  r   r  r  r  r  r  r*  r  r  r   r  r  r  r  r  r
  r  r  r  r   rX   rV   r  r    s       	 	 	 \	5 5 5 5
 
 
 
 , , , \,
@ 
@ 
@ 
@B B B B4 4 4 48'! '! '! '!R   8   7 7 7 7
 
 
 


2 
2 
2 
2       EI.. .. .. .. ..`/? /? /? /?bU U U UN N N N
 
 
 
 
 
rX   r  c                  R     e Zd ZdZ	 	 dd fdZddZddZddZddZddZ	 xZ
S ) ry  aD  A CSEVariable is just a name for an expression but it is useful to be able to annotate them on a backend dependent basis.
    To do so, the backends can simply overload `Kernel.create_cse_var`
    The "CSEVariable.update_on_args" method gives you a hook for annotations
    See example of TritonCSEVariable in triton.py
    Nrg   rM   boundsValueRanges[Any]r   r   shaperF   c                    t                                                       t          |t                    sJ t	          |                      || _        || _        d| _        || _        || _	        d S r+  )
r  r   r;  r   rM  rg   r  	use_countr   r  )rj   rg   r  r   r  r  s        rV   r   zCSEVariable.__init__   sf     	&+..<<V<<.	



rX   rN   c                    | j         S rn   r  ro   s    rV   r  zCSEVariable.__str__  s
    yrX   r   c                *    t          | j                  S rn   )hashrg   ro   s    rV   __hash__zCSEVariable.__hash__  s    DIrX   r  objectr   c                L    t          |t                    o|j        | j        k    S rn   )r;  ry  rg   )rj   r  s     rV   __eq__zCSEVariable.__eq__  s     %--I%*	2IIrX   rq   r	   r  rO   c                    d S rn   r   )rj   rg   rq   r  s       rV   update_on_argszCSEVariable.update_on_args  s    rX   c                0    | j         j         d| j        dS r  )r  rz   rg   ro   s    rV   r  zCSEVariable.__repr__  s     .)::DI::::rX   r  )rg   rM   r  r  r   r   r  rF   ry   )rN   r   )r  r  rN   r   )rg   rM   rq   r	   r  r	   rN   rO   )rz   r{   r|   r}   r   r  r  r!  r#  r  r"  r#  s   @rV   ry  ry    s          (, $            J J J J   ; ; ; ; ; ; ; ;rX   ry  AugmentedKeyT)default)boundr&  .c                      e Zd ZdZ	 	 	 	 	 	 	 d7d8dZd9dZd:dZd:dZd;dZd<dZ	d=d!Z
d>d#Zd?d$Z ej                    d%d%ddd&d@d3Z ej                    ddfdAd4Z ej                    ddfdBd6ZdS )CCSEz Common subexpression eliminationr  tmpNr   rM   r_   name_prefixiter_buffersOptional[itertools.count[int]]store_cache.Optional[MutableMapping[str, CSEVariableType]]reduction_cache<Optional[MutableMapping[ReductionCacheKey, CSEVariableType]]varname_map$Optional[dict[str, CSEVariableType]]c                    || _         || _        i | _        || _        |pi | _        |pi | _        |pt          j                    | _        t                      | _
        |pi | _        d S rn   )r   r_   _cacher+  r.  r0  	itertoolsr   iter_buffer_idsr   invalidated_storesr2  )rj   r   r_   r+  r,  r.  r0  r2  s           rV   r   zCSE.__init__-  sv     FH&ALARPR!r 	 6B5VY_EVEV3=<<7B7HbrX   	keep_varsOrderedSet[CSEVariable]rN   rO   c                    g | j                                         D ]+\  }}|vr"| j         |= | j                            |           ,r,fd| j                                        D             | _        d S i | _        d S )Nc                $    i | ]\  }}|v 	||S r   r   )r  r  r  r9  s      rV   r  z"CSE.invalidate.<locals>.<dictcomp>K  s$    RRRDAq1	>>1a>>>rX   )r.  r  r8  rH  r5  )rj   r9  rg   r*  s    `  rV   
invalidatezCSE.invalidateE  s    44+11334 	2 	2ID#)##$T*'++D111 	RRRRDK,=,=,?,?RRRDKKKDKKKrX   r   c           	          t          |           | j        | j        | j        | j        | j        | j        | j                  S )N)r   r_   r+  r,  r.  r2  r0  )rM  r   r_   r+  r7  r.  r2  r0  ro   s    rV   clonez	CSE.cloneO  sI    tDzz;;(-(( 0
 
 
 	
rX   c                    |                                  }t          | j                  |_        t          | j                  |_        t          | j                  |_        |S )zNReturn a copy of using ScopedDict so changes to *_cache aren't visible in self)r?  r,   r5  r0  r.  )rj   new_cses     rV   scoped_copyzCSE.scoped_copyZ  sK    **,,#DK00",T-A"B"B()9::rX   	cache_keyr%  c                ,    t          t          |          S )z@Override this method to augment cache key with backend specifics)r   r%  rj   rC  s     rV   augment_keyzCSE.augment_keyb  s    M9---rX   r  r  c                @    || j         |                     |          <   d S rn   r5  rF  )rj   rC  r  s      rV   putzCSE.putf  s"    36D$$Y//000rX   r   c                :    |                      |          | j        v S rn   )rF  r5  rE  s     rV   containszCSE.containsi  s    	**dk99rX   Optional[CSEVariableType]c                ^    | j                             |                     |          d           S rn   )r5  r  rF  rE  s     rV   try_getzCSE.try_getl  s&    {t//	::DAAArX   c                B    | j         |                     |                   S rn   rH  rE  s     rV   r  zCSE.geto  s    {4++I6677rX   T)r  rh   
assignmentr   r  r   r*   r   CUnion[str, CSEVariable, OpsValue, IndentedBuffer, DeferredLineBase]r  r  rh   rP  r   r   r  rF   c          	        t          |t                    r|j        }|s|sJ t          |t                    rD|j                            |          |_        |xj        dz  c_        t          t          |          S t          |t                    r|
                                }n6t          |t                    r|j        }nt          |t                    sJ |}|                     |          }	||sd}|	s|                     |||          }	|                     ||	           |ryt"          j        j        r&t"          j        j                            |d           t          |t                    rR|r|                    | j         |	 d           |                    |           |                    | j                   nt          |t                    rE|sJ |                    |                    | j         |	 d|j         | j                              n|r| j         |	 d| | j         }
n| | j         }
|                    |
           |rGt4          j        j        st4          j        j        r%|#t=                      dk    rt?          ||	|           n/|	j                            |          |	_        |	xj        dz  c_        |	S )NrG   r   T)	only_oncez =z = rm  ) r;  r4   ri   ry  r  tightenr  r   r  r*   getvaluer'   r  rM   rN  newvarrI  r7   r  current_nodecodegen_originating_infor  r   splicer_   r  r    r  r  r  r)   r  )rj   r   r   r  rh   rP  r   r  rC  r  r  s              rV   generatezCSE.generater  s    dH%% 	:D"
"""dK(( 	 +--f55DKNNaNN...n-- 	II.// 		IIdC(((((Ill9%%== E '	++feU33CHHY$$$  88( H)BB$ C    dN33 8! B((DK)@)@)@)@AAAMM$'''$$T[1111&677 8%%%:$$$+'Ws'W'Wty'W$+'W'WXX    " 6"&+JsJJtJT[JJ"&555$$T*** #	8 #/K	8  &2J		8 "-/11U::#FC777 ++F33CJMMQMM
rX   c                    | j          t          | j                   }t          j                            ||||          }|| j        |<   |S rn   )r+  r   r7  r7   r  create_cse_varr2  )rj   r  r   r  var_namer  s         rV   rV  z
CSE.newvar  sO     &DT-A(B(BDDh%%huEE%("
rX   rg   c                    t          j        | j        vfd           t          j                            |||          }|| j        <   |S )Nc                     d  S )Nzduplicate name: r   r  s   rV   rs  zCSE.namedvar.<locals>.<lambda>  s    2KT2K2K rX   )r   _check_valuer2  r7   r  r\  )rj   rg   r  r   r  r  s    `    rV   namedvarzCSE.namedvar  s`     	((*K*K*K*K	
 	
 	
 h%%dFE5AA!$
rX   )r  r  r*  NNNN)r   rM   r_   rM   r+  rM   r,  r-  r.  r/  r0  r1  r2  r3  )r9  r:  rN   rO   rN   r   )rC  rM   rN   r%  )rC  rM   r  r  rN   rO   )rC  rM   rN   r   )rC  rM   rN   rL  )rC  rM   rN   r  )r   r*   r   rQ  r  r  rh   r   rP  r   r   r   r  rF   rN   r  )r  r  r   r   r  rF   rN   r  )
rg   rM   r  r  r   r   r  rF   rN   r  )rz   r{   r|   r}   r   r=  r?  rB  rF  rI  rK  rN  r  r   unknownrZ  rV  ra  r   rX   rV   r)  r)  *  s       **  7;FJ <@I I I I I0   	
 	
 	
 	
   . . . .7 7 7 7: : : :B B B B8 8 8 8 $7;#6#8#8'+ $K K K K K K^ $7;#6#8#8'+ $		 	 	 	 	 $7;#6#8#8'+ $      rX   r)  c                  0     e Zd Zd fdZddZdd
Z xZS )CodeGenrN   rO   c                x    t                                                       t          j                    | _        d S rn   )r  r   r  	ExitStack
exit_stackrj   r  s    rV   r   zCodeGen.__init__  s-    $.00rX   r   c                8    | j                                          | S rn   )rh  	__enter__ro   s    rV   rk  zCodeGen.__enter__  s    !!###rX   exc_typer	   exc_valexc_tbc                >    | j                             |||           d S rn   )rh  __exit__)rj   rl  rm  rn  s       rV   rp  zCodeGen.__exit__  s"      7F;;;;;rX   rx   rb  rl  r	   rm  r	   rn  r	   rN   rO   )rz   r{   r|   r   rk  rp  r"  r#  s   @rV   re  re    se        1 1 1 1 1 1   < < < < < < < <rX   re  c                  h    e Zd ZU dZded<   dZded<   dZded<   	 d_d` fdZej	        dad            Z
ej	        	 	 dbdcd            ZdddZdddZded!Z	 dfdgd$Zdhd+Zdid2Zdjd5Zdkd7Z	 	 dbdldAZedmdB            Z	 dfdndJZdodMZdpdNZdq fdPZdr fdUZdsdVZdtdWZdtdXZdudZZdvd\Z dwd^Z! xZ"S )xKernelr  rM   newvar_prefixr_   Nz'Optional[Callable[[], OpsHandler[Any]]]	overridesTrq   Optional[KernelArgs]increase_kernel_countr   rN   rO   c                f   t                                                       |rt          xj        dz  c_        |pt	                      | _        t                      | _        t                      | _        t                      | _	        d| _
        d| _        t          | j        | j                  | _        t!                      | _        t!                      | _        d | _        d | _        d | _        d | _        t!                      | _        t!                      | _        i | _        d| _        d | _        d S )NrG   r   )r  r   r!   generated_kernel_countr  rq   r*   loadscomputestoresnum_loadnum_reductionr)  rt  r_   cser   must_keep_buffersstore_buffer_names
_load_mask_load_otherrW  node_to_boundsr  r  inplace_update_buffersmin_elem_per_threadkernel_name)rj   rq   rw  r  s      rV   r   zKernel.__init__  s    	  	0**a/**(JLL	#%%
%''$&&.1$2Ddk.R.R2<,,3=<<)-4859OS0:3=<<
 79##$ *.rX   r  rE   r  c              #     K   | j         }|| _         |j                                                                        | _        	 d V  || _         d S # || _         w xY wrn   )rW  r  r  
get_boundsr  )rj   r  priors      rV   set_current_nodezKernel.set_current_node  sh      ! "j//11<<>>	&EEE %DD%%%%s   A 	Albr*   cbOptional[IndentedBuffer]sbc              #    K   ||}|d u x}rt                      }| j        }| j        }| j        }| j        }|| _        || _        || _        |                                | _        	 d V  || _        || _        || _        || _        |r|rJ d            d S d S # || _        || _        || _        || _        |r|r
J d            w xY w)Nz$unexpected store inside swap_buffers)r*   rz  r{  r|  r  rB  )	rj   r  r  r  disallow_storesrz  r{  r|  r  s	            rV   swap_bufferszKernel.swap_buffers  s      :B Dj(? 	"!!B
,h
??$$	FEEEDJ"DL DKDH FEEEEEvF FEE DJ"DL DKDH FEEEEEvvvvvs   &B ,Crg   r  r   ry  c                    t           rn   r  r  s      rV   r  zKernel.load4  r  rX   c                |    | j         }	 | j        | _         |                     ||          || _         S # || _         w xY w)z+A load the depends on an index we have read)rz  r{  r  )rj   rg   r  r  s       rV   indirect_loadzKernel.indirect_load7  sD    
	DJ99T5))DJJDJs   !2 	;ri   c                    t           rn   r  r  s       rV   r  zKernel.store_reductionA  r  rX   r^   r6   c                    t           rn   r  r  s        rV   r  zKernel.storeD  
     "!rX   r   r   r  r  r5   +Union[CSEVariable, tuple[CSEVariable, ...]]c                    t           rn   r  r  s        rV   r  zKernel.reductionI  
     "!rX   r  r  r  UCallable[[tuple[CSEVariable, ...], tuple[CSEVariable, ...]], tuple[CSEVariable, ...]]r  tuple[CSEVariable, ...]c                    t           rn   r  r  s       rV   r  zKernel.scanR  s
     "!rX   r  r  c                    t           rn   r  r  s        rV   r  zKernel.sort\  r  rX   dict[sympy.Symbol, sympy.Expr]c                    t           rn   r  ro   s    rV   
var_rangeszKernel.var_rangese  r  rX   r  r  r  r  r  r  r  r  Optional[CSEVariable]c                    t           )z3
        See [Note: Inductor bucketize op]
        r  r  s           rV   r  zKernel.bucketizeh  s
     "!rX   c                    t           rn   r  ro   s    rV   assert_functionzKernel.assert_functionw  s    !!rX   r  Union[CSEVariable, str]r  r   r  mask!Optional[Union[CSEVariable, str]]c           	        t          |t                    rt          |          }t          |t                    sJ t          |                      |t          |t                    sJ |t          |t                    sJ |r|rd| d| d| d| d	}| d| d| }n|r
| d| }|}n|sJ | d| }|}|r	d| d| d}| j         d| d| dS )	Nr  z <= z) & (z < r   z) | ~(z, "index out of bounds: z"))r;  ry  rM   rM  r  )rj   r  r  r  r  rh  
cond_prints          rV   indirect_assertzKernel.indirect_assert{  sP    c;'' 	c((C#s##..T#YY..#}
5# 6 6}}6}
5# 6 6}}6 	U 	 =u<<#<<C<<E<<<D!66s66u66JJ 	&&&&DJJLL5%%e%%DJ 	+*t**4***D&UUUUzUUUUrX   r   r   c                    t           rn   r  r  s        rV   r  zKernel.check_bounds  r  rX   c                    t           rn   r  r  s     rV   index_to_strzKernel.index_to_str  r  rX   r   c           	     J   t                                                       | j        sJ | j                            t          j        t          | |                                                                | j                            t          j        |                      | S rn   )	r  rk  ru  rh  enter_contextr7   set_ops_handlerCSEProxyset_kernel_handlerri  s    rV   rk  zKernel.__enter__  s    ~~%%htT^^-=-=>>??	
 	
 	
 	%%a&:4&@&@AAArX   rl  r	   rm  rn  c                v    |                                   t                                          |||           d S rn   )remove_kernel_local_buffersr  rp  )rj   rl  rm  rn  r  s       rV   rp  zKernel.__exit__  s7    ((***7F33333rX   c                l   t           j        j        sdS t          fd| j        D                       }t                      | j        D ]D}|| j        vr9|| j        j        vr+                    ||          r	                    |           ED ]}|| j        j
        v rz| j        j
        |         }t          |t                    r8t          fd|j        D                       }|r|                     |           | j        	                    |           |                     |           dS )z
        Any buffers that are both created and have a last use in the
        same kernel can be removed.

        Note that V.graph.scheduler can be None when codegening triton template
        kernels.
        Nc              3  h   K   | ],}|j         v j         |                                         V  -d S rn   )name_to_bufdefining_op_name)r  r  r  s     rV   r  z5Kernel.remove_kernel_local_buffers.<locals>.<genexpr>  sQ       &
 &
i+++ !#&7799++++&
 &
rX   c              3      K   | ]}|v V  	d S rn   r   )r  r  names_to_removes     rV   r  z5Kernel.remove_kernel_local_buffers.<locals>.<genexpr>  s(      KKaQ/1KKKKKKrX   )r7   r   r  r   r  r  rq   r  $can_buffer_be_removed_through_fusionrH  r  r;  r  r  r  remove_inplace_bufferr  remove_buffer)rj   fused_node_namesrg   r  rf   r  r  s        @@rV   r  z"Kernel.remove_kernel_local_buffers  s    G%	 	F% &
 &
 &
 &
.&
 &
 &
 
 

 ,6<<+ 	* 	*DD222	 777BB*  8
  ##D)))# 
	) 
	)Dty000i/5c:.. KKKK3?KKKKK 5..t444'++D1111""4((((
	) 
	)rX   c                    t                               d|           t          | j        j        |<   | j                            |           d S )Nzremove_buffer(%r))r@  rU   r  rq   r  r  rH  r  s     rV   r  zKernel.remove_buffer  sE     			%t,,,)0	 &  &&&&&rX   c                    t                               d|           t          | j        j        |<   | j                            |           d S )Nzremoving_inplace_buffer(%r))r@  rU   r  rq   r  r  rH  r  s     rV   r  zKernel.remove_inplace_buffer  sC    		/666*1	!$'  &&&&&rX   ;Union[list[sympy.Expr], tuple[sympy.Expr, ...], sympy.Expr]c                
    t          |t          t          f          r fd|D             S t          j        j                            |          }t          |j        d           } fd|D             }t          ||          S )Nc                :    g | ]}                     |          S r   )rename_indexingr  r)  rj   s     rV   r  z*Kernel.rename_indexing.<locals>.<listcomp>  s'    ;;;D((++;;;rX   c                    | j         S rn   r  )ss    rV   rs  z(Kernel.rename_indexing.<locals>.<lambda>  s    !& rX   )r  c                    i | ]N}t          |t          j        t          j        t          j        f          3|j                            |          OS r   )r   r   UNBACKED_INTSIZEPRECOMPUTED_SIZErq   r   r  s     rV   r  z*Kernel.rename_indexing.<locals>.<dictcomp>  sb     
 
 
%I) 
ty~~a  
 
 
rX   )
r;  listtupler7   r   r  r  sortedfree_symbolsr/   )rj   r  sorted_symbolsreplacementss   `   rV   r  zKernel.rename_indexing  s    
 edE]++ 	<;;;;U;;;; ))%00 28H8HIII
 
 
 
#
 
 
 %...rX   r  c                    t          |i |S rn   )ry  )rj   rq   r  s      rV   r\  zKernel.create_cse_var  s    D+F+++rX   r@   c                b    |dS | j                             |                                          S )zC
        Returns arg name of a given input or output node.
        N)rq   r  r   )rj   r  s     rV   r  zKernel.arg_name  s,     <4y!!$--//222rX   )NT)rq   rv  rw  r   rN   rO   )r  rE   rN   r  r  )r  r*   r  r  r  r  rN   r  rg   rM   r  r   rN   ry  rg   rM   r  r   ri   ry  rN   rO   rn   
rg   rM   r  r   ri   ry  r^   r6   rN   rO   
r   r   r  r   r  r5   ri   r  rN   r  r  r  r  r  r  r  rN   r  
r  r  r  r  r  r   r  r   rN   r  )rN   r  r  ry  r  r  r  ry  r  r   r  r   r  r  r  r  rN   ry  ry   )
r  r  r  r   r  r   r  r  rN   rM   r  )r  r   rN   rM   rb  rq  rx   rg   rM   rN   rO   )r  r  rN   r   )rq   r	   r  r	   rN   ry  )r  r@   rN   r   )#rz   r{   r|   rt  r~   r_   ru  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  rk  rp  r  r  r  r  r\  r  r"  r#  s   @rV   rs  rs    s        MF9=I==== PT /  /  /  /  /  /  /D & & & &  (,'+	F F F F F8" " " "   " " " " SW" " " " "
" " " "" " " "" " " "" " " " 4804" " " " " " " " X" 37V V V V V<" " " "
" " " "     4 4 4 4 4 4%) %) %) %)N' ' ' '' ' ' '
/ / / /., , , ,3 3 3 3 3 3 3 3rX   rs  c                  :    e Zd ZU dZded<   dZded<   dZded	<   dS )
r  r  zClassVar[str]r  Nr   r   r  rM   ops_name)rz   r{   r|   r  r~   r   r  r   rX   rV   r  r  	  sE         "C""""#'E''''HrX   r  c                 d    	 dd l } |                     | j                  S # t          $ r Y d S w xY w)Nr   )	undefined)jinja2EnvironmentStrictUndefinedImportError)r  s    rV   
jinja2_envr  	  sW    !!, " 
 
 	
    tts   ! 
//c                      e Zd ZdZe	 ddd	            Zed d            Zed!d            Zd"dZe	d#d            Z
d$dZd%dZd&dZdS )'KernelTemplatezg
    Base class for defining kernel templates.

    Children classes: TritonTemplate, CUDATemplate
       sourcerM   num_indentsr   indents_spacingrN   c                    |                      d          }t          |          dk    rfd|dd          D             |dd <   d                    |          S )NTrG   c                &    g | ]}d z  z  |z   S )r  r   )r  r  r  r  s     rV   r  z6KernelTemplate.indent_except_first.<locals>.<listcomp>&	  s5       AE&4<  rX   r  )
splitlinesr  r   )r  r  r  liness    `` rV   indent_except_firstz"KernelTemplate.indent_except_first 	  sw     !!$''u::>>    INqrr  E!""I wwu~~rX   r	   c                    t                      }|d S t          j        |j        d<   ddlm} 	 |                    |           S # |$ r} G d d|          } ||          |d }~ww xY w)Nr  r   )TemplateSyntaxErrorc                  (     e Zd Zd fdZd	dZ xZS )
IKernelTemplate._template_from_string.<locals>.DetailedTemplateSyntaxErrororiginal_errorr  rN   rO   c                    t                                          |j        |j        |j        |j                   || _        d S rn   )r  r   messagelinenorg   filenamer  )rj   r  r  s     rV   r   zRKernelTemplate._template_from_string.<locals>.DetailedTemplateSyntaxError.__init__8	  sG    GG$$&.&-&+&/	   +9D'''rX   rM   c                   d| j          d}|d| j         dz  }t          | j        d          r| j        j                            d          }|dz  }t          d| j         dz
            }t          t          |          | j         dz             }t          ||          D ]i}|| j         dz
  k    rE||dz    d	||          dz  }t          | j        d
          r|dd| j        j
        dz
  z  z   dz   z  }U||dz    d||          dz  }j|S )NzError in template at line 
zError message: r  z	Context:
r   r   rG   z: --> columnz     r  z^
z:     )r  r  r  r  r  splitmaxminr  r  r  )rj   
error_infor  startendrs  s         rV   r  zQKernelTemplate._template_from_string.<locals>.DetailedTemplateSyntaxError.__str__A	  sU   !Mdk!M!M!MJ"DDL"D"D"DDJt2H== K $ 3 : @ @ F F"l2
 #At{Q 7 7!#e**dkAo>>!&uc!2!2 
K 
KA DK!O33 *Q.J.JeAh.J.J.J J
#*4+>#I#I !&$.(/*-1D1Ka1O*P)Q*/)0%&J !+Q.J.JeAh.J.J.J J

%%rX   )r  r  rN   rO   ry   )rz   r{   r|   r   r  r"  r#  s   @rV   DetailedTemplateSyntaxErrorr  7	  sQ        9 9 9 9 9 9& & & & & & & &rX   r  )r  r  r  filtersr  r  from_string)r  envr  er  s        rV   _template_from_stringz$KernelTemplate._template_from_string+	  s    ll;4-;-O)*......#	8??6***" !	8 !	8 !	8& & & & &.A & & &> .-a00a7C!	8s   A A'A""A'	fake_outsUnion[list[Buffer], Buffer]Callable[[str], torch.dtype]c                    t           j        j        t          | t          t
          f          rd | D             n(|                                 |                                 idfd}|S )Nc                \    i | ])}|                                 |                                *S r   )r   r   )r  r  s     rV   r  z2KernelTemplate._fake_get_dtype.<locals>.<dictcomp>^	  s*    KKK#cllnncmmooKKKrX   rg   rM   rN   r   c                L                         |           }||S  |           S rn   )r  )rg   r  _get_dtype_reallookups     rV   r   z1KernelTemplate._fake_get_dtype.<locals>.get_dtypeb	  s/    ZZ%%F!"?4(((rX   )rg   rM   rN   r   )r7   r   r   r;  r  r  r   )r
  r   r  r  s     @@rV   _fake_get_dtypezKernelTemplate._fake_get_dtypeX	  s     '+i$// 	CKKKKKFF((**I,?,?,A,ABF	) 	) 	) 	) 	) 	) 	) rX   rg   rO   c                    || _         d S rn   r  r  s     rV   r   zKernelTemplate.__init__j	  s    			rX   c                    | j         S )a  
        entry point to override for templates to ensure a uid e.g. through a prefix

        the purpose of this is that every KernelTemplate/ExternKernelChoice is unique
        in the system, but reproducible e.g. restarting pytorch should yield the same id
        r  ro   s    rV   uidzKernelTemplate.uidm	  s     yrX   r  Optional[ChoiceCaller]c                `    g } | j         |fi |}|t          |          dk    r|d         S dS )z
        Maybe generates a new ChoiceCaller and returns it, or None if generation fails.

        kwargs: Additional kwargs to be passed to self.generate() to generate a new ChoiceCaller.
        NrG   r   )maybe_append_choicer  )rj   r  temp_choicesr  s       rV   choice_or_nonezKernelTemplate.choice_or_nonex	  sI     #%)),AA&AA>c,//144?"trX   choicesrr   Optional[NotImplementedError]c                   	 |                      | j        di |           dS # t          $ r\}t                              d|t          |           t                                          t          j        k                |cY d}~S d}~ww xY w)a%  
        Maybe generates a new ChoiceCaller and appends it into existing choices.
        Returns None if success, otherwise returns the error.

        choices: A list of ChoiceCallers.
        kwargs: Additional kwargs to be passed to self.generate() to generate a new ChoiceCaller.
        Nz3Cannot Append Choice: %s. KernelTemplate type is %s)
stack_infor   )	r  rZ  r   r@  inforM  getEffectiveLevelrS   INFO)rj   r  r  r  s       rV   r  z"KernelTemplate.maybe_append_choice	  s    
	NN=4=226223334" 	 	 	HHET

0022W\A	     HHHHHH	s    $ 
B
AB?B
B
r>   c                    t           )zM
        Generates a ChoiceCaller instance from the given arguments.
        r  )rj   r  s     rV   rZ  zKernelTemplate.generate	  s
    
 "!rX   N)r  )r  rM   r  r   r  r   rN   rM   )r  rM   rN   r	   )r
  r  rN   r  r  ry   )r  r	   rN   r  )r  rr   r  r	   rN   r  )r  r	   rN   r>   )rz   r{   r|   r}   r   r  r	  r  r   r   r  r  r  rZ  r   rX   rV   r  r  	  s          >?    \ *8 *8 *8 \*8X    \"       X
 
 
 
   ." " " " " "rX   r  c                       e Zd Zd ZdE fdZdFdZdGdZ	 	 dHdIdZdJd ZdKd"Z	dLd$Z
	 dMdNd(ZdOd)ZdPd0ZdQd7ZdRd:Z	 	 dSdTdDZ xZS )Ur  r  Kernel[Any]parent_handlerOpsHandler[Any]c                    t                                                       ddlm}  |            | _        || _        || _        d S )Nr   ValueRangeAnalysis)r  r   r  r)  vr_analysisr  r%  )rj   r  r%  r)  r  s       rV   r   zCSEProxy.__init__	  sQ    //////--//,rX   rg   rM   rq   tuple[Any, ...]r  dict[str, Any]rN   r	   c           	     L  
  | j         gR i  t          | j                  i }t                      }t	                      }t                      
t          |          }d d dk    r
dk    r|j        |j        nldk    rB
dk    r<t          j	        j
        j                            t          j        d           j        d n$
dv r t          |          } |i  |i 
dv rJ dd
fd}	t          j        |	|          S )Nmaskedro  rm  )ro  rm  ry  )ro  rm  r   r  Union[str, CSEVariable]rN   ry  c                   t          	t          t          f          r	
         n	}t          t          t          f          r=t                    dk    r*t          d         t          t          f          r
         n}
dz  
t          | t                    r"dk    r| j        || _        | j        || _        t          j        j	        
                    t          j        j        | 	          }|                               t          j        j        st          j        j        r$|J t#          t          j        j        ||           |S )Nr   rG   rm  r  r   r  )r;  r  r  r  ry  r   r  r7   r  r  rZ  r{  r#  r    r  r  r  r  )r  	var_dtype	var_shapecsevarrq   r  r  r  rg   r  
output_idxoutput_shapes       rV   do_csez!CSEProxy._default.<locals>.do_cse	  sd   
 lT5M::"Z((!  lT5M::"%%))|Au>> * Z(( "  !OJ ![)) (e##'AG7?'AGX\** "" +  F !!$f555 #?A&>A !,,,AH,fi@@@MrX   )r  r/  rN   ry  )_bound_variabler  r%  r"   r%   r)   r   r  r7   interpreterrW  r  r  r  r  pytreetree_map)rj   rg   rq   r  ri   dtype_handlershape_handlershape_opdtype_opr7  r  r  r  r5  r6  s    ```      @@@@@rV   _defaultzCSEProxy._default	  s   %%d<T<<<V<<2+T22DCFCC244244%''=$//88 3 3 ;L ;LLX'U"2"2=5:>>#'    LL000}d33H#8T4V44L#8T4V44L'''+++
(	 (	 (	 (	 (	 (	 (	 (	 (	 (	 (	 (	 (	T vu---rX   r  c                B  	 ddl m} ddlm} ddlm} t          t          j        |          rt          j
                    S t          t          j        |          rt          j
                    S t          j        j        		j        |k    r}| j        j        qt          | j        j        t                    s!J t!          | j        j                              | j        j                            	t          j
                              S t$          j        r{t)          ||          rkt+          	fddD                       rt          j
                    S |rJ dd}t-          t/          ||                    } t1          | j        |          | S t          j
                    S )z
        If the variable comes from an FX node, we forward the bound we have already computed
        Else, if the variable when codegen'ing another op, we try to compute its bounds
        r   r(  )TritonTemplateKernelrG   )CUDATemplateKernelNc              3  *   K   | ]}|j         v V  d S rn   )r  )r  r  fx_nodes     rV   r  z+CSEProxy._bound_variable.<locals>.<genexpr>
  s*      VV11&VVVVVVrX   )set_indirectr  r  r)  r	   rN   c                    t          | t                    r| j        S t          | t          j                  rt          |           S | S rn   )r;  ry  r  r   r  r   r.  s    rV   arg_to_boundz.CSEProxy._bound_variable.<locals>.arg_to_bound
  s@    a-- 8O5:.. &q>>)HrX   )r)  r	   rN   r	   )r  r)  select_algorithmrB  cuda.cuda_kernelrC  r;  r7   r  r   rc  r9  rW  r  r  dictrM  r  r    compute_all_boundsr  r  r  r  r  r*  )
rj   rg   rq   r  r)  rB  rC  rH  
arg_boundsrE  s
            @rV   r8  zCSEProxy._bound_variable	  s   
 	0/////;;;;;;888888ah 455 	)&(((ah 233 	)&(((-,>T!!dk&@&Ldk8$??  *B B  ? ;-11';;N;P;PQQQ& 	@73Et+L+L 	@ VVVV0UVVVVV -"*,,, :    c,5566J274+T22J??"$$$rX   Tr  ry  r   r  r  r   r  r  c                   t          |t                    rt          j        |          }t          |t          j                  sJ t          |          |f            |j        j        dk     rc|rnt          j	        |t          j
        |t          j                            }|j        j        dk    r+t          j        |d          }t          j        |||          }n|}t!          j                    }|j        t!          j                    k    rt          |t          j                  rp|j        t!          t&           d          z  }t!          |j        |z   |j        |z             }|j        j        dk    r"|j        t!          dt&                    z  }	||	z  }| j        j                            | j        j        |||j        |j                  }| j                            |||          }
t9          |          rZ|j        j        dk     }t          |t          j                   p|j        j        |k      }| j                            |
|||           |
S )Nr   r  r1  )r;  r   r   r  r  rM  r  r  r2   rH  r  r   longr  ltrf  r   rc  Numberr   r  r  rZ  r{  r   r  r%  r  r(   r  )rj   r  r   r  r  stmrP  
new_bounds
neg_boundspos	sympy_varassert_lowerassert_uppers                rV   r  zCSEProxy.indirect_indexing(
  s-    dC   	'=&&D$
++??d4jj$-???+ :a gc3>$
#C#CDD:#q((QB)BS11C %,..Jz[02222z$7U7U2 !Z+vgr*B*BB
($t+Z-=-D 
 :#q((*{1f'='==C!+c!1J+/**#!ii +  C '99#tUKK	5!! 	R #
 0A 56L)$=== 
 4'BL K$$YlLQQQrX   r   r   r  r  rO   c                <    | j                             ||||          S rn   )r  r  r  s        rV   r  zCSEProxy.check_bounds_
  s      {''dE5AAArX   r  c                   || j         j        j        v r$t          j         j                            |           t          |t          j                  r| j         	                    ||          S | j         j        j
        }||v r||         S | j                             ||          }|j        dk    r| j         xj        dz  c_        |S r+  )r  r  r8  r7   r  rH  r   r   TMPr  r.  r  r  r}  )rj   rg   r  r.  outs        rV   r  zCSEProxy.loadd
  s    4;?555 H&**4000udh// 	:;,,T5999ko1;t$$ktU++ =AK  A%  
rX   ri   c                   || j         j        j        |<   | j         j        r]|t          j        j        v rL| j         j                            |          }|                                D ]}|| j         j        j        |<   d S d S d S rn   )	r  r  r.  rW  r7   r   name_to_buffer
get_outputget_mutations)rj   rg   ri   r  
other_names        rV   _update_store_cachezCSEProxy._update_store_cacheu
  s    ,1#D);# 	@0F(F(F+*55d;;C!//11 @ @
:?+J77	@ 	@(F(F@ @rX   Nr^   r6   c                    | j         j                            |           ||                     ||           |t          j        j        vr | j                             ||||           d S d S )N)r^   )r  r  rH  rb  r7   r   r  r  r  s        rV   r  zCSEProxy.store|
  su     	&**4000<$$T5111qw...KdE5t<<<<< /.rX   c                    | j         j                            |           |                     ||           |t          j        j        vr| j                             |||          S d S rn   )r  r  rH  rb  r7   r   r  r  r  s       rV   r  zCSEProxy.store_reduction
  sc    &**4000  u---qw...;..tUEBBB /.rX   r   r   r  r  r5   r  c                f    | j         xj        dz  c_        | j                             ||||          S r+  )r  r~  r  r  s        rV   r  zCSEProxy.reduction
  s6     	!!Q&!!{$$UI~uMMMrX   r  r  r  r  r  r  c                :    | j                             |||          S rn   )r  r  r  s       rV   r  zCSEProxy.scan
  s     {
F;;;rX   r  r  c                <    | j                             ||||          S rn   )r  r  r  s        rV   r  zCSEProxy.sort
  s      {
CCCrX   r  r  r  r  r  r  r  r  r  c           	     B    | j                             |||||||          S )a  
        [Note: Inductor bucketize op]

        Inputs:
        -------
        values: the values to be bucketized.
        boundaries: a tuple containing
          (a) the name of the boundaries tensor (which must be sorted, unless
          the sorting tensor is present),
          (b) the length of the tensor in the last dimension (i.e. the length of
          one set of boundaries),
          (c) the number of elements in the underlying storage (i.e. the length
          of the flattened tensor, ignoring striding), and
          (d) the stride of the tensor in the last dimension.
        boundary_indices: indices into a flattened version of the boundaries
        tensor, of the same size and shape as "values".  Each index points to
        the first element in the set of boundaries to be used for the
        corresponding value.
        indexing_dtype: the dtype to use when indexing into the boundaries
        tensor.  This must be int64 or int32.  This additionally specifies the
        dtype of the return value.
        right: see "Details" below.
        sorter: an optional tuple containing
          (a) the name of an optional sorting tensor, used to access unsorted
          boundaries without reordering the boundaries tensor, and
          (b) the stride of the tensor in the last dimension.
        The values in the sorting tensor are used as indices into the *last*
        dimension of the boundaries tensor, with all other indices matching.
        The size of the sorting and boundaries tensors must be equivalent.
        sorter_indices: must be present if the sorting array is present; see
        "boundary_indices" for the equivalent definition for the boundaries
        tensor.

        Output:
        -------
        The buckets each value belongs in, within a given set of boundaries.  0
        indicates a position before the first boundary, and len(boundaries_set)
        represents a position after the last boundary.

        Details:
        --------
        Given a value and a set of boundaries, calculate the bucket that each
        value belongs to.  This works differently in 1-D and N-D cases.

        for values [[-1, 0, 1, 2], [3, 4, 5, 9]], boundaries [0, 4, 4, 8], right=True
        return =   [[ 0, 1, 1, 1], [1, 3, 3, 4]].

        for values [[-1, 0, 1, 2], [3, 4, 5, 9]], boundaries [[0, 4], [4, 8]], right=True
        return =   [[ 0, 1, 1, 1], [0, 1, 1, 2]]

        Note that in the N-D boundaries case, the shape of "values" and
        "boundaries" must match in every dimension _except_ the last.

        When right == False, bucket i refers to range (boundaries[i], boundaries[i+1]].
        When right == True,  bucket i refers to range [boundaries[i], boundaries[i+1]).

        Boundaries must be non-decreasing, or a sorter must be provided which
        would re-index offsets in a non-decreasing order (e.g. the second output
        of torch.sort(offsets)).  Otherwise, the result is undefined.
        )r  r  r  s           rV   r  zCSEProxy.bucketize
  s4    L {$$
 
 	
rX   )r  r$  r%  r&  )rg   rM   rq   r+  r  r,  rN   r	   )rg   rM   rq   r	   r  r	   rN   r  r  )
r  ry  r   r  r  r   r  r   rN   r  r  r  )rg   rM   ri   ry  rN   rO   rn   r  r  r  r  r  r  r  )rz   r{   r|   rg   r   r@  r8  r  r  r  rb  r  r  r  r  r  r  r"  r#  s   @rV   r  r  	  s|       D- - - - - -K. K. K. K.Z+% +% +% +%b 5 5 5 5 5nB B B B
   "@ @ @ @ SW= = = = =C C C CN N N N	< 	< 	< 	<D D D D  4804N
 N
 N
 N
 N
 N
 N
 N
 N
rX   r  )rL   rM   rN   rO   )NNNN)r   rM   r2  r   r3  r   r4  r   r5  r   r6  r7  r8  r9  rN   rO   )r   rI  rN   rJ  )r   rI  rR  r>  rN   r   )r   rM   rN   rU  )FF)r   rM   rW  r   rX  r   rN   r   )r   rM   rN   r7  )r   rM   rN   r9  rx   )r  r  r  r  r  r  rN   r   )r   rM   r  r  rN   rO   )r   rM   rN   r  )r  rM   rq   r	   r  r	   rN   r   )r   r*   r  r  r   r   rN   rO   )rp  rM   rN   r   r   r  )rN   r	   )
__future__r   rc   r  dataclassesenumr  r6  rS   rA  r  re   rera   abcr   r   r   r   r   typingr	   r
   r   r   r   r   r   r   r   typing_extensionsr   r   r   r   torch.fxtorch._prims_commonr   torch.utilsr   r:  torch.utils._config_moduler   torch.utils._ordered_setr   torch.utils._sympy.numbersr   torch.utils._sympy.printersr   _PythonPrintertorch.utils._sympy.symbolr   r   r   torch.utils._sympy.value_rangesr   r   r  r    r!   dtype_propagationr"   ops_handlerr#   r$   shape_propagationr%   utilsr&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   virtualizedr2   r3   r4   r5   r6   r7   collections.abcr8   r9   r:   r;   custom_graph_passr<   r   r=   r>   r?   r@   r
  rB   r  rC   rD   rE   rF   r  rI   rJ   r   rM  r   rM   r  r  r&  _logginggetArtifactLoggerrz   rQ   	getLoggerr@  rW   	dataclassrZ   r   r   r   r   r   r   r   r   r   KernelArgTyper  r~   r  r/  r0  r1  r<  r>  rO  rT  rN  r[  r]  r_  cacherL  r  r  r  bfloat16r  float16r   r5  float64int8int16r-  r  r   uint16r  uint64r  r  r  r  r%  compile
IGNORECASErz  ru  rw  r  rK  INT_TO_FLOATr  r  r  r  r  r  r  r  r  ry  r%  r  r  r   ReductionCacheKeyr)  re  rs  r  r  r  r  r   rX   rV   <module>r     s   " " " " " " "                      				 				  # # # # # # # #              
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 , + + + + + + +    ? ? ? ? ? ? ) ) ) ) ) ) 3 3 3 3 3 3 / / / / / / - - - - - - G G G G G G O O O O O O O O O O D D D D D D D D         : : : : : : ; ; ; ; ; ; ; ; : : : : : :                            Q P P P P P P P P P P P P P P P  BBBBBBBBBB$$$$$$999999>>>>>>>>>>>>$$$$$$DDDDDDDDDD222222------	B$hy&9%:N%JK23sEL()J F~//*EEg!!= = = =
        >/ / / / /	 / / /(    C    Td d d d d= d d dN* * * * * * * * # # # # # # # #                 ! ! ! ! ! ! ! ! < < < < < < < < lIw8H,VW,. . . . .5" 5" 5" 5" 5" 5" 5" 5"p :<  ; ; ; ;DF  F F F FDF  F F F F8 @D>B:>37B B B B B4
& 
& 
& 
& 
&T 
& 
& 
&3 3 3 3$3 3 3 3U U U U
 @E    V V V V    ` ` ` `FU U U U; ; ; ;	, 	, 	, 	, 
NEK	M5;>  JMMJKKKKLLL
  >     ,' ' ' 'T: : : :2aB aB aB aB aB aB aB aBH= = = = =N = = =$S1 S1 S1 S1 S1 S1 S1 S1l "rz";2=QQQ    D; D; D; D; D;#%5z# D; D; D;N - - - - - - - -  6:T `6 `6 `6M;H--   `6 m;H//-- 	   `6 m;H//-- 	   `6$ m;H//-- 	   %`60 m;H//-- 	   1`6< M;H**))	   =`6L -;H((00	   M`6X 	;H6666>>	 	 	 	Y`6h =;H11   i`6r M;H22   s`6| ];H11   }`6F m;H22    G`6P };H%%88$$   Q`6^ 	;H&&%%		 	 	 	_`6j };H%%88	   k`6v 	;H&&	 	 	 	w`6@ ];H++   A`6L %};H8888)	   M`6X %};H8888)	   Y`6d %};H88)   e`6n %};H88)   o`6z 
;H''
 
 
 
{`6D -;H((   E`6N m;Hc c	   O`6^ ,m;H??0   _`6h ,m;H??0   i`6t &;H99*   u`6~ 
;H**
 
 
 
`6H )=;HDD-   I`6R )=;HDD-   S`6\ )=;HDD-   ]`6f )=;HDD-   g`6p (-;HCC,   q`6z $1=;HLL5$ $ $ ${`6D $1=;HLL5$ $ $ $E`6N $1=;HLL5$ $ $ $O`6X $1=;HLL5$ $ $ $Y`6b ';HBB+   c`6l (-;HCC,   m`6v (-;HCC,   w`6  ` ` ` `F	 	 	 	- - - - -# - - -"    >   *    Z   
 N N N N N N N N       
 *,,J J J J J J J JZ
#; #; #; #; #; #; #; #;L 555'+;TTT k5c!1223	5l l l l l'/=0
1 l l l^
< 
< 
< 
< 
< 
< 
< 
<\3 \3 \3 \3 \3Wgo. \3 \3 \3~            G" G" G" G" G" G" G" G"TU
 U
 U
 U
 U
~ U
 U
 U
 U
 U
rX   