
    `i                        d dl mZ d dlmZmZmZ d dlZd dlZd dlm	Z	m
Z
 d dlmZ ddlmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ ddlmZmZm Z  dd	l!m"Z" 	 	 	 	 d5dddddddee#         dee#         dee#         de#de$deee#                  dee%d                  ded         fdZ&	 	 	 d6dddddddee%d                  ded         de$fdZ'd Z( G d de          Z) G d de          Z* G d d e          Z+ G d! d"e          Z, G d# d$e          Z- G d% d&e          Z. G d' d(e          Z/ G d) d*e          Z0 G d+ d,e          Z1 G d- d.e          Z2 G d/ d0e          Z3 G d1 d2e          Z4 G d3 d4e          Z5dS )7    )Sequence)AnyOptionalUnionN)make_channels_last_strides_for
StrideType
OrderedSet   )ExternKernelAllocFixedLayoutFlexibleLayoutget_device_typeir_node_to_tensorIRNode is_contiguous_storage_and_layoutLayoutmay_convert_to_optionalMultiOutputMultiOutputLayoutMutationOutput
NoneLayoutShapeAsConstantBuffer	TensorBox)convert_shape_to_inductorpad_listlikeSUPPORTED_MKLDNN_DEVICES)VFxr   weightbiaspaddingstridedilationgroups
transposedoutput_paddingquantize_argsotherc                 
   d }dd}d }|                                  |                                  ||                                  t          j        j        5  t	          |d          }t	          |d          }t          |                                          dz
  }dt          |          cxk     r|k    sn J dt          |          cxk     r|k    sn J dt          |          cxk     r|k    sn J t          ||          }t          ||          }t          ||          }|	t          dg|          }	n.dt          |	          cxk     r|k    sn J t          |	|          }	t          |t          t          j        j        j        f          sJ |r2 |||          }|                                } |||||	|||          }nt          |j                  }t          |j                  }t          |          t          |          k    r=t          |          d	k    rt          |          d
k    sJ |                    d            ||||||          }dgt          t#          t%          dt          |          dz                                 z   }t          |          g|z   }ddd           n# 1 swxY w Y   |                     ||          }t)          d |D                        }|st+          |          dk    r$t-          |          rt/          j        |          }nUt+          |          dk    r3|                                d         dk    rt/          j        |          }nt5          |          }t+          |          t+          |          k    sJ t+          |          t6          v sJ |g}|
h|
\  }}}}|                                  |                                  |                                  |                                  |||gz   |gz   ||gz   }n||gz  }|3|                     ||          }t          |t8                    sJ ||gz  }t;          |                                |                                tA          |          tA          |                    }||||g} |r| !                    d|	           ||"                    |           n| !                    d|           || |||fS )a}  
    This function is a helper function to prepare inputs, layout and constant args
    for convolution post-op fusion's create function, including deciding the output
    layout (channels first or channels last), realizing inputs and make them etc. The
    function only supports the CPU/XPU device since conv post-op fusion kernel is only
    supported on CPU/XPU right now.
    c                 D   t          |           t          |          k    s
J d            t          |           }|dk    s
J d            d}d}	g }
|
                    | |                    |
                    ||	         |z             t          d|          D ]f}||         dz
  ||dz
           z  dz   }| |         dz
  ||dz
           z  ||dz
           dz  z
  |z   ||dz
           z   }|
                    |           gt          t	          t
          |
                    S )NzExpect input dim == weight dim   zExpect input dim > 2r   r   )lenappendrangelistmapint)output_sizeweight_sizer"   r'   r#   r$   r%   dim	BATCH_DIMWEIGHT_INPUT_CHANNELS_DIM
input_sizedkernelinput_size_ds                 m/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torch/_inductor/mkldnn_ir.py_conv_input_sizez<_prepare_convolution_fusion_create.<locals>._conv_input_size7   sM    ;3{#3#33335U333+Qwww.www	$%!
+i0111+&?@6IJJJq# 	, 	,A!!nq(HQUO;a?FQ!#va!e}41q5>A%' !Q'(  l++++CZ(()))    Nc                 ~   |d u}t          |           }g }|                    | d                    |                    |d                    t          d|          D ]`}|r||dz
           nd}	|	||         dz
  z  dz   }
| |         d||dz
           z  z   |
z
  ||dz
           z  dz   }|                    |           a|S )Nr   r,   r   )r-   r.   r/   )r8   r4   r"   r#   r$   has_dilationr5   r3   r9   	dilation_r:   output_size_ds               r<   _conv_output_sizez=_prepare_convolution_fusion_create.<locals>._conv_output_sizeO   s    t+*oo:a=)));q>***q# 	. 	.A+7>QQI+a.1"459F']a'!a%..@AFJvAP M }----r>   c                    |                                  t                    }|dk    s
J d            |dk    rng }|                    d         |z             |                    d         |z             |                    fdt	          d|          D                        n(|                     dd                                           }|S )Nr,   zExpect weight dim > 2r   r   c              3   (   K   | ]}|         V  d S N ).0r9   prepacked_weight_sizes     r<   	<genexpr>z[_prepare_convolution_fusion_create.<locals>._original_deconv_weight_size.<locals>.<genexpr>m   s)      OOA4Q7OOOOOOr>   )sizer-   r.   extendr/   	transpose)prepacked_weightr%   r5   r4   rI   s       @r<   _original_deconv_weight_sizezH_prepare_convolution_fusion_create.<locals>._original_deconv_weight_sizeb   s     !1 5 5 7 7'((Qwww/wwwA::K4Q7&@AAA4Q7&@AAAOOOOq#OOOOOOO*44Q::??AAKr>   T)guard_shaper,   r         r   c              3   @   K   | ]}t          |t                    V  d S rF   )
isinstancer2   )rH   is     r<   rJ   z5_prepare_convolution_fusion_create.<locals>.<genexpr>   s,      GGAZ3//GGGGGGr>   xpurF   )#realizer   graph	fake_moder   r-   rK   r   rT   r2   sympycorenumbersIntegerr0   shapepopreversedr/   require_stride_orderallr   r   r   contiguous_strides
get_strider   r   r   r   get_device_or_error	get_dtyper   insertr.   )!clsr   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r=   rC   rO   x_fakeweight_fakedimsr4   r8   r3   x_shapeweight_shapereq_stride_orderdynamic_shapesoutput_strideinputsx_scalex_zero_pointw_scalew_zero_pointkernel_layoutconstant_argss!                                    r<   "_prepare_convolution_fusion_createrx       s   .* * *0   &    IIKKK
NN	
	 /F /F"1$777'DAAA6;;==!!A%3w<<''''4''''''3x==((((D((((((3v;;&&&&$&&&&&&w--$//fd++!)1#t44NNs>**2222d222222).$??N&3
(:(B"CDDDDD 	 76{FKKKJ** KK 6<((G 122L7||s<00007||q((S->->!-C-C-CC  ###++ K 3huQFa/H/H&I&I!J!JJ 01125EE_/F /F /F /F /F /F /F /F /F /F /F /F /F /F /Fb 	  $455A GG+GGGGGGND)!,,55
*1
-
- 6$2$Ek$R$R 
		u	$	$):a)?)?&9+FF6{CC1!8!888881!99999SF 7D4w7L11VH<?VV6(((0@AA%+++++5'		!+..!-00	 M fh7M 0Q///dQ%%%=-1A5HHs   IJ??KK
binary_sumc           
      X   |                                  |                                  ||                                  |                                ^ }}|                                \  }}	t          |          |	gz   }
t          t          t	          t          |                                                                        }|                     ||          }t          |          t          |          k    sJ t          |          t          v sJ |g}|h|\  }}}}|                                  |                                  |                                  |                                  |||gz   |gz   ||gz   }n||gz  }||r|                     ||          }||gz   }t          j
        |
          }t          |                                |                                |
|          }g }||                    |           n|                    d|           |||||fS )z
    This function is a helper function to prepare inputs, layout and constant args
    for linear post-op fusion's create function. The function only supports the CPU device
    since linear post-op fusion kernel is only supported on CPU right now.
    Nr   )rW   get_sizer0   r`   r/   r-   ra   r   r   r   rc   r   
get_devicerf   r.   rg   )rh   r   r    r!   r(   r)   ry   m_ocr3   rn   rq   rr   rs   rt   ru   rp   rv   rw   s                       r<   _prepare_linear_fusion_creater      s-    IIKKK
NNJJLLEQ OOEArq''RD.KHU3qzz||+<+<%=%=>>??  $455A1!8!888881!99999SF 7D4w7L11VH<?VV6( 	F,,U4DEEE5'!"5kBBM			 M  "MdQ%%%=-1A5HHr>   c                     t          |                                 | g           }t          |                                           | _        |g| _        |S )Ndevice)r   
get_layoutr   r|   layoutoutputs)packed	output_irs     r<   _create_output_noder   #  sU    
 I
 &V->->-@-@AAAFM[FNr>   c                        e Zd Z	 d	 d fdZ fdZeddddd	dd
ee         dee         dee         dedeee	                  fd            Z
 xZS )ConvolutionUnaryrG   returnNc           
          t          |d                   | _        t                                          |||d t          j        j        j        j        d| j         d           d S )Nr   aoti_torch__mkldnn__convolution_pointwiseop_overloadcpp_kernel_name)	r   device_typesuper__init__torchopsmkldnn_convolution_pointwisedefaultselfr   rq   rw   	__class__s       r<   r   zConvolutionUnary.__init__/  so     +6!955	(?GZ$*:ZZZ 	 	
 	
 	
 	
 	
r>   c                     |                     d| j         d           t                                          |           d S Nz&torch/csrc/inductor/aoti_torch/c/shim_z.hinclude_extra_headerr   r   codegenr   wrapperr   s     r<   r   zConvolutionUnary.codegen?  J    $$IT5EIII	
 	
 	
 	     r>   r   r   r    r!   padding_stride_rA   r%   scalarsc           
          t          | |||||||          \  }}}}}||t          |	          |
gz   }t          |||          }t          |          S )Nr   rq   rw   )rx   r   r   r   )rh   r   r    r!   r   r   rA   r%   attrr   	algorithmrq   rw   rv   r~   r   s                   r<   createzConvolutionUnary.createE  s    ( /FD(GY
 
	
 &#G,,)
 

 " '
 
 

 #6***r>   rG   r   N__name__
__module____qualname__r   r   classmethodr0   r2   r   r   r   __classcell__r   s   @r<   r   r   .  s        
 	

 

 
 
 
 
 
 ! ! ! ! !  + +  + 	 +
 s) + c + 9 +  + $s)$ +  +  + [ +  +  +  +  +r>   r   c                        e Zd Z	 	 d	 d fdZ fdZeddddd	dd
ddee         dee         dee         dedede	e
         de	e         de	ee                  de	e         fd            Z xZS )ConvolutionBinaryrG   r   Nc           
          t          |d                   | _        t                                          |||d t          j        j        j        j        d| j         d           || _	        d S )Nr   r   %_mkldnn__convolution_pointwise_binaryr   )
r   r   r   r   r   r   r   r   binarycpp_constant_args)r   r   rq   rw   r   r   s        r<   r   zConvolutionBinary.__init__j  su     +6!955	(?Fa$*:aaa 	 	
 	
 	
 "3r>   c                     |                     d| j         d           t                                          |           d S r   r   r   s     r<   r   zConvolutionBinary.codegen|  r   r>   r   r   r)   r    r!   r   r   rA   r%   binary_attrbinary_alpha
unary_attrunary_scalarsunary_algorithmc           
         t          | |||||||          \  }}}}}|                     ||          }|                    d|           ||	|
|t          |          |gz   }t	          |||          }t          |          S )Nr   r   )rx   ra   rg   r   r   r   )rh   r   r)   r    r!   r   r   rA   r%   r   r   r   r   r   rq   rw   rv   rn   r~   r   s                       r<   r   zConvolutionBinary.create  s    . /FD(GY
 
	
 ((0@AAa%#M22)
 
 # '
 
 

 #6***r>   )rG   rG   r   )r   r   r   r   r   r   r0   r2   strr   floatr   r   r   r   s   @r<   r   r   i  s)       
 3 
3 3 3 3 3 3$! ! ! ! ! '+'+ '+ 	'+
 '+ s)'+ c'+ 9'+ '+ '+ uo'+ SM'+  S	*'+ "#'+ '+ '+ ['+ '+ '+ '+ '+r>   r   c                        e Zd Z	 d	 d fdZ fdZdeej                 fdZe	ddd	dd
dddde
e         de
e         de
e         dededee         dee         dee
e                  dee         fd            Z xZS )ConvolutionBinaryInplacerG   r   Nc           
         t          |d                   | _        |d         |d         g|dd          z   }t                                          |||d t          j        j        j        j        d| j         d           t          t          |d                                                   |d         |           t          t          |d                                                   |d         |           g| _        d S )Nr   r   r,   r   &_mkldnn__convolution_pointwise_binary_r   r   )r   r   r   r   r   r   r   _convolution_pointwise_r   r   r   r|   mutation_outputs)r   rv   rq   rw   reordered_inputsr   s        r<   r   z!ConvolutionBinaryInplace.__init__  s     +6!955"1Ivay1F122J>	(@Gb$*:bbb 	 	
 	
 	
 :VAY-A-A-C-CDDDfQiQUVV:VAY-A-A-C-CDDDfQiQUVV!
r>   c                     |                     d| j         d           t                                          |           d S r   r   r   s     r<   r   z ConvolutionBinaryInplace.codegen  r   r>   c                     t                      S rF   r	   r   s    r<   get_unbacked_symbol_defsz1ConvolutionBinaryInplace.get_unbacked_symbol_defs      ||r>   r   r   r)   r    r!   r   r   rA   r%   r   r   r   r   r   c           
      J   t          | |||||||          \  }}}}}|                     ||          }|                    d|           ||	|
|t          |          |gz   }t	          t          |d                                                   ||          }|j        d         S )Nr   r   )rv   rq   rw   r   )rx   ra   rg   r   r   r   r|   rq   )rh   r   r)   r    r!   r   r   rA   r%   r   r   r   r   r   rq   rw   r~   rn   r   s                      r<   r   zConvolutionBinaryInplace.create  s    . /FD(GY
 
	
 ((0@AAa%#M22)
 
 *$F1I,@,@,B,BCCC'
 
 
 }Qr>   r   r   )r   r   r   r   r   r
   rZ   Symbolr   r   r0   r2   r   r   r   r   r   r   r   s   @r<   r   r     sH       
 	

 

 
 
 
 
 
0! ! ! ! !*U\*B     * *  *  	* 
 *  s)*  c*  9*  *  *  uo*  SM*   S	**  "#*  *  *  [*  *  *  *  * r>   r   c                        e Zd Z	 d	 d fdZ fdZeddddd	dd
ee         dee         dee         dee         dedeee	                  fd            Z
 xZS )ConvolutionTransposeUnaryrG   r   Nc           
          t          |d                   | _        t                                          |||d t          j        j        j        j        d| j         d           d S )Nr   r   (_mkldnn__convolution_transpose_pointwiser   )	r   r   r   r   r   r   r    _convolution_transpose_pointwiser   r   s       r<   r   z"ConvolutionTransposeUnary.__init__  so     +6!955	(IQd$*:ddd 	 	
 	
 	
 	
 	
r>   c                     |                     d| j         d           t                                          |           d S r   r   r   s     r<   r   z!ConvolutionTransposeUnary.codegen  r   r>   r   r   r    r!   r   output_padding_r   rA   groups_r   c                     d}t          | |||||||||
  
        \  }}}}}||	t          |
          |gz   }t          |||          }t          |          S )NTr   )rx   r   r   r   )rh   r   r    r!   r   r   r   rA   r   r   r   r   r&   rq   rw   rv   r~   r   s                     r<   r   z ConvolutionTransposeUnary.create  s     
 /
 
	
 &#G,,)
 

 + '
 
 

 #6***r>   r   r   r   r   s   @r<   r   r     s        
 	

 

 
 
 
 
 
 ! ! ! ! ! ++++ ++ 	++
 s)++ c++ c++ 9++ ++ $s)$++ ++ ++ [++ ++ ++ ++ ++r>   r   c                        e Zd Z	 d	 d fdZ fdZeddded	         d
ed	         dddddddddee         dee         dee         dede	defd            Z
 xZS )QConvPointWisePT2ErG   r   Nc           
          t          |d                   | _        t          |          dk    | _        t	                                          |||dt          j        j        j	        j
        d| j         d           dS )a  
        if bias is not None
            - inputs = [x, w, b, weight_scale, weight_zp]
            - const_args is: [stride, padding, dilation, groups, x_scale, x_zp, o_scale, o_zp,
              fp32_output, unary_attr, unary_scalars, unary_algorithm]
        else
            - inputs = [x, w, weight_scale, weight_zp]
            - const_args is: [bias, stride, padding, dilation, groups, x_scale, x_zp, o_scale, o_zp,
              fp32_output, unary_attr, unary_scalars, unary_algorithm]
        r      Nr   __qconv_pointwise_tensorr   )r   r   r-   has_biasr   r   r   r   onednnqconv_pointwiser   r   s       r<   r   zQConvPointWisePT2E.__init__D  s      +6!955Fq(	(8@T$*:TTT 	 	
 	
 	
 	
 	
r>   c                     |                     d| j         d           t                                          |           t	          | j        t                    r|                     |           d S d S r   r   r   r   r   rT   r   r   codegen_size_assertsr   s     r<   r   zQConvPointWisePT2E.codegen_  |    $$IT5EIII	
 	
 	
 	   dk6** 	/%%g.....	/ 	/r>   qxr   rr   )r   r   rs   qwrt   ru   r!   r#   r"   r$   r%   output_scaleoutput_zero_pointc                 P   d}d }t          | ||||	||
|||||||g          \  }}}}}||d         |d         c|d<   |d<   n|d         |d         c|d<   |d<   |||||t          |          |gz   }|J |t          j        t          j        fv r||_        t          |||          S )NFr,   r   r   r   )rx   r   r   float32bfloat16dtyper   )rh   r   rr   rs   r   rt   ru   r!   r#   r"   r$   r%   r   r   output_dtyper   r   r   r&   r'   rq   rw   rv   r~   s                           r<   r   zQConvPointWisePT2E.createg  s   * 
 /lG\:
 
	
 <1>q1A=QRCS.M!mA..1>q1A=QRCS.M!mA.%#G,,)
 
 '''EM5>::: #/M! '
 
 
 	
r>   r   r   )r   r   r   r   r   r   r   r0   r2   r   r   r   r   s   @r<   r   r   C  s2       
 	

 

 
 
 
 
 
6/ / / / / B
B
 ;<B
 @A	B

 B
 B
 "B
 B
 S	B
 cB
 s)B
 B
 B
 B
 B
 B
 [B
 B
 B
 B
 B
r>   r   c                        e Zd Z	 d	 d fdZ fdZdee         fdZdee	j
                 fdZedd	d
d	dd	dd	dd	dd	dee         dee         dee         dedd	dd	fd            Z xZS )QConvPointWiseBinaryPT2ErG   r   Nc           
         t          |d                   | _        t          |          dk    | _        d| _        t                                          |||dt          j        j	        j
        j        d| j         d           dS )ag  
        Needs input/weight/output qparams
        if bias is not None
            - inputs = [x, x_scale, x_zp, w,  w_scale, w_zp, accum, b]
            - const_args = [stride, padding, dilation, groups, o_scale, o_zp,
            output_dtype, accum_scale, accum_zp, binary_attr, alpha, unary_attr, unary_scalars, unary_algorithm]
        else
            - inputs = [x, x_scale, x_zp, w,  w_scale, w_zp, accum]
            - const_args [b, stride, padding, dilation, groups, o_scale, o_zp,
             output_dtype, accum_scale, accum_zp, binary_attr, alpha, unary_attr, unary_scalars, unary_algorithm]
        r         Nr   !__qconv2d_pointwise_binary_tensorr   )r   r   r-   r   idx_for_inplace_sumr   r   r   r   r   qconv2d_pointwiser   r   s       r<   r   z!QConvPointWiseBinaryPT2E.__init__  s    " +6!955Fq(#$ 	(:AQd.QQQ 	 		
 		
 		
 		
 		
r>   c                     |                     d| j         d           t                                          |           t	          | j        t                    r|                     |           d S d S r   r   r   s     r<   r   z QConvPointWiseBinaryPT2E.codegen  r   r>   c                 8    |                      | j                  gS rF   )
input_namer   r   s    r<   get_mutation_namesz+QConvPointWiseBinaryPT2E.get_mutation_names  s     899::r>   c                     t                      S rF   r	   r   s    r<   r   z1QConvPointWiseBinaryPT2E.get_unbacked_symbol_defs  r   r>   r   r   rr   rs   r   qaccumr!   r#   r"   r$   r%   r   r   c                    d}d }t          | ||||
|	||||||||g|          \  }}}}}||d         |d         c|d<   |d<   n|d         |d         c|d<   |d<   |||||||||t          |          |g
z   }|dk    s
J d            t          j                            |                                           t          t          |                                          ||          }|j	        |j
                 S )	NFr,   r   r   sumzCFor now, only post op sum is supported in QConvPointWiseBinaryPT2E.r   r   )rx   r   r   rX   mark_buffer_mutatedget_namer   r   r|   rq   r   )rh   r   rr   rs   r   rt   ru   r   r!   r#   r"   r$   r%   r   r   r   accum_scaleaccum_zero_pointr   alphar   r   r   r&   r'   rq   rw   _kernel_layoutrn   r   s                                 r<   r   zQConvPointWiseBinaryPT2E.create  se   4 
 /lG\:
 
	
" <1>q1A=QRCS.M!mA..1>q1A=QRCS.M!mA.%#M22)
 
 e###Q $## 	
##FOO$5$5666)V%6%6%8%8999'
 
 
 }V788r>   r   r   )r   r   r   r   r   r   r   r   r
   rZ   r   r   r   r0   r2   r   r   r   s   @r<   r   r     s\       
 	

 

 
 
 
 
 
>/ / / / /;HSM ; ; ; ;*U\*B     O9O9 O9 "	O9
 O9 O9 O9 S	O9 cO9 s)O9 O9 "O9 'O9 O9 O9 [O9 O9 O9 O9 O9r>   r   c                   F     e Zd Z	 d	 d fdZ fdZed             Z xZS )	MKLPackedLinearrG   r   Nc                     t                                          |||d t          j        j        j        j                   d S N)r   )r   r   r   r   mkl_mkl_linearr   r   s       r<   r   zMKLPackedLinear.__init__/  sI     		19 	 	
 	
 	
 	
 	
r>   c                 t    |                     d           t                                          |           d S Nz+torch/csrc/inductor/aoti_torch/c/shim_cpu.hr   r   r   r   s     r<   r   zMKLPackedLinear.codegen=  s4    $$%RSSS     r>   c                 2   |                      |                     |                    }|                      |                     |                    }|                                ^ }}|                                \  }}t          |          |gz   }	t	          j        |	          }
|||g}|g}|||gz  }n|                    dd            |                                }|J t          t          ||
                                |	|
          ||          S )Nr   r   )require_stride1realize_inputr{   r0   r   rc   rg   r|   r	  r   rf   )rh   r   packed_worig_wB
batch_sizer}   r~   r   r3   rp   rq   rw   r   s                 r<   r   zMKLPackedLinear.createA  s    1 1! 4 455$$S%6%6v%>%>??

A!!A1ggn&9+FFXv&#=qcMFF  D)))!!!vq{{}}k=QQ'
 
 
 	
r>   r   r   r   r   r   r   r   r   r   r   r   s   @r<   r	  r	  .  s        
 	

 

 
 
 
 
 
! ! ! ! ! 
 
 [
 
 
 
 
r>   r	  c                   L     e Zd Z	 d	 d	 fdZ fdZed             Zd Z xZS )
LinearUnaryrG   r   Nc           
          t          |d                   | _        t                                          |||d t          j        j        j        j        d| j         d           d S )Nr   r   __linear_pointwiser   )	r   r   r   r   r   r   r   _linear_pointwiser   r   s       r<   r   zLinearUnary.__init__Z  so     +6!955	(:BN$*:NNN 	 	
 	
 	
 	
 	
r>   c                     |                     d| j         d           t                                          |           d S r   r   r   s     r<   r   zLinearUnary.codegenj  r   r>   c                    |                      |                     |                    }|                      |                     |                    }|                                ^ }}|                                \  }	}t          |          |	gz   }
||g}||r|ndg|g}|>|                      |                     |                    }|                    |           n|                    dd            |                                }|J t          t          ||	                                |
          ||          }t          |          S )Nr   r   r   rK   r   )require_contiguousr  r{   r0   r.   rg   r|   r  r   rf   r   )rh   r   wr  r   r   r   r}   _icr   r3   rq   rw   r   r   s                  r<   r   zLinearUnary.createp  sP   ""3#4#4Q#7#788""3#4#4Q#7#788**,,C**,,C1ggnQ';wwtYG=&&s'8'8';';<<AMM!  D)))!!!kkmm   
 '
 
 
 #6***r>   c                     d S rF   rG   r   s    r<   apply_constraintzLinearUnary.apply_constraint      r>   r   r   )	r   r   r   r   r   r   r   r&  r   r   s   @r<   r  r  Y  s        
 	

 

 
 
 
 
 
 ! ! ! ! ! + + [+:      r>   r  c                   P     e Zd ZdZ	 d		 d
 fdZ fdZed             Zd Z xZ	S )LinearBinaryz)torch.ops.mkldnn._linear_pointwise.binaryrG   r   Nc           
          t          |d                   | _        t                                          |||d t          j        j        j        j        d| j         d           d S )Nr   r   __linear_pointwise_binaryr   )	r   r   r   r   r   r   r   r  r   r   s       r<   r   zLinearBinary.__init__  so     +6!955	(:AU$*:UUU 	 	
 	
 	
 	
 	
r>   c                     |                     d| j         d           t                                          |           d S r   r   r   s     r<   r   zLinearBinary.codegen  r   r>   c                    |                      |                     |                    }|                      |                     |                    }|                      |                     |                    }|                                ^ }}|                                \  }}t          |          |gz   }	|||g}
|g}|>|                      |                     |                    }|
                    |           n|                    d|           |                                }|J t          t          ||	                                |	          |
|          }t          |          S )Nr   r!  r   )r"  r  r{   r0   r.   rg   r|   r)  r   rf   r   )rh   r   yr#  r  r   r}   r$  r   r3   rq   rw   r   r   s                 r<   r   zLinearBinary.create  sc   ""3#4#4Q#7#788""3#4#4Q#7#788""3#4#4Q#7#788**,,C**,,C1ggnQ=&&s'8'8';';<<AMM!  A&&&!!!kkmm   
 '
 
 
 #6***r>   c                     d S rF   rG   r   s    r<   r&  zLinearBinary.apply_constraint  r'  r>   r   r   )
r   r   r   r:   r   r   r   r   r&  r   r   s   @r<   r)  r)    s        8F 	

 

 
 
 
 
 
 ! ! ! ! ! + + [+:      r>   r)  c                   n     e Zd Z	 	 d	 d fdZ fdZeddd	dd
ddddddddddedefd            Z xZ	S )QLinearPointwisePT2ErG   Tr   Nc           
          t          |d                   | _        || _        t                                          |||dt
          j        j        j        j	        d| j         d           dS )a  
        if bias is not None
            - inputs = [x, w, b, weight_scale, weight_zp]
            - const_args is: [x_scale, x_zp, o_scale, o_zp,
              fp32_output, unary_attr, unary_scalars, unary_algorithm]
        else
            - inputs = [x, w, weight_scale, weight_zp]
            - const_args is: [bias, x_scale, x_zp, o_scale, o_zp,
              fp32_output, unary_attr, unary_scalars, unary_algorithm]
        r   Nr   __qlinear_pointwise_tensorr   )
r   r   r   r   r   r   r   r   qlinear_pointwisetensorr   r   rq   rw   r   r   s        r<   r   zQLinearPointwisePT2E.__init__  sv    " +6!955 );BJd.JJJ 	 		
 		
 		
 		
 		
r>   c                     |                     d| j         d           t                                          |           t	          | j        t                    r|                     |           d S d S r   r   r   s     r<   r   zQLinearPointwisePT2E.codegen  s|    $$IT5EIII	
 	
 	
 	   dk6** 	/%%g.....	/ 	/r>   r   r   rr   rs   r   rt   ru   r!   r   r   c           
          t          | |||||||g          \  }}}}}|||	|
|t          |          |gz   }|
J |
t          j        t          j        fv r|
|_        t          ||||d u          S )Nr   rq   rw   r   )r   r   r   r   r   r   r1  )rh   r   rr   rs   r   rt   ru   r!   r   r   r   post_op_namepost_op_argspost_op_algorithmrq   rw   rv   r~   s                     r<   r   zQLinearPointwisePT2E.create  s    " 8UlG\:8
 8
4q! &#L11)
 
 '''EM5>::: #/M# '$&	
 
 
 	
r>   rG   Tr   )
r   r   r   r   r   r   r   r2   r   r   r   s   @r<   r1  r1    s        
 
 

 
 
 
 
 
</ / / / / ,
,
 ,
 "	,

 ,
 ,
 ",
 ,
 ,
 ,
 ,
 ,
 [,
 ,
 ,
 ,
 ,
r>   r1  c                        e Zd Z	 	 d	 d fdZ fdZdee         fdZedd	d
d	dd	dd	dd	dd	dd	dd	de	de
fd            Z xZS )QLinearPointwiseBinaryPT2ErG   Tr   Nc           
          t          |d                   | _        || _        d| _        t	                                          |||dt          j        j        j	        j
        d| j         d           dS )a  
        if bias is not None
            - inputs = [x, w, x_scale, x_zp, weight_scale, weight_zp, x2, bias]
            - const_args is: [o_scale, o_zp,
              fp32_output, binary_attr, alpha, unary_attr, unary_scalars, unary_algorithm]
        else
            - inputs = [x, w, x_scale, x_zp, weight_scale, weight_zp, x2]
            - const_args is: [bias, o_scale, o_zp,
              fp32_output, binary_attr, alpha, unary_attr, unary_scalars, unary_algorithm]
        r   r   Nr   !__qlinear_pointwise_binary_tensorr   )r   r   r   r   r   r   r   r   r   r4  binary_tensorr6  s        r<   r   z#QLinearPointwiseBinaryPT2E.__init__&  s~    " +6!955 #$ );I]$*:]]] 	 	
 	
 	
 	
 	
r>   c                     |                     d| j         d           t                                          |           t	          | j        t                    r|                     |           d S d S r   r   r   s     r<   r   z"QLinearPointwiseBinaryPT2E.codegenC  r   r>   c                     | j         d         }|dk    r>| j        | j                 }t          |t                    sJ |                                gS g S )Nr  )rw   rq   r   rT   r   r  )r   binary_post_opinputs      r<   r   z-QLinearPointwiseBinaryPT2E.get_mutation_namesK  sW    +B/U""K 89EeV,,,,,NN$$%%Ir>   r   r   rr   rs   r   rt   ru   r)   r!   r   r   c                    t          | |||||||g||dk              \  }}}}}||	|
||||||t          |          |g
z   }|dk    rxt          j                            |                                           t          t          |                                          |||d u          }|j	        |j
                 S |J |t          j        t          j        fv r||_        t          ||||d u          S )Nr  r   r9  )r   r   r   rX   r  r  r?  r   r|   rq   r   r   r   r   r   )rh   r   rr   rs   r   rt   ru   r)   r!   r   r   r   other_scaleother_zprF  r   unary_post_opunary_post_op_argsunary_post_op_algorithmrq   rw   rv   rn   r   s                           r<   r   z!QLinearPointwiseBinaryPT2E.createT  sM   8 *lG\:e#
 
	
 &#$677#)
 
 U""G''(8(8999/!)9)9););<<<+d*	  F =!;<<'''EM5>::: #/M) '$&	
 
 
 	
r>   r=  r   )r   r   r   r   r   r   r   r   r   r   r2   r   r   r   s   @r<   r?  r?  %  s       
 
 

 
 
 
 
 
:/ / / / /HSM     H
H
 H
 "	H

 H
 H
 "H
 H
 H
 H
 H
 H
 H
 [H
 H
 H
 H
 H
r>   r?  c            !            e Zd Z	 d	 d fdZeddddddd	dd
ddddddedee         dededededededef d            Z fdZ	 xZ
S )MkldnnRnnLayerrG   r   Nc                     t                                          |||d t          j        j        j        j                   d S r  )r   r   r   r   atenmkldnn_rnn_layerr   r   s       r<   r   zMkldnnRnnLayer.__init__  sI     		7? 	 	
 	
 	
 	
 	
r>   r   r   w0w1w2w3hxcxreversebatch_sizesmodehidden_size
num_layers
has_biasesbidirectionalbatch_firsttrainc           	         |                      |                                                                          |                      |                     |                    }|                      |                     |                    }|                      |                     |                    }|                      |                     |                    }|                      |                     |                    }|                                 |                      |                     |                    }|                                                                 }t	          |          dk    s
J d            |\  }}}|||g}|                                }|                                }||||||g}||	|
||||||g	}                                }|J t          t          |          ||          d }|||dgg} |||          t          j	        |          t          j	        |          dgg}fdt          t          ||                    D             }|_        |S )NrQ   zExpect lstm input to be 3Dr   )rq   rw   c                 d    t          |           dk    s
J d            t          j        |           S )NrQ   zExpect output_shape to be 3D)r-   r   rc   )output_shaper`  s     r<   get_strides_of_lstm_outputz9MkldnnRnnLayer.create.<locals>.get_strides_of_lstm_output  s5    |$$)))+I)))!4\BBBr>   r   c           
          g | ]U\  }\  }}t          t                                                                          ||          t          |fg          VS rG   )r   r   r|   rf   tuple)rH   rU   r3   rp   r   r   s       r<   
<listcomp>z)MkldnnRnnLayer.create.<locals>.<listcomp>  su     
 
 
 0/K LLNNKKMM!	  	 	
 
 
r>   )r  r  freeze_layoutr{   r-   r|   rO  r   r   rc   	enumeratezipr   )rh   r   rS  rT  rU  rV  rW  rX  rY  rZ  r[  r\  r]  r^  r_  r`  ra  r8   
seq_length
mini_batchrd  hy_shapecy_shaperq   rw   r   re  output_sizesoutput_stridesr   r   s    `                            @r<   r   zMkldnnRnnLayer.create  s   (  1 1! 4 455 	
  !2!22!6!677  !2!22!6!677  !2!22!6!677  !2!22!6!677  !2!22!6!677
  !2!22!6!677
ZZ\\
:!###%A### .8*
J
"J<;;==;;==RRR,

 !!!V,,,'
 
 
	C 	C 	C %h1#>&&|[AA-h77-h77C	

 
 
 
 
 4=L.114 4
 
 
	 #r>   c                 p    |                     d           t                                          |          S r  r  r   s     r<   r   zMkldnnRnnLayer.codegen  s.    $$%RSSSwww'''r>   r   r   )r   r   r   r   r   boolr0   r2   r   r   r   r   s   @r<   rO  rO    s<       
 	

 

 
 
 
 
 
 ]] ] 	]
 ] ] ] ] ] #Y] ] ] ] ] ]  !]" #] ] ] []~( ( ( ( ( ( ( ( (r>   rO  c                   X     e Zd Z	 d	 d fdZ fdZe	 	 	 	 	 	 	 	 dd            Z xZS )WeightInt4PackMatmulrG   r   Nc                     t          |          dk    sJ t          |          dk    sJ t                                          |||dt          j        j        j        j        d           dS )zY
        inputs = [x, w, qGroupSize, qScalesAndZeros]
        constant_args = ()
        rR   r   N-aoti_torch_cpu__weight_int4pack_mm_cpu_tensorr   )r-   r   r   r   r   	quantizedint4mm_packed_weight_cpur   r   s       r<   r   zWeightInt4PackMatmul.__init__  s}     6{{a=!!Q&&&&,EML 	 	
 	
 	
 	
 	
r>   c                     |                     d           t                                          |           t          | j        t
                    r|                     |           d S d S r  )r   r   r   rT   r   r   r   r   s     r<   r   zWeightInt4PackMatmul.codegen+  sf    $$%RSSS   dk6** 	/%%g.....	/ 	/r>   r   r   r#  
qGroupSizeqScalesAndZerosc                 F   ||||g}|                                 ^ }}|                                 \  }}t          |          |gz   }	t          j        |	          }
t	          |                                |                                |	|
          }t          ||          S )N)r   rq   )r{   r0   r   rc   r   r|   rf   ru  )rh   r   r#  r{  r|  rq   r}   r~   nr3   rp   rv   s               r<   r   zWeightInt4PackMatmul.create2  s     Q
O4

Azz||11ggm&9+FF#LLNNKKMM	
 
 $ 
 
 
 	
r>   r   r   )r   r   r#  r   r{  r   r|  r   r  r   s   @r<   ru  ru    s        
 	

 

 
 
 
 
 
*/ / / / / 

 
  	

 %
 
 
 [
 
 
 
 
r>   ru  )FNNN)NNF)6collections.abcr   typingr   r   r   rZ   r   torch._prims_commonr   r   torch.utils._ordered_setr
   irr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   utilsr   r   r   virtualizedr   r2   rs  r0   rx   r   r   r   r   r   r   r   r   r	  r  r)  r1  r?  rO  ru  rG   r>   r<   <module>r     s3   $ $ $ $ $ $ ' ' ' ' ' ' ' ' ' '   J J J J J J J J / / / / / /                                 " U T T T T T T T T T       .215#'AI AIAI AI 	AI
 c]AI SMAI smAI AI AI Xc]+AI D-.AI K AI AI AI AIR 26#'<I <I<I <I 	<I
 D-.<I K <I <I <I <I <I~  8+ 8+ 8+ 8+ 8+( 8+ 8+ 8+vA+ A+ A+ A+ A+) A+ A+ A+HM  M  M  M  M 0 M  M  M `C+ C+ C+ C+ C+ 1 C+ C+ C+Lg
 g
 g
 g
 g
* g
 g
 g
T~9 ~9 ~9 ~9 ~90 ~9 ~9 ~9B(
 (
 (
 (
 (
' (
 (
 (
V6 6 6 6 6# 6 6 6r8 8 8 8 8$ 8 8 8vU
 U
 U
 U
 U
, U
 U
 U
px
 x
 x
 x
 x
!2 x
 x
 x
vq( q( q( q( q(& q( q( q(j3
 3
 3
 3
 3
, 3
 3
 3
 3
 3
r>   