
    `io                     :   d dl Z d dlmZmZ d dlZd dlmc mZ d dl	m
Z
 ddlmZmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZmZmZmZmZmZ ddlmZmZm Z  ddlm!Z!m"Z" ddl#m$Z$m%Z%m&Z& dej'        dej        dej        dej        dej        de(e)eej        ej*        f         eeej        ej*        f                  f         fdZ+de)de%de%dee%         dee%         dee%         dee%         de%fdZ,	 	 	 	 d#dede-e         de-e         fd Z.d!e._/        d" Z0dS )$    N)OptionalUnion)mm_args   )configir)CppGemmTemplate)CppGroupedGemmTemplatecreate_epilogue_with_attr)	TensorBox)addadd_needs_realized_inputsatenpermuteregister_loweringto_dtypeview)autotune_select_algorithmChoiceCallerExternKernelChoice)use_aten_gemm_kernelsuse_cpp_gemm_template)opsOpsValueVW_tensorpacked_weightx_scalex_zpw_scalereturnc                 h   d }t          d |||fD                       }|r$t          j        j        |                                         t          j        j        |                                         z  }t          j                            ||                                dz             }t          j        |                     t          j	                  d          }t          j        j        |                                         }	||z  |	z  }t          j                            ||                                dz             }
nit          j        |                     t          j	                  d          }t          j                            ||                                dz             }
||
|fS )Nc              3     K   | ]|}t          |t          j                  o]|                                t          j        j        v o8t          |j        d           o#t          |j        j        t          j	                  V  }dS )dataN)

isinstancer   r   get_namer   graph	constantshasattrr%   ConstantBuffer).0items     t/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torch/_inductor/mkldnn_lowerings.py	<genexpr>z+create_int8_compensation.<locals>.<genexpr>,   s       * *
 	 	4&& 	:MMOOqw00	:DIv&&	: ty~r'899	* * * * * *    _x_w_compensnamer   )dim_BMatrixCompens)
allr   r(   r)   r'   add_tensor_constanttorchsumtofloat)r   r   r   r    r!   	x_w_scaleuse_int8_fast_compensation_pathx_w_scale_tensorweight_compens_tensorx_zp_tensorweight_compenss              r.   create_int8_compensationrB       s    JNI&) * *
 dG,* * * ' '# ' 
Gg..001g 0 0 2 234 	 G//''))N: 0 
 
	 !&	(++ek*B*B J J Jg'8 58H H; V44!'')),== 5 
 

 !&	(++ek*B*B J J J44!'')),== 5 
 

 	( r0   r=   input_weight_compo_x_scale_x_zp_w_scale
_x_w_scalec                 H   | r)t          j        t          j        ||          |          }nvt          j        t          j        ||          |          }t          j        |t          j        t          j        t          j        ||          |          |                    }|S N)r   submul)r=   rC   rD   rE   rF   rG   rH   temps           r.   'codegen_int8_gemm_template_compensationrN   P   s     ' $
wG  
 
 wG  
 
 wGG      	 	
 
 Kr0   xwbc                 H   |                                  }t          |          dk    rt          | d|d         g          } t          |          }t          j        st          j        sJ d |D             }g }	t          | t          |d         ddg                    ^ }
} }
d |D             dd t          	                    t          |          |           d	}| g|}|                    d
 |D                        t          j        |	|fi | t          |	          dk    sJ t          d|	|          }|j        j        fdt          |          D             t!          j        |d                                                   _        _        fdt          |          D             }t          |          dk    rVt          |          D ]F}t          ||         g |d d         ||                                          d         R           ||<   G|S )N   c                 T    g | ]%}||nt           j                            |          &S rJ   )r   ExternKernelrealize_inputr,   biass     r.   
<listcomp>z)grouped_gemm_lowering.<locals>.<listcomp>   s0    UUU42?#@#@#F#FUUUr0   r   r   layoutc                     g | ]}|d uS rJ    rX   s     r.   rZ   z)grouped_gemm_lowering.<locals>.<listcomp>   s    444$T%444r0   T)has_biastrans_wepilogue_creatoract_mappingc                     g | ]}||S rJ   r^   rX   s     r.   rZ   z)grouped_gemm_lowering.<locals>.<listcomp>   s    ???d.>.>.>.>r0   grouped_gemmc                 L    g | ] }t          j        t          |fg          !S r^   )r   MultiOutputlist)r,   gemm_idxr\   template_bufs     r.   rZ   z)grouped_gemm_lowering.<locals>.<listcomp>   s?        	v|tX.>-?@@  r0   )devicec                 Z    g | ]'}t           j                            |                   (S r^   )r   r   create)r,   rh   return_bufss     r.   rZ   z)grouped_gemm_lowering.<locals>.<listcomp>   s;       7?K122  r0   )get_sizelenr   r   max_autotunemax_autotune_gemmr   r   dictfromkeysrangeextendr
   add_choicesr   r%   r   MultiOutputLayout
get_devicer\   outputs)rO   rP   rQ   attrscalars	algorithmr\   x_sizenum_gemmchoices_kwargsinput_nodesresultreturn_tensorsrh   rm   ri   s         `         @@r.   grouped_gemm_loweringr      s    ZZ\\F
6{{QR$%%1vvH:&"::::UUSTUUUA"$Gq'!A$A"7"7GGGQ1 54!444 }}U8__a88	 F 'q'K?????@@@&  	   w<<1&	 F ;#L    h  K .k!n6O6O6Q6QRRRL&L   CH??  N 6{{Qh 	 	H'+x(G&"+G~h7@@BB2FGG( (N8$$ r0   Tc            !         t           j        j        rUddlm t          t           j        j        j        ddj	        j
                  t          t           j        j        j        j        ddj        j
                  t          t           j        j        j        ddj        j
                  t          t           j        j        j        j        ddj        j
                  t           j        j        j        t           j        j        j        t           j        j        j        t           j        j        j        t(          j        j        t           j        j        j        g} t1          t           j        j        j                  dt2          dt2          d	t2          ffd
            }t1          t           j        j        j        j                  dt2          dt2          dt2          d	t2          ffd            }t1          t           j        j        j        j                  dt2          dt2          dt2          d	t2          ffd            }t1          t           j        j        j                  	 d3dt2          dt2          dt2          ffd            }t1          t           j        j        j        j                  	 d3dt2          dt2          dt2          dt2          ffd            }t1          t           j        j        j                  dt2          dt2          d	t2          ffd            }t1          t(          j        j                  dt2          dt2          dt2          dt2          dt2          dt2          dt2          dt4          dt6          t8                   dt8          dt8          dt8          dt4          d t4          d!t4          d"t4          f fd#            }t1          t           j        j        j        d $          dt2          d%t2          d&t2          d't2          d	t2          f
fd(            }t1          t           j        j        j        j        d $          t1          t           j        j        j        j        d $          dt2          d%t2          d&t2          d't2          d)t2          d	t2          ffd*                        }	t1          t           j        j        j        d $          	 d3dt2          d%t2          d&t2          d't2          d	t2          f
fd+            }
t1          t           j        j        j        j        d $          t1          t           j        j        j        j        d $          	 d3dt2          d%t2          d&t2          d't2          d,t2          d	t2          ffd-                        }t           j        j        rt          t           j        j         j!        d.dj"        j
                  | #                    t           j        j         j!                   t1          t           j        j         j!                  d d/dt2          d0t2          d1t2          dtH          t2                   ffd2            }tK          |            d S d S )4Nr   )	mkldnn_irzmkldnn::_linear_pointwiseF)has_out_variantkernel_creatorzonednn::qlinear_pointwiserO   weightrY   c
                 n    t          j        
j                            | |||||||||	
  
                  S rJ   )r   rl   ConvolutionUnary)rO   r   rY   paddingstridedilationgroupsrz   r{   r|   r   s             r.   convolution_unaryz5register_onednn_fusion_ops.<locals>.convolution_unary   sN     #*11   r0   otherc                 t    t          j        j                            | |||||||||	|
||                    S rJ   )r   rl   ConvolutionBinaryrO   r   r   rY   r   r   r   r   binary_attrbinary_alpha
unary_attrunary_scalarsunary_algorithmr   s                r.   convolution_binaryz6register_onednn_fusion_ops.<locals>.convolution_binary  sW      #+22 !#   r0   c                 t    t          j        j                            | |||||||||	|
||                    S rJ   )r   rl   ConvolutionBinaryInplacer   s                r.   convolution_binary_inplacez>register_onednn_fusion_ops.<locals>.convolution_binary_inplace'  sW      #299 !#   r0   rP   rQ   c                    |                                  }t          |          dk    rt          | d|d         g          } |t          j                            |          }g }t          j        st          j        rwt          |ddg          }	t          | |	|          ^ }
}} }	t          || |	          r=fd}|d uddk    rd n|d	}|g d
|d<   t          j        |||| |gn| ||gfi | t          |          dk    st                      rDt                    }|d |d<   |                     j        || |gn| ||g|fi |           |                                t&          j        j        v sJ dd i}t-          d||| |gn| ||g||          }t          |          dk    r5t          |g |d d         |                                 d         R           }|S )NrS   rT   r   r   r[   c                 *    t          |           S )Nr{   r|   r   )bufr|   rz   r{   s    r.   ra   zJregister_onednn_fusion_ops.<locals>.linear_unary.<locals>.epilogue_creator_  s#    8w)      r0   Tnoner_   r`   ra   )rS   r   r   input_indices)rz   r{   r|   Bc                 T    t           j        j        |                                          S rJ   r   r(   r)   r'   rO   s    r.   <lambda>zBregister_onednn_fusion_ops.<locals>.linear_unary.<locals>.<lambda>      QW.qzz||< r0   linear_unaryinput_gen_fnsrn   ro   r   r   rV   rW   r   rp   rq   r   r   r   r	   rv   r   rr   appendbindr'   r   r(   r)   r   )rO   rP   rQ   rz   r{   r|   r\   r}   r   transposed_wr   ra   r   r   r   aten_mkldnn_linear_unarys      ```         r.   r   z0register_onednn_fusion_ops.<locals>.linear_unaryI  s    ZZ\\F6{{QR,--}O11!44*,G" f&> &q1a&11.5af.U.U.U+FA|(LAA        %&TM#'$(FNNDD8H	 F }2;))/#/"#)A!Q  !	   7||q  $9$;$; 4INNN9"&F3K1,1"#)A!Q  !    ::<<17#44444<<M /)A!Q+  F 6{{Qf&Kss&KV__5F5Fr5J&K&KLLMr0   yc                 Z   |                                  }t          |          dk    rt          | d|d         g          }                                  }t          |          dk    rt          d|d         g          |t          j                            |          }g }t          j        st          j        rvt          |ddg          }	t          | |	|          ^ }
}} }	t          || |	          r:fd}|d ud|d}|g d	ng d
|d<   t          j        |||| |gn| ||gfi | t          |          dk    st                      rDt                    }|d |d<   |                     j        || |gn| ||g|fi |           |                                t&          j        j        v sJ dd i}t-          d||| |gn| ||g||          }t          |          dk    r5t          |g |d d         |                                 d         R           }|S )NrS   rT   r   r   r[   c                 (    t          |           S )N)r   r   )r   rz   r   s    r.   ra   zKregister_onednn_fusion_ops.<locals>.linear_binary.<locals>.epilogue_creator  s    8d!LLLLr0   Tr   )r   rS   r   )   r   rS   r   r   )rz   r   c                 T    t           j        j        |                                          S rJ   r   r   s    r.   r   zCregister_onednn_fusion_ops.<locals>.linear_binary.<locals>.<lambda>  r   r0   linear_binaryr   r   )rO   r   rP   rQ   rz   r\   r}   y_sizer   r   r   ra   r   r   r   aten_mkldnn_linear_binarys    `  `          r.   r   z1register_onednn_fusion_ops.<locals>.linear_binary  s    ZZ\\F6{{QR,--ZZ\\F6{{QR,--}O11!44*,G" f&> &q1a&1118|Qv2 2 2.FA|Q )LAA M M M M M M %&TM#',< F <=9iiii,,,F?+#/%&YAq		Q1aL  !	   7||q  $9$;$; 49"&F3K2-2%&YAq		Q1aL  !    ::<<17#44444<<M /YAq		Q1aL+  F 6{{Qf&Kss&KV__5F5Fr5J&K&KLLMr0   c                 p    t          j        j                            | |||||||||	|
                    S rJ   )r   rl   ConvolutionTransposeUnary)rO   r   rY   r   output_paddingr   r   r   rz   r{   r|   r   s              r.   convolution_transpose_unaryz?register_onednn_fusion_ops.<locals>.convolution_transpose_unary  sQ     #3::"   r0   w0w1w2w3hxcxreversebatch_sizesmodehidden_size
num_layers
has_biasesbidirectionalbatch_firsttrainc                     t          j        t          j        j                            | |||||||||	|
|||||                    S rJ   )pytreetree_mapr   rl   MkldnnRnnLayer)rO   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   s                   r.   mkldnn_rnn_layerz4register_onednn_fusion_ops.<locals>.mkldnn_rnn_layer  se    & ? (//!!   r0   )type_promotion_kindr   r!   w_zpc                    t          |          t          k    sJ t          j                            t          j        |t
          j                  d          }t          |          t          k    sJ t          j                            t          j        |t
          j	                  d          }t          j        j                            | |||||||||	|
||||||                    S )Ndtyper   r2   r    )typer;   r   r(   r7   r8   tensorfloat32intint32r   rl   QConvPointWisePT2E)rO   r   r    r   r!   r   rY   r   r   r   r   o_inv_scaleo_zero_pointoutput_dtyperz   r{   r|   r   s                    r.   qconvolution_unaryz6register_onednn_fusion_ops.<locals>.qconvolution_unary  s    * ==E))))g11WEM::: 2  G ::$$$$7..T555F /  D #,33!  #   r0   accumc                    t          |          t          k    sJ t          j                            t          j        |t
          j                  d          }t          |          t          k    sJ t          j                            t          j        |t
          j	                  d          }|dk    rn|t
          j        t
          j
        fv rT|                                t
          j        t
          j
        fv r(|                                |k    rt          ||          }t          j        j                            | |||||||||	|
|||||||||||                    S )Nr   r   r2   r    r9   )r   r;   r   r(   r7   r8   r   r   r   r   bfloat16	get_dtyper   r   rl   QConvPointWiseBinaryPT2E)rO   r   r    r   r!   r   r   rY   r   r   r   r   r   r   r   accum_scaleaccum_zpr   alphar   r   unary_algorithmmr   s                         r.   qconvolution_binaryz7register_onednn_fusion_ops.<locals>.qconvolution_binaryG  sk   > ==E))))g11WEM::: 2  G ::$$$$7..T555F /  D
 u$$ U]EN$CCCOO%%%-)HHHOO%%55 !55#299!  !$-   r0   c                 (  	
 |                                 t          j        t          j        fv s
J d            |                                 }t          |          dk    rt          | d|d         g          } t          t          j	                  sZt                    t          k    sJ t          j                            t          j        t          j                  d          n|                                 t%          d                                 D                       rt          g           t                                                    dv s
J d	            ?t          j                            t          j        d
t          j                  d          t          t          j	                  sZt                    t(          k    sJ t          j                            t          j        t          j                  d          n                                                                 dk    s
J d            |?t          j                            t          j        d
t          j                  d          }                                 |                                 |                                 t          j        k    rt          t          j                            |          t          j                  rt          j        j        |                                                             t          j                  }t          j                            t          j        |t          j                  |                                          }d n                                 g }t8          j        st8          j        rt?          | ||	          ^ }}} }t          t          j                            |          t          j                  rMt          j         t          j!        t          j        j        |                                                   t          j        j        |                                                   rtE          || |          rt          j        j        |                                         #                                }tI          ||          \  
	fd}|                                  t          j%        t          j        fv sJ tM          j'        ||| ||gn| ||gd u|g dng d           t          |          d
k    stQ                      rOtS          	
          }d |d<   |*                     j+        | ||fn| ||f|fi |           |                                t          j        j        v sJ d d d d d}t          t          j                                      t          j                  rd |d<   t          t          j                                      t          j                  rd |d<   tY          d|| ||gn| ||g||          }t          |          dk    r5t          |g |d d         |                                d         R           }|S )Nz=Only int8 and e4m3fn weights are supported by oneDNN qlinear.rS   rT   r   r   r2   c              3   "   K   | ]
}|d k    V  dS r   Nr^   r,   r4   s     r.   r/   zDregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.<genexpr>  &      >>Csax>>>>>>r0   r   r   x_scale must be 0D or 1Dr   r    r   z(x_zp is incompatible with oneDNN qlinearr   r\   	out_dtypec                   	
 t           j        t           j        t           j        t           j        fv sJ |                                                                 d rJ                                                                                                 
                                d                                 
f
d}t          j        |                                 t           j        || 	                                          }dk    rt          |          }t           j        k    rW|                                fd}t          j        |                                ||	                                          }nt           j        t           j        fv rddlm |                                		fd}t          j        |                                t          j        |t!                    t#                    	          |	                                          }|S )
Nc           	         
  |           }t          j        |t          j                  }| d         f}d }d }d }s! d          } d          } |          } |          }d }rJ  |          }t	          ||||||          }
k |          }	t          j        t          j        fv sJ t          j        k    rt          j        |	t          j                  }	t          j        ||	          }|S NrT   r^   r   r   r8   r   rN   r   r   )indexrC   weight_compens_indexrE   rF   rG   rD   rH   rM   _biasrY   
bias_dtypebias_loaderinput_loaderr=   w_scale_loaderweight_compens_loaderx_scale_loaderx_w_scale_loaderx_zp_loaders             r.   inner_fnz]register_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.epilogue_creator.<locals>.inner_fn  s?   $0L$7$7E %(L$F$FE49"I<0'+H$(E'+H#B P+9>"+=+=(3B+9>:N+O+O,A,ABV,W,WM)-J> T'7'C'C'C-=-=>R-S-S
#J ? % - ( % ( *$ $D  $/(34H(I(I'1emU^5T'T'T'T'T#-#?#?,/L,N,NE'*wtU';';#'Kr0   rj   r   r  rangesr   r   c                 D     |           }t          j        |          S rJ   r   r   r   rC   output_cast_loaderr   s     r.   inner_fn_cast_output_to_bf16zqregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.epilogue_creator.<locals>.inner_fn_cast_output_to_bf16T  %    (:(:5(A(A'*|E<'H'H Hr0   r   _create_constantsc                     |           } 	d|z  |t           j                  \  }}t          j        ||z            |z   }
t           j        k    r 	ddt           j                  \  }}n 	ddt           j                  \  }}t          j        t          j        ||          |          }t          j        |
          S Ng      ?r   r      i   r8   r   r   rounduint8minimummaximumr   r   scale
zero_pointrC   	inv_scalevalqminqmaxclampedr  r   requant_input_loaders            r.   inner_fn_requantzeregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.epilogue_creator.<locals>.inner_fn_requantc  s    (<(<U(C(C8I8I$'%K5=9" 9" 9" 5	: '*i	0A&B&BZ&O#/5;#>#>1B1B()3em2& 2& 2&JD$$ 2C1B(,c2& 2& 2&JD$ +.+ck#t6L6Ld*S*S'*|G\'J'J Jr0   r  r  r8   r   r   r  int8make_loaderr   	Pointwiserx   rn   r   get_device_or_errorloweringr  	functoolspartialr;   r   )input_bufferr  
output_bufr  r"  r  r   r   r  r!  r   r  r  r  r  r|   rz   rY   r   o_scaler   r   r{   r=   r!   rA   r   r<   r    s        @@@@@@@@@@r.   ra   zKregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.epilogue_creator  s   +!M!N!K!J	0         (4'?'?'A'A0>0J0J0L0L-+/(: G#,#8#8#8/8/D/D/F/F,)0)<)<)>)>)0)<)<)>)>&*&6&6&8&8&*+*.*:*:*<*<K'( '( '( '( '( '( '( '( '( '( '( '( '( '(R &(\#/#:#:#<#<"'-%-#/#8#8#:#:	& & &
  6>>)B *D'Y* * *J
 (5>991;1G1G1I1I.I I I I I I *,'1'E'E'G'G&2)E'1':':'<'<	* * *JJ *ek5:-FFFCCCCCC3=3I3I3K3K0K K K K K K K" *,'1'E'E'G'G&2)2):$4*/../2</@/@*" *" *"
 (2':':'<'<	* 	* 	*J  *)r0   )r   r   r   rS         )   r   r   r   rS   r/  r0  r_   ra   r   )output_scaleoutput_zero_pointr   post_op_namepost_op_argspost_op_algorithmrY   c                 T    t           j        j        |                                          S rJ   r   r   s    r.   r   zCregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.<lambda>  r   r0   c                 T    t           j        j        |                                          S rJ   r   r   s    r.   r   zCregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.<lambda>  r   r0   c                 T    t           j        j        |                                          S rJ   r   r   s    r.   r   zCregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.<lambda>  r   r0   c                 T    t           j        j        |                                          S rJ   r   r   s    r.   r   zCregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.<lambda>  r   r0   )r   r/  r0  r1  c                 T    t           j        j        |                                          S rJ   r   r   s    r.   r   zCregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.<lambda>      QW->qzz||-L r0   c                 T    t           j        j        |                                          S rJ   r   r   s    r.   r   zCregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.<lambda>  r=  r0   qlinear_unaryr   )-r   r8   r%  float8_e4m3fnrn   ro   r   r&   r   r   r   r;   r   r(   r7   r   r   realizer6   r   r   	get_numelInputsKernelunwrap_storage_for_inputr+   r)   r'   r:   r   rp   rq   r   equal
zeros_liker   to_denserB   r  r	   rv   r   rr   r   r   r   )rO   r   r    r   r!   r   rY   r.  r   r   rz   r{   r|   r\   r}   w_zp_tensorr   r   r   ra   r   r   r   r   r=   rA   r<   aten_mkldnn_qlinear_unarys    `` ` ```````          @@@@r.   r?  z1register_onednn_fusion_ops.<locals>.qlinear_unary  sz   " !**,,U=P0QQQQO RQQ ZZ\\F6{{QR,--gr|44 UG}}----'55L>>>Y 6   !!!>>7+;+;+=+=>>>>> 0 #7B//G7++--..&888:T888|
 w22L%+666V 3   dBL11 DzzS((((w22LU[999 3   >>##q(((*T(((
 | w22L%+666V 3   OOLLNNN~~5;..:88>>!4 4.
  g/@CCEKPPw22LEK@@@t}} 3   "&4>>3C3CJ*,G" gf&> g/6}V|0 0 0,FA} @@FF) b ():4==??)KLL)$--//: b ,FA}EEb  !w01G1G1I1IJSSUUH 1 % 	7&!{* {* {* {* {* {* {* {* {* {* {* {* {* {* {* {* {* {*z ;;==U[%*,EEEEE#/< GT='4HH$wdS!%T!1)9< '9&8&8&8222    7||q  $9$;$; !(&2!-!%!(&/   <%)F6N2-2< GT='4HH$wdS	 
 !    !))++qw/@@@@@<<<<<<<<	 M 88AA!  M
 $M#La 88>>!  M $M#La .< GT='4@@$wdK+  F 6{{Qf&Kss&KV__5F5Fr5J&K&KLLMr0   x2c                 6  	
 ! |                                  }                                 }t          |          t          |          k    sJ t          |          dk    r6|dk    r0t          | d|d         g          } t          d|d         g          t          t          j                  sZt                    t          k    sJ t          j	        
                    t          j        t          j                  d          n|                                 t          d                                  D                       rt          g           t                                                     dv s
J d	            ?t          j	        
                    t          j        d
t          j                  d          |?t          j	        
                    t          j        d
t          j                  d          }t          t          j                  sZt                    t"          k    sJ t          j	        
                    t          j        t          j                  d          n                                                                  |                                 |                                t          j        k    rt          t          j                            |          t          j                  rt          j	        j        |                                                             t          j                  }t          j	        
                    t          j        |t          j                  |                                          }|dk    r
t          j        t          j        fv rU                                t          j        t          j        fv r)                                
k    rt5          
          n"                                
k    s
J d                                                                             nd g }t6          j        st6          j        r|dk    rt=          | ||
          ^ }}} }t          t          j                                      t          j                  rt                                          j                   d
k    r]t          t          j                            |          t          j                  r%t          j!        t          j"        t          j	        j        |                                                   t          j	        j        |                                                   rtG          || |          rt          j	        j        |                                         }|$                                }tK          ||          \  !	
 !fd}tM          j'        ||	| ||gn	| ||gd u|g dng d           t          |          d
k    stQ                      rUtS          	
||||
  
        }d |d<   |*                     "j+        	| ||fn	| ||f|fi |           |                                t          j	        j        v sJ d d d d}d |d<   tY          d|	| ||gn	| ||g||          }t          |          dk    r;|dk    r5t          |g |d d         |                                 d         R           }|S )NrS   r   rT   r   r   r2   c              3   "   K   | ]
}|d k    V  dS r   r^   r   s     r.   r/   zEregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.<genexpr>  r   r0   r   r   r   r    r   r9   zCdtype of accum for qlinear post op sum should be the same as outputr   c                   	
 t           j        t           j        t           j        t           j        fv sJ |                                                                                                 d rJ                                                                                                 
                                d                                 
fd}t          j        |                                 t           j        || 	                                          }dk    rt          |          }t           j        k    rW|                                fd}t          j        |                                ||	                                          }nt           j        t           j        fv rddlm |                                		fd}t          j        |                                t           j        t          j        |t!                    t#                    	          |	                                          }|S )
Nc           	          |           } |           }d }d }d }| d         f}s! d          } d          } |          }t          j        |t          j                  } |          }d }rJ  |          }t	          ||||||          }	k |          }
t          j        t          j        fv sJ t          j        k    rt          j        |
t          j                  }
t          j        |	|
          }	t          j        t          j        fv sJ t          j        k    rt          j        |t          j                  }t          j        |	|          }	|	S r   r   )r   rC   _x2rE   rF   rG   r   rD   rH   rM   r   rY   r   r   r   r=   r   r  x2_dtype	x2_loaderr  r  r  s              r.   r  z^register_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.epilogue_creator.<locals>.inner_fn_  s   $0L$7$7E"+)E"2"2C'+H$(E'+H49"I<0#B P+9>"+=+=(3B+9>:N+O+O$'L$F$FE,A,ABV,W,WM)-J> T'7'C'C'C-=-=>R-S-S
#J ? % - ( % ( *$ $D  $/(34H(I(I'1emU^5T'T'T'T'T#-#?#?,/L,N,NE'*wtU';'; $,u~/N#N#N#N#N'5>99&)l3&F&F#&74#5#5D#'Kr0   r  r   r   c                 D     |           }t          j        |          S rJ   r	  r
  s     r.   r  zrregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.epilogue_creator.<locals>.inner_fn_cast_output_to_bf16  r  r0   r   r  c                     |           } 	d|z  |t           j                  \  }}t          j        ||z            |z   }
t           j        k    r 	ddt           j                  \  }}n 	ddt           j                  \  }}t          j        t          j        ||          |          }t          j        |t           j                  S r  r  r  s            r.   r"  zfregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.epilogue_creator.<locals>.inner_fn_requant  s    (<(<U(C(C8I8I$'%K5=9" 9" 9" 5	: '*i	0A&B&BZ&O#/5;#>#>1B1B()3em2& 2& 2&JD$$ 2C1B(,c2& 2& 2&JD$ +.+ck#t6L6Ld*S*S'*|GU['I'I Ir0   r#  r$  ) r,  r  r-  r  r"  r  r   r   r  r!  r   r  rQ  r  r  r  rY   r   r.  r   r   r   r   r   r=   r!   rA   rJ  rP  r   r<   r    s         @@@@@@@@@@@r.   ra   zLregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.epilogue_creatorG  s   +!M!N!K!J	0         (4'?'?'A'A$&NN$4$4	0>0J0J0L0L-+/(: G#,#8#8#8/8/D/D/F/F,)0)<)<)>)>)0)<)<)>)>&*&6&6&8&8&*+*.*:*:*<*<K-( -( -( -( -( -( -( -( -( -( -( -( -( -( -( -(^ &(\#/#:#:#<#<"'-%-#/#8#8#:#:	& & &
 &//)B * *(5*:	* * *J (5>991;1G1G1I1I.I I I I I I *,'1'E'E'G'G&2)E'1':':'<'<	* * *JJ *ek5:-FFFCCCCCC3=3I3I3K3K0J J J J J J J" *,'1'E'E'G'G&+k)2):$4*/../2</@/@*" *" *"
 (2':':'<'<	* 	* 	*J  *)r0   )r   r   r   rS   r/  r0  r1  )   r   r   r   rS   r/  r0  r1  r2  )
r3  r4  r   other_scaleother_zpbinary_post_opr   unary_post_opunary_post_op_argsunary_post_op_algorithmrY   c                 T    t           j        j        |                                          S rJ   r   r   s    r.   r   zDregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.<lambda>  r   r0   c                 T    t           j        j        |                                          S rJ   r   r   s    r.   r   zDregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.<lambda>  r   r0   c                 T    t           j        j        |                                          S rJ   r   r   s    r.   r   zDregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.<lambda>  r   r0   )r   r/  r0  c                 T    t           j        j        |                                          S rJ   r   r   s    r.   r   zDregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.<lambda>  r=  r0   rT  qlinear_binaryr   )-rn   ro   r   r&   r   r   r   r;   r   r(   r7   r8   r   r   rA  r6   r   r   r   rC  rD  r+   r)   r'   r:   r   r   r   rp   rq   r   
get_layoutsizerE  rF  r   rG  rB   r	   rv   r   rr   r   r   r   )#rO   r   r    r   r!   r   rJ  rY   r.  r   r   x2_scalex2_zpr   r   r   r   r   r\   r}   x2_sizerH  r   r   r   ra   r   r   r   r   r=   rA   rP  r<   aten_mkldnn_qlinear_binarys#    `` ` `````    ```           @@@@@r.   r_  z2register_onednn_fusion_ops.<locals>.qlinear_binary  s   6 ZZ\\FkkmmGv;;#g,,....6{{Q;%#7#7R,--"r72;/00gr|44 UG}}----'55L>>>Y 6   !!!>>7+;+;+=+=>>>>> 0 #7B//G7++--..&888:T888|w22L%+666V 3   |w22L%+666V 3   dBL11 DzzS((((w22LU[999 3   
 OOLLNNN~~5;..:88>>!4 4.  g/@CCEKPPw22LEK@@@t}} 3   e##MN$   llnn(GGG||~~55
 &b,77<<>>\999] :99 ||~~H-1-=)))4J*,G#x'-'?x&&3:}b<4 4 40FA}b @@FF) r
 DOO--233q88"@@FF)  9
 ():4==??)KLL)$--//:  9 .faGG 9  !w01G1G1I1IJH'0022H
 1 % 	7&!F* F* F* F* F* F* F* F* F* F* F* F* F* F* F* F* F* F* F* F*P $/< GT='4LL$wbRVW!%T!1)9  < '<&;&;&;555    7||q  $9$;$; !(&2!- ("#.!&",'4,<   <%)F6N3.3< GT='4LL$wbRVW	 
 !    !))++qw/@@@@@<<<<<< M
 #L#La . < GT='4DD$wb$O+  F 6{{Q;%#7#7f&Kss&KV__5F5Fr5J&K&KLLMr0   zmkl::_mkl_linearr[   packed_worig_wc                   g }t           j        st           j        rXt          |ddg          }t	          | ||          ^ }}} }t          || |          rt          j        ||| ||gdddg           t          |          dk    st                      r/|
                                        | ||f|d |                     |                                t          j        j        v sJ |                                t          j        j        v sJ d d	 d
}	t!          d|| ||g||	          }
|t#          |
|          }
|
S )Nr   r   r[   TrS   )r`   r   )r   
batch_sizec                 T    t           j        j        |                                          S rJ   r   r   s    r.   r   zGregister_onednn_fusion_ops.<locals>.mkl_packed_linear.<locals>.<lambda>8      !21::<<!@ r0   c                 T    t           j        j        |                                          S rJ   r   r   s    r.   r   zGregister_onednn_fusion_ops.<locals>.mkl_packed_linear.<locals>.<lambda>9  rk  r0   )r   rS   packed_linearr   )r   rp   rq   r   r   r   r	   rv   ro   r   r   r   r'   r   r(   r)   r   r   )rO   rf  rg  rQ   ri  r\   r   r   r   r   r   aten_mkl_linears              r.   mkl_packed_linearz5register_onednn_fusion_ops.<locals>.mkl_packed_linear  s    /1& &*B #*6Aq6#:#:L29<3 3 3/Q< -VQEE '3#"&1$(+,a&    w<<1$$(=(?(?$NN',,&16Tj -      ((**ag.?????((AG,===== A@@@! ! %>#&)"/% % % = ^^Fr0   rJ   )&r8   _C_has_mkldnn r   r   r   mkldnn_linear_pointwiseLinearUnaryrl   binaryLinearBinaryonednnqlinear_pointwiseQLinearPointwisePT2EQLinearPointwiseBinaryPT2E_convolution_pointwise_convolution_pointwise_ _convolution_transpose_pointwiser   r   defaultqconv_pointwiser   r   boolrg   r   qconv2d_pointwisebinary_tensorhas_mklmkl_mkl_linearMKLPackedLinearr   r   r   )cpu_needs_realized_inputsr   r   r   r   r   r   r   r   r   r?  r_  ro  rn  r   r   re  rI  r   s                @@@@@@r.   register_onednn_fusion_opsr     s   x C#5I.'!$07	$
 $
 $
  %7I.5'!$18	%
 %
 %
! %7I.'!$9@	%
 %
 %
! &8I.5'!$?F	&
 &
 &
" I3I4I=I.!)I,%
! 
59+B	C	C			 	 	 	 	 	 
D	C	6 
59+BI	J	J			 	 		 	 	 	 	 
K	J	B 
59+CJ	K	K			 	 		 	 	 	 	 
L	K	B 
59+=	>	> A	 A	A	A	 A	 A	 A	 A	 A	 
?	>A	F 
59+=D	E	EQU<	 <	<	&<	+4<	9B<	 <	 <	 <	 <	 
F	E<	| 
59+L	M	M			 	 	 	 	 	 
N	M	: 
408	9	9&	&	&	 &	 	&	
 &	 &	 &	 &	 c&	 &	 &	 &	 &	  &	 &	  !&	 &	 &	 &	 &	 
:	9&	P 
59+;QU	V	V	V1	1	 %	1	
 1	 1	 1	 1	 1	 1	 1	 
W	V1	f 
I.54

 

 

 
I.<RV

 

 

F	F	 %	F	
 F	 F	 F	 F	 F	 F	 F	 F	

 



 

F	P 
59+=SW	X	X	X j	 j	j	 %	j	
 j	 j	 j	 j	 j	 j	 j	 
Y	Xj	X	 
I.54

 

 

 
I.<RV

 

 

, '@	 @	@	 %	@	
 @	 @	 @	 @	 @	 @	 @	 @	

 



 

@	D
 8 :	0	)" %(8?	  O &,,UY]-FGGGuy}899 0 0 00#0 "0 I&	0 0 0 0 0 :90d 	"";<<<<<r0   )NNNN)1r*  typingr   r   r8   torch.utils._pytreeutils_pytreer    torch._inductor.kernel.mm_commonr   rr  r   r   codegen.cpp_gemm_templater	   !codegen.cpp_grouped_gemm_templater
   codegen.cpp_utilsr   r   r)  r   r   r   r   r   r   r   select_algorithmr   r   r   r   r   virtualizedr   r   r   Tensortupler  ShapeAsConstantBufferrB   rN   rg   r   _inductor_lowering_functionr  r^   r0   r.   <module>r     s       " " " " " " " "  $ $ $ $ $ $ $ $ $ 4 4 4 4 4 4         6 6 6 6 6 6 E E E E E E 8 8 8 8 8 8                                
 @ ? ? ? ? ? ? ? ) ) ) ) ) ) ) ) ) )-l-<- \- ,	-
 \- 	",0
01U2<!99:;=- - - -`.%).. . x 	.
 H. x . ". . . . .j 
= ==I= I= = = =@ 59  1D D D D Dr0   