
    `i                        d dl Z d dlmZ d dlZd dlmZ ddlmZmZ ddl	m
Z
mZ ddlmZ ddlmZmZ dd	lmZ dd
lmZmZmZ ddlmZmZ ddlmZ  e j        e          Z eej        dd          Z eej         j!        j"        ddej#                  Z$ej         j!        Z!ej         j%        Z%ej         j&        Z&ddZ'ddZ(dS )    N)Any)mm_args   )configlowering)CppGemmTemplateCppWoqInt4GemmTemplate)create_epilogue_with_attr)expandregister_lowering)WeightInt4PackMatmul)autotune_select_algorithmExternKernelChoicerealize_inputs)use_aten_gemm_kernelsuse_cpp_gemm_template)Vzat::_weight_int8pack_mmF)has_out_variantz*at::native::_weight_int4pack_mm_cpu_tensor)r   kernel_creatorreturnc                  $   t          j        t          j        t          j        t          j        g           t          j        t          j                   t          j        t          j                   t          j        t          j                   d S N)r   add_needs_realized_inputs	quantized
max_pool2d
_quantized$wrapped_fbgemm_pack_gemm_matrix_fp16!wrapped_fbgemm_linear_fp16_weightmake_fallback     w/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torch/_inductor/quantized_lowerings.pyregister_quantized_opsr#   '   sr    & ;8	
   9/000:JKKK:GHHHHHr!   c                     t          t          j        d           d ddt          j        dt          j        dt          j        dt
          dt
          f
d            } t          t          j        d           d ddt          j        dt          j        d	t          d
t          j        dt
          dt
          fd            }t          j	        t          j
                   t          j	        t          j                   d S )N)type_promotion_kind)layoutinputweightscaler&   r   c                   t          | |d          \  }}}}}|                                t          j        t          j        t          j        fv r"|                                t          j        k    sJ }t                      rt          	                    ||f|          gng }dt          j
        dt          ffd}	t          |||d          rt          j        ||||gd|	           t          d|||g|          S )	NT)r&   mat2_transposedbufr   c           
      h    t          | dt          t          j                                      S )Nmul)other)r
   r   r   size)r,   r&   r)   s    r"   _mul_epiloguez?register_woq_mm_ops.<locals>.int8pack_mm.<locals>._mul_epilogueO   s5    ,U.v{1K1K"L"L   r!   )r+   )trans_wepilogue_creator_weight_int8pack_mm)r   	get_dtypetorchbfloat16float16floatint8r   aten__weight_int8pack_mmbindTensorr   r   r   add_choicesr   )
r'   r(   r)   r&   _mat1mat2aten_layoutchoicesr1   s
     ``      r"   int8pack_mmz(register_woq_mm_ops.<locals>.int8pack_mm5   sT    '.6&$'
 '
 '
#1at NN LLL  EJ.../ 
 %&&%**D$+>LLMM 		u| 	 	 	 	 	 	 	 	
 !dD$OOO 	'tU#!.    )!7T4,?
 
 	
r!   
qGroupSizeqScaleAndZerosc                J   t          | ||dd          \  }}}}}}|                                t          j        t          j        t          j        fv r"|                                t          j        k    sJ t          j        	                    t          j
        |t          j                  d           }|}	t                      r t                              ||||f|	          gng }
t          j        st          j        rat%          |	||dd|          rL|                                                                r&t*          |                             |
|	||||g           dt          j        j        j        dt          j        fd}|d	 d
}t7          d|
||||g|	|          S )NT)r&   use_4x2_dimr+   )dtype)name)r+   is_woq_int4q_group_sizexr   c                     |                                                                  sJ |                                 }|                                 }t	          j        dd|t          j        |          S )Nr      )rI   device)
get_layoutis_contiguousget_size
get_devicer6   randintuint8)rM   shaperP   s      r"   get_example_weightzHregister_woq_mm_ops.<locals>.int4pack_mm_cpu.<locals>.get_example_weight   sY    <<>>//11111JJLLE\\^^F=Cek&QQQQr!   c                 T    t           j        j        |                                          S r   )r   graph	constantsget_name)rM   s    r"   <lambda>z>register_woq_mm_ops.<locals>.int4pack_mm_cpu.<locals>.<lambda>   s    *1::<<8 r!   )r      _weight_int4pack_mm_for_cpu)input_gen_fns)r   r5   r6   r7   r8   r9   rV   r   rZ   add_tensor_constanttensorint64r   aten__weight_int4pack_mm_cpur<   r   max_autotunemax_autotune_gemmr   rQ   rR   r	   r>   	_inductorirIRNoder=   r   )r'   r(   rE   rF   r&   r?   r@   rA   
group_sizerB   rC   rX   r`   s                r"   int4pack_mm_cpuz,register_woq_mm_ops.<locals>.int4pack_mm_cpua   s    '.6&dD'
 '
 '
#1at NN LLL  EK///0 W00L5;777d 1 
 

  %&&,114^<k    	  	$*$<	% $ '  	 !!//11	 #:.::tZ8  	R%/"4"; 	R 	R 	R 	R 	R "88
 

 ))4^4'
 
 
 	
r!   )r   atenr4   r6   r=   r   r_   intr   r   _dyn_quant_matmul_4bit_dyn_quant_pack_4bit_weight)rD   rk   s     r"   register_woq_mm_opsrp   4   s2   t/TJJJ )
 )
 )
|)
)
 |)

 )
 
)
 )
 )
 KJ)
V t7TRRR C
 C
 C
|C
C
 C
 	C
 C
 
C
 C
 C
 SRC
J 467774;<<<<<r!   )r   N))loggingtypingr   r6    torch._inductor.kernel.mm_commonr    r   r   codegen.cpp_gemm_templater   r	   codegen.cpp_utilsr
   r   r   	mkldnn_irr   select_algorithmr   r   r   utilsr   r   virtualizedr   	getLogger__name__logr4   r;   opsr   int4mm_packed_weight_cpucreaterd   r   rl   r#   rp   r    r!   r"   <module>r      s           4 4 4 4 4 4         N N N N N N N N 8 8 8 8 8 8 / / / / / / / / + + + + + +         
 @ ? ? ? ? ? ? ?       g!!--	8%     21	I00'.	       I	Y!
y~
I 
I 
I 
It= t= t= t= t= t=r!   