
    Pi                        d dl Z d dlZd dlZd dlmZ d dlmZ d dlm	Z	  e j
        e          Ze                     e j                               	 d dlmZ n"# e$ r e                    d           dZY nw xY w e e ej        dd                               Zdej        d	ej        d
ej        fdZdej        dej        d
ej        fdZdej        dej        dej        d
ej        fdZdS )    N)is_compiling)	out_dtype)check_cpu_version)intmm_tritonzTWarning: Detected no triton, on systems without Triton certain kernels will not workTORCHAO_AUTOTUNER_ENABLEinputmat2returnc                     t                      sd|                                 v r| j        j        dk    rYt	          t
          j        j        j        j	        t
          j
        |                                 |                                          S t	          t
          j        j        j        j	        t
          j
        | |          S |j        | j        k    sJ d|j         d| j                     d|j        j        | j        j        fv }| j        d         dz  dk    o| j        d         dk    }|j        d         dz  dk    o|j        d         dk    }|o| }|s|rt          j        |                                                     t
          j
                  |                                                    t
          j
                                                | j        j                  S |                                s|                                }|                                 s(| j        d         dz  dk    r|                                 } 	 t	          t
          j        j        j        j	        t
          j
        | |          S # t$          $ ro t          j        |                     t
          j                  |                    t
          j                                                t
          j
                  cY S w xY w)a  
    Performs a safe integer matrix multiplication, considering different paths for
    torch.compile, cublas, and fallback cases.

    Args:
        input (torch.Tensor): The input tensor of shape [i, j].
        mat2 (torch.Tensor): The matrix to multiply with, of shape [j, k].

    Returns:
        torch.Tensor: The result of the matrix multiplication.

    Raises:
        AssertionError: If the tensors are not on the same device.
    
FakeTensorcpuz3need both tensors to be on the same device but got z and       r   )dynamo_is_compiling__repr__devicetyper   torchopsatenmmdefaultint32floatshapematmulr   tois_contiguous
contiguous	Exceptionfloat32)r   r	   
device_cpuj_is_nonzero_multiple_of_8k_is_nonzero_multiple_of_8bad_dimensions_for_cublass         h/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchao/kernel/intmm.pysafe_int_mmr'      s      N0@0@ @ @<%%	!)5;tzz||   *2EKMMM ;%,&&&^dk^^PUP\^^ '&& 4;+U\->??J"'+a.1"4"9!SAQR@R"&*Q-!"3q"8!Qtz!}q?P"A'A!  
. 
|EIIKKNN5;77u{9S9STTWWL
 
 	

  !  !! 
Aa  	
*2EKMMM 
 
 
 |EHHU]33TWWU]5K5KLLOOK
 
 	
 	
 	

s   4J A6K=<K=abc                     t           ,t          r%t          j        j                            | |          S t          | |          S )a\  
    Performs integer matrix multiplication using intmm_triton if available and autotuner is enabled,
    otherwise falls back to safe_int_mm.

    Args:
        a (torch.Tensor): The first matrix to multiply.
        b (torch.Tensor): The second matrix to multiply.

    Returns:
        torch.Tensor: The result of the matrix multiplication.
    )r   AUTOTUNER_ENABLEr   r   torchao
int_matmulr'   )r(   r)   s     r&   r-   r-   [   s;     $4y ++Aq111q!    scales1c                 r   | j         \  }}|j         \  }}||                    d          k    s|                                dk    sJ d|                    d          k    sJ |                                sJ |                    ||f          }|                                dk    sJ t          |j                  r2t          j	        | |          }|
                    |j                  |z  S t          -t          r&t          j        j                            | ||          S t#          | |          }||z  S )a  
    Performs scaled integer matrix multiplication.

    Args:
        a (torch.Tensor): The first matrix to multiply.
        b (torch.Tensor): The second matrix to multiply.
        scales1 (torch.Tensor): The scaling factors for the rows of the result.

    Returns:
        torch.Tensor: The result of the scaled matrix multiplication.

    Raises:
        AssertionError: If the dimensions of the input tensors do not match the expected shapes.
    r   r      )r   sizenumelr   expanddimr   r   r   _int_mmr   dtyper   r+   r   r,   int_scaled_matmulr'   )r(   r)   r/   MKNcs          r&   r8   r8   l   s(   " 7DAq7DAqQ7==??a#7#7#77Q  """""nnaV$$G;;==A(( - M!QttGM""W,,$4y 221aAAAAqAw;r.   )loggingosr   torch._dynamor   r   !torch._higher_order_ops.out_dtyper   torchao.utilsr   	getLogger__name__logger
addHandlerNullHandlertorchao.kernelr   ImportErrorwarningboolintgetenvr+   Tensorr'   r-   r8    r.   r&   <module>rO      s    				  = = = = = = 7 7 7 7 7 7 + + + + + +		8	$	$   %'%'' ( ( (+++++++   
NN^   LLL 4IBI&@!DDEEFF :
u| :
5< :
EL :
 :
 :
 :
z%, 5< EL    "#|##/4|#
\# # # # # #s   A A87A8