
    Piq                        d dl mZmZ d dlmZmZ d dlZd dlZd dlm	Z	m
Z
  ed           G d d                      Z e            Zd	ed
efdZej        fdej        dededej        ded
ej        fdZej        fdej        dededej        ded
ej        fdZd	ede	d
ej        fdZd	ed
eee         ee         f         fdZd	ede	d
ej        fdZd	ed
eej        ej        f         fdZdS )    )	dataclassfield)ListTupleN)GranularityPerAxisT)frozenc                       e Zd ZU dZeed<   dZeed<   dZeed<    ed           Z	e
e         ed<    ed	           Ze
e         ed
<   dS )MarlinQQQConstants   TILE@   MIN_THREAD_NMAX_PARALLELc                      dgS )N    r       y/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchao/quantization/marlin_qqq/utils.py<lambda>zMarlinQQQConstants.<lambda>   s    1# r   )default_factorySUPPORTED_NUM_BITSc                  
    ddgS )N   r   r   r   r   r   zMarlinQQQConstants.<lambda>   s
    b#Y r   SUPPORTED_GROUP_SIZESN)__name__
__module____qualname__r   int__annotations__r   r   r   r   r   r   r   r   r   r   r      s         D#NNNL#L#$)E++$F$F$FS	FFF',u=N=N'O'O'O49OOOOOr   r   num_bitsreturnc                 8    d| z  dk    sJ d|              d| z  S )zCompute the packing factor for a given number of bits.

    Args:
        num_bits (int): Number of bits to pack.
    Returns:
        int: The packing factor.
        r   zUnsupported num_bits = r   )r"   s    r   get_pack_factorr&      s4     =ACCC>r   q_wsize_ksize_npermtilec                    | j         ||fk    sJ ||z  dk    sJ d| d|             ||z  dk    sJ d| d|             |                     ||z  |||z  |f          } |                     d          } |                     ||z  ||z  f          } |                     d|                                f          dd|f                             | j                   } | S )a  Permute weights to 16x64 Marlin tiles.

    Args:
        q_w (torch.Tensor): Quantized weights.
        size_k (int): Number of input features.
        size_n (int): Number of output features.
        perm (torch.Tensor): The computed permutation tensor to be applied.
        tile (int, optional): Tile size. Defaults to `TILE`.
    Returns:
        torch.Tensor: Weight tensor permuted to Marlin tiles.
    r   	size_k = 	, tile = r            r   N)shapereshapepermutenumel)r'   r(   r)   r*   r+   s        r   marlin_permute_weightsr7   +   s   $ 9(((((D=AB6BBDBBD=AB6BBDBB ++v~tVt^TB
C
CC
++l
#
#C
++v~v}5
6
6C
++r4::<<(
)
)!!!T'
2
:
:39
E
ECJr   q_w_unpackedreverse_permc                    | j         d         |f||z  | j         d         |z  fk    sJ ||z  dk    sJ d| d|             ||z  dk    sJ d| d|             |                     d|                                f          dd|f                             | j                   }|                    ||z  ||z  ||f          }|                    d          }|                    ||f          }|S )a  Reverse permute weights from 16x64 Marlin tiles.
    Args:
        q_w_unpacked (torch.Tensor): Unpacked quantized weights.
        size_k (int): Number of input features.
        size_n (int): Number of output features.
        reverse_perm (torch.Tensor): The computed reverse permutation tensor to be applied.
        tile (int, optional): Tile size. Defaults to `TILE`.
    Returns:
        torch.Tensor: Weight tensor reverse permuted from Marlin tiles.
    r   r1   r-   r.   r   Nr/   )r3   r4   r6   r5   )r8   r(   r)   r9   r+   q_w_comps         r   reverse_marlin_permute_weightsr<   K   sA   $ q!6*$1%/     D=AB6BBDBBD=AB6BBDBB ##R););)=)=$>??	<gl !!  44t LMMH--H 011HOr   granularityc                   
 g }t          d          D ]}g }|dz  }dD ]M}d|dz  z  d|dz  z  dz   d|dz  z  dz   d|dz  z  dz   fD ]#}|                    d|z  |z   d|z  z              $Nt          d          D ]#
|                    
fd	|D                        $t          j        |          }| dk    rCt          |t                    rt          j        g d
          }	n9t          j        g d          }	n"t          d                    |                     |	                    dt          |	          f          dd|	f                                         }t          j        |          }|S )a  Precompute permutations for the marlin weight shuffling.

    Args:
        num_bits (int): Number of bits to pack.
        granularity (Granularity): The weight quantization granularity.
    Returns:
        torch.Tensor: The weight permutation tensor.
    r%   r   )r   r1   r1   r0   r2   r      c                      g | ]
}|d z  z   S )   r   ).0pjs     r   
<listcomp>z'get_qqq_weight_perm.<locals>.<listcomp>   s!    999aa#'k999r   )r   r      r1      r0      r2   )r   r0   r   rG   r1   r2   rF   rH   num_bits must be 4, got {}r   N)rangeappendextendnumpyarray
isinstancer   	Exceptionformatr4   lenraveltorch
from_numpy)r"   r=   	perm_listiperm1colblockrowr*   
interleaverD   s             @r   get_qqq_weight_permr]   p   s    I2YY ; ;1f 	9 	9EQUQUaQUaQUa	 9 9 R#X^a%i788889 q 	; 	;A99995999::::	; ;y!!D1}}k7++ 	?%=%=%=>>JJ%=%=%=>>JJ4;;HEEFFF<<S__-..qqq*}=CCEEDD!!DKr   c                 J   | dk    r"t          d                    |                     g }t          d          D ]0|                    fdt          d          D                        1g }t          d          D ]#|                    fddD                        $||fS )zPrecompute permutations for the marlin scale shuffling.
    Args:
        num_bits (int): Number of bits to pack.
    Returns:
        Tuple[List[int], List[int]]: Scale permutation list and
        scale permutation list for a single group.
    r   rI   r?   c                      g | ]
}d |z  z   S )r?   r   rB   rD   rW   s     r   rE   z'get_qqq_scale_perms.<locals>.<listcomp>   s!    7771q1u9777r   c                      g | ]
}d z  |z   S )r0   r   r`   s     r   rE   z'get_qqq_scale_perms.<locals>.<listcomp>   s!    !R!R!R!a%!)!R!R!Rr   )r   r1   r?   	   r            )rP   rQ   rJ   rL   )r"   
scale_permscale_perm_singlerW   s      @r   get_qqq_scale_permsrh      s     1}}4;;HEEFFFJ1XX 9 97777eAhh7778888#%1XX T T  !R!R!R!R5Q!R!R!RSSSS(((r   c                 N    t          | |          }|                                }|S )a  Reverse permutation for Marlin weight shuffling from `get_qqq_weight_perm`.
    Args:
        num_bits (int): Number of bits to pack.
        granularity (Granularity): The weight quantization granularity.
    Returns:
        torch.Tensor: The reversed weight permutation tensor.
    )r]   argsort)r"   r=   r*   s      r   get_qqq_weight_reverse_permrk      s%     x55D<<>>DKr   c                     t          |           \  }}t          j        |                                          }t          j        |                                          }||fS )a#  Reverse permutation for Marlin scale shuffling from `get_qqq_scale_perms`.
    Args:
        num_bits (int): Number of bits to pack.
    Returns:
        Tuple[List[int], List[int]]: The reversed scale permutation list and
        the reversed scale permutation list for a single group.
    )rh   rT   tensorrj   )r"   rf   rg   s      r   get_qqq_scale_reverse_permsrn      s[     %8$A$A!J!j))1133J%677??AA(((r   )dataclassesr   r   typingr   r   rM   rT    torchao.quantization.granularityr   r   r   constr    r&   r   Tensorr7   r<   r]   rh   rk   rn   r   r   r   <module>rt      ss   ) ( ( ( ( ( ( (                  $P P P P P P P P 		c 	c 	 	 	 	" 
 	  ,	
  \   J 
! !,!! ! ,	!
 ! \! ! ! !J$# $K $EL $ $ $ $N)# )%S	490D*E ) ) ) )& +
\    )# )%el8R2S ) ) ) ) ) )r   