
    Pi                         d dl Z d dlmZ d dlZd dlmZ d dlmZmZ d dlm	Z	m
Z
 d dlmZ ddlmZ d	gZ G d
 d	e          ZdS )    N)Optional)nn)BaseSparsifierget_arg_info_from_tensor_fqn)QConfigdefault_placeholder_observer)_remove_qconfig   )PerChannelNormObserverWandaSparsifierc            
            e Zd ZdZ	 	 ddedee         f fdZdej	        de
e         d	df fd
Zdej	        deded	dfdZ	 	 ddeeedf                  deeeeedf         f                  f fdZ xZS )r   a  Wanda sparsifier

    Wanda (Pruning by Weights and activations), proposed in https://arxiv.org/abs/2306.11695
    is an activation aware pruning method. The sparsifier removes weights based on the product
    of the input activation norm and the weight magnitude.

    This sparsifier is controlled by three variables:
    1. `sparsity_level` defines the number of *sparse blocks* that are zeroed-out;

    Args:
        sparsity_level: The target level of sparsity;
        model: The model to be sparsified;
          ?Nsparsity_levelsemi_structured_block_sizec           	          ||d}|#|}t          j        d| d|dz   d| d           t                                          |           d S )N)r   r   z.WandaSparsifier got semi_structured_bock_size=z, sparsity_level fixed to 50% (   :z
) sparsity)defaults)warningswarnsuper__init__)selfr   r   r   m	__class__s        j/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchao/sparsity/wanda.pyr   zWandaSparsifier.__init__#   s     -*D
 
 &1*AMyyycdhiciyylmyyy   	(+++++    modelconfigreturnc                    |!t          t          t                    |_        nf|D ]c}|                    dd           }|t          d          t          ||          }|d         }| t          t          t                    |_        dt          j        j	        
                    |d           t                      
                    ||           d S )N)
activationweight
tensor_fqnz(Each config must contain a 'tensor_fqn'.moduleT)inplace)r   r   r   qconfigget
ValueErrorr   torchaoquantizationpreparer   )r   r   r   module_configr$   info_from_tensor_fqnr%   r   s          r   r-   zWandaSparsifier.prepare3   s     >#1:V  EMM "(  *..|TBB
%$%OPPP (DE:'V'V$-h7 %%,#9;& & &FN 	%%eT%::: 	v&&&&&r   r%   tensor_namec                 P   t          |j        |          d         j        }t          |j        |          j        }|j        j        }t          j        |          |z  }|                                }	t          |	|z            }
|
                    dd          |d         }	|	dz  }
|                    d|	                              d          ddd|
f         }|j                            d|	                              d|t          j        ||j                             dS )	a  Pruning function for WandaSparsifier

        The activation statistics is retrieved first in the `act_per_input` variable.
        Then the Wanda pruning metric is computed. The weight matrix is then pruned
        by comparing this metric across the whole current layer.
        r   r   Nr   r
   )dim)dtype)getattrparametrizationsmaskoriginalactivation_post_processnormr*   absnumelintr(   viewargsortdatascatter_
zeros_liker4   )r   r%   r0   r   kwargsr7   tensoractivation_norm_per_channelpruning_metric
block_sizenum_specifiedpruning_indss               r   update_maskzWandaSparsifier.update_maskP   s0    v.<<Q?D0+>>G&,&D&I# 6**-HH $))++
 J788::2D99E <=J&!OM &**2z::BBqBIIAA~~

 		r:&&//|U-l$*MMM	
 	
 	
 	
 	
r   params_to_keep.params_to_keep_per_layerc                     | j         D ]}|d         }t          |           t                                          ||           d S )Nr%   )rK   rL   )groupsr	   r   squash_mask)r   rK   rL   argsrC   r   r%   r   s          r   rO   zWandaSparsifier.squash_maskv   sd     k 	$ 	$FH%FF#### 	)%= 	 	
 	
 	
 	
 	
r   )r   N)NN)__name__
__module____qualname____doc__floatr   r=   r   r   Modulelistdictr-   strrJ   tuplerO   __classcell__)r   s   @r   r   r      s;          !$48, ,, %-SM, , , , , , 'RY 'T
 't ' ' ' ' ' ':$
i$
.1$
CH$
	$
 $
 $
 $
P 59IM
 
 sCx1
 #+4U38_0D+E"F
 
 
 
 
 
 
 
 
 
r   )r   typingr   r*   r   torch.ao.pruningr   r   torch.ao.quantizationr   r   torch.ao.quantization.quantizer	   utilsr   __all__r    r   r   <module>rc      s                  I I I I I I I I G G G G G G G G : : : : : : ) ) ) ) ) )
r
 r
 r
 r
 r
n r
 r
 r
 r
 r
r   