
    Pi"                        d dl Z d dlZd dlZd dlmZ d dlmZmZ d dlZd dl	m
Z
 d dlmZ d dlmZmZmZ  ej        d          Z G d d	          Zej        d
e
ddfd            Zedk    r ej         e                       dS dS )    N)Path)AnyDict)
DictConfig)nn)configtrainingutilsDEBUGc                       e Zd ZdZdeddfdZdedeeef         fdZ	deddfdZ
d	ed
eeef         dej        fdZ ej                    defd            ZdefdZdS )QuantizationRecipea  
    Recipe for quantizing a Transformer-based LLM.
    Uses quantizer classes from torchao to quantize a model.

    Supported quantization modes are:
    8da4w (PyTorch 2.3+):
        torchtune.training.quantization.Int8DynActInt4WeightQuantizer
        int8 per token dynamic activation with int4 weight only per axis group quantization
        Args:
            `groupsize` (int): a parameter of int4 weight only quantization,
            it refers to the size of quantization groups which get independent quantization parameters
            e.g. 32, 64, 128, 256, smaller numbers means more fine grained and higher accuracy,
            but also higher memory overhead

    8da4w-qat (PyTorch 2.4+):
        torchtune.training.quantization.Int8DynActInt4WeightQATQuantizer
        int8 per token dynamic activation with int4 weight only per axis group quantization
        Same as "8da4w", but for quantizing QAT checkpoints
        Args:
            `groupsize` (int): a parameter of int4 weight only quantization,
            it refers to the size of quantization groups which get independent quantization parameters
            e.g. 32, 64, 128, 256, smaller numbers means more fine grained and higher accuracy,
            but also higher memory overhead
    cfgreturnNc                 d   t          j        |j                  | _        t	          j        |j        | j                  | _        t          j	        |j
                  | _        t	          j        | j                  | _        t	          j        |j        |                    dd                      d S )N)device)dtyper   cudnn_deterministic_mode)seed
debug_mode)r
   
get_devicer   _devicer	   	get_dtyper   _dtyper   instantiate	quantizer
_quantizerget_quantizer_mode_quantization_modeset_seedr   get)selfr   s     d/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/recipes/quantize.py__init__zQuantizationRecipe.__init__0   s    'sz:::(syNNN ,S];;"*"=do"N"Ncgg.H$&O&O	
 	
 	
 	
 	
 	
    checkpointer_cfgc                 j    t          j        |          | _        | j                                        }|S )N)r   r   _checkpointerload_checkpoint)r!   r%   checkpoint_dicts      r"   r(   z"QuantizationRecipe.load_checkpoint9   s0    #/0@AA,<<>>r$   c                     |                      |j                  }|                     |j        |t          j                           | _        d S )N)	model_cfgmodel_state_dict)r(   checkpointer_setup_modelmodelr	   	MODEL_KEY_model)r!   r   	ckpt_dicts      r"   setupzQuantizationRecipe.setup>   sH    (()9::	''i&x'9: ( 
 
r$   r+   r,   c                    t          j        | j                  5  | j        5  t	          j        |          }d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   d| j        v r| j                            |          }|	                    |           t          j
        |                                | j                   t                              d| j         d           |S )Nqat)r   z$Model is initialized with precision .)r	   set_default_dtyper   r   r   r   r   r   prepareload_state_dictvalidate_expected_param_dtypenamed_parametersloggerinfo)r!   r+   r,   r/   s       r"   r.   zQuantizationRecipe._setup_modelE   sh   
 '44 	2 	2dl 	2 	2&y11E	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 D+++O++E22E./// 	.""$$DK	
 	
 	
 	
 	I4;IIIJJJs3   AAAA	A
A	AA!Ac                    t          j                    }d| j        v r%| j                            | j                  | _        n$| j                            | j                  | _        t          j                    |z
  }t                              d|dd           | j	        j
        dk    rIt          j                    }t                              d|                                dz  dd           d S d S )	Nr5   zTime for quantization: z.02fz seccpuzMemory used: g    eAz GB)timeperf_counterr   r   convertr1   quantizer<   r=   r   typer
   get_torch_device_namespacemax_memory_allocated)r!   r   t0ttorch_devices        r"   rC   zQuantizationRecipe.quantizeX   s      D+++/11$+>>DKK/224;??DK"$:a::::;;;<%% ;==LKKS A A C Cc ISSSS     &%r$   c                 "   | j                                         }|j        j        d                             d          d         }t          |j        j                  }|                    d           t          j        || d| j	         
                    d                                        d          }t          j        ||           t                              dt           j                            |          d	z  d
d|            d S )Nr   r6   T)exist_ok-z-qatz.ptzModel checkpoint of size i   @z.2fz GiB saved to )r1   
state_dictr-   checkpoint_filessplitr   
output_dirmkdirjoinpathr   rstripwith_suffixtorchsaver<   r=   ospathgetsize)r!   r   r2   	file_namerP   checkpoint_files         r"   save_checkpointz"QuantizationRecipe.save_checkpointg   s   K**,,	$5a8>>sCCAF	#*566
$'''-9@@t'>@@GGOO
 

+e

 	 	
9o...*w//'9D* *'* *	
 	
 	
 	
 	
r$   )__name__
__module____qualname____doc__r   r#   r   strr   r(   r3   r   Moduler.   rU   no_gradrC   r\    r$   r"   r   r      s        2
J 
4 
 
 
 

 tCH~    

 
 
 
 
 
 sCx. 
	   & U]__J    _
: 
 
 
 
 
 
r$   r   r   r   c                     t          j        d|            t          |           }|                    |            |                    |            |                    |            d S )Nr   )recipe_namer   )r   )r   
log_configr   r3   rC   r\   )r   recipes     r"   mainri   y   sn    
"6C@@@@C(((F
LLSL
OOO
s#####r$   __main__)rW   sysr@   pathlibr   typingr   r   rU   	omegaconfr   r   	torchtuner   r	   r
   
get_loggerr<   r   parseri   r]   exitrd   r$   r"   <module>rs      s=   
			 



                                   - - - - - - - - - -		'	"	"`
 `
 `
 `
 `
 `
 `
 `
F $j $T $ $ $ $ zCHTTVV r$   