
import torch

from torchao.utils import torch_version_at_least

if torch_version_at_least("2.7.0"):
    from .constant_fold import constant_fold

from typing import Union

from torch.fx import GraphModule, Node
from torch.fx.passes.infra.pass_manager import PassManager

from torchao.quantization.pt2e.qat_utils import (
    _fold_conv_bn_qat,
    _fuse_conv_bn_qat,
)
from torchao.quantization.pt2e.quantizer import (
    DuplicateDQPass,
    PortNodeMetaForQDQ,
    Quantizer,
)
from torchao.quantization.pt2e.utils import (
    _disallow_eval_train,
    _fuse_conv_bn_,
    _get_node_name_to_scope,
)

from .convert import _convert_to_reference_decomposed_fx
from .prepare import prepare
from .reference_representation_rewrite import reference_representation_rewrite

__all__ = [
    "prepare_pt2e",
    "prepare_qat_pt2e",
    "convert_pt2e",
]


def prepare_pt2e(
    model: GraphModule,
    quantizer: Union[Quantizer, torch.ao.quantization.quantizer.quantizer.Quantizer],
) -> GraphModule:
    """Prepare a model for post training quantization

    Args:
      * `model` (torch.fx.GraphModule): a model captured by `torch.export.export` API.
      * `quantizer`: A backend-specific quantizer that conveys how the user wants the
        model to be quantized. A tutorial on how to write a quantizer can be found here:
        https://pytorch.org/tutorials/prototype/pt2e_quantizer.html

    Return:
      A GraphModule with observers (based on the quantizer annotation), ready for calibration

    Example::

        import torch
        from torchao.quantization.pt2e.quantize_pt2e import prepare_pt2e
        from torchao.quantization.pt2e.quantizer import (
            XNNPACKQuantizer,
            get_symmetric_quantization_config,
        )

        class M(torch.nn.Module):
            def __init__(self) -> None:
                super().__init__()
                self.linear = torch.nn.Linear(5, 10)

            def forward(self, x):
                return self.linear(x)

        # initialize a floating point model
        float_model = M().eval()

        # define calibration function
        def calibrate(model, data_loader):
            model.eval()
            with torch.no_grad():
                for image, target in data_loader:
                    model(image)

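        # `example_inputs` below is an assumption added for this sketch: a
        # 1-element tuple matching M's Linear(5, 10) input shape
        example_inputs = (torch.randn(1, 5),)
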
        # Step 1. program capture
        # NOTE: this API will be updated to the torch.export API in the future, but the captured
        # result should mostly stay the same
        m = torch.export.export(float_model, example_inputs).module()
        # we get a model with aten ops

        # Step 2. quantization
        # backend developers will write their own Quantizer and expose methods to allow
        # users to express how they want the model to be quantized
        quantizer = XNNPACKQuantizer().set_global(get_symmetric_quantization_config())
        m = prepare_pt2e(m, quantizer)

        # run calibration
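        # (`sample_inference_data` is a placeholder for your calibration dataset)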
        # calibrate(m, sample_inference_data)
    """
    if isinstance(quantizer, torch.ao.quantization.quantizer.quantizer.Quantizer):
        from torch.ao.quantization.quantize_pt2e import (
            prepare_pt2e as torch_prepare_pt2e,
        )

        return torch_prepare_pt2e(model, quantizer)

    torch._C._log_api_usage_once("torchao.quantization.pt2e.prepare_pt2e")
    original_graph_meta = model.meta
    node_name_to_scope = _get_node_name_to_scope(model)
    # for PTQ, fold batchnorm into the preceding conv before annotation
    _fuse_conv_bn_(model)
    model = quantizer.transform_for_annotation(model)
    quantizer.annotate(model)
    quantizer.validate(model)
    model = prepare(
        model,
        node_name_to_scope,
        is_qat=False,
        obs_or_fq_callback=quantizer.prepare_obs_or_fq_callback,
    )
    model.meta.update(original_graph_meta)
    model = _disallow_eval_train(model)
    return model


def prepare_qat_pt2e(
    model: GraphModule,
    quantizer: Union[Quantizer, torch.ao.quantization.quantizer.quantizer.Quantizer],
) -> GraphModule:
    """Prepare a model for quantization aware training

    Args:
      * `model` (torch.fx.GraphModule): see :func:`~torchao.quantization.pt2e.quantize_pt2e.prepare_pt2e`
      * `quantizer`: see :func:`~torchao.quantization.pt2e.quantize_pt2e.prepare_pt2e`

    Return:
      A GraphModule with fake quant modules (based on quantizer annotation), ready for
      quantization aware training

    Example::
        import torch
        from torchao.quantization.pt2e.quantize_pt2e import prepare_qat_pt2e
        from torchao.quantization.pt2e.quantizer import (
            XNNPACKQuantizer,
            get_symmetric_quantization_config,
        )

        class M(torch.nn.Module):
            def __init__(self) -> None:
                super().__init__()
                self.linear = torch.nn.Linear(5, 10)

            def forward(self, x):
                return self.linear(x)

        # initialize a floating point model
        float_model = M().eval()

        # define the training loop for quantization aware training
        def train_loop(model, train_data):
            model.train()
            for image, target in train_data:
                ...

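        # `example_inputs` below is an assumption added for this sketch: a
        # 1-element tuple matching M's Linear(5, 10) input shape
        example_inputs = (torch.randn(1, 5),)
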
        # Step 1. program capture
        # NOTE: this API will be updated to the torch.export API in the future, but the captured
        # result should mostly stay the same
        m = torch.export.export(float_model, example_inputs).module()
        # we get a model with aten ops

        # Step 2. quantization
        # backend developers will write their own Quantizer and expose methods to allow
        # users to express how they want the model to be quantized
        quantizer = XNNPACKQuantizer().set_global(get_symmetric_quantization_config())
        m = prepare_qat_pt2e(m, quantizer)

        # run quantization aware training
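        # (`train_data` is a placeholder for your training dataset)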
        train_loop(m, train_data)

    """
    if isinstance(quantizer, torch.ao.quantization.quantizer.quantizer.Quantizer):
        from torch.ao.quantization.quantize_pt2e import (
            prepare_qat_pt2e as torch_prepare_qat_pt2e,
        )

        return torch_prepare_qat_pt2e(model, quantizer)

    torch._C._log_api_usage_once("torchao.quantization.pt2e.prepare_qat_pt2e")
    original_graph_meta = model.meta
    node_name_to_scope = _get_node_name_to_scope(model)
    model = quantizer.transform_for_annotation(model)
    quantizer.annotate(model)
    quantizer.validate(model)
    # perform fusion after annotate() to avoid quantizing ops in the new
    # subgraph that don't need to be quantized
    _fuse_conv_bn_qat(model)
    model = prepare(
        model,
        node_name_to_scope,
        is_qat=True,
        obs_or_fq_callback=quantizer.prepare_obs_or_fq_callback,
    )
    model.meta.update(original_graph_meta)
    model = _disallow_eval_train(model)
    return model


_QUANT_OPS = [
    torch.ops.quantized_decomposed.quantize_per_tensor.default,
    torch.ops.quantized_decomposed.quantize_per_tensor.tensor,
    torch.ops.quantized_decomposed.quantize_per_channel.default,
    torch.ops.torchao.quantize_affine,
]


def _quant_node_constraint(n: Node) -> bool:
    """If there are any pure ops between get_attr and a quantize op they will be const propagated
    e.g. get_attr(weight) -> transpose -> quantize -> dequantize*
    (Note: dequantize op is not going to be constant propagated)

    This filter is added because we don't want to constant fold the things that are not
    related to quantization.
    """
    return n.op == "call_function" and n.target in _QUANT_OPS


def _is_torchao_prepared_do_not_use_outside_this_file(model):
    # detect whether `model` was prepared with torchao's prepare/prepare_qat
    # (as opposed to torch.ao's); mixing the two flows is not allowed
    from torchao.quantization.pt2e.fake_quantize import (
        FakeQuantize as torchao_FakeQuantize,
    )
    from torchao.quantization.pt2e.observer import (
        AffineQuantizedObserverBase as torchao_AffineQuantizedObserverBase,
    )
    from torchao.quantization.pt2e.observer import (
        ObserverBase as torchao_ObserverBase,
    )

    is_torch_ao_prepared = False
    is_torchao_prepared = False
    for _, m in model.named_modules():
        if (
            isinstance(m, torch.ao.quantization.fake_quantize.FakeQuantize)
            or isinstance(m, torch.ao.quantization.observer.ObserverBase)
            or isinstance(m, torch.ao.quantization.observer.AffineQuantizedObserverBase)
        ):
            is_torch_ao_prepared = True
        if (
            isinstance(m, torchao_FakeQuantize)
            or isinstance(m, torchao_ObserverBase)
            or isinstance(m, torchao_AffineQuantizedObserverBase)
        ):
            is_torchao_prepared = True

    if is_torch_ao_prepared:
        assert not is_torchao_prepared, (
            "Cannot be prepared using both torch.ao and torchao"
        )
    if is_torchao_prepared:
        assert not is_torch_ao_prepared, (
            "Cannot be prepared using both torch.ao and torchao"
        )

    return is_torchao_prepared


def convert_pt2e(
    model: GraphModule,
    use_reference_representation: bool = False,
    fold_quantize: bool = True,
) -> GraphModule:
    """Convert a calibrated/trained model to a quantized model

    Args:
      * `model` (torch.fx.GraphModule): calibrated/trained model
      * `use_reference_representation` (bool): whether to produce the reference representation
      * `fold_quantize` (bool): whether to constant fold the quantize op on weights

    Returns:
        quantized model, either in q/dq representation or reference representation

    Example::

        # prepared_model: the model produced by `prepare_pt2e`/`prepare_qat_pt2e` and calibration/training
        # `convert_pt2e` produces a quantized model that represents quantized computation with
        # quantize dequantize ops and fp32 ops by default.
        # Please refer to
        # https://pytorch.org/tutorials/prototype/pt2e_quant_ptq_static.html#convert-the-calibrated-model-to-a-quantized-model
        # for detailed explanation of output quantized model
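        # default output: q/dq representation with quantize ops on weights
        # constant folded (`fold_quantize=True`); pass
        # `use_reference_representation=True` for the reference representation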
        quantized_model = convert_pt2e(prepared_model)

    """
    if not _is_torchao_prepared_do_not_use_outside_this_file(model):
        from torch.ao.quantization.quantize_pt2e import (
            convert_pt2e as torch_convert_pt2e,
        )

        return torch_convert_pt2e(model, use_reference_representation, fold_quantize)

    torch._C._log_api_usage_once("torchao.quantization.pt2e.convert_pt2e")
    if not isinstance(use_reference_representation, bool):
        raise ValueError(
            "Unexpected argument type for `use_reference_representation`, "
            "please make sure you intend to pass argument "
            f"{use_reference_representation} to convert_pt2e"
        )
    original_graph_meta = model.meta
    model = _convert_to_reference_decomposed_fx(model)
    model = _fold_conv_bn_qat(model)

    pm = PassManager([DuplicateDQPass()])
    model = pm(model).graph_module

    pm = PassManager([PortNodeMetaForQDQ()])
    model = pm(model).graph_module

    if fold_quantize and torch_version_at_least("2.7.0"):
        # constant fold the pure-op chains feeding quantize ops (e.g. weight
        # transposes) so that weights are stored in their quantized form
        constant_fold(model, _quant_node_constraint)

    if use_reference_representation:
        model = reference_representation_rewrite(model)

    model.meta.update(original_graph_meta)
    model = _disallow_eval_train(model)
    return model