
     `i                     f    d dl mZmZmZ  e            rddlZ ej        e          Z	 	 	 	 ddZdS )   )is_optimum_quanto_availableis_torch_availablelogging    NFc                    ddl m} t                      rddlm}m}m}m}	m}
m	} |||
|	d}d||d}|g }| 
                                D ]\  }}g                     |           t          fd|D                       s |            5  t          |t          j        j                  rm ||j        |j        |j        du|j        j        ||j                 ||j                           | j        |<   | j        |                             d	           d
}nat          |t          j        j                  rB|j        ; ||j        |j        |j        |j        du||j                           | j        |<   d
}ddd           n# 1 swxY w Y   t=          t?          |                                                     dk    rtC          ||||          \  }}"                    d           | |fS )aZ  
    Public method that recursively replaces the Linear layers of the given model with Quanto quantized layers.
    Returns the converted model and a boolean that indicates if the conversion has been successful or not.

    Args:
        model (`torch.nn.Module`):
            The model to convert, can be any `torch.nn.Module` instance.
        quantization_config (`AqlmConfig`, defaults to `None`):
            The quantization config object that contains the quantization parameters.
        modules_to_not_convert (`list`, *optional*, defaults to `None`):
            A list of modules to not convert. If a module name is in the list (e.g. `lm_head`), it will not be
            converted.
        current_key_name (`list`, *optional*, defaults to `None`):
            A list that contains the current key name. This is used for recursion and should not be passed by the user.
        has_been_replaced (`bool`, *optional*, defaults to `None`):
            A boolean that indicates if the conversion has been successful or not. This is used for recursion and
            should not be passed by the user.
    r   )init_empty_weights)
QLayerNormQLinearqfloat8qint2qint4qint8)float8int8int4int2N)Nr   r   c              3   F   K   | ]}|d                                v V  dS ).N)join).0keycurrent_key_names     t/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/integrations/quanto.py	<genexpr>z-replace_with_quanto_layers.<locals>.<genexpr>A   s5      WW3#((#3444WWWWWW    )in_featuresout_featuresbiasdtypeweightsactivationsFT)r!   )quantization_configmodules_to_not_convertr   has_been_replaced)#
accelerater   r   optimum.quantor	   r
   r   r   r   r   named_childrenappendany
isinstancetorchnnLinearr   r   r   weightr   r    r!   _modulesrequires_grad_	LayerNormnormalized_shapeepselementwise_affinelenlistchildrenreplace_with_quanto_layerspop)modelr"   r#   r   r$   r   r	   r
   r   r   r   r   	w_mapping	a_mappingnamemodule_s      `             r   r9   r9      s   2 .-----"$$ UTTTTTTTTTTTTTTTT"E5%PPIw>>I%!#,,.. %! %!f#!%%%WWWW@VWWWWW 	1##%% 1 1feho66 1+27$*$6%+%8#[4$m1 )*=*E F$-.A.M$N, , ,EN4( N4(77>>>(,%%(:;; 	1*6B/9z"3"J"5"Kt3(12E2Q(R0 0 0t, -1)+1 1 1 1 1 1 1 1 1 1 1 1 1 1 1, tFOO%%&&''!++#=$7'=!1"3$ $ $ A  	R    ###s   C.F

F	F	)NNNF)	utilsr   r   r   r,   
get_logger__name__loggerr9    r   r   <module>rF      s    M L L L L L L L L L  LLL		H	%	%
 J$ J$ J$ J$ J$ J$r   