
    .`i                     Z    d Z ddlmZmZmZmZ  G d de          Z G d de          ZdS )a!  
This file contains the Pydantic schemas for various quantization-related
parameters. When a relevant quantization technique is specified, these
parameters are loaded in the form of a JSON alongside the model weights
and augment the model with additional information needed for use of that
technique. The format of this JSON should be specified by one or more
schemas contained here.

For example, when the KV cache is quantized to FP8-E4M3 (currently only
possible on ROCm), the model can be optionally augmented with KV cache
scaling factors.
    )	BaseModel
ConfigDictValidationInfomodel_validatorc                       e Zd ZU eed<   eeeeef         f         ed<    ed          dd            Z	 ed          de
dd fd            Z ed          de
dd fd	            Zd
S )KVCacheQuantSchemadtypescaling_factoraftermodereturnc                 B    | j         dk    sJ d| j          d            | S )Nfloat8_e4m3fnz5Loaded scaling factors intended for KV cache dtype = z rather than float8_e4m3fn!)r	   )selfs    /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/layers/quantization/schema.pycheck_is_fp8zKVCacheQuantSchema.check_is_fp8   s=    z_,,,7z7 7 7 -,,     infoc           
         |j         }|r|d         }|d         }t          | j                  |k    s#J dt          | j                   d| d            | j                                        D ]9\  }}t          |          |k    s!J d| d| dt          |           d            :t	          |          D ]}|| j        v sJ d| d	            | S )
Ntp_sizenum_hidden_layerszLoaded dictionary has TP size z2 but LLM engine is currently running with TP size .z KV cache scales map for TP rank z is malformed. Expected z layers, got z not found.)contextlenr
   itemsrange)r   r   r   r   r   tp_rank
layer_mapsis           r   check_tp_ranksz!KVCacheQuantSchema.check_tp_ranks#   sH   , 	i(G '(; <t*++w666OT5H1I1I O ODKO O O 766 (,':'@'@'B'B  #:*;;;;*w * * 1* *:* * * <;;;
 7^^  D////EqEEE 0/// r   c                     |j         }|rD|d         }|d         }| j        |         }t          |          D ]}||v sJ d| d| d            | S )Nr   r   z)Could not find KV cache scales for layer z in TP rank r   )r   r
   r   )r   r   r   r   r   layer_scales_mapr    s          r   check_current_rankz%KVCacheQuantSchema.check_current_rank9   s    , 	i(G '(; <#27;,--  ,,,,* * *&* * * -,,, r   N)r   r   )__name__
__module____qualname__str__annotations__dictintfloatr   r   r   r!   r$    r   r   r   r      s         JJJ
 d3:..////_'"""   #" _'"""> 6J    #"* _'"""~ :N    #"  r   r   c                   v    e Zd ZU  ed          Zedz  ed<   eed<    ed          de	d	d fd
            Z
dS )QuantParamSchemar-   )protected_namespacesN
model_typekv_cacher   r   r   r   c                     |j         }|r9|                    dd           }|!|| j        k    sJ d| d| j         d            | S )Nr1   zModel type is z> but loaded scaling factors belonging to different model type !)r   getr1   )r   r   r   r1   s       r   check_model_typez!QuantParamSchema.check_model_typeO   sq    , 	 \488J%!T_4445Z 5 5"&/5 5 5 544
 r   )r%   r&   r'   r   model_configr(   r)   r   r   r   r6   r-   r   r   r/   r/   H   s          :2666Ld
    _'"""
^ 
8J 
 
 
 #"
 
 
r   r/   N)__doc__pydanticr   r   r   r   r   r/   r-   r   r   <module>r:      s     L K K K K K K K K K K K2 2 2 2 2 2 2 2j    y     r   