
    .`i"                     :   d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZ d dlmZmZ d dlmZ ddlmZ ddlm Z m!Z!m"Z"  e
e#          Z$e G d dej%                              Z& G d de          Z'dS )    )IterableN)support_torch_compile)
VllmConfig)init_logger)RMSNorm)LogitsProcessor)QuantizationConfig)TorchAOConfig)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)Llama4DecoderLayerLlama4ForCausalLM)extract_layer_index   )SupportsMultiModal)AutoWeightsLoadermaybe_prefixprocess_eagle_weightc                   @    e Zd Zdddddededededz  d	df
 fd
Zdej	        d	ej	        fdZ
	 ddej	        dz  dej	        dej	        dej	        dz  d	eej	        ej	        f         f
dZdeeeej	        f                  d	ee         fdZ	 ddededz  d	dfdZ xZS )
LlamaModel r   N)prefixstart_layer_idquant_configvllm_configr   r   r   returnc                    t                                                       j        j        j         _                             |            j        j         _        t           j        j         j        j	        t          d                     _        j        }|_        	 t          j         fdt           j        j                  D                        _        |_        n# |_        w xY wt$          j                             j        j	        dz   j        j	        d           _        t+           j        j	         j        j                   _        d S )Nembed_tokensr   c                 f    g | ]-}t          t          d |z              j                  .S )zlayers.)r   r   config)r   r   r"   ).0ir   selfr   r   s     {/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/llama4_eagle.py
<listcomp>z'LlamaModel.__init__.<locals>.<listcomp>F   s]         '$/+F4Ra.>P4R4RSS#{           F)bias)eps)super__init__speculative_configdraft_model_config	hf_configr"   validate_and_update_config
vocab_sizer   hidden_sizer   r   r   nn
ModuleListrangenum_hidden_layerslayerstorchLinearfcr   rms_norm_epsnorm)r%   r   r   r   r   original_quant_config	__class__s   ````  r&   r-   zLlamaModel.__init__/   sl    	!4GQ''EEE+02K"K#77
 
 
 !, 8#/ 	=-       #4;#@AA  	 	DK (=K$$'<K$<<<<(//K#a')@u " 
 
 DK39QRRR			s   +?C2 2	C;	input_idsc                 ,    |                      |          S N)r   )r%   r@   s     r&   embed_input_idszLlamaModel.embed_input_idsW   s      +++r(   	positionshidden_statesinputs_embedsc                     ||                      |          }|                     t          j        ||fd                    }d }| j        D ]} ||||          \  }}|                     ||          \  }}||fS )N)dim)rC   r;   r9   catr8   r=   )r%   r@   rD   rE   rF   residuallayer_s           r&   forwardzLlamaModel.forwardZ   s       00;;M	=-*Hb Q Q QRR[ 	 	E&+e' '#M88
  99]H==qm++r(   weightsc                    g d}t          |                                           }t                      }|D ]\  }}|                    d          }|D ]>\  }}}	||vr|                    ||          }||         }
|
j        } ||
||	            n*||         }
t          |
dt                    } ||
|           |                    |           |D ]}||v sJ | d            |S )N))	.qkv_projz.q_projq)rQ   z.k_projk)rQ   z.v_projv).gate_up_projz
.gate_projr   )rU   z.up_projr   model.weight_loaderz is not loaded!)	dictnamed_parameterssetremoveprefixreplacerW   getattrr   add)r%   rO   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
param_nameweight_nameshard_idparamrW   s               r&   load_weightszLlamaModel.load_weightsn   sB   "
 "
 "
 4002233"%%%#* 	$ 	$D-$$X..D5K 4 41
Kd**||K<<#D) % 3e]H===#D) '@U V Ve]333d#### 	C 	CD=(((T*B*B*B((((r(   c                 r   | j         j        J | j         j        J t          | j         j                  dk    sJ dgz  | j         j        z   | j         _        t          |t                    rGdt          dt          ffd|j	        }fd|j
                                        D             |_
        d S d S )Nr   rL   r   c                     t          |           }|                     t          |          t          |z                       S rB   )r   r\   str)rL   layer_indexr   s     r&   pad_layer_namez=LlamaModel.validate_and_update_config.<locals>.pad_layer_name   s@    1%88}}$$c+*F&G&G  r(   c                 .    i | ]\  }} |          |S  ro   )r#   rL   quantizationrm   s      r&   
<dictcomp>z9LlamaModel.validate_and_update_config.<locals>.<dictcomp>   s9     3 3 3'E< u%%|3 3 3r(   )r"   yoco_global_kv_layeryoco_local_kv_layerlen
moe_layersno_rope_layers
isinstancer
   rk   torchao_configmodule_fqn_to_configitems)r%   r   r   rx   rm   s    `  @r&   r1   z%LlamaModel.validate_and_update_config   s     {/777{.6664;)**a//// '(S>%9DK<V%V"lM22 	c c       *8N3 3 3 3+9+N+T+T+V+V3 3 3N///	 	r(   rB   )__name__
__module____qualname__r   rk   intr	   r-   r9   TensorrC   tuplerN   r   rZ   rh   r1   __classcell__)r?   s   @r&   r   r   -   s        26&S &S &S  &S 	&S
 &S )4/&S 
&S &S &S &S &S &SP, ,%, , , , , .2, ,<$&, <, |	,
 |d*, 
u|U\)	*, , , ,(HU33D-E$F 3s8    > NR !1Cd1J	       r(   r   c                       e Zd ZdddedefdZdej        j        fdZ	e
j        Z	 dd	ej        d
ej        dej        dej        dz  deej        ej        f         f
dZdeeeej        f                  ddfdZdS )EagleLlama4ForCausalLMr   r    r   r   c                ,   t           j                            |            |j        j        j        | _        |j                            |j	                  }t          j        |j        j        |j                  }t          |d||          | _        t          | j        dd          }t!          | j        j        |          | _        t'          | j        j        | j        j        t-          |d                    | _        |                                  d S )Nmodel)r   r   r   r   logit_scaleg      ?)scalelm_headr    )r4   Moduler-   r.   r/   r0   r"   model_configget_num_layersparallel_configr   get_quantization_configload_configr   r   r]   r   r2   logits_processorr   draft_vocab_sizer3   r   r   set_moe_parameters)r%   r   r   target_layer_numr   r   s         r&   r-   zEagleLlama4ForCausalLM.__init__   s   
	4   !4GQ&3BB'
 
 "9*={?V
 
  #+%	
 
 

 dk=#>> /K"+!
 !
 !
 &K(K#	22
 
 
 	!!!!!r(   r   c                     | j         S rB   r   )r%   s    r&   get_language_modelz)EagleLlama4ForCausalLM.get_language_model   s
    zr(   Nr@   rD   rE   rF   c                 2    |                      ||||          S rB   r   )r%   r@   rD   rE   rF   s        r&   rN   zEagleLlama4ForCausalLM.forward   s     zz)Y}MMMr(   rO   c                 z      fd}t           g           }|                    t          ||                     d S )Nc                 z    | \  }}                     ||          \  }}d|vrd|z   }t          |           ||fS )Nr   rV   )permute_qk_weight_for_rotaryr   )inputsrb   rc   weightr%   s       r&   	transformz6EagleLlama4ForCausalLM.load_weights.<locals>.transform   sT    "(D-<<T=QQLD&$$$ t,,,<r(   )skip_prefixes)r   rh   map)r%   rO   r   loaders   `   r&   rh   z#EagleLlama4ForCausalLM.load_weights   s^    	  	  	  	  	  #
 
 

 	C	73344444r(   rB   )r{   r|   r}   r   rk   r-   r9   r4   r   r   r   rC   r   r   rN   r   rh   ro   r(   r&   r   r      s       AC " " "z "3 " " " "<EHO     )8O .2N N<N <N |	N
 |d*N 
u|U\)	*N N N N5HU33D-E$F 54 5 5 5 5 5 5r(   r   )(collections.abcr   r9   torch.nnr4   vllm.compilation.decoratorsr   vllm.configr   vllm.loggerr   $vllm.model_executor.layers.layernormr   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr	   /vllm.model_executor.layers.quantization.torchaor
   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   !vllm.model_executor.models.llama4r   r    vllm.model_executor.models.utilsr   
interfacesr   utilsr   r   r   r{   loggerr   r   r   ro   r(   r&   <module>r      s  & % $ $ $ $ $        = = = = = = " " " " " " # # # # # # 8 8 8 8 8 8 G G G G G G F F F F F F I I I I I I        P O O O O O S S S S S S S S @ @ @ @ @ @ * * * * * * H H H H H H H H H H	X		 u u u u u u u up;5 ;5 ;5 ;5 ;5. ;5 ;5 ;5 ;5 ;5r(   