
    .`ir6                        d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
mZ d dlmZ d dlmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZmZ d dlmZmZ d dlmZmZ d dl m!Z! ddl"m#Z#m$Z$m%Z%m&Z&  ee'          Z( G d de          Z ed dd d d           G d dej)                              Z* G d de          Z+dS )    )IterableN)LlamaConfig)support_torch_compile)
VllmConfigget_current_vllm_config)init_logger)RMSNorm)QKVParallelLinearReplicatedLinear)LogitsProcessor)QuantizationConfig)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)LlamaDecoderLayerLlamaForCausalLM)NestedTensors   )AutoWeightsLoaderget_draft_quant_configmaybe_prefixprocess_eagle_weightc                   B    e Zd Z	 	 	 ddedededz  deddf
 fd	Zdededz  fd
Z	de
j        dee
j        e
j        f         fdZde
j        dee
j        e
j        f         fdZde
j        de
j        de
j        de
j        dz  dee
j        e
j        f         f
dZ xZS )r    Nr   vllm_configprefixconfig	layer_idxreturnc                     t                                          |||           |p|j        j        }|                     |          }|dk    r
d| j        z  n| j        }t          || j        j        | j        j	        | j        j
        d|t          |d                    | j        _        t          |j        |j                  | _        || _        t#          |dd          r| j        | _        d S | j        | _        d S )	N)r   r   r      Fqkv_proj)biasquant_configr   epsnorm_before_residual)super__init__model_config	hf_configget_quant_confighidden_sizer
   	self_attnhead_dimtotal_num_headstotal_num_kv_headsr   r#   r	   rms_norm_epshidden_normr   getattr_norm_before_residual_residual_norm_norm_after_residual)selfr   r   r   r   r%   qkv_input_size	__class__s          {/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/llama_eagle3.pyr*   zLlamaDecoderLayer.__init__'   s
    	VFCCC=;3=,,[99 2;aT---TEU #4N#N*N-%
33#
 #
 #
 #6#56;NOOO"61599 	<"&"<D"&";D    c                      t          |          S )z8Use drafter's quantization config instead of verifier's.)r   )r9   r   s     r<   r-   z"LlamaDecoderLayer.get_quant_configJ   s    %k222r=   hidden_statesc                 8    |                      |          }|}||fS Nr4   r9   r?   residuals      r<   r6   z'LlamaDecoderLayer._norm_before_residualN   s'     ((77 h&&r=   c                 8    |}|                      |          }||fS rA   rB   rC   s      r<   r8   z&LlamaDecoderLayer._norm_after_residualU   s'     !((77h&&r=   	positionsembedsrD   c                 j   | j         dk    rG|                     |          }|                     |          \  }}t          j        ||gd          }n|                     ||          \  }}|                     ||          }|                     ||          \  }}|                     |          }||fS )Nr   )r?   )dim)rF   r?   )r   input_layernormr7   torchcatr/   post_attention_layernormmlp)r9   rF   rG   r?   rD   s        r<   forwardzLlamaDecoderLayer.forward\   s     >Q))&11F&*&9&9&9&V&V#M8!Iv}&=2FFFMM '+&:&:=(&S&S#M8 ' ' 
 

 #'"?"?x"X"Xx //h&&r=   )r   Nr   )__name__
__module____qualname__r   strr   intr*   r   r-   rL   Tensortupler6   r8   rP   __classcell__r;   s   @r<   r   r   &   ss        %)!< !<!< !< d"	!<
 !< 
!< !< !< !< !< !<F3J 3;MPT;T 3 3 3 3'"\'	u|U\)	*' ' ' ''"\'	u|U\)	*' ' ' ''<' ' |	'
 ,%' 
u|U\)	*' ' ' ' ' ' ' 'r=   r   rI   )	input_idsrF   r?   input_embeds)dynamic_arg_dimsc                       e Zd Zddddedededdf fd	Zd
ej        dej        fdZ		 dd
ej        dej        dej        dej        dz  de
ej        ej        f         f
dZdee
eej        f                  dee         fdZ xZS )
LlamaModelr   r   )start_layer_idr   r   r_   r   r    Nc                    t                                                       |j        j        j         _         j        j         _        t          |           _        t           j        dd           }|d|v r|d          _
        nd _
        t                      t           j        j         j        j        t          d                     _        t!          j         fdt%           j        j                  D                        _         j
        rvt+           j        d          r j        j        dz  }n j        j        dz  }t/          | j        j        d	|j        j         j        t          d
          d	           _        t7           j        j         j        j                   _        d S )Neagle_configuse_aux_hidden_stateTembed_tokensr   c                 h    g | ].}t          t          d |z              j        |          /S )zlayers.)r   r   r   )r   r   r   ).0r   current_vllm_configr   r9   r_   s     r<   
<listcomp>z'LlamaModel.__init__.<locals>.<listcomp>   s`         "''0V)n:T0V0VWW;'	    r=   target_hidden_size   Ffc)
input_sizeoutput_sizer$   params_dtyper%   r   return_biasr&   )r)   r*   speculative_configdraft_model_configr,   r   
vocab_sizer   r%   r5   rb   r   r   r.   r   rc   nn
ModuleListrangenum_hidden_layerslayershasattrri   r   r+   dtyperk   r	   r3   norm)r9   r   r_   r   ra   fc_input_sizerg   r;   s   ` ``  @r<   r*   zLlamaModel.__init__   s    	!4GQ+0 3;??t{NDAA#(>,(N(N(45K(LD%%(,D%5772K"K#77
 
 
 m       "'t{'D!E!E  

 

 $ 	t{$899 < $ > B $ 7! ;&( K3(5;!.#FD11!  DG K#(
 
 
			r=   rZ   c                 ,    |                      |          S rA   )rc   )r9   rZ   s     r<   embed_input_idszLlamaModel.embed_input_ids   s      +++r=   rF   r?   r[   c                     ||                      |          }|j        d         |j        d         k    sJ d }| j        D ]} |||||          \  }}|                     ||          \  }}||fS )NrI   )rF   rG   r?   rD   )r}   shaperw   rz   )r9   rZ   rF   r?   r[   rD   layerhidden_prenorms           r<   rP   zLlamaModel.forward   s     //	::L"2&,*<R*@@@@@[ 	 	E&+e##+!	' ' '#M88 )-		-(J(J%~n,,r=   weightsc                    g d}t          |                                           }t                      }|D ]@\  }}d|v r|                    dd          }| j        ~| j                            |          x}rb||         }t          |dt                    }	|                                dk    r|n|d         } |	||           |	                    |           d|v sd|v rt          ||          }||D ]>\  }
}}||vr|                    ||
          }||         }|j        }	 |	|||            n*||         }t          |dt                    }	 |	||           |	                    |           B|S )N))	.qkv_projz.q_projq)r   z.k_projk)r   z.v_projv).gate_up_projz
.gate_projr   )r   z.up_projr   z	midlayer.z	layers.0.weight_loaderr   scale
zero_point)dictnamed_parameterssetreplacer%   get_cache_scaler5   r   rJ   addr   r   )r9   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
scale_nameparamr   
param_nameweight_nameshard_ids                r<   load_weightszLlamaModel.load_weights   s   "
 "
 "
 4002233"%%%#* !	$ !	$D-d""||K== ,"/??EEE
 - $J/ '@U V V%2%6%6%8%8A%=%=MM=QRCS  e]333!!*---$,$"6"60{CC<5K 4 41
Kd**||K<<#D) % 3e]H===#D) '@U V Ve]333d####r=   rA   )rQ   rR   rS   r   rU   rT   r*   rL   rV   r}   rW   rP   r   r   r   rX   rY   s   @r<   r^   r^   z   s>         8
 8
 8
  8
 	8

 8
 
8
 8
 8
 8
 8
 8
t, ,%, , , , , -1- -<- <- |	-
 lT)- 
u|U\)	*- - - -,-HU33D-E$F -3s8 - - - - - - - -r=   r^   c                   ^   e Zd ZdddedefdZ	 	 ddej        dedz  d	ej        dz  d
ej        fdZ		 ddej        dej        dej        dej        dz  d
e
ej        ej        f         f
dZdej        d
ej        dz  fdZdej        d
ej        fdZdee
eej        f                  fdZdS )Eagle3LlamaForCausalLMr   rd   r   r   c                   t           j                            |            |j        j        j        | _        t          | j        dd           "t          | j        dd           }|| j        _        |j	        
                    |j                  }|| j        _        t          |d|          | _        t          | j        dd          }t          | j        j        | j        j        t#          |d                    | _        t'          | j        j        |	          | _        t          j        t-          j        | j        j        t,          j        
          d          | _        d S )Ndraft_vocab_sizerr   model)r   r   r_   logit_scaleg      ?lm_headrd   )r   )ry   F)requires_grad)rs   Moduler*   rp   rq   r,   r   r5   r   r+   get_num_layersparallel_configtarget_layer_countr^   r   r   r.   r   r   r   logits_processor	ParameterrL   zeroslongdraft_id_to_target_id)r9   r   r   base_vocab_sizetarget_layer_numr   s         r<   r*   zEagle3LlamaForCausalLM.__init__  sM   
	4   !4GQ 4; 2D99A%dk<FFO+:DK(&3BB'
 
 *:&#GDT
 
 

 dk=#>>%K(K#	22
 
 

 !0K(!
 !
 !
 &(\K4EJGGG&
 &
 &
"""r=   NrZ   multimodal_embeddingsis_multimodalr    c                 6    | j                             |          S rA   )r   r}   )r9   rZ   r   r   s       r<   r}   z&Eagle3LlamaForCausalLM.embed_input_ids(  s     z)))444r=   rF   r?   inputs_embedsc                 2    |                      ||||          S rA   )r   )r9   rZ   rF   r?   r   s        r<   rP   zEagle3LlamaForCausalLM.forward0  s     zz)Y}MMMr=   c                    |                      | j        |          }| j        <|j        d         | j        j        k    sJ d| j        j         d|j                     |S t          j        | j        j        |j	                  }|| j        z   }|
                    |j        d         | j        j        ft          d                    }||d d |f<   |S )Nr   z"Expected logits to have shape (*, z), but got )devicer   z-inf)r   r   r   r   r   rr   rL   aranger   r   new_fullfloat)r9   r?   logitsbasetargets
logits_news         r<   compute_logitsz%Eagle3LlamaForCausalLM.compute_logits9  s     &&t|]CC%-<?dk&<<<<I{-I I:@,I I =<< M|DK8OOO33__Q& &MM
 

 "(
111g:r=   c                 R    | j         j        s|S | j                             |          S rA   )r   rb   rk   )r9   r?   s     r<   combine_hidden_statesz,Eagle3LlamaForCausalLM.combine_hidden_statesQ  s+     z. 	!  z}}]+++r=   r   c                    i }d}d}|D ]K\  }}d|v r
d|v r|                     dd          }d}n	d|vrd|z   }d|v rd}|||<   t          | |           Lg }|s|                    d           |s|                    d           | j        j        s|                    d	           t          | d |
          }|                    |                                           d S )NFt2dd2tr   Tr   zmodel.rc   zfc.)skip_prefixesskip_substrs)r   r   appendr   rb   r   r   items)	r9   r   model_weightsincludes_draft_id_mappingincludes_embed_tokensr   r   r   loaders	            r<   r   z#Eagle3LlamaForCausalLM.load_weightsZ  s?   $)! %#* 	- 	-D-}}}}||E+BCC,0))$&&$%%(,%"/M$ t,,,,( 	9 7888$ 	0///z. 	'&&&"%
 
 

 	M//1122222r=   )NNrA   )rQ   rR   rS   r   rT   r*   rL   rV   r   r}   rW   rP   r   r   r   r    r=   r<   r   r     s       AC 
 
 
z 
3 
 
 
 
H 7;-1	5 5<5  -t35 |d*	5
 
5 5 5 5 .2N N<N <N |	N
 |d*N 
u|U\)	*N N N N| 
	   0,|, 
, , , ,3HU33D-E$F 3 3 3 3 3 3r=   r   ),collections.abcr   rL   torch.nnrs   transformersr   vllm.compilation.decoratorsr   vllm.configr   r   vllm.loggerr   $vllm.model_executor.layers.layernormr	   !vllm.model_executor.layers.linearr
   r   +vllm.model_executor.layers.logits_processorr   3vllm.model_executor.layers.quantization.base_configr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   r    vllm.model_executor.models.llamar   r   vllm.multimodal.inputsr   utilsr   r   r   r   rQ   loggerr   r^   r   r   r=   r<   <module>r      sy   % $ $ $ $ $        $ $ $ $ $ $ = = = = = = ; ; ; ; ; ; ; ; # # # # # # 8 8 8 8 8 8 Q Q Q Q Q Q Q Q G G G G G G R R R R R R               Q P P P P P P P 0 0 0 0 0 0            
X		Q' Q' Q' Q' Q') Q' Q' Q'h 	   A A A A A A A AHq3 q3 q3 q3 q3- q3 q3 q3 q3 q3r=   