
    .`id(                         d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZmZ d dlmZ d dlmZ ddlmZ  G d de          Z G d de          Ze G d dej                              Z dS )    )IterableN)support_torch_compile)
VllmConfig)FusedMoE)RMSNorm)LogitsProcessor)VocabParallelEmbedding)default_weight_loader)DeepSeekMultiTokenPredictor DeepSeekMultiTokenPredictorLayer
SharedHead)maybe_prefix)IntermediateTensors   )OpenPanguDecoderLayerc                   "    e Zd ZdededdfdZdS )!OpenPanguMultiTokenPredictorLayervllm_configprefixreturnNc                    t           j                            |            |j        j        j        }|| _        |j        }t          |j	        |j
                  | _        t          |j	        |j
                  | _        t          j        |j	        dz  |j	        d          | _        t          ||t!          |d                    | _        t%          |||          | _        d S )N)eps   F)biasshared_head)configquant_configr   )nnModule__init__speculative_configdraft_model_config	hf_configr   r   r   hidden_sizerms_norm_epsenormhnormLineareh_projr   r   r   r   	mtp_block)selfr   r   r   r   s        |/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/openpangu_mtp.pyr    z*OpenPanguMultiTokenPredictorLayer.__init__2   s    
	4   /BL"/V/V5HIII
V/V5HIII
y!3a!79KRWXXX%%66
 
 

 /vv{KK    __name__
__module____qualname__r   strr     r-   r,   r   r   1   sF        LJ L L L L L L L Lr-   r   c                   $    e Zd ZdddedefdZdS )OpenPanguMultiTokenPredictor r   r   r   c                   t           j                            |            j        j        }|j        | _        |j        | _        t          j         
                    fdt          | j        | j        | j        z             D                       | _        t          |j        |j                  | _        t#          |j                  | _        d S )Nc           	      V    i | ]%}t          |          t           d |           &S )z.layers.)r2   r   ).0idxr   r   s     r,   
<dictcomp>z9OpenPanguMultiTokenPredictor.__init__.<locals>.<dictcomp>L   sQ         C;F!9!9C!9!9   r-   )r   r   r    model_configr#   num_hidden_layersmtp_start_layer_idxnum_nextn_predict_layersnum_mtp_layerstorch
ModuleDictrangelayersr	   
vocab_sizer$   embed_tokensr   logits_processor)r+   r   r   r   s    `` r,   r    z%OpenPanguMultiTokenPredictor.__init__E   s    
	4   )3#)#; $=h))     !,,t/BB 	  

 

 3
 
 !00A B Br-   Nr.   r3   r-   r,   r5   r5   D   sM        AC C C Cz C3 C C C C C Cr-   r5   c                   J    e Zd Zdddedef fdZdej        dej        fdZ	 	 	 ddej        dej        dej        de	d	z  dej        d	z  de
dej        fdZ	 ddej        de
dej        d	z  fdZd Zdeeeej        f                  dee         fdZde
dedefdZ xZS )OpenPanguMTPr6   r7   r   r   c                    t                                                       |j        j        | _        t          |t          |d                    | _        d S )Nmodel)r   r   )superr    r=   r#   r   r5   r   rL   )r+   r   r   	__class__s      r,   r    zOpenPanguMTP.__init___   sO    !.81#L,I,I
 
 



r-   	input_idsr   c                 6    | j                             |          S N)rL   embed_input_ids)r+   rO   s     r,   rR   zOpenPanguMTP.embed_input_idsf   s    z)))444r-   Nr   	positionshidden_statesintermediate_tensorsinputs_embedsspec_step_idxc                 8    |                      |||||          }|S rQ   )rL   )r+   rO   rS   rT   rU   rV   rW   s          r,   forwardzOpenPanguMTP.forwardi   s.     


 
 r-   c                 8    | j                             ||          S rQ   )rL   compute_logits)r+   rT   rW   s      r,   r[   zOpenPanguMTP.compute_logits{   s    
 z((FFFr-   c                 (   d|v rt          | j        d          rx| j        j        dk    rht          |                    d          d                             d          d                   }|| j        j        z
  }|dk    r|| j        j        k     r|S d S )NrE   r@   r   zlayers..)hasattrr   r@   intsplitr>   )r+   name	layer_idxmtp_idxs       r,   get_spec_layerzOpenPanguMTP.get_spec_layer   s    %?@@ 4q88DJJy11"5;;C@@CDDI$+"??G!||$+*N N N  tr-   weightsc           	      |   g d}t          j        | ddd| j        j                  }t	          |                                           }t                      }|D ]a\  }}d|v r|                     |          }|#|                     ||          }|D ]n\  }	}
}|
|vrd|v r||vr|	                    |
|	          }|	dk    r||vr5|}|
                    d	          r||vrQ||         }|j        } ||||            n|D ]D}|\  }	}
}}|
|vr|	                    |
|	          }||         }|j        } ||||||
            n[|
                    d	          r||vr|| j        j        k    rd|vr"||         }t          |dt                    } |||           |                    |           c|S )N))gate_up_proj	gate_projr   )rh   up_projr   )fused_qkv_a_projq_a_projr   )rk   kv_a_proj_with_mqar   ri   	down_projrj   )ckpt_gate_proj_nameckpt_down_proj_nameckpt_up_proj_namenum_expertszrotary_emb.inv_freqzmlp.experts.rk   z.bias)shard_id	expert_idz.layersweight_loader)r   make_expert_params_mappingr   n_routed_expertsdictnamed_parameterssetre   _rewrite_spec_layer_namereplaceendswithru   rL   r?   getattrr
   add)r+   rf   stacked_params_mappingexpert_params_mappingparams_dictloaded_paramsrb   loaded_weight
spec_layer
param_nameweight_namers   name_mappedparamru   mappingrt   s                    r,   load_weightszOpenPanguMTP.load_weights   s   "
 "
 "
 !) C + +'4!
 !
 !
 4002233"%%%#* H	$ H	$D-$,,,,T22J!00TBBD5K ?8 ?81
Kd** #d**K0G0G"ll;
CC
 "444!44&D ==)) d+.E.E#D) % 3e]H===4 8 8GCJ@JY"$.. <<Z@@D'-E$)$7M!M%!)"+    E }}W-- !$k2I2I  #dj&DDD%T11 '-E$+0E% %M "M%777d####r-   r   rb   c                     g d}dg}d}d}|D ]}||v r
d}||v rd} n|s|                     d| dd| d          }n|r|                     d| dd          }|S )	z
        Rewrite the weight name to match the format of the original model.
        Add .mtp_block for modules in transformer layer block for spec layer
        and rename shared layer weights to be top level.
        )rG   r&   r'   r)   r   rG   FTzmodel.layers.r^   z.mtp_block.zmodel.)r|   )r+   r   rb   spec_layer_weight_namesshared_weight_namesspec_layer_weightshared_weightr   s           r,   r{   z%OpenPanguMTP._rewrite_spec_layer_name   s    #
 #
 #
  ..!2 	 	Kd""$(!"555$(M	 #
 ! 	I<<-
---/Vz/V/V/V DD  	I<< =
 = = =xHHDr-   )NNr   )r   )r/   r0   r1   r   r2   r    rB   TensorrR   r   r`   rY   r[   re   r   tuplerz   r   r{   __classcell__)rN   s   @r,   rJ   rJ   ]   s       AC 
 
 
z 
3 
 
 
 
 
 
5 5%, 5 5 5 5 <@-1 < < |	
 2D8 |d*  
   * G G|G G 
		G G G G
 
 
[HU33D-E$F [3s8 [ [ [ [z3 c c        r-   rJ   )!collections.abcr   rB   torch.nnr   vllm.compilation.decoratorsr   vllm.configr   $vllm.model_executor.layers.fused_moer   $vllm.model_executor.layers.layernormr   +vllm.model_executor.layers.logits_processorr   3vllm.model_executor.layers.vocab_parallel_embeddingr	   -vllm.model_executor.model_loader.weight_utilsr
   'vllm.model_executor.models.deepseek_mtpr   r   r    vllm.model_executor.models.utilsr   vllm.sequencer   	openpangur   r   r5   r   rJ   r3   r-   r,   <module>r      s  * % $ $ $ $ $        = = = = = = " " " " " " : 9 9 9 9 9 8 8 8 8 8 8 G G G G G G      P O O O O O         
 : 9 9 9 9 9 - - - - - - , , , , , ,L L L L L(H L L L&C C C C C#> C C C2 k k k k k29 k k k k kr-   