
    .`i2                        d Z ddlmZ ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZmZ dd
lmZmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZ ddlm Z m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z'm(Z( ddl)m*Z*m+Z+m,Z,m-Z-m.Z.  G d dej/                  Z0 G d dej/                  Z1 G d dej/                  Z2e G d dej/                              Z3 G d dej/        e'e(          Z4dS ) zAInference-only Phi-1.5 model compatible with HuggingFace weights.    )Iterable)isliceN)nn)	PhiConfig)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)
get_act_fn)ColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)IntermediateTensors   )SupportsLoRA
SupportsPP)AutoWeightsLoaderis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc            	       v     e Zd Z	 	 	 ddededz  dedz  def fdZdej	        d	ej	        d
ej	        fdZ
 xZS )PhiAttentionN configcache_configquant_configprefixc           	      J   t                                                       |j        | _        | j        |j        z  | _        t                      }|j        |z  dk    sJ |j        |z  | _        t          | j        | j        |j        d|| d          | _        t          | j        | j        || d          | _
        | j        dz  }t          |dd	          }t          | j        ||j        
          | _        t          | j        | j        |||| d          | _        d S )Nr   Tz	.qkv_projbiasr%   r&   z.denser%   r&   g      max_position_embeddingsi   )max_positionrope_parametersz.attn)r$   r%   r&   )super__init__hidden_sizenum_attention_heads	head_sizer   	num_headsr   qkv_projr   densegetattrr   r-   
rotary_embr   attn)	selfr#   r$   r%   r&    tensor_model_parallel_world_sizescalingr+   	__class__s	           r/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/phi.pyr/   zPhiAttention.__init__O   s]    	!-)V-GG+O+Q+Q(),LLPQQQQQ37WW *N&%'''
 
 
 '%$$$	
 
 

 .$&")&2KT"R"R"N0"2
 
 

 NN%%###
 
 
			    position_idshidden_statesreturnc                     |                      |          \  }}|                    dd          \  }}}|                     |||          \  }}|                     |||          }|                     |          \  }	}|	S )N   )chunksdim)r4   chunkr7   r8   r5   )
r9   r?   r@   qkv_qkvattn_outputoutputs
             r=   forwardzPhiAttention.forward   s|    
 }--Q))1")--1a|Q221ii1a((JJ{++	r>   NNr"   __name__
__module____qualname__r   r	   r   strr/   torchTensorrO   __classcell__r<   s   @r=   r!   r!   N   s         ,026.
 .
.
 "D(.
 )4/	.

 .
 .
 .
 .
 .
 .
`
l
 |
 
	
 
 
 
 
 
 
 
r>   r!   c                   >     e Zd Z	 	 ddededz  def fdZd Z xZS )	PhiMLPNr"   r#   r%   r&   c                 8   t                                                       t          |dd           }||n	d|j        z  }t	          |j        ||| d          | _        t          ||j        || d          | _        t          |j	                  | _
        d S )Nn_inner   z.fc1r*   z.fc2)r.   r/   r6   r0   r   fc1r   fc2r   
hidden_actact)r9   r#   r%   r&   r]   r<   s        r=   r/   zPhiMLP.__init__   s     	&)T22$0''a&:L6L'%???	
 
 
 %%???	
 
 
 f/00r>   c                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)r_   rb   r`   )r9   r@   rI   s      r=   rO   zPhiMLP.forward   sE    88M22q//88M22qr>   )Nr"   )	rR   rS   rT   r   r   rU   r/   rO   rX   rY   s   @r=   r[   r[      sz         37	1 11 )4/1 	1 1 1 1 1 12      r>   r[   c            	       v     e Zd Z	 	 	 ddededz  dedz  def fdZdej	        d	ej	        d
ej	        fdZ
 xZS )PhiLayerNr"   r#   r$   r%   r&   c                     t                                                       t          j        |j        |j                  | _        t          |||| d          | _        t          ||| d          | _
        d S )Nepsz
.self_attnr&   z.mlp)r.   r/   r   	LayerNormr0   layer_norm_epsinput_layernormr!   	self_attnr[   mlp)r9   r#   r$   r%   r&   r<   s        r=   r/   zPhiLayer.__init__   s     	!|F$9 
  
  
 &L,&7L7L7L
 
 
 &,&GGGr>   r?   r@   rA   c                     |}|                      |          }|                     ||          }|                     |          }||z   |z   }|S )N)r?   r@   )rm   rn   ro   )r9   r?   r@   residualattn_outputsfeed_forward_hidden_statess         r=   rO   zPhiLayer.forward   sd    
 !,,];;~~%' & 
 
 &*XXm%<%<"$'AAHLr>   rP   rQ   rY   s   @r=   rf   rf      s         ,026H HH "D(H )4/	H
 H H H H H H l | 
	       r>   rf   c                        e Zd Zdddedef fdZdej        dej        fdZ	 ddej        d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
deeeej        f                  dee         fdZ xZS )PhiModelr"   rj   vllm_configr&   c                   t                                                       |j        j        |j        |j        | _        | _        t          j        j	                  | _
        t          j        fd| d          \  | _        | _        | _        t!          j        j	        j                  | _        t)          dgj	                  | _        d S )Nc                 *    t          |           S )Nrj   )rf   )r&   r$   r#   r%   s    r=   <lambda>z#PhiModel.__init__.<locals>.<lambda>   s    8FL,vVVV r>   z.layersrj   rh   r@   )r.   r/   model_config	hf_configr$   r%   r#   r   
vocab_sizer0   embed_tokensr   num_hidden_layersstart_layer	end_layerlayersr   rk   rl   final_layernormr   make_empty_intermediate_tensors)r9   rv   r&   r$   r#   r%   r<   s      @@@r=   r/   zPhiModel.__init__   s    )3"/"/(2v1
 
 9D$VVVVVV%%%9
 9
 9
5$.$+
  "|F$9 
  
  
 0Wv10
 0
,,,r>   	input_idsrA   c                 ,    |                      |          S rd   )r}   r9   r   s     r=   embed_input_idszPhiModel.embed_input_ids   s      +++r>   N	positionsintermediate_tensorsinputs_embedsc                 J   t                      j        r||}n"|                     |          }n|J |d         }t          | j        | j        | j                  D ]} |||          }t                      j        st          d|i          S | 	                    |          }|S )Nr@   )
r   is_first_rankr   r   r   r   r   is_last_rankr   r   )r9   r   r   r   r   r@   layers          r=   rO   zPhiModel.forward   s     >>' 	B( - $ 4 4Y ? ?'3330AMDK)94>JJ 	< 	<E!E)];;MM~~* 	I&'GHHH,,];;r>   weightsc                 2   g d}t          |                                           }t                      }|D ]\  }}d|v r
|D ]i\  }}}	||vr|                    ||          }|                    d          r||vr;t          ||           rL||         }
|
j        } ||
||	            nU|                    d          r||vrt          ||           r||         }
t          |
dt                    } ||
|           |	                    |           |S )N))r4   q_projrJ   )r4   k_projrK   )r4   v_projrL   zrotary_emb.inv_freqz.biasweight_loader)
dictnamed_parameterssetreplaceendswithr   r   r6   r   add)r9   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
param_nameweight_nameshard_idparamr   s               r=   load_weightszPhiModel.load_weights  sw   "
 "
 "
 4002233"%%%#* 	$ 	$D-$,,5K 4 41
Kd**||K<<==)) d+.E.E*466 #D) % 3e]H=== ==)) d+.E.E +466 #D) '@U V Ve]333d####r>   rd   )rR   rS   rT   r
   rU   r/   rV   rW   r   r   rO   r   tupler   r   rX   rY   s   @r=   ru   ru      s       AC 
 
 
z 
3 
 
 
 
 
 
0, ,%, , , , , .2 < < 2D8	
 |d* 
+	+   2'HU33D-E$F '3s8 ' ' ' ' ' ' ' 'r>   ru   c                   &    e Zd Zdg diZdddedef fdZdej        d	ej        fd
Z		 	 ddej        dej        de
dz  dej        dz  d	ej        e
z  f
dZdej        d	ej        dz  fdZdeeeej        f                  d	ee         fdZ xZS )PhiForCausalLMr4   )r   r   r   r"   rj   rv   r&   c          
         t                                                       |j        j        }|j        }|| _        |j        rJ || _        t          |t          |d                    | _	        t          |j        |j        d|t          |d                    | _        t          |j                  | _        | j	        j        | _        d S )Nmodel)rv   r&   Tlm_headr(   )r.   r/   rz   r{   r%   r#   tie_word_embeddingsru   r   r   r   r|   r0   r   r   logits_processorr   )r9   rv   r&   r#   r%   r<   s        r=   r/   zPhiForCausalLM.__init__7  s    )3"/----(#L,I,I
 
 

 &%	22
 
 
 !00A B BJ6 	,,,r>   r   rA   c                 6    | j                             |          S rd   )r   r   r   s     r=   r   zPhiForCausalLM.embed_input_idsR  s    z)))444r>   Nr   r   r   c                 6    |                      ||||          }|S rd   )r   )r9   r   r   r   r   r@   s         r=   rO   zPhiForCausalLM.forwardU  s)     

y"6
 
 r>   r@   c                 R    |                      | j        || j        j                  }|S rd   )r   r   r)   )r9   r@   logitss      r=   compute_logitszPhiForCausalLM.compute_logitsb  s'     &&t|]DLDUVVr>   r   c                 J    t          |           }|                    |          S rd   )r   r   )r9   r   loaders      r=   r   zPhiForCausalLM.load_weightsi  s#    "4((""7+++r>   )NN)rR   rS   rT   packed_modules_mappingr
   rU   r/   rV   rW   r   r   rO   r   r   r   r   r   rX   rY   s   @r=   r   r   .  sl        
 
 
 BD 
 
 
z 
3 
 
 
 
 
 
65 5%, 5 5 5 5 <@-1 < < 2D8	
 |d* 
+	+   | 
	   ,HU33D-E$F ,3s8 , , , , , , , ,r>   r   )5__doc__collections.abcr   	itertoolsr   rV   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr	   r
   vllm.distributedr   r   %vllm.model_executor.layers.activationr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   vllm.sequencer   
interfacesr   r   utilsr   r   r   r   r   Moduler!   r[   rf   ru   r    r>   r=   <module>r      s  N H G $ $ $ $ $ $              " " " " " " * * * * * * = = = = = = / / / / / / / / O O O O O O O O < < < < < <         
 H G G G G G F F F F F F @ @ @ @ @ @        P O O O O O - - - - - - 0 0 0 0 0 0 0 0             ; ; ; ; ;29 ; ; ;|    RY   B    ry   B \ \ \ \ \ry \ \ \~=, =, =, =, =,RYj =, =, =, =, =,r>   