
    .`i5                        d Z ddlmZ ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZmZ dd
lmZmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZ ddlm Z m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z)m*Z*m+Z+m,Z,m-Z-  G d dej.                  Z/ G d dej.                  Z0 G d dej.                  Z1e G d dej.                              Z2 G d dej.        e'          Z3dS ) zCInference-only persimmon model compatible with HuggingFace weights.    )Iterable)isliceN)nn)PersimmonConfig)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)
get_act_fn)ColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)IntermediateTensors   )
SupportsPP)AutoWeightsLoaderis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                   N     e Zd Z	 	 d	dededz  def fdZdej        fdZ	 xZ
S )
PersimmonMLPN configquant_configprefixc                    t                                                       t          |j        |j        || d          | _        t          |j        |j        || d          | _        t          |j	                  | _
        d S )Nz.dense_h_to_4hr#   r$   z.dense_4h_to_h)super__init__r   hidden_sizeintermediate_sizedense_h_to_4hr   dense_4h_to_hr   
hidden_actact)selfr"   r#   r$   	__class__s       x/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/persimmon.pyr(   zPersimmonMLP.__init__@   s     	1$%,,,	
 
 
 /$%,,,	
 
 
 f/00    returnc                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)r+   r.   r,   )r/   hidden_states_s      r1   forwardzPersimmonMLP.forwardU   sI    --m<<q//--m<<qr2   )Nr!   )__name__
__module____qualname__r   r   strr(   torchTensorr8   __classcell__r0   s   @r1   r    r    ?   s         37	1 11 )4/1 	1 1 1 1 1 1*        r2   r    c            	            e Zd Z	 	 	 ddededz  dedz  def fdZdej	        d	ej	        fd
Z
dej	        d	ej	        fdZdej	        dej	        d	ej	        fdZ xZS )PersimmonAttentionNr!   r"   cache_configr#   r$   c           	      v   t                                                       || _        t                      }|j        | _        |j        | _        | j        |z  | _        | j        | j        z  | _        |j	        | _	        d| _
        | j        | j        z  | j        k    sJ | j        |z  dk    sJ t          | j        | j        | j        d|| d          | _        t          | j        | j        z  | j        d|| d          | _        |j        | _        | j        r<t#          j        | j                  | _        t#          j        | j                  | _        t+          | j        | j	        |j                  | _        | j        dz  | _        t3          | j        | j        | j        ||| d	          | _        d S )
NTr   z.query_key_value)biasr#   r$   z.dense)max_positionrope_parametersg      z.attn)scalerC   r#   r$   )r'   r(   r"   r   r)   num_attention_headstotal_num_heads	num_headshead_dimmax_position_embeddings	is_causalr   query_key_valuer   denseqk_layernormis_qk_layernormr   	LayerNormq_layernormk_layernormr   rG   
rotary_embscalingr   attn)r/   r"   rC   r#   r$   tensor_parallel_world_sizer0   s         r1   r(   zPersimmonAttention.__init__]   s    	%I%K%K"!-%9-1KK(D,@@'-'E$ 449IIIII#&@@AEEEE0M %... 
  
  
 ' 4=0%$$$
 
 

  &2 	;!|DM::D!|DM::D"M5"2
 
 

 }d*NM,%%###
 
 
			r2   xr3   c                 ^    |j         d         }|                    || j        | j                  S Nr   shapeviewrK   rL   r/   rZ   
seq_lengths      r1   _split_headszPersimmonAttention._split_heads   s'    WQZ
vvj$.$-@@@r2   c                 b    |j         d         }|                    || j        | j        z            S r\   r]   r`   s      r1   _merge_headszPersimmonAttention._merge_heads   s*    WQZ
vvj$.4="@AAAr2   position_idsr6   c                    |                      |          \  }}|                    dd          \  }}}| j        r~|                     |          }|                     |          }|                     |          }|                     |          }|                     |          }|                     |          }|                     |||          \  }}|                     |||          }| 	                    |          \  }	}|	S )N   )chunksdim)
rO   chunkrR   rb   rT   rU   rd   rV   rX   rP   )
r/   re   r6   qkvr7   qkvattn_outputoutputs
             r1   r8   zPersimmonAttention.forward   s     %%m44Q))1")--1a 		%!!!$$A!!!$$A  ##A  ##A!!!$$A!!!$$A|Q221ii1a((JJ{++	r2   NNr!   )r9   r:   r;   r   r	   r   r<   r(   r=   r>   rb   rd   r8   r?   r@   s   @r1   rB   rB   \   s        ,0267
 7
7
 "D(7
 )4/	7

 7
 7
 7
 7
 7
 7
rAel Au| A A A A
Bel Bu| B B B B
l | 
	       r2   rB   c            	       v     e Zd Z	 	 	 ddededz  dedz  def fdZdej	        d	ej	        d
ej	        fdZ
 xZS )PersimmonDecoderLayerNr!   r"   rC   r#   r$   c                 ^   t                                                       |j        | _        t          |||| d          | _        t          ||| d          | _        t          j        |j        |j	                  | _
        t          j        |j        |j	                  | _        d S )Nz
.self_attn)r"   rC   r#   r$   z.mlpr&   eps)r'   r(   r)   rB   	self_attnr    mlpr   rS   layer_norm_epsinput_layernormpost_attention_layernorm)r/   r"   rC   r#   r$   r0   s        r1   r(   zPersimmonDecoderLayer.__init__   s     	!-+%%(((	
 
 
  %???
 
 

  "|F$9 
  
  
 )+F$9)
 )
 )
%%%r2   re   r6   r3   c                     |}|                      |          }|                     ||          }||z   }|}|                     |          }|                     |          }||z   }|}|S )N)re   r6   )r{   rx   r|   ry   )r/   re   r6   residualoutputss        r1   r8   zPersimmonDecoderLayer.forward   s    
 !,,];; %' ' 
 
 !=0 !55mDD//%0r2   rr   )r9   r:   r;   r   r	   r   r<   r(   r=   r>   r8   r?   r@   s   @r1   rt   rt      s         ,026
 

 "D(
 )4/	

 
 
 
 
 
 
6l | 
	       r2   rt   c                        e Zd Zdddedef fdZdej        dej        fdZ	 ddej        d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
deeeej        f                  dee         fdZ xZS )PersimmonModelr!   r$   vllm_configr$   c                   t                                                       |j        j        |j        |j        j        | _        | _        t          j        j	                  | _
        t          j        fd| d          \  | _        | _        | _        t!          j        j	        j                  | _        t)          dgj	                  | _        d S )Nc                 *    t          |           S )Nr   )rt   )r$   rC   r"   r#   s    r1   <lambda>z)PersimmonModel.__init__.<locals>.<lambda>  s     0l6   r2   z.layersr   rv   r6   )r'   r(   model_config	hf_configrC   r#   
vocab_sizer"   r   r)   embed_tokensr   num_hidden_layersstart_layer	end_layerlayersr   rS   rz   final_layernormr   make_empty_intermediate_tensors)r/   r   r$   rC   r"   r#   r0   s      @@@r1   r(   zPersimmonModel.__init__   s   )3"/"/ +2v1
 
 9D$      %%%9
 9
 9
5$.$+  "|F$9 
  
  
 0Wv10
 0
,,,r2   	input_idsr3   c                 ,    |                      |          S r5   )r   r/   r   s     r1   embed_input_idszPersimmonModel.embed_input_ids  s      +++r2   N	positionsintermediate_tensorsinputs_embedsc                 J   t                      j        r||}n"|                     |          }n|J |d         }t          | j        | j        | j                  D ]} |||          }t                      j        st          d|i          S | 	                    |          }|S )Nr6   )
r   is_first_rankr   r   r   r   r   is_last_rankr   r   )r/   r   r   r   r   r6   layers          r1   r8   zPersimmonModel.forward  s     >>' 	B( - $ 4 4Y ? ?'3330AMDK)94>JJ 	< 	<E!E)];;MM~~* 	I&'GHHH,,];;r2   weightsc                 *   t          |                     d                    }t                      }|D ]\  }}t          ||           r||         }d|v rt	          |dd           }| j        j        }|f|j        }	|                    |	d |         |ddfz   |	|dz   d          z             }|	                    ||dz             }|
                    |	          }t	          |dt                    }
 |
||           |                    |           |S )	NF)remove_duplicaterO   
output_dimrg   rh   r   weight_loader)dictnamed_parameterssetr   getattrr"   rI   r^   r_   	transposereshaper   add)r/   r   params_dictloaded_paramsnameloaded_weightparamr   rK   loaded_weight_shaper   s              r1   load_weightszPersimmonModel.load_weights&  sL   400%0HHII"%%%#* 	$ 	$D-&tT22 %E D(( %UL$??
 K;	)*7*='$1$6$6+KZK8$a,--j1n.>.>?@% %M
 %2$;$;J
UV$W$WM$1$9$9:M$N$NM#E?<QRRMM%///d####r2   r5   )r9   r:   r;   r
   r<   r(   r=   r>   r   r   r8   r   tupler   r   r?   r@   s   @r1   r   r      s       AC 
 
 
z 
3 
 
 
 
 
 
4, ,%, , , , , .2 < < 2D8	
 |d* 
+	+   ,HU33D-E$F 3s8        r2   r   c            
           e Zd Zdddedef fdZdej        dej        fdZ	 	 ddej        d
ej        de	d	z  dej        d	z  fdZ
dej        dej        d	z  fdZdeeeej        f                  dee         fdZ xZS )PersimmonForCausalLMr!   r   r   r$   c          	         t                                                       |j        j        }|| _        |j        | _        t          |t          |d                    | _        t          |j        |j
        dt          |d                    | _        t          |j                  | _        | j        j        | _        d S )Nmodel)r   r$   Flm_head)rE   r$   )r'   r(   r   r   r"   r   r   r   r   r   r)   r   r   logits_processorr   )r/   r   r$   r"   r0   s       r1   r(   zPersimmonForCausalLM.__init__G  s    )3 +##L,I,I
 
 

 &	22	
 
 
 !00A B BJ6 	,,,r2   r   r3   c                 6    | j                             |          S r5   )r   r   r   s     r1   r   z$PersimmonForCausalLM.embed_input_idsZ  s    z)))444r2   Nr   r   r   c                 8    |                      ||||          }|S )N)r   r   r   r   )r   )r/   r   r   r   r   r6   s         r1   r8   zPersimmonForCausalLM.forward]  s0     

!5'	 # 
 
 r2   r6   c                 <    |                      | j        |          }|S r5   )r   r   )r/   r6   logitss      r1   compute_logitsz#PersimmonForCausalLM.compute_logitsl  s      &&t|]CCr2   r   c                 J    t          |           }|                    |          S r5   )r   r   )r/   r   loaders      r1   r   z!PersimmonForCausalLM.load_weightss  s#    "4((""7+++r2   )NN)r9   r:   r;   r
   r<   r(   r=   r>   r   r   r8   r   r   r   r   r   r?   r@   s   @r1   r   r   F  s<       AC 
 
 
z 
3 
 
 
 
 
 
&5 5%, 5 5 5 5 <@-1 < < 2D8	
 |d*   | 
	   ,HU33D-E$F ,3s8 , , , , , , , ,r2   r   )4__doc__collections.abcr   	itertoolsr   r=   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr	   r
   vllm.distributedr   r   %vllm.model_executor.layers.activationr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   vllm.sequencer   
interfacesr   utilsr   r   r   r   r   Moduler    rB   rt   r   r    r2   r1   <module>r      s  0 J I $ $ $ $ $ $              ( ( ( ( ( ( * * * * * * = = = = = = / / / / / / / / O O O O O O O O < < < < < <         
 H G G G G G F F F F F F @ @ @ @ @ @        P O O O O O - - - - - - " " " " " "                 29   :[ [ [ [ [ [ [ [|4 4 4 4 4BI 4 4 4n Q Q Q Q QRY Q Q Qh/, /, /, /, /,29j /, /, /, /, /,r2   