
    .`iA=                        d Z ddlmZ ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZmZ dd
lmZmZ ddlmZ ddlmZmZmZmZ ddlmZ ddlmZ ddlmZm Z  ddl!m"Z" ddl#m$Z$ ddl%m&Z&m'Z' ddl(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.  G d dej/                  Z0 G d dej1                  Z2 G d dej1                  Z3 G d dej1                  Z4e G d dej1                              Z5 G d d ej1        e'e&          Z6dS )!z=Inference-only OPT model compatible with HuggingFace weights.    )Iterable)isliceN)nn)	OPTConfig)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)
get_act_fn)ColumnParallelLinearQKVParallelLinearReplicatedLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)IntermediateTensors   )SupportsLoRA
SupportsPP)AutoWeightsLoaderWeightsMapperis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                   B     e Zd Zdedef fdZdej        f fdZ xZS )OPTLearnedPositionalEmbeddingnum_embeddingsembedding_dimc                 j    d| _         t                                          || j         z   |           d S )N   )offsetsuper__init__)selfr#   r$   	__class__s      r/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/opt.pyr)   z&OPTLearnedPositionalEmbedding.__init__>   s3     $+5}EEEEE    	positionsc                 V    t                                          || j        z             S N)r(   forwardr'   )r*   r.   r+   s     r,   r1   z%OPTLearnedPositionalEmbedding.forwardE   s     wwy4;6777r-   )	__name__
__module____qualname__intr)   torchTensorr1   __classcell__r+   s   @r,   r"   r"   =   sz        Fs F3 F F F F F F8 8 8 8 8 8 8 8 8 8 8r-   r"   c                   v     e Zd Z	 	 	 	 ddededededz  dedz  d	ed
df fdZde	j
        d
e	j
        fdZ xZS )OPTAttentionTN 	embed_dim	num_headsbiascache_configquant_configprefixreturnc           	         t                                                       || _        t                      }|}||z  dk    sJ ||z  | _        ||z  | _        | j        dz  | _        t          || j        |||| d          | _        t          ||||| d          | _
        t          | j        | j        | j        ||| d          | _        d S )Nr   g      z	.qkv_projr?   rA   rB   z	.out_projz.attn)scaler@   rA   rB   )r(   r)   r=   r   r>   head_dimscalingr   qkv_projr   out_projr   attn)
r*   r=   r>   r?   r@   rA   rB    tensor_model_parallel_world_sizetotal_num_headsr+   s
            r,   r)   zOPTAttention.__init__J   s    	"+O+Q+Q(#;;q@@@@(,LL!_4}d*)M%'''
 
 
 *%'''
 
 
 NM,%%###
 
 
			r-   hidden_statesc                     |                      |          \  }}|                    dd          \  }}}|                     |||          }|                     |          \  }}|S )N   )chunksdim)rI   chunkrK   rJ   )	r*   rN   qkv_qkvattn_outputoutputs	            r,   r1   zOPTAttention.forwardt   sc     }--Q))1")--1aii1a((MM+..	r-   )TNNr<   )r2   r3   r4   r5   boolr	   r   strr)   r6   r7   r1   r8   r9   s   @r,   r;   r;   I   s        
 +/26(
 (
(
 (
 	(

 "D((
 )4/(
 (
 
(
 (
 (
 (
 (
 (
T| 
       r-   r;   c            	       h     e Zd Z	 	 	 ddededz  dedz  def fdZdej	        d	ej	        fd
Z
 xZS )OPTDecoderLayerNr<   configr@   rA   rB   c           	      P   t                                                       || _        |j        | _        t          | j        |j        |j        ||| d          | _        |j	        | _	        t          j        | j        |j                  | _        t          | j        |j        |j        || d          | _        t#          |j                  | _        t)          |j        | j        |j        || d          | _        t          j        | j        |j                  | _        d S )Nz
.self_attn)r=   r>   r?   r@   rA   rB   elementwise_affinez.fc1rE   z.fc2)r(   r)   r`   hidden_sizer=   r;   num_attention_headsenable_bias	self_attndo_layer_norm_beforer   	LayerNormlayer_norm_elementwise_affineself_attn_layer_normr   ffn_dimfc1r   activation_functionactivation_fnr   fc2final_layer_normr*   r`   r@   rA   rB   r+   s        r,   r)   zOPTDecoderLayer.__init__   s;    	+%n0#%%(((
 
 
 %+$?!$&LNv/S%
 %
 %
! (NN#%???
 
 
 ((BCC$NN#%???
 
 
 !#Nv/S!
 !
 !
r-   rN   rC   c                    |}| j         r|                     |          }|                     |          }||z   }| j         s|                     |          }|}| j         r|                     |          }|                     |          \  }}|                     |          }|                     |          \  }}||z   }| j         s|                     |          }|S )N)rN   )rh   rk   rg   rq   rm   ro   rp   )r*   rN   residualrV   s       r,   r1   zOPTDecoderLayer.forward   s    
 !$ 	E 55mDDM]CC =0( 	E 55mDDM !$ 	A 11-@@M88M22q**=9988M22q =0( 	A 11-@@Mr-   NNr<   )r2   r3   r4   r   r	   r   r]   r)   r6   r7   r1   r8   r9   s   @r,   r_   r_      s         ,026(
 (
(
 "D((
 )4/	(

 (
 (
 (
 (
 (
 (
T| 
       r-   r_   c                        e Zd Z	 	 	 ddededz  dedz  def fdZdej	        d	ej	        fd
Z
	 ddej	        dej	        dedz  dej	        dz  d	ej	        ez  f
dZ xZS )
OPTDecoderNr<   r`   r@   rA   rB   c                    t                                                       | _        j        | _        j        | _        t          j        j                  | _        t          j        j
                  | _        j        j
        k    r't          j
        j        d| d          | _        nd | _        j        j
        k    r't          j        j
        d| d          | _        nd | _        j        r-j        s&t#          j        j
        j                  | _        nd | _        t+          j        fd| d          \  | _        | _        | _        d S )	NFz.project_outrE   z.project_inrb   c                 *    t          |           S )NrB   )r_   )rB   r@   r`   rA   s    r,   <lambda>z%OPTDecoder.__init__.<locals>.<lambda>  s    ?l6   r-   z.layersrz   )r(   r)   r`   max_position_embeddingsmax_target_positions
vocab_sizer   word_embed_proj_dimembed_tokensr"   rd   embed_positionsr   project_out
project_inrh   _remove_final_layer_normr   ri   rj   rq   r   num_hidden_layersstart_layer	end_layerlayersrr   s    ``` r,   r)   zOPTDecoder.__init__   s    	$*$B! +2&
 

  =*F,> 
  

 %);;;/"*) ...     D  $D%);;;.*") ---  DOO #DO & 	)v/N 	)$&L"#)#G% % %D!!
 %)D!8C$      %%%9
 9
 9
5$.$+++r-   	input_idsrC   c                 ,    |                      |          S r0   )r   r*   r   s     r,   embed_input_idszOPTDecoder.embed_input_ids	  s      +++r-   r.   intermediate_tensorsinputs_embedsc                     t                      j        rQ||                     |          }|                     |          }| j        |                     |          \  }}||z   }n|J |d         }t          | j        | j        | j                  D ]} ||          }t                      j	        st          d|i          S | j        |                     |          }| j        |                     |          \  }}|S )NrN   )r   is_first_rankr   r   r   r   r   r   r   is_last_rankr   rq   r   )	r*   r   r.   r   r   
pos_embedsrV   rN   layers	            r,   r1   zOPTDecoder.forward  s    >>' 		B$ $ 4 4Y ? ?--i88J*#'??=#A#A q)J6MM'3330AMDK)94>JJ 	1 	1E!E-00MM~~* 	I&'GHHH , 11-@@M'#//>>M1r-   ru   r0   )r2   r3   r4   r   r	   r   r]   r)   r6   r7   r   r   r1   r8   r9   s   @r,   rw   rw      s
        ,026>
 >
>
 "D(>
 )4/	>

 >
 >
 >
 >
 >
 >
@, ,%, , , , , .2 < < 2D8	
 |d* 
+	+       r-   rw   c                        e Zd Zdddedef fdZdej        dej        fdZ	 ddej        d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
deeeej        f                  dee         fdZ xZS )OPTModelr<   rz   vllm_configrB   c                    t                                                       |j        j        }|j        }|j        }t          |||| d          | _        t          dg|j	                  | _
        d S )Nz.decoderrz   rN   )r(   r)   model_config	hf_configr@   rA   rw   decoderr   rd   make_empty_intermediate_tensors)r*   r   rB   r`   r@   rA   r+   s         r,   r)   zOPTModel.__init__,  s    )3"/"/!L,&7J7J7J
 
 
 0Wv10
 0
,,,r-   r   rC   c                 6    | j                             |          S r0   )r   r   r   s     r,   r   zOPTModel.embed_input_ids:  s    |++I666r-   Nr.   r   r   c                 4    |                      ||||          S )N)r   )r   )r*   r   r.   r   r   s        r,   r1   zOPTModel.forward=  s)     ||y"6m  
 
 	
r-   weightsc                 ,   g d}t          |                     d                    }t                      }|D ]\  }}|D ]i\  }}}	||vr|                    ||          }|                    d          r||vr;t          ||           rL||         }
|
j        } ||
||	            nU|                    d          r||vrt          ||           r||         }
t          |
dt                    } ||
|           |	                    |           |S )N))rI   q_projrW   )rI   k_projrX   )rI   v_projrY   F)remove_duplicatez.biasweight_loader)
dictnamed_parameterssetreplaceendswithr   r   getattrr   add)r*   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
param_nameweight_nameshard_idparamr   s               r,   load_weightszOPTModel.load_weightsH  sl   "
 "
 "
 400%0HHII"%%%#* 	$ 	$D-5K 4 41
Kd**||K<<==)) d+.E.E*466 #D) % 3e]H=== ==)) d+.E.E*466 #D) '@U V Ve]333d####r-   r0   )r2   r3   r4   r
   r]   r)   r6   r7   r   r   r1   r   tupler   r   r8   r9   s   @r,   r   r   *  s       AC 
 
 
z 
3 
 
 
 
 
 
7 7%, 7 7 7 7 .2	
 	
<	
 <	
 2D8		

 |d*	
 
+	+	
 	
 	
 	
!HU33D-E$F !3s8 ! ! ! ! ! ! ! !r-   r   c                   B    e Zd Zdg diZ eddi          Zddded	ef fd
Zde	j
        de	j
        fdZ	 	 dde	j
        de	j
        dedz  de	j
        dz  de	j
        ez  f
dZde	j
        de	j
        dz  fdZdeeee	j
        f                  dee         fdZ xZS )OPTForCausalLMrI   )r   r   r   zdecoder.zmodel.decoder.)orig_to_new_prefixr<   rz   r   rB   c                   t                                                       |j        j        }|j        }|| _        || _        t          |t          |d                    | _        | j        j	        r| j        j
        j        | _        n/t          |j        |j        t          |d                    | _        t!          |j                  | _        | j        j        | _        d S )Nmodel)r   rB   lm_headrz   )r(   r)   r   r   rA   r`   r   r    r   tie_word_embeddingsr   r   r   r   r~   r   r   logits_processorr   )r*   r   rB   r`   rA   r+   s        r,   r)   zOPTForCausalLM.__init__w  s    )3"/(#L,I,I
 
 

 ;* 	:-:DLL)!*#FI66  DL
 !00A B BJ6 	,,,r-   r   rC   c                 6    | j                             |          S r0   )r   r   r   s     r,   r   zOPTForCausalLM.embed_input_ids  s    z)))444r-   Nr.   r   r   c                 6    |                      ||||          }|S r0   )r   )r*   r   r.   r   r   rN   s         r,   r1   zOPTForCausalLM.forward  s)     

y"6
 
 r-   rN   c                 <    |                      | j        |          }|S r0   )r   r   )r*   rN   logitss      r,   compute_logitszOPTForCausalLM.compute_logits  s      &&t|]CCr-   r   c                 z    t          | | j        j        rdgnd           }|                    || j                  S )Nzlm_head.weight)skip_prefixes)mapper)r   r`   r   r   hf_to_vllm_mapper)r*   r   loaders      r,   r   zOPTForCausalLM.load_weights  sM    "&*k&EO!""4
 
 
 ""743I"JJJr-   )NN)r2   r3   r4   packed_modules_mappingr   r   r
   r]   r)   r6   r7   r   r   r1   r   r   r   r   r   r8   r9   s   @r,   r   r   l  s       222 &(
   BD 
 
 
z 
3 
 
 
 
 
 
,5 5%, 5 5 5 5 <@-1
 
<
 <
 2D8	

 |d*
 
+	+
 
 
 
| 
	   KHU33D-E$F K3s8 K K K K K K K Kr-   r   )7__doc__collections.abcr   	itertoolsr   r6   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr	   r
   vllm.distributedr   r   %vllm.model_executor.layers.activationr   !vllm.model_executor.layers.linearr   r   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   vllm.sequencer   
interfacesr   r   utilsr   r   r   r   r   r    	Embeddingr"   Moduler;   r_   rw   r   r    r-   r,   <module>r      s   * D C $ $ $ $ $ $              " " " " " " * * * * * * = = = = = = / / / / / / / / O O O O O O O O < < < < < <            H G G G G G F F F F F F        P O O O O O - - - - - - 0 0 0 0 0 0 0 0               	8 	8 	8 	8 	8BL 	8 	8 	83 3 3 3 329 3 3 3lF F F F Fbi F F FR_ _ _ _ _ _ _ _D > > > > >ry > > >B>K >K >K >K >KRY
L >K >K >K >K >Kr-   