
    .`iC                        d Z ddlmZ ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZmZ dd
lmZmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ ddlm Z  ddl!m"Z"m#Z# ddl$m%Z%m&Z& ddl'm(Z( ddl)m*Z*m+Z+ ddl,m-Z-m.Z.m/Z/m0Z0m1Z1  G d dej2                  Z3 G d dej2                  Z4 G d dej2                  Z5e G d dej2                              Z6 G d d ej2        e*e+          Z7dS )!zEInference-only IBM Granite model compatible with HuggingFace weights.    )Iterable)isliceN)nn)GraniteConfig)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)
SiluAndMul)RMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)IntermediateTensors   )SupportsLoRA
SupportsPP)AutoWeightsLoaderPPMissingLayeris_pp_missing_parametermake_layersmaybe_prefixc                   P     e Zd Z	 	 	 ddededededz  ded	ed
df fdZd Z xZ	S )
GraniteMLPNF hidden_sizeintermediate_size
hidden_actquant_configbiasprefixreturnc                    t                                                       t          ||gdz  ||| d          | _        t	          ||||| d          | _        |dk    rt          d| d          t                      | _        d S )	N   .gate_up_proj)
input_sizeoutput_sizesr)   r(   r*   z
.down_projr/   output_sizer)   r(   r*   siluzUnsupported activation: z!. Only silu is supported for now.)	super__init__r   gate_up_projr   	down_proj
ValueErrorr   act_fn)selfr%   r&   r'   r(   r)   r*   	__class__s          v/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/granite.pyr5   zGraniteMLP.__init__E   s     	6"+,q0%+++
 
 
 +(#%(((
 
 
 X:XXX   !ll    c                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)r6   r9   r7   )r:   xgate_up_s       r<   forwardzGraniteMLP.forwardc   sD    &&q))
KK  ~~a  1r=   )NFr$   )
__name__
__module____qualname__intstrr   boolr5   rC   __classcell__r;   s   @r<   r#   r#   D   s         37# ## # 	#
 )4/# # # 
# # # # # #<      r=   r#   c                        e Zd Z	 	 	 	 	 ddedededed	ed
edz  dededz  deddf fdZ	de
j        de
j        de
j        fdZ xZS )GraniteAttention    NFr$   configr%   	num_headsnum_kv_headsmax_position_embeddingsr(   r)   cache_configr*   r+   c
           
         t                                                       || _        t                      }
|| _        | j        |
z  dk    sJ | j        |
z  | _        || _        | j        |
k    r| j        |
z  dk    sJ n|
| j        z  dk    sJ t          d| j        |
z            | _        t          |dd           | _
        | j
        | j        | j        z  | _
        | j        | j
        z  | _        | j        | j
        z  | _        |j        | _        || _        t!          || j
        | j        | j        |||	 d          | _        t%          | j        | j
        z  ||||	 d          | _        t)          | j
        ||j                  | _        t/          | j        | j
        | j        | j        |||	 d	
          | _        d S )Nr   r   head_dim	.qkv_proj)r%   	head_sizetotal_num_headstotal_num_kv_headsr)   r(   r*   z.o_projr1   )max_positionrope_parametersz.attn)rQ   rS   r(   r*   )r4   r5   r%   r   rX   rP   rY   maxrQ   getattrrU   q_sizekv_sizeattention_multiplierscalingrR   r   qkv_projr   o_projr   r[   
rotary_embr   attn)r:   rO   r%   rP   rQ   rR   r(   r)   rS   r*   tp_sizer;   s              r<   r5   zGraniteAttention.__init__k   s    	&688(#g-2222-8"."g-- *W499999 T4499994#:g#EFF
D99=  ,0DDDMnt}4(4=82'>$)#m 0#6%'''
 
 
 (+dm;#%%%%
 
 
 #M0"2
 
 

 NML*%%###
 
 
			r=   	positionshidden_statesc                 "   |                      |          \  }}|                    | j        | j        | j        gd          \  }}}|                     |||          \  }}|                     |||          }|                     |          \  }	}|	S )N)dim)rb   splitr^   r_   rd   re   rc   )
r:   rg   rh   qkvrB   qkvattn_outputoutputs
             r<   rC   zGraniteAttention.forward   s    
 }--Q))T[$,E2)NN1ay!Q//1ii1a((KK,,	r=   )rN   NFNr$   )rD   rE   rF   r   rG   r   rI   r	   rH   r5   torchTensorrC   rJ   rK   s   @r<   rM   rM   j   s        (,26+/C
 C
C
 C
 	C

 C
 "%C
 )4/C
 C
 "D(C
 C
 
C
 C
 C
 C
 C
 C
J
<
 |
 
	
 
 
 
 
 
 
 
r=   rM   c                        e Zd Z	 	 	 ddededz  dedz  deddf
 fdZd	ej	        d
ej	        de
ej	        ej	        f         fdZ xZS )GraniteDecoderLayerNr$   rO   rS   r(   r*   r+   c                 P   t                                                       |j        | _        |j        | _        t	          |dd          }t	          |dd          pt	          |dd          }t          || j        |j        t	          |d|j                  ||||| d	  	        | _        t          | j        |j	        |j
        |t	          |d	d          | d
          | _        t          |j        |j                  | _        t          |j        |j                  | _        d S )NrR   rN   attention_biasFr)   num_key_value_headsz
.self_attn)	rO   r%   rP   rQ   rR   r(   r)   rS   r*   mlp_biasz.mlp)r%   r&   r'   r(   r)   r*   eps)r4   r5   r%   residual_multiplierr]   rM   num_attention_heads	self_attnr#   r&   r'   mlpr   rms_norm_epsinput_layernormpost_attention_layernorm)r:   rO   rS   r(   r*   rR   rx   r;   s          r<   r5   zGraniteDecoderLayer.__init__   sU    	!-#)#= ")&2KT"R"R !)95AA 
WFEF
 F
 *(0 -v/I  %<%%(((
 
 
 ($6(%U33???
 
 
  'v'9v?RSSS(/F$7)
 )
 )
%%%r=   rg   rh   c                     |}|                      |          }|                     ||          }||| j        z  z   }|}|                     |          }|                     |          }||| j        z  z   }|S )N)rg   rh   )r   r   r}   r   r   )r:   rg   rh   residuals       r<   rC   zGraniteDecoderLayer.forward   s     !,,];;' ' 
 
 !=43K#KK 55mDD// =43K#KKr=   )NNr$   )rD   rE   rF   r   r	   r   rH   r5   rs   rt   tuplerC   rJ   rK   s   @r<   rv   rv      s         ,026)
 )
)
 "D()
 )4/	)

 )
 
)
 )
 )
 )
 )
 )
V< | 
u|U\)	*	       r=   rv   c                        e Zd Zdddedef fdZdej        dej        fdZ	 ddej        d	z  d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
deeeej        f                  dee         fdZ xZS )GraniteModelr$   r*   vllm_configr*   c                V   t                                                       |j        j        |j        |j        | _        | _        t                      j        sj	        r5t                      j
        r"t          j        j                  | _        nt                      | _        t!          j        fd| d          \  | _        | _        | _        t                      j
        r"t+          j        j                  | _        d S t                      | _        d S )N)r(   c                 *    t          |           S )N)rO   rS   r(   r*   )rv   )r*   rS   rO   r(   s    r<   <lambda>z'GraniteModel.__init__.<locals>.<lambda>  s#    .))	   r=   z.layersr   r{   )r4   r5   model_config	hf_configrS   r(   rO   r   is_first_ranktie_word_embeddingsis_last_rankr   
vocab_sizer%   embed_tokensr   r    num_hidden_layersstart_layer	end_layerlayersr   r   norm)r:   r   r*   rS   rO   r(   r;   s      @@@r<   r5   zGraniteModel.__init__   s>   )3"/"/(>>' 		1&		1+7>>+F		1 !7!")! ! !D !/ 0 0D8C$      %%%	9
 	9
 	9
5$.$+ >>& 	) 28KLLLDIII&((DIIIr=   	input_idsr+   c                 ,    |                      |          S r?   )r   r:   r   s     r<   embed_input_idszGraniteModel.embed_input_ids#  s      +++r=   Nrg   intermediate_tensorsinputs_embedsc                 h   t                      j        r*||}n|                     |          }|| j        j        z  }n|J |d         }t          | j        | j        | j                  D ]} |||          }t                      j	        st          d|i          S |                     |          }|S )Nrh   )r   r   r   rO   embedding_multiplierr   r   r   r   r   r   r   )r:   r   rg   r   r   rh   layers          r<   rC   zGraniteModel.forward&  s     >>' 		B( - $ 4 4Y ? ?T[==MM'3330AMDK)94>JJ 	< 	<E!E)];;MM~~* 	&#]   		-00r=   weightsc                 b   g d}t          |                                           }t                      }|D ]w\  }}| j        ~| j                            |          x}rb||         }t          |dt                    }	|                                dk    r|n|d         } |	||           |                    |           |D ]i\  }
}}||vr|	                    ||
          }|
                    d          r||vr;t          ||           rL||         }|j        }	 |	|||            nk|
                    d          r||vrt          ||          }|&t          ||           r8||         }t          |dt                    }	 |	||           |                    |           y|S )N))rV   z.q_projrn   )rV   z.k_projro   )rV   z.v_projrp   )r.   z
.gate_projr   )r.   z.up_projr   weight_loaderr   z.bias)dictnamed_parameterssetr(   get_cache_scaler]   r   rk   addreplaceendswithr   r   r   )r:   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
scale_nameparamr   
param_nameweight_nameshard_ids                r<   load_weightszGraniteModel.load_weightsE  s   "
 "
 "
 4002233"%%%#* ,	$ ,	$D- ,"/??EEE
 - $J/ '@U V V%2%6%6%8%8A%=%=MM=QRCS  e]333!!*---5K 4 41
Kd**||K<<==)) d+.E.E*466 #D) % 3e]H=== ==)) d+.E.E0{CC<*466 #D) '@U V Ve]333d####r=   r?   )rD   rE   rF   r
   rH   r5   rs   rt   r   r   rC   r   r   r   r   rJ   rK   s   @r<   r   r      s!       AC !) !) !)z !)3 !) !) !) !) !) !)F, ,%, , , , , .2 <$& < 2D8	
 |d* 
+	+   >8HU33D-E$F 83s8 8 8 8 8 8 8 8 8r=   r   c                   b    e Zd Zg dddgdZdddZdd	d
edef fdZdej	        dej	        fdZ
	 	 ddej	        dej	        dedz  dej	        dz  dej	        ez  f
dZdej	        dej	        dz  fdZdedej        dej        defdZdeeeej	        f                  dee         fdZ xZS )GraniteForCausalLM)q_projk_projv_proj	gate_projup_proj)rb   r6   input_embeddingsoutput_embeddings)r   lm_headr$   r   r   r*   c          	      R   t                                                       |j        j        }|j        }|| _        || _        t          |t          |d                    | _        t                      j
        rt          |j        |j        |t          |d                    | _        |j        r| j        j        j        | j        _        t%          |dd          }t'          |d          r
||j        z  }t+          |j        |          | _        d S t/                      | _        d S )	Nmodel)r   r*   r   )r(   r*   logit_scaleg      ?logits_scaling)scale)r4   r5   r   r   r(   rO   r   r!   r   r   r   r   r   r%   r   r   r   weightr]   hasattrr   r   logits_processorr   )r:   r   r*   rO   r(   r   r;   s         r<   r5   zGraniteForCausalLM.__init__  s,   )3"/(!#L,I,I
 
 

 >>& 	,)!")#FI66	  DL ) E&*j&=&D#!&-==Kv/00 5v44$3!% % %D!!! *++DLLLr=   r   r+   c                 6    | j                             |          S r?   )r   r   r   s     r<   r   z"GraniteForCausalLM.embed_input_ids  s    z)))444r=   Nrg   r   r   c                 6    |                      ||||          }|S r?   )r   )r:   r   rg   r   r   model_outputs         r<   rC   zGraniteForCausalLM.forward  s)     zzy"6
 
 r=   rh   c                 <    |                      | j        |          }|S r?   )r   r   )r:   rh   logitss      r<   compute_logitsz!GraniteForCausalLM.compute_logits  s    &&t|]CCr=   
batch_sizedtypedevicec                 f    t          dt          j        || j        j        f||          i          S )Nrh   )r   r   )r   rs   zerosrO   r%   )r:   r   r   r   s       r<   make_empty_intermediate_tensorsz2GraniteForCausalLM.make_empty_intermediate_tensors  sB     #!89v" " "
 
 	
r=   r   c                 p    | j         j        rdgnd }t          | |          }|                    |          S )Nzlm_head.)skip_prefixes)rO   r   r   r   )r:   r   r   loaders       r<   r   zGraniteForCausalLM.load_weights  sI     )-(GQT"'
 
 
 ""7+++r=   )NN)rD   rE   rF   packed_modules_mappingembedding_modulesr
   rH   r5   rs   rt   r   r   rC   r   rG   r   r   r   r   r   r   r   rJ   rK   s   @r<   r   r     s       
 
 
 

 
 +& 
 BD , , ,z ,3 , , , , , ,@5 5%, 5 5 5 5 <@-1
 
<
 <
 2D8	

 |d*
 
+	+
 
 
 
EL U\D=P    	
	
&+k	
;@<	
		
 	
 	
 	

,HU33D-E$F 
,3s8 
, 
, 
, 
, 
, 
, 
, 
,r=   r   )8__doc__collections.abcr   	itertoolsr   rs   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr	   r
   vllm.distributedr   r   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   r   vllm.sequencer   
interfacesr   r   utilsr   r   r   r    r!   Moduler#   rM   rv   r   r    r=   r<   <module>r      s  2 L K $ $ $ $ $ $              & & & & & & * * * * * * = = = = = = / / / / / / / / O O O O O O O O < < < < < < 8 8 8 8 8 8         
 H G G G G G F F F F F F @ @ @ @ @ @               . - - - - - 0 0 0 0 0 0 0 0             # # # # # # # #LP P P P Pry P P Pf> > > > >") > > >B ~ ~ ~ ~ ~29 ~ ~ ~B[, [, [, [, [,L* [, [, [, [, [,r=   