
    .`i6                     Z   d Z ddlZddlmZ ddlmZ ddlZddlmZ ddlm	Z	 ddl
mZ ddlmZ dd	lmZmZ dd
lmZmZmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ ddlm Z m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z'm(Z( ddl)m*Z*m+Z+m,Z,m-Z-m.Z. de/dej0        fdZ1 G d dej2                  Z3 G d dej2                  Z4 G d dej2                  Z5e G d dej2                              Z6 G d  d!ej2        e'e(          Z7d"ee8e9ej0        f                  dee8e9ej0        f                  fd#Z:dS )$z?Inference-only BLOOM model compatible with HuggingFace weights.    N)Iterable)islice)nn)BloomConfig)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_groupget_tensor_model_parallel_rank$get_tensor_model_parallel_world_size)
get_act_fn)ColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)IntermediateTensors   )
SupportsPPSupportsQuant)AutoWeightsLoaderis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixtotal_num_headsreturnc                    dt          j        t          j        |                     z  }t          j        ddt          j        |          dz
   z   z  t          j                  }t          j        dd|z   t          j                  }t          j        ||          }|| k    rt          j        ddt          j        d|z            dz
   z   z  t          j                  }t          || |z
            }t          j        ddd|z  z   dt          j                  }t          j
        |t          j        ||          gd          }|S )N      )dtyper   )startendstepr%   r   )dim)mathfloorlog2torchtensorfloat32arangeint32powmincat)r    closest_power_of_2basepowersslopes
extra_basenum_remaining_headsextra_powerss           t/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/bloom.py_get_alibi_slopesr=   ?   sU   dj?)C)CDDD<	ty!344q899:;m  D \!Q!335;GGGFYtV$$F_,,\A49Q);%;<<q@AABC-
 
 

 "2D D
 
 |Q!4441EK
 
 
 FEIj,$G$GHaPPPM    c            	       v     e Zd Z	 	 	 ddededz  dedz  def fdZdej	        d	ej	        d
ej	        fdZ
 xZS )BloomAttentionN configcache_configquant_configprefixc           
         t                                                       |j        | _        |j        | _        | j        | j        z  | _        | j        | j        z  | j        k    sJ t                      }| j        |z  dk    sJ | j        |z  | _        t          | j        | j        | j        d|| d          | _	        t          | j        | j        d|| d          | _        t                      }|| j        z  }|dz   | j        z  }t          | j                  }	|	||                                         }	| j        dz  }
t          | j        | j        |
|	||| d	          | _        d S )
Nr   Tz.query_key_value)biasrD   rE   z.denser   g      z.attn)alibi_slopesrC   rD   rE   )super__init__hidden_sizen_headr    head_dimr   	num_headsr   query_key_valuer   denser   r=   tolistr   attn)selfrB   rC   rD   rE   tp_world_sizetp_rank
head_starthead_endrH   scaling	__class__s              r<   rJ   zBloomAttention.__init__X   s    	!-%}(D,@@}t33t7GGGGG<>>#m3q8888->0M %... 
  
  
 '%$$$
 
 

 122t~-
aK4>1()=>>#Jx$78??AA-%NM%%%###
 
 
			r>   position_idshidden_statesr!   c                     ~|                      |          \  }}|                    dd          \  }}}|                     |||          }|                     |          \  }	}|	S )Nr$   )chunksr)   )rO   chunkrR   rP   )
rS   rZ   r[   qkv_qkvattn_outputoutputs
             r<   forwardzBloomAttention.forward   sh    
 %%m44Q))1")--1aii1a((JJ{++	r>   NNrA   __name__
__module____qualname__r   r	   r   strrJ   r-   Tensorrg   __classcell__rY   s   @r<   r@   r@   W   s         ,0261
 1
1
 "D(1
 )4/	1

 1
 1
 1
 1
 1
 1
f
l
 |
 
	
 
 
 
 
 
 
 
r>   r@   c                   \     e Zd Z	 	 d
dededz  def fdZdej        dej        fd	Z	 xZ
S )BloomMLPNrA   rB   rD   rE   c                     t                                                       |j        }t          |d|z  || d          | _        t          d          | _        t          d|z  ||| d          | _        d S )N   z.dense_h_to_4h)rD   rE   geluz.dense_4h_to_h)	rI   rJ   rK   r   dense_h_to_4hr   	gelu_implr   dense_4h_to_h)rS   rB   rD   rE   rK   rY   s        r<   rJ   zBloomMLP.__init__   s     	(1O%,,,	
 
 
 $F++.O%,,,	
 
 
r>   xr!   c                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)rv   rw   rx   )rS   ry   ra   s      r<   rg   zBloomMLP.forward   sF    !!!$$1NN1!!!$$1r>   )NrA   )rj   rk   rl   r   r   rm   rJ   r-   rn   rg   ro   rp   s   @r<   rr   rr      s         37	
 

 )4/
 	
 
 
 
 
 
, %,        r>   rr   c            	       v     e Zd Z	 	 	 ddededz  dedz  def fdZdej	        d	ej	        d
ej	        fdZ
 xZS )
BloomBlockNrA   rB   rC   rD   rE   c                 X   t                                                       |j        }t          j        ||j                  | _        t          |||| d          | _        t          j        ||j                  | _	        t          ||| d          | _        |j        | _        d S )Nepsz.self_attentionrE   z.mlp)rI   rJ   rK   r   	LayerNormlayer_norm_epsiloninput_layernormr@   self_attentionpost_attention_layernormrr   mlp(apply_residual_connection_post_layernorm)rS   rB   rC   rD   rE   rK   rY   s         r<   rJ   zBloomBlock.__init__   s     	(!|KV=VWWW,L,&7Q7Q7Q
 
 
 )+V6)
 )
 )
% FLFIII; 	555r>   rZ   r[   r!   c                     |                      |          }| j        r|}n|}|                     ||          }||z   }|                     |          }| j        r|}n|}|                     |          |z   }|S )N)rZ   r[   )r   r   r   r   r   )rS   rZ   r[   layernorm_outputresidualattention_outputrf   s          r<   rg   zBloomBlock.forward   s      //>> 8 	%'HH$H  ..%* / 
 
 ,h6889IJJ 8 	('HH'H *++h6r>   rh   ri   rp   s   @r<   r}   r}      s         ,026
 

 "D(
 )4/	

 
 
 
 
 
 
,l | 
	       r>   r}   c                        e Zd Zdddedef fdZdej        dej        fdZ	 ddej        d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
deeeej        f                  dee         fdZ xZS )
BloomModelrA   r   vllm_configrE   c                   t                                                       |j        j        |j        |j        | _        j        | _        t          j
        | j                  | _        t          j        | j        j                  | _        t!          j        fd| d          \  | _        | _        | _        t          j        | j        j                  | _        t-          dgj                  | _        d S )Nr   c                 *    t          |           S )Nr   )r}   )rE   rC   rB   rD   s    r<   <lambda>z%BloomModel.__init__.<locals>.<lambda>  s    :l6   r>   z.hr   r[   )rI   rJ   model_config	hf_configrC   rD   rB   rK   	embed_dimr   
vocab_sizeword_embeddingsr   r   r   word_embeddings_layernormr   num_hidden_layersstart_layer	end_layerhln_fr   make_empty_intermediate_tensors)rS   r   rE   rC   rB   rD   rY   s      @@@r<   rJ   zBloomModel.__init__   s   )3"/"/+  6N 
  
 *,N 9*
 *
 *
&
 4?$      ===4
 4
 4
0$.$& LV5NOOO	/Vv10
 0
,,,r>   	input_idsr!   c                 ,    |                      |          S r{   )r   rS   r   s     r<   embed_input_idszBloomModel.embed_input_ids  s    ##I...r>   NrZ   intermediate_tensorsinputs_embedsc                 t   t                      j        r0||}n|                     |          }|                     |          }n|J |d         }t	          | j        | j        | j                  D ]} |||          }t                      j        st          d|i          S | 
                    |          }|S )Nr[   )r   is_first_rankr   r   r   r   r   r   is_last_rankr   r   )rS   r   rZ   r   r   r[   layers          r<   rg   zBloomModel.forward  s     >>' 	B( - $ 4 4Y ? ? ::=IIMM'3330AMDFD$4dnEE 	? 	?E!E,>>MM~~* 	I&'GHHH		-00r>   weightsc                 *   t          |                     d                    }t                      }|D ]\  }}t          ||           r||         }d|v rt	          |dd           }| j        j        }|f|j        }	|                    |	d |         |ddfz   |	|dz   d          z             }|	                    ||dz             }|
                    |	          }t	          |dt                    }
 |
||           |                    |           |S )	NF)remove_duplicaterO   
output_dimr$   r]   r   weight_loader)dictnamed_parameterssetr   getattrrB   num_attention_headsshapeview	transposereshaper   add)rS   r   params_dictloaded_paramsnameloaded_weightparamr   rN   loaded_weight_shaper   s              r<   load_weightszBloomModel.load_weights,  sL   400%0HHII"%%%#* 	$ 	$D-&tT22 %E D((
 %UL$??
 K;	)*7*='$1$6$6+KZK8$a,--j1n.>.>?@% %M
 %2$;$;J
UV$W$WM$1$9$9:M$N$NM#E?<QRRMM%///d####r>   r{   )rj   rk   rl   r
   rm   rJ   r-   rn   r   r   rg   r   tupler   r   ro   rp   s   @r<   r   r      s       AC  
  
  
z  
3  
  
  
  
  
  
D/ /%, / / / / .2 < l 2D8	
 |d* 
+	+   .HU33D-E$F 3s8        r>   r   c                       e Zd Zdddedef fdZdej        dej        fdZ	 	 ddej        d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
dej        dej        d	z  fdZdeeeej        f                  dee         fdZ xZS )BloomForCausalLMrA   r   r   rE   c                   t                                                       |j        j        }|j        }|| _        || _        t          |t          |d                    | _        | j        j	        r| j        j
        | _        n9t          | j        j        | j        j        t          |d                    | _        t          |j                  | _        | j        j        | _        d S )Ntransformer)r   rE   lm_headr   )rI   rJ   r   r   rD   rB   r   r   r   tie_word_embeddingsr   r   r   r   rK   r   logits_processorr   )rS   r   rE   rB   rD   rY   s        r<   rJ   zBloomForCausalLM.__init__M  s    )3"/(%#L,O,O
 
 
 ;* 	+;DLL)&'#FI66  DL !00A B B< 	,,,r>   r   r!   c                 6    | j                             |          S r{   )r   r   r   s     r<   r   z BloomForCausalLM.embed_input_idsd  s    //	:::r>   N	positionsr   r   c                 6    |                      ||||          }|S r{   )r   )rS   r   r   r   r   r[   s         r<   rg   zBloomForCausalLM.forwardg  s+     ((y"6
 
 r>   r[   c                 <    |                      | j        |          }|S r{   )r   r   )rS   r[   logitss      r<   compute_logitszBloomForCausalLM.compute_logitss  s      &&t|]CCr>   r   c                 n    t          | dg          }t          |          }|                    |          S )Nzlm_head.weight)skip_prefixes)r   _add_transformer_prefixr   )rS   r   loaders      r<   r   zBloomForCausalLM.load_weightsz  s9    "48H7IJJJ)'22""7+++r>   )NN)rj   rk   rl   r
   rm   rJ   r-   rn   r   r   rg   r   r   r   r   r   ro   rp   s   @r<   r   r   L  sN       AC 
 
 
z 
3 
 
 
 
 
 
.; ;%, ; ; ; ; <@-1
 
<
 <
 2D8	

 |d*
 
+	+
 
 
 
| 
	   ,HU33D-E$F ,3s8 , , , , , , , ,r>   r   r   c              #   Z   K   | D ]%\  }}|                     d          sd|z   }||fV  &d S )Nztransformer.)
startswith)r   r   r.   s      r<   r   r     sV          f~.. 	)!D(DFl r>   );__doc__r*   collections.abcr   	itertoolsr   r-   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr	   r
   vllm.distributedr   r   r   %vllm.model_executor.layers.activationr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   vllm.sequencer   
interfacesr   r   utilsr   r   r   r   r   intrn   r=   Moduler@   rr   r}   r   r   r   rm   r    r>   r<   <module>r      s]  ( F E  $ $ $ $ $ $              $ $ $ $ $ $ * * * * * * = = = = = = / / / / / / / /         
 = < < < < <         
 H G G G G G F F F F F F        P O O O O O - - - - - - 1 1 1 1 1 1 1 1             s u|    0> > > > >RY > > >B    ry   <5 5 5 5 5 5 5 5p Z Z Z Z Z Z Z Zz1, 1, 1, 1, 1,ry*m 1, 1, 1,heC-./eC%&'     r>   