
    .`iO                     B   U d Z ddlZddlmZ ddlmZ ddlmZ ddlZddlm	Z	 ddl
mZ ddlmZ dd	lmZ dd
lmZ ddlmZmZ ddlmZmZmZmZ ddlmZ ddlmZmZm Z  ddl!m"Z" ddl#m$Z$ ddl%m&Z& ddl'm(Z(m)Z) ddl*m+Z+ ddl,m-Z- ddl.m/Z/ ddl0m1Z1 ddl2m3Z3m4Z4m5Z5m6Z6m7Z7 ee/z  Zee8d<   de9dej:        fdZ; G d de	j<                  Z= G d d e	j<                  Z> G d! d"e	j<                  Z?e G d# d$e	j<                              Z@ G d% d&e	j<        e1          ZAdS )'zPyTorch Falcon model.    N)Iterable)islice)	TypeAlias)nn)	LayerNorm)FalconConfig)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_groupget_tensor_model_parallel_rank$get_tensor_model_parallel_world_size tensor_model_parallel_all_reduce)
get_act_fn)ColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)IntermediateTensors)RWConfig   )
SupportsPP)AutoWeightsLoaderis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixr   total_num_headsreturnc                    dt          j        t          j        |                     z  }t          j        ddt          j        |          dz
   z   z  t          j                  }t          j        dd|z   t          j                  }t          j        ||          }|| k    rt          j        ddt          j        d|z            dz
   z   z  t          j                  }t          || |z
            }t          j        ddd|z  z   dt          j                  }t          j
        |t          j        ||          gd          }|S )N      )dtyper   r   dim)mathfloorlog2torchtensorfloat32arangeint32powmincat)r$   closest_power_of_2basepowersslopes
extra_basenum_remaining_headsextra_powerss           u/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/falcon.py_get_alibi_slopesr?   G   sS   dj?)C)CDDD<	ty!344q899:;5=  D \!Q!335;GGGFYtV$$F_,,\A49Q);%;<<q@AABC5=
 
 

 "2D D
 
 |q1***AU[
 
 
 FEIj,$G$GHaPPPM    c            	       v     e Zd Z	 	 	 ddededz  dedz  def fdZdej	        d	ej	        d
ej	        fdZ
 xZS )FalconAttentionN configcache_configquant_configprefixc                    t                                                       |j        | _        t                      }|j        | _        | j        |z  dk    sJ | j        |z  | _        | j        | j        z  | _        | j        | j        z  | j        k    sJ |j        | _        |j	        | _	        | j        r|j
        | _        n| j	        rd| _        n| j        | _        | j        |k    r| j        |z  dk    sJ n|| j        z  dk    sJ t          d| j        |z            | _
        t          | j        | j        | j        | j        |j        d|| d          | _        | j        | j        z  | _        | j
        | j        z  | _        dt%          j        | j                  z  | _        |j        p|j         | _        t/          | j        | j        |j        d|| j        | d          | _        |j        | _        |j        | _        | j        r| j        r
J d	            | j        ret;          |d
d          }t=          | j        ||j                  | _         tC          | j        | j        | j        | j
        || d          | _"        d S | j        rtG                      }|| j        z  }|dz   | j        z  }	tI          | j                  | j        z  }
|
||	         %                                }
tC          | j        | j        | j        | j
        |
|| d          | _"        d S tC          | j        | j        | j        | j
        ||| d          | _"        d S )Nr   r   Tz.query_key_valuebiasskip_bias_addrF   rG   g      ?z.dense)rJ   rK   rF   reduce_resultsrG   z(Rotary and alibi are mutually exclusive.max_position_embeddingsi    )max_positionrope_parametersz.attn)num_kv_headsrF   rG   )rP   alibi_slopesrF   rG   )scalerP   rE   rF   rG   )&super__init__hidden_sizer   num_attention_headsr$   	num_headshead_dimnew_decoder_architecturemulti_queryrP   total_num_kv_headsmaxr   rJ   query_key_valueq_sizekv_sizer,   sqrtinv_norm_factorparallel_attnreduce_row_parallel_resultsr   denserotary
use_rotaryalibi	use_alibigetattrr   rO   
rotary_embr	   attnr   r?   tolist)selfrD   rE   rF   rG   tp_sizerM   tp_rank
head_starthead_endrQ   	__class__s              r>   rT   zFalconAttention.__init___   s    	!-688%9#g-2222-8(D,@@}t33t7GGGGG(.(G%!-( 	;&,&9D## 	;&'D##&*&:D#"g-- *W499999 T4499994#:g#EFF0M #%...	 
 	 
 	 
 nt}4(4=8  #TYt}%=%==+Cv/C,
( '%;$$$
 
 

 !-O 	
 	
 	
6	
 	
7 ? )	&-f6OQU&V&V#&4 & 6  DO
 "$!.) '''  DIII ^ 	466G 4>1J!t~5H!$"677$:NN  (
8(;<CCEEL!$!.)) '''  DIII "*!.)) '''  DIIIr@   	positionshidden_statesr%   c                 B   |                      |          \  }}|||z  }|                    | j        | j        | j        gd          \  }}}| j        r|                     |||          \  }}|                     |||          }|                     |          \  }}||fS )Nr*   )r]   splitr^   r_   rf   rj   rk   rd   )	rm   rs   rt   qkvrJ   qkvattn_outputs	            r>   forwardzFalconAttention.forward   s    
 ((77	T4KC))T[$,E2)NN1a? 	4??9a33DAqii1a(( JJ{33TD  r@   NNrC   __name__
__module____qualname__r   r   r   strrT   r/   Tensorr}   __classcell__rr   s   @r>   rB   rB   ^   s         ,026p pp "D(p )4/	p
 p p p p p pd!<! |! 
	! ! ! ! ! ! ! !r@   rB   c                   \     e Zd Z	 	 d
dededz  def fdZdej        dej        fd	Z	 xZ
S )	FalconMLPNrC   rD   rF   rG   c           
      F   t                                                       |j        }t          |d|z  |j        d|| d          | _        t          d          | _        |j        p|j	         | _
        t          d|z  ||j        d| j
        || d          | _        d S )N   Tz.dense_h_to_4hrI   geluz.dense_4h_to_h)rJ   rK   rL   rF   rG   )rS   rT   rU   r   rJ   dense_h_to_4hr   actrY   rb   rc   r   dense_4h_to_h)rm   rD   rF   rG   rU   rr   s        r>   rT   zFalconMLP.__init__   s     	(1O%,,,
 
 
 f%%+Cv/C,
( /O;%,,,
 
 
r@   xr%   c                     |                      |          \  }}|||z  }|                     |          }|                     |          \  }}||fS N)r   r   r   )rm   r   rJ   s      r>   r}   zFalconMLP.forward  sW    $$Q''4IAHHQKK$$Q''4$wr@   )NrC   )r   r   r   r   r   r   rT   r/   r   r}   r   r   s   @r>   r   r      s         37	
 

 )4/
 	
 
 
 
 
 
> %,        r@   r   c            	       v     e Zd Z	 	 	 ddededz  dedz  def fdZdej	        d	ej	        d
ej	        fdZ
 xZS )FalconDecoderLayerNrC   rD   rE   rF   rG   c                    t                                                       |j        }|j        | _        t          |||| d          | _        t          ||| d          | _        || _	        t          |d          sd |_        |j        |j        rd|_        |j        s7t          ||j                  | _        t          ||j                  | _        n]|j        dk    r7t          ||j                  | _        t          ||j                  | _        nt          ||j                  | _        |j        p|j         | _        d S )Nz.self_attentionrG   z.mlpnum_ln_in_parallel_attnr'   eps)rS   rT   rU   rV   rW   rB   self_attentionr   mlprD   hasattrr   rY   rb   r   layer_norm_epsilonpost_attention_layernorminput_layernormln_attnln_mlprc   )rm   rD   rE   rF   rG   rU   rr   s         r>   rT   zFalconDecoderLayer.__init__  ss    	(3-L,&7Q7Q7Q
 
 
 V\V///JJJv899 	2-1F*)1f6U1-.F*# 	,5!:- - -D) $-[f>W#X#X#XD  -22(&:STTT'9RSSS'0V%>( ( ($
 +Cv/C,
(((r@   rs   rt   r%   c                 `   |}| j         j        dk    r+|                     |          }|                     |          }n|                     |          }|                     ||          \  }}| j        r|||z  }| j         j        s)| j         j        r|}n||z  }| 	                    |          }| j         j        r| j         j        r| j         j        dk    r|}| 
                    |          \  }}	| j        r|	||	z  }| j        s"||z  }t          |          }|||z  }|	||	z  }||z   }
|
S )Nr'   )rs   rt   r   )rD   r   r   r   r   r   rc   rY   rb   r   r   r   )rm   rs   rt   residualattention_layernorm_outmlp_layernorm_outattention_outputattention_bias
mlp_outputmlp_biasoutputs              r>   r}   zFalconDecoderLayer.forward6  s   
 !;.!33&*ll=&A&A# $M : :&*&:&:=&I&I# ,0+>+>1 ,? ,
 ,
(. + 	/0J.{3 	L{( L$;!!,,$($A$A($K$K! K0	8)	8 3q88 7  $xx(9::
H+ 	#0D("J/ 		' **J9*EEJ)n,
#h&
h&r@   r~   r   r   s   @r>   r   r     s         ,026(
 (
(
 "D((
 )4/	(

 (
 (
 (
 (
 (
 (
T4<4 |4 
	4 4 4 4 4 4 4 4r@   r   c                        e Zd Zdddedef fdZdej        dej        fdZ	 ddej        d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
deeeej        f                  dee         fdZ xZS )FalconModelrC   r   vllm_configrG   c                   t                                                       |j        j        |j        |j        | _        j        | _        j	        | _
        j        | _        t          j        | j                  | _        t!          j        fd| d          \  | _        | _        | _        t+          | j        j                  | _        t1          dgj                  | _        d S )Nc                 *    t          |           S )Nr   )r   )rG   rE   rD   rF   s    r>   <lambda>z&FalconModel.__init__.<locals>.<lambda>  s     -l6   r@   z.hr   r   rt   )rS   rT   model_config	hf_configrE   rF   rD   rU   	embed_dimrV   rW   rg   rh   r   
vocab_sizeword_embeddingsr"   num_hidden_layersstart_layer	end_layerhr   r   ln_fr!   make_empty_intermediate_tensors)rm   r   rG   rE   rD   rF   rr   s      @@@r>   rT   zFalconModel.__init__o  s   )3"/"/+3  6N 
  
 4?$      ===4
 4
 4
0$.$& dn&2KLLL	/Vv10
 0
,,,r@   	input_idsr%   c                 ,    |                      |          S r   )r   rm   r   s     r>   embed_input_idszFalconModel.embed_input_ids  s    ##I...r@   Nrs   intermediate_tensorsinputs_embedsc                 B   t                      j        r||}n|                     |          }n|d         }t          | j        | j        | j                  D ]} |||          }t                      j        st          d|i          S | 	                    |          }|S )Nrt   )
r   is_first_rankr   r   r   r   r   is_last_rankr   r   )rm   r   rs   r   r   rt   layers          r>   r}   zFalconModel.forward  s     >>' 	B( - $ 4 4Y ? ?0AMDFD$4dnEE 	< 	<E!E)];;MM~~* 	I&'GHHH		-00r@   weightsc                    | j         j        }| j         j        r| j         j        }n| j         j        rd}n|}||z  }t          |                     d                    }t                      }|D ]\  }}|                    d          r||vr t          ||           r1||         }	d|v rt          |	dd           }
|j        }|
|                    |d |
         ||dz   dfz   ||
dz   d          z             } |                    |
dz   d	|          j        g |d |
         d||
dz   d          R  } |                    |
dz   |d          j        g |d |
         d||
dz   d          R  } |                    |
dz   |dz   d          j        g |d |
         d||
dz   d          R  }t          j        |||g|

          }t          |	dt"                    } ||	|           |                    |           |S )Nr   F)remove_duplicatez.biasr]   
output_dimr'   rv   r   r*   weight_loader)rD   rV   rY   rP   rZ   dictnamed_parameterssetendswithr    ri   shapeviewnarrowreshaper/   r6   r   add)rm   r   r$   r[   num_query_heads_per_kv_headparams_dictloaded_paramsnameloaded_weightparamr   loaded_weight_shapewqwkwvr   s                   r>   load_weightszFalconModel.load_weights  s   +9;/ 	1!%!9[$ 	1!"!0&59K&K#400%0HHII"%%%#* )	$ )	$D-}}W%% $k*A*A&tT22 %E D(($UL$??
&3&9#)$1$6$6+KZK8-/JQ/NPRST-j1n.>.>?@% %M
--"Q+F  -[j[9 	
 -Z!^-=-=>  B--"Q(CQ  -[j[9 	
 -Z!^-=-=>  B--"Q(Ca(G  -[j[9 	
 -Z!^-=-=>  B %*Ir2rl
$K$K$KM#E?<QRRMM%///d####r@   r   )r   r   r   r   r   rT   r/   r   r   r   r}   r   tupler   r   r   r   s   @r>   r   r   m  s       AC 
 
 
z 
3 
 
 
 
 
 
B/ /%, / / / / .2 < < 2D8	
 |d* 
+	+   *5HU33D-E$F 53s8 5 5 5 5 5 5 5 5r@   r   c                       e Zd ZddgiZdddedef fdZdej        dej        fd	Z		 	 ddej
        dej        ded
z  dej        d
z  dej        f
dZdej        dej        d
z  fdZdeeeej        f                  dee         fdZ xZS )FalconForCausalLMr]   rC   r   r   rG   c          	         t                                                       |j        j        }|j        }|| _        || _        t          |t          |d                    | _        |j	        |j	        nd| _	        | j	        r| j        j
        | _        n0t          |j        |j        |t          |d                    | _        t          |j                  | _        | j        j        | _        d S )Ntransformer)r   rG   Tlm_head)rF   rG   )rS   rT   r   r   rF   rD   r   r#   r   tie_word_embeddingsr   r   r   r   rU   r   logits_processorr   )rm   r   rG   rD   rF   rr   s        r>   rT   zFalconForCausalLM.__init__  s    )3"/(&#L,O,O
 
 
 )5 && 	 
 # 	+;DLL)!")#FI66	  DL !00A B B< 	,,,r@   r   r%   c                 6    | j                             |          S r   )r   r   r   s     r>   r   z!FalconForCausalLM.embed_input_ids  s    //	:::r@   Nrs   r   r   c                 6    |                      ||||          }|S r   )r   )rm   r   rs   r   r   rt   s         r>   r}   zFalconForCausalLM.forward  s+     ((y"6
 
 r@   rt   c                 <    |                      | j        |          }|S r   )r   r   )rm   rt   logitss      r>   compute_logitsz FalconForCausalLM.compute_logits  s      &&t|]CCr@   r   c                 l    t          | | j        j        rdgnd           }|                    |          S )Nzlm_head.)skip_prefixes)r   rD   r   r   )rm   r   loaders      r>   r   zFalconForCausalLM.load_weights  sC    "+/;+JTJ<<PT
 
 
 ""7+++r@   )NN)r   r   r   packed_modules_mappingr   r   rT   r/   r   r   
LongTensorr   r}   r   r   r   r   r   r   r   s   @r>   r   r     s^       -. BD 
 
 
z 
3 
 
 
 
 
 
>; ;%, ; ; ; ; <@-1
 
#
 <
 2D8	

 |d*
 

 
 
 
| 
	   ,HU33D-E$F ,3s8 , , , , , , , ,r@   r   )B__doc__r,   collections.abcr   	itertoolsr   typingr   r/   r   torch.nnr   transformersr   HF_FalconConfigvllm.attention.layerr	   vllm.compilation.decoratorsr
   vllm.configr   r   vllm.distributedr   r   r   r   %vllm.model_executor.layers.activationr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   vllm.sequencer   vllm.transformers_utils.configsr   
interfacesr   utilsr   r    r!   r"   r#   __annotations__intr   r?   ModulerB   r   r   r   r    r@   r>   <module>r
     s  *     $ $ $ $ $ $                          8 8 8 8 8 8 * * * * * * = = = = = = / / / / / / / /            = < < < < <         
 H G G G G G F F F F F F @ @ @ @ @ @        P O O O O O - - - - - - 4 4 4 4 4 4 " " " " " "              *H4i 4 4 4s u|    .@! @! @! @! @!bi @! @! @!F' ' ' ' '	 ' ' 'T_ _ _ _ _ _ _ _D o o o o o") o o od?, ?, ?, ?, ?,	: ?, ?, ?, ?, ?,r@   