
    .`iA                     j   d dl Z d dlZd dlZd dlmZ d dlmZ d dlZd dlmc m	Z
 d dlmZ d dlmZ d dlmZmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlm Z m!Z!m"Z"m#Z#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z* ddl+m,Z,  eed          Z-	 	 d+dej.        de/e0e0f         de1de2dej.        f
dZ3 G d dej4                  Z5ej6        d e d!e0de7e          fd"            Z8 G d# d$e          Z9 G d% d&ej:                  Z; G d' d(ej:                  Z< G d) d*ej:                  Z=dS ),    N)replace)partial)nn)	Attention)CacheConfig
VllmConfig)$get_tensor_model_parallel_world_size)RMSNorm)QKVParallelLinearRowParallelLinear)QuantizationConfig)get_rope)
MistralMLP)WhisperPosEmbedType)AttentionBackendAttentionMetadataAttentionTypeCommonAttentionMetadata)subclass_attention_backend_with_overrides)FlashAttentionBackend)get_attn_backend)AttentionSpec   )make_layersgh㈵>)epsconstant        xpaddingsmodevaluereturnc                 d   | j         d         }|\  }}|dk    r|dk    sJ ||f            |dk    rjt          ||          }d}||k    r||z
  dz   }t          j        | d|f          } t          j        | |||          }	|	j         d         |z
  }
|	dd|
f         S t          j        | |||          S )zTiny wrapper around F.pad, just to allow for
    reflect padding on small input.
    If this is the case, we insert extra 0 padding
    to the right before the reflection happen.
    r   reflectr   .N)shapemaxFpad)r   r   r    r!   lengthpadding_leftpadding_rightmax_pad	extra_padpaddedends              }/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/whisper_causal.py_pad1dr2   )   s     WR[F"*L-1!!3!3!3lM5R!3!33ylM22	W&(1,Ia!Y((Aq(D%00l2*c4C4i  uQ$...    c                   l     e Zd Z	 	 	 ddededededed	ed
df fdZdej        d
ej        f fdZ xZ	S )WhisperCausalConv1dr   r   Tin_channelsout_channelskernel_sizestridepaddingbiasr"   Nc                     t                                          ||||||           | j        d         | _        |dz
  | j        d         z  dz   | _        | j        | j        z
  | _        d S )N)r9   r:   r;   r   r   )super__init__r9   _stridedilation_effective_kernel_size_padding_total)selfr6   r7   r8   r9   r:   r;   	__class__s          r1   r>   zWhisperCausalConv1d.__init__E   s|     	 	 	
 	
 	
 {1~'2Q$-:J&JQ&N#"9DLHr3   r   c                 F   |j         d         | j        z
  | j        z   | j        z  dz   }t	          j        |          dz
  | j        z  | j        | j        z
  z   }||j         d         z
  }t          || j        |fd          }t                                          |          S )Nr$   r   r   )r    )	r&   rA   rB   r?   mathceilr2   r=   forward)rC   r   n_framestarget_lengthextra_paddingrD   s        r1   rH   zWhisperCausalConv1d.forwardZ   s    GBK$558KKL 8,,q0DL@'$*==
 &31t*M:LLLwwq!!!r3   )r   r   T)
__name__
__module____qualname__intboolr>   torchTensorrH   __classcell__rD   s   @r1   r5   r5   D   s         I II I 	I
 I I I 
I I I I I I*	" 	"%, 	" 	" 	" 	" 	" 	" 	" 	" 	" 	"r3   r5   underlying_attn_backendblock_pool_sizec                     d}|                                  } G fdd|          t          | t                    st          |  d          t	          || fdfdd          }|S )	N'WhisperCausalAttentionWithBlockPooling_c            	       l     e Zd Zdedee         dedej        f fdZ		 dde
ded	ed
ef fdZ xZS )jcreate_whisper_attention_backend_with_block_pooling.<locals>.WhisperCausalAttentionWithBlockPoolingBuilderkv_cache_speclayer_namesvllm_configdevicec                     |j         z  dk    sJ t          ||j        z  |j         z            }t                                          ||||           d S )Nr   )
block_sizenum_kv_heads)ra   r   r`   r=   r>   )rC   r[   r\   r]   r^   rD   rV   s        r1   r>   zscreate_whisper_attention_backend_with_block_pooling.<locals>.WhisperCausalAttentionWithBlockPoolingBuilder.__init__n   sq     !-?1DDDD#(3oE*7?J  M
 GG]KfMMMMMr3   Fcommon_prefix_lencommon_attn_metadata
fast_buildr"   c                 H   t          j        |          }|xj        z  c_        |xj        z  c_        |xj        z  c_        |xj        z  c_        |xj        z  c_        |xj        z  c_        |xj        z  c_        |xj	        z  c_	        |j
        }|z  }|                    d          z  t          j        |j                  z                                                       d          |_
        t#                                          |||          S )Nr   )r^   r$   )min)copydeepcopyquery_start_locquery_start_loc_cpuseq_lens_seq_lens_cpu_num_computed_tokens_cpunum_actual_tokensmax_query_lenmax_seq_lenslot_mapping	unsqueezerQ   aranger^   flattenclampr=   build)rC   rb   rc   rd   new_common_attn_metadataoriginal_slot_mappingrD   rV   s         r1   rv   zpcreate_whisper_attention_backend_with_block_pooling.<locals>.WhisperCausalAttentionWithBlockPoolingBuilder.build}   sB    (,}5I'J'J$$44G44$88OK88$--@--$22oE22$==P==$66/I66$22oE22$00OC00$8$E!0 *33A66Hl?;P;WXXXY 2 %1 77==!#;Z  r3   )F)rL   rM   rN   r   liststrr   rQ   r^   r>   rO   r   rP   r   rv   rS   )rD   rV   s   @r1   -WhisperCausalAttentionWithBlockPoolingBuilderrZ   m   s        	N(	N c	N $		N
 L	N 	N 	N 	N 	N 	N 	N&  %		 	"	 #:	 		
 	 	 	 	 	 	 	 	 	 	 	r3   r{   zR is not yet supported.Contributions to support more backends are much appreciated.c                       S N )r{   s   r1   <lambda>zEcreate_whisper_attention_backend_with_block_pooling.<locals>.<lambda>   s    'T r3   c                     d| |z  |z  |fS )N   r~   )
num_blocksr`   ra   	head_sizecache_dtype_strrV   s        r1   r   zEcreate_whisper_attention_backend_with_block_pooling.<locals>.<lambda>   s(    
 _,/	 r3   )get_builder_clsget_kv_cache_shape)name_prefixattention_backend_cls	overrides)r   
issubclassr   NotImplementedErrorr   )rU   rV   prefixunderlying_builderattn_backendr{   s    `   @r1   3create_whisper_attention_backend_with_block_poolingr   f   s     7F0@@BB+ + + + + + +8J + + +Z -/DEE 
!&   
 
 	
 =5TTTT# # # #
 
  L( r3   c                        e Zd ZdZdddddddej        dddfdededededz  d	ee         dz  d
e	dz  de
dz  dedz  dedz  dedededz  dedee         dz  ddf fdZdef fdZ xZS )&WhisperCausalAttentionWithBlockPoolingz#Attention layer with block pooling.N r   	num_headsr   scalera   alibi_slopescache_configquant_configlogits_soft_capper_layer_sliding_windowr   	attn_typekv_sharing_target_layer_namerV   r   r"   c                     || _         t          j                    }||j        }|j        }nd}d}t          |||||          }t          ||          } t                      j        d|||||||||	|
|||d| d S )Nauto   )r   )r   r   r   ra   r   r   r   r   r   r   r   r   r   r~   )	rV   rQ   get_default_dtypecache_dtyper`   r   r   r=   r>   )rC   r   r   r   ra   r   r   r   r   r   r   r   r   rV   r   extra_impl_argsdtypekv_cache_dtyper`   rU   rD   s                       r1   r>   z/WhisperCausalAttentionWithBlockPooling.__init__   s    $  /'))#)5N%0JJ#NJ"2#
 #
 #
 K#_
 
 	 	
%%%%+%=)E%	
 	
 	
 	
 	
 	
 	
r3   r]   c                     t                                          |          }t          |t                    sJ t	          || j        |j        z            }|S )N)ra   )r=   get_kv_cache_spec
isinstancer   r   rV   ra   )rC   r]   r[   rD   s      r1   r   z8WhisperCausalAttentionWithBlockPooling.get_kv_cache_spec   s]    11+>>-77777-0JJ
 
 
 r3   )rL   rM   rN   __doc__r   DECODERrO   floatry   r   r   rz   typer   r>   r   r   rS   rT   s   @r1   r   r      sV       -- $(+/+/26(,/3&.37 6:6
 6
6
 6
 	6

 Dj6
 5kD(6
 "D(6
 )4/6
 6
 #&*6
 6
 6
 '*Dj6
 6
 +,t36
" 
#6
 6
 6
 6
 6
 6
pZ          r3   r   c                        e Zd Zdej        dddddfdedededed	ed
ededz  dededz  dedz  de	f fdZ
deddfdZ	 	 	 dded	ededz  de	ddf
dZ	 ddej        dej        dz  fdZ xZS )WhisperCausalAttentionTNr   r   	embed_dimr   head_dimmax_position_embeddingsr;   r   r   rV   r   r   r   c                 F   t                                                       || _        t                      }|| _        | j        |z  dk    sJ | j        |z  | _        | j        |k    r| j        |z  dk    sJ n|| j        z  dk    sJ t          d| j        |z            | _        || _        | j        | j        z  | _	        | j        | j        z  | _
        || _        | j        dz  | _        |                     |||
|           t          | j        | j        z  |||
| d          | _        |dk    sJ d| d            t!          | j        | j        | j        | j        |	|
| d	t"          j        ||

  
        | _        |
J d            |                     |           d S )Nr   r   g      ࿩r   z	.out_proj)
input_sizeoutput_sizer;   r   r   z6Causal attention only supports block_pool_size>1, not .z.attn)ra   r   r   r   r   r   rV   z7rope can only used in combination with a sliding window)r=   r>   r   r	   total_num_headsr   r'   ra   r   q_sizekv_sizer   scaling	_init_qkvr   out_projr   r   r   attn_init_rotary_emb)rC   r   r   r   r   r;   r   r   rV   r   r   r   tp_sizerD   s                r1   r>   zWhisperCausalAttention.__init__   s    	"688(#g-2222-87** ''1Q66666 T11Q66664#77#BCC nt}4(4=8"}d*y$VDDD)+dm;!%'''
 
 
 """W_WWW #"" ;NML*%%####+%=+
 
 
	 (33E 433 	566666r3   r"   c                 D    t          | j        |dddi          | _        d S )NF
rope_thetag    .A)max_positionis_neox_stylerope_parameters)r   r   
rotary_emb)rC   r   s     r1   r   z'WhisperCausalAttention._init_rotary_emb?  s/    "M0)3/	
 
 
r3   c           
      `    t          || j        | j        | j        ||| d          | _        d S )Nz	.qkv_proj)hidden_sizer   r   total_num_kv_headsr;   r   r   )r   r   r   qkv_proj)rC   r   r;   r   r   s        r1   r   z WhisperCausalAttention._init_qkvG  sD     *!m 0#3%'''
 
 
r3   hidden_states	positionsc                 *   |                      |          \  }}|                    | j        | j        | j        gd          \  }}}|J |                     |||          \  }}|                     |||          }|                     |          \  }	}|	S )Nr$   )dim)r   splitr   r   r   r   r   )
rC   r   r   qkv_qkvattn_outputoutputs
             r1   rH   zWhisperCausalAttention.forwardX  s    
 }--Q))T[$,E2)NN1a$$$y!Q//1ii1a((MM+..	r3   )TNr   r}   )rL   rM   rN   r   r   rO   rP   r   r   rz   r>   r   r   rQ   rR   rH   rS   rT   s   @r1   r   r      s        #0#8/3 +/26?7 ?7?7 ?7 	?7
 "%?7 ?7 !?7 #&*?7 ?7 "D(?7 )4/?7 ?7 ?7 ?7 ?7 ?7 ?7B
 
 
 
 
 
 26
 

 
 )4/	

 
 

 
 
 
( *. | <$&       r3   r   c                   \     e Zd Zdddedef fdZ	 d
dej        dej        dz  fd	Z xZ	S )WhisperCausalEncoderLayerr   r   r]   r   c                   t                                                       |j        j        }t	          |dd           }|j        }|dk    sJ |j        }|j        }|j        | _	        | j	        |j
        z  | _        t          | j	        |j
        |j        |j        ||||| d	  	        | _        t!          | j	                  | _        t%          |j        |j        d|dd| d	          | _        t!          | j	                  | _        d S )
Nsliding_windowr   z
.self_attn)	r   r   r   r   rV   r   r   r   r   siluTFz.mlp)r   intermediate_size
hidden_actr   r;   gate_up_proj_biasr   )r=   r>   model_config	hf_configgetattrrV   r   r   d_modelr   encoder_attention_headsr   r   encoder_head_dimr   	self_attnCausalRMSNormself_attn_layer_normr   encoder_ffn_dimmlpfinal_layer_norm)	rC   r]   r   configr   rV   r   r   rD   s	           r1   r>   z"WhisperCausalEncoderLayer.__init__k  s"   )3 )94@@ 0"""""/"/&*HH/n4,$*$B+%3%%(((

 

 

 %2$.$A$A!$4%#???
 
 
 !.dn = =r3   Nr   r   c                     |}|                      |          }|                     ||          }||z   }|}|                     |          }|                     |          }||z   }|S )N)r   r   )r   r   r   r   )rC   r   r   residuals       r1   rH   z!WhisperCausalEncoderLayer.forward  st    
 !11-@@]iXX =0 --m<<// =0r3   r}   )
rL   rM   rN   r   rz   r>   rQ   rR   rH   rS   rT   s   @r1   r   r   j  s        AC "> "> ">z ">3 "> "> "> "> "> ">N *. | <$&       r3   r   c                        e Zd Zdddedef fdZdej        eej                 z  dej        fdZ	d	ej        d
ej        dej        fdZ
 xZS )WhisperCausalEncoderr   r   r]   r   c                   t                                                       j        j        }|j        }t          |j                  t
          j        k    sJ |j        sJ |j	        | _	        |j
        | _
        |j        rt          j        |          nd| _        t          | j	        |d          | _        t          ||dd          | _        | j        j        d         | j        j        d         z  | _        t)          |j        fd| d	          \  | _        | _        | _        t3          |j                  | _        d S )
Ng      ?   )r8   r   )r9   r8   r   c                 ,    t          |  d          S )N.layers)r]   r   )r   )r   r]   s    r1   r   z/WhisperCausalEncoder.__init__.<locals>.<lambda>  s%    4'60B0B0B   r3   r   r   )r=   r>   r   r   r   r   	pos_embedROPE	is_causalnum_mel_binsmax_source_positionsscale_embeddingrF   sqrtembed_scaler5   conv1conv2r9   total_strider   encoder_layersstart_layer	end_layerlayersr   
layer_norm)rC   r]   r   r   r   rD   s    `   r1   r>   zWhisperCausalEncoder.__init__  sE   )3N	"6#3448K8PPPPP"/$*$?!393IR49Y///s():ISTUUU
(IaUVWWW
 J-a04:3DQ3GG8C!    %%%9
 9
 9
5$.$+ (77r3   input_featuresr"   c                    g }|D ]}t           j                            |                     |                    }t           j                            |                     |                    }|                    dd                              |j                  }|                    |           t          j
        |          }|S )Nr$   )r   
functionalgelur   r   	transposetor   appendrQ   cat)rC   r  r   featuresembedss        r1   forward_convz!WhisperCausalEncoder.forward_conv  s     & 	) 	)H]''

8(<(<==F]''

6(:(:;;F%%b"--00>>F  ((((	-00r3   r   r   c                 \    | j         D ]} |||          }|                     |          }|S r}   )r   r   )rC   r   r   encoder_layers       r1   rH   zWhisperCausalEncoder.forward  sA     "[ 	D 	DM)M-CCMM66r3   )rL   rM   rN   r   rz   r>   rQ   rR   ry   r  rH   rS   rT   s   @r1   r   r     s        AC 8 8 8z 83 8 8 8 8 8 82#lT%,-??	    "\6;l	       r3   r   )r   r   )>rg   	functoolsrF   dataclassesr   r   rQ   torch.nn.functionalr   r  r(   vllm.attention.layerr   vllm.configr   r   vllm.distributedr	   $vllm.model_executor.layers.layernormr
   !vllm.model_executor.layers.linearr   r   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   "vllm.model_executor.models.mistralr   "vllm.model_executor.models.whisperr   vllm.v1.attention.backendr   r   r   r   r   %vllm.v1.attention.backends.flash_attnr   vllm.v1.attention.selectorr   vllm.v1.kv_cache_interfacer   utilsr   r   rR   tuplerO   rz   r   r2   Conv1dr5   	lru_cacher   r   r   Moduler   r   r   r~   r3   r1   <module>r$     s                                     * * * * * * / / / / / / / / A A A A A A 8 8 8 8 8 8        G F F F F F @ @ @ @ @ @ 9 9 9 9 9 9 B B B B B B              H G G G G G 7 7 7 7 7 7 4 4 4 4 4 4      T*** 	/ /|/CHo/ / 	/
 \/ / / /6" " " " "") " " "D N-N@CN	
N N N NbB B B B BY B B BJj j j j jRY j j jZ3 3 3 3 3	 3 3 3l1 1 1 1 129 1 1 1 1 1r3   