
    ,`i#9                         d dl Z d dlmZ d dlmZ  ee          Z	 d dlZn-# e	$ r%Z
e                    de
j                   Y dZ
[
ndZ
[
ww xY w G d d          ZdS )    N)init_logger)current_platformzImport error msg: %sc            0          e Zd Zedej        deej        ej        f         fd            Zedej        dej        ddfd            Zedej        dej        ddfd            Z	edej        dej        ddfd            Z
edej        dej        fd	            Zedej        dej        fd
            Zedej        dej        ddfd            Ze	 	 	 	 	 dfdej        dej        dej        dej        dededej        dej        dededej        dz  dededededededededdf(d             Ze	 	 	 	 	 dfdej        d!ej        d"ej        d#ej        dej        dej        dej        dededej        dej        dededej        dz  dededededededededdf.d$            Zed%ej        dej        d&ej        d'ed(ej        d)eddfd*            Zed+ej        d,ej        d-edej        fd.            Zed+ej        d/ej        d,ej        d-eddf
d0            Zedej        d&ej        d1ej        dej        d2ej        d3ej        dej        dz  d4ed5ed6ed7ed8ed9ed:ed;ej        d<ed=ed>eddf&d?            Zed&ej        d1ej        dej        dej        d@ej        dedededdfdA            Ze	 	 	 	 dgd&ej        d1ej        dej        dej        d@ej        dedej        dz  dej        dz  dCedDeddfdE            Ze	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dhdIej        dJej        dKej        dLej        d4ed5ed7edz  dMedej        dz  dNej        dz  dej        dz  dOee         dz  dPedz  dQej        dz  dRej        dz  dSedTedUej        dz  f$dV            Zeej        dddddddFdWdFdddfdXej        dLej        dz  dYej        dz  dZej        dz  d[edz  ddfd\            Zed]ej        d^ej        d_ej        ddfd`            Ze	 	 	 	 	 did+ej        dej        dz  daedz  dbej        dz  dceddej        dz  deej        ej        f         fde            Z dS )jipex_opsxreturnc                    |                      d          }|                      d          dz  }|                     |d|          } t          j        | dd          \  }}|                    ||          }|                    ||          }||fS )Nr         )chunksdim)sizereshapetorchchunk)r   numdx1x2s        b/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/_ipex_ops.py_reshape_activation_tensorz#ipex_ops._reshape_activation_tensor   s     ffQiiFF1IINIIc1a  Qqa000BZZQZZQ2v    outNc                 P    t           j        j                            ||            d S N)ipexllm
functionalsilu_and_mulr   r   s     r   r   zipex_ops.silu_and_mul   #    ((C00000r   c                 P    t           j        j                            ||            d S r   r   r   r   gelu_and_mulr    s     r   r$   zipex_ops.gelu_and_mul#   r!   r   c                 P    t           j        j                            ||            d S r   r#   r    s     r   gelu_tanh_and_mulzipex_ops.gelu_tanh_and_mul'   r!   r   c                 J    t           j        j                            |           S r   r   nnr   gelur   s    r   	gelu_fastzipex_ops.gelu_fast+       x"''***r   c                 J    t           j        j                            |           S r   r(   r+   s    r   gelu_newzipex_ops.gelu_new/   r-   r   c                 P    t           j        j                            ||            d S r   )r   r   r   
gelu_quickr    s     r   r1   zipex_ops.gelu_quick3   s#    &&q#.....r   r   @   query	key_cachevalue_cachenum_kv_headsscaleblock_tablescontext_lens
block_sizemax_context_lenalibi_slopeskv_cache_dtypek_scalev_scaletp_rankblocksparse_local_blocksblocksparse_vert_strideblocksparse_block_sizeblocksparse_head_sliding_stepc                     |dk    sJ |                      d          }||z  }t          j        j        j                            | |                                |                    |          |||||||	|
           d S Nautor
   r   r   r   modulesPagedAttentionsingle_query_kv_attention
contiguousview_as)r   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   	num_headsnum_queries_per_tokenss                        r   paged_attention_v1zipex_ops.paged_attention_v17   s    , ''''HHQKK	!*l!:'AAk**"	
 	
 	
 	
 	
r   exp_sum
max_logitstmp_outc                     |dk    sJ |                      d          }||z  }t          j        j        j                            | |                                |                    |          ||||	|
|||           d S rF   rH   )r   rQ   rR   rS   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rN   rO   s                           r   paged_attention_v2zipex_ops.paged_attention_v2^   s    2 ''''HHQKK	!*l!:'AAk**"	
 	
 	
 	
 	
r   	positionskey	head_sizecos_sin_cacheis_neoxc           	          |                     d          }t          j        j                            | ||||||           d S )Nr
   )r   r   r   r   rotary_embedding_batched)rV   r3   rW   rX   rY   rZ   rot_dims          r   rotary_embeddingzipex_ops.rotary_embedding   sM      $$Q''44uc9mWg	
 	
 	
 	
 	
r   inputweightepsilonc                     t          j        |           }t           j        j                            ||                                 ||           |S r   )r   
empty_likeops
torch_ipexrms_norm_vllmrL   )r_   r`   ra   r   s       r   rms_normzipex_ops.rms_norm   sD     u%%	**30@0@0B0BFGTTT
r   residualc                 T    t           j        j                            | |||           d S r   )r   rd   re   fused_add_rms_norm_vllm)r_   rh   r`   ra   s       r   fused_add_rms_normzipex_ops.fused_add_rms_norm   s)     		44UHfgVVVVVr   valueseqlen_qseqlen_kmax_seqlen_qmax_seqlen_kpdropoutsoftmax_scalezero_tensors	is_causalreturn_softmaxgen_window_size_leftwindow_size_rightlogits_soft_capc                    t           j                            d          r|dk    rt          d          |J |dk     r|dk     sJ t           j        j                            |                                 |                                |                                ||                                |                                |||	|
||||           d S t           j        j                            |                                 |                                |                                ||                                |                                ||||	|
|||||||           d S )Ncpu        z)IPEX CPU does not support logits_soft_capr   )	r   __version__endswith
ValueErrorr   r   varlen_attentionrL   int)r3   rW   rl   r   rm   rn   r<   ro   rp   rq   rr   rs   rt   ru   rv   rw   rx   ry   s                     r   r   zipex_ops.varlen_attention   ss   * $$U++ )	#%% !LMMM'''#a'',=,A,A,AAH00  ""    ""    " H00  ""    "" !%    r   slot_mappingc                 p    |dk    sJ t           j        j        j                            | ||||           d S )NrG   )r   r   rI   rJ   reshape_and_cache)rW   rl   r4   r5   r   r=   r>   r?   s           r   r   zipex_ops.reshape_and_cache   sJ     '''''99	;	
 	
 	
 	
 	
r         ?k_scale_floatv_scale_floatc
           
      f    t           j        j        j                            | |||||||	           d S r   )r   r   rI   rJ   reshape_and_cache_flash)
rW   rl   r4   r5   r   r=   r>   r?   r   r   s
             r   r   z ipex_ops.reshape_and_cache_flash   sF     	'??		
 		
 		
 		
 		
r   Fr|   r   qkvcu_seqlens_qcausalblock_tablewindow_sizesoftcap	seqused_kcu_seqlens_k	dropout_p
fa_versions_auxc                 v   |&t          j        | j        | j        | j                  }|d}n%t          |          dk    sJ |d         |d         f}|	|
J d            || j        d         dz  }t                              |                                 |                                |                                |||d ||d	|d
|d
d |d         |d         d           |S t          j
        j        j                            ||                                 |||||||||	|
|||d         |d         dd          S )N)dtypedevicer   r   r   r
   z9cu_seqlens_k can't be None when calling varlen_attention.r   g      r|   Fr   )sinkr   rw   rx   r>   r?   )r   emptyshaper   r   lenr   r   rL   r   r   rI   rJ   flash_attn_varlen_func)r   r   r   r   ro   rp   rr   r   r   r   r<   r   r   r   r   r   scheduler_metadatar   	q_descale	k_descale	v_descale
num_splitsr   real_window_sizes                           r   r   zipex_ops.flash_attn_varlen_func  sz   : ;+agQWQXFFFC'{##q(((( +AA?++K ,++ $ ! 5%% # #%  ( J8#2II!1!!4"21"5% J   r   r   cache_seqlenscu_seqlens_k_newcache_leftpad	page_sizec                 :    t                               d           d S )NzGget_scheduler_metadata is not implemented for ipex_ops, returning None.)loggerwarning_once)
batch_sizero   rp   num_heads_qnum_heads_kvheaddimr   	qkv_dtype	headdim_vr   r   r   r   max_seqlen_k_newr   r   has_softcapr   pack_gqa	sm_margins                       r   get_scheduler_metadatazipex_ops.get_scheduler_metadataf  s&    . 	U	
 	
 	
 tr   srcdstblock_mappingc                 H    t           j                            | ||           d S r   )r   xpuswap_blocks)r   r   r   s      r   r   zipex_ops.swap_blocks  s$     		c366666r   num_token_paddingscale_ubuse_per_token_if_dynamicoutputc                    | j         dk    sJ | j        }t          j                    }|r#t	          || j        d                   |d         f}|t          j        || j        |          }n|
J d            |j        |k    sJ |
J d            |r
J d            t          j	        d| j        t
          j
                  }t
          j        j                            || |           ||fS )	a]  
        Quantize input tensor to FP8 and return quantized tensor and scale.

        This function is designed for both static and dynamic quantization:
        If you provide the scale, it will use static scaling and if you omit
        it, the scale will be determined dynamically. Currently, XPU platform
        only supports dynamic quantization. The function also allows optional
        padding of the output tensors for downstream kernels that will benefit
        from padding.

        Args:
            input: The input tensor to be quantized to FP8
            scale: Optional scaling factor for the FP8 quantization
            scale_ub: Optional upper bound for scaling factor in dynamic
                per token case
            num_token_padding: If specified, pad the first dimension
                of the output to at least this value.
            use_per_token_if_dynamic: Whether to do per_tensor or per_token
                in the dynamic quantization case.

        Returns:
            tuple[torch.Tensor, torch.Tensor]: The output tensor in FP8 and
                scaling factor.
        r   r   r
   N)r   r   z)padding not supported if output passed inz.only dynamic fp8 quantization supported on XPUz7per token dynamic fp8 quantization not supported on XPU)ndimr   r   	fp8_dtypemaxr   r   r   r   zerosfloat32rd   re   dynamic_scaled_fp8_quant)r_   r7   r   r   r   r   r   	out_dtypes           r   scaled_fp8_quantzipex_ops.scaled_fp8_quant  s   D zQ.3k!1!;!=!=	 	G*EKN;;U1XFE>[u|9MMMFF$,,; -,, <9,,,,}}N}}}+ 	
 	
E	
 	
+ Ael%-HHH	55feUKKKu}r   )r   r   r   r2   r   )NNr   r   )NFNNNNr|   NNr|   Nr   NNNr   N)NNNFN)!__name__
__module____qualname__staticmethodr   Tensortupler   r   r$   r&   r,   r/   r1   r   floatstrrP   rU   boolr^   rg   rk   	Generatorr   r   r   listr   bfloat16r   r   r    r   r   r   r      s
       	<		u|U\)	*	 	 	 \	 1%, 15< 1D 1 1 1 \1 1%, 15< 1D 1 1 1 \1 1u| 1 1 1 1 1 \1 +U\ +el + + + \+ +EL +U\ + + + \+ / / /$ / / / \/   ()'(&(-.'$
 $
\$
|$
 <$
 \	$

 $
 $
 l$
 l$
 $
 $
 lT)$
 $
 $
 $
 $
  #&!$
" "%#$
$ !$%$
& (+'$
( 
)$
 $
 $
 \$
L & ()'(&(-.-'
 '
\'
'
 L'
 	'

 |'
 <'
 \'
 '
 '
 l'
 l'
 '
 '
 lT)'
 '
  !'
" #'
$ %'
& #&''
( "%)'
* !$+'
, (+-'
. 
/'
 '
 '
 \'
R 
<
|
 \
 	

 |
 
 

 
 
 \
 |%*\<A	   \ W|W,W W 	W
 
W W W \W =|=\= |= \	=
 ,= ,= lT)= = = = = = = = o=   !=" !#=$ %=& 
'= = = \=~ 
\
|
 <
 \	

 l
 
 
 
 

 
 
 \
  (,'+""
 
\
|
 <
 \	

 l
 
 $
 $
 
 
 

 
 
 \
.  '+#'+/,0(, #)-,0  %)5T T<T<T <T l	T
 T T t|T T \D T \D(T lT)T #Y%T T <$&T lT)T" #T* +T4 |d"5T T T \Tl  .,004-1 $)  | lT)  ,- |d* :* 
+   \6 7\7 %7=B\7	7 7 7 \7
  &*(,(,).&*4 4|4|d"4 :4 ,%	4
 #'4 t#4 
u|U\)	*4 4 4 \4 4 4r   r   )r   vllm.loggerr   vllm.platformsr   r   r   intel_extension_for_pytorchr   ImportErroredebugmsgr   r   r   r   <module>r      s   
  # # # # # # + + + + + +	X		0..... 0 0 0
LL'////////0k k k k k k k k k ks   " AAA