
    .`iG                        d Z ddlZddlZddlmc mZ ddlmZ ddl	m
Z
 	 	 	 ddej        dej        dej        ded	ed
edz  dedz  dej        dz  dej        dz  dej        fdZ	 	 	 ddej        dej        dej        ded	ed
edz  dedz  dej        dz  dej        dz  dej        fdZ e
dee           	 	 	 ddej        dej        dej        ded	ed
edz  dedz  dej        dz  dej        dz  dej        fdZ	 ddej        dej        dej        dedz  dej        f
dZ	 	 ddej        dej        dej        dedz  dej        dz  dej        fdZdej        dej        dej        dedz  dej        dz  dej        fdZ e
dee           	 	 ddej        dej        dej        dedz  dej        dz  dej        fdZdS )a  
This file contains ops for ViT attention to be compatible with torch.compile
as there are operations here not supported by torch.compile (for instance,
`.item()` in flash attention)

Using these ops and wrapping vision blocks with `torch.compile` can speed up
throughput in vision models by ~5% relative on H100, and improve token
latencies by ~7% (see qwen2_5_vl for example usage)

To use these ops, you must have a recent version of PyTorch installed (>= 2.4.0)
    N)current_platform)direct_register_custom_opqkv
batch_sizeis_rocm_aiter
fa_versionscale
cu_seqlens
max_seqlenreturnc	                    i }	|rddl m}
 n ddlm}
 t          j                    s|||	d<   |                     d          }|.t          j        d|dz   |z  |t          j        | j	                  }||n|
                                }d | ||fD             \  } }} |
| ||f||||dd|d	|	}t          j        |d
|          }|S )Nr   )flash_attn_varlen_funcr
      )stepdtypedevicec              3   @   K   | ]}t          j        |d           V  dS )zb s ... -> (b s) ...Neinops	rearrange.0xs     {/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/v1/attention/ops/vit_attn_wrappers.py	<genexpr>z/flash_attn_maxseqlen_wrapper.<locals>.<genexpr>2   s0      NNqv#9::NNNNNN            F)cu_seqlens_qcu_seqlens_kmax_seqlen_qmax_seqlen_k	dropout_pcausalsoftmax_scalez(b s) h d -> b s h d)b)aiterr   #vllm.v1.attention.backends.fa_utilsr   is_rocmsizetorcharangeint32r   itemr   r   )r   r   r   r   r	   r
   r   r   r   kwargsr   q_lenoutputcontext_layers                 r   flash_attn_maxseqlen_wrapperr4      s:    F .0000000NNNNNN')) 	.j.D#-F< FF1IIE\
Q%'e5;qx
 
 

 %,*//2C2CJNNQ1INNNGAq!##			     F $V-CzRRRMr   c	                 *    t          j        |           S Nr,   
empty_like	r   r   r   r   r	   r
   r   r   r   s	            r   !flash_attn_maxseqlen_wrapper_faker:   D   s     Ar   r4   )op_nameop_func	fake_implc	                 Z    t           j        j                            | ||||||||	  	        S r6   )r,   opsvllmr4   r9   s	            r   vit_flash_attn_wrapperrA   Y   s;     9>66			
 
 
r   c                     d | ||fD             \  } }}t          j        | ||d|          }t          j        |d          }|S )zI
    Input shape:
    (batch_size x seq_len x num_heads x head_size)
    c              3   @   K   | ]}t          j        |d           V  dS )zb s h d -> b h s dNr   r   s     r   r   zapply_sdpa.<locals>.<genexpr>{   s0      LLQv#788LLLLLLr   r   )r$   r   zb h s d -> b s h d )Fscaled_dot_product_attentionr   r   )r   r   r   r   r2   s        r   
apply_sdparF   q   sW     ML1a)LLLGAq!+Aq!s%PPPFf&;<<FMr   c                 V   t          j                    r<|                                 } |                                }|                                }|t          | |||          S g }|dd          |d d         z
                                  }t          j        | |d          }t          j        ||d          }t          j        ||d          }	t          |||	          D ].\  }
}}t          |
|||          }|                    |           /t          j	        |d          }|S )N)r   r   )dim)
r   r*   
contiguousrF   tolistr,   splitzipappendcat)r   r   r   r   r   outputslensq_chunksk_chunksv_chunksq_ik_iv_ioutput_ir3   s                  r   torch_sdpa_wrapperrY      s,    !! LLNNLLNNLLNN!Q////GqrrNZ_,4466D{1d***H{1d***H{1d***HXx:: ! !S#c35999x    Ig1---Mr   c                 *    t          j        |           S r6   r7   r   r   r   r   r   s        r   torch_sdpa_wrapper_faker\      s     Ar   rY   c                 R    t           j        j                            | ||||          S r6   )r,   r?   r@   rY   r[   s        r   vit_torch_sdpa_wrapperr^      s$     9>,,Q1eZHHHr   )NNNr6   )NN)__doc__r   r,   torch.nn.functionalnn
functionalrD   vllm.platformsr   vllm.utils.torch_utilsr   Tensorintboolfloatr4   r:   rA   rF   rY   r\   r^    r   r   <module>rj      s9  
 
            + + + + + + < < < < < < &*&** *|*|* |* 	*
 * d
* 4<* t#* t#* \* * * *h &*&* || | 	
  d
 4< t# t# \     *(/    &*&* || | 	
  d
 4< t# t# \   8 	 || | 4<	
 \   , &* || | 4<	
 t# \   <|| | 4<	
 t# \      %    &*I I|I|I |I 4<	I
 t#I \I I I I I Ir   