
     `i5                     *   d dl mZ d dlZddlmZmZ ddlmZ  ej        e	          Z
 e            Z	 	 	 	 ddej        j        dej        d	ej        d
ej        deej                 dedee         dee         dee         deej        df         fdZdS )    )OptionalN   )_flash_attention_forward!flash_attn_supports_top_left_mask)logging        modulequerykeyvalueattention_maskdropoutscalingsliding_windowsoftcapreturnc	                 v   |	                     dd          s|	                     d          t                              d           |j        d         }
t	          d |j        D                       rt          d          |                    dd          }|                    dd          }|                    dd          }d }|j        t          j	        k    r~t          j
                    rt          j                    }nWt          | j        d	          r| j        j        }n5t          d
 |                                 D                       j        j        }|	                    dd           }|| j        }t)          ||||f|
|||||t*          || j        j        t          | d          r| j        nd d
|	}|d fS )Noutput_attentionsF	head_maskz`flash_attention_2` does not support `output_attentions=True` or `head_mask`. Please set your attention to `eager` if you want any of these features.r   c              3   "   K   | ]
}|d k    V  dS )r   N ).0dims     }/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/integrations/flash_attention.py	<genexpr>z*flash_attention_forward.<locals>.<genexpr>#   s&      
+
+3!8
+
+
+
+
+
+    zTensor query has shape  with a zero dimension.
FlashAttention does not support inputs with dim=0.
Please check your input shapes or use SDPA instead.   _pre_quantization_dtypec              3   X   K   | ]%}t          |t          j        j                  !|V  &d S )N)
isinstancetorchnnLinear)r   layers     r   r   z*flash_attention_forward.<locals>.<genexpr>;   s9      jj%zRWY^YaYhGiGijjjjjjjr   	is_causal	layer_idx)
query_lengthr%   r   softmax_scaler   r   use_top_left_masktarget_dtypeattn_implementationr&   )getloggerwarning_onceshapeany
ValueError	transposedtyper!   float32is_autocast_enabledget_autocast_gpu_dtypehasattrconfigr   nextmodulesweightpopr%   r   _use_top_left_mask_attn_implementationr&   )r	   r
   r   r   r   r   r   r   r   kwargsseq_lenr*   r%   attn_outputs                 r   flash_attention_forwardrB      s    zz%u-- 
K1H1H1TW	
 	
 	
 k!nG

+
+u{
+
+
+++ 
B
 
 	
 OOAq!!E
--1

COOAq!!E L{em##$&& 	x 799LLV]$=>> 	x!=@LLjj6>>3C3CjjjjjqwL 

;--I$	*	
 %,!"M>&-fk&B&BL&""   K$ r   )r   NNN)typingr   r!   modeling_flash_attention_utilsr   r   utilsr   
get_logger__name__r-   r=   r"   ModuleTensorfloatinttuplerB   r   r   r   <module>rM      s8          h h h h h h h h       
	H	%	%6688  #$(#F FHOF<F 
F <	F
 U\*F F e_F SMF e_F 5<F F F F F Fr   