
     `i]                         d dl Z d dlZd dlZddlmZ  e            rd dlmZ dZdZ e	 ej
        de                    Zeeefvr ed          i Zd	 Zd
 Z	 	 	 ddZ	 	 	 	 	 ddZdS )    N   )is_torch_npu_available)npu_fusion_attention   NPU_FA2_SPARSE_MODE)defaultzEnvironment variable `NPU_FA2_SPARSE_MODE` can only be set as 2 (top-left aligned causal mask) or 3 (down-right aligned causal mask).c                     | t           vrFt          j        t          j        ddg|           d                                          t           | <   t           |          S )z6Get or create attention mask for the specified device.i   device   )diagonal)ATTN_MASK_NPU_CACHEtorchtriuonesboolr
   s    /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/integrations/npu_flash_attention.pyget_attn_mask_npur   (   sT    (((&+jT4LQW1X1X1Xcd&e&e&e&j&j&l&lF#v&&    c                  B    t                      rt          t          k    ndS )NF)r   SPARSE_MODE!TOP_LEFT_ALIGNED_CAUSAL_MASK_MODE r   r   'is_npu_fa2_top_left_aligned_causal_maskr   /   s    ?U?W?Wb;;;;]bbr           Fc                 8   d|z
  }|"dt          j        | j        d                   z  }|s*| j        d         }t          | |||d||          d         }	nDt	          | j                  }
| j        d         }t          | |||d|||
t          	  	        d         }	|	S )N      ?r   BSND)	keep_probscaler   )r    r!   
atten_masksparse_mode)mathsqrtshaper   r   r   r   )qkv	dropout_psoftmax_scalecausalkwargsr    head_numoutputattn_mask_npus              r   npu_flash_attn_funcr1   3   s     iIdi444 71:%aAx9\ijjjklm)!(3371:%$#

 

 

 
 Mr   c
                    d|z
  }|"dt          j        | j        d                   z  }|	s| j        d         }t          | |||d d ||dt	          |dd                                                                                                                    t	          |dd                                                                                                                              d         }nt          | j	                  }| j        d         }t          | |||d d |||dt	          |dd                                                                                                                    t	          |dd                                                                                                                    t                    d         }|S )Nr   r   r   TND)pser"   r!   r    input_layoutactual_seq_qlenactual_seq_kvlenr   )	r4   padding_maskr"   r!   r    r5   r6   r7   r#   )r$   r%   r&   r   tuplecpunumpytolistr   r   r   )r'   r(   r)   cu_seqlens_qcu_seqlens_kmax_seqlen_qmax_seqlen_kr*   r+   r,   r-   r    r.   r/   r0   s                  r   npu_flash_attn_varlen_funcrA   V   s    iIdi444  71:%!,qrr"2"6"6"8"8">">"@"@"G"G"I"IJJ"<#3#7#7#9#9#?#?#A#A#H#H#J#JKK
 
 
  *!(3371:%$!,qrr"2"6"6"8"8">">"@"@"G"G"I"IJJ"<#3#7#7#9#9#?#?#A#A#H#H#J#JKK#
 
 
   Mr   )r   NF)NNr   NF)r$   osr   utils.import_utilsr   	torch_npur   r   #DOWN_RIGHT_ALIGNED_CAUSAL_MASK_MODEintgetenvr   
ValueErrorr   r   r   r1   rA   r   r   r   <module>rI      s,    				  7 7 7 7 7 7  /......
 %& !&' #c)")1;^___``8:]^^^
*	1  
  ' ' 'c c c        R 4 4 4 4 4 4r   