
    Pis'                        U d dl Z d dlmZmZmZmZ d dlZd dlmZ d dlm	Z	 d dl
mZmZ  e            Ze j        ed<   e	rjd dlmZmZmZ d Z e            Zej                            d	
          dej        dej        dej        dedej        f
d            Zeej        ef         Znej        Zdeej                 dej        fdZdeej                 dej        fdZdeej                 defdZdefdZdS )    N)CallableListOptionalUnion)nn)_SUPPORTS_FLEX_ATTENTION)
get_loggerlog_once_log)	BlockMaskcreate_block_maskflex_attentionc                  @   	 t          j        t          d          S # t          $ rv} t                              d|  d           	 t          j        t          dd          cY d } ~ S # t          $ r$} t                              d|  d            d } ~ ww xY wd } ~ ww xY w)	NF)dynamicz,Compiling flex_attention failed with error 'z%'. Retrying with mode='max-autotune'.zmax-autotune)r   modez-Compiling flex_attention failed with error: 'z', Updating your pytorch version to nightlies may solve it, or you can setin your config dataset.packed=False to avoid using flex attention.)torchcompiler   	Exceptionr   info)es    u/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchtune/modules/attention_utils.pycompile_flex_attentionr      s    	=???? 	 	 	 IIgqggg  }^UXXXXXXXXX   		YA Y Y Y  
 	s8    
BBA'!B'
B1BBBBF)	recursiveqkv
block_maskreturnc                 (    t          | |||          S )Nr   )flex_attention_compiled)r   r   r   r   s       r   compile_friendly_flex_attentionr"   2   s     'q!Q:FFFF    seq_lensc                     t          |           }g }t          |          D ]H}t          j        d t	          | |                   D                       }|                    |           It          j        |          }|S )a  
    Convert a batch tensor of seq lens into integer IDs denoting sample ownership.
    For example, seq_lens = [2, 3, 1] would return [0, 0, 1, 1, 1, 2].

    Args:
        seq_lens (List[torch.Tensor]): Sequence lengths of samples in each pack in the batch,
            shape (batch_size, n), where n is the max number of sequences in a pack and can vary
            across packs.

    Returns:
        Tensor: Document IDs of shape (batch_size, max_seq_len).
    c                 d    g | ]-\  }}t          j        |f|t           j        |j                   .S )dtypedevice)r   fulllongr)   .0iseq_lens      r   
<listcomp>z3_get_document_ids_from_seq_lens.<locals>.<listcomp>U   sE       Aw 
G:q
7>RRR  r#   )lenranger   cat	enumerateappendstack)r$   
batch_sizebatch_document_ids
sample_idxdocument_idss        r   _get_document_ids_from_seq_lensr;   @   s     XJJ'' 	0 	0
 y "+HZ,@"A"A  
 
 	!!,////%788r#   c                     g }t          |           }t          |          D ]B}d t          | |                   D             }|                    t	          j        |            Ct	          j        |          S )a  
    Given a batch tensor of seq lens defining the lengths of samples in each pack,
    Construct a 2D block causal mask for each pack in the batch. For example, if
    a single sample's seq_lens is [3, 2, 1], the mask would be::

        mask = [
            [1, 0, 0, 0, 0, 0],
            [1, 1, 0, 0, 0, 0],
            [1, 1, 1, 0, 0, 0],
            [0, 0, 0, 1, 0, 0],
            [0, 0, 0, 1, 1, 0],
            [0, 0, 0, 0, 0, 1],
        ]

    Args:
        seq_lens (List[torch.Tensor]): Sequence lengths of samples in each pack in the batch,
            shape (batch_size, n), where n is the max number of sequences in a pack and can vary
            across packs.


    Returns:
        Tensor: Block causal mask of shape (batch_size, max_seq_len, max_seq_len).
    c           
          g | ]>\  }}t          j        t          j        ||t           j        |j                             ?S r'   )r   trilonesboolr)   r,   s      r   r0   z,create_block_causal_mask.<locals>.<listcomp>z   sT     
 
 
 7 J
7G5:gnUUU 
 
 
r#   )r1   r2   r4   r5   r   
block_diagr6   )r$   batch_block_attn_masksr7   r9   block_attn_maskss        r   create_block_causal_maskrD   _   s    0  XJJ'' K K

 
 ((<==	
 
 
 	%%e&68H&IJJJJ;-...r#   c                     t           rHt          |           j        \  }}                    d          fd}t	          ||d||d          S t          |           S )a  
    Create a block causal document mask for a batch of packed sequences. If
    flex attention is supported by the current hardware, block causal logic and
    passing this into :func:`torch.nn.attention.flex_attention.create_block_mask`.
    The resultant BlockMask is a compressed representation of the full block causal
    mask. If on an older version, a standard 2D block causal mask is created and returned.

    Args:
        seq_lens (List[torch.Tensor]): Sequence lengths of samples in each pack in the batch,
            shape (batch_size, n), where n is the max number of sequences in a pack and can vary
            across packs.

    Returns:
        _MaskType: BlockMask or Tensor if torch version < 2.5.0.
    cudac                 F    ||k    }| |f         | |f         k    }||z  S )a  
            Defines the logic of a block causal mask by combining both a standard causal mask
            and a block diagonal document mask.

            See :func:`~torchtune.modules.attention_utils.create_block_causal_mask`
            for an illustration.
             )bhq_idxkv_idxcausal_maskdocument_maskr:   s         r   mask_modz*packed_block_causal_mask.<locals>.mask_mod   s5      6/K(E2l1f96MMM..r#   N)r)   )r$   )r   r;   shapetocreate_block_causal_mask_flexrD   )r$   r7   max_seq_lenrO   r:   s       @r   packed_block_causal_maskrT      s    $   ;6x@@"."4
K#v..
	/ 
	/ 
	/ 
	/ 
	/ -
 
 
 	
 (::::r#   c                  f   t           rUdt          j        dt          j        dt          j        dt          t                   dt
          dt          dt          j        fd} nTdt          j        dt          j        dt          j        dt          t                   dt
          dt          dt          j        fd	} | S )
aE  
    Helper function to decide when to call flex attention or SDPA. It will use
    flex attention if ALL of the following conditions are met, otherwise it will
    default to SDPA:
    - torch version >= 2.5.0
    - we are sample packing, therefore mask is a BlockMask
    - torch.cuda.get_device_capability() >= (7, 5)
    r   r   r   mask	dropout_p	is_causalr   c                 0   t          |t                    rIt          t          dt          j                   |dk    rt          d          t          | |||          S ||d d d d d d d f         }t          j	        
                    | |||||          S )NzOUsing flex attention for attention computation since a BlockMask was passed in.)levelg        zCFlex attention does not support dropout. Please set dropout to 0.0.r    	attn_maskrW   rX   )
isinstancer   r
   r   loggingDEBUG
ValueErrorr"   r   
functionalscaled_dot_product_attentionr   r   r   rV   rW   rX   s         r   _attention_callz0_sdpa_or_flex_attention.<locals>._attention_call   s     $	** e!-   
 s??$]   7#	    #4AAA.D }AA"'' B   r#   c                 t    ||d d d d d d d f         }t           j                            | |||||          S )Nr[   )r   ra   rb   rc   s         r   rd   z0_sdpa_or_flex_attention.<locals>._attention_call   s[     AAAtQQQM* ===## >   r#   )r   r   Tensorr   	_MaskTypefloatr@   )rd   s    r   _sdpa_or_flex_attentionri      s       G-	|-	|-	 |-	 9%	-	
 -	 -	 \-	 -	 -	 -	 -	b	|	|	 |	 9%		
 	 	 \	 	 	 	, r#   ) r^   typingr   r   r   r   r   r   torchtune.utils._import_guardr   torchtune.utils._loggingr	   r
   r   Logger__annotations__!torch.nn.attention.flex_attentionr   r   rR   r   r   r!   compilerdisablerf   r"   rg   r;   rD   rT   ri   rH   r#   r   <module>rr      s-    2 2 2 2 2 2 2 2 2 2 2 2        B B B B B B 9 9 9 9 9 9 9 9!z||gn # # # +           & 5466 ^e,,G<G<G <G 	G
 
G G G -,G elI-.III5< 
\   >#/tEL'9 #/el #/ #/ #/ #/L1;5< 1;1; 1; 1; 1;hS S S S S S Sr#   