
    Pi<                     L    d dl mZ d dlZd dlmZ  G d dej                  ZdS )    )TupleN)nnc                        e Zd ZdZdededededej        ddf fd	Zdd
Ze	defd            Z
dej        dej        deej        ej        f         fdZ xZS )KVCachea  
    Standalone ``nn.Module`` containing a kv-cache to cache past key and values during inference.

    Args:
        batch_size (int): batch size model will be run with
        max_seq_len (int): maximum sequence length model will be run with
        num_kv_heads (int): number of key/value heads.
        head_dim (int): per-attention head embedding dimension
        dtype (torch.dtype): dtype for the caches
    
batch_sizemax_seq_lennum_kv_headshead_dimdtypereturnNc                 t   t                                                       ||||f}|                     dt          j        ||          d           |                     dt          j        ||          d           |                     dt          j        d|d                   d           || _        d S )	Nk_cache)r   F)
persistentv_cache	cache_posr      )super__init__register_buffertorchzerosaranger   )selfr   r   r	   r
   r   cache_shape	__class__s          n/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchtune/modules/kv_cache.pyr   zKVCache.__init__   s     	!<hGu{;e<<< 	 	
 	
 	
 	u{;e<<< 	 	
 	
 	
 	aQ88U 	 	
 	
 	
 %    c                     | j                                          | j                                         | xj        | j        z  c_        dS )zReset the cache to zero.N)r   zero_r   r   sizer   s    r   resetzKVCache.reset.   s@    $)#r   c                 @    | j         d                                         S )Nr   )r   itemr!   s    r   r    zKVCache.size4   s    ~a %%'''r   k_valv_valc                    |j         \  }}}}|| j        j         d         k    r1t          d| j        j         d          d|j         d          d          | j        d         |z   | j        j         d         k    sJ | j        }| j        }||dddd| j        d|         f<   ||dddd| j        d|         f<   | j                            |           ||fS )aP  Update KV cache with the new ``k_val``, ``v_val`` and return the updated cache.

        Note:
            When updating the KV cache, it is assumed that subsequent updates should update key-value
            positions in consecutive sequence positions. If you wish to update cache values which have
            already been filled, use ``.reset()``, which will reset the cache to the zero-th position.

        Example:
            >>> cache = KVCache(batch_size=2, max_seq_len=16, num_kv_heads=4, head_dim=32, dtype=torch.bfloat16)
            >>> keys, values = torch.ones((2, 4, 8, 32)), torch.ones((2, 4, 8, 32))
            >>> cache.update(keys, values)
            >>> # now positions 0 through 7 are filled
            >>> cache.size
            >>> 8
            >>> keys, values = torch.ones((2, 4, 1, 32)), torch.ones((2, 4, 1, 32))
            >>> cache.update(keys, values)
            >>> # this will fill at position 8
            >>> cache.size
            >>> 9

        Args:
            k_val (torch.Tensor): Current key tensor with shape [B, H, S, D]
            v_val (torch.Tensor): Current value tensor with shape [B, H, S, D]

        Returns:
            Tuple[torch.Tensor, torch.Tensor]: Updated key and value cache tensors, respectively.

        Raises:
            ValueError: if the batch size of the new key (or value) tensor is greater than the batch size
                used during cache setup.

        Note:
            This function will raise an ``AssertionError`` if the sequence length of ``k_val``
                is longer than the maximum cache sequence length.

        r   z6The current cache has been setup with a batch size of z,, but found new key tensors with batch size !r   N)shaper   
ValueErrorr   r   add_)r   r%   r&   bsz_seq_lenk_outv_outs           r   updatezKVCache.update8   s   N #[Q#A&&&QI[\]I^ Q Q?D{1~Q Q Q  
 q!G+0B10EEEEE05aaaDN8G8,,-05aaaDN8G8,,- 	G$$$e|r   )r   N)__name__
__module____qualname____doc__intr   r   r   r"   propertyr    Tensorr   r1   __classcell__)r   s   @r   r   r      s        	 	%% % 	%
 % {% 
% % % % % %*$ $ $ $ (c ( ( ( X(>\>*/,>	u|U\)	*> > > > > > > >r   r   )typingr   r   r   Moduler    r   r   <module>r=      st                i i i i ibi i i i i ir   