§
    ÇPƒi<  ã                   óL   — d dl mZ d dlZd dlmZ  G d„ dej        ¦  «        ZdS )é    )ÚTupleN)Únnc                   ó¶   ‡ — e Zd ZdZdededededej        ddfˆ fd	„Zdd
„Ze	defd„¦   «         Z
dej        dej        deej        ej        f         fd„Zˆ xZS )ÚKVCachea   
    Standalone ``nn.Module`` containing a kv-cache to cache past key and values during inference.

    Args:
        batch_size (int): batch size model will be run with
        max_seq_len (int): maximum sequence length model will be run with
        num_kv_heads (int): number of key/value heads.
        head_dim (int): per-attention head embedding dimension
        dtype (torch.dtype): dtype for the caches
    Ú
batch_sizeÚmax_seq_lenÚnum_kv_headsÚhead_dimÚdtypeÚreturnNc                 ót  •— t          ¦   «                              ¦   «          ||||f}|                      dt          j        ||¬¦  «        d¬¦  «         |                      dt          j        ||¬¦  «        d¬¦  «         |                      dt          j        d|d         ¦  «        d¬¦  «         || _        d S )	NÚk_cache)r   F)Ú
persistentÚv_cacheÚ	cache_posr   é   )ÚsuperÚ__init__Úregister_bufferÚtorchÚzerosÚaranger   )Úselfr   r   r	   r
   r   Úcache_shapeÚ	__class__s          €ún/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchtune/modules/kv_cache.pyr   zKVCache.__init__   sÔ   ø€ õ 	‰Œ×ÒÑÔÐØ! <°¸hÐGˆØ×ÒØ•u”{ ;°eÐ<Ñ<Ô<Èð 	ñ 	
ô 	
ð 	
ð 	×ÒØ•u”{ ;°eÐ<Ñ<Ô<Èð 	ñ 	
ô 	
ð 	
ð 	×ÒØœ a¨°Q¬Ñ8Ô8ÀUð 	ñ 	
ô 	
ð 	
ð %ˆŒˆˆó    c                 ó”   — | j                              ¦   «          | j                             ¦   «          | xj        | j        z  c_        dS )zReset the cache to zero.N)r   Úzero_r   r   Úsize©r   s    r   ÚresetzKVCache.reset.   s@   € àŒ×ÒÑÔÐØŒ×ÒÑÔÐØˆŒ˜$œ)Ñ#ˆŒˆˆr   c                 ó@   — | j         d                              ¦   «         S )Nr   )r   Úitemr!   s    r   r    zKVCache.size4   s   € àŒ~˜aÔ ×%Ò%Ñ'Ô'Ð'r   Úk_valÚv_valc                 ó°  — |j         \  }}}}|| j        j         d         k    r1t          d| j        j         d         › d|j         d         › d¦  «        ‚| j        d         |z   | j        j         d         k    sJ ‚| j        }| j        }||dd…dd…| j        d|…         f<   ||dd…dd…| j        d|…         f<   | j                             |¦  «         ||fS )aP  Update KV cache with the new ``k_val``, ``v_val`` and return the updated cache.

        Note:
            When updating the KV cache, it is assumed that subsequent updates should update key-value
            positions in consecutive sequence positions. If you wish to update cache values which have
            already been filled, use ``.reset()``, which will reset the cache to the zero-th position.

        Example:
            >>> cache = KVCache(batch_size=2, max_seq_len=16, num_kv_heads=4, head_dim=32, dtype=torch.bfloat16)
            >>> keys, values = torch.ones((2, 4, 8, 32)), torch.ones((2, 4, 8, 32))
            >>> cache.update(keys, values)
            >>> # now positions 0 through 7 are filled
            >>> cache.size
            >>> 8
            >>> keys, values = torch.ones((2, 4, 1, 32)), torch.ones((2, 4, 1, 32))
            >>> cache.update(keys, values)
            >>> # this will fill at position 8
            >>> cache.size
            >>> 9

        Args:
            k_val (torch.Tensor): Current key tensor with shape [B, H, S, D]
            v_val (torch.Tensor): Current value tensor with shape [B, H, S, D]

        Returns:
            Tuple[torch.Tensor, torch.Tensor]: Updated key and value cache tensors, respectively.

        Raises:
            ValueError: if the batch size of the new key (or value) tensor is greater than the batch size
                used during cache setup.

        Note:
            This function will raise an ``AssertionError`` if the sequence length of ``k_val``
                is longer than the maximum cache sequence length.

        r   z6The current cache has been setup with a batch size of z,, but found new key tensors with batch size ú!r   N)Úshaper   Ú
ValueErrorr   r   Úadd_)r   r%   r&   ÚbszÚ_Úseq_lenÚk_outÚv_outs           r   ÚupdatezKVCache.update8   s  € ðN #œ[ÑˆˆQ˜Ø”Ô# AÔ&Ò&Ð&ÝðQÈÌÔI[Ð\]ÔI^ð Qð QØ?D¼{È1¼~ðQð Qð Qñô ð ð
 ”˜qÔ! GÑ+°´Ô0BÀ1Ô0EÒEÐEÐEÐEØ”ˆØ”ˆà05ˆˆaˆaˆaD”N 8 G 8Ô,Ð,Ñ-Ø05ˆˆaˆaˆaD”N 8 G 8Ô,Ð,Ñ-ð 	Œ×Ò˜GÑ$Ô$Ð$àeˆ|Ðr   )r   N)Ú__name__Ú
__module__Ú__qualname__Ú__doc__Úintr   r   r   r"   Úpropertyr    ÚTensorr   r1   Ú__classcell__)r   s   @r   r   r      sü   ø€ € € € € ð	ð 	ð%àð%ð ð%ð ð	%ð
 ð%ð Œ{ð%ð 
ð%ð %ð %ð %ð %ð %ð*$ð $ð $ð $ð ð(cð (ð (ð (ñ „Xð(ð>Ø”\ð>Ø*/¬,ð>à	ˆuŒ|˜Uœ\Ð)Ô	*ð>ð >ð >ð >ð >ð >ð >ð >r   r   )Útypingr   r   r   ÚModuler   © r   r   ú<module>r=      st   ðð Ð Ð Ð Ð Ð à €€€Ø Ð Ð Ð Ð Ð ðið ið ið ið iˆbŒiñ iô ið ið ið ir   