
    Pi                     \    d dl mZ d dlZd dlmZmZ d dlmZ  G d dej                  Z	dS )    )TupleN)nnTensor)
functionalc                   l     e Zd ZdZdededdf fdZdedeeef         fdZde	fd	Z
d
edefdZ xZS )VectorQuantizedEmbeddingsa  
    Vector quantized embedding layer that takes in the output of an encoder
    and performs a nearest-neighbor lookup in the embedding space.
    Vector quantization was introduced in Oord et al. 2017 (https://arxiv.org/pdf/1711.00937.pdf)
    to generate high-fidelity images, videos, and audio data.

    This module currently does not support pre-training of the embeddings via EMA.

    Code was adapted from torchmultimodal's `Codebook module
    <https://github.com/facebookresearch/multimodal/blob/main/torchmultimodal/modules/layers/codebook.py>`_.

    Args:
        num_embeddings (int): Number of vectors in the embedding space.
        embedding_dim (int): Dimensionality of the embedding vectors.
    num_embeddingsembedding_dimreturnNc                     t                                                       t          j        t	          j        ||                    | _        || _        || _        d S N)	super__init__r   	Parametertorchempty	embeddingr	   r
   )selfr	   r
   	__class__s      s/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchtune/modules/vq_embeddings.pyr   z"VectorQuantizedEmbeddings.__init__   sN    
 	ek.-&P&PQQ,*    zc                    |j         \  }}}|| j        k    rt          d| d| j                   |                    d|          }t	          j        || j        d          dz  }t	          j        |d          }|                     |          }|||z
  	                                z   }|                    |||          }	|                    ||          }
|	|
fS )	a  
        Args:
            z (Tensor): Tensor containing a batch of encoder outputs of shape ``(b, s, d)``, where
                b is batch size, s is sequence length or time, and d is ``embedding_dim``.

        Returns:
            Tuple[Tensor, Tensor]: The quantized input and the embedding vector ids that were used.

        Raises:
            ValueError: if input embedding dimension does not match embedding dimension of module
        z)Expected last dimension of input tensor (z) to be embedding size of g       @)p      )dim)
shaper
   
ValueErrorviewr   cdistr   argmindecodedetach)r   r   bszseq_lenz_embed_dimz_flat	distancestoken_ids_flatquantized_flat	quantized	token_idss              r   forwardz!VectorQuantizedEmbeddings.forward)   s     %&G!Wk$,,,wKwwcgcuww  
 K((K#>>>!C	 iQ777 ^44  >F#:"B"B"D"DD #''WkBB	"''W55	)##r   c                 B    d                     | j        | j                  S )Nz#num_embeddings={}, embedding_dim={})formatr	   r
   )r   s    r   
extra_reprz$VectorQuantizedEmbeddings.extra_reprO   s%    4;;!3
 
 	
r   r.   c                 6    t          j        || j                  S r   )Fr   )r   r.   s     r   r$   z VectorQuantizedEmbeddings.decodeT   s    {9dn555r   )__name__
__module____qualname____doc__intr   r   r   r/   strr2   r$   __classcell__)r   s   @r   r   r      s          ++ + 
	+ + + + + +$$ $$E&&.$9 $$ $$ $$ $$L
C 
 
 
 

6 66 6 6 6 6 6 6 6 6r   r   )
typingr   r   r   r   torch.nnr   r4   Moduler    r   r   <module>r@      s                   $ $ $ $ $ $H6 H6 H6 H6 H6	 H6 H6 H6 H6 H6r   