
    Pi+                        d Z ddlmZ ddlmZmZ ddlZddlmc m	Z
 ddlmZmZmZ ddlmZmZmZ ddlmZ ddlmZ d	 Zd
 Z G d de          ZdS )z
Disentanglement via Latent Quantization
 - https://arxiv.org/abs/2305.18378
Code adapted from Jax version in https://github.com/kylehkhsu/latent_quantization
    )annotations)CallableListN)pack	rearrangeunpack)Tensorint32nn)Module)	Optimizerc                $    t          | g|          S N)r   )tpatterns     /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vector_quantize_pytorch/latent_quantization.pypack_oner      s    W    c                0    t          | ||          d         S )Nr   )r   )r   psr   s      r   
unpack_oner      s    !R!!!$$r   c                  ~     e Zd Z	 	 	 	 	 	 	 d%d& fdZd'd(dZd'd(dZd)dZd*dZd+dZd+d Z	d,d-d"Z
d)d#Zd)d$Z xZS ).LatentQuantize皙?   NTlevelsList[int] | intdimintcommitment_loss_weightfloat | Nonequantization_loss_weightnum_codebookscodebook_dimkeep_num_codebooks_dimbool | Noneoptimize_valuesin_place_codebook_optimizerCallable[..., Optimizer]c
                   t                                                       || _        |	| _        t	          j        |t                    }
t          |t                    r+	 |
	                    |          }
n# t          $ r}|d}~ww xY w|                     dt	          j        |t          j                  d           |                     dt	          j        |t          j                  d           |                     d|
d           t	          j        t	          j        t	          j        dgt                    |
dd	         gd
          d
          }|                     d|d           |d
k    r|nt          |
          | _        | j        |z  }|| _        || _        |r|n|dk    }|dk    r|sJ || _        | j        |k    }|rt)          j        | j        |          nt)          j                    | _        |rt)          j        || j                  nt)          j                    | _        || _        | j                                                                        | _        |                     t	          j        | j                  d          }|                     d|d           d |
D             }|r>t)          j         d |D                       | _!        |	 |	| j!                  | _        dS dS || _!        dS )a  
        Initializes the LatentQuantization module.

        Args:
            levels (List[int]|init): The number of levels per codebook.
                If an int is provided, it is used for all codebooks.
            dim (int): The dimensionality of the input tensor.
                The input tensor is expected to be of shape [B D ...]
            num_codebooks (int): The number of codebooks to use.
                (default is 1)
            codebook_dim (int): the dimension of the codebook.
                If levels is a list, codebook_dim is the length of the list.
                (default to -1) 
            keep_num_codebooks_dim (Optional[bool]): Whether to keep the number of codebooks dimension in the output tensor. If not provided, it is set to True if num_codebooks > 1, otherwise False.
            optimize_values (Optional[bool]): Whether to optimize the values of the codebook. If not provided, it is set to True.
        )dtypeNr!   F)
persistentr#   _levelsr   r   r   r   _basis)project_outimplicit_codebookc                    g | ];}|d z  dk    rt          j        dd|          nt          j        |          |z  dz
  <S )   r   g      g      ?)torchlinspacearange).0levels     r   
<listcomp>z+LatentQuantize.__init__.<locals>.<listcomp>x   sa     
 
 
  qyA~~ N4e,,,e$$u,s2
 
 
r   c                6    g | ]}t          j        |          S  )r   	Parameter)r8   valuess     r   r:   z+LatentQuantize.__init__.<locals>.<listcomp>   s"    FFF&f%%FFFr   )"super__init__r   r)   r5   tensorr
   
isinstancer    repeatRuntimeErrorregister_bufferfloat32cumprodconcatlenr%   r$   effective_codebook_dimr&   r   LinearIdentity
project_inr1   has_projectionsr.   proditemcodebook_sizeindices_to_codesr7   ParameterListvalues_per_latent)selfr   r   r!   r#   r$   r%   r&   r(   r)   r.   er0   rJ   rN   r2   rT   	__class__s                    r   r@   zLatentQuantize.__init__   so   < 	+F(,vU333 fc"" 	!..66   $L/u}EEE 	 	
 	
 	

 	&L1GGG 	 	
 	
 	

 	YEBBBL%,s%888'#2#,GQOOOUV
 
 
 	Xv%@@@,81,<,<LL#g,,!%!2]!B*&<# '=S""-RSBS 	 "A%%.D%%E&<#(&<< BIdh 6777 	 BI,dh777 	
  /!\..005577 11L+,,% 2 
 
 	02CPUVVV
 
 !	
 
 
  		7%'%5FF4EFFF& &D" +63N3N*4 4000 76
 &7D"""s   !A7 7
BBBmeanzr	   zhatreturnc                T    t          j        |                                ||          S )zComputes the quantization loss.	reductionFmse_lossdetachrU   rY   rZ   reduces       r   quantization_lossz LatentQuantize.quantization_loss   s!    z$++--f====r   c                T    t          j        |                                ||          S )zComputes the commitment loss.r]   r_   rc   s       r   commitment_losszLatentQuantize.commitment_loss   s!    z!((**df====r   c                    d t          j         fdt           j                  D             d          t          j         fdt           j                  D             d          }|z
                                  z   }|S )zQuantizes z, returns quantized zhat, same shape as z.
        The quantization is done by measuring the distance between the input and the codebook values per latent dimension
        and returning the index of the closest codebook value.
        c                0    t          j        | |z
            S r   )r5   abs)xys     r   distancez)LatentQuantize.quantize.<locals>.distance   s    9QU###r   c           
     x    g | ]6}t          j         d |df         j        |                   d          7S ).Nr   r/   )r5   argminrT   )r8   irm   rU   rY   s     r   r:   z+LatentQuantize.quantize.<locals>.<listcomp>   s_         HQsAt|_d.DQ.GHHb    r   r   r/   c                F    g | ]}j         |         d |f                  S ).)rT   )r8   rp   indexrU   s     r   r:   z+LatentQuantize.quantize.<locals>.<listcomp>   s>        &q)%Q-8  r   )r5   stackranger%   rb   )rU   rY   quantizerm   rr   s   `` @@r   ru   zLatentQuantize.quantize   s    	$ 	$ 	$       t011	   
 
 
 ;    t011   
 
 
 1,,...r   zhat_normalizedc                ,    | j         dz  }|dz  |z  |z   S )z;scale and shift zhat from [-0.5, 0.5] to [0, level_per_dim]r4   r.   )rU   rv   
half_widths      r   _scale_and_shiftzLatentQuantize._scale_and_shift   s#    \Q&
!#j0J>>r   c                ,    | j         dz  }||z
  |z  dz  S )znormalize zhat to [-0.5, 0.5]r4   rx   )rU   rZ   ry   s      r   _scale_and_shift_inversez'LatentQuantize._scale_and_shift_inverse   s#    \Q&
z!Z/!33r   c                    |j         d         | j        k    sJ |                     |          }|| j        z                      d                              t                    S )zSConverts a `code` which contains the number per latent to an index in the codebook.r   r/   )shaper%   rz   r0   sumtor
   )rU   rZ   s     r   codes_to_indiceszLatentQuantize.codes_to_indices   s[    z"~!22222$$T**t{"''B'//225999r   indicesc                    t          |d          }|| j        z  | j        z  }|                     |          }| j        rt          |d          }|r|                     |          }t          |d          }|S )zInverse of `codes_to_indices`.z... -> ... 1z... c d -> ... (c d)b ... d -> b d ...)r   r0   r.   r|   r&   r1   )rU   r   r1   codes_non_centeredcodess        r   rR   zLatentQuantize.indices_to_codes   s     G^44%4D--.@AA& 	=e%;<<E 	,$$U++E%!566r   c                <   |                      |          }|                     |          }t          |d          }|                     |          }t	          ||d          }t          |d          }t	          ||d          }| j        st          |d          }|||fS )Nb n c d -> b n (c d)b * dr   b * c... 1 -> ...)ru   r   r   r1   r   r&   )rU   rY   is_img_or_videor   r   r   outs          r   quantize_and_projectz#LatentQuantize.quantize_and_project   s    a  ''..%!788u%%b'**122Wb'22* 	988Gc7""r   c                2   |}| j         du}t          |d          }t          |d          \  }}|j        d         | j        k    s J d| j         d|j        d                      |                     |          }t          |d| j                  }|                     |          }|                     |          }t          |d	          }| 	                    |          }t          ||d          }t          |d
          }t          ||d          }| j        st          |d          }|r[| j        rS| j        sK| j        dk    r|                     ||          nt!          j        d          }|| j        dk    r|                     ||          nt!          j        d          z  }|                                 | j                                          | j                                          |                     |          }|                     |          }t          |d	          }| 	                    |          }t          ||d          }t          |d
          }t          ||d          }| j        st          |d          }| j        r!| j        dk    r|                     ||          nt!          j        d          }	| j        r!| j        dk    r|                     ||          nt!          j        d          }
| j        |	z  | j        |
z  z   }|||fS )z
        einstein notation
        b - batch
        n - sequence (or flattened spatial dimensions)
        d - feature dimension
        c - number of codebook dim
        Nzb d ... -> b ... dr   r   zexpected dimension of z but found dimension of zb n (c d) -> b n c d)cr   r   r   r   r   g        )r)   r   r   r~   r   rM   r$   ru   r   r1   r   r&   trainingr(   r!   rg   r5   rA   r#   re   backwardstep	zero_grad)rU   rY   original_inputshould_inplace_optimizer   r   r   r   lossrg   re   s              r   forwardzLatentQuantize.forward   sQ    "&"B$"Na-..G$$2 GBK48###SDHSSagbkSS $## OOAa/43EFFFa  ''..%!788u%%b'**122Wb'22* 	988G" 	=t} 	=T=Q 	= .!33 $$Q,,,\#&& 
 0A55 &&q#...\#&&D
 MMOOO,11333,66888MM!$$E++E22Ge%;<<E""5))CS"g..CC!566C "g66G. =#G^<<
 }#!%!<!A!A   555c"" 	 }#!%!>!!C!C "">3777c"" 	 '/9+.??@ 	
 GT!!r   )r   r   r   r   NTN)r   r   r   r    r!   r"   r#   r"   r$   r    r%   r    r&   r'   r(   r'   r)   r*   )rX   )rY   r	   rZ   r	   r[   r	   )rY   r	   r[   r	   )rv   r	   r[   r	   )rZ   r	   r[   r	   )T)r   r	   r[   r	   )__name__
__module____qualname__r@   re   rg   ru   rz   r|   r   rR   r   r   __classcell__)rW   s   @r   r   r      s.       
 0314.2'+ l7 l7 l7 l7 l7 l7 l7\> > > > >> > > > >   <? ? ? ?
4 4 4 4
: : : :    "# # # # S" S" S" S" S" S" S" S"r   r   )__doc__
__future__r   typingr   r   r5   torch.nn.functionalr   
functionalr`   einopsr   r   r   r	   r
   torch.nnr   torch.optimr   r   r   r   r<   r   r   <module>r      s    # " " " " " ! ! ! ! ! ! ! !           * * * * * * * * * * # # # # # # # # # #       ! ! ! ! ! !
  % % %Y" Y" Y" Y" Y"V Y" Y" Y" Y" Y"r   