
    Pix1                     (   d dl mZmZ d dlmZ d dlZd dlZd dlm	Z	 d dl
m	c mZ d dlZd dlmZ d dlmZmZmZ d dlmZmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZ  G d de	j         edd          Z! G d de!          Z" G d dedd          Z#dS )    )OptionalDict)PathN)
transforms)PyTorchModelHubMixinModelHubMixinhf_hub_download)AutoFeatureExtractorHubertModelWav2Vec2BertModel   )CodecEncoder)DistillCodecEncoder)CodecDecoderVocos)SemanticEncoderc                   :    e Zd Zdedef fdZed             Zedddddddddd		d
ede	e         de	e         de
de	e         de
de
de	e         dede
fd            Zdej        ez  ez  fdZdej        ez  ez  dej        fdZdej        dej        fdZ xZS )NeuCodecsample_rate
hop_lengthc                    t                                                       || _        || _        t	          j        dd          | _        | j        j        j        d d          t          j        d          | _
        t          ddd          | _        t                      | _        t          |          | _        t#          j        dd          | _        t#          j        dd          | _        d S )Nzfacebook/w2v-bert-2.0Toutput_hidden_states      r      )super__init__r   r   r   from_pretrainedsemantic_modelencoderlayersr
   feature_extractorr   SemanticEncoder_moduler   CodecEncr   	generatornnLinearfc_prior	fc_post_a)selfr   r   	__class__s      b/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/neucodec/model.pyr   zNeuCodec.__init__   s    &$/?#$
 
 

 	#*3B3//!5!E#"
 "
 '6dD$&G&G#$*jAAA	$--4..    c                 N    t          |                                           j        S N)next
parametersdevice)r+   s    r-   r3   zNeuCodec.device,   s    DOO%%&&--r.   NFcpuT	revision	cache_dirforce_downloadproxiesresume_downloadlocal_files_onlytokenmap_locationstrictmodel_idr6   r7   r8   r9   r:   r;   r<   r=   r>   c       
         T   |dv sJ |dk    rddgn|dk    rg t          |d|||||||	  	        }t          |d|||||||	  	        } | d	d
          }t          j        ||	          }d fd|                                D             }|                    |d           |S )N)neuphonic/neucodecneuphonic/distill-neucodecrA   	fc_post_sSemanticDecoderrB   zpytorch_model.bin	repo_idfilenamer6   r7   r8   r9   r:   r;   r<   	meta.yaml]  i  c                 :     t           fd|D                       S )Nc              3       K   | ]}|v V  	d S r0    ).0iss     r-   	<genexpr>z>NeuCodec._from_pretrained.<locals>.<lambda>.<locals>.<genexpr>f   s'      (;(;Aa(;(;(;(;(;(;r.   )any)rO   ls   ` r-   <lambda>z+NeuCodec._from_pretrained.<locals>.<lambda>f   s$    S(;(;(;(;(;(;(;%;%; r.   c                 4    i | ]\  }} |          ||S rL   rL   )rM   kvcontains_listignore_keyss      r-   
<dictcomp>z-NeuCodec._from_pretrained.<locals>.<dictcomp>g   sA     
 
 
Aq =K00
a
 
 
r.   F)r>   )r	   torchloaditemsload_state_dict)clsr?   r6   r7   r8   r9   r:   r;   r<   r=   r>   model_kwargs	ckpt_path_model
state_dictrW   rX   s                   @@r-   _from_pretrainedzNeuCodec._from_pretrained0   s+   " OOOOO+++&(9:KK555K $()+-

 

 

	  )+-

 

 

 FC   Z	<88
;;
 
 
 
 
&,,..
 
 

 	j777r.   audio_or_pathc                    t          |t          t          f          rJt          j        |          \  }}|dk    r, t          j        |d          |          d}}|d d d f         }nNt          |t          j                  r4|}t          |j
                  dk    r|}nt          d|j
                   d|j
        d         dz  z
  }t          j        j                            |d|f          }|S )N>     zPNeuCodec expects tensor audio input to be of shape [B, 1, T] -- received shape: i@  r   )
isinstancer   str
torchaudior[   TResamplerZ   Tensorlenshape
ValueErrorr'   
functionalpad)r+   re   ysrpad_for_wavs        r-   _prepare_audiozNeuCodec._prepare_audioq   s     mdC[11 	OM22EArV||/B//22F2dAAAgJ u|44 	A17||q  ! pghgnpp  
 QWR[3./H##A;'788r.   returnc                 B   |                      |          }g }t          |                    d                    D ]h}|                     ||ddf                                         dd          j                            | j                  }|                    |           it          j
        |          }|                     |                    | j                            }|                    dd          }|                     |dd                   j        d                             dd          }|                     |          }|j        d	         |j        d	         k    rHt#          |j        d	         |j        d	                   }	|ddddd|	f         }|ddddd|	f         }t          j        ||gd
          }
|                     |
                    dd                                        dd          }
|                     |
d          \  }}}|S )
        Args:
            audio_or_path: torch.Tensor [B, 1, T] | Path | str, input audio

        Returns:
            fsq_codes: torch.Tensor [B, 1, F], 50hz FSQ codes
        r   Nrg   ptsampling_ratereturn_tensorsr      r   ri   dimTvq)rx   rangesizer#   r4   input_featurestor3   appendrZ   vstackr%   	transposer    hidden_statesr$   rq   mincatr)   r&   )r+   re   ru   all_semantic_featuresrN   semantic_featuresacoustic_embsemantic_outputsemantic_encodedmin_len
concat_embra   	fsq_codess                r-   encode_codezNeuCodec.encode_code   s!    .. "qvvayy!! 		< 		<A&&adGKKMM"(#' '  
  4;  "(():;;;;!L)>?? }}QTT$+%6%677#--a33  1!!! 455CBGQQRSUVWW 	  66GG b!%5%;B%???,,R02B2H2LMMG'111hwh7L/111hwh?Y 0,?QGGG
]]:#7#71#=#=>>HHANN
 ...==9ar.   r   c                 t   | j         j                            |                    dd                    }|                    dd          }|                     |                    dd                                        dd          }|                      |                    dd          d          d         }|S )z
        Args:
            fsq_codes: torch.Tensor [B, 1, F], 50hz FSQ codes

        Returns:
            recon: torch.Tensor [B, 1, T], reconstructed 24kHz audio
        r   r   Fr   r   )r&   	quantizerget_output_from_indicesr   r*   )r+   r   fsq_post_embrecons       r-   decode_codezNeuCodec.decode_code   s     ~/GG	H[H[\]_`HaHabb#--a33~~l&<&<Q&B&BCCMMaQRSS|55a;;FFqIr.   )__name__
__module____qualname__intr   propertyr3   classmethodrk   r   boolr   rd   rZ   ro   r   rx   r   r   __classcell__)r,   s   @r-   r   r      s       /C /S / / / / / /& . . X. 
 #'#'$"& %!&#!> > > > 3-	>
 C=> > $> > > }> > > > > [>@EL4,?#,E    2,)<s)B ,u| , , , ,\U\ el        r.   r   z%https://github.com/neuphonic/neucodecz
apache-2.0)repo_urllicensec                   N    e Zd ZdedefdZdej        ez  ez  dej        fdZ	dS )DistillNeuCodecr   r   c                    t           j                            |            || _        || _        t          j        dd          | _        t          j        d          | _	        t          ddd          | _        t                      | _        t          |          | _        t          j        dd          | _        t          j        d	d          | _        t          j        dd          | _        d S )
Nzntu-spml/distilhubertTr   i   r   r   i   r   i   )r'   Moduler   r   r   r   r   r    r
   r#   r   r$   r   codec_encoderr   r&   r(   r)   fc_sq_priorr*   )r+   r   r   s      r-   r   zDistillNeuCodec.__init__   s    
	4   &$)9#$
 
 
 "6!E#"
 "
 '6c3&E&E#022*jAAA	
 

 9S#..4..r.   re   ry   c           	         |                      |          }g }t          |                    d                    D ]}|                     t	          j        ||ddf                                         d          dd          j                            | j	                  
                    d          }|                    |           t          j        |          }|                     |                     |                    | j	                                      }|                    dd          }|                     |          j                            dd          }|                     |          }|j        d	         |j        d	         k    rHt+          |j        d	         |j        d	                   }|ddddd|f         }|ddddd|f         }t          j        ||gd
          }	|                     |	                    dd                                        dd          }	|                     |	d          \  }
}}
|S )r{   r   N)   r   rg   r|   r}   r   r   ri   r   Tr   )rx   r   r   r#   Frt   r4   input_valuesr   r3   squeezer   rZ   r   r   r   r   r    last_hidden_stater$   rq   r   r   r)   r&   )r+   re   ru   r   rN   r   fsq_embsemantic_targetr   r   ra   r   s               r-   r   zDistillNeuCodec.encode_code   s-    .. "qvvayy!! 
	< 
	<A&&E!AqqqD'++--44"(#' '  
 bboo  "(():;;;;!L)>?? ""4#5#5add4;6G6G#H#HII##Aq)) --
 

IIaOO 	 55oFF= 5b 999'-+_-B2-FGGGaaaHWHn-G-aaaHWHn=OY9qAAA
]]:#7#71#=#=>>HHANN
...==9ar.   N)
r   r   r   r   r   rZ   ro   r   rk   r   rL   r.   r-   r   r      sf        /C /S / / / /*,%,*=*C , , , , , , ,r.   r   c                       e Zd Zd Zedddddddddd	dedee         d	ee         d
edee         dededee         dedefd            Z	d Z
dej        dej        fdZdS )NeuCodecOnnxDecoderc                     	 dd l }n"# t          $ r}t          d          |d }~ww xY w|                                }|j        j        |_        |                    ||          | _        d| _        d S )Nr   z[Failed to import `onnxruntime`. Install with the following command: pip install onnxruntime)sess_optionsrI   )	onnxruntimeImportErrorSessionOptionsGraphOptimizationLevelORT_ENABLE_ALLgraph_optimization_levelInferenceSessionsessionr   )r+   	onnx_pathr   esos        r-   r   zNeuCodecOnnxDecoder.__init__  s    	D 	D 	D 	D{||  CD  D	D ''))&1&H&W#"33 4 
 
 "s    
&!&NFr4   Tr5   r?   r6   r7   r8   r9   r:   r;   r<   r=   r>   c       
             t          |d|||||||	  	        }t          |d|||||||	  	        } | |          }|	dk    rt          d          |S )Nz
model.onnxrE   rH   r4   z6The onnx decoder currently only supports CPU runtimes.)r	   rr   )r^   r?   r6   r7   r8   r9   r:   r;   r<   r=   r>   r_   r   ra   rb   s                  r-   rd   z$NeuCodecOnnxDecoder._from_pretrained$  s    $ $!)+-

 

 

	  )+-

 

 

 I 5  UVVVr.   c                      t          d          )NzfThe onnx decoder has no functionality to encode codes, as it only contains the compiled decoder graph.)NotImplementedError)r+   argskwargss      r-   r   zNeuCodecOnnxDecoder.encode_codeX  s    !t
 
 	
r.   codesry   c                 H   t          |t          j                  st          d          t	          |j                  dk    r|j        d         dk    rt          d          | j                            dd|i          d                             t          j	                  }|S )z
        Args:
            fsq_codes: np.array [B, 1, F], 50hz FSQ codes

        Returns:
            recon: np.array [B, 1, T], reconstructed 24kHz audio
        z`Codes` should be an np.array.rh   r   z%`Codes` should be of shape [B, 1, F].Nr   r   )
rj   npndarrayrr   rp   rq   r   runastypefloat32)r+   r   r   s      r-   r   zNeuCodecOnnxDecoder.decode_code]  s     %,, 	?=>>>5;1$$A!(;(;DEEE   7E"
 

VBJ 	 r.   )r   r   r   r   r   rk   r   r   r   rd   r   r   r   r   rL   r.   r-   r   r     s       " " "" 
 #'#'$"& %!&#!1 1 1 1 3-	1
 C=1 1 $1 1 1 }1 1 1 1 1 [1f
 
 

 
      r.   r   )$typingr   r   pathlibr   numpyr   rZ   torch.nnr'   torch.nn.functionalrs   r   rl   r   rm   huggingface_hubr   r   r	   transformersr
   r   r   r   r   codec_encoder_distillr   codec_decoder_vocosr   moduler   r   r   r   r   rL   r.   r-   <module>r      s   ! ! ! ! ! ! ! !                               & & & & & & P P P P P P P P P P M M M M M M M M M M ' ' ' ' ' ' 6 6 6 6 6 6 2 2 2 2 2 2 # # # # # #s s s s sI4	s s s slB B B B Bh B B BJd d d d d4d d d d d dr.   