
    %`ivN                       d dl mZ d dlZd dlmZ d dlmZ d dlm	Z	m
Z
mZmZmZmZ d dlZd dlmZmZmZmZmZmZmZmZ m!Z"m#Z$ d dl%m&Z'm(Z)m*Z+ d dl,m-Z- 	 d d	l.m/Z0 n # e1$ r 	 d d	lm/Z0 n# e1$ r dZ0Y nw xY wY nw xY wg d
Z2 edg d          Z3d Z4d Z5 G d de          Z6 G d de          Z7 G d de          Z8 G d d          Z9dddddddd  e:d          d ddddfd=d8Z;d>d;Z<d>d<Z=dS )?    )annotationsN)abstractmethod)
namedtuple)DictList
NamedTupleOptionalTupleUnion)
CriterionTypeLexiconDecoderLexiconDecoderOptionsLexiconFreeDecoderLexiconFreeDecoderOptionsLMLMStateSmearingModeTrieZeroLM)create_word_dict
Dictionary
load_words)_download_asset)KenLM)CTCHypothesis
CTCDecoderCTCDecoderLMCTCDecoderLMStatectc_decoderdownload_pretrained_filesPretrainedFileslexicontokenslmc                                                      }t          ||          }|                    d          }|                                D ]]\  }}	|                    |          }
|                    ||
          \  }}|	D ]'} fd|D             }|                    ||
|           (^|                    t          j	                   |S )NFc                :    g | ]}                     |          S  )	get_index).0tokentokens_dicts     z/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchaudio/models/decoder/_ctc_decoder.py
<listcomp>z#_construct_trie.<locals>.<listcomp>;   s'    OOOUK11%88OOO    )

index_size_Triestartitemsr)   scoreinsertsmear_SmearingModeMAX)r,   	word_dictr#   r%   silence
vocab_sizetriestart_stateword	spellingsword_idx_r4   spellingspelling_idxs   `              r-   _construct_trierD   2   s    ''))JW%%D((5//K"==?? 7 7i&&t,,88K225! 	7 	7HOOOOhOOOLKKh6666	7 	JJ} !!!Kr/   c                   d }|t          |          }| r|t          |           }n]| s[|Yt          |          t          u rCfdt	                                                    D             }|gg||<   t          |          }|S )Nc                f    i | ]-}                     |                               |          gg.S r(   )	get_entry)r*   ir,   s     r-   
<dictcomp>z"_get_word_dict.<locals>.<dictcomp>I   s=    mmm[""1%%)>)>q)A)A(B'Cmmmr/   )_Dictionary_create_word_dicttypestrranger0   )r#   r%   lm_dictr,   unk_wordr9   ds      `   r-   _get_word_dictrR   A   s    I((	 )9$%g..		 )*tBxx3mmmmUS^SiSiSkSkMlMlmmm zl(%a((	r/   c                  B    e Zd ZU dZded<   	 ded<   	 ded<   	 ded	<   d
S )r   zORepresents hypothesis generated by CTC beam search decoder :class:`CTCDecoder`.torch.LongTensorr$   z	List[str]wordsfloatr4   torch.IntTensor	timestepsN)__name__
__module____qualname____doc____annotations__r(   r/   r-   r   r   P   sR         ZZh LLL+nnr/   r   c                  H     e Zd ZdZed
 fd            Zd fdZdd	Z xZS )r   zLanguage model state.returnDict[int, CTCDecoderLMState]c                *    t                      j        S )zMap of indices to LM states)superchildren)self	__class__s    r-   rc   zCTCDecoderLMState.childrenh   s     wwr/   	usr_indexintc                F    t                                          |          S )a!  Returns child corresponding to usr_index, or creates and returns a new state if input index
        is not found.

        Args:
            usr_index (int): index corresponding to child state

        Returns:
            CTCDecoderLMState: child state corresponding to usr_index
        )rb   child)rd   rf   re   s     r-   ri   zCTCDecoderLMState.childm   s     ww}}Y'''r/   statec                    dS )zCompare two language model states.

        Args:
            state (CTCDecoderLMState): LM state to compare against

        Returns:
            int: 0 if the states are the same, -1 if self is less, +1 if self is greater.
        Nr(   rd   rj   s     r-   comparezCTCDecoderLMState.comparey   s	     	r/   )r_   r`   )rf   rg   r_   r   )rj   r   r_   r   )	rY   rZ   r[   r\   propertyrc   ri   rm   __classcell__)re   s   @r-   r   r   e   s                  X 
( 
( 
( 
( 
( 
(	 	 	 	 	 	 	 	r/   r   c                  Z    e Zd ZdZedd            Zedd            Zedd            ZdS )r   zVLanguage model base class for creating custom language models to use with the decoder.start_with_nothingboolr_   r   c                    t           )zInitialize or reset the language model.

        Args:
            start_with_nothing (bool): whether or not to start sentence with sil token.

        Returns:
            CTCDecoderLMState: starting state
        NotImplementedError)rd   rq   s     r-   r2   zCTCDecoderLM.start   s
     "!r/   rj   usr_token_idxrg   Tuple[CTCDecoderLMState, float]c                    t           )ax  Evaluate the language model based on the current LM state and new word.

        Args:
            state (CTCDecoderLMState): current LM state
            usr_token_idx (int): index of the word

        Returns:
            (CTCDecoderLMState, float)
                CTCDecoderLMState:
                    new LM state
                float:
                    score
        rt   )rd   rj   rv   s      r-   r4   zCTCDecoderLM.score   s
     "!r/   c                    t           )a8  Evaluate end for language model based on current LM state.

        Args:
            state (CTCDecoderLMState): current LM state

        Returns:
            (CTCDecoderLMState, float)
                CTCDecoderLMState:
                    new LM state
                float:
                    score
        rt   rl   s     r-   finishzCTCDecoderLM.finish   s
     "!r/   N)rq   rr   r_   r   )rj   r   rv   rg   r_   rw   )rj   r   r_   rw   )rY   rZ   r[   r\   r   r2   r4   rz   r(   r/   r-   r   r      sx        ``	" 	" 	" ^	" " " " ^"  " " " ^" " "r/   r   c                  b    e Zd ZdZd(dZd)dZd*dZd Zd Zd+dZ	d,dZ
d,d Z	 d-d.d%Zd/d'Zd!S )0r   zCTC beam search decoder from *Flashlight* :cite:`kahn2022flashlight`.

    .. devices:: CPU

    Note:
        To build the decoder, please use the factory function :func:`ctc_decoder`.
    nbestrg   r#   Optional[Dict]r9   rJ   r,   r%   r   decoder_options9Union[_LexiconDecoderOptions, _LexiconFreeDecoderOptions]blank_tokenrM   	sil_tokenrP   r_   Nonec
           
        || _         || _        || _        | j                            |          | _        | j                            |          }
g }|rKt          |||||
          }|                    |	          }	d}t          ||||
| j        |	||          | _        nt          |||
| j        |          | _        || _	        dS )a  
        Args:
            nbest (int): number of best decodings to return
            lexicon (Dict or None): lexicon mapping of words to spellings, or None for lexicon-free decoder
            word_dict (_Dictionary): dictionary of words
            tokens_dict (_Dictionary): dictionary of tokens
            lm (CTCDecoderLM): language model. If using a lexicon, only word level LMs are currently supported
            decoder_options (_LexiconDecoderOptions or _LexiconFreeDecoderOptions):
                parameters used for beam search decoding
            blank_token (str): token corresopnding to blank
            sil_token (str): token corresponding to silence
            unk_word (str): word corresponding to unknown
        FN)
r|   r9   r,   r)   blankrD   _LexiconDecoderdecoder_LexiconFreeDecoderr%   )rd   r|   r#   r9   r,   r%   r~   r   r   rP   r:   transitionsr<   token_lms                 r-   __init__zCTCDecoder.__init__   s    4 
"&%//<<
",,Y77 	f";	7BPPD **844HH*
	 	DLL /GTZYdeeDL r/   idxsrW   rT   c                     d t          j        |          D             }t           fd|          }t          j        t          |                    S )Nc              3  &   K   | ]}|d          V  dS )r   Nr(   )r*   gs     r-   	<genexpr>z)CTCDecoder._get_tokens.<locals>.<genexpr>   s&      //!//////r/   c                    | j         k    S N)r   )xrd   s    r-   <lambda>z(CTCDecoder._get_tokens.<locals>.<lambda>   s    TZ r/   )itgroupbyfiltertorch
LongTensorlistrd   r   s   ` r-   _get_tokenszCTCDecoder._get_tokens   sP    //bj..///////66T

+++r/   c                    g }t          |          D ];\  }}|| j        k    r|dk    s|||dz
           k    r|                    |           <t          j        |          S )z8Returns frame numbers corresponding to non-blank tokens.r      )	enumerater   appendr   	IntTensor)rd   r   rX   rH   idxs        r-   _get_timestepszCTCDecoder._get_timesteps   sr     	oo 	$ 	$FAsdj  AvvQU++  ###y)))r/   c                8    | j                                          dS )a  Initialize the internal state of the decoder.

        See :py:meth:`decode_step` for the usage.

        .. note::

           This method is required only when performing online decoding.
           It is not necessary when performing batch decoding with :py:meth:`__call__`.
        N)r   decode_beginrd   s    r-   r   zCTCDecoder.decode_begin  s     	!!#####r/   c                8    | j                                          dS )a  Finalize the internal state of the decoder.

        See :py:meth:`decode_step` for the usage.

        .. note::

           This method is required only when performing online decoding.
           It is not necessary when performing batch decoding with :py:meth:`__call__`.
        N)r   
decode_endr   s    r-   r   zCTCDecoder.decode_end  s     	!!!!!r/   	emissionstorch.FloatTensorc                   |j         t          j        k    rt          d          |j        st          d          |                                st          d          |j        dk    rt          d|j                   |	                                \  }}| j
                            |                                ||           dS )a  Perform incremental decoding on top of the curent internal state.

        .. note::

           This method is required only when performing online decoding.
           It is not necessary when performing batch decoding with :py:meth:`__call__`.

        Args:
            emissions (torch.FloatTensor): CPU tensor of shape `(frame, num_tokens)` storing sequences of
                probability distribution over labels; output of acoustic model.

        Example:
            >>> decoder = torchaudio.models.decoder.ctc_decoder(...)
            >>> decoder.decode_begin()
            >>> decoder.decode_step(emission1)
            >>> decoder.decode_step(emission2)
            >>> decoder.decode_end()
            >>> result = decoder.get_final_hypothesis()
        emissions must be float32.emissions must be a CPU tensor.emissions must be contiguous.   zemissions must be 2D. Found N)dtyper   float32
ValueErroris_cpuRuntimeErroris_contiguousndimshapesizer   decode_stepdata_ptr)rd   r   TNs       r-   r   zCTCDecoder.decode_step   s    ( ?em++9::: 	B@AAA&&(( 	@>???>QOioOOPPP~~1  !3!3!5!5q!<<<<<r/   List[CTCHypothesis]c                       fd|D             S )Nc           
         g | ][}t                              |j                  fd |j        D             |j                            |j                            \S )c                P    g | ]"}|d k    j                             |          #S )r   )r9   rG   )r*   r   rd   s     r-   r.   z2CTCDecoder._to_hypo.<locals>.<listcomp>.<listcomp>G  s/    SSSqAQRFFt~//22FFFr/   )r$   rU   r4   rX   )r   r   r$   rU   r4   r   )r*   resultrd   s     r-   r.   z'CTCDecoder._to_hypo.<locals>.<listcomp>D  s~     
 
 
  ''66SSSSFLSSSl--fm<<	  
 
 
r/   r(   rd   resultss   ` r-   _to_hypozCTCDecoder._to_hypoC  s0    
 
 
 
 "
 
 
 	
r/   c                x    | j                                         }|                     |d| j                           S )a9  Get the final hypothesis

        Returns:
            List[CTCHypothesis]:
                List of sorted best hypotheses.

        .. note::

           This method is required only when performing online decoding.
           It is not necessary when performing batch decoding with :py:meth:`__call__`.
        N)r   get_all_final_hypothesisr   r|   r   s     r-   get_final_hypothesiszCTCDecoder.get_final_hypothesisN  s4     ,7799}}W\tz\2333r/   NlengthsOptional[torch.Tensor]List[List[CTCHypothesis]]c                   |j         t          j        k    rt          d          |j        st          d          |                                st          d          |j        dk    rt          d|j                   ||j        st          d          |	                                \  }}}|t          j
        |f|          }d}g }t          |          D ]}|                                ||z  |                    d	          z  z   }	| j                            |	||         |          }
|                    |                     |
d| j                                      |S )
a  
        Performs batched offline decoding.

        .. note::

           This method performs offline decoding in one go. To perform incremental decoding,
           please refer to :py:meth:`decode_step`.

        Args:
            emissions (torch.FloatTensor): CPU tensor of shape `(batch, frame, num_tokens)` storing sequences of
                probability distribution over labels; output of acoustic model.
            lengths (Tensor or None, optional): CPU tensor of shape `(batch, )` storing the valid length of
                in time axis of the output Tensor in each batch.

        Returns:
            List[List[CTCHypothesis]]:
                List of sorted best hypotheses for each audio sequence in the batch.
        r   r   r      zemissions must be 3D. Found Nzlengths must be a CPU tensor.   r   )r   r   r   r   r   r   r   r   r   r   fullrN   r   strider   decoder   r   r|   )rd   r   r   Br   r   float_byteshyposbemissions_ptrr   s              r-   __call__zCTCDecoder.__call__]  sk   , ?em++9::: 	B@AAA&&(( 	@>???>QOioOOPPPw~>???..""1a?j!q))Gq 	? 	?A%..00;?YEUEUVWEXEX3XXMl))-QGGGLLw||'<==>>>>r/   r   c                       fd|D             S )z
        Map raw token IDs into corresponding tokens

        Args:
            idxs (LongTensor): raw token IDs generated from decoder

        Returns:
            List: tokens corresponding to the input IDs
        c                h    g | ].}j                             |                                          /S r(   )r,   rG   item)r*   r   rd   s     r-   r.   z-CTCDecoder.idxs_to_tokens.<locals>.<listcomp>  s2    GGG3 **388::66GGGr/   r(   r   s   ` r-   idxs_to_tokenszCTCDecoder.idxs_to_tokens  s      HGGG$GGGGr/   )r|   rg   r#   r}   r9   rJ   r,   rJ   r%   r   r~   r   r   rM   r   rM   rP   rM   r_   r   )r   rW   r_   rT   )r   rW   r_   rW   )r   r   )r_   r   r   )r   r   r   r   r_   r   )r   rT   r_   r   )rY   rZ   r[   r\   r   r   r   r   r   r   r   r   r   r   r(   r/   r-   r   r      s         7 7 7 7r, , , ,
	* 	* 	* 	*
$ 
$ 
$
" 
" 
"!= != != !=F	
 	
 	
 	
4 4 4 4  OS0 0 0 0 0d
H 
H 
H 
H 
H 
Hr/   r   r   2   r   z-infF-|z<unk>r#   Optional[str]r$   Union[str, List[str]]r%   Union[str, CTCDecoderLM]rO   r|   rg   	beam_sizebeam_size_tokenOptional[int]beam_thresholdrV   	lm_weight
word_score	unk_score	sil_scorelog_addrr   r   rM   r   rP   r_   c                V   |%t          |          t          urt          d          t          |          }| rFt	          |           } t          ||p|                                |||	|
||t          j        	  	        }n4t          ||p|                                ||||t          j                  }t          | ||||          }t          |          t          u r't          t          d          t          ||          }n|t                      }t          || |||||||	  	        S )aY	  Builds an instance of :class:`CTCDecoder`.

    Args:
        lexicon (str or None): lexicon file containing the possible words and corresponding spellings.
            Each line consists of a word and its space separated spelling. If `None`, uses lexicon-free
            decoding.
        tokens (str or List[str]): file or list containing valid tokens. If using a file, the expected
            format is for tokens mapping to the same index to be on the same line
        lm (str, CTCDecoderLM, or None, optional): either a path containing KenLM language model,
            custom language model of type `CTCDecoderLM`, or `None` if not using a language model
        lm_dict (str or None, optional): file consisting of the dictionary used for the LM, with a word
            per line sorted by LM index. If decoding with a lexicon, entries in lm_dict must also occur
            in the lexicon file. If `None`, dictionary for LM is constructed using the lexicon file.
            (Default: None)
        nbest (int, optional): number of best decodings to return (Default: 1)
        beam_size (int, optional): max number of hypos to hold after each decode step (Default: 50)
        beam_size_token (int, optional): max number of tokens to consider at each decode step.
            If `None`, it is set to the total number of tokens (Default: None)
        beam_threshold (float, optional): threshold for pruning hypothesis (Default: 50)
        lm_weight (float, optional): weight of language model (Default: 2)
        word_score (float, optional): word insertion score (Default: 0)
        unk_score (float, optional): unknown word insertion score (Default: -inf)
        sil_score (float, optional): silence insertion score (Default: 0)
        log_add (bool, optional): whether or not to use logadd when merging hypotheses (Default: False)
        blank_token (str, optional): token corresponding to blank (Default: "-")
        sil_token (str, optional): token corresponding to silence (Default: "|")
        unk_word (str, optional): word corresponding to unknown (Default: "<unk>")

    Returns:
        CTCDecoder: decoder

    Example
        >>> decoder = ctc_decoder(
        >>>     lexicon="lexicon.txt",
        >>>     tokens="tokens.txt",
        >>>     lm="kenlm.bin",
        >>> )
        >>> results = decoder(emissions) # List of shape (B, nbest) of Hypotheses
    Nz!lm_dict must be None or str type.)	r   r   r   r   r   r   r   r   criterion_type)r   r   r   r   r   r   r   zflashlight-text is installed, but KenLM is not installed. Please refer to https://github.com/kpu/kenlm#python-module for how to install it.)	r|   r#   r9   r,   r%   r~   r   r   rP   )rL   rM   r   rJ   _load_words_LexiconDecoderOptionsr0   _CriterionTypeCTC_LexiconFreeDecoderOptionsrR   _KenLMr   _ZeroLMr   )r#   r$   r%   rO   r|   r   r   r   r   r   r   r   r   r   r   rP   r,   r~   r9   s                      r-   r   r     sh   r tG}}C77<===f%%K  
g&&0+G{/E/E/G/G)!)-

 

 

 5+G{/E/E/G/G))-
 
 
 wG[(KKIBxx3>d   B	""	YY'
 
 
 
r/   model_PretrainedFilesc                ~    | dvrt          |  d          d|  }t          | d| d| dk    r| dnd           S )	N)librispeechzlibrispeech-3-gramzlibrispeech-4-gramzZ not supported. Must be one of ['librispeech-3-gram', 'librispeech-4-gram', 'librispeech']zdecoder-assets/z/lexicon.txtz/tokens.txtr   z/lm.binr"   )r   r   )r   prefixs     r-   _get_filenamesr     s    OOOppp
 
 	
 'u&&F'''%%%!&-!7!7fT   r/   c                    t          |           }t          |j                  }t          |j                  }|j        t          |j                  }nd}t          |||          S )aM  
    Retrieves pretrained data files used for :func:`ctc_decoder`.

    Args:
        model (str): pretrained language model to download.
            Valid values are: ``"librispeech-3-gram"``, ``"librispeech-4-gram"`` and ``"librispeech"``.

    Returns:
        Object with the following attributes

            * ``lm``: path corresponding to downloaded language model,
              or ``None`` if the model is not associated with an lm
            * ``lexicon``: path corresponding to downloaded lexicon file
            * ``tokens``: path corresponding to downloaded tokens file
    Nr"   )r   r   r#   r$   r%   r   )r   fileslexicon_filetokens_filelm_files        r-   r    r      sm    " 5!!E"5=11L!%,//Kx!%(++   r/   )"r#   r   r$   r   r%   r   rO   r   r|   rg   r   rg   r   r   r   rV   r   rV   r   rV   r   rV   r   rV   r   rr   r   rM   r   rM   rP   rM   r_   r   )r   rM   r_   r   )>
__future__r   	itertoolsr   abcr   collectionsr   typingr   r   r   r	   r
   r   r   flashlight.lib.text.decoderr   r   r   r   r   r   r   r   r   r   r   _LMr   _LMStater   r7   r   r1   r   r   flashlight.lib.text.dictionaryr   rK   r   rJ   r   r   torchaudio.utilsr   !flashlight.lib.text.decoder.kenlmr   r   	Exception__all__r   rD   rR   r   r   r   r   rV   r   r   r    r(   r/   r-   <module>r
     sD   " " " " " "           " " " " " " A A A A A A A A A A A A A A A A                                 
 - , , , , ,AAAAAAA   ???????      :/1L1L1LMM     o o o o oJ o o o*       @." ." ." ." ."3 ." ." ."bcH cH cH cH cH cH cH cHR $(!%)uV}}!n n n n nb        s6   A A8!A('A8(A2/A81A22A87A8