
    .`i                     :   d dl mZ deedz           fdZdedee         deded	ef
d
ZdZ	 ddedee         ded	e	ee         eef         fdZ
dedee         d	ee         fdZ	 	 ddedee         dee         dz  dedededed	e	ee         eeef         fdZdS )    )TokenizerLiketokensNc                 >    t          |           D ]\  }}|d| |<   d S )N )	enumerate)r   itokens      u/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/tokenizers/detokenizer_utils.py_replace_none_with_emptyr      s6    f%%  5=F1I     	tokenizeroutput_tokensskip_special_tokensspaces_between_special_tokensreturnc                    g }g }| j         }t          |                                           }|rt          | j                  nd}|D ]j}	|	|v r|	|v rJ|r2|                     ||                     |                                 |                    |	           U|                    |	           k|r|                     ||                     |rd                    |          S d                    |          S )N  r   )convert_tokens_to_stringsetget_added_vocaball_special_tokensappendclearjoin)
r   r   r   r   	sub_textscurrent_sub_textr   added_vocab_setr   r	   s
             r
   -_convert_tokens_to_string_with_added_encodersr      s8    I"$(A)335566O-@HI()))b   
+ 
+&&&O## )  !9!9:J!K!KLLL &&(((U######E**** E112BCCDDD$ #xx	"""779r      F
prompt_idsc                     |                      |t           dz
  d         |          }t          |          }t          |t          z
  d          }t	          |           |||fS )a  Converts the prompt ids to tokens and returns the tokens and offsets
    for incremental detokenization.

    Note that not all tokens are converted to strings. Only the tokens that
    are necessary for incremental detokenization are converted to strings.
       Nr   r   )convert_ids_to_tokens)INITIAL_INCREMENTAL_DETOKENIZATION_OFFSETlenmaxr   )r   r!   r   
new_tokensread_offsetprefix_offsets         r
   convert_prompt_ids_to_tokensr,   ;   sv     00==ACCD/ 1  J j//K&OOQRSSMZ(((}k11r   	token_idsc                 r    g }|D ]1}|                      |g          }|d}|                    |           2|S )zDetokenize the input ids individually.

    Args:
      tokenizer: tokenizer used by model under test
      token_ids: convert these tokens (Python list form)

    Returns:
      Python list of token string representations

    Nr   )decoder   )r   r-   token_str_lsttoken_id	token_strs        r
   convert_ids_list_to_tokensr3   S   sU     M ( ($$hZ00	IY''''r   Tall_input_idsprev_tokensr+   r*   c                    |d         }|du }|rt          | |dd         |          \  }}}|J d|cxk    rt          |           k     r4n n1|                     |g|          }	t          |	t                    r|	g}	ndg}	||	z   }
|r|
}	| j        s|                                 s;|                     |
||                   }|                     |
|d                   }n6t          | |
||         ||          }t          | |
|d         ||          }t          |          t          |          k    s|	                    d          r|	d||fS |t          |          d         }|	||t          |
          fS )a(  Detokenizes the input ids incrementally and returns the new tokens
    and the new text.

    If `prev_tokens` is None, this function will convert the input ids to
    tokens and return the tokens and the new text. Otherwise, it will return the
    new tokens and the new text.

    This function will also return the new prefix offset and the new read
    offset to be used in the next iteration.

    The offsets are necessary to defeat cleanup algorithms in the decode which
    decide to add a space or not depending on the surrounding ids.

    Args:
        tokenizer: The tokenizer to use.
        all_input_ids: The input ids. The last id is the new token id.
        prev_tokens: The previous tokens. If None, this function will convert
            the input ids to tokens and return the tokens and the new text.
        prefix_offset: The prefix offset.
        read_offset: The read offset.
        skip_special_tokens: Whether to skip special tokens.
        spaces_between_special_tokens: Whether to add spaces between special
            tokens.
    Nr$   r   r   )r   r   u   �)
r,   r'   r%   
isinstancestris_fastr   r   r   endswith)r   r4   r5   r+   r*   r   r   new_token_idis_first_iterr)   r   prefix_textnew_texts                r
   detokenize_incrementallyr@   n   s	   B !$L4'M 
4P}SbS)?R5
 5
 5
1m[ """ 	L))))3y>>)))))44N0C 5 
 

 j#&& 	&$JT
*,M  #"

  
	 9 9 ; ; 
88-34
 
 55mMNN6STTC-34 3*G	
 
 
 A-..) 3*G	
 
 
 8}}K((((H,=,=e,D,D(
 2}k99K((**+Hxc-.@.@@@r   )F)FT)vllm.tokenizersr   listr9   r   boolr   r&   inttupler,   r3   r@   r   r   r
   <module>rF      s  
 * ) ) ) ) )T#*%5    %%9% % $(	%
 	% % % %T -. ) !&2 22S	2 2 49c3	2 2 2 20Cy 
#Y   B !&*.XA XAXA9XA cT!XA 	XA
 XA XA $(XA 49c3#$XA XA XA XA XA XAr   