
    *`i^                        U d dl Z d dlZd dlmZ d dlmZ d dlZd dlmZ d dl	m
Z
 eed<   	 d dlZd dlZdZn# e$ r dZY nw xY w e j        e          Zdd
Zdee         ded	eee                  fdZdededz  d	ee         fdZdee         d	eeeef                  fdZdee         d	efdZ	 	 	 	 	 ddedeez  dz  deez  dz  dedz  deded	efdZdS )    N)Path)Iterator)TokenizerVersion)MultiModalVersion_hub_installedTFreturnc                  2    t           st          d          d S )NzxPlease install the `huggingface_hub` package to use this method.
Run `pip install mistral-common[hf-hub]` to install it.)r   ImportError     z/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/utils.py_assert_hub_installedr      s,     
F
 
 	

 
r   lst
chunk_sizec              #   j   K   t          dt          |           |          D ]}| |||z            V  dS )a  Chunk a list into smaller lists of a given size.

    Args:
        lst: The list to chunk.
        chunk_size: The size of each chunk.

    Returns:
        An iterator over the chunks.

    Examples:
        >>> all_chunks = list(chunks([1, 2, 3, 4, 5], 2))
    r   N)rangelen)r   r   is      r   chunksr      sP       1c#hh
++ & &!a*n$%%%%%& &r   repo_idrevisionc                    t                       t          t          j        j                  t          j        j                            dg|                     d                    z  }|l|dz  t          j        j        z  }|	                                rA|
                    d          5 }|                                }ddd           n# 1 swxY w Y   |r0|dz  |z  }|                                rt          j        |          S g S )zlist the files of a local Hugging Face repo.

    Args:
        repo_id: The Hugging Face repo ID.
        revision: The revision of the model to use. If `None`, the latest revision will be used.
    models/Nrefsr	snapshots)r   r   huggingface_hub	constantsHF_HUB_CACHEREPO_ID_SEPARATORjoinsplitDEFAULT_REVISIONis_fileopenreadis_diroslistdir)r   r   
repo_cacherevision_filefilerevision_dirs         r   list_local_hf_repo_filesr/   0   s?    o/<==@Y@k@p@p	'GMM#&&'A A J "V+o.G.XX  "" 	'##C(( 'D99;;' ' ' ' ' ' ' ' ' ' ' ' ' ' '  ,!K/(:   	,:l+++Is   *CCCfilesc                 x   g }t          t          j                  }t          t          j                  dgz   fd|D             dgz   }| D ]k}t	          |          }|j        }d                    |j                  }|dk    r|                    ||f           P||v r|                    ||f           l|S )zFilter the valid tokenizer files from a list of files.

    Args:
        files: The list of files to filter.

    Returns:
        The list of tuples of file names and paths to the valid tokenizer files.
     c                 (    g | ]}D ]	}d | | 
S )z.model.r   ).0vmmm_versionss      r   
<listcomp>z1_filter_valid_tokenizer_files.<locals>.<listcomp>X   s5    ___1S^__a..1..____r   z.modeltekken.json)	listr   __members__r   r   namer"   suffixesappend)	r0   valid_tokenizer_filesinstruct_versionssentencepiece_suffixesr-   pathlib_file	file_namesuffixr7   s	           @r   _filter_valid_tokenizer_filesrE   K   s     -9::(455<K____8I___ckbll < <Dzz %	.//%%!(()T):;;;;---!(()T):;;;  r   c                 H   t          |           }t          |          dk    rt          d          t          |          dk    rO|D ]\  }}d|k    r|c S t          |d           d         d         }t                              d| d	           n|d         d         }|S )
zGet one valid tokenizer file from a list of files.

    Args:
        files: The list of files to filter.

    Returns:
        The path to the tokenizer file.
    r   zNo tokenizer file found.   r9   c                     | d         S )Nr   r   )xs    r   <lambda>z.get_one_valid_tokenizer_file.<locals>.<lambda>x   s    TUVWTX r   )keyz,Multiple valid tokenizer files found. Using .)rE   r   
ValueErrorsortedloggerwarning)r0   $valid_tokenizer_file_names_and_filesrC   tokenizer_files       r   get_one_valid_tokenizer_filerT   f   s     ,I+O+O(
/00A553444
/00144)M 	& 	&%I~	))%%%% * D..YYYZ\]^_`WnWWWXXXX=a@Cr   	cache_dirtokenforce_downloadlocal_files_onlyc           	         t                       |r|rt          d          |s	 t          j                    }|                    | ||          }d}n# t
          j        t
          j        t
          j        f$ re}|r|t          | |          }d}t                              d           t          |          dk    rt          d|  d	| d
          |Y d}~nBd}~ww xY wt          | |          }t          |          dk    rt          d|  d	| d          	 t          |          }	n!# t          $ r t          d|  d          w xY wt          j        | ||	||||          }
|
S )a  Download the tokenizer file of a Mistral model from the Hugging Face Hub.

    See [here](https://huggingface.co/mistralai/models) for a list of our OSS models.

    Note:
        You need to install the `huggingface_hub` package to use this method.

        Please run `pip install mistral-common[hf-hub]` to install it.

    Args:
        repo_id: The Hugging Face repo ID.
        cache_dir: The directory where the tokenizer will be cached.
        token: The Hugging Face token to use to download the tokenizer.
        revision: The revision of the model to use. If `None`, the latest revision will be used.
        force_download: Whether to force the download of the tokenizer. If `True`, the tokenizer will be downloaded
            even if it is already cached.
        local_files_only: Whether to only use local files. If `True`, the tokenizer will be downloaded only if it is
            already cached.

    Returns:
        The downloaded tokenizer local path for the given model ID.
    zSYou cannot force the download of the tokenizer if you only want to use local files.)r   rV   F)r   r   TzBCould not connect to the Hugging Face Hub. Using local files only.r   zXCould not connect to the Hugging Face Hub and no local files were found for the repo ID z and revision z6. Please check your internet connection and try again.Nz%No local files found for the repo ID zz. Please check the repo ID and the revision or try to download the tokenizer without setting `local_files_only` to `True`.)r0   z*No valid tokenizer file found in the repo rM   )r   rU   filenamerV   r   rX   rW   )r   rN   r   HfApilist_repo_filesrequestsConnectionError	HTTPErrorTimeoutr/   rP   infor   FileNotFoundErrorrT   hf_hub_download)r   rU   rV   r   rW   rX   hf_api
repo_fileserS   tokenizer_paths              r   download_tokenizer_from_hf_hubrh      s    <  p* pnooo 	$*,,F//(RW/XXJ$((*<h>NO 	 	 	 1'HUUUJ#KK\]]]:!##'fov f f%-f f f   $####	 .gQQQ
z??a#o o ox o o o  
R5JGGG R R RPgPPPQQQR %4)%  N s$   -A &C9ACCD- -E)r   N)NNNFF)loggingr)   pathlibr   typingr   r]   %mistral_common.tokens.tokenizers.baser   &mistral_common.tokens.tokenizers.imager   bool__annotations__r   huggingface_hub.constantsr   r
   	getLogger__name__rP   r   r:   strintr   r/   tuplerE   rT   rh   r   r   r   <module>rv      s@    				              B B B B B B D D D D D D   $$$$NN   NNN 
	8	$	$
 
 
 
&S	 &s &xS	/B & & & &"c S4Z DI    6!c !tE#s(O7L ! ! ! !6S	 c    8 $(# "L LLTzD L #:L Dj	L
 L L 	L L L L L Ls   
7 A A