
    &`i_:                     *   d dl Z d dlZd dlZd dlmZmZmZmZmZ d dl	m
Z
mZmZ d dlmZmZ  e j        e          Zerd dlZd dlZd"dZdeed	                  ded
         fdZdeded
         ded
ef         fdZdedeee                  defdZdddddddddddddZdd
dedefdZ	 d#deded
         ded
ef         fdZ	 d#deeee         f         ded
         deee         d
f         fdZdd
defdZ d Z!defdZ"defd Z#dedefd!Z$dS )$    N)TYPE_CHECKINGListOptionalTupleUnion)quoteunquoteurlparse)RetryingPyFileSystem_resolve_custom_schemereturnpyarrow.fs.PyFileSystemc                      	 ddl } ddlm} n# t          $ r t	          d          dw xY wddlm}m}  | | |                                S )zGet fsspec HTTPFileSystem wrapped in PyArrow PyFileSystem.

    Returns:
        PyFileSystem wrapping fsspec HTTPFileSystem.

    Raises:
        ImportError: If fsspec is not installed.
    r   NHTTPFileSystemz.Please install fsspec to read files from HTTP.FSSpecHandlerPyFileSystem)fsspecfsspec.implementations.httpr   ModuleNotFoundErrorImportError
pyarrow.fsr   r   )r   r   r   r   s       q/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/data/datasource/path_util.py_get_fsspec_http_filesystemr      s    V>>>>>>> V V VJKKQUUV 76666666<nn&6&677888s   
 (
filesystem)pyarrow.fs.FileSystemzfsspec.spec.AbstractFileSystemr   c                 6   | dS ddl m} t          | |          r| S 	 ddl}n# t          $ r t          d          dw xY wt          | |j        j                  s$t          dt          |           j	                   ddl m
}m}  | ||                     S )a  Validate filesystem and wrap fsspec filesystems in PyArrow.

    Args:
        filesystem: Filesystem to validate and potentially wrap. Can be None,
            a pyarrow.fs.FileSystem, or an fsspec.spec.AbstractFileSystem.

    Returns:
        None if filesystem is None, otherwise a pyarrow.fs.FileSystem
        (either the original if already PyArrow, or wrapped if fsspec).

    Raises:
        TypeError: If filesystem is not None and not a valid pyarrow or fsspec filesystem.
    Nr   )
FileSystemzfsspec is not installedzYFilesystem must conform to pyarrow.fs.FileSystem or fsspec.spec.AbstractFileSystem, got: r   )r   r   
isinstancer   r   	TypeErrorspecAbstractFileSystemtype__name__r   r   )r   r   r   r   r   s        r   _validate_and_wrap_filesystemr&   $   s    $ t%%%%%%*j)) = = = =122<= j&+"@AA 
P484D4D4MP P
 
 	

 76666666<j11222s   # >pathc                 v    ddl m} t          | dd          } |||          \  }}|t          |d          fS )a]  Try resolving a path with URL encoding for special characters.

    This handles paths with special characters like ';', '?', '#' that
    may cause URI parsing errors.

    Args:
        path: The path to resolve.
        filesystem: Optional filesystem to validate against.

    Returns:
        Tuple of (resolved_filesystem, resolved_path).
    r   _resolve_filesystem_and_pathz/:ignore)safeerrors)r-   )r   r*   r   r	   )r'   r   r*   encoded_pathresolved_filesystemresolved_paths         r   _try_resolve_with_encodingr1   N   sa      877777D:::L)E)Ej* *& h G G GGG    
extensionsc                      |,t          |t                    sJ t          |                      |dS d |D             }t           fd|D                       S )a=  Check if a path has a file extension in the provided list.

    Examples:
        >>> _has_file_extension("foo.csv", ["csv"])
        True
        >>> _has_file_extension("foo.CSV", ["csv"])
        True
        >>> _has_file_extension("foo.CSV", [".csv"])
        True
        >>> _has_file_extension("foo.csv", ["json", "jsonl"])
        False
        >>> _has_file_extension("foo.csv", None)
        True

    Args:
        path: The path to check.
        extensions: A list of extensions to check against. If `None`, any extension is
            considered valid.
    NTc                     g | ]B}|                     d           sd |                                 n|                                CS ).)
startswithlower).0exts     r   
<listcomp>z'_has_file_extension.<locals>.<listcomp>   sW        "%!4!4ECIIKK#))++  r2   c              3   f   K   | ]+}                                                     |          V  ,d S N)r8   endswith)r9   r:   r'   s     r   	<genexpr>z&_has_file_extension.<locals>.<genexpr>   s9      @@ctzz||$$S))@@@@@@r2   )r    listr$   any)r'   r3   s   ` r   _has_file_extensionrB   g   s~    ( J!=!=tJ?O?O=t   J @@@@Z@@@@@@r2   )local)s3)gcs)hdfs)abfs)py) filerD   s3agsrE   rF   viewfsrG   abfsshttphttpsschemec                     t                               |                                          }|dS | j        }|dk    r|dv rt	          |           S ||v S )a  Check if a filesystem is compatible with a URI scheme.

    Uses PyArrow's `type_name` property for reliable filesystem type detection.
    This prevents silently using the wrong filesystem for a URI, which can result
    in malformed paths or incorrect behavior.

    Args:
        filesystem: The PyArrow filesystem to check.
        scheme: The URI scheme (e.g., 's3', 'gs', 'http', 'file', '').

    Returns:
        True if the filesystem can handle the scheme, False otherwise.
    NTrH   rO   rP   )_SCHEME_TO_FS_TYPE_NAMESgetr8   	type_name_is_http_filesystem)r   rQ   expected_typesfs_types       r   %_is_filesystem_compatible_with_schemerZ      se    $ .11&,,..AAN t "G $6%666":...n$$r2   c           
      z   ddl }ddlm} t          |           } 	 t	          |          }n%# t
          $ r}t          d|           |d}~ww xY wt          | d          }|j        r|j        	                                nd}|dv rS|t          ||          r|| fS 	 t                      }| }||fS # t          $ r}	t          d	|  d
|	           |	d}	~	ww xY w|4t          ||          r$	  || |          \  }
}||fS # t          $ r Y nw xY w	  || d          \  }}n# |j        j        t          f$ r\}	 t!          | d          \  }}n=# |j        j        t          t
          f$ r}t          d|  d| d|           |d}~ww xY wY d}~n+d}~wt
          $ r}t          d|  d|           |d}~ww xY w||fS )a  Resolve a single path with filesystem, with fallback to re-resolution on error.

    This is a helper for lazy filesystem resolution. If a filesystem is provided,
    it first validates that the filesystem type is compatible with the URI scheme,
    then attempts to resolve the path. If the filesystem is incompatible or
    resolution fails, it re-resolves without the cached filesystem.

    Args:
        path: A single file/directory path.
        filesystem: Optional cached filesystem from previous resolution.

    Returns:
        Tuple of (resolved_filesystem, resolved_path).

    Raises:
        ValueError: If path resolution fails.
        ImportError: If required dependencies are missing.
    r   Nr)   zInvalid filesystem provided: Fallow_fragmentsrI   rS   zCannot resolve HTTP path '': Failed to resolve path 'z'. Initial error: z%. URL encoding fallback also failed: zThe path: 'z' has an invalid type )pyarrowr   r*   r   r&   r!   
ValueErrorr
   rQ   r8   rZ   r   r   	ExceptionlibArrowInvalidr1   )r'   r   par*   eparsedrQ   r/   r0   import_error_original_errorencoding_errors                r   "_resolve_single_path_with_fallbackrl      s   , 777777!$''DE2:>>

 E E E<<<==1DE dE222F&,m;V]  """F """!&K'
 '
! t##	 "="?"? M&55 	  	  	 DTDDlDD  	  "GF# #	;;D*MMA}},, 	 	 	D	
O-I-I$PT-U-U*]]F, 	" 	" 	"	"1KDRV1W1W.#Z; 	" 	" 	"G4 G G> G G6DG G  ""	" "/  O O OFtFF1FFGGQNO --s   + 
AAAB2 2
C<CC-D   
DDD! !F68EFF(FFFF6F11F6pathsc           	         t          | t                    r| g} t          | t          j                  rt          |           g} nct          | t                    rt          d | D                       rt          d|  d          t          |           dk    rt          d          t          |          }g }| D ]}	 t          ||          \  }}n?# t          t          f$ r+}t                              d| d| d           Y d	}~Nd	}~ww xY w||}t          |          st          |          }|                    |          }|                    |           ||fS )
at  
    Resolves and normalizes all provided paths, infers a filesystem from the
    paths and assumes that all paths use the same filesystem.

    Args:
        paths: A single file/directory path or a list of file/directory paths.
            A list of paths can contain both files and directories.
        filesystem: The filesystem implementation that should be used for
            reading these files. If None, a filesystem will be inferred. If not
            None, the provided filesystem will still be validated against all
            filesystems inferred from the provided paths to ensure
            compatibility.
    c              3   B   K   | ]}t          |t                     V  d S r=   )r    str)r9   ps     r   r?   z0_resolve_paths_and_filesystem.<locals>.<genexpr>(  s/      +R+Rq
1c0B0B,B+R+R+R+R+R+Rr2   zIExpected `paths` to be a `str`, `pathlib.Path`, or `list[str]`, but got ``r   zMust provide at least one path.r_   r^   z
, skippingN)r    rp   pathlibPathr@   rA   ra   lenr&   rl   r   loggerwarningrW   _unwrap_protocolnormalize_pathappend)rm   r   resolved_pathsr'   r/   r0   rf   s          r   _resolve_paths_and_filesystemr|     s   " % %&& <Ut$$ <+R+RE+R+R+R(R(R <  
 
 	
 
Uq:;;; /z::JN - -	1Sj2 2. K( 	 	 	NNLdLLqLLLMMMHHHH	 ,J ##677 	<,];;M+::=IIm,,,,:%%s   =CD"!DDfsc                    ddl m}m} 	 ddlm} n# t
          $ r Y dS w xY wt          | t                    r|                                 } t          | |          sdS t          | j	        |          ot          | j	        j
        |          S )zKReturn whether ``fs`` is a PyFileSystem handled by a fsspec HTTPFileSystem.r   r   r   F)r   r   r   r   r   r   r    r   unwraphandlerr}   )r}   r   r   r   s       r   rW   rW   M  s    66666666>>>>>>>   uu "*++ YY[[b,'' ubj-00 Z

~6 6 s    
c                 V   t           j        dk    r5t          |           r&t          j        |                                           S t          | d          }|j        r
d|j        z   nd}|j        r
d|j        z   nd}|j	        }|j
        dk    r)d|j	        v r |j	                            d          d	         }|j        }t           j        dk    rQ|sOt          |          d
k    r<|d         dk    r0|d                                         r|dd         dv r
|dd         }||z   |z   |z   S )z2
    Slice off any protocol prefixes on path.
    win32Fr\   ;rI   ?rD   @   r   /         ):z:/N)sysplatform_is_local_windows_pathrs   rt   as_posixr
   paramsquerynetlocrQ   splitr'   ru   isalpha)r'   rg   r   r   r   parsed_paths         r   rx   rx   b  sI    |w#9$#?#? |D!!**,,,dE222F$*M9S6=  rF"(,6C&,BE]F}!5!5 $$S))"-+K 	 	 !!Nc!!N""$$ "!++!!""oK&(500r2   c                 2    t          |           j        dk    S )NrI   r
   rQ   r'   s    r   _is_urlr     s    D>> B&&r2   c                 2    t          |           }|j        dv S )NrS   r   )r'   rg   s     r   _is_http_urlr     s    d^^F=---r2   c                    t           j        dk    rdS t          |           dk    r| d         dk    rdS t          |           dk    r@| d         dk    r4| d	         d
k    s| d	         dk    r| d                                         rdS dS )z5Determines if path is a Windows file-system location.r   Fr   r   \Tr   r   r   r   )r   r   ru   r   r   s    r   r   r     s    
|wu
4yyA~~$q'T//tD		QGsNN!W^^tAw$GOO  / t5r2   )r   r   r=   )%loggingrs   r   typingr   r   r   r   r   urllib.parser   r	   r
   ray.data._internal.utilr   r   	getLoggerr%   rv   fsspec.specr   r`   r   r&   rp   r1   boolrB   rT   rZ   rl   r|   rW   rx   r   r   r    r2   r   <module>r      s"     



 > > > > > > > > > > > > > > 1 1 1 1 1 1 1 1 1 1 P P P P P P P P		8	$	$ NNN9 9 9 9('3GH'3 %&	'3 '3 '3 '3TH
H01H "C'(H H H H2Ac AxS	/B At A A A AL 	

   %'%% 
% % % %H 59R. R.
R.01R. "C'(R. R. R. R.n 597& 7&d3i 7&017& 49--.7& 7& 7& 7&t3     *1 1 1D'T ' ' ' '.$ . . . .
       r2   