
    &`i14                        d Z ddlmZ ddlmZ ddlmZmZmZm	Z	 ddl
Z
ddlmZ ddlmZ ddlmZ erddlmZmZ ddZe G d d                      ZdS )zCString namespace for expression operations on string-typed columns.    )annotations)	dataclass)TYPE_CHECKINGAnyCallableLiteralN)DataTypepyarrow_udf)ExprUDFExprpc_funcCallable[..., pyarrow.Array]return_dtyper	   returnCallable[..., 'UDFExpr']c                     d	 fd}|S )
a  Helper to create a string UDF that wraps a PyArrow compute function.

    This helper handles all types of PyArrow compute operations:
    - Unary operations (no args): upper(), lower(), reverse()
    - Pattern operations (pattern + args): starts_with(), contains()
    - Multi-argument operations: replace(), replace_slice()

    Args:
        pc_func: PyArrow compute function that takes (array, *positional, **kwargs)
        return_dtype: The return data type

    Returns:
        A callable that creates UDFExpr instances
    exprr   
positionalr   kwargsr   	'UDFExpr'c                Z    t                    dfd            } ||           S )Nr   arrpyarrow.Arrayr   c                     | gR i S N )r   r   r   r   s    /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/data/namespace_expressions/string_namespace.pyudfz-_create_str_udf.<locals>.wrapper.<locals>.udf%   s#    736666v666    r   r   r   r   r
   )r   r   r   r    r   r   s    `` r   wrapperz _create_str_udf.<locals>.wrapper$   sS    	,	/	/	/	7 	7 	7 	7 	7 	7 	7 
0	/	7 s4yyr!   )r   r   r   r   r   r   r   r   r   )r   r   r#   s   `` r   _create_str_udfr$      s.    $       Nr!   c                  ~   e Zd ZU dZded<   dDdZdDdZdDdZdDd	ZdDd
Z	dDdZ
dDdZdDdZdDdZdDdZdDdZdDdZdDdZdDdZdDdZdDdZdDdZdDdZdEdZdEdZdEdZdEd ZdEd!ZdEd"ZdEd#ZdEd$ZdEd%ZdDd&Z dFd'Z!dGd)Z"dGd*Z#dHd.Z$dEd/Z%dEd0Z&dFd1Z'dEd2Z(dId4Z)	 dJdKd8Z*dLdMd<Z+dLdMd=Z,dLdMd>Z-	 	 dNdOdCZ.d9S )P_StringNamespacea  Namespace for string operations on expression columns.

    This namespace provides methods for operating on string-typed columns using
    PyArrow compute functions.

    Example:
        >>> from ray.data.expressions import col
        >>> # Convert to uppercase
        >>> expr = col("name").str.upper()
        >>> # Get string length
        >>> expr = col("name").str.len()
        >>> # Check if string starts with a prefix
        >>> expr = col("name").str.starts_with("A")
    r   _exprr   r   c                t     t          t          j        t          j                              | j                  S )z,Get the length of each string in characters.)r$   pcutf8_lengthr	   int32r'   selfs    r   lenz_StringNamespace.lenB   s(    @r~x~/?/?@@LLLr!   c                t     t          t          j        t          j                              | j                  S )z'Get the length of each string in bytes.)r$   r)   binary_lengthr	   r+   r'   r,   s    r   byte_lenz_StringNamespace.byte_lenF   s)    Br/1A1ABB4:NNNr!   c                t     t          t          j        t          j                              | j                  S )zConvert strings to uppercase.)r$   r)   
utf8_upperr	   stringr'   r,   s    r   upperz_StringNamespace.upperK   (    @r}ho.?.?@@LLLr!   c                t     t          t          j        t          j                              | j                  S )zConvert strings to lowercase.)r$   r)   
utf8_lowerr	   r4   r'   r,   s    r   lowerz_StringNamespace.lowerO   r6   r!   c                t     t          t          j        t          j                              | j                  S )z.Capitalize the first character of each string.)r$   r)   utf8_capitalizer	   r4   r'   r,   s    r   
capitalizez_StringNamespace.capitalizeS   s)    Er18?3D3DEEdjQQQr!   c                t     t          t          j        t          j                              | j                  S )zConvert strings to title case.)r$   r)   
utf8_titler	   r4   r'   r,   s    r   titlez_StringNamespace.titleW   r6   r!   c                t     t          t          j        t          j                              | j                  S )z Swap the case of each character.)r$   r)   utf8_swapcaser	   r4   r'   r,   s    r   swapcasez_StringNamespace.swapcase[   s)    Cr/1B1BCCDJOOOr!   c                t     t          t          j        t          j                              | j                  S )z4Check if strings contain only alphabetic characters.)r$   r)   utf8_is_alphar	   boolr'   r,   s    r   is_alphaz_StringNamespace.is_alpha`   '    Ar/AA$*MMMr!   c                t     t          t          j        t          j                              | j                  S )z6Check if strings contain only alphanumeric characters.)r$   r)   utf8_is_alnumr	   rE   r'   r,   s    r   is_alnumz_StringNamespace.is_alnumd   rG   r!   c                t     t          t          j        t          j                              | j                  S )z%Check if strings contain only digits.)r$   r)   utf8_is_digitr	   rE   r'   r,   s    r   is_digitz_StringNamespace.is_digith   rG   r!   c                t     t          t          j        t          j                              | j                  S )z1Check if strings contain only decimal characters.)r$   r)   utf8_is_decimalr	   rE   r'   r,   s    r   
is_decimalz_StringNamespace.is_decimall   '    Cr18=??CCDJOOOr!   c                t     t          t          j        t          j                              | j                  S )z1Check if strings contain only numeric characters.)r$   r)   utf8_is_numericr	   rE   r'   r,   s    r   
is_numericz_StringNamespace.is_numericp   rQ   r!   c                t     t          t          j        t          j                              | j                  S )z)Check if strings contain only whitespace.)r$   r)   utf8_is_spacer	   rE   r'   r,   s    r   is_spacez_StringNamespace.is_spacet   rG   r!   c                t     t          t          j        t          j                              | j                  S )zCheck if strings are lowercase.)r$   r)   utf8_is_lowerr	   rE   r'   r,   s    r   is_lowerz_StringNamespace.is_lowerx   rG   r!   c                t     t          t          j        t          j                              | j                  S )zCheck if strings are uppercase.)r$   r)   utf8_is_upperr	   rE   r'   r,   s    r   is_upperz_StringNamespace.is_upper|   rG   r!   c                t     t          t          j        t          j                              | j                  S )z!Check if strings are title-cased.)r$   r)   utf8_is_titler	   rE   r'   r,   s    r   is_titlez_StringNamespace.is_title   rG   r!   c                t     t          t          j        t          j                              | j                  S )z3Check if strings contain only printable characters.)r$   r)   utf8_is_printabler	   rE   r'   r,   s    r   is_printablez_StringNamespace.is_printable   s'    Er3X]__EEdjQQQr!   c                t     t          t          j        t          j                              | j                  S )z/Check if strings contain only ASCII characters.)r$   r)   string_is_asciir	   rE   r'   r,   s    r   is_asciiz_StringNamespace.is_ascii   rQ   r!   patternstrargsr   r   c                x     t          t          j        t          j                              | j        |g|R i |S )z&Check if strings start with a pattern.)r$   r)   starts_withr	   rE   r'   r-   rg   ri   r   s       r   rk   z_StringNamespace.starts_with   sI    ?r~x}??J
"&
 
 
*0
 
 	
r!   c                x     t          t          j        t          j                              | j        |g|R i |S )z$Check if strings end with a pattern.)r$   r)   	ends_withr	   rE   r'   rl   s       r   rn   z_StringNamespace.ends_with   sI    =r|X]__==J
"&
 
 
*0
 
 	
r!   c                x     t          t          j        t          j                              | j        |g|R i |S )z%Check if strings contain a substring.)r$   r)   match_substringr	   rE   r'   rl   s       r   containsz_StringNamespace.contains   sJ    Cr18=??CCJ
"&
 
 
*0
 
 	
r!   c                x     t          t          j        t          j                              | j        |g|R i |S )z)Match strings against a SQL LIKE pattern.)r$   r)   
match_liker	   rE   r'   rl   s       r   matchz_StringNamespace.match   sI    >r}hmoo>>J
"&
 
 
*0
 
 	
r!   c                x     t          t          j        t          j                              | j        |g|R i |S )z)Find the first occurrence of a substring.)r$   r)   find_substringr	   r+   r'   rl   s       r   findz_StringNamespace.find   sL    Cr0(.2B2BCCJ
"&
 
 
*0
 
 	
r!   c                x     t          t          j        t          j                              | j        |g|R i |S )z!Count occurrences of a substring.)r$   r)   count_substringr	   r+   r'   rl   s       r   countz_StringNamespace.count   sL    Dr18>3C3CDDJ
"&
 
 
*0
 
 	
r!   c                x     t          t          j        t          j                              | j        |g|R i |S )z3Find the first occurrence matching a regex pattern.)r$   r)   find_substring_regexr	   r+   r'   rl   s       r   
find_regexz_StringNamespace.find_regex   sL    Ir68H8HIIJ
"&
 
 
*0
 
 	
r!   c                x     t          t          j        t          j                              | j        |g|R i |S )z+Count occurrences matching a regex pattern.)r$   r)   count_substring_regexr	   r+   r'   rl   s       r   count_regexz_StringNamespace.count_regex   sL    Jr79I9IJJJ
"&
 
 
*0
 
 	
r!   c                x     t          t          j        t          j                              | j        |g|R i |S )z'Check if strings match a regex pattern.)r$   r)   match_substring_regexr	   rE   r'   rl   s       r   match_regexz_StringNamespace.match_regex   sJ    Ir7IIJ
"&
 
 
*0
 
 	
r!   c                t     t          t          j        t          j                              | j                  S )zReverse each string.)r$   r)   utf8_reverser	   r4   r'   r,   s    r   reversez_StringNamespace.reverse   s(    Br0A0ABB4:NNNr!   c                v     t          t          j        t          j                              | j        g|R i |S )z"Slice strings by codeunit indices.)r$   r)   utf8_slice_codeunitsr	   r4   r'   r-   ri   r   s      r   slicez_StringNamespace.slice   sJ    Jr68I8IJJJ

 
 
!'
 
 	
r!   replacementc                z     t          t          j        t          j                              | j        ||g|R i |S )z#Replace occurrences of a substring.)r$   r)   replace_substringr	   r4   r'   r-   rg   r   ri   r   s        r   replacez_StringNamespace.replace   sQ     Hr3X_5F5FGGJ
/3
 
 
7=
 
 	
r!   c                z     t          t          j        t          j                              | j        ||g|R i |S )z-Replace occurrences matching a regex pattern.)r$   r)   replace_substring_regexr	   r4   r'   r   s        r   replace_regexz_StringNamespace.replace_regex   sQ     Nr98?;L;LMMJ
/3
 
 
7=
 
 	
r!   startintstopc                |     t          t          j        t          j                              | j        |||g|R i |S )zReplace a slice with a string.)r$   r)   binary_replace_slicer	   r4   r'   )r-   r   r   r   ri   r   s         r   replace_slicez_StringNamespace.replace_slice   sS     Kr68I8IJJJt[
37
 
 
;A
 
 	
r!   c                z     t          t          j        t          t                              | j        |g|R i |S )zSplit strings by a pattern.)r$   r)   split_patternr	   objectr'   rl   s       r   splitz_StringNamespace.split   sL    Br/&1A1ABBJ
"&
 
 
*0
 
 	
r!   c                z     t          t          j        t          t                              | j        |g|R i |S )z!Split strings by a regex pattern.)r$   r)   split_pattern_regexr	   r   r'   rl   s       r   split_regexz_StringNamespace.split_regex   sL    Hr5x7G7GHHJ
"&
 
 
*0
 
 	
r!   c                x     t          t          j        t          t                              | j        g|R i |S )zSplit strings on whitespace.)r$   r)   utf8_split_whitespacer	   r   r'   r   s      r   split_whitespacez!_StringNamespace.split_whitespace   sJ    Jr7&9I9IJJJ

 
 
!'
 
 	
r!   c                x     t          t          j        t          j                              | j        |g|R i |S )z-Extract a substring matching a regex pattern.)r$   r)   extract_regexr	   r4   r'   rl   s       r   extractz_StringNamespace.extract   sL    Cr/1B1BCCJ
"&
 
 
*0
 
 	
r!   nc                x     t          t          j        t          j                              | j        |g|R i |S )zRepeat each string n times.)r$   r)   binary_repeatr	   r4   r'   )r-   r   ri   r   s       r   repeatz_StringNamespace.repeat   sL    Cr/1B1BCCJ
 
 
 
$*
 
 	
r!    widthpaddingc                z     t          t          j        t          j                              | j        ||g|R i |S )z)Center strings in a field of given width.)r$   r)   utf8_centerr	   r4   r'   )r-   r   r   ri   r   s        r   centerz_StringNamespace.center  sP     Br~x/@/@AAJw
)-
 
 
17
 
 	
r!   N
characters
str | Nonec                ~    t          t          j                              dfd            } || j                  S )zRemove leading and trailing whitespace or specified characters.

        Args:
            characters: Characters to remove. If None, removes whitespace.

        Returns:
            UDFExpr that strips characters from both ends.
        r   r   r   r   c                \    t          j        |           S t          j        |           S N)r   )r)   utf8_trim_whitespace	utf8_trimr   r   s    r   
_str_stripz*_StringNamespace.strip.<locals>._str_strip  s/    !.s333|CJ????r!   r"   r   r	   r4   r'   )r-   r   r   s    ` r   stripz_StringNamespace.strip  s[     
(/"3"3	4	4	4	@ 	@ 	@ 	@ 	@ 
5	4	@ z$*%%%r!   c                ~    t          t          j                              dfd            } || j                  S )zRemove leading whitespace or specified characters.

        Args:
            characters: Characters to remove. If None, removes whitespace.

        Returns:
            UDFExpr that strips characters from the left.
        r   r   r   r   c                \    t          j        |           S t          j        |           S r   )r)   utf8_ltrim_whitespace
utf8_ltrimr   s    r   _str_lstripz,_StringNamespace.lstrip.<locals>._str_lstrip*  /    !/444}SZ@@@@r!   r"   r   )r-   r   r   s    ` r   lstripz_StringNamespace.lstrip   [     
(/"3"3	4	4	4	A 	A 	A 	A 	A 
5	4	A {4:&&&r!   c                ~    t          t          j                              dfd            } || j                  S )zRemove trailing whitespace or specified characters.

        Args:
            characters: Characters to remove. If None, removes whitespace.

        Returns:
            UDFExpr that strips characters from the right.
        r   r   r   r   c                \    t          j        |           S t          j        |           S r   )r)   utf8_rtrim_whitespace
utf8_rtrimr   s    r   _str_rstripz,_StringNamespace.rstrip.<locals>._str_rstrip=  r   r!   r"   r   )r-   r   r   s    ` r   rstripz_StringNamespace.rstrip3  r   r!   rightfillcharside Literal['left', 'right', 'both']c                    t          t          j                              dfd            } || j                  S )a  Pad strings to a specified width.

        Args:
            width: Target width.
            fillchar: Character to use for padding.
            side: "left", "right", or "both" for padding side.

        Returns:
            UDFExpr that pads strings.
        r   r   r   r   c                    dk    rt          j        |           S dk    rt          j        |           S dk    rt          j        |           S t	          d          )Nr   )r   r   leftbothz'side must be 'left', 'right', or 'both')r)   	utf8_rpad	utf8_lpadr   
ValueError)r   r   r   r   s    r   _str_padz&_StringNamespace.pad.<locals>._str_padX  sr    w|CuhGGGG|CuhGGGG~cIIII !JKKKr!   r"   r   )r-   r   r   r   r   s    ``` r   padz_StringNamespace.padG  si    " 
(/"3"3	4	4	4	L 	L 	L 	L 	L 	L 	L 
5	4	L x
###r!   )r   r   )rg   rh   ri   r   r   r   r   r   )ri   r   r   r   r   r   )
rg   rh   r   rh   ri   r   r   r   r   r   )r   r   r   r   r   rh   ri   r   r   r   r   r   )r   r   ri   r   r   r   r   r   )r   )
r   r   r   rh   ri   r   r   r   r   r   r   )r   r   r   r   )r   r   )r   r   r   rh   r   r   r   r   )/__name__
__module____qualname____doc____annotations__r.   r1   r5   r9   r<   r?   rB   rF   rJ   rM   rP   rT   rW   rZ   r]   r`   rc   rf   rk   rn   rq   rt   rw   rz   r}   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r!   r   r&   r&   .   s          KKKM M M MO O O O
M M M MM M M MR R R RM M M MP P P P
N N N NN N N NN N N NP P P PP P P PN N N NN N N NN N N NN N N NR R R RP P P P

 
 
 

 
 
 

 
 
 

 
 
 

 
 
 

 
 
 

 
 
 

 
 
 

 
 
 
O O O O
 
 
 

 
 
 

 
 
 

 
 
 

 
 
 

 
 
 

 
 
 

 
 
 

 
 
 
 *-
 
 
 
 
& & & & &&' ' ' ' '&' ' ' ' '. 18	$ $ $ $ $ $ $r!   r&   )r   r   r   r	   r   r   )r   
__future__r   dataclassesr   typingr   r   r   r   pyarrowpyarrow.computecomputer)   ray.data.datatyper	   ray.data.expressionsr   r   r   r$   r&   r   r!   r   <module>r      s   I I " " " " " " ! ! ! ! ! ! 8 8 8 8 8 8 8 8 8 8 8 8        & & & & & & , , , , , , 322222222   8 t$ t$ t$ t$ t$ t$ t$ t$ t$ t$r!   