
    &`i                      j    d dl Z d dlZd dlZd dlmZmZmZmZ d dlZd dl	Z	dddZ
 G d d          ZdS )    N)AnyCallableDictOptional
read_deltaread_parquet)deltaparquetc                       e Zd ZdZdddddddededed	ee         d
ee         dedee         dee         fdZdefdZ	d Z
d Zedefd            ZdefdZd
ededef         fdZd Zd ZdS )UnityCatalogConnectora  
    Load a Unity Catalog table or files into a Ray Dataset, handling cloud credentials automatically.

    Currently only supports Databricks-managed Unity Catalog

    Supported formats: delta, parquet.
    Supports AWS, Azure, and GCP with automatic credential handoff.
    Nr	   READ)regiondata_format	operationray_init_kwargsreader_kwargsbase_urltokentable_full_namer   r   r   r   r   c                    |                     d          | _        || _        || _        |r|                                nd | _        || _        || _        |pi | _        |pi | _	        d | _
        d S )N/)rstripr   r   r   lowerr   r   r   r   r   _gcp_temp_file)	selfr   r   r   r   r   r   r   r   s	            /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/data/_internal/datasource/uc_datasource.py__init__zUnityCatalogConnector.__init__   sz     !,,
.2=G;,,...4".4"*0b"    returnc                     | j          d| j         }dd| j         i}t          j        ||          }|                                 |                                }|| _        |d         | _        |S )Nz/api/2.1/unity-catalog/tables/AuthorizationBearer )headerstable_id)	r   r   r   requestsgetraise_for_statusjson_table_info	_table_id)r   urlr#   respdatas        r   _get_table_infoz%UnityCatalogConnector._get_table_info0   s{    TTd>RTT"$:dj$:$:;|C111yy{{j)r   c                     | j          d}dd| j         d}| j        | j        d}t	          j        |||          }|                                 |                                | _        | j        d         | _	        d S )Nz2/api/2.1/unity-catalog/temporary-table-credentialszapplication/jsonr"   )zContent-Typer!   )r$   r   )r(   r#   r+   )
r   r   r*   r   r%   postr'   r(   _creds_response
_table_url)r   r+   r#   payloadr,   s        r   
_get_credsz UnityCatalogConnector._get_creds:   s    RRR.3tz33
 
  $~DNKK}Sw@@@#yy{{.u5r   c                 v   i }| j         }d|v rF|d         }|d         |d<   |d         |d<   |d         |d<   | j        r| j        |d<   | j        |d	<   nd
|v r|d
         |d<   nd|v r|d         pi }|                    d          p>|                    d          p)|                    d          p|                    d          }|r|                    d          r
|dd          }|r||d<   n9d                    |                                          }t          d|           |                    d          }|r
||d<   ||d<   nd|v r|d         }t          j        dddd          }	|		                    |           |	
                                 |	j        |d<   |	j        | _        t          j        | j        |	j                   n9d                    |                                          }t          d|           |                                D ]\  }
}|t"          j        |
<   d |i| _        d S )!Naws_temp_credentialsaccess_key_idAWS_ACCESS_KEY_IDsecret_access_keyAWS_SECRET_ACCESS_KEYsession_tokenAWS_SESSION_TOKEN
AWS_REGIONAWS_DEFAULT_REGIONazuresasuriAZURE_STORAGE_SAS_TOKENazure_user_delegation_sas	sas_tokensasr   sasToken?   z, zUAzure UC credentials missing SAS token in azure_user_delegation_sas. Available keys: storage_accountAZURE_STORAGE_ACCOUNTAZURE_STORAGE_ACCOUNT_NAMEgcp_service_accountwgcp_sa_z.jsonF)modeprefixsuffixdeleteGOOGLE_APPLICATION_CREDENTIALSzJNo known credential type found in Databricks UC response. Available keys: env_vars)r1   r   r&   
startswithjoinkeys
ValueErrortempfileNamedTemporaryFilewriteclosenamer   atexitregister_cleanup_gcp_temp_fileitemsosenviron_runtime_env)r   rR   credsawsazurerB   
known_keysrG   gcp_json	temp_filekvs               r   _set_envzUnityCatalogConnector._set_envF   s   $!U**./C,/,@H()034G0HH,-,/,@H(){ =)-&15-.e##272FH.// )E1156<"E		+&& )99U##)99W%%) 99Z((	   *Y11#66 *%abbM	 6?233!YYuzz||44
 4'14 4   $ii(9::O I4C019H56"e++23H 3 	  I OOH%%%OO9BH56"+.DOD7HHHH5::<<00J0#-0 0  
 NN$$ 	 	DAqBJqMM'2r   temp_file_pathc                     | rGt           j                            |           r*	 t          j        |            dS # t          $ r Y dS w xY wdS dS )z,Clean up temporary GCP service account file.N)r`   pathexistsunlinkOSError)rl   s    r   r^   z,UnityCatalogConnector._cleanup_gcp_temp_file   sr      	bgnn^<< 		.)))))   	 	 	 	s   9 
AAc                    | j         r| j         S | j        p|                                 }d|v r$|d         r|d                                         }|S |                    d          pt          | dd           }|rVt          j                            |          d         	                    dd                                          }|t          v r|S t          d          )Ndata_source_formatstorage_locationr2   . z0Could not infer data format from table metadata.)r   r)   r.   r   r&   getattrr`   rn   splitextreplace_FILE_FORMAT_TO_RAY_READERrV   )r   infofmtstorage_locexts        r   _infer_data_formatz(UnityCatalogConnector._infer_data_format   s     	$##94#7#7#9#94''D1E,F'+,2244CJhh122WgdLRV6W6W 	'"";//3;;CDDJJLLC000
KLLLr   .c                     |                                 }|t          v r*t          t          j        t          |         d           }|r|S t          d|           )NzUnsupported data format: )r   r{   rx   rayr-   rV   )r   r   r}   reader_funcs       r   _get_ray_readerz%UnityCatalogConnector._get_ray_reader   s]    !!,,,!#(,Fs,KTRRK #"":S::;;;r   c                    ddl m} | j        }| j                                        }d|v rS| j        st          d          |d         }|                    |d         |d         |d         | j                  }||d	<   	 t          j	        j
        | j        fi |S # t          $ r1}t          |          }d
|v sd|v rt          d| d          | d}~ww xY w)zCRead Delta table with proper PyArrow filesystem for session tokens.r   Nr6   zoThe 'region' parameter is required for AWS S3 access. Please specify the AWS region (e.g., region='us-west-2').r7   r9   r;   )
access_key
secret_keyr;   r   
filesystemDeletionVectorszUnsupported reader featureszLDelta table uses Deletion Vectors, which requires deltalake>=0.10.0. Error: z4
Solution: pip install --upgrade 'deltalake>=0.10.0')
pyarrow.fsfsr1   r   copyr   rV   S3FileSystemr   r-   r   r2   	ExceptionstrRuntimeError)r   pafsrc   r   rd   r   e	error_msgs           r   _read_delta_with_credentialsz2UnityCatalogConnector._read_delta_with_credentials   sI   !!!!!!$*//11 "U**;  P   ./C**/23!/2{	 +  J +5M,'	8&tHH-HHH 	 	 	AI!Y..0I=="K'K K K  	
 	s   ?B 
C%,CCc                    |                                   |                                  |                                  |                                 }t	          j                    st	          j        dd| j        i| j         |dk    r| 	                                S | 
                    |          } || j        fi | j        S )Nruntime_envr	    )r.   r4   rk   r   r   is_initializedinitrb   r   r   r   r2   r   )r   r   readers      r   readzUnityCatalogConnector.read   s    --//!## 	LHKK!2Kd6JKKK '!!44666 %%k22vdo<<);<<<r   )__name__
__module____qualname____doc__r   r   r   r   dictr.   r4   rk   staticmethodr^   r   r   r   r   r   r   r   r   r   r   r      sx         !%%,*.(,# # # # 	#
 # # c]# # "$#  ~# # # #,    
6 
6 
6<3 <3 <3| s    \MC M M M M"<3 <8CH3E < < < <% % %N= = = = =r   r   )r\   r`   rW   typingr   r   r   r   r%   r   r{   r   r   r   r   <module>r      s     				  0 0 0 0 0 0 0 0 0 0 0 0  



   M= M= M= M= M= M= M= M= M= M=r   