
    Pi:              	         d Z ddlmZ ddlmZ ddlZddlmZ ddlZddlZ	ddl
mZ ddlZddlmZ ddlmZ erdd	lmZmZmZmZ d
ZdZdZdZg dZdZdZdZdZde de de de d	Z de de dZ!d#dZ"d$dZ#d Z$d  Z% G d! d"e          Z&dS )%a-  
Read a SAS XPort format file into a Pandas DataFrame.

Based on code from Jack Cushman (github.com/jcushman/xport).

The file format is defined here:

https://support.sas.com/content/dam/SAS/support/en/technical-papers/record-layout-of-a-sas-version-5-or-6-data-set-in-sas-transport-xport-format.pdf
    )annotations)datetimeN)TYPE_CHECKING)find_stack_level)
get_handle)	SASReader)CompressionOptionsDatetimeNaTTypeFilePath
ReadBufferzPHEADER RECORD*******LIBRARY HEADER RECORD!!!!!!!000000000000000000000000000000  zKHEADER RECORD*******MEMBER  HEADER RECORD!!!!!!!000000000000000001600000000zPHEADER RECORD*******DSCRPTR HEADER RECORD!!!!!!!000000000000000000000000000000  zPHEADER RECORD*******OBS     HEADER RECORD!!!!!!!000000000000000000000000000000  )ntypenhfunfield_lengthnvar0namelabelnformnflnum_decimalsnfjnfillniformniflnifdnpos_zParameters
----------
filepath_or_buffer : str or file-like object
    Path to SAS file or object implementing binary read method.zindex : identifier of index column
    Identifier of column that should be used as index of the DataFrame.
encoding : str
    Encoding for text data.
chunksize : int
    Read file `chunksize` lines at a time, returns iterator.zBformat : str
    File format, only `xport` is currently supported.z\iterator : bool, default False
    Return XportReader object for reading file incrementally.z#Read a SAS file into a DataFrame.


a  

Returns
-------
DataFrame or XportReader

Examples
--------
Read a SAS Xport file:

>>> df = pd.read_sas('filename.XPT')

Read a Xport file in 10,000 line chunks:

>>> itr = pd.read_sas('filename.XPT', chunksize=10000)
>>> for chunk in itr:
>>>     do_something(chunk)

z$Class for reading SAS Xport files.

z

Attributes
----------
member_info : list
    Contains information about the file
fields : list
    Contains information about the variables in the file
datestrstrreturnr
   c                f    	 t          j        | d          S # t          $ r t          j        cY S w xY w)z1Given a date in xport format, return Python date.z%d%b%y:%H:%M:%S)r   strptime
ValueErrorpdNaT)r   s    k/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/pandas/io/sas/sas_xport.py_parse_dater'      sA     *;<<<   vs    00sc                r    i }d}|D ],\  }}| |||z                                             ||<   ||z  }-|d= |S )a  
    Parameters
    ----------
    s: str
        Fixed-length string to split
    parts: list of (name, length) pairs
        Used to break up string, name '_' will be filtered from output.

    Returns
    -------
    Dict of name:contents of string at given location.
    r   r   )strip)r(   partsoutstartr   lengths         r&   _split_liner/      s[     CE  feefn,-3355D	CJ    c                    |dk    rnt          j        t          |           t          j        d                    }t          j        d| dd|z
             }|                    |          }| |d<   |S | S )N   S8Sz,Sdtypef0)npzeroslenr6   view)vecnbytesvec1r6   vec2s        r&   _handle_truncated_float_vecr@      sv     {{xC"(4..113V33q6z3344yyuy%%T
Jr0   c                   t          j        d          }|                     |          }|d         }|d         }|dz  }t          j        t	          |           t           j                  }d|t          j        |dz            <   d|t          j        |d	z            <   d
|t          j        |dz            <   ||z  }||z	  |dz  dd
|z
  z   z  z  }|dz  }||dz	  dz  dz
  dz  |z   dz   dz  |dz  z  z  }t          j        t	          |          fd          }||d<   ||d<   |                    d          }|                    d          }|S )zf
    Parse a vector of float values representing IBM 8 byte floats into
    native 8 byte floats.
    z>u4,>u4r5   r7   f1i    i       i  @    i         l          A   i     l        z>f8f8)	r8   r6   r;   r9   r:   uint8whereemptyastype)	r<   r6   r>   xport1xport2ieee1shiftieee2ieees	            r&   _parse_float_vecrW      s{   
 HYE88%8  D$ZF$ZF ZE HSXXRX...E+,E"(6J&
'
'(+,E"(6J&
'
'(+,E"(6J&
'
'( 
eOEu_&:"52U;K!LME 
ZE 
6R<4'2-!3u<tCJ E 8SZZM333DDJDJ9959!!D;;tDKr0   c                  d    e Zd ZeZ	 	 	 	 dddZddZd ZddZddZ	ddZ
d d!dZd Zd d"dZdS )#XportReaderN
ISO-8859-1inferfilepath_or_bufferFilePath | ReadBuffer[bytes]encoding
str | None	chunksize
int | Nonecompressionr	   r    Nonec                   || _         d| _        || _        || _        t	          |d|d|          | _        | j        j        | _        	 |                                  d S # t          $ r | 
                                  w xY w)Nr   rbF)r^   is_textrb   )	_encoding_lines_read_index
_chunksizer   handleshandler\   _read_header	Exceptionclose)selfr\   indexr^   r`   rb   s         r&   __init__zXportReader.__init__   s     "#!#
 
 
 #',"5	 	 	 	JJLLL	s   A  A>c                8    | j                                          d S N)rk   ro   rp   s    r&   ro   zXportReader.close  s    r0   c                Z    | j                             d                                          S )NP   )r\   readdecoderu   s    r&   _get_rowzXportReader._get_row  s%    &++B//66888r0   c                   | j                             d           |                                 }|t          k    r"d|v rt	          d          t	          d          |                                 }ddgddgd	dgd
dgddgg}t          ||          }|d         dk    rt	          d          t          |d                   |d<   || _        |                                 }t          |d d                   |d<   |                                 }|                                 }|                    t                    }|t          k    }	|r|	st	          d          t          |dd                   }
ddgddgddgddgd	dgd
dgddgg}t          |                                 |          }ddgd
dgddgddgg}|                    t          |                                 |                     t          |d                   |d<   t          |d                   |d<   || _        ddd}t          |                                 dd                   }|
|z  }|dz  r|d|dz  z
  z  }| j                             |          }g }d}t          |          |
k    r|d |
         ||
d          }}|                    d          }t#          j        d|          }t'          t)          t*          |d !                    }|d
= ||d"                  |d"<   |d#         }|d"         dk    r!|d$k     s|dk    rd%| d&}t-          |          |                                D ]-\  }}	 |                                ||<   # t2          $ r Y *w xY w||d#         z  }||gz  }t          |          |
k    |                                 }|t4          k    st	          d'          || _        || _        | j                                         | _        |                                 | _         d( | j        D             | _!        d) tE          | j                  D             }tG          j$        |          }|| _%        d S )*Nr   z**COMPRESSED**z<Header record indicates a CPORT file, which is not readable.z#Header record is not an XPORT file.prefixrH   versionr2   OSr   created   zSAS     SAS     SASLIBz!Header record has invalid prefix.modifiedzMember header not foundset_namesasdatar   (   typenumericchar)rC   rD   6   :   rw      z>hhhh8s40s8shhh2s8shhl52sT)strictr   r   rD   zFloating field width z is not between 2 and 8.zObservation header not found.c                B    g | ]}|d                                           S )r   )ry   ).0xs     r&   
<listcomp>z,XportReader._read_header.<locals>.<listcomp>u  s(    @@@q&	((**@@@r0   c                h    g | ]/\  }}d t          |          z   dt          |d                   z   f0S )r(   r4   r   )r   )r   ifields      r&   r   z,XportReader._read_header.<locals>.<listcomp>x  sM     
 
 
5 3q66\3U>%:!;!;;<
 
 
r0   )&r\   seekrz   _correct_line1r#   r/   r'   	file_info
startswith_correct_header1_correct_header2intupdatemember_inforx   r:   ljuststructunpackdictzip
_fieldkeys	TypeErroritemsr*   AttributeError_correct_obs_headerfieldsrecord_lengthtellrecord_start_record_countnobscolumns	enumerater8   r6   _dtype)rp   line1line2fifr   line3header1header2	headflag1	headflag2fieldnamelengthmemr   types
fieldcount
datalength	fielddatar   
obs_length
fieldbytesfieldstructr   flmsgkvheaderdtypelr6   s                                r&   rm   zXportReader._read_header  s   $$Q''' N""5(( !R   BCCC"~	1~ay3)iQS_Us++	X":::@AAA*9Y+?@@	)" +E#2#J 7 7	* --//--//&&'788	//	 	8i 	86777gben-- qMONN1I"IO
 "$--//377B#rWbMFA;G;t}}<<==="-k*.E"F"FJ!,[-C!D!DI& &))B/00
$z1
? 	/"zB..J+00<<	
)nn// *?*+/**+ "J $))#..J -(CZPPKZTBBBCCEc
"5>2E'N~&BW~**aR!VVJbJJJnn$  1 wwyyE!HH%   D %//JugF7 )nn//: ,,,<===' 388::&&((	@@DK@@@
 
%dk22
 
 
   s   M55
NNpd.DataFramec                <    |                      | j        pd          S )NrC   nrows)rx   rj   ru   s    r&   __next__zXportReader.__next__  s    yyt3!y444r0   r   c                   | j                             dd           | j                                         | j        z
  }|dz  dk    r"t	          j        dt                                 | j        dk    r)| j                             | j                   || j        z  S | j                             dd           | j                             d          }t          j
        |t          j                  }t          j        |dk              }t          |          dk    rd}nd	t          |          z  }| j                             | j                   ||z
  | j        z  S )
z
        Get number of records in file.

        This is maybe suboptimal because we have to seek to the end of
        the file.

        Side effect: returns file position to record_start.
        r   rD   rw   zxport file may be corrupted.)
stacklevelir5   l     @@  r2   )r\   r   r   r   warningswarnr   r   rx   r8   
frombufferuint64flatnonzeror:   )rp   total_records_lengthlast_card_bytes	last_cardixtail_pads         r&   r   zXportReader._record_count  sU    	$$Q***#6;;==@QQ"$))M.+--   
 ""#(():;;;'4+===$$S!,,,166r::M/CCC	 ^I)<<==r77a<<HH3r77{H$$T%6777$x/D4FFFr0   sizec                @    || j         }|                     |          S )a  
        Reads lines from Xport file and returns as dataframe

        Parameters
        ----------
        size : int, defaults to None
            Number of lines to read.  If None, reads whole file.

        Returns
        -------
        DataFrame
        Nr   )rj   rx   )rp   r   s     r&   	get_chunkzXportReader.get_chunk  s$     <?Dyyty$$$r0   c                    |                     d          }|d         dk    |d         dk    z  |d         dk    z  }|d         dk    |d         d	k    z  |d         d
k    z  |d         dk    z  }||z  }|S )Nzu1,u1,u2,u4r5   rB   r   f2f3r7   rJ   Z   _   .   )r;   )rp   r<   r   missmiss1s        r&   _missing_doublezXportReader._missing_double  s    HH=H))$14A.!D'Q,?go!D'T/2w$ w$  	
 	r0   r   c                    | j         }t          | j          j        z
            }| j        z  }|dk    r                                  t
           j                            |          }t          j	        | j
        |          }i }t           j                  D ]\  }}|dt          |          z            }	 j        |         d         }
|
dk    rUt          |	 j        |         d                   }	                     |	          }t#          |	          }t          j        ||<   n8 j        |         d         dk    r!d	 |	D             } j         fd
|D             }|                    ||i           t+          j        |          } j        5t+          j        t3           j         j        |z                       |_        n|                     j                  } xj        |z  c_        |S )a  Read observations from SAS Xport file, returning as data frame.

        Parameters
        ----------
        nrows : int
            Number of rows to read from data file; if None, read whole
            file.

        Returns
        -------
        A DataFrame.
        Nr   )r6   countr(   r   r   r   r   c                6    g | ]}|                                 S  )rstrip)r   ys     r&   r   z$XportReader.read.<locals>.<listcomp>  s     ---AQXXZZ---r0   c                D    g | ]}|                     j                  S r   )ry   rg   )r   r   rp   s     r&   r   z$XportReader.read.<locals>.<listcomp>  s'    ===a$.11===r0   )r   minrh   r   ro   StopIterationr\   rx   r8   r   r   r   r   r   r   r@   r   rW   nanrg   r   r$   	DataFrameri   Indexrangerq   	set_index)rp   r   
read_linesread_lenrawdatadf_datajr   r<   r   r   r   dfs   `             r&   rx   zXportReader.read  s    =IE	D,< <==
 22q==JJLLL%**844}S:FFFdl++ 	# 	#DAqsSVV|$CKN7+E	!!1#t{1~n7UVV++C00$S))&$Q(F22----->-====1===ANNAq6""""\'"";xd&68H:8U V VWWBHHdk**BJ&	r0   )NrZ   Nr[   )
r\   r]   r^   r_   r`   ra   rb   r	   r    rc   )r    rc   )r    r   )r    r   rt   )r   ra   r    r   )r   ra   r    r   )__name__
__module____qualname___xport_reader_doc__doc__rr   ro   rz   rm   r   r   r   r   rx   r   r0   r&   rY   rY      s        G
 + $*1    8   9 9 9l l l l\5 5 5 5$G $G $G $GL% % % % %"	 	 	1 1 1 1 1 1 1r0   rY   )r   r   r    r
   )r(   r   )'r   
__future__r   r   r   typingr   r   numpyr8   pandas.util._exceptionsr   pandasr$   pandas.io.commonr   pandas.io.sas.sasreaderr   pandas._typingr	   r
   r   r   r   r   r   r   r   _base_params_doc_params2_doc_format_params_doc_iterator_doc_read_sas_docr   r'   r/   r@   rW   rY   r   r0   r&   <module>r     s?    # " " " " "                         4 4 4 4 4 4     ' ' ' ' ' ' - - - - - -             W  R  W  W   
(C @9 A
    	 
   2   	         ,  &6 6 6rI I I I I) I I I I Ir0   