
    Pi5A              
         U d Z ddlmZ ddlZddlZddlZddlZddlZddlZddl	Z	ddl
Z
ddlZddlZddlmZmZmZmZmZmZ ddlmZ ddlZddlmZmZmZ ddlmZ dgZd	Zi Zd
e d<   d:dZ!d Z"d;dZ#ej$        fdZ%d<d=dZ& G d d          Z' G d de'          Z( G d de'          Z) G d  d!e)          Z* G d" d#e)          Z+ G d$ d%e)          Z, G d& d'e'          Z- G d( d)e)          Z. G d* d+e'          Z/ e'            Z0d, Z1d>d-Z2d. Z3d<d/Z4d0 Z5d<d1Z6d<d2Z7d3 Z8d>d4Z9e1e2e3e4e6e8e9e5e7d5	Z:d6 Z; G d7 d          Z<d8 Z=e>d9k    r e=             dS dS )?z
A commandline tool for semi-automatically converting CSV to RDF.

See also https://github.com/RDFLib/pyTARQL in the RDFlib family of tools

try: `csv2rdf --help`
    )annotationsN)AnyDictListOptionalTupleUnion)quote)RDFRDFS	split_uri)URIRefCSV2RDFa^  
csv2rdf.py     -b <instance-base>     -p <property-base>     [-D <default>]     [-c <classname>]     [-i <identity column(s)>]     [-l <label columns>]     [-s <N>] [-o <output>]     [-f configfile]     [--col<N> <colspec>]     [--prop<N> <property>]     <[-d <delim>]     [-C] [files...]"

Reads csv files from stdin or given files
if -d is given, use this delimiter
if -s is given, skips N lines at the start
Creates a URI from the columns given to -i, or automatically by numbering if
none is given
Outputs RDFS labels from the columns given to -l
if -c is given adds a type triple with the given classname
if -C is given, the class is defined as rdfs:Class
Outputs one RDF triple per column in each row.
Output is in n3 format.
Output is stdout, unless -o is specified

Long options also supported:     --base,     --propbase,     --ident,     --class,     --label,     --out,     --defineclass

Long options --col0, --col1, ...
can be used to specify conversion for columns.
Conversions can be:
    ignore, float(), int(), split(sep, [more]), uri(base, [class]), date(format)

Long options --prop0, --prop1, ...
can be used to use specific properties, rather than ones auto-generated
from the headers

-D sets the default conversion for columns not listed

-f says to read config from a .ini/config file - the file must contain one
section called csv2rdf, with keys like the long options, i.e.:

[csv2rdf]
out=output.n3
base=http://example.org/
col0=split(";")
col1=split(";", uri("http://example.org/things/",
                    "http://xmlns.com/foaf/0.1/Person"))
col2=float()
col3=int()
col4=date("%Y-%b-%d %H:%M:%S")

z*Dict[Any, Tuple[URIRef, Optional[URIRef]]]urislabelstrc                
   t          j        dd|           } t          j        dd|           } |                     d          } d                    | d                                         gd | dd	         D             z             S )
zc
    CamelCase + lowercase initial a string


    FIRST_NM => firstNm

    firstNm => firstNm

    z[^\w] z([a-z])([A-Z])z\1 \2 r   c                6    g | ]}|                                 S  )
capitalize.0xs     h/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/rdflib/tools/csv2rdf.py
<listcomp>ztoProperty.<locals>.<listcomp>n   s     (K(K(KA(K(K(K       N)resubsplitjoinlowerr   s    r   
toPropertyr&   `   s{     F8S%((EF#Y66EKKE77E!HNN$$%(K(Kqrr(K(K(KKLLLr   c                    | dd                                          s'| dd                                         | dd          z   S | S )Nr      r   )isupperr$   r%   s    r   toPropertyLabelr*   q   sJ    1: .QqSz!!E!""I--Lr   l_	List[int]iTuple[int, ...]returnc                :     t           fd|D                       S )zPreturn a set of indexes from a list
    >>> index([1,2,3],(0,2))
    (1, 3)
    c                     g | ]
}|         S r   r   )r   r   r+   s     r   r   zindex.<locals>.<listcomp>|   s    ###A"Q%###r   )tuple)r+   r-   s   ` r   indexr3   w   s(    
 #######$$$r   c              +  D   K   t          j        | fd|i|}|D ]}|V  d S )Ndialect)csvreader)csv_datar5   kwargs
csv_readerrows        r   r:   r:      sF      H@@g@@@J  				 r   class_Optional[URIRef]c           	         |rNt          j        |t          |                     d                              dd          d          z             }nt          j        |           }||ft
          | <   |S )Nutf8r   _r   safe)rdflibr   r
   encodereplacer   )r   prefixr<   rs       r   	prefixurirH      sk     M&5&)9)9)A)A#s)K)KRT#U#U#UUVVM!&kDGHr   c                      e Zd Zd ZddZdS )	NodeMakerc                $    t           j        j        S N)rC   r   Literalselfs    r   rangezNodeMaker.range   s    {""r   r   r   c                *    t          j        |          S rL   )rC   rM   rO   r   s     r   __call__zNodeMaker.__call__   s    ~a   r   N)r   r   )__name__
__module____qualname__rP   rS   r   r   r   rJ   rJ      s7        # # #! ! ! ! ! !r   rJ   c                       e Zd Zd Zd Zd ZdS )NodeUric                j    d | _         || _        |rt          j        |          | _         d S d | _         d S rL   )r<   rF   rC   r   )rO   rF   r<   s      r   __init__zNodeUri.__init__   s8    (, 	 -//DKKKDKKKr   c                8    t          || j        | j                  S rL   )rH   rF   r<   rR   s     r   rS   zNodeUri.__call__   s    DK555r   c                2    | j         pt          j        j        S rL   )r<   rC   r   ResourcerN   s    r   rP   zNodeUri.range   s    {1fj11r   NrT   rU   rV   rZ   rS   rP   r   r   r   rX   rX      sA          6 6 62 2 2 2 2r   rX   c                      e Zd ZddZdS )NodeLiteralNc                    || _         d S rL   f)rO   rc   s     r   rZ   zNodeLiteral.__init__   s    r   rL   )rT   rU   rV   rZ   r   r   r   r`   r`      s(             r   r`   c                      e Zd Zd Zd ZdS )	NodeFloatc                    | j         s!t          j        t          |                    S t	          | j                   r4t          j        t          |                      |                              S t          d          )Nz(Function passed to float is not callable)rc   rC   rM   floatcallable	ExceptionrR   s     r   rS   zNodeFloat.__call__   se    v 	,>%((+++DF 	4>%q		"2"2333BCCCr   c                $    t           j        j        S rL   )rC   XSDdoublerN   s    r   rP   zNodeFloat.range   s    z  r   NrT   rU   rV   rS   rP   r   r   r   re   re      s5        D D D! ! ! ! !r   re   c                      e Zd Zd Zd ZdS )NodeIntc                    | j         s!t          j        t          |                    S t	          | j                   r4t          j        t          |                      |                              S t          d          )Nz&Function passed to int is not callable)rc   rC   rM   intrh   ri   rR   s     r   rS   zNodeInt.__call__   sc    v 	*>#a&&)))DF 	2>#dffQii..111@AAAr   c                $    t           j        j        S rL   )rC   rk   rq   rN   s    r   rP   zNodeInt.range   s    z~r   Nrm   r   r   r   ro   ro      s5        B B B    r   ro   c                      e Zd Zd Zd ZdS )NodeBoolc                    | j         s!t          j        t          |                    S t	          | j                   r4t          j        t          |                      |                              S t          d          )Nz'Function passed to bool is not callable)rc   rC   rM   boolrh   ri   rR   s     r   rS   zNodeBool.__call__   sc    v 	+>$q''***DF 	3>$tvvayy//222ABBBr   c                $    t           j        j        S rL   )rC   rk   rv   rN   s    r   rP   zNodeBool.range   s    zr   Nrm   r   r   r   rt   rt      s5        C C C    r   rt   c                      e Zd Zd Zd ZdS )NodeReplacec                "    || _         || _        d S rL   ab)rO   r|   r}   s      r   rZ   zNodeReplace.__init__   s    r   c                B    |                     | j        | j                  S rL   )rE   r|   r}   rR   s     r   rS   zNodeReplace.__call__   s    yy(((r   N)rT   rU   rV   rZ   rS   r   r   r   ry   ry      s2          ) ) ) ) )r   ry   c                      e Zd Zd Zd ZdS )NodeDatec                p    t          j        t          j                            || j                            S rL   )rC   rM   datetimestrptimerc   rR   s     r   rS   zNodeDate.__call__   s'    ~h/88DFCCDDDr   c                $    t           j        j        S rL   )rC   rk   dateTimerN   s    r   rP   zNodeDate.range   s    z""r   Nrm   r   r   r   r   r      s5        E E E# # # # #r   r   c                       e Zd Zd Zd Zd ZdS )	NodeSplitc                "    || _         || _        d S rL   seprc   )rO   r   rc   s      r   rZ   zNodeSplit.__init__   s    r   c                      j         st          j         _         t           j                   st	          d           fd|                     j                  D             S )Nz)Function passed to split is not callable!c                    g | ]A}|                                 d k                        |                                           BS )r   )striprc   )r   yrO   s     r   r   z&NodeSplit.__call__.<locals>.<listcomp>   s:    PPPa		Rqwwyy!!r   )rc   rC   rM   rh   ri   r"   r   rR   s   ` r   rS   zNodeSplit.__call__   s`    v 	$^DF 	IGHHHPPPP17748+<+<PPPPr   c                    | j         r3t          | j         t                    r| j                                         S t                              |           S rL   )rc   
isinstancerJ   rP   rN   s    r   rP   zNodeSplit.range   sA    6 	"j33 	"6<<>>!t$$$r   Nr^   r   r   r   r   r      sD          Q Q Q% % % % %r   r   c                     dS )Nignorer   )argsr9   s     r   _config_ignorer      s    8r   c                "    t          | |          S rL   )rX   )rF   r<   s     r   _config_urir      s    66"""r   c                     t                      S rL   )r`   r   r   r   _config_literalr      s    ==r   c                     t          |           S rL   )re   rb   s    r   _config_floatr     s    Q<<r   c                "    t          | |          S rL   )ry   r{   s     r   _config_replacer     s    q!r   c                     t          |           S rL   )ro   rb   s    r   _config_intr     s    1::r   c                     t          |           S rL   )rt   rb   s    r   _config_boolr     s    A;;r   c                     t          |           S rL   )r   )format_s    r   _config_dater     s    Gr   c                "    t          | |          S rL   )r   r   s     r   _config_splitr     s    S!r   )	r   uriliteralrg   rq   dater"   rE   rv   c                ,    t          | t                    S )z$Return a function for column mapping)evalconfig_functions)vs    r   columnr   )  s     #$$$r   c                       e Zd Zd Zd Zd ZdS )r   c                    d | _         d | _        d | _        d| _        d | _        d| _        d| _        d| _        d | _        i | _	        i | _
        t          j        | _        d| _        d S )NautoFr   ,)CLASSBASEPROPBASEIDENTLABELDEFINECLASSSKIPDELIMDEFAULTCOLUMNSPROPSsysstdoutOUTtriplesrN   s    r   rZ   zCSV2RDF.__init__0  sd    
	28

 	

:r   c                    | j                             |                                d|                                d|                                d           | xj        dz  c_        d S )Nr   z .
r   )r   writen3r   )rO   spos       r   triplezCSV2RDF.tripleB  sU    @AAAr   c                    t          j                     } j        r,t          j                            d j        j        z              j        dk    r't           j        t                    s j        f _         j	        s-t          j        d           t          j        d           _	         j        s-t          j        d           t          j        d           _        t           j                  D ]}t#          |           t%          t#          |                    }t'          t)           fd|D                                 } j                                        D ]"\  }}|||<   t/          |          d         ||<   # j        ru                      j        t6          j        t:          j                   t          t?          |                    D ]'}||         ||         }
}	|	d	k    s|
d	k    r  j         !                    | j"                  d
k    rE                     |	t6          j        t6          j#                                        |	t:          j$        t          j%        tM          |
                                                    |	t:          j'         j                                        |	t:          j         j         !                    |tP                                                               )d}|D ]}
	  j        dk    r j	        d|z           }n= j	        d)                    d tU          |
 j                  D                                } j+        rY                     |t:          j$        t          j%        d)                    tU          |
 j+                                                  j        r&                     |t6          j         j                   t)          |
          D ]\  }}|,                                }|d	k    r j         !                    | j"                  d
k    rE	   j         !                    |t          j%                  |          }t          |t$                    r#|D ]}                     |||         |            n                     |||         |           # tZ          $ r4}t          j        dd|||         ||j.        fz  z              Y d }~d }~ww xY w|dz  }|dz  dk    r>t          j                            d| j/        t          j                     |z
  fz             T# tZ          $ r$ t          j                            d|z              w xY wta                      }tb                                          D ]\  }
}|\  }}                     |t:          j$        t          j%        |
                     |rJt          j2        |          }|3                    |                                |t6          j        |           |D ]-}                     |t6          j        t:          j                   . j        4                                 t          j                            d| j/        fz             t          j                            dt          j                     |z
  z             d S )NzOutput to %s
r   z2No base given, using http://example.org/instances/zhttp://example.org/instances/z:No property base given, using http://example.org/property/zhttp://example.org/props/c                D    g | ]}j         t          |                   S r   )r   r&   )r   r   rO   s     r   r   z#CSV2RDF.convert.<locals>.<listcomp>]  s&    !V!V!V1$-
1">!V!V!Vr   r   r   r   r   z%dr@   c                ~    g | ]:}t          |                    d                               dd          d          ;S )r?   r   r@   r   rA   )r
   rD   rE   r   s     r   r   z#CSV2RDF.convert.<locals>.<listcomp>{  sS        %& !&ahhv&6&6&>&>sC&H&Hr R R R  r   r   z#Could not process value for column z%d:%s in row %d, ignoring: %s i z$%d rows, %d triples, elapsed %.2fs.
zError processing line: %d
z#Converted %d rows into %d triples.
zTook %.2f seconds.
)5timer   r   stderrr   namer   r   r2   r   warningswarnrC   	Namespacer   rP   r   nextlistdict	enumerater   itemsr   r   r   r   r   typer   Classlenr   getr   Propertyr   rM   r*   domaindefault_node_maker#   r3   r   r   ri   messager   setr   r   addclose)rO   	csvreaderstartr   header_labelsheaderskr   r-   hr+   rowsr   r   _oeclassesucs   `                  r   convertzCSV2RDF.convertF  s   	8 	?J-=>>>:
4:u(E(E*DJy 	JMNOOO()HIIDI} 	JMVWWW",-HIIDM ty!! 	 	AOOOO T)__--y!V!V!V!V!V!V!VWWXXJ$$&& 	/ 	/DAqGAJ(||AM! 	KK
CHdj9993w<<((  
M!$4277bBhh<##At|44@@Asx666Atz6>/":M:M+N+NOOOAt{DJ777tz4<#3#3A7H#I#I#O#O#Q#Q     :	 :	B9:'')D4K0CC)  */r4:)>)>	   	C : KK
sxxb$*0E0E'F'FGG	   : ;KKSXtz:::%bMM  DAq		ABww<++At|<<HH$ C 0 0FN C CA F FA)!T22 @*+ !E !EB$(KKWQZ$D$D$D$D!E !%CQ ? ? ?(   $M E"B#$gaj$	"B#C!C        & 	&=A%%J$$?ty{{U/BCD      
  !>!EFFF
 %%ZZ\\ 	, 	,EBDAqKK4:v~b'9'9::: ,M!$$AAsx+++ 	1 	1AKK38TZ0000
?4BVVWWW
/49;;3FGHHHHHs9   D:UBSU
T
)T?UT

AU.VN)rT   rU   rV   rZ   r   r   r   r   r   r   r   /  sK          $  I I I I Ir   c                 :   t                      } t          j        t          j        dd          dg d          \  }}t	          |          }d|v sd|v r(t          t                     t          j        d           d|v rt          j	                    }|
                    t          |d                              |                    d          D ]\  }}|d	k    rt          j        |d
d          | _        (|dk    rt          j        |          | _        H|dk    rt          j        |          | _        h|dk    rt          j        |          | _        |dk    rt+          |          | _        |dk    rt/          |          | _        |dk    rt/          |          | _        |dk    r|| _        |dk    rt7          |          | _        |dk    rt;          |          | _        |                    d          r.t;          |          | j         t7          |dd                    <   b|                    d          r1t          j        |          | j!        t7          |dd                    <   d|v r!t          j        |d         d
d          | _        d|v r!t          j        |d         d
d          | _        d|v rt          j        |d                   | _        d|v rt          j        |d                   | _        d|v r|d         | _        d|v r|d         | _        d|v rt;          |d                   | _        d |v rt;          |d                    | _        d!|v rt          j        |d!                   | _        d"|v rt          j        |d"                   | _        d#|v rt/          |d#                   | _        d$|v rt/          |d$                   | _        d%|v rt/          |d%                   | _        d&|v rt/          |d&                   | _        d'|v rt7          |d'                   | _        d(|v rt7          |d(                   | _        d)|v rt          j        |d)                   | _        d*|v rt          j        |d*                   | _        |                                D ]\  }}|                    d+          r-t;          |          | j         t7          |d,d                    <   G|                    d-          r1t          j        |          | j!        t7          |d.d                    <   | j        rd/|v sd0|v rd1| _        | "                    tG          tI          j%        |          | j        2                     d S )3Nr   zhc:b:p:i:o:Cf:l:s:d:D:)
zout=zbase=zdelim=z	propbase=zclass=zdefault=ident=zlabel=zskip=defineclasshelpz-hz--helpz-fcsv2rdfoutwzutf-8basepropbaseclassr   identr   delimskipdefaultcol   prop   z-oz--outz-bz--basez-dz--delimz-Dz	--defaultz-pz
--propbasez-lz--labelz-iz--identz-sz--skipz-cz--classz--col   z--prop   z-Cz--defineclassT)	delimiter)&r   getoptr   argvr   printHELPexitconfigparserConfigParser	read_fileopenr   codecsr   rC   r   r   r   r   r   rv   r   r   r   r   r   rq   r   r   r   
startswithr   r   r   r:   	fileinputinput)r   optsfilesconfigr   r   s         r   mainr    s;   iiG - 	
 	
 	
 KD%  ::Dt||x4''dt||*,,d4j))***LL++ 	= 	=DAqEzz$k!S'::f%/22j#)#3A#6#6  g &a 0 0m##&*1gg##g $Qg $Qg !f"1vvi"())e$$ =.4QiiAabbE

++f%% =,2M!,<,<c!ABB%jj)t||k$t*c7;;$k$w-g>>t||'T
334'X77t||T
DYt|| d,,d k!233t||!+DJ77t!+D,>??t||T$Z((DT)_--t||T$Z((DT)_--t||4:44>**t||d4j11Dd9o66

 9 91<<   	9*0))GOC!""JJ''\\(## 	9(.a(8(8GM#ae**%} #$$,,/T*A*A"OOJyu55OOOPPPPPr   __main__)r   r   )r+   r,   r-   r.   r/   r.   rL   )r<   r=   )NN)?__doc__
__future__r   r  r  r6   r   r  r  r    r   r   r   typingr   r   r   r   r   r	   urllib.parser
   rC   rdflib.namespacer   r   r   rdflib.termr   __all__r  r   __annotations__r&   r*   r3   excelr:   rH   rJ   rX   r`   re   ro   rt   ry   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  rT   r   r   r   <module>r     sf     # " " " " "      



       				 



   : : : : : : : : : : : : : : : :        1 1 1 1 1 1 1 1 1 1      +<~ 46 5 5 5 5M M M M"  % % % % "%        ! ! ! ! ! ! ! !2 2 2 2 2i 2 2 2     )   
	! 	! 	! 	! 	! 	! 	! 	!	 	 	 	 	k 	 	 		 	 	 	 	{ 	 	 	) ) ) ) )) ) ) )# # # # #{ # # #% % % % %	 % % %$ IKK   # # # #                  
 
 
 % % %VI VI VI VI VI VI VI VIrmQ mQ mQ` zDFFFFF r   