
    Pi42                    ~   d dl mZ d dlZd dlZd dlmZ d dlmZmZm	Z	 d dl
mZmZmZmZmZmZmZmZmZ erd dlZn	 d dlZesJ n# e$ r d dlZY nw xY wd dlmZmZ d dl
mZmZ d dlmZmZmZ 	 d dlZd	Zn# e$ r dZd
ZY nw xY wd dl m!Z!m"Z"m#Z#m$Z$m%Z%m&Z& 	 	 d"d#dZ'dZ(d$dZ)d%dZ*d&dZ+g dZ, G d  d!e          Z-dS )'    )annotationsN)
HTMLParser)StringIO
TextIOBaseTextIOWrapper)	IOTYPE_CHECKINGAnyDictListOptionalTextIOTupleUnion)normpathsep)r	   cast)urljoinurlsplit
urlunsplitTF)BytesIOWrapperInputSourcePythonInputSourceStringInputSourceURLInputSourcecreate_input_sourcesourceMOptional[Union[IO[bytes], TextIO, InputSource, str, bytes, pathlib.PurePath]]fragment_idOptional[str]extract_all_scriptsOptional[bool]return#Tuple[Union[Dict, List[Dict]], Any]c                (   t          | t                    r	| j        dfS t          | t                    rvd}|                                 }d}t          |t
                    rst          t          t          t          t          f         |j                  }t          |t                    r|}n)t          |t                    r|                                }t          r|t          j        |          }nt          |t
                    r;|                                 }t          j        |                                          }ndt          j        |                                          }n=|t%          j        |          }n&t%          j        |                                           }||fS t)          | d          } 	 | j        }	n# t,          t.          f$ r d}	Y nw xY w|	duo|	                                dv }
|
rt3          ||          }nd}	 |                                 }n# t,          t.          f$ r d}Y nw xY w	 |                                 }n# t,          t.          f$ r d}Y nw xY w|!|t5          dt7          |                      	 |dn|                                 }n# t,          t.          f$ r d}Y nw xY wd}|]t          |t
                    rH|j        }t          |t                    r|}n)t          |t                    r|                                }	 |
r|||}nI||                                }n2t:          r|J |d}t=          ||                                          }|                    |           |                                 |!                                }}n6t          rd}|t          j        |          }n|t          |t
                    s|)|'t          j        |                                          }nt:          r|J t          j        |                                          }nd}|dt%          j        |          |&	 |"                                 n# t,          $ r Y nw xY w|&	 |"                                 S # t,          $ r Y S w xY wS ||}n t:          r|J |d}t=          ||          }t%          j        |          }||f|&	 |"                                 n# t,          $ r Y nw xY w|&	 |"                                 S # t,          $ r Y S w xY wS # |&	 |"                                 n# t,          $ r Y nw xY w|&	 |"                                 w # t,          $ r Y w w xY ww xY w)	a  Extract JSON from a source document.

    The source document can be JSON or HTML with embedded JSON script elements (type attribute = "application/ld+json").
    To process as HTML `source.content_type` must be set to "text/html" or "application/xhtml+xml".

    Args:
        source: the input source document (JSON or HTML)
        fragment_id: if source is an HTML document then extract only the script element with matching id attribute, defaults to None
        extract_all_scripts: if source is an HTML document then extract all script elements (unless fragment_id is provided), defaults to False (extract only the first script element)

    Returns:
        Tuple with the extracted JSON document and value of the HTML base element
    Nzjson-ld)format)z	text/htmlzapplication/xhtml+xml)r   r!   zLSource does not have a character stream or a byte stream and cannot be used zutf-8)encoding)#
isinstancer   datar   getByteStreamr   r   r   strr   r   wrappedgetvalue_HAS_ORJSONorjsonloadsgetCharacterStreamreadjsonloadr   content_typeAttributeErrorLookupErrorlowerHTMLJSONParser
ValueErrortypegetEncodingr	   r   feedget_jsonget_baseclose)r   r   r!   	html_baseb_streamoriginal_stringwrapped_inner	json_dictc_streamr5   is_htmlhtml_docparser
b_encodingunderlying_stringhtml_string
use_streams                   u/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/rdflib/plugins/shared/jsonld/util.pysource_to_jsonrN   +   s   * &+,, !{D  &+,, $	 '')))-h// 	; sHj'@!A8CSTTM--- ;"/M844 ;"/"8"8":": 	C*"L99		Hn55 :!4466"L99		 #L99		* J77		 If&?&?&A&ABB	)## !	:::F*K(    $& <+=+=+?+? D ,G  3A#9L4
 4
 4
 ''))K(   ,,..K(   H,i[_`f[g[gii
 
 	
,4,<DD&BTBTBVBV

K(   


'+
8^ D D (mS)) 	9 -x00 	9 - 6 6 8 87 *	.~1 ,#4%&mmoo  0#///%!(J+HzJJJOOQQ,,,#1#:#:#<#<n>U>U>W>WyII 	.I ,"L):;;		%*X~*N*N%#&"L99		  0#///"L99		I ,z"344     !       !     # #%

  0#///%!(J*8jIII
	*--I)#    !       !          !       !     s   =G GGH$ $H:9H:>I I)(I)J) )J?>J?$D=T? $Q99
RRR!!
R.-R.2<T? 1T
TTT..
T;:T;?VUV
U%"V$U%%V+V ?V 
V
VVV)#/:irir+   Tuple[str, Optional[str]]c                    t           D ]9}|                     |          }|dk    r| d |dz            | |dz   d          fc S :| d fS )N   )VOCAB_DELIMSrfind)rR   delimats      rM   	split_irir[      sc     0 0YYu77xax=#b1fhh-//// 9    baseurlc                N   d|v r|S t          |           }t          |          }|j        r|S |j        dv rz|j                            dd          }dt	          |          dk    r|d         ndz   }t          ||j                  }|j        r
d|j         nd}|j         d|d          | | }nt          t          | |                    }	t          |	d	                   }
t          dk    r-d	                    |
                    t                              }
|	d	         
                    d          r|

                    d          s|
dz  }
t          |	dd	         |
fz   |	d
d         z             }|
                    d          r|
                    d          s|dz  }|S )a  
    ```python
    >>> norm_url('http://example.org/', '/one')
    'http://example.org/one'
    >>> norm_url('http://example.org/', '/one#')
    'http://example.org/one#'
    >>> norm_url('http://example.org/one', 'two')
    'http://example.org/two'
    >>> norm_url('http://example.org/one/', 'two')
    'http://example.org/one/two'
    >>> norm_url('http://example.org/', 'http://example.net/one')
    'http://example.net/one'
    >>> norm_url('http://example.org/', 'http://example.org//one')
    'http://example.org//one'

    ```
    z://)urnzurn-xrP   rV    rO   rQ   r         N)r   schemepathsplitlenr   fragmentr   r   joinendswithr   )r]   r^   parsed_base
parsed_urlbase_path_parts	base_pathjoined_pathrh   resultpartsre   s              rM   norm_urlrr      s   $ ||
 4..K#J 
---%*00a88_1E1E1I1I?1--rR	i990:0CK,z*,,,&TT);T[T(TTs++,,a!!#::88DJJsOO,,D8S!! 	$--*<*< 	CKDE!A#J$059<==
||C !5!5 #Mr\   r   c                   | j         dk    r{	 | j        }n# t          $ r Y dS w xY w|D ]`}d|v rX|                    d          |                    d          }}|dk    r(|dk    r"t	          | j        ||dz   |                   c S _dS dS )aL  
    Please note that JSON-LD documents served with the `application/ld+json` media type
    MUST have all context information, including references to external contexts,
    within the body of the document. Contexts linked via a
    http://www.w3.org/ns/json-ld#context HTTP Link Header MUST be
    ignored for such documents.
    application/ld+jsonNz+ rel="http://www.w3.org/ns/json-ld#context"<>rU   rV   )r5   linksr6   indexr   r^   )r   rw   linkijs        rM   context_from_urlinputsourcer|     s     333	LEE 	 	 	FF	  	@ 	@D<DDzz#

31r66a"ff"6:tAEAI????? 43	@ 	@s    
##)r3   rN   r[   rr   r|   r/   r.   c                  @     e Zd Z	 	 dd fdZd Zd	 ZddZd Z xZS )r9   NFr   r    r!   r"   c                    t                                                       || _        g | _        d| _        d| _        d | _        || _        d| _        d S )NFr   )	super__init__r   r3   contains_jsonfragment_id_does_not_matchr]   r!   script_count)selfr   r!   	__class__s      rM   r   zHTMLJSONParser.__init__$  sW    
 	& "	"*/'	#6 r\   c                    d| _         d| _        |dk    r=|D ]8\  }}|dk    r|dk    rd| _         |dk    r| j        r|| j        k    rd| _        9d S |dk    r|D ]\  }}|dk    r|| _        d S d S )	NFscriptr;   rt   Tidr]   href)r   r   r   r]   )r   tagattrsattrvalues        rM   handle_starttagzHTMLJSONParser.handle_starttag2  s    "*/' (??$ ; ;e6>>e/D&D&D)-D&&T\\d&6\5DDT;T;T6:D3	; ; F]]$ & &e6>> %DI ]& &r\   c                   | j         du r| j        du r| j        s| j        dk    rd S |                                dk    rd S t
          rt          j        |          }nt          j        |          }t          |t                    r| j                            |           n| j                            |           | xj        dz  c_        d S d S d S )NTFr   ra   rV   )r   r   r!   r   stripr.   r/   r0   r3   r(   listextendappend)r   r)   parseds      rM   handle_datazHTMLJSONParser.handle_dataD  s     %%$*IU*R*R+ 0AA0E0Ezz||r!!  *  d++D)) &$'' )	  ((((	  ((("1 &%*R*Rr\   r#   
List[Dict]c                    | j         S N)r3   r   s    rM   r>   zHTMLJSONParser.get_jsonb  
    yr\   c                    | j         S r   )r]   r   s    rM   r?   zHTMLJSONParser.get_basee  r   r\   NF)r   r    r!   r"   )r#   r   )	__name__
__module____qualname__r   r   r   r>   r?   __classcell__)r   s   @rM   r9   r9   #  s         &*.3      & & &$# # #<         r\   r9   r   )r   r   r   r    r!   r"   r#   r$   )rR   r+   r#   rS   )r]   r+   r^   r+   r#   r+   )r   r   r#   r    ).
__future__r   r3   pathlibhtml.parserr   ior   r   r   typingr   r	   r
   r   r   r   r   r   r   ImportError
simplejson	posixpathr   r   r   urllib.parser   r   r   r/   r.   rdflib.parserr   r   r   r   r   r   rN   rW   r[   rr   r|   __all__r9    r\   rM   <module>r      s   " " " " " "   " " " " " " 2 2 2 2 2 2 2 2 2 2 U U U U U U U U U U U U U U U U U U U U U U "KKKK"t " " "!!!!!!" $ # # # # # # # & & & & & & & & 6 6 6 6 6 6 6 6 6 6MMMKK   FKKK
                "&*/X X X X Xv    , , , ,`@ @ @ @.  C C C C CZ C C C C Cs#   A 	AA0A7 7	BB