
    `i]                        d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZ e	rd dlmZmZmZmZmZmZ d dlmZ d dlZd d	lmZ d
ZdZ G d d          Zej        dgej        ddgej        ddgej        ddgiZi Ze                                 D ]\  Z!Z"e"D ]Z#e!ee#<   de$ddfdZ%de$de$fdZ&	 	 	 	 dFdZ'	 	 	 	 dFdZ(	 	 	 	 dFd Z)	 	 	 	 dFd!Z*	 	 	 	 dFd"Z+dGd%Z,dHd'd$d(e-dd$fd)Z.	 dHd*d+d,e$d-d$d(e-ddf
d.Z/d/e$de$fd0Z0dId3Z1dJd5Z2d6dd7e3ddfd8Z4d1d9d:e3de3fd;Z5	 	 	 	 dKd<Z6eefd1d9d:e3d=e3dd>fd?Z7efd1d@d*d$dAd$d=e3dd@f
dBZ8efd1d@d*d$dAd$d:e3dd@f
dCZ9dDe$ddfdEZ:dS )L    N)deque)deepcopy)	getsizeof)TYPE_CHECKING)BLOB_DATA_SUBSTITUTE)AnyCallableDictListOptionalTuple)Span)loggeri N  i'  c                       e Zd ZdZdZdZdZdS )GEN_AI_ALLOWED_MESSAGE_ROLESsystemuser	assistanttoolN)__name__
__module____qualname__SYSTEMUSER	ASSISTANTTOOL     g/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/sentry_sdk/ai/utils.pyr   r      s"        FDIDDDr   r   r   r   humanr   air   	tool_callurlreturnzTuple[str, str]c                     d| vrt          d          |                     dd          \  }}|                    d          r|dd         }n|}|                    d          d         }||fS )	u  
    Parse a data URI and return (mime_type, content).

    Data URI format (RFC 2397): data:[<mediatype>][;base64],<data>

    Examples:
        data:image/jpeg;base64,/9j/4AAQ... → ("image/jpeg", "/9j/4AAQ...")
        data:text/plain,Hello → ("text/plain", "Hello")
        data:;base64,SGVsbG8= → ("", "SGVsbG8=")

    Raises:
        ValueError: If the URL is not a valid data URI (missing comma separator)
    ,z)Invalid data URI: missing comma separator   data:   N;r   )
ValueErrorsplit
startswith)r#   headercontent	mime_part	mime_types        r   parse_data_urir2   +   s     #~~DEEEiiQ''OFG
 !! 122J			$$Q'Igr   r1   c                    | sdS |                                  }|                    d          rdS |                    d          rdS |                    d          rdS |                    d          s|                    d          rd	S dS )
a  
    Infer the content modality from a MIME type string.

    Args:
        mime_type: A MIME type string (e.g., "image/jpeg", "audio/mp3")

    Returns:
        One of: "image", "audio", "video", or "document"
        Defaults to "image" for unknown or empty MIME types.

    Examples:
        "image/jpeg" -> "image"
        "audio/mp3" -> "audio"
        "video/mp4" -> "video"
        "application/pdf" -> "document"
        "text/plain" -> "document"
    imagezimage/zaudio/audiozvideo/videozapplication/ztext/document)lowerr-   )r1   
mime_lowers     r   get_modality_from_mime_typer:   K   s    $  w""JX&& 	w			x	(	( w			x	(	( w			~	.	. *2G2G2P2P zwr   content_partDict[str, Any]Optional[Dict[str, Any]]c                    t          | t                    sdS |                     d          }|dk    rdS |                     d          }t          |t                    r|}n.t          |t                    r|                    dd          }ndS |sdS |                    d          r>	 t          |          \  }}dt          |          ||dS # t          $ r
 d	d
d|dcY S w xY wd	d
d|dS )a  
    Transform an OpenAI/LiteLLM content part to Sentry's standardized format.

    This handles the OpenAI image_url format used by OpenAI and LiteLLM SDKs.

    Input format:
    - {"type": "image_url", "image_url": {"url": "..."}}
    - {"type": "image_url", "image_url": "..."} (string shorthand)

    Output format (one of):
    - {"type": "blob", "modality": "image", "mime_type": "...", "content": "..."}
    - {"type": "uri", "modality": "image", "mime_type": "", "uri": "..."}

    Args:
        content_part: A dictionary representing a content part from OpenAI/LiteLLM

    Returns:
        A transformed dictionary in standardized format, or None if the format
        is not OpenAI image_url format or transformation fails.
    Ntype	image_urlr#    r(   blobr?   modalityr1   r/   urir4   r?   rD   r1   rE   )
isinstancedictgetstrr-   r2   r:   r+   )r;   
block_typeimage_url_datar#   r1   r/   s         r   transform_openai_content_partrM   m   sV   . lD)) t!!&))J[  t!%%k22N.#&& 	ND	)	)   ++t t ~~g 
	!/!4!4Iw7	BB&"	    	 	 	 #	    	 	
 
 	
s   *%C C$#C$c                    t          | t                    sdS |                     d          }|dvsd| vrdS |                     d          }t          |t                    sdS |                    d          }|                    dd          }|dk    rdnt          |          }|dk    rd	|||                    d
d          dS |dk    rd|||                    dd          dS |dk    rd|||                    dd          dS dS )a  
    Transform an Anthropic content part to Sentry's standardized format.

    This handles the Anthropic image and document formats with source dictionaries.

    Input format:
    - {"type": "image", "source": {"type": "base64", "media_type": "...", "data": "..."}}
    - {"type": "image", "source": {"type": "url", "media_type": "...", "url": "..."}}
    - {"type": "image", "source": {"type": "file", "media_type": "...", "file_id": "..."}}
    - {"type": "document", "source": {...}} (same source formats)

    Output format (one of):
    - {"type": "blob", "modality": "...", "mime_type": "...", "content": "..."}
    - {"type": "uri", "modality": "...", "mime_type": "...", "uri": "..."}
    - {"type": "file", "modality": "...", "mime_type": "...", "file_id": "..."}

    Args:
        content_part: A dictionary representing a content part from Anthropic

    Returns:
        A transformed dictionary in standardized format, or None if the format
        is not Anthropic format or transformation fails.
    Nr?   )r4   r7   source
media_typerA   r7   base64rB   datarC   r#   rE   rF   filefile_idr?   rD   r1   rT   rG   rH   rI   r:   )r;   rK   rO   source_typerP   rD   s         r    transform_anthropic_content_partrX      sd   4 lD)) t!!&))J...(,2N2Nth''Ffd## t**V$$KL"--J ## 	
(44  h #zz&"--	
 
 	
 
		 #::eR((	
 
 	
 
		 #zz)R00	
 
 	
 4r   c                    t          | t                    sdS d| v rj|                     d          }t          |t                    r>|                    dd          }dt          |          ||                    dd          dS dS d| v rj|                     d          }t          |t                    r>|                    dd          }d	t          |          ||                    d
d          dS dS dS )a  
    Transform a Google GenAI content part to Sentry's standardized format.

    This handles the Google GenAI inline_data and file_data formats.

    Input format:
    - {"inline_data": {"mime_type": "...", "data": "..."}}
    - {"file_data": {"mime_type": "...", "file_uri": "..."}}

    Output format (one of):
    - {"type": "blob", "modality": "...", "mime_type": "...", "content": "..."}
    - {"type": "uri", "modality": "...", "mime_type": "...", "uri": "..."}

    Args:
        content_part: A dictionary representing a content part from Google GenAI

    Returns:
        A transformed dictionary in standardized format, or None if the format
        is not Google format or transformation fails.
    Ninline_datar1   rA   rB   rR   rC   	file_datarE   file_urirF   rV   )r;   rZ   r1   r[   s       r   transform_google_content_partr]      s   . lD)) t $$"&&}55k4(( 	#R88I7	BB&&??6266	   t l"" $$[11	i&& 	!k266I7	BB& }}Z44	   t4r   c                 p   t          | t                    sdS |                     d          }|dvrdS d| v rdS |                     dd          }|dk    r|nd}d	| v rd
|||                     d	d          dS d| v rd|||                     dd          dS d| v rd|||                     dd          dS dS )a  
    Transform a generic/LangChain-style content part to Sentry's standardized format.

    This handles generic formats where the type indicates the modality and
    the data is provided via direct base64, url, or file_id fields.

    Input format:
    - {"type": "image", "base64": "...", "mime_type": "..."}
    - {"type": "audio", "url": "...", "mime_type": "..."}
    - {"type": "video", "base64": "...", "mime_type": "..."}
    - {"type": "file", "file_id": "...", "mime_type": "..."}

    Output format (one of):
    - {"type": "blob", "modality": "...", "mime_type": "...", "content": "..."}
    - {"type": "uri", "modality": "...", "mime_type": "...", "uri": "..."}
    - {"type": "file", "modality": "...", "mime_type": "...", "file_id": "..."}

    Args:
        content_part: A dictionary representing a content part in generic format

    Returns:
        A transformed dictionary in standardized format, or None if the format
        is not generic format or transformation fails.
    Nr?   )r4   r5   r6   rS   rO   r1   rA   rS   r7   rQ   rB   rC   r#   rE   rF   rT   rU   )rG   rH   rI   )r;   rK   r1   rD   s       r   transform_generic_content_partr_   1  s(   6 lD)) t!!&))J<<<t <t  b11I'611zzzH < "#''"55	
 
 	
 
,		 "##E2..	
 
 	
 
l	"	" "#''	266	
 
 	
 4r   c                     t          | t                    sdS t          |           }||S t          |           }||S t	          |           }||S t          |           }||S dS )a  
    Transform a content part from various AI SDK formats to Sentry's standardized format.

    This is a heuristic dispatcher that detects the format and delegates to the
    appropriate SDK-specific transformer. For direct SDK integration, prefer using
    the specific transformers directly:
    - transform_openai_content_part() for OpenAI/LiteLLM
    - transform_anthropic_content_part() for Anthropic
    - transform_google_content_part() for Google GenAI
    - transform_generic_content_part() for LangChain and other generic formats

    Detection order:
    1. OpenAI: type == "image_url"
    2. Google: "inline_data" or "file_data" keys present
    3. Anthropic: type in ("image", "document") with "source" key
    4. Generic: type in ("image", "audio", "video", "file") with base64/url/file_id

    Output format (one of):
    - {"type": "blob", "modality": "...", "mime_type": "...", "content": "..."}
    - {"type": "uri", "modality": "...", "mime_type": "...", "uri": "..."}
    - {"type": "file", "modality": "...", "mime_type": "...", "file_id": "..."}

    Args:
        content_part: A dictionary representing a content part from an AI SDK

    Returns:
        A transformed dictionary in standardized format, or None if the format
        is unrecognized or transformation fails.
    N)rG   rH   rM   r]   rX   r_   )r;   results     r   transform_content_partrb   w  s    @ lD)) t +<88F +<88F .l;;F ,L99F 4r   r/   r   c                 $   t          | t                    r| S t          | t          t          f          r\g }| D ]U}t          |t                    r)t          |          }|                    ||n|           @|                    |           V|S | S )a  
    Transform message content, handling both string content and list of content blocks.

    For list content, each item is transformed using transform_content_part().
    Items that cannot be transformed (return None) are kept as-is.

    Args:
        content: Message content - can be a string, list of content blocks, or other

    Returns:
        - String content: returned as-is
        - List content: list with each transformable item converted to standardized format
        - Other: returned as-is
    )rG   rJ   listtuplerH   rb   append)r/   transformeditemra   s       r   transform_message_contentri     s     '3 'D%=)) 	 	) 	)D$%% )/55""V-?66TJJJJ""4((((Nr   TrR   unpackc                    t          | d          rt          j        |           rd| j         dS 	 t	          |                                           S # t          $ rX}t          j        d|           t          | t          t          t          t          f          r| nt          |           cY d }~S d }~ww xY wt          | t                    rGr*t          |           dk    rt	          | d                   S t          fd| D                       S t          | t                     r fd	|                                 D             S t          | t          t          t          t          f          r| nt          |           S )
N
model_dumpz<ClassType: >rj   z+Could not convert pydantic data to JSON: %sr'   r   c              3   :   K   | ]}t          |           V  dS )rn   N_normalize_data).0xrj   s     r   	<genexpr>z"_normalize_data.<locals>.<genexpr>  s0      DD!OAf555DDDDDDr   c                 :    i | ]\  }}|t          |           S )rn   rp   )rr   kvrj   s      r   
<dictcomp>z#_normalize_data.<locals>.<dictcomp>  s,    PPP!Q?1V444PPPr   )hasattrinspectisclassr   rq   rl   	Exceptionr   warningrG   intfloatboolrJ   rd   lenrH   items)rR   rj   es    ` r   rq   rq     s   t\"" 
T ?4   	32$-2222	T"4??#4#4VDDDD 	T 	T 	TNH!LLL%dS%s,CDDS44#d))SSSSSS	T $ E 	;c$ii1nn"476::::DDDDtDDDDDD$ QPPPP4::<<PPPPdS%s$;<<K44#d))Ks   "A 
B7AB2,B72B7spanr   keyvaluec                     t          ||          }t          |t          t          t          t
          f          r|                     ||           d S |                     |t          j        |                     d S )Nrn   )	rq   rG   r~   r   r   rJ   set_datajsondumps)r   r   r   rj   
normalizeds        r   set_data_normalizedr     so     !v666J*sE4566 3c:&&&&&c4:j1122222r   rolec                 8    t                               | |           S )z
    Normalize a message role to one of the 4 allowed gen_ai role values.
    Maps "ai" -> "assistant" and keeps other standard roles unchanged.
    )GEN_AI_MESSAGE_ROLE_MAPPINGrI   )r   s    r   normalize_message_roler     s    
 '**4666r   messageslist[dict[str, Any]]c                     g }| D ]r}t          |t                    s|                    |           -|                                }d|v rt	          |d                   |d<   |                    |           s|S )z
    Normalize roles in a list of messages to use standard gen_ai role values.
    Creates a deep copy to avoid modifying the original messages.
    r   )rG   rH   rf   copyr   )r   normalized_messagesmessagenormalized_messages       r   normalize_message_rolesr     s    
  7 7'4(( 	&&w///$\\^^W)?)P)Pv&""#56666r   Callable[..., Any]c                  v    t          j                    } | d uo| j        d u}|rt           j        nt           j        S )N)
sentry_sdkget_current_spancontaining_transaction
start_spanstart_transaction)current_spantransaction_existss     r   get_start_span_functionr     sC    .00LD T\%HPT%T  %7X:  J<XXr   r   	max_charsc                     t          | t                    rd| vr| S | d         }t          |t                    rt          |          |k    r| S |d|         dz   | d<   | S )zy
    Truncate a message's content to at most `max_chars` characters and append an
    ellipsis if truncation occurs.
    r/   Nz...)rG   rH   rJ   r   )r   r   r/   s      r   +_truncate_single_message_content_if_presentr     ss     gt$$ 	(@(@i Ggs## s7||y'@'@ ),u4GINr   List[Dict[str, Any]]	max_bytesc                     d}t          t          |           dz
  dd          D ]P}t          t          j        | |         d                              d                    }||z  }||k    r|dz   c S QdS )z
    Find the index of the first message that would exceed the max bytes limit.
    Compute the individual message sizes, and return the index of the first message from the back
    of the list that would exceed the max bytes limit.
    r   r'   r&   :
separatorsutf-8)ranger   r   r   encode)r   r   running_sumidxsizes        r   _find_truncation_indexr   )  s     KS]]Q&B//  4:hsm
CCCJJ7SSTTt""7NNN # 1r   c                 4   d}| D ]}}t          |t                    s|                    d          }t          |t                    r7|D ]4}t          |t                    r|                    d          dk    rd} n5|r n~|s| S t	          |           }|D ]}t          |t                    s|                    d          }t          |t                    r=|D ]:}t          |t                    r#|                    d          dk    r
t
          |d<   ;|S )aP  
    Redact blob message parts from the messages by replacing blob content with "[Filtered]".

    This function creates a deep copy of messages that contain blob content to avoid
    mutating the original message dictionaries. Messages without blob content are
    returned as-is to minimize copying overhead.

    e.g:
    {
        "role": "user",
        "content": [
            {
                "text": "How many ponies do you see in the image?",
                "type": "text"
            },
            {
                "type": "blob",
                "modality": "image",
                "mime_type": "image/jpeg",
                "content": "data:image/jpeg;base64,..."
            }
        ]
    }
    becomes:
    {
        "role": "user",
        "content": [
            {
                "text": "How many ponies do you see in the image?",
                "type": "text"
            },
            {
                "type": "blob",
                "modality": "image",
                "mime_type": "image/jpeg",
                "content": "[Filtered]"
            }
        ]
    }
    Fr/   r?   rB   T)rG   rH   rI   rd   r   r   )r   	has_blobsr   r/   rh   messages_copys         r   redact_blob_message_partsr   9  sT   Z I 
 
'4(( 	++i((gt$$ 	  dD)) dhhv.>.>&.H.H $IE 	E	   X&&M ! ; ;'4(( 	++i((gt$$ 	; ; ;dD)) ;dhhv.>.>&.H.H&:DOr   max_single_message_charsz Tuple[List[Dict[str, Any]], int]c                    t          j        | d          }t          |                    d                    }||k    r| dfS t	          | |          }|t          |           k     r| |d         }nt          |           dz
  }| dd         }t          |          dk    r't          t          |d                   |          |d<   ||fS )	a  
    Returns a truncated messages list, consisting of
    - the last message, with its content truncated to `max_single_message_chars` characters,
      if the last message's size exceeds `max_bytes` bytes; otherwise,
    - the maximum number of messages, starting from the end of the `messages` list, whose total
      serialized size does not exceed `max_bytes` bytes.

    In the single message case, the serialized message size may exceed `max_bytes`, because
    truncation is based only on character count in that case.
    r   r   r   r   Nr'   r   r   )r   r   r   r   r   r   r   )r   r   r   serialized_jsoncurrent_sizetruncation_indextruncated_messagess          r   truncate_messages_by_sizer     s     jjAAAO--g6677Ly  {-h	BB#h--''%&6&7&78x==1,%bcc]
!## K'*++7O!
 !
 !
1 ///r   zOptional[List[Dict[str, Any]]]scopec                     | sd S t          |           } t          t          | d                   |          }t          |           dk    rt          |           |j        |j        <   |gS )Nr   r   r'   )r   r   r   r   _gen_ai_original_message_countspan_id)r   r   r   r   truncated_messages        r   truncate_and_annotate_messagesr     sv      t(22HC"*B   8}}q=@]],T\:r   c                     | sd S t          |           } t          | |          \  }}|dk    rt          |           |j        |j        <   |S )Nr   )r   r   r   r   r   )r   r   r   r   r   removed_counts         r   &truncate_and_annotate_embedding_inputsr     sY      t(22H(A(I(V(V%q=@]],T\:r   conversation_idc                 V    t          j                    }|                    |            dS )z/
    Set the conversation_id in the scope.
    N)r   get_current_scopeset_conversation_id)r   r   s     r   r   r     s,     (**E	o.....r   )r;   r<   r$   r=   )r/   r   r$   r   )T)r   r   r$   r   )r$   r   )r   r   r$   r   );rz   r   collectionsr   r   r   sysr   typingr   sentry_sdk._typesr   r   r	   r
   r   r   r   sentry_sdk.tracingr   r   sentry_sdk.utilsr   MAX_GEN_AI_MESSAGE_BYTES MAX_SINGLE_MESSAGE_CONTENT_CHARSr   r   r   r   r   #GEN_AI_MESSAGE_ROLE_REVERSE_MAPPINGr   r   target_rolesource_rolessource_rolerJ   r2   r:   rM   rX   r]   r_   rb   ri   r   rq   r   r   r   r   r~   r   r   r   r   r   r   r   r   r   r   <module>r      s                                   2 2 2 2 2 2 (AAAAAAAAAAAAAAAA''''''     # # # # # #! #)          !'( %'8 *[$,? %'<	' # ! !D!J!J!L!L ? ?K# ? ?3>#K00?  1    @3 3    DC
"C
C
 C
 C
 C
LD"DD D D DN4"44 4 4 4nC"CC C C CL8"88 8 8 8v   @L L% L L L L L L4 :>3 3
33#(3263	3 3 3 37 7 7 7 7 7   $Y Y Y Y*-   $%;  PS     L$LL L L Lb .$D!0 !0$!0!0 "!0 (	!0 !0 !0 !0P %E	 .
  "	
 &   0 .	 .
  	
 &   $/ / / / / / / /r   