
     `i*<                     h   d dl Z d dlZd dlZd dlmZ d dlmZmZmZm	Z	  G d de          Z
	 d dlmZ d dlmZmZmZ dZn# e$ r d	ZY nw xY wdd
Z	 ddd	dddeeeeee	eeegef         f         f                           fdZ ej        e          Z e             G d d                      ZdS )    N)Enum)AnyCallableOptionalUnionc                   .    e Zd ZdZdZdZdZdZdZdZ	dZ
d	S )
RequestStatusz5Status of a generation request through its lifecycle.pending
prefillingprefilling_splitsplit_pending_remainderdecodingfinishedfailedN)__name__
__module____qualname____doc__PENDING
PREFILLINGPREFILLING_SPLITSPLIT_PENDING_REMAINDERDECODINGFINISHEDFAILED     n/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/utils/metrics.pyr	   r	      s9        ??GJ)7HHFFFr   r	   )metrics)Status
StatusCode
get_tracerTFc                 &     t           sd S  fd}|S )a  
    Decorator that attaches a tracer to a class.

    This decorator should be applied to classes that need OpenTelemetry tracing.
    It adds a tracer attribute to the class instance that can be used by the traced decorator.

    Args:
        tracer_name_template: Optional template string for the tracer name.
            If provided, it should contain {module} which will be replaced with the class's full module path
            and {class_name} for the class name.
            If None, a default naming scheme will be used where:
              - If the module already starts with "transformers.", it will use that directly
              - Otherwise, it will prepend "transformers." to the module name

    Returns:
        Class decorator function
    c                     | S Nr   )clss    r   <lambda>zattach_tracer.<locals>.<lambda>0   s    3 r   c                 j      j         t          j                   fd            }| _          S )Nc                      | g|R i | j         }j        }&|                    d          r| d| }n d| d| }n                    ||          }t	          |          | _        d S )Ntransformers..)module
class_name)r   r   
startswithformatr"   tracer)	selfargskwargsmodule_namer-   tracer_namer&   original_inittracer_name_templates	         r   init_with_tracerz:attach_tracer.<locals>.decorator.<locals>.init_with_tracer5   s    M$0000000.K)J#+))/:: M%0"?"?:"?"?KK"L+"L"L
"L"LKK299Yc9dd$[11DKKKr   )__init__	functoolswraps)r&   r8   r6   r7   s   ` @r   	decoratorz attach_tracer.<locals>.decorator2   sR    		'	'	2 	2 	2 	2 	2 	2 
(	'	2  (
r   )_has_opentelemetry)r7   r<   s   ` r   attach_tracerr>      s6    $      . r   )	span_name
standaloneadditional_attributesrA   c                4    fd}| |S  ||           S )a  
    Decorator to trace function calls with OpenTelemetry.

    Can be used as @traced or @traced(span_name="custom_name")

    Args:
        func: The function to trace
        span_name: Optional custom name for the span (defaults to function name)
        standalone: If True, creates a parentless span
        additional_attributes: Optional list of additional attributes to set on the span.
          Each item is a tuple of (instance_attribute_name, span_attribute_key, value_or_transform_function)
          where:
            - instance_attribute_name: Name of the attribute to get from the class instance
            - span_attribute_key: Key to use when setting the attribute on the span
            - value_or_transform_function: Either a raw value to use directly, or a function to transform
              the attribute value before setting it on the span

    Returns:
        Decorated function with tracing
    c                 `     t           s S t          j                    fd            }|S )Nc            
         | rt          d          rj        | d         nd }|d u}|rt          |d          r|j        }nt          dj         dj                   }pj        }r|j        n|j        } ||          5 }|                    dj                   |                    dj                   |                    d|           | rt          |           D ]\  }}	t          |	t          t          t          t          f          s|	'|                    d	| t          |	                     V|                    d	| t          t          |	                               |r|                                D ]\  }
}t          |t          t          t          t          f          s|'|                    d
|
 t          |                     V|                    d
|
 t          t          |                               r`|r^D ][}|\  }}}t          ||          rCt#          ||          }t%          |          r ||          }n|}|                    ||           \	  | i |}|cd d d            S # t&          $ rG}|                    t+          t,          j                             |                    |            d }~ww xY w# 1 swxY w Y   d S )N__self__r   r0   r*   r+   zfunction.namezfunction.modulezfunction.is_methodzargs.zkwargs.)hasattrrE   r0   r"   r   r   
start_spanstart_as_current_spanset_attribute	enumerate
isinstancestrintfloatbooltypeitemsgetattrcallable	Exception
set_statusr    r!   ERRORrecord_exception)r2   r3   instance	is_methodr0   namespan_fnspaniargkeyvalueattr_configinstance_attribute_namespan_attribute_keyvalue_or_transform_functionattribute_valuetransformed_valueresulterA   funcr?   r@   s                       r   wrapperz*traced.<locals>.decorator.<locals>.wrapperl   s   "&nGD*,E,En$-JctAwwjnH ,I WWXx88 W!#$UDO$U$Udm$U$UVV-D+5Wf''6;WG #$""?DMBBB""#4doFFF""#7CCC L"+D// L L3%cCeT+BCC Ls{ ..{q{{CHHEEEE ..{q{{CS		NNKKKK R&,llnn R R
U%ec3t-DEE R ..E

KKKK ..DKK@P@PQQQQ( 	VY 	V'< V Vcn`/1CE`"8-DEE V.5h@W.X.XO'(CDD P4O4OP_4`4` 1 14O 1 ../ACTUUU!T42622F!?# # # # # # # #@ !   OOF:+;$<$<===))!,,,A# # # # # # # # # #s2   G*K1	J
KAKKKK"K)r=   r:   r;   )ri   rj   rA   r?   r@   s   ` r   r<   ztraced.<locals>.decoratorh   sV    ! 	K			.	 .	 .	 .	 .	 .	 .	 
	.	` r   r   )ri   r?   r@   rA   r<   s    ``` r   tracedrk   L   sD    85 5 5 5 5 5 5n |9T??r   c                       e Zd ZdZdefdZd Zedede	ddfd	            Z
ed
eddfd            Zedd            Zedededdfd            Zedede	ddfd            ZdS )ContinuousBatchProcessorMetricsz0Metrics collection for ContinuousBatchProcessor.max_batch_tokensc                 <    || _         |                                  dS )zInitialize metrics for continuous batch processor.

        Args:
            max_batch_tokens: Maximum number of tokens in a batch
        N)rn   _setup_metrics)r1   rn   s     r   r9   z(ContinuousBatchProcessorMetrics.__init__   s$     !1r   c                 D   t           st                              d           dS t          j        d          | _        g d}| j                            ddd|          | _        | j                            d	d
d          | _	        | j                            ddd          | _
        g d}| j                            ddd|          | _        | j                            ddd          | _        | j                            ddd          | _        | j                            ddd          | _        g d}| j                            ddd|          | _        | j                            ddd           | _        | j                            d!d"d           | _        dS )#zIInitialize OpenTelemetry metrics and tracing if the library is available.zIOpenTelemetry is not installed. Metrics and tracing will not be recorded.Nz2transformers.generation.continuous_batch_processor)
      2   K   d         i,    i        '  ttft_millisecondsz#Time to first token in millisecondsms)rZ   descriptionunit#explicit_bucket_boundaries_advisoryactive_requests_countz3Number of active requests currently being processedrequests)rZ   r   r   waiting_requests_countz*Number of requests waiting to be processed)rt   rv      ry   rz   r{   r|   r}   i N  i0u  i`  request_latency_millisecondsz9End-to-end latency for completed requests in millisecondsdecode_prefill_ratioz3Ratio of decode tokens to prefill tokens in a batchratioprefill_tokens_processedz"Number of prefill tokens processedtokensdecode_tokens_processedz!Number of decode tokens processed)   rr         (   rt   <   F   P   Z   _   b   rv   batch_fill_percentagez5Percentage of max_batch_tokens utilized in each batchpercentkv_cache_free_memory_bytesz/Free memory of the PagedAttentionCache in bytesbyteskv_cache_memory_bytesz0Memory usage of the PagedAttentionCache in bytes)r=   loggerinfor   	get_metermetercreate_histogramttft_histogramcreate_gaugeactive_requests_gaugewaiting_requests_gaugerequest_latency_histogramdecode_prefill_ratio_gaugecreate_counterprefill_tokens_counterdecode_tokens_counterbatch_fill_percentage_histogramkv_cache_free_memory_gaugekv_cache_memory_gauge)r1   ttft_bucketslatency_bucketsbatch_fill_bucketss       r   rp   z.ContinuousBatchProcessorMetrics._setup_metrics   s    " 	KKcdddF&'[\\
 _^^"j99$=0<	 : 
 
 &*Z%<%<(M &= &
 &
" '+j&=&=)D '> '
 '
# \[[)-)D)D/S0?	 *E *
 *
& +/**A*A'M +B +
 +
' '+j&?&?+< '@ '
 '
# &*Z%>%>*; &? &
 &
" RQQ/3z/J/J(O0B	 0K 0
 0
, +/**A*A-I +B +
 +
' &*Z%<%<(J &= &
 &
"""r   created_time
request_idreturnNc                 .   t           sdS t          j                    |z
  dz  }	 | j                            |           t                              d| d|dd           dS # t          $ r(}t                              d|            Y d}~dS d}~ww xY w)zRecord Time to First Token (TTFT).

        Args:
            created_time: The time the request was created
            request_id: The ID of the request
        N     @@zRecorded TTFT for request : .2fr   zFailed to record TTFT metric: )r=   timer   recordr   debugrT   warning)r1   r   r   ttft_msrh   s        r   record_ttft_metricz2ContinuousBatchProcessorMetrics.record_ttft_metric  s     " 	F9;;-7	A&&w///LLSjSSGSSSSTTTTT 	A 	A 	ANN?A??@@@@@@@@@	A   <A" "
B,BBrequests_in_batchc                    t           r|sdS d}d}|D ]S}|j        t          j        k    r|dz  }|j        t          j        t          j        fv r|t          |j                  z  }T||z   }	 |dk    r| j        	                    |           |dk    r| j
        	                    |           |dk    r||z  }| j                            |           || j        z  dz  }| j                            |           t                               d| d| d|dd	| d
| j         d           dS # t$          $ r(}t                               d|            Y d}~dS d}~ww xY w)zRecord metrics about the batch composition including decode/prefill ratio and batch fill percentage.

        Args:
            requests_in_batch: List of request states in the current batch
        Nr      g      Y@zBatch metrics: z decode tokens, z prefill tokens, batch fill: r   z% (/)z Failed to record batch metrics: )r=   statusr	   r   r   r   len
prompt_idsr   addr   r   setrn   r   r   r   r   rT   r   )	r1   r   decode_tokensprefill_tokensstatetotal_batch_tokensr   fill_percentagerh   s	            r   record_batch_metricsz4ContinuousBatchProcessorMetrics.record_batch_metrics  s    " 	): 	F& 	8 	8E|}555"-":M<Z![[[#e&6"7"77*^;	C!!+//???q  *..}===!!%6/33E:::1D4IIURO077HHHLLe- e e e e.de e7Ie eLPLae e e      	C 	C 	CNNAaAABBBBBBBBB	Cs   ,B<D* *
E4EEc                 4   t           sdS 	 |j        |j        z  }||j        j        z  }dt          |j                  z  |j        z  |z  }|                                }|j	        |z
  }||z  }||z  }| j
                            |           | j                            |           t                              d|dz  dd| d|j	         d||j	        z  d	z  d
d	           dS # t          $ r(}	t                              d|	            Y d}	~	dS d}	~	ww xY w)a&  Record memory usage of the PagedAttentionCache without GPU synchronization.

        This calculates the theoretical memory usage based on cache configuration
        and the number of blocks currently in use.

        Args:
            cache: The PagedAttentionCache object to measure
        N   zKV Cache memory: i   r   zMB, Used blocks: r   z (rv   z.1fz%)z*Failed to record KV cache memory metrics: )r=   head_dimnum_key_value_headsdtypeitemsizer   	key_cache
block_sizeget_num_free_blocks
num_blocksr   r   r   r   r   rT   r   )
r1   cache	page_sizepage_mem_in_bytesblock_mem_in_bytesfree_blocksused_blocksused_memory_bytesfree_memory_bytesrh   s
             r   record_kv_cache_memory_metricsz>ContinuousBatchProcessorMetrics.record_kv_cache_memory_metricsB  s    " 	F	M)BBI )EK,@ @ "#S%9%9!9E<L!LO`!`  3355K*[8K !,.@ @ +.@ @ &**+<===+//0ABBBLLA$5$EO A A +A A.3.>A A%"22S8@A A A    
  	M 	M 	MNNKKKLLLLLLLLL	Ms   CC% %
D/DDactive_requestswaiting_requestsc                 .   t           sdS 	 | j                            |           | j                            |           t                              d| d| d           dS # t          $ r(}t                              d|            Y d}~dS d}~ww xY w)zRecord metrics about active and waiting requests.

        Args:
            active_requests: Number of active requests
            waiting_requests: Number of waiting requests
        NzQueue metrics: z active requests, z waiting requestsz Failed to record queue metrics: )r=   r   r   r   r   r   rT   r   )r1   r   r   rh   s       r   record_queue_metricsz4ContinuousBatchProcessorMetrics.record_queue_metricsj  s     " 	F	C&**?;;;'++,<===LLq?qqN^qqqrrrrr 	C 	C 	CNNAaAABBBBBBBBB	Cs   AA" "
B,BBc                 .   t           sdS t          j                    |z
  dz  }	 | j                            |           t                              d| d|dd           dS # t          $ r(}t                              d|            Y d}~dS d}~ww xY w)zRecord metrics about a completed request.

        Args:
            created_time: The time the request was created
            request_id: The ID of the request
        Nr   z Recorded request completion for r   r   r   z,Failed to record request completion metric: )r=   r   r   r   r   r   rT   r   )r1   r   r   
latency_msrh   s        r   record_request_completionz9ContinuousBatchProcessorMetrics.record_request_completion|  s     " 	FikkL0F:
	O*11*===LL\J\\*\\\\]]]]] 	O 	O 	ONNM!MMNNNNNNNNN	Or   )r   N)r   r   r   r   rM   r9   rp   rk   rN   rL   r   listr   r   r   r   r   r   r   rm   rm      sZ       ::    O
 O
 O
b Au A# A$ A A A VA$ &Cd &Ct &C &C &C V&CP %M %M %M V%MN CC C3 CSW C C C VC" Oe O OQU O O O VO O Or   rm   r%   )r:   loggingr   enumr   typingr   r   r   r   r	   opentelemetryr   opentelemetry.tracer    r!   r"   r=   ImportErrorr>   r   tuplerL   rk   	getLoggerr   r   rm   r   r   r   <module>r      s               1 1 1 1 1 1 1 1 1 1 1 1	 	 	 	 	D 	 	 	%%%%%%BBBBBBBBBB   , , , ,` 
U _cU U U
 $DsCsHcUTWZDX?X9Y/Y)Z$[\U U U Up 
	8	$	$ fO fO fO fO fO fO fO fO fO fOs   A A
A