
     `iZ!                        d dl Z d dlmZmZ d dlmZ d dlmZ d dlZddl	m
Z
 ddlmZ  e
j        d          Zd	eej        eeef         fd
Z G d de          Ze G d d                      Ze G d d                      ZdS )    N)	dataclassfield)Enum)Optional   )logging)tracedContinuousBatchingLoggerreturnc                     t           j                                        rt          j        d          } t           j                                         t           j                                         t           j                            |           j        }t           j                            |           }t           j        	                    |           }nt           j
        j                                        ryt           j
        j                                        rVt          j        d          } t           j                                        }|t           j                                        z
  }d}nt          j        d          } d }d}d}| |||fS )Ncudampsr   cpu)torchr   is_availabledeviceempty_cachesynchronizeget_device_propertiestotal_memorymemory_reservedmemory_allocatedbackendsr   is_builtdriver_allocated_memoryrecommended_max_memory)r   r   reserved_memoryallocated_memorys       /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/generation/continuous_batching/requests.pyget_device_and_memory_breakdownr       s7   z   f%%
   
   z77??L*44V<< :66v>>			(	(	*	* 
u~/A/J/J/L/L 
e$$y88::'%)*J*J*L*LLe$$<2BBB    c                   .    e Zd ZdZdZdZdZdZdZdZ	dZ
d	S )
RequestStatusz5Status of a generation request through its lifecycle.pending
prefillingprefilling_splitsplit_pending_remainderdecodingfinishedfailedN)__name__
__module____qualname____doc__PENDING
PREFILLINGPREFILLING_SPLITSPLIT_PENDING_REMAINDERDECODINGFINISHEDFAILED r!   r   r#   r#   5   s9        ??GJ)7HHFFFr!   r#   c                      e Zd ZU dZeed<    ee          Zee	         ed<    ee          Z
ee	         ed<    ee          Zee         ed<   dZee         ed<   ej        Zeed	<    eej                  Zeed
<   dS )GenerationOutputa5  Tracks the output of a generation request.

    Attributes:
        request_id (str): The ID of the generation request.
        prompt_ids (list[int]): The IDs of the prompt tokens.
        generated_tokens (list[int]): The generated tokens.
        logprobs (list[float]): The log probabilities of the generated tokens.
        error (Optional[str]): Any error message associated with the request. When None, the request was successful.
        status (RequestStatus): The status of the request.
        created_time (float): The time the request was created.
    
request_iddefault_factory
prompt_idsgenerated_tokenslogprobsNerrorstatuscreated_time)r+   r,   r-   r.   str__annotations__r   listr<   intr=   r>   floatr?   r   r#   r/   r@   timerA   r6   r!   r   r8   r8   A   s         
 
 OOO!E$777JS	777"'%"="="=d3i===!E$777Hd5k777E8C=)1FM111%	:::L%:::::r!   r8   c                      e Zd ZU dZeed<   dZeee	                  ed<   dZ
eee	                  ed<    ee          Zee	         ed<    ee          Zee	         ed<   d	Ze	ed
<   d	Ze	ed<   ej        Zeed<   dZe	ed<   dZe	ed<    eej                  Zeed<   dZee         ed<   dZeeef         ed<   edefd            Zej        defd            Zd Zde	fdZde	fdZ e!de	de"fd            Z#d Z$d Z%dS ) RequestStateay  Tracks the state of a generation request through its lifecycle.

    Attributes:
        request_id (str): The ID of the generation request.
        full_prompt_ids (list[int] | None): The tokens IDs of the full prompt.
        prompt_ids (list[int] | None): The tokens IDs currently being processed.
        remaining_prompt_ids (list[int]): The tokens IDs remaining to be processed (for split requests).
        static_outputs (list[int]): The generated tokens.
        allocated_blocks (int): The number of blocks allocated to the request.
        position_offset (int): The current position in the sequence for position_ids.
        status (RequestStatus): The status of the request: can be one of PENDING, PREFILLING, PREFILLING_SPLIT,
                                SPLIT_PENDING_REMAINDER, DECODING, FINISHED, FAILED
        max_new_tokens (int): The maximum number of new tokens to generate.
        eos_token_id (int): The ID of the end-of-sequence token.
        created_time (float): The time the request was created.
        error (Optional[str]): Any error message associated with the request. When None, has had no error yet.
    r9   Nfull_prompt_idsr<   r:   remaining_prompt_idsstatic_outputsr   allocated_blocksposition_offset_status   max_new_tokenseos_token_idrA   r?   )rR   rR   lifespanr   c                     | j         S )N)rO   selfs    r   r@   zRequestState.status{   s
    |r!   valuec                    | j         t          j        k    rt          j                    df| _        nI|t          j        k    r9| j        d         t          j                    f| _        |                                  || _         d S )NrR   r   )rO   r#   r/   rG   rT   r4   log_end_of_request)rW   rX   s     r   r@   zRequestState.status   sj    <=000!Y[["-DMMm,,,!]1-ty{{;DM##%%%r!   c                    t          | j                  }|                                 }| j        d         | j        z
  }| j        d         | j        z
  }t
                              d| j         d|d|d|d|
           d S )Nr      zRequest z finished: prefill_len = z decode_len = z start_time = z end_time = )lenrJ   generated_lenrT   rA   loggerinfor9   )rW   prefill_len
decode_len
start_timeend_times        r   rZ   zRequestState.log_end_of_request   s    $.//''))
]1%(99
=#d&77qtqq;qqJqqT^qqdlqq	
 	
 	
 	
 	
r!   c                     | j         S )zCGet the current length of the sequence (prompt + generated tokens).)rN   rV   s    r   current_lenzRequestState.current_len   s    ##r!   c                 *    t          | j                  S )z*Get the number of tokens generated so far.)r]   rL   rV   s    r   r^   zRequestState.generated_len   s    4&'''r!   token_idc                    | j         t          j        k    rdS || j        k    o
| j        dk    }|                                 | j        k    }|r|r| j                            |g           |s|rt          j        | _         dS dS )zUpdate the request with a newly generated token and check for completion.

        Args:
            token_id: The token ID to add to the output sequence

        Returns:
            bool: True if the request is now complete, False otherwise
        FrR   T)	r@   r#   r3   rS   r^   rQ   rL   extendr4   )rW   rh   is_eos
is_max_lens       r   update_with_tokenzRequestState.update_with_token   s     ;-0005T..J43D3J''))T-@@
  	36 	3&&z222 	Z 	'0DK4ur!   c           
      F   d| j          d| j         d|                                  dt          | j                   dt          | j                   d| j         dt          | j                   d| j         d	| j	         g	}d
d
                    |          z   dz   S )Nzrequest_id=zstatus=zout_tokens=zquery_length=zremaining_tokens=z
kv_length=zfull_prompt_length=zallocated_blocks=zgenerated_tokens=zRequestState(
	z,
	z
))r9   rO   r^   r]   r<   rK   rN   rJ   rM   rL   join)rW   msgs     r   __repr__zRequestState.__repr__   s    +$/++$dl$$0$,,..002C0022@D$= > >@@/-//=#d&:";";==7 5775 355

 #W\\#%6%66>>r!   c                 ^    t          | j        | j        | j        | j        g | j                  S )z7Convert the request state to a GenerationOutput object.)r9   r<   r@   r=   r>   r?   )r8   r9   rJ   r@   rL   r?   rV   s    r   to_generation_outputz!RequestState.to_generation_output   s7    +;!0*
 
 
 	
r!   )&r+   r,   r-   r.   rB   rC   rJ   r   rD   rE   r<   r   rK   rL   rM   rN   r#   r/   rO   rQ   rS   rG   rA   rF   r?   rT   tuplepropertyr@   setterrZ   rf   r^   r	   boolrm   rq   rs   r6   r!   r   rI   rI   X   s         & OOO+/OXd3i(///&*Jc#***&+eD&A&A&A$s)AAA %d ; ; ;NDI;;;cOS*2G]222NCL#%	:::L%:::E8C=$,HeE5L!,,,    X ]M    ]
 
 
$S $ $ $ $(s ( ( ( (
 # $    V4? ? ?	
 	
 	
 	
 	
r!   rI   )rG   dataclassesr   r   enumr   typingr   r   utils.loggingr   utils.metricsr	   	getLoggerr_   rt   r   rE   r    r#   r8   rI   r6   r!   r   <module>r~      sd    ( ( ( ( ( ( ( (              $ $ $ $ $ $ # # # # # # 
	5	6	6Cu|S#s/J)K C C C C,	 	 	 	 	D 	 	 	 ; ; ; ; ; ; ; ;, s
 s
 s
 s
 s
 s
 s
 s
 s
 s
r!   