
    .`i&                         d dl mZmZ d dlmZ d dlZd dlZd dlm	Z	 d dl
m
Z
 d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZ d dlmZ d dlmZ d dlm Z   G d d          Z!de"e#         dz  de#de#dede$e#e#f         f
dZ%de#dee#         de$e#e#f         dede#dz  f
dZ&de$e#e#f         dej'        deddfdZ(de#de#d e d!ed"e"e         d#e#d$ede)e$e*ef         e$e*ej+        f         f         fd%Z,dS )&    )CallableIterable)AnyN)tqdm)
VllmConfig)CUDAGraphMode)graph_captureis_global_first_rank)set_forward_context)AttentionMetadataBuilder)KVCacheConfig)build_attn_metadatabuild_slot_mappings_by_layer)BlockTables)make_num_tokens_across_dp)InputBuffersc                   b   e Zd Zdededej        fdZdefdZde	de
e	         de	d	z  fd
Zde	dej        dedej        d	z  dej        d	z  dedee         dedd	fdZ ej                    dej        dedej        d	z  dej        d	z  dedee         dedd	fd            Zde	dej        fdZd	S )CudaGraphManagervllm_config
uses_mropedevicec                    || _         |j        | _        || _        || _        |j        j        | _        | j        j        | _        | j        j        | _	        |j
        j        | _        |j        | _        | j        J |  | j        j        t          j        | _        n| j        j        | _        t#          | j        j        | j        | j	        | j                  | _        i | _        t*          j                                        | _        d | _        d S N)r   scheduler_configr   r   model_configmax_model_lenmax_num_seqsmax_num_reqsmax_num_batched_tokensmax_num_tokensparallel_configdata_parallel_sizedp_sizecompilation_configcudagraph_moder   NONEget_cudagraph_sizescudagraph_capture_sizescudagraph_sizesgraphstorchcudagraph_pool_handlepoolhidden_states)selfr   r   r   s       v/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/v1/worker/gpu/cudagraph_utils.py__init__zCudaGraphManager.__init__   s     ' + <$(5C 1>"3J"2E"-"@&222*"19"/"4D"&"9"HD2#;	 
  
 8:J0022	26    returnc                 2    t          | j                  dk    S )Nr   )lenr)   )r0   s    r1   needs_capturezCudaGraphManager.needs_capture<   s    4'((1,,r3   num_tokens_after_paddingnum_tokens_per_requestNc                 :    t          ||| j        | j                  S r   )get_cudagraph_sizer)   r%   )r0   r8   r9   s      r1   r;   z#CudaGraphManager.get_cudagraph_size?   s(    
 "$" 	
 
 	
r3   
num_tokensmodelinput_buffersmrope_positionsinputs_embedsblock_tablesattn_metadata_builderskv_cache_configc	           	      z   t          || j                  }	|j        d |         }
|j        d |         }| j        r|J |d d d |f         }|
|d |         }t          |	||||| j        |          \  }}t          | j        |          }t          || j
        |t          j        ||          5   ||
||          }| j        t          j        |          | _        d d d            n# 1 swxY w Y   || j        vsJ t          j                                        }t          || j
        |t          j        ||          5  t          j                            || j                  5   ||
||          }|| j        d |<   d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   || j        |<   d S )N)r<   cudagraph_runtime_modenum_tokens_across_dpslot_mapping)	input_ids	positionsr@   )minr   rH   rI   r   prepare_inputs_to_capturer   r   r#   r   r   r   r&   r/   r+   
empty_liker*   r,   	CUDAGraphgraphr.   )r0   r<   r=   r>   r?   r@   rA   rB   rC   num_reqsrH   rI   attn_metadataslot_mappingsrF   r/   rN   s                    r1   capture_graphzCudaGraphManager.capture_graphK   s    z4#455!+KZK8	!+KZK8	? 	8"...';J;7I$)+:+6M'@"(
 (
$}  9zRR !!#0#5!5&
 
 
 	E 	E "E##+  M
 !)%*%5m%D%D"	E 	E 	E 	E 	E 	E 	E 	E 	E 	E 	E 	E 	E 	E 	E" ,,,,
$$&& %'4'9%9*  	< 	< JUDI..	< 	< "E##+  M
 /<D{
{+!	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	<" #(JsH   //C**C.1C.&F&(FF&F	F&F	F&&F*-F*c                 V    t          | j        | j        | j        |||||||
  
         d S )N)r=   r>   r?   r@   rA   rB   rC   )capture_graphsr)   r   rR   )r0   r=   r>   r?   r@   rA   rB   rC   s           r1   capturezCudaGraphManager.capture   sK     	 K'+'%#9+	
 	
 	
 	
 	
 	
r3   c                     || j         v sJ | j         |                                          | j        J | j        d |         S r   )r*   replayr/   )r0   r<   s     r1   runzCudaGraphManager.run   sO    T[((((J&&(((!---!+:+..r3   )__name__
__module____qualname__r   boolr+   r   r2   r7   intr   r;   nnModuler   Tensorr   listr   r   rR   inference_moderU   rX    r3   r1   r   r      s       77 7 	7 7 7 7B-t - - - -

"%

 !)

 
t	

 

 

 

C(C( yC( $	C(
 ,C( |d*C( "C( !%%= >C( 'C( 
C( C( C( C(J U
y
 $
 ,	

 |d*
 "
 !%%= >
 '
 

 
 
 
./c /el / / / / / /r3   r   capture_sizesr   r    r%   r4   c                    |                                 si S | si S t          |           } |t          j        k    r|n|fd| D             } | si S i }t	          d| d         dz             D ]}| D ]}||k    r|||<    n|S )Nc                      g | ]
}|k    |S rc   rc   ).0xupper_bounds     r1   
<listcomp>z'get_cudagraph_sizes.<locals>.<listcomp>   s#    BBB1k1A1AQ1A1A1Ar3      )has_full_cudagraphssortedr   FULL_DECODE_ONLYrange)rd   r   r    r%   r)   irh   ri   s          @r1   r'   r'      s     --// 	 	=))M ];;; 	 
 CBBBBBBM 	&(O1mB'!+,,   	 	AAvv%&"  r3   num_tokens_after_dp_paddingr9   r)   c                     |                                 sd S |                    |           }|d S t          d |D                       }|r$|                                t          j        k    rd S |S )Nc              3   "   K   | ]
}|d k    V  dS )rk   Nrc   )rg   rh   s     r1   	<genexpr>z%get_cudagraph_size.<locals>.<genexpr>   s&      99Q1q5999999r3   )rm   getany
mixed_moder   FULL)rr   r9   r)   r%   sizeis_mixeds         r1   r;   r;      s     --// t:;;D|t99"899999H N--//=3EEEtKr3   r   
capture_fnc                    t          t          |                                           d          }t                      rt	          |d          }t          |          5  |D ]} ||fi | 	 d d d            d S # 1 swxY w Y   d S )NT)reversezCapturing CUDA graphs)desc)r   )rn   setvaluesr
   r   r	   )r)   r   r|   capture_kwargssizes_to_capturerz   s         r1   rT   rT      s     c/"8"8":":;;TJJJ P 07NOOO	f	%	%	% / /$ 	/ 	/DJt..~....	// / / / / / / / / / / / / / / / / /s    A==BBrO   r<   r>   rA   rB   r   rC   c                     | z  }t          j         dz   t           j                  |z  }||d<   t          j        |          }	|	|j        d  dz   <   ||j         dz   d <   |j        d  dz            }
||j        d  <   d|j         d <    fd|j        D             }|j        d d d |f         }t          ||          }t          | ||
|	|j        ||||
  
        }||fS )Nrk   )dtyperl   r   c                 $    g | ]}|d          S r   rc   )rg   rh   rO   s     r1   rj   z-prepare_inputs_to_capture.<locals>.<listcomp>  s!    PPP1!IXI,PPPr3   )
rB   rO   r<   query_start_loc_gpuquery_start_loc_cpuseq_lensmax_seq_lenrA   rQ   rC   )nparangeint32r+   
from_numpyquery_start_locr   input_block_tablesrQ   r   r   )rO   r<   r>   rA   rB   r   rC   num_tokens_per_reqquery_start_loc_npr   r   r   rQ   slot_mappings_by_layerrP   s   `              r1   rK   rK      sA    $x/8a<rx@@@CUU'r*+=>>4GM!.HqL.14>M!(Q,..1#3NhlNCO )3M9H9%()M899%PPPP0OPPP .qqq+:+~>M9  (5+/'!'#'  M 000r3   )-collections.abcr   r   typingr   numpyr   r+   torch.nnr^   r   vllm.configr   vllm.config.compilationr   vllm.distributed.parallel_stater	   r
   vllm.forward_contextr   vllm.v1.attention.backendr   vllm.v1.kv_cache_interfacer   vllm.v1.worker.gpu.attn_utilsr   r   vllm.v1.worker.gpu.block_tabler   vllm.v1.worker.gpu.dp_utilsr   vllm.v1.worker.gpu.input_batchr   r   ra   r]   dictr'   r;   r   rT   tuplestrr`   rK   rc   r3   r1   <module>r      s   / . . . . . . .                        " " " " " " 1 1 1 1 1 1 O O O O O O O O 4 4 4 4 4 4 > > > > > > 4 4 4 4 4 4        7 6 6 6 6 6 A A A A A A 7 7 7 7 7 7R/ R/ R/ R/ R/ R/ R/ R/j9t#  "	
 
#s(^   >!$$SM #s(^ "	
 	4Z   ,/#s(^/L/ /
 
/ / / / )1)1)1  )1 	)1
 !!9:)1 )1 #)1 4S>4U\ 1223)1 )1 )1 )1 )1 )1r3   