
    .`i3                         d dl Zd dlZd dlmZmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ  ee          Z G d d          Z G d	 d
          ZdS )    N)get_dcp_groupget_pcp_group)init_loggercdiv)CpuGpuBuffer)get_total_cp_world_sizec                      e Zd Zdedededededej        dedefd	Zd
ee         deddfdZ	d
ee         deddfdZ
dededdfdZdededdfdZdej        dej        ddfdZdeddfdZdeddfdZd&dZedej        dedej        dej        fd            Zdedej        fd Zdej        fd!Zdej        fd"Zd#eej        z  d$ej        defd%ZdS )'
BlockTable
block_sizemax_num_reqsmax_num_blocks_per_reqmax_num_batched_tokens
pin_memorydevicekernel_block_sizecp_kv_cache_interleave_sizec	                    || _         || _        || _        || _        ||k    r|| _        d| _        d| _        n7||z  dk    rt          d| d| d          || _        ||z  | _        d| _        || j        z  | _        | 	                    | j         | j        t          j                  | _        t          j        |t          j                  | _        | 	                    | j        t          j                  | _        | j        r4t          j        d| j                                      dd	          | _        nd
| _        	 t+                      j        | _        t+                      j        | _        n# t4          $ r d| _        d| _        Y nw xY w	 t7                      j        | _        t7                      j        | _        n# t4          $ r d| _        d| _        Y nw xY w|| _        d
S )a  
        Args:
            block_size: Block size used for KV cache memory allocation
            max_num_reqs: Maximum number of concurrent requests supported.
            max_num_blocks_per_req: Maximum number of blocks per request.
            max_num_batched_tokens: Maximum number of tokens in a batch.
            pin_memory: Whether to pin memory for faster GPU transfers.
            device: Target device for the block table.
            kernel_block_size: The block_size of underlying attention kernel.
                Will be the same as `block_size` if `block_size` is supported
                by the attention kernel.
           Fr   zkernel_block_size z( must divide kv_manager_block_size size z evenlyT)dtypeN)r   r   r   r   r   blocks_per_kv_blockuse_hybrid_blocks
ValueErrorr   _make_buffertorchint32block_tablenpzerosnum_blocks_per_rowint64slot_mappingarangereshape_kernel_block_aranger   
world_sizepcp_world_sizerank_in_grouppcp_rankAssertionErrorr   dcp_world_sizedcp_rankr   )	selfr   r   r   r   r   r   r   r   s	            n/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/v1/worker/block_table.py__init__zBlockTable.__init__   s0   . )&<#$
** )DO'(D$%*D"" --22 F): F F2<F F F  
 0DO'15F'FD$%)D"&<t?W&W#,,t:%+ - 
 
 #%(<rx"H"H"H --'u{ . 
 
 ! 	-(*	!T5M(N(N(V(V2) )D%% )-D%	"///"<D)OO9DMM 	 	 	"#DDMMM		"///"<D)OO9DMM 	 	 	"#DDMMM	 ,G(((s$   0E4 4FF0G GG	block_idsrow_idxreturnNc                    |sd S | j         r3|                     t          j        |          | j        | j                  }t          |          }| j        |         }| j        |xx         |z  cc<   || j        j        ||||z   f<   d S N)	r   map_to_kernel_blocksr   arrayr   r&   lenr!   r   )r.   r1   r2   
num_blocksstarts        r/   
append_rowzBlockTable.append_rowd   s    
  	F! 	11##T%=t?X I ^^
'0(((J6(((CLGUUZ-?%??@@@    c                 F    d| j         |<   |                     ||           d S Nr   )r!   r;   )r.   r1   r2   s      r/   add_rowzBlockTable.add_rowv   s)    +,(	7+++++r<   srctgtc                 r    | j         |         }| j        j        }||d |f         ||d |f<   || j         |<   d S r5   r!   r   r   )r.   r@   rA   r9   block_table_nps        r/   move_rowzBlockTable.move_rowz   sN    ,S1
),+9#{
{:J+KsKZK'('1$$$r<   c                 ~    ||g||g}}| j         |         | j         |<   | j        j        |         | j        j        |<   d S r5   rC   )r.   r@   rA   src_tgttgt_srcs        r/   swap_rowzBlockTable.swap_row   sI    :Sz+/+B7+K('+'7':7'CG$$$r<   req_indices	positionsc                    | j         | j        z  }| j        | j        z  | j        z   }|dk    r| j        |z  }|| j        z  ||z  z   }| j        j                                        |         }||z  }|| j	        z  |z  |k    }	||| j	        z  z  | j	        z  || j	        z  z   }
|| j        z  |
z   }t          j
        |	|d          | j        j        d |j        d         <   d S || j        z  || j        z  z   }| j        j                                        |         }|| j        z  }
t          j        || j        z  |
| j        j        d |j        d                             d S )Nr   r   r   )out)r(   r,   r*   r-   r   r   r   r   ravelr   wherer#   shapeadd)r.   rJ   rK   total_cp_world_sizetotal_cp_rankvirtual_block_sizeblock_table_indicesblock_numbersvirtual_block_offsetsmaskblock_offsetsr#   s               r/   compute_slot_mappingzBlockTable.compute_slot_mapping   s    #1D4GG(;;dmK"" "&3F!Fd99112  
 !,/55778KLM %.0B$B!%34%& !!  &'$*JJL23 ($*JJK  )4?:]JL;=8lB< <D !7;#4Q#7!7888
 d99I<XX   !,/55778KLM%7MF/%()?;+<Q+?)?@     r<   num_reqsc                 :    | j                             |           d S r5   )r   copy_to_gpur.   r[   s     r/   commit_block_tablezBlockTable.commit_block_table   s    $$X.....r<   
num_tokensc                 :    | j                             |           d S r5   )r#   r]   )r.   r`   s     r/   commit_slot_mappingzBlockTable.commit_slot_mapping   s    %%j11111r<   c                     | j         j                            d           | j         j                            d           d S r>   )r   gpufill_cpur.   s    r/   clearzBlockTable.clear   s<    ""1%%%""1%%%%%r<   kv_manager_block_idsr   kernel_block_arangec                 t    |dk    r| S |                      dd          |z  |z   }|                     d          S )u5  Convert kv_manager_block_id IDs to kernel block IDs.

        Example:
            # kv_manager_block_ids: 32 tokens,
            # Kernel block size: 16 tokens
            # blocks_per_kv_block = 2
            >>> kv_manager_block_ids = np.array([0, 1, 2])
            >>> Result: [0, 1, 2, 3, 4, 5]

            # Each kv_manager_block_id maps to 2 kernel block id:
            # kv_manager_block_id 0 → kernel block id [0, 1]
            # kv_manager_block_id 1 → kernel block id [2, 3]
            # kv_manager_block_id 2 → kernel block id [4, 5]
        r   r   )r%   )ri   r   rj   kernel_block_idss       r/   r6   zBlockTable.map_to_kernel_blocks   sS    ( !##'' !((Q//2EE!" 	
  ''+++r<   c                 *    | j         j        d|         S )z-Returns the device tensor of the block table.N)r   rd   r^   s     r/   get_device_tensorzBlockTable.get_device_tensor   s    #IXI..r<   c                     | j         j        S )z*Returns the CPU tensor of the block table.)r   rf   rg   s    r/   get_cpu_tensorzBlockTable.get_cpu_tensor   s    ##r<   c                     | j         j        S )z+Returns the numpy array of the block table.)r   r   rg   s    r/   get_numpy_arrayzBlockTable.get_numpy_array   s    ""r<   sizer   c                2    t          ||| j        | j        dS )N)r   r   r   )r   r   r   )r.   r   rs   s      r/   r   zBlockTable._make_buffer   s(     t{t
 
 
 	
r<   r3   N)__name__
__module____qualname__intboolr   r   r0   listr;   r?   rE   rI   r   ndarrayrZ   r_   rb   rh   staticmethodr6   Tensorrn   rp   rr   SymIntr   r   r    r<   r/   r   r      s       QGQG QG !$	QG
 !$QG QG QG QG &)QG QG QG QGfM9M M 
	M M M M$,c ,S ,T , , , ,2C 2c 2d 2 2 2 2DC Dc Dd D D D D
:::24*:	: : : :x/3 /4 / / / /2c 2d 2 2 2 2& & & & , j, ,  Z, 
	, , , \,:/# /%, / / / /$ $ $ $ $# # # # #
5<'
05
	
 
 
 
 
 
r<   r   c                   d   e Zd ZdZ	 	 d#dededededej        d	ee         d
ee         dee         dz  deddfdZ	de
ee         df         deddfdZde
ee         df         deddfdZdededdfdZdededdfdZdej        dej        ddfdZdeddfdZdeddfdZd$dZd edd!fd"ZdS )%MultiGroupBlockTablez(The BlockTables for each KV cache group.Nr   r   max_model_lenr   r   r   block_sizeskernel_block_sizesmax_num_blocksr   r3   c
                   	
 t          |          t          |          k    r0t          dt          |           dt          |           d          |t                      

fd|D             }t          |          t          |          k    r0t          dt          |           dt          |           d          	fdt          |||          D             | _        d S )Nzkernel_block_sizes length (z!) must match block_sizes length ()c                 6    g | ]}t          |z            S r   r   ).0r   r   rR   s     r/   
<listcomp>z1MultiGroupBlockTable.__init__.<locals>.<listcomp>  s9        ]J1D$DEE  r<   zmax_num_blocks length (c                 D    g | ]\  }}}t          |||          S r   )r   )	r   r   r   r   r   r   r   r   r   s	       r/   r   z1MultiGroupBlockTable.__init__.<locals>.<listcomp>"  sV     
 
 
 F
-/E &&!+	 	
 
 
r<   )r8   r   r	   zipblock_tables)r.   r   r   r   r   r   r   r   r   r   rR   s    `````   `@r/   r0   zMultiGroupBlockTable.__init__   s{    !""c+&6&666Fc2D.E.E F F25k2B2BF F F   !
 #:";";    "-  N
 ~#k"2"222F#n*=*= F F25k2B2BF F F  

 
 
 
 
 
 
 
 JM/J J
 
 
r<   r1   .r2   c                 r    t          | j                  D ]!\  }}|                    ||         |           "d S r5   )	enumerater   r;   r.   r1   r2   ir   s        r/   r;   zMultiGroupBlockTable.append_row2  sG    '(9:: 	: 	:NA{""9Q<9999	: 	:r<   c                 r    t          | j                  D ]!\  }}|                    ||         |           "d S r5   )r   r   r?   r   s        r/   r?   zMultiGroupBlockTable.add_row6  sG    '(9:: 	7 	7NA{	!g6666	7 	7r<   r@   rA   c                 F    | j         D ]}|                    ||           d S r5   )r   rE   r.   r@   rA   r   s       r/   rE   zMultiGroupBlockTable.move_row:  7    , 	+ 	+K  c****	+ 	+r<   c                 F    | j         D ]}|                    ||           d S r5   )r   rI   r   s       r/   rI   zMultiGroupBlockTable.swap_row>  r   r<   rJ   rK   c                 F    | j         D ]}|                    ||           d S r5   )r   rZ   )r.   rJ   rK   r   s       r/   rZ   z)MultiGroupBlockTable.compute_slot_mappingB  s=      , 	E 	EK,,[)DDDD	E 	Er<   r[   c                 D    | j         D ]}|                    |           d S r5   )r   r_   )r.   r[   r   s      r/   r_   z'MultiGroupBlockTable.commit_block_tableH  s5    , 	5 	5K**84444	5 	5r<   r`   c                 D    | j         D ]}|                    |           d S r5   )r   rb   )r.   r`   r   s      r/   rb   z(MultiGroupBlockTable.commit_slot_mappingL  s5    , 	8 	8K++J7777	8 	8r<   c                 B    | j         D ]}|                                 d S r5   )r   rh   )r.   r   s     r/   rh   zMultiGroupBlockTable.clearP  s3    , 	  	 K	  	 r<   idxr   c                     | j         |         S )z3Returns the BlockTable for the i-th KV cache group.)r   )r.   r   s     r/   __getitem__z MultiGroupBlockTable.__getitem__T  s     %%r<   )Nr   ru   )rv   rw   rx   __doc__ry   rz   r   r   r{   r0   tupler;   r?   rE   rI   r   r|   rZ   r_   rb   rh   r   r   r<   r/   r   r      s        22 ,0+,0
 0
0
 0
 !$	0

 0
 0
 #Y0
 !I0
 S	D(0
 &)0
 
0
 0
 0
 0
d:E$s)S.$9 :C :D : : : :7tCy#~!6 7 7 7 7 7 7+C +c +d + + + ++C +c +d + + + +E:E24*E	E E E E53 54 5 5 5 58c 8d 8 8 8 8       &s &| & & & & & &r<   r   )numpyr   r   vllm.distributedr   r   vllm.loggerr   vllm.utils.math_utilsr   vllm.v1.utilsr   vllm.v1.worker.cp_utilsr	   rv   loggerr   r   r   r<   r/   <module>r      s         9 9 9 9 9 9 9 9 # # # # # # & & & & & & & & & & & & ; ; ; ; ; ;	X		j
 j
 j
 j
 j
 j
 j
 j
ZY& Y& Y& Y& Y& Y& Y& Y& Y& Y&r<   