
    .`i9                        d dl mZ d dlmZ d dlZd dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZ d dlmZmZmZ d dlmZ d dlm Z  d dl!m"Z" e G d d                      Z# G d d          Z$dS )    )	dataclass)castN)LoRARequest)MultiModalFeatureSpec)PoolingParams)SamplingParamsSamplingType)&length_from_prompt_token_ids_or_embeds)swap_dict_values)LogprobsTensors)PoolingMetadataPoolingStates)BatchUpdateBuilderLogitsProcessorsMoveDirectionality)SamplingMetadata)
copy_slice)MultiGroupBlockTablec                      e Zd ZU eed<   ee         dz  ed<   ee         ed<   edz  ed<   e	j
        dz  ed<   eee         df         ed<   eed	<   ee         ed
<   dZe	j        dz  ed<   dZedz  ed<   dZe	j        dz  ed<   dZedz  ed<   dZe	j        dz  ed<   dZeed<   dZedz  ed<   dZedz  ed<   d Zedefd            ZdedefdZdS )CachedRequestStatereq_idNprompt_token_idsmm_featuressampling_params	generator.	block_idsnum_computed_tokensoutput_token_idsmrope_positionsmrope_position_deltaxdrope_positionslora_requestprompt_embedsr   prev_num_draft_lenpooling_paramspooling_statesc                 |    t          | j        | j                  | _        | j        t                      | _        d S d S N)r
   r   r#   num_prompt_tokensr%   r   r&   selfs    r/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/v1/worker/gpu_input_batch.py__post_init__z CachedRequestState.__post_init__8   sD    !G!4#5"
 "
 *"///D +*    returnc                 :    | j         t          | j                  z   S r(   )r)   lenr   r*   s    r,   
num_tokenszCachedRequestState.num_tokens@   s    %D,A(B(BBBr.   idxc                     || j         k     r'| j        t          d| d          | j        |         S || j         z
  t          | j                  k     r| j        || j         z
           S dS )NzTried to access token index zG, but that token was provided via prompt_embeds, and its ID is unknown.)r)   r   
ValueErrorr1   r   )r+   r3   s     r,   get_token_idzCachedRequestState.get_token_idD   s    '''$, I3 I I I   (--''#d.C*D*DDD(t/E)EFFrr.   )__name__
__module____qualname__str__annotations__listintr   r   torch	Generatortupler   Tensorr    r!   r"   r   r#   r$   r%   r   r&   r   r-   propertyr2   r7    r.   r,   r   r      s        KKK3i$&&&&+,,,,#d****%%%%T#Y^$$$$3i+/OU\D(///'+#*+++,0elT)000'+L+$+++)-M5<$&---   ,0NMD(///+/NMD(///2 2 2 CC C C C XC
 
 
 
 
 
 
 
r.   r   c                   H   e Zd Z	 	 	 	 	 	 d9dedededej        ded	ed
ee         dee         dee         dz  dedz  dedededefdZ	e
dee         fd            ZdddefdZdddefdZdedeeee         f         ddfdZdededz  fdZdededdfdZd:d Zd! Zdefd"Zdee         fd#Zdee         fd$Zdefd%Zdej        fd&Zd'e j!        d(e j!        de"e"ed)f         e"ed)f         e#e$         f         fd*Z%d+ej        d,ej&        ddfd-Z'd:d.Z(d/eee                  ddfd0Z)e
defd1            Z*e
defd2            Z+e
defd3            Z,e
defd4            Z-e
defd5            Z.e
defd6            Z/e
dedz  fd7            Z0e
defd8            Z1dS );
InputBatchNF   max_num_reqsmax_model_lenmax_num_batched_tokensdevice
pin_memory
vocab_sizeblock_sizeskernel_block_sizesmax_num_blocks_per_reqlogitsprocs!logitsprocs_need_output_token_idsis_spec_decodeis_pooling_modelcp_kv_cache_interleave_sizec                    || _         || _        || _        || _        || _        || _        || _        || _        g | _        i | _	        t          j        ||fdt          j        d          | _        | j                                        | _        t          j        ||fdt           d          | _        | j                                        | _        i | _        t)          j        |t(          j                  | _        t)          j        |t(          j                  | _        t          j        |fdt          j        |          | _        | j                                        | _        t3          ||||||||	|	  	        | _        t          j        |ft          j        |          | _        t          j        |ft          j        d|          | _        | j                                        | _        tA                      | _!        tA                      | _"        t          j        |ft          j        |          | _#        t          j        |ft          j        d|          | _$        | j$                                        | _%        tA                      | _&        t          j        |ft          j        |          | _'        t          j        |ft          j        d|          | _(        | j(                                        | _)        tA                      | _*        t          j        |ft          j+        |          | _,        t          j        |ft          j+        d|          | _-        | j-                                        | _.        tA                      | _/        t          j        |ft          j+        |          | _0        t          j        |ft          j+        d|          | _1        | j1                                        | _2        tA                      | _3        t          j        |ft          j+        |          | _4        t          j        |ft          j+        d|          | _5        | j5                                        | _6        tA                      | _7        t          j8        |ft          j9        d|          | _:        | j:                                        | _;        t)          j        | j        ft(          j9                  | _<        i | _=        i | _>        i | _?        i | _@        i | _A        t                      | _C        tA                      | _D        d | _E        d | _F        i | _G        t)          j        |t                     | _H        g | _I        |
pt                      | _K        || _L        d t          |          D             | _N        | O                                | _P        i | _Q        i | _R        d | _S        d | _T        d | _U        d | _V        d S )	NcpuFrK   dtyperL   )rY   )	rH   rI   rJ   rL   rK   rN   rO   max_num_blocksrU   rY   rK   )rY   rK   rL   c                     g | ]}g S rD   rD   ).0_s     r,   
<listcomp>z'InputBatch.__init__.<locals>.<listcomp>   s    /P/P/Pq/P/P/Pr.   )WrT   rS   rH   rI   rJ   rK   rL   rM   _req_idsreq_id_to_indexr?   zerosint32token_ids_cpu_tensornumpytoken_ids_cpuboolis_token_ids_tensoris_token_idsreq_prompt_embedsnpnum_tokens_no_specr)   num_computed_tokens_cpu_tensornum_computed_tokens_cpur   block_tableemptyfloat32temperaturetemperature_cpu_tensortemperature_cpusetgreedy_reqsrandom_reqstop_ptop_p_cpu_tensor	top_p_cpu
top_p_reqstop_ktop_k_cpu_tensor	top_k_cpu
top_k_reqsfloatfrequency_penaltiesfrequency_penalties_cpu_tensorfrequency_penalties_cpufrequency_penalties_reqspresence_penaltiespresence_penalties_cpu_tensorpresence_penalties_cpupresence_penalties_reqsrepetition_penaltiesrepetition_penalties_cpu_tensorrepetition_penalties_cpurepetition_penalties_reqsonesint64num_accepted_tokens_cpu_tensornum_accepted_tokens_cpurequest_lora_mappinglora_id_to_request_idslora_id_to_lora_request
generatorsnum_logprobsin_progress_prompt_logprobs_cpur   batch_update_builderhas_allowed_token_idsallowed_token_ids_mask!allowed_token_ids_mask_cpu_tensorbad_words_token_ids!logits_processing_needs_token_idsreq_output_token_idsr   rQ   rR   rangespec_token_ids_make_sampling_metadatasampling_metadatar%   r&   prev_sampled_token_idsprev_req_id_to_indexsampled_token_ids_cpuasync_copy_ready_event)r+   rH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   rS   rT   rU   s                  r,   __init__zInputBatch.__init__R   s   " !1,(*&<#$$*,/1 %*K=)+	%
 %
 %
! "6<<>>#(;=)%tPU$
 $
 $
  !4::<< ;="$(<rx"H"H"H!#,bh!G!G!G.3kO+!	/
 /
 /
+ (,'J'P'P'R'R$ 0%'#9!#11(C

 

 

 !;O5=
 
 
 ',kO5=:'
 '
 '
#  $:@@BB%(UU%(UU[,fUUU
 %O5=:!
 !
 !
 .4466$'EE[,FSSS
 %O5;u!
 !
 !
 .4466$'EE $);O5;v$
 $
 $
  /4kO5;u/
 /
 /
+ (,'J'P'P'R'R$25%%% #(+O5;v#
 #
 #
 .3[O5;u.
 .
 .
* '+&H&N&N&P&P#14$ %*KO5;v%
 %
 %
! 05{O5;u0
 0
 0
, )-(L(R(R(T(T%3655& /4jO5;u/
 /
 /
+ (,'J'P'P'R'R$ %'Hd.?-A$R$R$R!;=#?A$
 79,. LN,
 %7$8$8! 03uu" <@#FJ. @B 13,d1S1S1S.<>! '<*:*<*<1R. 0Q/PE,<O<O/P/P/P "&!=!=!?!? 9;8: <@#;?! ;?":>###r.   r/   c                 L    t          t          t                   | j                  S r(   )r   r=   r;   r`   r*   s    r,   req_idszInputBatch.req_ids  s     DIt}---r.   requestr   c                     | j                                         x}| j        }|| j        k     sJ d| j         _        |j        r2| j         j                            ||j        |j        |j	        f           |S )zfTrack add-request operations for logits processors.
        Not applicable to pooling models.
        NT)
r   pop_removednum_reqsrH   batch_changedr   addedappendr   r   )r+   r   new_req_indexs      r,   _register_add_requestz InputBatch._register_add_request  s     "6BBDDDMM MMt0000026!/" 
	 %+22!+,,	   r.   c                 
   |                      |          }|j        }|t          | j                  k    rT| j                            |           | j                            |j                   | j                            g            n8|| j        |<   |j        | j        |<   | j        |                                          || j	        |<   t          |j        |j                  }|| j        |<   |}|t          |j                  z   }|j        "|j        | j        |d |f<   d| j        |d |f<   nd| j        |d |f<   |j        |j        | j        |<   |j        | j        |||f<   d| j        |||f<   |j        | j        |<   |j        | j        |<   | j                            |j        |           |j        x}r~|j        t4          j        k    r%d| j        |<   | j                            |           n)|j        | j        |<   | j                             |           |j!        | j"        |<   |j!        dk     r| j#                            |           |j$        }d|cxk     r| j%        k     rn n| j&                            |           n| j%        }|| j'        |<   |j(        | j)        |<   |j(        dk    r| j*                            |           |j+        | j,        |<   |j+        dk    r| j-                            |           |j.        | j/        |<   |j.        dk    r| j0                            |           |j1        |j1        | j2        |<   |j3        !|j3        dk    r| j%        n|j3        | j4        |<   |j5        r| j6                            |           | j7        gtq          j9        | j:        | j%        tp          j;        | j<                  | _=        tq          j9        | j:        | j%        tp          j;        d	          | _7        d| j7        |<   d| j7        |         |j5        <   |j>        r|j>        | j>        |<   nG|j?        x}	r/|j@        }
|
J |	| j?        |<   |
| j@        |<   |	jA        | jB        |<   nt          d
          d| jD        |<   |jE        rj|jE        jF        }|| jG        vrt                      | jG        |<   || jI        |<   | jG        |                             |j                   |jE        | jJ        |<   n
d| jI        |<   |S )NTFg        rG   r   g      ?r5   r[   rW   zUnrecognized request type)Kr   r   r1   r`   r   r   r   r   clearra   r
   r   r#   r)   rf   ri   rj   r2   rl   r   rn   ro   add_rowr   r   sampling_typer	   GREEDYrt   rv   addrr   rw   rx   rz   r{   r|   rM   r   r~   frequency_penaltyr   r   presence_penaltyr   r   repetition_penaltyr   r   r   r   logprobsr   allowed_token_idsr   r   r?   rb   rH   rg   rK   r   r   r%   r&   requires_token_idsr   NotImplementedErrorr   r"   lora_int_idr   ru   r   r   )r+   r   	req_indexr   r)   	start_idxend_idxr   r|   r%   r&   lora_ids               r,   add_requestzInputBatch.add_request0  s    ..w77	DM****M  (((%,,W-EFFF&&r****'-DM)$3:3KD%i0	*00222'0V$ C$g&;
 
 ->y)%	c'":;;;#/@G@XDy*<+<*<<=?CDi);*;);;<<?DDi);*;);;< ,070ED"9-;B;S9i&778:>)Yw%667-4-?	*292M$Y/  !2I>>>%55? O	C,0CCC25$Y/ $$V,,,,2A2M$Y/ $$V,,,(7(=DN9%$q((##F+++#)E5****4?*****##F++++(-DN9%6E6WD(30C77-11&9995D5UD'	2/366,008882 ))4 1S88.226:::  ,-4->	*'3 '/255 OO(1 !&) 0 *..v6669A 38+)#j#{	3 3 3D/ >C[)#j$	> > >D: EI6yA  6yA#5 2 #7 (3  '55^ 
	C$3N!---*8D'*8D'1 29== &&ABBB 34$Y/  
	5*6Gd9997:uu+G43:D%i0'044W^DDD4;4HD(11 45D%i0r.   scheduled_spec_tokensc                 :   |j         }| j        |         }| j        |         }|                                 |                    |d          }t          |          }||_        |sd S | j        |         }||z   }	|| j        |||	f<   |	                    |           d S )NrD   )
r   ra   r   r   getr1   r$   rl   rf   extend)
r+   r   r   r   r   cur_spec_token_idsr   num_spec_tokensstart_indexend_token_indexs
             r,   update_req_spec_token_idsz$InputBatch.update_req_spec_token_ids  s     (0	!0; 	  """.2262>>n--%4" 	F
 -i8%7ES9k/&AAB!!.11111r.   r   c                    | j                             |d          }|dS | j                            |           d| j        |<   d| j        |<   | j        |                                          | j        |         }|dk    r>| j	        |         }|
                    |           |s| j	        |= | j        |= d| j        |<   | j        r8| j                            |d           | j                            |d           |S | j        
                    |           | j        
                    |           | j        
                    |           | j        
                    |           | j        
                    |           | j        
                    |           | j        
                    |           | j                            |d           | j                            |d           | j                            |d           | j        | j                            |d           | j        
                    |           | j         | j        |                             d           | j                            |d           |S )zThis method must always be followed by a call to condense().

        Args:
          req_id: request to remove

        Returns:
          Removed request index, or `None` if `req_id` not recognized
        Nr   F)ra   popr   removed_appendr`   r   r   r   r   r   discardr   rT   r%   r&   rv   rw   r{   r   r   r   r   r   r   r   r   r   r   fill_r   )r+   r   r   r   lora_req_idss        r,   remove_requestzInputBatch.remove_request  s~    (,,VT::	4!00;;;#'i /3!),I&,,... +I6a<<6w?L  ((( :/80934D%i0  	##FD111##FD111  (((  (((''''''%--f555$,,V444&..v666It,,,fd+++,00>>>$0%))&$777"**62221=29=CCEJJJ $$Y555r.   i1i2c                    | j         |         }| j         |         }| j         |         | j         |         c| j         |<   | j         |<   | j        |         | j        |         c| j        |<   | j        |<   | j        |         | j        |         c| j        |<   | j        |<   ||J | j        |         | j        |         c| j        |<   | j        |<   | j        |         | j        |         c| j        |<   | j        |<   | j        |         | j        |         c| j        |<   | j        |<   | j        |         | j        |         c| j        |<   | j        |<   | j        |df                                         }| j        |df         | j        |df<   || j        |df<   | j	        ||gdf         | j	        ||gdf<   | j
                            |          }| j
                            |          }||| j
        |<   n| j
                            |d            ||| j
        |<   n| j
                            |d            | j                            ||           | j        |         | j        |         c| j        |<   | j        |<   | j        rd S | j        j                            ||t(          j        f           | j        |         | j        |         c| j        |<   | j        |<   | j        |         | j        |         c| j        |<   | j        |<   | j        |         | j        |         c| j        |<   | j        |<   | j        |         | j        |         c| j        |<   | j        |<   | j        |         | j        |         c| j        |<   | j        |<   | j        |         | j        |         c| j        |<   | j        |<   | j        |         | j        |         c| j        |<   | j        |<   t;          | j        ||           t;          | j        ||           | j         .| j         |         | j         |         	 c| j         |<   | j         |<   d S d S )N.)!r`   r   r   ra   rl   r)   rn   rf   copyri   rj   r   r   ro   swap_rowr   rT   r   movedr   r   SWAPrt   rz   r~   r   r   r   r   r   r   r   r   )r+   r   r   	old_id_i1	old_id_i2tmp	embeds_i1	embeds_i2s           r,   swap_stateszInputBatch.swap_states  s   M"%	M"%	/3}R/@$-PRBS,b4=,%b)%b) 	E!"%t'@'D
 ## 	9B!4R!8 $)>)>> + + 	IY')=i)H
 #B'#B' 	A#T%<R%@
 "2&"2& 	?r"D$:2$>
 (,(, 	K$R($*Fr*J  S)..00&*&8S&A2s7#&)2s7#+/+<b"Xs]+K2r(C-( *..r22	*..r22	 )2D"2&&"&&r4000 )2D"2&&"&&r4000!!"b))) %b)%b) 	E!"%t'@'D
   	F 	!'..B8J8O/PQQQ  $ $ 	;R $"6r": 261CT^TVEW.rDN2.151CT^TVEW.rDN2.(,(, 	K$R($*Fr*J
 '+'+ 	I#B')DR)H
 )"-)"- 	M%b)4+H+L
 (,(, 	K$R($*Fr*J
 	"b11112r:::1=
 6r:6r:6r:6r::: >=r.   c                    | j         }| j        j        x}sdS |dk    rM| j                                         | j                                         | j                                         dS |t          |          z   dz
  }|r ||v r	|dz  }||v 	| j                                        }|J ||k    rn| j        	                                 | j        |         }| j        |         }|J || j        |<   d| j        |<   || j        |<   d| j        |<   || j
        |<   | j        |         t          | j        |                   z   }| j        |         | j        |         c| j        |<   | j        |<   | j        |                                          | j        |d|f         | j        |d|f<   | j        |d|f         | j        |d|f<   || j        v r"| j                            |          | j        |<   | j        |         | j        |<   | j        |         | j        |<   | j        |         | j        |<   | j                            ||           | j        |         | j        |<   | j        r|dz  }| j        j                            ||t0          j        f           | j        |         | j        |<   | j        |         | j        |<   | j        |         | j        |<   | j        |         | j        |<   | j        |         | j        |<   | j        |         | j        |<   | j         |         | j         |<   | j!                            |d          }|
|| j!        |<   | j"        | j"        |         | j"        |<   | j#                            |d          }	|	
|	| j#        |<   |dz  }| | j        |d= | j        |d= | j        |d= dS )a0  Slide non-empty requests down into lower, empty indices.

        Any consecutive empty indices at the very end of the list are not
        filled.

        Returns:
          swaps: list of (from,to) swap tuples for moved requests
          empty_req_indices: indices not filled by condensation
        Nr   rG   )$r   r   removedr`   r   r   r   r1   peek_removedr   ra   rl   rf   ri   rj   r   r)   rn   ro   move_rowr   rT   r   r   r   UNIDIRECTIONALrt   rz   r~   r   r   r   r   r   r   r   )
r+   r   empty_req_indiceslast_req_indexempty_indexr   r   r2   r   r   s
             r,   condensezInputBatch.condenser  s    =%)%>%FF! 	 Fq==M!!!%++---%%'''F "C(9$:$::Q> a	  $555!# !$555 3@@BBK***n,, %11333]>2F#8H%%%)/DM+&,0DM.)5ED%k28<D%n5+6D (0@3#N3D D J
 #K0#N3 TT 0$2Ek2R /55777;?;M+<D{KZK78 ;?:K+;Dk;J;67 !7776:6L6P6P"7 7&{3 483J4D#K0 372H2XD";/8<8T9D(5 %%nkBBB595N6D%k2 $ !# %+22.@.OP   150D^0TD -*..*HDN;'*..*HDN;'8<8T9D(5 8<7R8D'4 :>9V:D)+6 9=8T9D(5 ++NDAAI$/8, 5A:>J 6{C #'":">">~t"T"T".8K(5 aNC   a	 H M())$%hii0		***r.   c                 @   | j         r6| j                                        }|r|                                 | _        dS | j                            | j                  }| j        j        D ]}|	                    |           |r|                                 | _        dS dS )z-Apply any batch updates to sampling metadata.N)
rT   r   resetr   r   get_and_resetr   rQ   allupdate_state)r+   r   batch_update
logit_procs       r,   refresh_metadatazInputBatch.refresh_metadata  s       	 5;;==M H)-)E)E)G)G&F
 0>>t}MM*. 	2 	2J##L1111 	D%)%A%A%C%CD"""	D 	Dr.   c                    | j         }| j        st          | j        | j        |          }nd }| j        st          | j        | j        |           | j        st          | j	        | j
        |           | j        sQt          | j        | j        |           t          | j        | j        |           t          | j        | j        |           | j         p | j        d |                                         }|r|                                 nd }| j         pt+          | j                  p| j        }|r0t1          t2          t2          t4                            | j                  ng }d }| j        s3| j        J t          | j        | j        |           | j        d |         }t?          di d|d| j        d| j         d| j        rd n| j        d |         d| j        rd n| j
        d |         d| j!        d| j"        d|d	| j        d |         d
| j        d |         d| j        d |         d|d| j#        d| j        d|d| j        d| j$        S )Nrr   
all_greedy
all_randomrx   r|   r   max_num_logprobsr   r   r   r   r   r   no_penaltiesr   r   rQ   rD   )%r   r   r   rs   rr   no_top_pry   rx   no_top_kr}   r|   r   r   r   r   r   r   r   r   any_make_prompt_token_ids_tensorrg   r   rR   r   r=   r>   r   no_allowed_token_idsr   r   r   r   r   r   r   rQ   )r+   r   rr   needs_prompt_token_idsr   needs_output_token_idsr   r   s           r,   r   z"InputBatch._make_sampling_metadata  s>   = 	$+T-=x KK K} 	Dt,dj(CCC} 	Dt,dj(CCC  	 3T5Mx   2D4KX   4)   !! G5ixi@DDFF 	 5KTD..000PT 	 !! 6D,--65 	 &Dd3i$";<<< 	 7;( 	L.:::6+  
 &*%@(%K" 
 
 
#

 
 -B$$TZ		-B	

 -B$$TZ		-B
 
 "22
 .-
 !% 8( C C
  $6yyAA
 "&!:9H9!E!E
 .-
  ..
 **
 $:#9
  !% 8 8!
" ((#
 	
r.   c                      t           j                  t           j                  k    sJ  fd j        D             S )Nc                 *    g | ]}j         |         S rD   )r%   r]   r   r+   s     r,   r_   z1InputBatch.get_pooling_params.<locals>.<listcomp>\  !    GGG#F+GGGr.   )r1   r   r%   r*   s   `r,   get_pooling_paramszInputBatch.get_pooling_paramsZ  E    4<  C(;$<$<<<<<GGGG$,GGGGr.   c                      t           j                  t           j                  k    sJ  fd j        D             S )Nc                 *    g | ]}j         |         S rD   )r&   r  s     r,   r_   z1InputBatch.get_pooling_states.<locals>.<listcomp>`  r  r.   )r1   r   r&   r*   s   `r,   get_pooling_stateszInputBatch.get_pooling_states^  r  r.   c                     |                                  }|                                 }t          t          j        | j        d | j                           | j        j        ||          S )N)prompt_lensr   r%   r&   )	r  r  r   r?   
from_numpyr)   r   r   r   )r+   r%   r&   s      r,   get_pooling_metadatazInputBatch.get_pooling_metadatab  sf    00220022()?$-)PQQ!3D))	
 
 
 	
r.   c                    | j         }| j        d |                                         }t          j        | j         |fdt          j        | j                  }|                                }| j        d |d |f         |d d <   t          |          D ]}| j
        ||| j        |         d f<   |                    | j        d          S )NrW   rX   T)rK   non_blocking)r   r)   maxr?   rp   r   rL   re   rf   r   rM   torK   )r+   r   max_prompt_lenprompt_token_ids_cpu_tensorr   is         r,   r   z(InputBatch._make_prompt_token_ids_tensorm  s    =/		:>>@@&+k]N++	'
 '
 '
# 7<<>>"0(O^O1KL x 	O 	OA?CQ 6q 9 ; ;;<<*--T[t-TTTr.   num_scheduled_tokensnum_sampled_tokens.c                    | j         d| j                 }t          |                    |                    }t          |                    |                    }t	          | j                                                  }|||fS )a<  
        Given the num_scheduled_tokens for each request in the batch, return
        datastructures used to activate the current LoRAs.
        Returns:
            1. prompt_lora_mapping: A tuple of size np.sum(num_sampled_tokens)
               where, prompt_lora_mapping[i] is the LoRA id to use for the ith
               sampled token.
            2. token_lora_mapping: A tuple of size np.sum(num_scheduled_tokens)
               where, token_lora_mapping[i] is the LoRA id to use for ith token.
            3. lora_requests: Set of relevant LoRA requests.
        N)r   r   rA   repeatru   r   values)r+   r  r  req_lora_mappingprompt_lora_mappingtoken_lora_mappingactive_lora_requestss          r,   make_lora_inputszInputBatch.make_lora_inputs~  s      4_t}_E#$4$;$;<N$O$OPP"#3#:#:;O#P#PQQ14(//112
 2
 #$68LLLr.   r   r   c                 Z    | j         j        r|| _        || _        dS d| _        d| _        dS )z
        In async scheduling case, store ref to sampled_token_ids_cpu
        tensor and corresponding copy-ready event. Used to repair
        output_token_ids prior to sampling, if needed by logits processors.
        N)r   r   r   r   )r+   r   r   s      r,   set_async_sampled_token_idsz&InputBatch.set_async_sampled_token_ids  s>     !2 	/)>D&*@D''')-D&*.D'''r.   c                 f   | j         j        }| j        |sdS | j        J d}t	          | j                  D ]\  }}| j                            |          }|"||         }|r|d         dk    r9|;| j        J | j                                         | j        	                                }||         }|s|d         dk    rt          |          n|                    d          }|                    d          }	t          |          |	z
  }
t          ||
          }||d= |	|z   }|||	|<   dS )z
        In async scheduling case, update output_token_ids in sampling metadata
        from prior steps sampled token ids once they've finished copying to CPU.
        This is called right before they are needed by the logits processors.
        Nr5   )r   r   r   r   	enumerater   r   r   synchronizetolistr1   indexmin)r+   r   sampled_token_idsr#  r   
prev_indexr   new_idsnum_sampled_idsfirst_placeholdernum_placeholdersnum_to_replace	end_indexs                r,   update_async_output_token_idsz(InputBatch.update_async_output_token_ids  s     1B%-5E-F(444 &t|44 	H 	HME6266v>>J!#3E#: ' +?+Cr+I+I  (2>>>+77999$($>$E$E$G$G!!2:!>G .5bkR.?.?c'lllW]]SUEVEVO !5 : :2 > >"#788;LL 2BCCN()N:I@G !29!<==3	H 	Hr.   draft_token_idsc                 8   |r| j         sdS | j        j        x}~t          | j        |          D ]j\  }}|ra| j                             |          }|E||         }|r;|t          |          d= |                                 |                    |           idS dS )z
        In async scheduling case, update spec_token_ids in sampling metadata with
        real draft token ids from prior step. This is called right before they are
        needed by the rejection sampler for penalty/bad_words computation.
        N)	r   r   r   zipr   r   r1   r   r   )r+   r.  r   r   spec_idsr&  	draft_idss          r,   update_async_spec_token_idsz&InputBatch.update_async_spec_token_ids  s      	d&? 	F"4CCNP$'n$E$E 7 7  7!%!:!>!>v!F!FJ!-$3J$?	$ 7 )#h--// :$NN,,,$OOI666 QP7 7r.   c                 *    t          | j                  S r(   )r1   ra   r*   s    r,   r   zInputBatch.num_reqs  s    4'(((r.   c                 2    t          | j                  dk    S Nr   )r1   rw   r*   s    r,   r   zInputBatch.all_greedy      4#$$))r.   c                 2    t          | j                  dk    S r6  )r1   rv   r*   s    r,   r   zInputBatch.all_random  r7  r.   c                 2    t          | j                  dk    S r6  )r1   r{   r*   s    r,   r   zInputBatch.no_top_p      4?##q((r.   c                 2    t          | j                  dk    S r6  )r1   r   r*   s    r,   r   zInputBatch.no_top_k  r:  r.   c                     t          | j                  dk    o/t          | j                  dk    ot          | j                  dk    S r6  )r1   r   r   r   r*   s    r,   r   zInputBatch.no_penalties  sL     ,--2 9D122a79D233q8	
r.   c                 `    | j         r&t          | j                                                   nd S r(   )r   r  r  r*   s    r,   r   zInputBatch.max_num_logprobs   s,    262CMs4$++--...Mr.   c                 2    t          | j                  dk    S r6  )r1   r   r*   s    r,   r   zInputBatch.no_allowed_token_ids  s    4-..!33r.   )NNFFFrG   )r/   N)2r8   r9   r:   r>   r?   rK   rg   r=   r   r   rC   r;   r   r   r   r   dictr   r   r   r   r   r   r   r   r  r   r  r   r  rB   r   rk   ndarrayrA   ru   r   r  Eventr  r-  r3  r   r   r   r   r   r   r   r   rD   r.   r,   rF   rF   Q   s        48/327$!&+,|? |?|? |? !$	|?
 |? |? |? #Y|? !I|? !%S	D 0|? &,|? ,0|? |? |? &)|? |? |? |?| .c . . . X.
-A c    4I%I 
I I I IV2)2BFsDQTI~BV2	2 2 2 244S 4S4Z 4 4 4 4lec es et e e e eN@+ @+ @+ @+DD D D$R
)9 R
 R
 R
 R
hHD$7 H H H HHD$7 H H H H	
o 	
 	
 	
 	
Uu| U U U U"M$&JMDFJM	uS#Xc3h[1AA	BM M M M2/$|/ !&/ 
	/ / / /"&H &H &H &HP74S	? 7t 7 7 7 7( )# ) ) ) X) *D * * * X* *D * * * X* )$ ) ) ) X) )$ ) ) ) X) 
d 
 
 
 X
 N#* N N N XN 4d 4 4 4 X4 4 4r.   rF   )%dataclassesr   typingr   re   rk   r?   vllm.lora.requestr   vllm.multimodal.inputsr   vllm.pooling_paramsr   vllm.sampling_paramsr   r	   
vllm.utilsr
   vllm.utils.collection_utilsr   vllm.v1.outputsr   vllm.v1.pool.metadatar   r   vllm.v1.sample.logits_processorr   r   r   vllm.v1.sample.metadatar   vllm.v1.utilsr   vllm.v1.worker.block_tabler   r   rF   rD   r.   r,   <module>rP     s  
 " ! ! ! ! !            ) ) ) ) ) ) 8 8 8 8 8 8 - - - - - - = = = = = = = = = = = = = = 8 8 8 8 8 8 + + + + + + @ @ @ @ @ @ @ @         
 5 4 4 4 4 4 $ $ $ $ $ $ ; ; ; ; ; ; 0 0 0 0 0 0 0 0fu4 u4 u4 u4 u4 u4 u4 u4 u4 u4r.   