
    .`iq^                         d dl mZ d dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d d	lmZ d
Z G d d          ZdS )    )castN)LoRARequest)SamplingType)&length_from_prompt_token_ids_or_embeds)swap_dict_values)LogprobsTensors)MultiGroupBlockTable)CachedRequestStategh㈵>c                   b   e Zd Zdedededej        dededee         dee         fd	Ze	d
ee
         fd            Z	 d&dddedz  d
dfdZde
d
edz  fdZdeded
dfdZdee         d
dfdZd
ej        fdZdej        dej        d
eeedf         eedf         ee         f         fdZe	d
efd            Ze	d
efd            Ze	d
efd            Ze	d
efd             Ze	d
efd!            Ze	d
efd"            Ze	d
efd#            Ze	d
edz  fd$            Ze	d
efd%            Z dS )'
InputBatchmax_num_reqsmax_model_lenmax_num_batched_tokensdevice
pin_memory
vocab_sizeblock_sizeskernel_block_sizesc	           	      
   || _         || _        || _        || _        || _        || _        g | _        i | _        t          j	        ||fdt          j
        d          | _        | j                                        | _        t          j	        |t          j
                  | _        t          j	        |t          j
                  | _        t          j	        |fdt          j
        |          | _        | j                                        | _        t'          |||||||          | _        t          j        |ft          j        |          | _        t          j        |ft          j        d|          | _        | j                                        | _        t5                      | _        t5                      | _        t          j        |ft          j        |          | _        t          j        |ft          j        d|          | _        | j                                        | _        t5                      | _         t          j        |ft          j
        |          | _!        t          j        |ft          j
        d|          | _"        | j"                                        | _#        t5                      | _$        t          j        |ft          j        |          | _%        t          j        |ft          j        d|          | _&        | j&                                        | _'        t5                      | _(        t          j        |ft          j)        |          | _*        t          j        |ft          j)        d|          | _+        | j+                                        | _,        t5                      | _-        t          j        |ft          j)        |          | _.        t          j        |ft          j)        d|          | _/        | j/                                        | _0        t5                      | _1        t          j        |ft          j)        |          | _2        t          j        |ft          j)        d|          | _3        | j3                                        | _4        t5                      | _5        i | _6        t          j	        | j         ft          j7                  | _8        i | _9        i | _:        i | _;        i | _<        i | _=        d g|z  | _>        t5                      | _?        d | _@        d | _A        i | _B        g | _C        d S )NcpuFr   dtyper   )r   )r   r   r   r   r   r   r   r   r   )r   r   r   )Dr   r   r   r   r   r   _req_idsreq_id_to_indextorchzerosint32token_ids_cpu_tensornumpytoken_ids_cpunpnum_tokens_no_specnum_prompt_tokensnum_computed_tokens_cpu_tensornum_computed_tokens_cpur	   block_tableemptyfloat32temperaturetemperature_cpu_tensortemperature_cpusetgreedy_reqsrandom_reqstop_ptop_p_cpu_tensor	top_p_cpu
top_p_reqstop_ktop_k_cpu_tensor	top_k_cpu
top_k_reqsmin_pmin_p_cpu_tensor	min_p_cpu
min_p_reqsfloatfrequency_penaltiesfrequency_penalties_cpu_tensorfrequency_penalties_cpufrequency_penalties_reqspresence_penaltiespresence_penalties_cpu_tensorpresence_penalties_cpupresence_penalties_reqsrepetition_penaltiesrepetition_penalties_cpu_tensorrepetition_penalties_cpurepetition_penalties_reqs
min_tokensint64request_lora_mappinglora_id_to_request_idslora_id_to_lora_request
generatorsnum_logprobsin_progress_prompt_logprobs_cpu
logit_biashas_allowed_token_idsallowed_token_ids_mask!allowed_token_ids_mask_cpu_tensorbad_words_token_idsreq_output_token_ids)	selfr   r   r   r   r   r   r   r   s	            r/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/v1/worker/tpu_input_batch.py__init__zInputBatch.__init__   s    )*&<#$$*,/1 %*K=)+	%
 %
 %
! "6<<>>"$(<rx"H"H"H!#,bh!G!G!G.3kO+!	/
 /
 /
+ (,'J'P'P'R'R$ 0%'#9!#1
 
 
 !;O5=
 
 
 ',kO5=:'
 '
 '
#  $:@@BB%(UU%(UU[,fUUU
 %O5=:!
 !
 !
 .4466$'EE[,FSSS
 %O5;u!
 !
 !
 .4466$'EE[,fUUU
 %O5=:!
 !
 !
 .4466$'EE $);O5;v$
 $
 $
  /4kO5;u/
 /
 /
+ (,'J'P'P'R'R$25%%% #(+O5;v#
 #
 #
 .3[O5;u.
 .
 .
* '+&H&N&N&P&P#14$ %*KO5;v%
 %
 %
! 05{O5;u0
 0
 0
, )-(L(R(R(T(T%3655& <> %'Hd.?-A$R$R$R!;=#?A$
 79,. LN,:>,9N/2uu" <@#FJ. @B <>!!!    returnc                 L    t          t          t                   | j                  S N)r   liststrr   rW   s    rX   req_idszInputBatch.req_ids   s     DIt}---rZ   Nrequestr
   	req_indexc                 8	   || j         }|| j        k     sJ |j        }|t          | j                  k    r:| j                            |           | j                            |j                   n|| j        |<   |j        | j        |<   || j        |<   t          |j
        |j                  }|| j        |<   |j
        | j        |d |f<   |}|t          |j                  z   }|j        | j        |||f<   |j        | j        |<   |j        | j        |<   | j                            |j        |           |j        }|
J d            |j        t.          j        k    r%d| j        |<   | j                            |           n)|j        | j        |<   | j                            |           |j        | j        |<   |j        dk     r| j                             |           |j!        }d|cxk     r| j"        k     rn n| j#                            |           n| j"        }|| j$        |<   |j%        | j&        |<   |j'        | j(        |<   |j%        tR          k    r| j*                            |           |j'        dk    r| j+                            |           |j,        | j-        |<   |j,        dk    r| j.                            |           |j/        | j0        |<   |j/        dk    r| j1                            |           |j2        r|j2        |j3        f| j2        |<   |j4        |j4        | j5        |<   |j6        |j6        | j7        |<   |j8        |j8        | j8        |<   |j9        r| j:                            |           | j;        gty          j=        | j        | j"        tx          j>        | j?                  | _@        ty          j=        | j        | j"        tx          j>        d          | _;        d| j;        |<   d	| j;        |         |j9        <   |jA        r|jA        | jA        |<   |jB        rk|jB        jC        }	|	| jD        vrt                      | jD        |	<   |	| jF        |<   | jD        |	                             |j                   |jB        | jG        |	<   d S d| jF        |<   d S )
Nz"pooling requests not supported yetg           r   g      ?r   r   TF)Hnum_reqsr   req_idlenr   appendrV   output_token_idsr   r   prompt_token_idsprompt_embedsr$   r!   
num_tokensr#   num_computed_tokensr&   r'   add_row	block_idssampling_paramssampling_typer   GREEDYr,   r.   addr*   r/   r0   r2   r3   r4   r   r7   r6   r8   r:   frequency_penaltyr?   _SAMPLING_EPSr;   r@   presence_penaltyrC   rD   repetition_penaltyrG   rH   rI   all_stop_token_ids	generatorrN   logprobsrO   rQ   allowed_token_idsrR   rT   r   r   boolr   rS   rU   lora_requestlora_int_idrL   r-   rK   rM   )
rW   rb   rc   rg   r$   	start_idxend_idxrq   r4   lora_ids
             rX   add_requestzInputBatch.add_request   s   
 I4,,,,,DM****M  (((%,,W-EFFFF'-DM)$3:3KD%i0'0V$ C$g&;
 
 ->y)<C<T9&8'8&889%	c'":;;;;B;S9i&778-4-?	*292M$Y/  !2I>>>!1**,P***(L,???.1D +  ((((.=.ID +  ((($3$9y! 1$$O'''%u&&&&t&&&&&O''''OE$)y!$3$9y!2A2S$Y/ =00O''',33)--f5551@1Q#I.+s22(,,V4443B3U%i0-44*..v666% 	*2*DOI& ()0):DOI&#/(7(@Df%%1)8)CDOI&, 	&**62225= /4k%O*;	/ / /+ :?%tejQV: : :6 AED29=  29=1 . 	V2A2UD$Y/  
	5*6Gd9997:uu+G43:D%i0'044W^DDD4;4HD(111 45D%i000rZ   rg   c                    | j                             |d          }|dS d| j        |<   d| j        |<   | j                            |           | j                            |           | j                            |           | j                            |           | j	                            |           | j
                            |d           | j                            |           | j                            |           | j                            |           | j                            |d           | j                            |d           | j                            |d           | j        |         }|dk    r|| j        |                             |           t'          | j        |                   dk    r4| j                            |           | j                            |           d| j        |<   d| j        |<   | j                            |           | j         | j        |                             d           | j                            |d           |S )z<This method must always be followed by a call to condense().Nr   F)r   popr   rV   r.   discardr/   r3   r7   r;   rI   r@   rD   rH   rN   rO   rP   rK   rL   rh   rM   rQ   rR   rT   fill_rU   )rW   rg   rc   r   s       rX   remove_requestzInputBatch.remove_request#  sH    (,,VT::	4#'i /3!),  (((  ((('''''''''It,,,%--f555$,,V444&..v666It,,,fd+++,00>>> +I6a<<'088@@@4.w788A==+//888,0099934D%i0%)	""**62221=29=CCEJJJ $$Y555rZ   i1i2c                    | j         |         }| j         |         }| j         |         | j         |         c| j         |<   | j         |<   | j        |         | j        |         c| j        |<   | j        |<   ||J | j        |         | j        |         c| j        |<   | j        |<   | j        |         | j        |         c| j        |<   | j        |<   | j        |         | j        |         c| j        |<   | j        |<   | j        |         | j        |         c| j        |<   | j        |<   | j        |         | j        |         c| j        |<   | j        |<   | j        |         | j        |         c| j        |<   | j        |<   | j        |         | j        |         c| j        |<   | j        |<   | j	        |         | j	        |         c| j	        |<   | j	        |<   | j
        |         | j
        |         c| j
        |<   | j
        |<   | j        |         | j        |         c| j        |<   | j        |<   | j        |         | j        |         c| j        |<   | j        |<   | j        |df                                         }| j        |df         | j        |df<   || j        |df<   t          | j        ||           t          | j        ||           t          | j        ||           | j        |         | j        |         c| j        |<   | j        |<   | j        |         | j        |         c| j        |<   | j        |<   | j        ,| j        |         | j        |         	 c| j        |<   | j        |<   | j                            ||           d S )N.)r   rV   r   r#   r$   r&   r,   r2   r6   r?   rC   rG   r:   r!   copyr   rN   rI   rU   rK   rQ   rT   r'   swap_row)rW   r   r   	old_id_i1	old_id_i2tmps         rX   swap_stateszInputBatch.swap_statesJ  s   M"%	M"%	/3}R/@$-PRBS,b4=,%b)%b) 	E!"%t'@'D $)>)>> + + 	IY')=i)H
 #B'#B' 	A#T%<R%@
 "2&"2& 	?r"D$:2$>
 (,(, 	K$R($*Fr*J
  $ $ 	;R $"6r": 261CT^TVEW.rDN2.151CT^TVEW.rDN2.(,(, 	K$R($*Fr*J
 '+'+ 	I#B')DR)H
 )"-)"- 	M%b)4+H+L 261CT^TVEW.rDN2.  S)..00&*&8S&A2s7#&)2s7#"b111"b11112r::: %b)%b) 	E!"%t'@'D
 OBOB 	1T_R0
 1=
 6r:6r:6r:6r:
 	!!"b)))))rZ   empty_req_indicesc                    | j         }|dk    r4| j                                         | j                                         dS |t	          |          z   dz
  }|rS||v r	|dz  }||v 	|                                }||k    rn*| j        |         }| j        |         }|J || j        |<   d| j        |<   || j        |<   d| j        |<   || j        |<   | j        |         }| j        |d|f         | j        |d|f<   | j        |         | j        |<   | j	        |         | j	        |<   | j
        |         | j
        |<   | j                            ||           | j        |         | j        |<   | j        |         | j        |<   | j        |         | j        |<   | j        |         | j        |<   | j        |         | j        |<   | j        |         | j        |<   | j        |         | j        |<   | j                            |d          }|
|| j        |<   | j                            |d          }	|	
|	| j        |<   | j        |         | j        |<   | j        |         | j        |<   | j        | j        |         | j        |<   | j                            |d          }
|

|
| j        |<   |dz  }|S| j        | j         d= | j        | j         d= dS )zMove non-empty requests down into lower, empty indices.

        Args:
          empty_req_indices: empty batch indices, sorted descending.
        r   Nre   )rf   r   clearrV   rh   r   r   r#   r!   r$   r&   r'   move_rowr,   r2   r6   r?   rC   rG   r:   rN   rI   rK   rQ   rT   rU   )rW   r   rf   last_req_indexempty_indexrg   rj   rm   rz   	min_tokenrU   s              rX   condensezInputBatch.condense  sV    =q==M!!!%++---F "C(9$:$::Q> D	  $555!# !$555 ,//11Kn,, ]>2F#8H%%%)/DM+&,0DM.)5ED%k28<D%n5+6D (0@J;?;M+<D{KZK78 483J4D#K0 372H2XD";/8<8T9D(5 %%nkBBB040D^0TD -*..*HDN;'*..*HDN;'8<8T9D(5 8<7R8D'4 :>9V:D)+6 +/.*HDN;'++NDAAI$/8,++NDAAI$/8,595N6D%k2 ,0?>+JDOK(5A:>J 6{C #'":">">~t"T"T".8K(5aNI   D	 N M$-//*%dmoo666rZ   c                    | j         d | j                                                 }t          j        | j        |fdt          j        | j                  }|                                }| j        d | j        d |f         |d d <   t          | j                  D ]}| j
        ||| j         |         d f<   |                    | j        d          S )Nr   r   T)r   non_blocking)r$   rf   maxr   r(   rJ   r   r    r!   ranger   tor   )rW   max_prompt_lenprompt_token_ids_cpu_tensorrk   is        rX   _make_prompt_token_ids_tensorz(InputBatch._make_prompt_token_ids_tensor  s    /$-@DDFF&+k]N++	'
 '
 '
# 7<<>>"04=/>/1QR t}%% 	O 	OA?CQ 6q 9 ; ;;<<*--T[t-TTTrZ   num_scheduled_tokensnum_sampled_tokens.c                     | j         d| j                 }t          |          }t          |                    |                    }t	          | j                                                  }|||fS )a  
        Given the num_scheduled_tokens for each request in the batch, return
        datastructures used to activate the current LoRAs.
        Returns:
            1. prompt_lora_mapping: A tuple of size self.num_reqs where,
               prompt_lora_mapping[i] is the LoRA id to use for the ith prompt.
            2. token_lora_mapping: A tuple of size np.sum(num_scheduled_tokens)
               where, token_lora_mapping[i] is the LoRA id to use for ith token.
            3. lora_requests: Set of relevant LoRA requests.
        N)rK   rf   tuplerepeatr-   rM   values)rW   r   r   req_lora_mappingprompt_lora_mappingtoken_lora_mappingactive_lora_requestss          rX   make_lora_inputszInputBatch.make_lora_inputs  sx      4_t}_E#$455"#3#:#:;O#P#PQQ14(//112
 2
 #$68LLLrZ   c                 *    t          | j                  S r]   )rh   r   r`   s    rX   rf   zInputBatch.num_reqs  s    4'(((rZ   c                 2    t          | j                  dk    S Nr   )rh   r/   r`   s    rX   
all_greedyzInputBatch.all_greedy      4#$$))rZ   c                 2    t          | j                  dk    S r   )rh   r.   r`   s    rX   
all_randomzInputBatch.all_random   r   rZ   c                 2    t          | j                  dk    S r   )rh   r3   r`   s    rX   no_top_pzInputBatch.no_top_p$      4?##q((rZ   c                 2    t          | j                  dk    S r   )rh   r7   r`   s    rX   no_top_kzInputBatch.no_top_k(  r   rZ   c                 2    t          | j                  dk    S r   )rh   r;   r`   s    rX   no_min_pzInputBatch.no_min_p,  r   rZ   c                     t          | j                  dk    o/t          | j                  dk    ot          | j                  dk    S r   )rh   rD   r@   rH   r`   s    rX   no_penaltieszInputBatch.no_penalties0  sL     ,--2 9D122a79D233q8	
rZ   c                 `    | j         r&t          | j                                                   nd S r]   )rO   r   r   r`   s    rX   max_num_logprobszInputBatch.max_num_logprobs8  s,    262CMs4$++--...MrZ   c                 2    t          | j                  dk    S r   )rh   rR   r`   s    rX   no_allowed_token_idszInputBatch.no_allowed_token_ids<  s    4-..!33rZ   r]   )!__name__
__module____qualname__intr   r   r}   r^   rY   propertyr_   ra   r   r   r   r   Tensorr   r"   ndarrayr   r-   r   r   rf   r   r   r   r   r   r   r   r    rZ   rX   r   r      sf       O?O? O? !$	O?
 O? O? O? #YO? !IO? O? O? O?b .c . . . X. !%t5 t5%t5 :t5 
	t5 t5 t5 t5l%S %S4Z % % % %NK*c K*s K*t K* K* K* K*ZX7$s) X7 X7 X7 X7 X7tUu| U U U U M$&JMDFJM	uS#Xc3h[1AA	BM M M M. )# ) ) ) X) *D * * * X* *D * * * X* )$ ) ) ) X) )$ ) ) ) X) )$ ) ) ) X) 
d 
 
 
 X
 N#* N N N XN 4d 4 4 4 X4 4 4rZ   r   )typingr   r    r"   r   vllm.lora.requestr   vllm.sampling_paramsr   
vllm.utilsr   vllm.utils.collection_utilsr   vllm.v1.outputsr   vllm.v1.worker.block_tabler	   vllm.v1.worker.gpu_input_batchr
   rv   r   r   rZ   rX   <module>r      s   
            ) ) ) ) ) ) - - - - - - = = = = = = 8 8 8 8 8 8 + + + + + + ; ; ; ; ; ; = = = = = =i4 i4 i4 i4 i4 i4 i4 i4 i4 i4rZ   