
    .`i?                        d dl mZ d dlmZ d dlZd dlZd dlmZm	Z	 d dl
mZ  G d d          Ze G d d	                      Ze	j        d
ej        fd            Zdej        dej        dej        dej        dej        dej        dej        ddfdZe	j        d
ej        fd            Zdej        dej        dej        dej        dej        ddfdZe	j        d
ej        fd            Zdej        dej        dej        dej        dej        dej        dej        dej        dedej        fdZe	j        d             Zd ej        dej        dej        dej        dej        deej        ej        f         fd!Ze	j        d"             Zdej        dej        dej        d#ej        d$ej        d ej        d%ej        dej        ddfd&Ze	j        d
ej        fd'            Zdej        d(edej        d)edej        f
d*ZdS )+    )	dataclass)AnyN)tltritonrandom_uuidc                   ,    e Zd Zdededej        fdZdS )InputBuffersmax_num_reqsmax_num_tokensdevicec                 f   || _         || _        || _        t          j        |t          j        |          | _        t          j        |t          j        |          | _        t          j        |dz   t          j        |          | _	        t          j        |t          j        |          | _
        d S )Ndtyper      )r   r   r   torchzerosint32	input_idsint64	positionsquery_start_locseq_lens)selfr   r   r   s       r/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/v1/worker/gpu/input_batch.py__init__zInputBuffers.__init__   s     ),^5;vVVV^5;vVVV${1EK 
  
  
 LFSSS    N)__name__
__module____qualname__intr   r   r    r   r   r
   r
      sO        TT T 	T T T T T Tr   r
   c                      e Zd ZU ee         ed<   eed<   ej        ed<   e	j
        ed<   ej        ed<   e	j
        ed<   eed<   eed<   eed	<   ej        ed
<   e	j
        ed<   ej        ed<   ej        ed<   ej        ed<   ej        dz  ed<   ej        dz  ed<   eeef         ed<   eeej        f         ed<   ej        ed<   ej        ed<   e	j
        ed<   ededededej        dd f
d            ZdS )
InputBatchreq_idsnum_reqsidx_mappingidx_mapping_npexpanded_idx_mappingnum_scheduled_tokens
num_tokensnum_tokens_after_paddingnum_draft_tokensr   query_start_loc_npr   r   r   Nmrope_positionsinputs_embedsattn_metadataslot_mappingslogits_indicescu_num_logitscu_num_logits_npinput_buffersr   returnc                    d|cxk     r|k    sn J d t          |          D             }t          j        |t          j                  }t	          j        |t          j        |          }|}t          j        |||z  t          j                  }	|	dxx         ||z  z  cc<   t          |	                                          |k    sJ ||z  |j        d |<   |j        |dz
  xx         ||z  z  cc<   d|j        |d <   |j        d |         }
t          j	        |dz   t          j                  }d|d<   t          j
        |	|dd                     d|j        d<   t	          j
        |
d|j        d|dz                       ||j        |dz   d <   |j        d |dz            }|j        d |                                         }|j        d |                                         }|dd          dz
  }t	          j        |dz   |t          j        	          }t          j        |dz   t          j                  } | di d
|d|d|d|d|d|	d|d|ddd|d|d|
d|d|dd dd dd dd d|d|d|S ) Nr   c                 6    g | ]}d | dt                       S )req__r   ).0is     r   
<listcomp>z)InputBatch.make_dummy.<locals>.<listcomp>W   s-    GGG!-!--kmm--GGGr   )r   r   r   )out)dimr@   )r   r   r%   r&   r'   r(   r)   r*   r+   r,   r-   r   r.   r   r   r   r/   r0   r1   r2   r3   r4   r5   r"   )rangenparanger   r   fullr!   sumr   emptycumsumr   r   zero_r   )clsr&   r+   r6   r   r%   r(   r'   r)   r*   r   r.   r   r   r   r3   r4   r5   s                     r   
make_dummyzInputBatch.make_dummyN   s    8))))z))))))GGuXGGG828<<<l85;vNNN*!wxx1GrxXXXR   J$99   '++--..*<<<< -7(,Byy)x!|,,,
X0EE,,,,-xyy) ))8)4Xhl"(CCC !1
	&,>qrr,BCCCC+,%a(!!>q8a<?O!P	
 	
 	
 	
 9C%hlnn5'7(Q,G!+KZK8>>@@	!+KZK8>>@@	 ),q0X\&TTT9X\BBBs 
 
 
G
X
 $
 *>	

 "6!5
 "6!5
 "z
 &0Z
 Q
 ,O
  21
 X
  i
  i
 !D
  $!
" $#
$ $%
& *>'
( (-)
* .-+
 	
r   )r   r   r    liststr__annotations__r!   r   TensorrC   ndarraydictr   classmethodr
   r   rK   r"   r   r   r$   r$       s         #YMMM J,&&& *$$$OOO!!!! \!!!
"""l ||\D((((<$&&&& S>!!!U\)**** L   <j   ?
?
 ?
 $	?

 ?
 
?
 ?
 ?
 [?
 ?
 ?
r   r$   
BLOCK_SIZEc	                    t          j        d          }	t          j        ||	z             }
t          j        ||
z             }t          j        ||
z             }||k    rd S t          j        ||	z             }t          j        ||	z   dz             }||z
  }||
|z  z   }t          d||          D ]Y}|t          j        d|          z   }||k     }t          j        ||z   |z   |          }t          j        | |z   |z   ||           Z||z   }||k     r1t          j        ||z             }t          j        ||
z   |           d S d S Nr   r   )mask)r   
program_idloadrB   rD   store)input_ids_ptrnext_prefill_tokens_ptridx_mapping_ptrquery_start_loc_ptrprefill_token_ids_ptrprefill_token_ids_strideprefill_lens_ptrnum_computed_tokens_ptrrS   	batch_idxreq_state_idxprefill_lennum_computedquery_start	query_end	query_lenprefill_ptrr=   blockrV   tokensnext_pos
next_tokens                          r   _prepare_prefill_inputs_kernelrn      sx    a  IGOi788M'*]:;;K72]BCCL{""'-	9::K+i7!;<<IK'I'-:R*RRK1i,, I IBIa,,,y |3e;$GGG
,u4f4HHHHHi'H+W[8344

(=8*EEEEE r   r   next_prefill_tokensr'   r   prefill_token_idsrd   num_computed_tokensr7   c                     |j         d         }t          |f         | |||||                    d          ||d	  	         d S )Nr      rS   )shapern   stride)r   ro   r'   r   rp   rd   rq   r&   s           r   prepare_prefill_inputsrw      sb      #H"H;/  ##
 
 
 
 
 
r   c                    t          j        d          }t          j        d          dz
  }||k    rSt          j        |||          D ]:}	|	t          j        d|          z   }
|
|k     }t          j        ||
z   d|           ;d S t          j        ||z             }t          j        ||z             }t          j        ||z             }t          j        ||z   dz             }||z
  }||z   }t          j        ||z   |           t          j        d||          D ]B}	|	t          j        d|          z   }
|
|k     }||
z   }t          j        | |z   |
z   ||           Cd S rU   )r   rW   num_programsrB   rD   rY   rX   )pos_ptrseq_lens_ptrr\   r]   ra   r   rS   req_idr&   r=   rj   rV   rc   rq   startendrh   seq_lenposs                      r   _prepare_pos_seq_lens_kernelr      s    ]1Fq!!A%H(L*== 	9 	9A	!Z000E<'DH\E)1488888GOf455M'"9M"IJJG'&011E
'%.2
3
3CeI!I-GH\F"G,,,XaJ// : :BIa,,,y !E)
55(#D99999	: :r   r   r   c           	      v    | j         d         }t          |dz   f         ||| |||j         d         d           d S )Nr   r   rs   rt   )ru   r   )r'   r   rq   r   r   r&   s         r   prepare_pos_seq_lensr      s[      #H !(Q,1q     r   c                    t          j        d          }t          j        ||z             }t          j        ||z             }t          j        ||z   dz             }||z
  }|dz
  }t          j        d|
          }t          j        ||z   dz             }||z
  }t          j        |	|z   |z   ||z   ||k                t          j        ||z             }t          j        ||z             }||k    rd S t          j        ||z             }t          j        | |z   |z
  |           |dk    rG||k     }t          j        |||z  z   |z   |          }t          j        | |z   |z
  |z   ||           d S d S rU   r   rW   rX   rD   rY   )rZ   r\   last_sampled_tokens_ptrr]   r{   prefill_len_ptrdraft_tokens_ptrdraft_tokens_stridecu_num_logits_ptrlogits_indices_ptrrS   rb   rc   cu_num_logits_startcu_num_logits_end
num_logitsr-   rj   rg   logits_startr   rd   last_token_idrV   draft_tokenss                            r   (_combine_sampled_and_draft_tokens_kernelr     s    a  IGOi788M '"3i"?@@ 1I = ABB"%88J!A~ Ia$$E+i7!;<<Iz)LH0058uZ    glY.//G'/M9::K+ G3mCDDMH]Y&3]CCC !''w}/BBBUJ
 
 
 	I%(885@	
 	
 	
 	
 	
 	
 r   last_sampled_tokensr   r4   r   c	                    |j         d         }	|j         d         }
t          j        |t          j        | j                  }t          |	f         | |||||||                    d          ||t          j        |
dz                        |S )Nr   r?   r   r   rt   )	ru   r   rG   r   r   r   rv   r   next_power_of_2)r   r'   r   r   r   rd   r   r4   r   r&   num_speculative_stepsr3   s               r    combine_sampled_and_draft_tokensr   @  s     ~a H(.r2[k  N
 -h[9A )*?!*CDD    r   c                     t          j        d          }t          j        ||z             }t          j        ||z             }t          j        ||z             }	||	k     }
t          j        | |z             }t          j        |
d|          }t          j        | |z   |           t          j        ||z             }t          j        ||z   dz             }||z
  }||z
  }t          j        |
d|          }t          j        ||z   |           d S Nr   r   )r   rW   rX   whererY   )num_sampled_ptrnum_rejected_ptrr{   r   r\   r   rb   rc   r   rd   is_chunked_prefillingnum_sampledr   
logits_endr   num_rejecteds                   r   $_get_num_sampled_and_rejected_kernelr   e  s    a  IGOi788MglY.//G'/M9::K#k1'/I566K(0![AAKH_y(+6667,y899L*Y6:;;Jl*J+L811lCCLH	)<88888r   r   c                     |j         d         }t          j        |           }t          |f         | |||||           | |fS )Nr   )ru   r   
empty_liker   )r   r   r4   r'   rd   r&   r   s          r   get_num_sampled_and_rejectedr     sY      #H#K00L((5   $$r   c
                    t          j        d          }
t          j        | |
z             }t          j        ||
z             }|dk    r8t          j        ||
|z  z   |z   dz
            }t          j        ||z   |           t	          |          D ]X}t          j        ||
|z  z   |z             }|||z  z   |z   }t          j        |          }|dz  }t          j        ||           Yt          j        |	|
z             }t          j        |	|
z   dz             }||z
  }t          j        ||
z             }t          j        ||z             }|||z
  z  }t          j        ||z   |           d S r   )r   rW   rX   rY   rB   )r\   ra   r   output_bin_counts_ptroutput_bin_counts_stridesampled_tokens_ptrsampled_tokens_strider   r   r]   r|   rc   r   token_idr=   	token_ptrcountrf   rg   rh   r   re   s                         r   _post_update_kernelr     s    ]1FGOf455M'/F233KQ7*?!??+MPQQ
 
 	(=8(CCC; # #7-9N0NNQRRSS!M4L$LLxW 	 	""

E""""'-677K+f4q899IK'I7+f455L72]BCCLI,,LH$}4lCCCCCr   output_bin_countssampled_tokensr   c                     | j         d         }t          |f         | ||||                    d          ||                    d          |||d           d S )Nr   r   )	num_warps)ru   r   rv   )	r'   rq   r   r   r   r   r   r   r&   s	            r   post_updater     sv    $  #H$  ##a       r   c                 8   t          j        d          }t          j        ||z             }t          j        ||z   dz             }||z
  }t          j        d|          }||k     }	t          j        | |z             }
t          j        ||z   |z   |
|	           d S rU   r   )r\   expanded_idx_mapping_ptrr   rS   req_idx	start_idxend_idxr+   rj   rV   rc   s              r   _expand_idx_mapping_kernelr     s     mAG)G344Ig''1A566G9$JIa$$E:DGOg566MH%	1E9=tTTTTTTr   total_num_logitsmax_expand_lenc                     | j         d         }|                     |          }t          |f         | ||t          j        |                     |S )Nr   rt   )ru   	new_emptyr   r   r   )r'   r   r4   r   r&   r)   s         r   expand_idx_mappingr     sc      #H&001ABB{+).99	     r   )dataclassesr   typingr   numpyrC   r   vllm.triton_utilsr   r   
vllm.utilsr   r
   r$   jit	constexprrn   rO   rw   r   r   r   r!   r   r   tupler   r   r   r   r   r"   r   r   <module>r      sX   " ! ! ! ! !            ( ( ( ( ( ( ( ( " " " " " "T T T T T T T T& m
 m
 m
 m
 m
 m
 m
 m
` !F !F !F !F !FH|  \	
 |   
   . !: !: !: !: !:H\  
	
 l 
   * 5
 5
 5
 5
 5
p"|"" " \	"
 l" " ," <" " \" " " "J 9 9 98%%l% <% 	%
 % 5<%&% % % %( &D &D &DR 	  | L  ,  \!" 
#   D U 	U U U U"    <  	 
 \           r   