
    .`iY                         d dl Zd dlZd dlmZ d dlmZmZ d dlm	Z	 d dl
mZ  G d d          Zej        dej        fd	            Zd
ej        dej        dej        dej        dej        dej        dej        ddfdZ ej        ddg          dej        fd            Zdej        dededej        dej        ddfdZdedefdZdS )    N)SamplingParams)tltriton)cdiv)UvaBackedTensorc                       e Zd Zdededej        fdZdededdfd	Zd
ej	        de
j        de
j        ddfdZdej	        dej	        de
j        ddfdZdS )PenaltiesStatemax_num_reqs
vocab_sizedevicec                    || _         || _        || _        t          |t          j                  | _        t          |t          j                  | _        t          |t          j                  | _        t          j
        |t                    | _        | j        j	                            d           | j                                         t	          j
        | j         t          | j        d          t          j        | j                  | _        t	          j
        | j         | j        t          j        | j                  | _        g | _        d S )N)dtype      ?    )r   r   )r
   r   r   r   torchfloat32repetition_penaltyfrequency_penaltypresence_penaltynpzerosbooluse_penaltyfillcopy_to_uvar   int32prompt_bin_maskoutput_bin_counts_penalties_reqs)selfr
   r   r   s       w/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/v1/worker/gpu/sample/penalties.py__init__zPenaltiesState.__init__   s   ($"1,em"T"T"T!0U]!S!S!S /EM R R R8L=== 	"'',,,++---  %{"%%+;	 
  
  
 "'tek$+"
 "
 "
 +-    req_idxsampling_paramsreturnNc                     |j         | j         j        |<   |j        | j        j        |<   |j        | j        j        |<   t	          |          }|| j        |<   |r| j                            |           d S d S N)r   r   r   r   r   r   append)r    r$   r%   
do_penaltys       r!   add_requestzPenaltiesState.add_request*   s    .=.P"7+-<-N!'*,;,L ) 11
$.! 	1 ''00000	1 	1r#   prefill_token_idsprefill_lensprompt_lensc           	         | j         D ]W}t          ||         t          ||                   t          ||                   | j        |         | j        |                    X| j                                          | j                                         | j                                         | j	                                         d S r(   )
r   bincountintr   r   clearr   r   r   r   )r    r,   r-   r.   r$   s        r!   apply_staged_writesz"PenaltiesState.apply_staged_writes4   s     + 	 	G!'*L)**K())$W-&w/    	""$$$++---**,,,))+++++r#   logitsidx_mappingidx_mapping_npc           	          t          j        | j        |                   sd S t          ||| j        j        | j        j        | j        j        | j        | j	                   d S r(   )
r   anyr   apply_penaltiesr   gpur   r   r   r   )r    r4   r5   r6   s       r!   r9   zPenaltiesState.apply_penaltiesI   sl     vd&~677 	F#'"&!% "	
 	
 	
 	
 	
r#   )__name__
__module____qualname__r1   r   r   r"   r   r+   Tensorr   ndarrayr3   r9    r#   r!   r	   r	      s        -S -c -5< - - - -:13 1 1D 1 1 1 1, <, j, Z	,
 
, , , ,*

 \
 
	

 

 
 
 
 
 
r#   r	   
BLOCK_SIZEc                    t          j        d          }t          j        ||z             }t          j        ||z             }t          j        ||z             }t          j        ||z             }|dk    }|dk    }|dk    }|p|p|}|sd S t          j        d          }||z  t          j        d|          z   }||
k     }t          j        | ||z  z   |z   |          }|                    t           j                  }t          j        |||	z  z   |z   |          }|dk    }|r||z  dz  t          j        d|dz            z   }t          j        |||z  z   |z   |t          j        |
d          k               }|d d d f         t          j        dd          d d d f         z	  dz  }|                    t           j                  }|                    |          }t          j	        ||z  |d          }|t          j	        |dk    d|z  |          z  }|||z  z  }|||z  z  }t          j
        | ||z  z   |z   ||           d S )Nr   r              maskr   )r   
program_idloadarangetor   r   int1reshapewherestore)
logits_ptrlogits_strideidx_mapping_ptrrepetition_penalty_ptrfrequency_penalty_ptrpresence_penalty_ptrprompt_bin_mask_ptrprompt_bin_mask_strideoutput_bin_counts_ptroutput_bin_counts_strider   rA   	batch_idxreq_state_idxrep_penaltyfreq_penaltypres_penaltyuse_rep_penaltyuse_freq_penaltyuse_pres_penaltyr   	block_idxblockrF   r4   r   output_bin_maskpacked_blockpacked_maskr   scales                                  r!   _penalties_kernelrg   ^   s    a  IGOi788M'0=@AAK70=@AAL7/-?@@L!S(O#s*#s*!I%5I9IK a  I
"RYq*%=%==E:DWZ)m";;eC$OOOFYYrz""F0H HH5P   (!+O  ; :-3bi:QSCS6T6TTg-2H"HH<W
B 7 77
 
 
 'qqq$w/BIa4D4DT111W4MNRSS),,RW55)11*== ?:KMM"(6A:sU{E::: l...F
l_,,FHZ)m33e;V$OOOOOOr#   r4   r5   r   r   r   r   r   r&   c                     | j         \  }}d}	t          j        ||	          }
t          ||
f         | |                     d          ||||||                    d          ||                    d          ||	           d S )Ni    r   rA   )shaper   r   rg   stride)r4   r5   r   r   r   r   r   num_reqsr   rA   
num_blockss              r!   r9   r9      s     "<HjJZ44Jx,-aq!!  ##     r#   prefill_len
prompt_len)do_not_specializec                    t          j        d          }||z  |k    rd S ||z  t          j        d|          z   }||z  |k     rg||k     }t          j        | |z   |          }	|	dz  }
|	dz  }t          j        |fdt           j                  |z  }t          j        ||
z   ||           |dz   |z  |k    rD||k     }|||k    z  }t          j        | |z   |          }	t          j        ||	z   d|           d S d S )Nr   rE   r   rD   )r   rG   rI   rH   fullr   	atomic_or
atomic_add)prefill_token_ids_ptrrn   ro   rU   rW   rA   ra   rb   rF   prefill_tokensidxbit_idxbits                r!   _bincount_kernelrz      s6    a  I:,,
"RYq*%=%==E:
**z!!6!>TJJJ" 2%gzmQ11W<
(3.$????A#z11{"##!6!>TJJJ
+n<adKKKKKK	 21r#   r,   c                     |                                  |                                  d}t          j        ||          }t          |f         | |||||           d S )Ni   ri   )zero_r   r   rz   )r,   rn   ro   r   r   rA   rm   s          r!   r0   r0      sx     J[*55Jj]#     r#   r%   c                 D    | j         dk    p| j        dk    p
| j        dk    S )Nr   rC   )r   r   r   )r%   s    r!   r   r      s2    *c1 	3,3	3+s2r#   )numpyr   r   vllm.sampling_paramsr   vllm.triton_utilsr   r   vllm.utils.math_utilsr   vllm.v1.worker.gpu.buffer_utilsr   r	   jit	constexprrg   r>   r9   rz   r1   r0   r   r   r@   r#   r!   <module>r      s        / / / / / / ( ( ( ( ( ( ( ( & & & & & & ; ; ; ; ; ;O
 O
 O
 O
 O
 O
 O
 O
d =P =P =P =P =P@L  |	
 l \ | 
   8 }l;<<<L L L L =<L6|  \	
 | 
   * D      r#   