
    )`i N                     0   d Z ddlmZmZmZmZ ddlZddlmZ ddl	m
Z
mZ ddlmZ ddlmZmZ d	eej        eef         d
eeej                 eeef         f         fdZ G d de          Z G d de          Z G d de          Z G d de          Z G d de          Z G d de          Z G d de          Z G d de          Z G d de          Z G d de          Z G d  d!e          Z G d" d#e          Z  G d$ d%e          Z!dS )&a3  
Copyright (c) 2025 by FlashInfer team.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
    )AnyOptionalTupleUnionN)get_sampling_module)_get_cache_bufdevice_support_pdl   )ParameterizedOp)TaggedTensor
TensorTypexreturnc                 t    t          | t          j                  r| | j        t          j        k    rdndfS d | fS )Nr   g        )
isinstancetorchTensordtypeint32)r   s    y/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/flashinfer/logits_processor/operators.py_to_tensor_scalar_tupler      s?     !U\"" 5;..11C88ay    c                   B    e Zd ZdZej        Zej        Zdede	defdZ
dS )TemperatureOpz
    Temperature scaling operator.

    :attr:`TensorType.LOGITS` -> :attr:`TensorType.LOGITS`

    Parameters
    ----------
    temperature : float or torch.Tensor
        Temperature value for scaling.
    tensorkwargsr   c                    |                      |          }|                     d|d          }t          |          \  }}|*t          |t                    r|dk    rt          d          ||}n|}|j        |z  }t          ||          S )NtemperatureTrequiredr   4Temperature must be positive float or a tensor array)_validate_input_type
_get_paramr   r   float
ValueErrordatar   )selfr   r   output_typer   maybe_temperature_arrtemperature_valscaled_logitss           r   __call__zTemperatureOp.__call__4   s    //77oomVdoKK1H1U1U. (?E22 )6E6J6JSTTT ,/KK)Kk1M;777r   N__name__
__module____qualname____doc__r   LOGITSINOUTr   r   r,    r   r   r   r   %   sY        	 	 
	B

C8| 8s 8| 8 8 8 8 8 8r   r   c                   B    e Zd ZdZej        Zej        Zde	de
de	fdZdS )	SoftmaxOpa  
    Softmax operator.

    Converts logits to probabilities using softmax function.

    :attr:`TensorType.LOGITS` -> :attr:`TensorType.PROBS`

    Parameters
    ----------
    enable_pdl: bool, optional
        Whether to enable PDL for the fused kernel.
    r   r   r   c                     |                      |          }| j                            dd           }|t          |j        j                  }t          j        |j        d          }t          ||          S )N
enable_pdl)dim)	r"   default_paramsgetr	   r&   devicer   softmaxr   )r'   r   r   r(   r9   probss         r   r,   zSoftmaxOp.__call__Y   sk    //77(,,\4@@
+FK,>??Jfkr222E;///r   N)r.   r/   r0   r1   r   r2   r3   PROBSr4   r   r   r,   r5   r   r   r7   r7   H   sY          
	B

C0| 0s 0| 0 0 0 0 0 0r   r7   c                   B    e Zd ZdZej        Zej        Zdede	defdZ
dS )ProbsTopKOpag  
    Top-k filtering operator for probabilities.

    Keeps top-k probabilities, zeros out others, and renormalizes.

    :attr:`TensorType.PROBS` -> :attr:`TensorType.PROBS`

    Parameters
    ----------
    top_k : int or torch.Tensor
        Number of top tokens to keep.

    See Also
    --------
    :meth:`~flashinfer.sampling.top_k_renorm_probs`
    r   r   r   c                    |                      |          }|                     d|d          }t          |          \  }}|*t          |t                    r|dk    rt          d          t          d|j        j         d|j        j        d          }t                      
                    |j        |||          }t          ||          S )	Ntop_kTr   r   2top_k must be a positive integer or a tensor arraytop_k_renorm_probs_row_states_   	zero_init)r"   r#   r   r   intr%   r   r&   r>   r   top_k_renorm_probsr   )	r'   r   r   r(   rE   maybe_top_k_arr	top_k_valrow_states_bufferrenorm_probss	            r   r,   zProbsTopKOp.__call__y   s    //77$??%<U%C%C""9c** #.71nnQRRR +AV[-?AAK	
 
 
 +,,??K)5F
 
 L+666r   Nr.   r/   r0   r1   r   rA   r3   r4   r   r   r,   r5   r   r   rC   rC   d   sY         " 
	B

C7| 7s 7| 7 7 7 7 7 7r   rC   c                   B    e Zd ZdZej        Zej        Zdede	defdZ
dS )LogitsTopKOpaB  
    Top-k filtering operator for logits.

    Masks rejected logits to -inf.

    :attr:`TensorType.LOGITS` -> :attr:`TensorType.LOGITS`

    Parameters
    ----------
    top_k : int or torch.Tensor
        Number of top tokens to keep.

    See Also
    --------
    :class:`~flashinfer.sampling.top_k_mask_logits`
    r   r   r   c                    |                      |          }|                     d|d          }t          |          \  }}|*t          |t                    r|dk    rt          d          t          d|j        j         d|j        j        d          }t                      
                    |j        |||          }t          ||          S )	NrE   Tr   r   rF   top_k_mask_logits_row_states_rH   rI   )r"   r#   r   r   rK   r%   r   r&   r>   r   top_k_mask_logitsr   )	r'   r   r   r(   rE   rM   rN   rO   masked_logitss	            r   r,   zLogitsTopKOp.__call__   s    //77$??%<U%C%C""9c** #.71nnQRRR +@FK,>@@K	
 
 
 ,--??K)5F
 
 M;777r   Nr-   r5   r   r   rS   rS      sY         " 
	B

C8| 8s 8| 8 8 8 8 8 8r   rS   c                   B    e Zd ZdZej        Zej        Zdede	defdZ
dS )TopPOpa  
    Top-p (nucleus) filtering operator.

    Keeps tokens with cumulative probability up to threshold p, zeros out others, and renormalizes.

    :attr:`TensorType.PROBS` -> :attr:`TensorType.PROBS`

    Parameters
    ----------
    top_p : float or torch.Tensor
        Cumulative probability threshold in (0, 1].

    See Also
    --------
    :meth:`~flashinfer.sampling.top_p_renorm_probs`
    r   r   r   c                 0   |                      |          }|                     d|d          }t          |          \  }}|d|cxk     rdk    sn t          d          t	                                          |j        ||          }t          ||          S )Ntop_pTr   r   r
   /top_p must be float in (0, 1] or a tensor array)r"   r#   r   r%   r   top_p_renorm_probsr&   r   )r'   r   r   r(   r[   maybe_top_p_arr	top_p_valrP   s           r   r,   zTopPOp.__call__   s    //77$??%<U%C%C""A	,>,>,>,>Q,>,>,>,>NOOO*,,??K)
 
 L+666r   NrQ   r5   r   r   rY   rY      sY         " 
	B

C7| 7s 7| 7 7 7 7 7 7r   rY   c                   B    e Zd ZdZej        Zej        Zdede	defdZ
dS )MinPOpa  
    Min-p filtering operator.

    Keeps tokens with probability at least p times the maximum probability, zeros out others, and renormalizes.

    :attr:`TensorType.PROBS` -> :attr:`TensorType.PROBS`

    Parameters
    ----------
    min_p : float or torch.Tensor
        Minimum probability threshold as ratio of max probability.

    See Also
    --------
    :meth:`~flashinfer.sampling.min_p_renorm_probs`
    r   r   r   c                 6   |                      |          }|                     d|d          }t          |          \  }}|d|cxk     rdk    sn t          d          |B|j        |                    d          |j                            dd          d         z  k    }n.|j        ||j                            dd          d         z  k    }|j                                        }d|| <   ||                    dd          z  }	t          |	|          S )	Nmin_pTr   r   r
   /min_p must be float in (0, 1] or a tensor arrayr:   )r;   keepdim)
r"   r#   r   r%   r&   	unsqueezemaxclonesumr   )
r'   r   r   r(   rc   maybe_min_p_arr	min_p_val
min_p_maskmasked_probsr@   s
             r   r,   zMinPOp.__call__   s/   //77$??%<U%C%C""A	,>,>,>,>Q,>,>,>,>NOOO&))"--BPT0U0UVW0XXJJ  FKOODOAA!DDJ {((**$%j[!|//B/EEEE;///r   NrQ   r5   r   r   ra   ra      sY         " 
	B

C0| 0s 0| 0 0 0 0 0 0r   ra   c                   B    e Zd ZdZej        Zej        Zde	de
de	fdZdS )ProbsSampleOpa)  
    Sampling operator for probabilities.

    Samples token indices from probability distribution using inverse transform sampling.

    :attr:`TensorType.PROBS` -> :attr:`TensorType.INDICES`

    Parameters
    ----------
    deterministic : bool, optional
        Whether to use deterministic kernel implementation.
    indices : torch.Tensor, optional
        Indices for batched sampling.
    generator : torch.Generator, optional
        Random number generator.

    See Also
    --------
    :meth:`~flashinfer.sampling.sampling_from_probs`
    r   r   r   c                 4   |                      |          }| j                            dd          }|                     d|d          }|                     d|d          }t	                                          |j        |||          }t          ||          S NdeterministicTindicesFr   	generator)r"   r<   r=   r#   r   sampling_from_probsr&   r   r'   r   r   r(   rr   rs   rt   sampless           r   r,   zProbsSampleOp.__call__+  s    //77+//FF//)Ve/DDOOK%OHH	%'';;K-
 
 G[111r   N)r.   r/   r0   r1   r   rA   r3   INDICESr4   r   r   r,   r5   r   r   ro   ro     sY         * 
	B

C2| 2s 2| 2 2 2 2 2 2r   ro   c                   B    e Zd ZdZej        Zej        Zde	de
de	fdZdS )LogitsSampleOpa  
    Sampling operator for logits.

    Samples token indices from logits using Gumbel-max trick.

    :attr:`TensorType.LOGITS` -> :attr:`TensorType.INDICES`

    Parameters
    ----------
    deterministic : bool, optional
        Whether to use deterministic kernel implementation.
    indices : torch.Tensor, optional
        Indices for batched sampling.
    generator : torch.Generator, optional
        Random number generator.

    See Also
    --------
    :meth:`~flashinfer.sampling.sampling_from_logits`
    r   r   r   c                 4   |                      |          }| j                            dd          }|                     d|d          }|                     d|d          }t	                                          |j        |||          }t          ||          S rq   )r"   r<   r=   r#   r   sampling_from_logitsr&   r   rv   s           r   r,   zLogitsSampleOp.__call__S  s    //77+//FF//)Ve/DDOOK%OHH	%''<<K-
 
 G[111r   N)r.   r/   r0   r1   r   r2   r3   rx   r4   r   r   r,   r5   r   r   rz   rz   :  sY         * 
	B

C2| 2s 2| 2 2 2 2 2 2r   rz   c                   j     e Zd ZdZej        Zej        Zd
de	e
         def fdZdededefd	Z xZS )FusedTemperatureSoftmaxOpay  
    Fused temperature scaling and softmax operator.

    :attr:`TensorType.LOGITS` -> :attr:`TensorType.PROBS`

    Parameters
    ----------
    enable_pdl: bool, optional
        Whether to enable PDL for the fused kernel.
    temperature : float or torch.Tensor
        Temperature value for scaling.

    See Also
    --------
    :meth:`~flashinfer.sampling.softmax`
    Nr9   r<   c                 >     t                      j        dd|i| d S )Nr9   r5   super__init__)r'   r9   r<   	__class__s      r   r   z"FusedTemperatureSoftmaxOp.__init__x  s+    AAJA.AAAAAr   r   r   r   c                    |                      |          }|                     d|d          }t          |          \  }}|*t          |t                    r|dk    rt          d          t          dd|j        j                  }| j	        
                    dd           }|t          |j        j                  }t                                          ||j        |||          }	t          |	|          S )	Nr   Tr   r   r!   softmax_workspacerH   r9   )r"   r#   r   r   r$   r%   r   r&   r>   r<   r=   r	   r   r?   r   )
r'   r   r   r(   r   r)   r*   workspace_bufferr9   r@   s
             r   r,   z"FusedTemperatureSoftmaxOp.__call__{  s    //77oomVdoKK1H1U1U. (?E22 )6E6J6JSTTT)fk.@
 
 (,,\4@@
+FK,>??J#%%--K!
 
 E;///r   )N)r.   r/   r0   r1   r   r2   r3   rA   r4   r   boolr   r   r   r,   __classcell__r   s   @r   r~   r~   c  s         " 
	B

CB B8D> BC B B B B B B0| 0s 0| 0 0 0 0 0 0 0 0r   r~   c                   ^     e Zd ZdZej        Zej        Zd
de	de
f fdZdede
defd	Z xZS )FusedProbsTopKSampleOpa  
    Fused top-k filtering and sampling operator for probabilities.

    Use rejection sampling to directly sample from the top-k probabilities.

    :attr:`TensorType.PROBS` -> :attr:`TensorType.INDICES`

    Parameters
    ----------
    deterministic : bool, optional
        Whether to use deterministic kernel implementation.
    top_k : int or torch.Tensor
        Number of top tokens to keep.
    indices : torch.Tensor, optional
        Indices for batched sampling.
    generator : torch.Generator, optional
        Random number generator.

    See Also
    --------
    :meth:`~flashinfer.sampling.top_k_sampling_from_probs`
    Trr   r<   c                 >     t                      j        dd|i| d S Nrr   r5   r   r'   rr   r<   r   s      r   r   zFusedProbsTopKSampleOp.__init__  +    GG}GGGGGGr   r   r   r   c                    |                      |          }| j                            dd          }|                     d|d          }t	          |          \  }}|*t          |t                    r|dk    rt          d          |                     d|d          }|                     d	|d          }	t                      	                    |j
        |||||	          }
t          |
|          S )
Nrr   TrE   r   r   rF   rs   Frt   )r"   r<   r=   r#   r   r   rK   r%   r   top_k_sampling_from_probsr&   r   )r'   r   r   r(   rr   rE   rM   rN   rs   rt   rw   s              r   r,   zFusedProbsTopKSampleOp.__call__  s    //77+//FF$??%<U%C%C""9c** #.71nnQRRR//)Ve/DDOOK%OHH	%''AAK/9mY
 
 G[111r   Tr.   r/   r0   r1   r   rA   r3   rx   r4   r   r   r   r   r,   r   r   s   @r   r   r     s         . 
	B

CH Hd HS H H H H H H2| 2s 2| 2 2 2 2 2 2 2 2r   r   c                   ^     e Zd ZdZej        Zej        Zd
de	de
f fdZdede
defd	Z xZS )FusedProbsTopPSampleOpa  
    Fused top-p filtering and sampling operator for probabilities.

    Use rejection sampling to directly sample from the top-p probabilities.

    :attr:`TensorType.PROBS` -> :attr:`TensorType.INDICES`

    Parameters
    ----------
    deterministic : bool, optional
        Whether to use deterministic kernel implementation.
    top_p : float or torch.Tensor
        Cumulative probability threshold.
    indices : torch.Tensor, optional
        Indices for batched sampling.
    generator : torch.Generator, optional
        Random number generator.

    See Also
    --------
    :meth:`~flashinfer.sampling.top_p_sampling_from_probs`
    Trr   r<   c                 >     t                      j        dd|i| d S r   r   r   s      r   r   zFusedProbsTopPSampleOp.__init__  r   r   r   r   r   c                    |                      |          }| j                            dd          }|                     d|d          }t	          |          \  }}|d|cxk     rdk    sn t          d          |                     d|d	          }|                     d
|d	          }	t                                          |j        |||||	          }
t          |
|          S )Nrr   Tr[   r   r   r
   r\   rs   Frt   )
r"   r<   r=   r#   r   r%   r   top_p_sampling_from_probsr&   r   )r'   r   r   r(   rr   r[   r^   r_   rs   rt   rw   s              r   r,   zFusedProbsTopPSampleOp.__call__      //77+//FF$??%<U%C%C""A	,>,>,>,>Q,>,>,>,>NOOO//)Ve/DDOOK%OHH	%''AAK/9mY
 
 G[111r   r   r   r   s   @r   r   r              . 
	B

CH Hd HS H H H H H H2| 2s 2| 2 2 2 2 2 2 2 2r   r   c                   ^     e Zd ZdZej        Zej        Zd
de	de
f fdZdede
defd	Z xZS )FusedProbsMinPSampleOpu_  
    Fused min-p filtering and sampling operator for probabilities.

    Use rejection sampling to directly sample from the min-p probabilities.

    PROBS → INDICES

    Parameters
    ----------
    deterministic : bool, optional
        Whether to use deterministic kernel implementation.
    min_p : float or torch.Tensor
        Minimum probability threshold.
    indices : torch.Tensor, optional
        Indices for batched sampling.
    generator : torch.Generator, optional
        Random number generator.

    See Also
    --------
    :meth:`~flashinfer.sampling.min_p_sampling_from_probs`
    Trr   r<   c                 >     t                      j        dd|i| d S r   r   r   s      r   r   zFusedProbsMinPSampleOp.__init__  r   r   r   r   r   c                    |                      |          }| j                            dd          }|                     d|d          }t	          |          \  }}|d|cxk     rdk    sn t          d          |                     d|d	          }|                     d
|d	          }	t                                          |j        |||||	          }
t          |
|          S )Nrr   Trc   r   r   r
   rd   rs   Frt   )
r"   r<   r=   r#   r   r%   r   min_p_sampling_from_probsr&   r   )r'   r   r   r(   rr   rc   rj   rk   rs   rt   rw   s              r   r,   zFusedProbsMinPSampleOp.__call__  r   r   r   r   r   s   @r   r   r      r   r   r   c                   ^     e Zd ZdZej        Zej        Zd
de	de
f fdZdede
defd	Z xZS )FusedProbsTopKTopPSampleOpa9  
    Fused top-k, top-p filtering and sampling operator for probabilities.

    Use rejection sampling to directly sample from the probabilities, top-k and top-p filtering are applied jointly (rather than applying first -> renormalize -> second).

    :attr:`TensorType.PROBS` -> :attr:`TensorType.INDICES`

    Parameters
    ----------
    deterministic : bool, optional
        Whether to use deterministic kernel implementation.
    top_k : int or torch.Tensor
        Number of top tokens to keep.
    top_p : float or torch.Tensor
        Cumulative probability threshold.
    indices : torch.Tensor, optional
        Indices for batched sampling.
    generator : torch.Generator, optional
        Random number generator.

    See Also
    --------
    :meth:`~flashinfer.sampling.top_k_top_p_sampling_from_probs`
    Trr   r<   c                 >     t                      j        dd|i| d S r   r   r   s      r   r   z#FusedProbsTopKTopPSampleOp.__init__P  r   r   r   r   r   c           
      |   |                      |          }| j                            dd          }|                     d|d          }t	          |          \  }}|                     d|d          }t	          |          \  }	}
|*t          |t                    r|dk    rt          d          |	d|
cxk     rdk    sn t          d	          |                     d
|d          }|                     d|d          }t                      	                    |j
        ||||	|
||          }t          ||          S )Nrr   TrE   r   r[   r   rF   r
   r\   rs   Frt   )r"   r<   r=   r#   r   r   rK   r%   r   top_k_top_p_sampling_from_probsr&   r   )r'   r   r   r(   rr   rE   rM   rN   r[   r^   r_   rs   rt   rw   s                 r   r,   z#FusedProbsTopKTopPSampleOp.__call__S  sW   //77+//FF$??%<U%C%C"$??%<U%C%C""9c** #.71nnQRRR"A	,>,>,>,>Q,>,>,>,>NOOO//)Ve/DDOOK%OHH	%''GGK	
 	
 G[111r   r   r   r   s   @r   r   r   3  s         2 
	B

CH Hd HS H H H H H H!2| !2s !2| !2 !2 !2 !2 !2 !2 !2 !2r   r   )"r1   typingr   r   r   r   r   flashinfer.samplingr   flashinfer.utilsr   r	   opr   typesr   r   r   r$   rK   r   r   r7   rC   rS   rY   ra   ro   rz   r~   r   r   r   r   r5   r   r   <module>r      s     / . . . . . . . . . . .  3 3 3 3 3 3 ? ? ? ? ? ? ? ?       + + + + + + + +U\5#%&
8EL!5#445    8  8  8  8  8O  8  8  8F0 0 0 0 0 0 0 08+7 +7 +7 +7 +7/ +7 +7 +7\*8 *8 *8 *8 *8? *8 *8 *8Z"7 "7 "7 "7 "7_ "7 "7 "7J+0 +0 +0 +0 +0_ +0 +0 +0\%2 %2 %2 %2 %2O %2 %2 %2P%2 %2 %2 %2 %2_ %2 %2 %2R20 20 20 20 20 20 20 20j22 22 22 22 22_ 22 22 22j02 02 02 02 02_ 02 02 02f02 02 02 02 02_ 02 02 02fA2 A2 A2 A2 A2 A2 A2 A2 A2 A2r   