
    )`i8                     
   d Z ddlmZmZ ddlmZmZmZ ddlm	Z	 ddl
mZ  G d de          Z G d	 d
e          Z G d de          Z G d de          Z G d de          Z G d de          Z G d de          ZdS )a3  
Copyright (c) 2025 by FlashInfer team.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
    )ABCabstractmethod)AnyListOptional   )Op)
TensorTypec                   V    e Zd ZdZdefdZededee	         fd            Z
defdZdS )	LogitsProcessora  
    LogitsProcessor defines high-level transformations that can be applied to
    logits or probabilities. Each processor is automatically
    legalized into low-level :class:`Op` or :class:`ParameterizedOp` that can be type-checked, validated, and
    fused for optimal performance. Users can extend this class to implement their own processors.

    Parameters
    ----------
    **params : Any
        Processor-specific parameters at compile-time.

    Examples
    --------
    >>> import torch
    >>> from flashinfer.logits_processor import LogitsPipe, TopK, Sample, TensorType
    >>> torch.manual_seed(42)
    >>>
    >>> # Create a pipeline that legalizes to a fused op.
    >>> pipe = LogitsPipe([
    ...     TopK(),         # Top-k filtering on logits
    ...     Sample()        # Sample from the filtered distribution
    ... ], input_type=TensorType.PROBS)  # assume the input is probabilities
    >>>
    >>> pipe
    LogitsPipe([TopK -> Sample], ops=[ProbsTopKOp -> ProbsSampleOp], compiled_ops=[FusedProbsTopKSampleOp])

    Notes
    -----
    Subclasses must implement the :meth:`legalize` method to convert the high-level
    processor into one or more low-level operators with specific input/output types
    paramsc                     || _         dS )z
        Initialize the processor.

        Parameters
        ----------
        **params : Any
            Processor-specific parameters at compile-time.
        N)r   )selfr   s     z/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/flashinfer/logits_processor/processors.py__init__zLogitsProcessor.__init__9   s         
input_typereturnc                     t           )a-  
        Legalize the processor into a list of low-level operators.

        Parameters
        ----------
        input_type : TensorType
            The expected input tensor type of the processor.

        Returns
        -------
        List[Op]
            A list of low-level operators.
        )NotImplementedError)r   r   s     r   legalizezLogitsProcessor.legalizeD   s
     "!r   c                     d                     d | j                                        D                       }| j        j         d| dS )Nz, c              3   *   K   | ]\  }}| d | V  dS )=N ).0kvs      r   	<genexpr>z+LogitsProcessor.__repr__.<locals>.<genexpr>V   s0      JJda!zzazzJJJJJJr   ())joinr   items	__class____name__)r   
params_strs     r   __repr__zLogitsProcessor.__repr__U   sL    YYJJdk6G6G6I6IJJJJJ
.)99J9999r   N)r%   
__module____qualname____doc__r   r   r   r
   r   r	   r   strr'   r   r   r   r   r      s         @	 	 	 	 	 ": "$r( " " " ^" :# : : : : : :r   r   c                   D     e Zd ZdZdef fdZdedee         fdZ	 xZ
S )Temperaturea7  
    Temperature scaling processor for logits.

    Scales logits by dividing by a temperature value.

    :attr:`TensorType.LOGITS` -> :attr:`TensorType.LOGITS`

    Parameters
    ----------
    temperature : float or torch.Tensor, Runtime
        Temperature value for scaling. Must be positive. Can be a scalar or per-batch tensor.

    Examples
    --------
    >>> import torch
    >>> from flashinfer.logits_processor import LogitsPipe, Temperature, Sample
    >>> torch.manual_seed(42)
    >>> pipe = LogitsPipe([Temperature()])
    >>> logits = torch.randn(2, 2, device="cuda")
    >>> logits
    tensor([[ 0.1940,  2.1614], [ -0.1721,  0.8491]], device='cuda:0')
    >>> scaled_logits = pipe(logits, temperature=0.8)
    >>> scaled_logits
    tensor([[ 0.2425,  2.7017], [-0.2151,  1.0613]], device='cuda:0')
    r   c                 :     t                      j        di | dS )z_
        Constructor for Temperature processor. No compile-time parameters are needed.
        Nr   superr   r   r   r$   s     r   r   zTemperature.__init__u   (     	""6"""""r   r   r   c                 n    ddl m} |t          j        k    rt	          d|            |di | j        gS )L
        Legalize the processor into a list of low-level operators.
        r   )TemperatureOpz/Temperature can only be applied to LOGITS, got r   )	operatorsr5   r
   LOGITS
ValueErrorr   )r   r   r5   s      r   r   zTemperature.legalize{   s^     	-,,,,,***N*NN   ,,,,--r   r%   r(   r)   r*   r   r   r
   r   r	   r   __classcell__r$   s   @r   r-   r-   Z   st         4# # # # # # #.: .$r( . . . . . . . .r   r-   c                   V     e Zd ZdZd	dee         def fdZdede	e
         fdZ xZS )
Softmaxa3  
    Softmax processor to convert logits to probabilities.

    Applies the softmax function.

    :attr:`TensorType.LOGITS` -> :attr:`TensorType.PROBS`

    Parameters
    ----------
    enable_pdl : bool, optional, Compile-time
        Whether to enable PDL for the kernel implementation.
        Default is True.

    Examples
    --------
    >>> import torch
    >>> from flashinfer.logits_processor import LogitsPipe, Softmax, Sample
    >>> torch.manual_seed(42)
    >>> pipe = LogitsPipe([Softmax()])
    >>> logits = torch.randn(2, 2, device="cuda")
    >>> logits
    tensor([[ 0.1940,  2.1614], [ -0.1721,  0.8491]], device='cuda:0')
    >>> probs = pipe(logits)
    >>> probs
    tensor([[0.1227, 0.8773], [0.2648, 0.7352]], device='cuda:0')

    Notes
    -----
    Can only appear once in a pipeline.
    N
enable_pdlr   c                 >     t                      j        dd|i| dS )aC  
        Constructor for Softmax processor.

        Parameters
        ----------
        enable_pdl : bool, optional, Compile-time
            Whether to enable PDL for the kernel implementation.
            Default is None, which means the kernel will be automatically enabled if PDL is supported on the device.
        r>   Nr   r/   )r   r>   r   r$   s      r   r   zSoftmax.__init__   s-     	99J9&99999r   r   r   c                 n    ddl m} |t          j        k    rt	          d|            |di | j        gS )r4   r   )	SoftmaxOpz+Softmax can only be applied to LOGITS, got r   )r6   rA   r
   r7   r8   r   )r   r   rA   s      r   r   zSoftmax.legalize   sV     	)(((((***W:WWXXX	((DK(())r   )N)r%   r(   r)   r*   r   boolr   r   r
   r   r	   r   r:   r;   s   @r   r=   r=      s         >
: 
:8D> 
:C 
: 
: 
: 
: 
: 
:	*: 	*$r( 	* 	* 	* 	* 	* 	* 	* 	*r   r=   c                   J     e Zd ZdZd	dedef fdZdedee	         fdZ
 xZS )
TopKa*  
    Top-k filtering processor.

    Keeps only the top-k highest probability tokens and masks out the rest.

    :attr:`TensorType.LOGITS` -> :attr:`TensorType.LOGITS` | :attr:`TensorType.PROBS` -> :attr:`TensorType.PROBS`

    Parameters
    ----------
    joint_topk_topp : bool, optional, Compile-time
        Whether to enable joint top-k and top-p filtering when followed by TopP.
        Default is False.

    top_k : int or torch.Tensor, Runtime
        Number of top tokens to keep. Can be a scalar or per-batch tensor.

    Examples
    --------
    >>> import torch
    >>> from flashinfer.logits_processor import LogitsPipe, TopK, Sample, TensorType
    >>> torch.manual_seed(42)
    >>>
    >>> # Top-k filtering on logits
    >>> pipe = LogitsPipe([TopK()], input_type=TensorType.LOGITS)
    >>> logits = torch.randn(2, 2, device="cuda")
    >>> logits
    tensor([[ 0.1940,  2.1614], [ -0.1721,  0.8491]], device='cuda:0')
    >>> topk_logits = pipe(logits, top_k=1)
    >>> topk_logits
    tensor([[  -inf, 2.1614], [  -inf, 0.8491]], device='cuda:0')
    >>>
    >>> # Top-k filtering on probabilities
    >>> pipe = LogitsPipe([TopK()], input_type=TensorType.PROBS)
    >>> probs = torch.randn(2, 2, device="cuda")
    >>> probs_normed = probs / probs.sum(dim=-1, keepdim=True)
    >>> probs_normed
    tensor([[  4.4998,  -3.4998], [-18.2893,  19.2893]], device='cuda:0')
    >>> topk_probs = pipe(probs_normed, top_k=1)
    >>> topk_probs
    tensor([[1., 0.], [0., 1.]], device='cuda:0')

    Notes
    -----
    When applied to :attr:`TensorType.LOGITS`, sets non-top-k values to -inf.
    When applied to :attr:`TensorType.PROBS`, zeros out non-top-k values and renormalizes.

    See Also
    --------
    :meth:`~flashinfer.sampling.top_k_mask_logits`
    :meth:`~flashinfer.sampling.top_k_renorm_probs`
    Fjoint_topk_toppr   c                 >     t                      j        dd|i| dS )a  
        Constructor for TopK processor.

        Parameters
        ----------
        joint_topk_topp : bool, optional, Compile-time
            Whether to enable joint top-k and top-p filtering when followed by TopP.
            Default is False.
        rE   Nr   r/   )r   rE   r   r$   s      r   r   zTopK.__init__   s-     	CCCFCCCCCr   r   r   c                     ddl m}m} |t          j        k    r |di | j        gS |t          j        k    r |di | j        gS t          d|           )r4   r   )LogitsTopKOpProbsTopKOpzTopK cannot be applied to Nr   )r6   rH   rI   r
   r7   r   PROBSr8   )r   r   rH   rI   s       r   r   zTopK.legalize  s     	98888888*** L//4;//00:+++K..$+..//F*FFGGGr   )Fr%   r(   r)   r*   rB   r   r   r
   r   r	   r   r:   r;   s   @r   rD   rD      s        2 2h
D 
D 
D 
D 
D 
D 
D 
D 
DH: H$r( H H H H H H H Hr   rD   c                   D     e Zd ZdZdef fdZdedee         fdZ	 xZ
S )TopPa  
    Top-p (nucleus) filtering processor.

    Keeps tokens with cumulative probability up to threshold p.

    :attr:`TensorType.PROBS` -> :attr:`TensorType.PROBS`

    Parameters
    ----------
    top_p : float or torch.Tensor, Runtime
        Cumulative probability threshold in (0, 1]. Can be a scalar or per-batch tensor.

    Examples
    --------
    >>> import torch
    >>> from flashinfer.logits_processor import LogitsPipe, Softmax, TopP, Sample
    >>> torch.manual_seed(42)
    >>> pipe = LogitsPipe([TopP()])
    >>> probs = torch.randn(2, 2, device="cuda")
    >>> probs_normed = probs / probs.sum(dim=-1, keepdim=True)
    >>> probs_normed
    tensor([[ 0.0824,  0.9176], [-0.2541,  1.2541]], device='cuda:0')
    >>> topp_probs = pipe(probs_normed, top_p=0.9)
    >>> topp_probs
    tensor([[0., 1.], [0., 1.]], device='cuda:0')

    See Also
    --------
    :meth:`~flashinfer.sampling.top_p_renorm_probs`
    r   c                 :     t                      j        di | dS )zX
        Constructor for TopP processor. No compile-time parameters are needed.
        Nr   r/   r1   s     r   r   zTopP.__init__0  r2   r   r   r   c                 n    ddl m} |t          j        k    rt	          d|            |di | j        gS )r4   r   )TopPOpz'TopP can only be applied to PROBS, got r   )r6   rP   r
   rJ   r8   r   )r   r   rP   s      r   r   zTopP.legalize6  V     	&%%%%%)))SzSSTTT%%%%&&r   r9   r;   s   @r   rM   rM     st         ># # # # # # #	': 	'$r( 	' 	' 	' 	' 	' 	' 	' 	'r   rM   c                   D     e Zd ZdZdef fdZdedee         fdZ	 xZ
S )MinPa  
    Min-p filtering processor.

    Keeps tokens with probability at least p times the maximum probability.

    :attr:`TensorType.PROBS` -> :attr:`TensorType.PROBS`

    Parameters
    ----------
    min_p : float or torch.Tensor, Runtime
        Minimum probability threshold as a ratio of max probability.
        Must be in (0, 1]. Can be a scalar or per-batch tensor.

    Examples
    --------
    >>> import torch
    >>> from flashinfer.logits_processor import LogitsPipe, Softmax, MinP, Sample
    >>> torch.manual_seed(42)
    >>> pipe = LogitsPipe([MinP()])
    >>> probs = torch.randn(2, 2, device="cuda")
    >>> probs_normed = probs / probs.sum(dim=-1, keepdim=True)
    >>> probs_normed
    tensor([[ 0.0824,  0.9176], [-0.2541,  1.2541]], device='cuda:0')
    >>> minp_probs = pipe(probs_normed, min_p=0.05)
    >>> minp_probs
    tensor([[0.0824, 0.9176], [0.0000, 1.0000]], device='cuda:0')

    r   c                 :     t                      j        di | dS )zX
        Constructor for MinP processor. No compile-time parameters are needed.
        Nr   r/   r1   s     r   r   zMinP.__init__`  r2   r   r   r   c                 n    ddl m} |t          j        k    rt	          d|            |di | j        gS )r4   r   )MinPOpz'MinP can only be applied to PROBS, got r   )r6   rV   r
   rJ   r8   r   )r   r   rV   s      r   r   zMinP.legalizef  rQ   r   r9   r;   s   @r   rS   rS   B  st         :# # # # # # #	': 	'$r( 	' 	' 	' 	' 	' 	' 	' 	'r   rS   c                   J     e Zd ZdZd	dedef fdZdedee	         fdZ
 xZS )
Samplea  
    Sampling processor to generate token indices.

    Samples tokens from logits or probability distributions.

    :attr:`TensorType.LOGITS` -> :attr:`TensorType.INDICES` | :attr:`TensorType.PROBS` -> :attr:`TensorType.INDICES`

    Parameters
    ----------
    deterministic : bool, optional, Compile-time
        Whether to use deterministic kernel implementation.
        Default is True.

    indices : torch.Tensor, optional, Runtime
        Indices for batched sampling when probability tensors are shared.
    generator : torch.Generator, optional, Runtime
        Random number generator for reproducible sampling.

    Examples
    --------
    >>> import torch
    >>> from flashinfer.logits_processor import LogitsPipe, Sample, TensorType
    >>> torch.manual_seed(42)
    >>>
    >>> # Sampling from logits
    >>> pipe = LogitsPipe([Sample(deterministic=True)], input_type=TensorType.LOGITS)
    >>> logits = torch.randn(2, 5, device="cuda")
    >>> logits
    tensor([[ 0.1940,  2.1614, -0.1721,  0.8491, -1.9244],
            [ 0.6530, -0.6494, -0.8175,  0.5280, -1.2753]], device='cuda:0')
    >>> tokens = pipe(logits, top_k=1)
    >>> tokens
    tensor([0, 1], device='cuda:0')
    >>>
    >>> # Sampling from probabilities
    >>> pipe = LogitsPipe([Sample(deterministic=True)], input_type=TensorType.PROBS)
    >>> probs = torch.randn(2, 5, device="cuda")
    >>> probs_normed = probs / probs.sum(dim=-1, keepdim=True)
    >>> probs_normed
    tensor([[ 2.8827,  0.0870,  0.2340, -3.2731,  1.0694],
            [ 0.3526,  0.0928,  0.1601, -0.1737,  0.5683]], device='cuda:0')
    >>> tokens = pipe(probs_normed, top_k=1)
    >>> tokens
    tensor([0, 0], device='cuda:0')

    Notes
    -----
    Outputs :attr:`TensorType.INDICES` - no operators can follow

    See Also
    --------
    :meth:`~flashinfer.sampling.sampling_from_logits`
    :meth:`~flashinfer.sampling.sampling_from_probs`
    Tdeterministicr   c                 >     t                      j        dd|i| dS )z
        Constructor for Sample processor.

        Parameters
        ----------
        deterministic : bool, optional
            Whether to use deterministic kernel implementation.
            Default is True.
        rY   Nr   r/   )r   rY   r   r$   s      r   r   zSample.__init__  s-     	??}??????r   r   r   c                     ddl m}m} |t          j        k    r |di | j        gS |t          j        k    r |di | j        gS t          d|           )r4   r   )LogitsSampleOpProbsSampleOpzSampling cannot be applied to Nr   )r6   r\   r]   r
   r7   r   rJ   r8   )r   r   r\   r]   s       r   r   zSample.legalize  s     	=<<<<<<<***"N11T[1122:+++!M00DK0011JjJJKKKr   )TrK   r;   s   @r   rX   rX   r  s        5 5n
@ 
@d 
@S 
@ 
@ 
@ 
@ 
@ 
@L: L$r( L L L L L L L Lr   rX   N)r*   abcr   r   typingr   r   r   opr	   typesr
   r   r-   r=   rD   rM   rS   rX   r   r   r   <module>rb      s     $ # # # # # # # & & & & & & & & & &            ?: ?: ?: ?: ?:c ?: ?: ?:D,. ,. ,. ,. ,./ ,. ,. ,.^5* 5* 5* 5* 5*o 5* 5* 5*pLH LH LH LH LH? LH LH LH^/' /' /' /' /'? /' /' /'d-' -' -' -' -'? -' -' -'`OL OL OL OL OL_ OL OL OL OL OLr   