
    
`i{	                         d dl mZmZmZ d dlZd dlmZ ddl	m
Z
 ddlmZmZmZ d dlmZ ej        dej        dej        d	ej        fd
            Zdej        deej                 d	ej        fdZdS )    )TupleListcastN   )	LLMatcher)get_bitmask_shapeallocate_token_bitmaskfill_next_token_bitmask)NDArraydatamaskreturnc                 <   d}t           j                            dg ddg|          }t          j        t	          d           g| j                  } || ||gd| j        fg| j        d	         | j        d
         d	fd| j        g| j        g          }|d
         }|S )Na  
        uint batch = thread_position_in_grid.y;  // Batch index
        uint elem = thread_position_in_grid.x;   // Element index within batch

        // Bounds check to prevent out-of-bounds access
        // assert(batch < inp_shape[0] && elem < inp_shape[1]);

        uint word_idx = elem / 32;  // Which u32 word
        uint bit_idx = elem % 32;   // Which bit in the word

        // Bounds check for mask access
        // assert(word_idx < mask_shape[1] && batch < mask_shape[0]);

        uint bit = word_idx < mask_shape[1] && (mask[batch * mask_shape[1] + word_idx] >> bit_idx) & 1;
        out[batch * inp_shape[1] + elem] = bit ? inp[batch * inp_shape[1] + elem] : neg_inf[0];
    bitmask_apply_batched)inpr   neg_infout)nameinput_namesoutput_namessourceinf)dtypeTr   r   )   r   r   )inputstemplategridthreadgroupoutput_shapesoutput_dtypes)mxfastmetal_kernelarrayfloatr   shape)r   r   r   kernelr   outputsas          b/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/llguidance/mlx.pyapply_token_bitmask_kernelr,      s    F" W!!$...W	 "  F hudj999GfdG$
#$jmTZ]zlzl  G !*AH    logitsmask_npc                     t          j        |          }t          | j                  dk    rt          j        | d          } t          |j                  dk    rt          j        |d          }|j        t           j        k    s
J d            t          | j                  dk    s
J d            | j        \  }}|j        \  }}||k    s
J d            t          t           j        t          | |                    }|S )Nr   r   )axiszMask must be int32   zLogits must be 2DzBatch size mismatch)	r"   r%   lenr'   expand_dimsr   int32r   r,   )r.   r/   r   batchvocabm_batchm_vocabrs           r+   apply_token_bitmaskr;   6   s    8GD
6<AQ///
4:!~d+++:!!!#7!!!v|!!!#6!!!<LE5zGWG2RX1&$??@@AHr-   )typingr   r   r   numpynpmlx.corecorer"   _libr   r   r	   r
   numpy.typingr   custom_functionr%   r,   r5   r;    r-   r+   <module>rE      s   % $ $ $ $ $ $ $ $ $                 U U U U U U U U U U             'RX 'RX '"( ' ' ' 'T !(!279x     r-   