
    .`i                     f    d dl mZmZ ej        d             Zej        dej        fd            ZdS )    )tltritonc                    t          j        d          }||k    rdS t          j        | |z             }d}|dk    r|}nt          j        | |z   dz
            }	||	z
  }t          j        ||z             }
|dz   |
z
  }t          j        |dk    |d          }t          j        ||z   dz             dz
  }||z
  }t          j        ||z   |           t          j        ||z   |           dS )a  
    Fused kernel for Eagle prepare_input_padded. This kernel computes the
    token index to sample for each request, taking into account the number
    of draft tokens and the number of valid sampled tokens (which is one more than
    the number of accepted tokens).
    r   axisN   )r   
program_idloadwherestore)cu_num_draft_tokens_ptrvalid_sampled_tokens_count_ptrquery_start_loc_gpu_ptrtoken_indices_to_sample_ptrnum_rejected_tokens_gpu_ptrnum_reqsreq_idxcu_draft_currnum_draft_tokenscu_draft_prevvalid_countnum_rejected_tokensq_last_tok_idxindex_to_samples                 m/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/v1/spec_decode/utils.py"eagle_prepare_inputs_padded_kernelr      s    m###G( G3g=>>M!||( 7' AA EFF(=8'87BCCK*Q.<(#3a#79LaPP W4w>BCCaGN$'::OH(72ODDDH(724GHHHHH    BLOCK_SIZE_TOKENSc
                 |   t          j        d          }
|
|k    rdS t          j        ||
z             }|rjt          j        ||
z             }t          j        ddt           j                  }t          j        ||
z   |           t          j        ||
z   |           dS t          j        d|	          }||k     }| |
|z  z   }t          j        ||z   |d          }|dk    ||k     z  |z  }t          j        |          }|dk    rmt          j        t          j	        ||d                    }t          j        t          j	        ||k    |d                    }t          j        ||
z   |           n/t          j        ||
z             }t          j        ||
z   |           t          j        ||
z   |           dS )a  
    Fused kernel for Eagle prepare_next_token_ids_padded. This kernel computes the
    number of valid (1 + accepted) tokens for each request, and the corresponding
    "next" token id to sample from during speculative decoding. This is the
    "last accepted token" from the sampled tokens, or the backup token if no
    tokens were accepted or if the request is marked as discarded.
    r   r   N )dtype)maskother)
r   r	   r
   fulluint32r   arangesummaxr   )sampled_token_ids_ptrdiscard_request_mask_ptrbackup_next_token_ids_ptrnext_token_ids_ptrr   
vocab_sizenum_sampled_tokens_per_reqr   stride_sampled_token_idsr   r   is_discardedbackup_tokenr   
token_offs
token_maskrow_ptr	token_idsis_valid_masklast_valid_indexlast_valid_tokens                        r   &eagle_prepare_next_token_padded_kernelr:   1   s   ( m###G( 73g=>>L !Hw87BCCgb!29555
#g-|<<<
/'9;GGGGG Yq"344
"<<
''4L*LLGGj0zLLL	 #bY-CDzQf]++??  "vbh}j"&M&MNN  "v'77AFF    H''13CDDDD 7#<w#FGGLH''1<@@@
/'9;GGGGGr   N)vllm.triton_utilsr   r   jitr   	constexprr:   r    r   r   <module>r>      s    ) ( ( ( ( ( ( ( 'I 'I 'IT ;H |;H ;H ;H ;H ;H ;Hr   