
    .`i                        d dl mZ d dlZerd dlmZ dej        deej        ej        ej        eeeef         fdZ	ddd	e
edz           d
edededej        deej        ej        ej        ej        e
e         f         fdZdS )    )TYPE_CHECKINGN)LoRAMappingtoken_lora_tensorreturnc                    t          j        | d          \  }}t          j        |d          }t          j        |          }|dd                             |dd                    |                                                                }|                                                                }|                    d          }d}|dk    r|dk    rd}|||||||fS )	at  
    Get the information required for the sgmv kernel. With the  features:
    1. If consecutive requests in the batch use the same LoRA, this function
    will combine them into a single request, improving sgmv kernel inference
    performance.
    2. At the beginning of each prefill stage inference, recalculations are
    needed based on the input, but only once.
    T)return_countsr   )dim   NF)	torchunique_consecutivecumsum
zeros_likecopy_maxitemsumsize)	r   lora_indices_tensorseq_length_tensor
cum_resultb_seq_start_tensor
max_length
token_nums
batch_sizeno_loras	            r/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/lora/punica_wrapper/utils.pycompute_metar      s    .3-E. . .** /Q777J)*;<<qrr  CRC111"&&((--//J"&&((--//J$))!,,JG Q."44     mappingr   lora_index_to_id	max_loras
vocab_sizeextra_vocab_sizedevicec                    t          | j                                                  }|                                }|                                }fd| j        D             }	d}
t	          t          |                    D ]C}||         dk    r                    ||                   nd}
||         dk    r|
nd||<   |
||<   D|||g}t          j        |t          j	        |          }t          j        |	t          j	        |          }t          j
        |d         |z  |d         ||z   z  g          }t          j        |dk    |dz
  |          }|d         }|}|                                }t          j        |dk    |dz
  |          }t          j        dt          |          |t          j	                  |t          |          z  z   }|j        d         |j        d         |j        d         |j        d         g}|||||fS )	ax  Converts LoRAMapping to index tensors.

    Args:
        mapping: LoRAMapping mapping rows in a batch to LoRA ids.
        lora_index_to_id: List mapping LoRA ids to LoRA indices.
        max_loras: Maximum number of LoRAs.
        vocab_size: Model vocab size.
        extra_vocab_size: Extra vocab size each LoRA can have.

    Returns:
        A tuple of tensors:
            base_indices: Tensor of shape [batch_size] mapping batch rows to
                LoRA indices.
            sampler_indices: Tensor of shape [batch_size] mapping requests to
                LoRA indices for sampler. For generation, this will be the
                same as base_indices. For prefill, this will map requests
                to LoRA indices.
            sampler_indices_padded: Tensor of shape [batch_size] mapping
                requests to LoRA indices for sampler with padding.
                Same as sampler_indices, but -1 is replaced with
                max_loras.
            embeddings_indices: Tensor of shape [2, batch_size] mapping
                requests to embedding indices. First row is for embeddings
                added by the LoRAs, second row is for the LoRA.lora_a
                embeddings.
            indices_len: List of lengths of the above tensors. It contains
                (base_indices, sampler_indices, sampler_indices_padded,
                embeddings_indices).
    c                 J    g | ]}|d k    r                     |          nd S )r   r   )index).0xr!   s     r   
<listcomp>z#convert_mapping.<locals>.<listcomp>]   sB     ! ! !78QUUq!!!! ! !r   Nr   r   )dtyper%      r
   )r%   r,   )listindex_mappingcopyprompt_mappingrangelenr(   r   tensorlongstackwhereclonearangeshape)r    r!   r"   r#   r$   r%   index_mapping_indicesembedding_indiceslora_indicesr1   lora_idxiindices_listindicesprompt_mapping_tensorembeddings_indicesbase_indicessampler_indicessampler_indices_paddedindices_lens    `                  r   convert_mappingrH   4   s{   J (,G,A'B'B'G'G'I'I-2244(--//L! ! ! !<C<R! ! !N H3,--.. # # %Q'!++ ""#8#;<<< 	
 ,A+Ca+G+GxxQ!"Q 	4L l<uz&IIIG!Lej   AJ))AJ*'778	
  b )a-1C  1:L+O,2244"["$i!m5K  #\	3%&&vUZ  	#&<"="=	=? 	2b!$R( $	K 	 r   )typingr   r   vllm.lora.layersr   Tensortupleintboolr   r.   r%   rH    r   r   <module>rP      s   !            -,,,,,,#|#
5<u|S#sDHI# # # #Nbb3:&b b 	b
 b Lb 5<u|U\49LMb b b b b br   