
    .`i/                         d dl Z d dlmZ d dlmZ d dlmZ d dlmZm	Z	 d dl
mZ d dlmZ d dlmZ dd	lmZ  G d
 de          ZdS )    N)PretrainedConfig)
LoRAConfig)get_tensor_model_parallel_rank$get_tensor_model_parallel_world_size)LogitsProcessor)VocabParallelEmbedding)current_platform   )BaseLayerWithLoRAc                   ,    e Zd ZdZdededej        dej        de	e         dz  ddf fd	Z
ed
             Zed             Zed             Zed             Zed             Zed             Zed             Zed             Z	 d#dedededz  ddfdZdefdZdedej        e	ej                 z  dej        e	ej                 z  fdZ	 d#dej        dedej        dz  dej        dz  fdZd Ze	 d#d ej        ded!e	dedz  de f
d"            Z! xZ"S )$LogitsProcessorWithLoRAa  
    LoRA wrapper for LogitsProcessor, with extra logic to handle the
    application of the LoRA adapter and added LoRA vocabulary.

    Args:
        base_layer: LogitsProcessor layer
        hidden_size: hidden size of the model
        dtype: data type of the model
        device: device of the model
        sharded_to_full_mapping: index mapping from sharded vocab to full vocab
            received from base_layer.get_sharded_to_full_mapping(). If None,
            no reindexing will be done.
    
base_layerhidden_sizedtypedevicesharded_to_full_mappingNreturnc                     t                                                       || _        || _        || _        || _        t                      | _        t                      | _	        || _
        d S N)super__init__r   r   r   r   r   tp_sizer   tp_rankr   )selfr   r   r   r   r   	__class__s         u/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/lora/layers/logits_processor.pyr   z LogitsProcessorWithLoRA.__init__$   s`     	$&
;==577'>$$$    c                     | j         j        S r   )r   logits_as_inputr   s    r   r   z'LogitsProcessorWithLoRA.logits_as_input5   s    ..r   c                     | j         j        S r   )r   
vocab_sizer    s    r   r"   z"LogitsProcessorWithLoRA.vocab_size9   s    ))r   c                     | j         j        S r   )r   scaler    s    r   r$   zLogitsProcessorWithLoRA.scale=   s    $$r   c                     | j         j        S r   )r   soft_capr    s    r   r&   z LogitsProcessorWithLoRA.soft_capA   s    ''r   c                     | j         j        S r   )r   use_all_gatherr    s    r   r(   z&LogitsProcessorWithLoRA.use_all_gatherE       --r   c                     | j         j        S r   )r   org_vocab_sizer    s    r   r+   z&LogitsProcessorWithLoRA.org_vocab_sizeI   r)   r   c                     | j         j        S r   )r   include_gpu_probs_tensorr    s    r   r-   z0LogitsProcessorWithLoRA.include_gpu_probs_tensorM   s    77r   c                     | j         j        S r   )r   "should_modify_greedy_probs_inplacer    s    r   r/   z:LogitsProcessorWithLoRA.should_modify_greedy_probs_inplaceQ   s    AAr   	max_loraslora_configmodel_configc                    d| j         j        cxk     rdk    rn nt          d          t          j        |d|j        | j        f|j        | j                  | _	        t          j        |d| j         j        |j        f|j        | j                  | _
        | j        2t          j        | j        | j        t          j                  | _        d S d | _        d S )Ni }  i  zAWhen using LoRA, vocab size must be 32000 >= vocab_size <= 257024r
   )r   r   )r   r   )r   r"   
ValueErrortorchzerosmax_lora_rankr   
lora_dtyper   lora_a_stackedlora_b_stackedr   tensorlongsharded_to_full_mapping_gpu)r   r0   r1   r2   s       r   create_lora_weightsz+LogitsProcessorWithLoRA.create_lora_weightsU   s    4?-666666666S   $k) 	 (;	
 	
 	
 $k*)	 (;	
 	
 	
 '3/4|,T[
0 0 0D,,, 04D,,,r   indexc                 .    d| j         |<   d| j        |<   d S )Nr   )r9   r:   )r   r?   s     r   
reset_loraz"LogitsProcessorWithLoRA.reset_lora|   s"    %&E"%&E"""r   lora_alora_bc                    t          |t          j                  sJ t          |t          j                  sJ |                     |           | j        |dd |j        d         d |j        d         f                             |d           | j        |dd |j        d         d |j        d         f                             |d           d S )Nr   r
   T)non_blocking)
isinstancer5   TensorrA   r9   shapecopy_r:   )r   r?   rB   rC   s       r   set_loraz LogitsProcessorWithLoRA.set_lora   s     &%,/////&%,/////E1&7Q&79J6<?9JJKQQ 	R 	
 	
 	
 	E1&7Q&79J6<?9JJKQQ 	R 	
 	
 	
 	
 	
r   hidden_stateslm_headembedding_biasc                 `   |j                             ||          }|||z  }| j                            |          }|d S | j        |d d | j        f         }| j                            ||| j        | j        d          }t          j
                    s|}|d d d | j        j        f         }|S )Ng      ?)quant_methodapplyr   _gather_logitsr=   punica_wrapperadd_lora_logitsr9   r:   r	   can_update_inplacer"   )r   rK   rL   rM   logitslora_outputs         r   _get_logitsz#LogitsProcessorWithLoRA._get_logits   s     %++G]CC%n$F //77>4+7" AAAt??@F+/+>+N+NM4#68KS,
 ,
  244 	! F 7T_7778r   c                 H     t          | j                  j        | g|R i |S r   )typer   forward)r   argskwargss      r   rZ   zLogitsProcessorWithLoRA.forward   s/    ,tDO$$,TCDCCCFCCCr   source_layerpacked_modules_listc                     dS )NF )clsr]   r1   r^   r2   s        r   can_replace_layerz)LogitsProcessorWithLoRA.can_replace_layer   s	     ur   r   )#__name__
__module____qualname____doc__r   intr5   r   r   listr   propertyr   r"   r$   r&   r(   r+   r-   r/   r   r   r>   rA   rG   rJ   r   rW   rZ   classmethodnnModuleboolrb   __classcell__)r   s   @r   r   r      s        ?#? ? {	?
 ? "&cT!1? 
? ? ? ? ? ?" / / X/ * * X* % % X% ( ( X( . . X. . . X. 8 8 X8 B B XB 15	%4 %4%4  %4 '-	%4
 
%4 %4 %4 %4N' ' ' ' '

 tEL11
 tEL11	
 
 
 
( /3	- -|- (- t+	-
 
	- - - -^D D D  15 i   "	
 '- 
   [    r   r   )r5   torch.nnrk   transformersr   vllm.config.lorar   vllm.distributedr   r   +vllm.model_executor.layers.logits_processorr   3vllm.model_executor.layers.vocab_parallel_embeddingr   vllm.platformsr	   baser   r   r`   r   r   <module>rw      s   
        ) ) ) ) ) ) ' ' ' ' ' '        H G G G G G V V V V V V + + + + + + # # # # # #v v v v v/ v v v v vr   