
    .`i+                         d dl Z d dlmZ d dlmZ d dlmZ d dlmZm	Z	 d dl
mZ d dlmZ ddlmZ dd	lmZmZ  G d
 de          Z G d de          ZdS )    N)PretrainedConfig)
LoRAConfig)split_tensor_along_last_dim tensor_model_parallel_all_reduce)RowParallelLinear)current_platform   )BaseLinearLayerWithLoRA)_fully_sharded_can_replace_not_fully_sharded_can_replacec                       e Zd Zdeddf fdZdej        dej        fdZdej        dej        fdZd	ej        dej        e	ej        ej        dz  f         z  fd
Z
ee	 ddej        dedededz  def
d                        Z xZS )RowParallelLinearWithLoRA
base_layerreturnNc                     t                                          |           | j        j        | _        | j        j        | _        d| _        d S Nr	   )super__init__r   input_size_per_partition
input_sizeoutput_sizen_slices)selfr   	__class__s     x/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/lora/layers/row_parallel_linear.pyr   z"RowParallelLinearWithLoRA.__init__   sA    $$$ /B?6    lora_ac                 ^    | j         }| j        |z  }| j        dz   |z  }|d d ||f         }|S r   )r   tp_rank)r   r   
shard_size	start_idxend_idxs        r   slice_lora_az&RowParallelLinearWithLoRA.slice_lora_a   sC    _
L:-	<!#z19W,,-r   lora_bc                     |S N )r   r$   s     r   slice_lora_bz&RowParallelLinearWithLoRA.slice_lora_b&   s    r   input_c                    | j         j        r|}n5t          || j                  }|| j                                                 }| j        dk    s| j         j        rdn| j         j        }|                     ||          }| j         j	        r| j        dk    rt          |          }n|}| j         j        r| j         j        nd}| j         j        s|S ||fS )a*  Forward of RowParallelLinear

        Args:
            input_: tensor whose last dimension is `input_size`. If
                    `input_is_parallel` is set, then the last dimension
                    is `input_size // tp_size`.

        Returns:
            - output
            - bias
        )num_partitionsr   Nr	   )r   input_is_parallelr   tp_sizer   
contiguousskip_bias_addbiasapplyreduce_resultsr   return_bias)r   r)   input_parallelsplitted_inputbias_output_paralleloutputoutput_biass           r   forwardz!RowParallelLinearWithLoRA.forward)   s     ?, 	G#NN 9t|  N ,DL9DDFFN
 q  DO$A  D% 	
 **^U;;?) 	%dlQ.>.>5oFFFF$F.2o.KUdo**QU* 	M{""r   source_layerlora_configpacked_modules_listmodel_configc                 .    t          |          t          u S r&   )typer   )clsr;   r<   r=   r>   s        r   can_replace_layerz+RowParallelLinearWithLoRA.can_replace_layerS   s     L!!%666r   r&   )__name__
__module____qualname__r   r   torchTensorr#   r(   tupler:   classmethodr   nnModuler   listr   boolrB   __classcell__r   s   @r   r   r      sE       #4       5< EL    5< EL    (#l(#	elEL4,??@	@(# (# (# (#T # 157 7i7  7 "	7
 '-7 
7 7 7 $# [7 7 7 7 7r   r   c                        e Zd ZdZdej        dej        fdZddej        dej        dz  dej        fdZee		 dd	e
j        d
edededz  def
 fd                        Z xZS ) RowParallelLinearWithShardedLoRAa  
    Differs from RowParallelLinearWithLoRA by slicing the
    LoRA B's also.

    Based on S-LoRA, slicing happens along the output dim.
    This yields a combined partial sum from the row parallel base
    layer and column partitioned output from the LoRA.
    r$   r   c                     | j         d         j        d         }| j        |z  }| j        dz   |z  }|||d d f         }|S )Nr      r	   )lora_b_stackedshaper   )r   r$   r    r!   r"   s        r   r(   z-RowParallelLinearWithShardedLoRA.slice_lora_bn   sQ    (+1!4
L:-	<!#z1	')111,-r   Nxr0   c                    | j         j                            | j         ||          }|                    d|j        d                   }|                    d|j        d                   |j        }}t          j        | j        |j        d         | j        d         j        d         ft
          j	        |j
                  }| j                            ||| j        d          }t          j                    s|}| j        dk    rt!          |          }| j        d         j        d         }| j        |z  }| j                            ||| j        | j        |d          }	t          j                    s|	} |j        | }|S )	Nr   rS   )dtypedeviceg      ?r	   T)offset_start	add_input)r   quant_methodr1   viewrU   rF   zerosr   lora_a_stackedfloat32rZ   punica_wrapper
add_shrinkr   can_update_inplacer-   r   rT   r   
add_expandoutput_slices)
r   rV   r0   r8   out_orig_shapebuffershrunk_bufferr    r[   lora_outputs
             r   r1   z&RowParallelLinearWithShardedLoRA.applyu   sw   -33DOQMMFF2qwr{##!'Rb1A!B!BFL]AGAJ(;A(>(DQ(GH-8
 
 
 .2-@-K-KAt*C.
 .
  244 	#"F<!5f==F (+1!4
|j0+/+>+I+I% ,J ,
 ,
  244 	! Fn-r   r;   r<   r=   r>   c                 P    t                                          ||||d          S )NF)r;   r<   r=   r>   decorate)r   rB   )rA   r;   r<   r=   r>   r   s        r   rB   z2RowParallelLinearWithShardedLoRA.can_replace_layer   s5     ww((%# 3% ) 
 
 	
r   r&   )rC   rD   rE   __doc__rF   rG   r(   r1   rI   r   rJ   rK   r   rL   r   rM   rB   rN   rO   s   @r   rQ   rQ   d   s         5< EL    ) )u| )5<$+> )%, ) ) ) )V  15
 
i
  
 "	

 '-
 

 
 
 
 
   [
 
 
 
 
r   rQ   )rF   torch.nnrJ   transformersr   vllm.config.lorar   vllm.distributedr   r   !vllm.model_executor.layers.linearr   vllm.platformsr   base_linearr
   utilsr   r   r   rQ   r'   r   r   <module>rv      s2  
        ) ) ) ) ) ) ' ' ' ' ' '        @ ? ? ? ? ? + + + + + + 0 0 0 0 0 0 M M M M M M M MG7 G7 G7 G7 G7 7 G7 G7 G7^L
 L
 L
 L
 L
'@ L
 L
 L
 L
 L
r   