
    .`i)                     x   U d dl Z d dlZd dl mZ d dlmZ d dlmZ d dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZ d d	lmZ  ee          Z e            Zi Zeeed
f         eej        d
f         f         ed<   i Zeeed
f         eej        d
f         f         ed<   deej                 dej        fdZdeej                 dedej        fdZ e j        de!de"dz  dedz  fd            Z#e j        	 	 dde!dededededede"dz  dedz  dee!edz  f         fd            Z$ed dej        dz  de"fd            Z%dS )!    N)	lru_cache)Path)Any)envs)init_logger)vllm_is_batch_invariant)current_platform)next_power_of_2._LORA_A_PTR_DICT_LORA_B_PTR_DICTlora_a_weightsdevicec                 X   t          d | D                       }t                              |          x}r|S g }g }g }g }| D ]}|j        dk    r2|                    d          dk    sJ |                    d          }n|j        dk    sJ |                                sJ |                    |                                           |                    |	                    d                     |                    |	                    d                     |                    |	                    d                     t          |           dk    r"t          j        ||t          j                  }	n| d         }	t          t          |                    dk    s@t          t          |                    dk    s t          t          |                    dk    rt          d	          |	|d         |d         |d         ft          |<   t                              |          S )
a  
    `_LORA_A_PTR_DICT` collects the required information during `profile_run`,
    After this, it remains constant and subsequent usage is through LUT.
    Refer to:
    https://github.com/triton-lang/triton/blob/release/3.1.x/python/tutorials/08-grouped-gemm.py
    c              3   >   K   | ]}|                                 V  d S Ndata_ptr.0lora_weights     r/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/lora/ops/triton_ops/utils.py	<genexpr>z"_get_lora_a_ptr.<locals>.<genexpr>    s.      II;$$&&IIIIII          dim   r      r   dtypez+All LoRA weights must have the same stride.)tupler   getndimsizesqueezeis_contiguousappendr   stridelentorchtensoruint64set
ValueError)
r   r   keyvalueslora_strides_d0lora_strides_d1lora_strides_d2tensor_ptrslora_a_weightlora_ptr_tensors
             r   _get_lora_a_ptrr8      s6    II.III
I
IC!%%c***v OOOK' 
8 
8"" %%a((A----)11a188MM %******,,,,,=1133444}33A66777}33A66777}33A667777
>Q,{6VVV(+ 	C  !!A%%s?##$$q((s?##$$q((FGGG 		S $$$r   lora_weightsoffset_startc                 ~   t          d | D                       }t                              |          x}r|S g }g }g }g }g }	g }
|}| D ]W}|j        dk    r2|                    d          dk    sJ |                    d          }n|j        dk    sJ |                                sJ |                    |                                           |                    |	                    d                     |                    |	                    d                     |	                    |	                    d                     |                    |           ||                    d          z  }|
                    |                    d                     Yt          |           dk    rCt          j        ||t          j                  }t          j        ||t          j                  }n|d         }|d         }t          t          |                    dk    rt          t          |                    dk    rct          t          |	                    dk    rCt          t          |
                    dk    r#|d         }|d         }|	d         }|
d         }d	}nZt          j        ||
          }t          j        ||
          }t          j        |	|
          }t          j        |
|
          }d}t          |
          }||||||||ft          |<   t                              |          S )a  
     `_LORA_B_PTR_DICT` collects the required information during `profile_run`,
    After this, it remains constant and subsequent usage is through LUT.
    Refer to:
    https://github.com/triton-lang/triton/blob/release/3.1.x/python/tutorials/08-grouped-gemm.py

    c              3   >   K   | ]}|                                 V  d S r   r   r   s     r   r   z"_get_lora_b_ptr.<locals>.<genexpr>T   s.      GG;$$&&GGGGGGr   r   r   r   r   r   r   r    Tr   F)r"   r   r#   r$   r%   r&   r'   r(   r   r)   r*   r+   r,   r-   r.   max)r9   r:   r   r0   r1   slice_offset_lstr5   r2   r3   r4   hidden_sizesslice_offsetlora_b_weightr7   slice_start_tensorlora_strides_d0_tensorlora_strides_d1_tensorlora_strides_d2_tensorhidden_sizes_tensorsame_strideMAX_Ns                        r   _get_lora_b_ptrrJ   I   sc    GG,GGG
G
GC!%%c***v KOOOLL% 3 3"" %%a((A----)11a188MM %******,,,,,=1133444}33A66777}33A66777}33A66777---**1---M..q112222
<1,{6VVV"\V5<
 
 
 .a0'*
 	C  !!Q&&O$$%%**O$$%%**
c,
 
 A
%
%!0!3!0!3!0!3*1o "'of!M!M!M!&of!M!M!M!&of!M!M!M#l<GGGE	S $$$r   op_type
add_inputsreturnc                    t           j        }|_t          sWt          j                                        }|                    dd          }|                    dd          }d }| dk    r=| d|                                  dt          |                                           d}n| d|                                  d}t          | d|           }|
                                st                              d|            d S t                              d| d	           t          t          |                    5 }t          j        |          }d d d            n# 1 swxY w Y   nd }|S )
N _-expandz.json/z No LoRA kernel configs found in z%Using tuned LoRA kernel configs from .)r   VLLM_TUNED_CONFIG_FOLDERis_batch_invariantr+   cudaget_device_namereplaceupperstrr   existsloggerwarning_once	info_onceopenjsonload)rK   rL   user_defined_config_foldergpu_nameconfig_fnameconfig_pathfconfig_datas           r   load_lora_op_configri      s   !%!>!-6H-:--//##C--##C--hNNgmmooNNJ0E0E0G0GNNN L '?????L8II<IIJJ!!## 	 P; P PQQQ4 	OOOOPPP#k""## 	'q)A,,K	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' s   E##E'*E'	max_lorasbatchhidden_sizerank
num_slicesmoe_intermediate_sizec                 X   | dv sJ i }| dk    r(|dk     rdnd}	t           rd}	dd|dk     rd	nd|	d
dddd d	}n| dv r&dt          dt          |                    dd
dddd}n`| dv r4ddt          dt          dt          |                              d
dddd}n(dt          dt          d|z                      dd
ddd d}|| dk    r||fn||f\  t	          | |          }
|
st
                              d           |S |
                    t                              p+|
t          |
	                                fd                   }
|
t          |                   }
|
                    t                              p+|
t          |
	                                fd                   }
|
                    t                              p+|
t          |
	                                fd                   }
|
                    t                              p+|
t          |
	                                fd                   }
|P||
                    t                              p+|
t          |
	                                fd                   }
|
J |
S )N)shrinkrR   fused_moe_lora_w13_shrinkfused_moe_lora_w13_expandfused_moe_lora_w2_shrinkfused_moe_lora_w2_expandrq      @      r             r   r   )	block_mblock_nblock_ksplit_k	num_warpsnum_ctasgroup_size_m
num_stagesmax_nreg)rr   rt   r   )r|   r}   r~   r   r   r   r   )rs   ru   )r|   r}   r~   r   r   r   r   z!Using default LoRA kernel configsc                 B    t          t          |           z
            S r   absint)xrj   s    r   <lambda>z%get_lora_op_configs.<locals>.<lambda>  s    SQ)AS=T=T r   )r0   c                 B    t          t          |           z
            S r   r   )r   ms    r   r   z%get_lora_op_configs.<locals>.<lambda>      SQ!__ r   c                 B    t          t          |           z
            S r   r   )r   ks    r   r   z%get_lora_op_configs.<locals>.<lambda>  r   r   c                 B    t          t          |           z
            S r   r   )r   ns    r   r   z%get_lora_op_configs.<locals>.<lambda>$  r   r   c                 B    t          t          |           z
            S r   r   )r   is    r   r   z%get_lora_op_configs.<locals>.<lambda>,  s    SVVaZ r   )
rV   minr
   r>   ri   r]   r^   r#   r[   keys)rK   rj   rk   rl   rm   rn   rL   ro   defaultr   rh   r   r   r   r   s    `         @@@@r   get_lora_op_configsr      sr          G("" 	G#ckkssr

 

 
  
 

 2t4455
 
 
  
 

 2s2t'<'<==>>
 
 2sj/@AABB
 
 	A")X"5"5KD+;NDAq &gz::K ?@@@
 	I'' 	Ws;++--3T3T3T3TUUUV 
 c*oo.K 	A 	Os;++--3L3L3L3LMMMN  	A 	Os;++--3L3L3L3LMMMN  	A 	Os;++--3L3L3L3LMMMN  (!OOCFF## S3{//117P7P7P7PQQQR 	
 """r   c                 j    t          j                    o t          j        d          ot          j         S )zz
    Refer to: https://github.com/triton-lang/triton/blob/v3.5.0/python/tutorials/11-programmatic-dependent-launch.py
    Z   )r	   is_cudahas_device_capabilityr   VLLM_LORA_DISABLE_PDLr=   s    r   supports_pdlr   3  s6     	 "" 	+2266	+**r   )NNr   )&	functoolsra   r   pathlibr   typingr   r+   vllmr   vllm.loggerr   *vllm.model_executor.layers.batch_invariantr   vllm.platformsr	   vllm.utils.math_utilsr
   __name__r]   rV   r   dictr"   r   r,   __annotations__r   listTensorr   r8   rJ   r[   boolri   r   r    r   r   <module>r      s                                  # # # # # # N N N N N N + + + + + + 1 1 1 1 1 1	X		,,.. DF $uS#XelC.?(@@A F F FDF $uS#XelC.?(@@A F F F-%D$6 -% -% -% -% -%`M%u|$M%47M%AFM% M% M% M%`  $+ $+    @  #(,u uuu u 	u
 u u tu :u 
#sTz/u u u up 
 
, 
 
 
 
 
 
 
r   