
    .`i3                        U d dl Z d dlmZ d dlmZ d dlZd dlmZ d dlm	Z	 d dl
mZmZ d dlmZ d dlmZ  ee          Zd	ed
ef         dedeeef         deeef         fdZej        d             Z ej        e          dej        dej        dej        dej        dej        dej        dej        dej        dej        fd            Z	 dAdej        dej        dej        dz  fdZej        dej        dej        dej        dej        dej        dej        fd            Zej        d ej        fd!            ZdBd#ej        d$edej        fd%Zej        d ej        fd&            Z 	 	 dCd#ej        d$ed(e!d)ej"        dz  dej        f
d*Z#d+ Z$dd,d-Z%dd,d.Z&d/ Z'd0 Z(dAd1Z)dCd)ej"        dz  fd2Z*ej        d ej        fd3            Z+	 dDd#ej        d5ej        d6e,dej        fd7Z-	 dDd#ej        d5ej        d6e,dej        fd8Z.dAd9Z/d'a0da1da2da3da4da5da6d: Z7de!fd;Z8 e8            Z9e!e:d<<   de!fd=Z;d>edz  fd?Z<d>edz  fd@Z=dS )E    N)Callable)Any)init_logger)current_platform)tltriton)is_torch_equal_or_newer)AttentionBackendEnumgrid.kernelargsreturnc           
      Z   i }|d         |d         |d         }}}|j          d| d| d| d|d<   d	|v r |j          d| d| d| d
|d	         dd
|d<   d|v r|d                                         }n|d         rdnd}d|z  |z  |z  |d|dz   <   |||z  ||z  z   ||z  z   z  |d<   |S )NMNKz [M=z, N=z, K=]nametiles_per_updatez, tiles_per_update=02c_ptr
FP8_OUTPUT      g       @flops   bytes)r   element_size)r   r   r   retmnkbytes_per_elems           ~/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/layers/batch_invariant.py_matmul_launch_metadatar%      s1    C3icDI!qA[88a88Q88A888CKT!!{ ? ? ? ?q ? ?a ? ? $%7 8>? ? ? 	F $g3355"<07a(+a!aC$"$$%!QUQU]QU%:;CLJ    c                 d    | |z  }||z  }t          ||z
  |          }|| |z  z   }| |z  |z  }	||	fS N)min)
tile_idnum_pid_in_group	num_pid_mGROUP_SIZE_MNUM_SMSgroup_idfirst_pid_mgroup_size_mpid_mpid_ns
             r$   _compute_pidr4   &   sT    **H\)Ky;.==L7\12E''L8E%<r&   )launch_metadataBLOCK_SIZE_MBLOCK_SIZE_NBLOCK_SIZE_Kr-   r.   A_LARGEB_LARGEC_LARGEHAS_BIASc           	      t   t          j        d          }t          j        ||          }t          j        ||          }t          j        ||          }||z  }||z
  }t          j        d|          }||z  }t          j        |||d          D ]}t          |||||          \  }} ||z  }!| |z  }"|!t          j        d|          z   }#|"t          j        d|          z   }$|r|#                    t           j                  }#|r|$                    t           j                  }$t          j        |#|k     |#d          }#t          j        |$|k     |$d          }$t          j	        t          j
        |#|          |          }#t          j	        t          j
        |$|          |          }$t          j        ||ft           j                  }%t	          |          D ]}&|s|r9|&|z  t          j        d|                              t           j                  z   }'n|&|z  t          j        d|          z   }'| |#d d d f         |z  |'d d d f         |z  z   z   }(||'d d d f         |	z  |$d d d f         |
z  z   z   })t          j        |(|d d d f         ||&|z  z
  k     d          }*t          j        |)|d d d f         ||&|z  z
  k     d          }+t          j        |*|+|%          }%||z  }t          |||||          \  }} ||z  t          j        d|          z   },| |z  t          j        d|          z   }-|r>|,                    t           j                  },|-                    t           j                  }-|||,d d d f         z  z   ||-d d d f         z  z   }.|,d d d f         |k     |-d d d f         |k     z  }/|rB||-z   }0t          j        |0|-|k     d                              t           j                  }1|%|1z  }%|%                    |j        j                  }2t          j        |.|2|/           d S )	Nr   )axisT)flattendtype        maskotherrD   )r   
program_idcdivarangeranger4   toint64wheremax_contiguousmultiple_ofzerosfloat32loaddotrA   
element_tystore)3a_ptrb_ptrr   bias_ptrr   r   r   	stride_am	stride_ak	stride_bk	stride_bn	stride_cm	stride_cnr6   r7   r8   r-   r.   r9   r:   r;   r<   	start_pidr,   	num_pid_nk_tiles	num_tiles	tile_id_coffs_k_for_maskr+   r*   r2   r3   start_mstart_noffs_amoffs_bnaccumulatorkioffs_ka_ptrsb_ptrsaboffs_cmoffs_cnc_ptrsc_mask	bias_ptrsbiascs3                                                      r$   matmul_kernel_persistentrw   0   s   2 1%%%I<((I<((Iga&&GI%IG#Ii<00O#i/8Iy'4HHH 6) 6)#%y,
 
u ,&,&BIa666BIa666 	+jj**G 	+jj**G(7Q;33(7Q;33#BN7L$I$I<XX#BN7L$I$I<XXhl;2:NNN.. 	4 	4B H' Hl*RYq,-G-G-J-J28-T-TTl*RYq,-G-GG4 9,vdAAAg/JJF qqq$w)+gdAAAg.>.JJF _T111W5B<M8MMUX  A _QQQW5B<M8MMUX  A &A{33KKW	#'L'
 
u ,&1l)C)CC,&1l)C)CC 	+jj**Gjj**GWQQQW%555	GDRSRSRSGDT8TT!!!T'"Q&747+;a+?@ 	  7*I797Q;cBBBEEbjQQD4KNN5;122
(((((m6) 6)r&   rn   ro   ru   c                   	
 | j         d         |j         d         k    s
J d            | j        |j        k    s
J d            |"|                                dk    s
J d            t          j                            d          j        
| j         \  }|j         \  }	| j        }t          j        	f| j        |          }	
fd}t          j	        d	d	d
ddddt          j
        d	dd
ddddt          j        d	d	dddddi}t          |         | |||	||                     d          |                     d          |                    d          |                    d          |                    d          |                    d          f
|                                 dk    |                                dk    |                                dk    |d ud||          |S )Nr   r   zIncompatible dimensionszIncompatible dtypeszCCurrently assuming bias is 1D, let Horace know if you run into thiscudadevicerA   c           	          t          t          j        | d                   t          j        | d                   z            fS )Nr6   r7   )r)   r   rH   )METAr   r   r.   s    r$   r   zmatmul_persistent.<locals>.grid   sJ    AtN344+an!5667 
 	
r&      @   r      )r6   r7   r8   r-   
num_stages	num_warps               )r.   r9   r:   r;   r<   )shaperA   dimtorchry   get_device_propertiesmulti_processor_countemptyr{   bfloat16float16rQ   rw   stridenumel)rn   ro   ru   r   rA   rv   r   configsr   r   r.   s           @@@r$   matmul_persistentr      s-    71:###%>###7ag4<488::???M +?* j..v66LG7DAq7DAqGEQF185999A
 
 
 
 
 
 
 	
 
 	
 
 	
 
#G6 T"												 		E!		E!		E!T!% & %.'  * Hr&   c                    t          j        d          }t          j        d          }||k    rdS t          j        ||          }t          j        ||          }||z  }||z  }||k    s||k    rdS ||z  t          j        d|          z   }||z  t          j        d|          z   }||k     }||k     }|s|s|r>|                    t           j                  }|                    t           j                  }t          j        ||d          }t          j        ||d          }t          j        t          j        ||          |          }t          j        t          j        ||          |          }| ||z  z   } |||
z  z   }!|||z  z   }"t          j	        ||ft           j
                  }#t          j        ||          }$t          j        d|          }%t          |$          D ]!}&|s|r9|&|z  t          j        d|                              t           j                  z   }'n|&|z  t          j        d|          z   }'| |dddf         |z  |'dddf         |	z  z   z   }(|!|'dddf         |z  |dddf         |z  z   z   })|%||&|z  z
  k     }*|dddf         |*dddf         z  }+|*dddf         |dddf         z  },t          j        |(|+d          }-t          j        |)|,d          }.t          j        |-|.|#          }##|}/|}0|r>|/                    t           j                  }/|0                    t           j                  }0|"||/dddf         z  z   ||0dddf         z  z   }1|dddf         |dddf         z  }2|#                    |j        j                  }3t          j        |1|3|2           dS )zBatched GEMM: (B, M, K) x (B, K, N) -> (B, M, N)

    Each program computes one (batch_idx, tile_m, tile_n) tile, accumulating
    along K in a fixed order to preserve batch invariance.
    r   r   Nr@   rB   rC   rF   )r   rG   rH   rI   rK   rL   rM   rN   rO   rP   rQ   rJ   rR   rS   rA   rT   rU   )4rV   rW   r   Br   r   r   	stride_abrY   rZ   	stride_bbr[   r\   	stride_cbr]   r^   r6   r7   r8   r9   r:   r;   pid_bpidr,   r`   r2   r3   offs_moffs_nmask_mmask_na_batch_ptrb_batch_ptrc_batch_ptrri   ra   offs_k_maskrj   rk   rl   rm   k_valida_maskb_maskrn   ro   c_mc_nrr   rs   rv   s4                                                       r$   
bmm_kernelr      s(   < M!E
-

Czz <((I<((I9E)OE	Ui// \!BIa$>$>>F\!BIa$>$>>FaZFaZF %' %W %28$$28$$Xffa((FXffa((F r~flCC\RRFr~flCC\RRF %)++K%)++K%)++K(L,7rzJJJKga&&G)A|,,KGnn %0 %0 	Dg 	D,&1l)C)C)F)Frx)P)PPFF,&1l)C)CCF 111d7Oi'&qqq/I*EE

 111d7Oi'&qqq/I*EE

 R,%6!674747#33D!F47O3 G
 
 
 G
 
 

 fQ;// C
C ffRXffRX 9s111d7|33i#dAAAg,6NNFAAAtG_vdAAAg.Fu{-..AHVQV$$$$$$r&   
BLOCK_SIZEc           	         t          j        d                              t           j                  }| ||z  z   }|||z  z   }t	          d           }	t          d||          D ]o}
|
t          j        d|          z   }||k     }t          j        ||z   |t	          d                     }t          j        t          j	        ||	                    }	pd}t          d||          D ]|}
|
t          j        d|          z   }||k     }t          j        ||z   |d          }t          j
        ||	z
            }|t          j        t          j        ||d                    z  }}t          j        |          }t          d||          D ][}
|
t          j        d|          z   }||k     }t          j        ||z   |          }||	z
  |z
  }t          j        ||z   ||           \dS )zz
    Compute log_softmax along the last dimension of a 2D tensor.
    Each block handles one row of the input tensor.
    r   infrC   rB   rF   N)r   rG   rK   rL   floatrJ   rI   rR   maxmaximumexpsumrM   logrU   )	input_ptr
output_ptrinput_row_strideoutput_row_striden_colsr   row_idxrow_start_ptroutput_row_start_ptrmax_val
col_offsetcol_idxrD   valssum_expexp_valslog_sum_expoutputs                     r$   _log_softmax_kernelr   _  s    mA!!"(++G *: ::M%2C(CC U||mGAvz22 4 4
ryJ777 w}w.T%,,OOO &D'2233 GAvz22 	9 	9
ryJ777 w}w.TEEE 6$.))26"(4377888 &//K Avz22 D D
ryJ777 w}w.T::: +- 	%/dCCCCCD Dr&   inputr   c                    |dk    r|| j         dz
  k    rt          d          | j        }|                     d| j        d                   }|                                }|j        \  }}t          j        |          }d}|f}t          |         |||                    d          |                    d          ||           |                    |          S )a*  
    Compute log_softmax using Triton kernel.

    Args:
        input: Input tensor
        dim: Dimension along which to compute log_softmax
             (only -1 or last dim supported)
    >> Stashed changes
    Returns:
        Tensor with log_softmax applied along the specified dimension
    r   r   zFThis implementation only supports log_softmax along the last dimension   r   r   )	ndim
ValueErrorr   reshape
contiguousr   
empty_liker   r   )	r   r   original_shapeinput_2dn_rowsr   r   r   r   s	            r$   log_softmaxr     s     byySEJN**T
 
 	

 [N}}RR11H""$$H^NFF h''F J 9Da    >>.)))r&   c                    t          j        d          }||	z  }||	z  }||k    s||	k    rdS d}t          d||
          D ]b}|t          j        d|
          z   }||k     }||z  ||z  z   ||z  z   }t          j        | |z   |d          }|t          j        |          z  }c||z  }||z  ||z  z   }t          j        ||z   |           dS )z
    Kernel for computing mean along a single dimension.
    Input is viewed as (M, N, K) where N is the dimension being reduced.
    r   NrB   rC   )r   rG   rJ   rI   rR   r   rU   )r   r   input_stride0input_stride1input_stride2output_stride0output_stride1r   r   r   r   r   m_idxk_idxaccn_start	n_offsetsrD   	input_idxr   mean_val
output_idxs                         r$   mean_kernelr     s   & -

C 1HE!GE zzUaZZ CAz**  bi:666	1} M!I$==@UU 	
 wy9,4sCCCrvd|| QwH'%.*@@JHZ*$h/////r&   FkeepdimrA   c                    | j          |cxk    r| j         k     sn J d| d| j          d            |dk     r
|| j         z   }|I| j        t          j        t          j        t          j        t          j        fv rt          j        }n| j        }| j        |k    r|                     |          } t          | j
                  }d}t          |          D ]}|||         z  }||         }d}t          |dz   t          |                    D ]}|||         z  }|                     |||          }	|r|                                }
d|
|<   n|d|         ||dz   d         z   }
t          j        |
|| j                  }|r*|                    |d|                              d          n|                    ||          }||z  f}d}t%          |         |	||	                    d          |	                    d          |	                    d	          |                    d          |j         dk    r|                    d          nd||||           |S )
a  
    Triton implementation of torch.mean with single dimension reduction.

    Args:
        input: Input tensor
        dim: Single dimension along which to compute mean
        keepdim: Whether to keep the reduced dimension
        dtype: Output dtype. If None, uses input dtype
               (or float32 for integer inputs)

    Returns:
        Tensor with mean values along specified dimension
    zInvalid dimension z for tensor with z dimensionsr   Nr   )rA   r{   r   r   )r   rA   r   int8int16int32rL   rQ   rK   listr   rJ   lenr   copyr   r{   squeezer   r   )r   r   r   rA   r   r   ir   r   input_3doutput_shaper   	output_2dr   r   s                  r$   mean_dimr     s   ( J;#****
*****JSJJ5:JJJ +**
 QwwEJ };5:u{EKMMMMEEKE {e E 	
A3ZZ  	U1Xc
A	A37CJJ''  	U1X }}Q1%%H  6zz||STcT{U3799%55 [U5<HHHF 7>Wq!Q''//2226>>RSUVCWCWI E8DJ(~11	q			   Mr&   c                 "    t          | |          S r(   r   )rn   ro   s     r$   mm_batch_invariantr   V  s    Q"""r&   outc                   | j         dk    r6|j         dk    r+t          | |          }||                    |           |S |S | j         dk    r|j         dk    rt          | ||          S | j         dk    rn|j         dk    rc| j        \  }}}|                     d|          }t          ||          }|                    ||d          }||                    |           |S |S | j         dk    rR|j         dk    rG|                     d                              |j        d         dd          }	t          |	||          S | j         dk    r|j         dk    r| j        \  }}
}}|j        \  }}}}|                     ||
z  ||          }|                    ||
z  ||          }t          ||          }|                    ||
||          }||                    |           |S |S t          d| j         d|j                   )	Nr   r   r   r   r      zkmatmul_batch_invariant currently only supports 2D x 2D, 3D x 3D, 3D x 2D, 2D x 3D, and 4D x 4D, got shapes  and )	r   r   copy_bmm_batch_invariantr   r   	unsqueezeexpandr   )rn   ro   r   resultbatchseqhiddena_2d	result_2d
a_expandedheadsseq_adim_a_dim_bseq_ba_3db_3d	result_3ds                      r$   matmul_batch_invariantr  Z  s2    	v{{qv{{"1a((?IIfJ	
11"1aS1111	
11 WsFyyV$$%dA..	""5#r22?IIfJ	
11 [[^^**171:r2>>
":qc::::	
11 &'W"ueUW1eU yyu55yyu55 (d33	 ""5%>>?IIfJ2'2 2()2 2
 
 	
r&   c                   | j         dk    r|j         dk    st          d| j         d|j                   | j        d         |j        d         k    r,t          d| j        d          d|j        d          d          | j        d         |j        d         k    r t          d	| j         d|j         d          | j        |j        k    r t          d
| j         d|j         d          | j        \  }}}|j        \  }}}| j        }| t	          j        |||f| j        |          }	n?|j        |||fk    s
J d            |j        |k    r|j        | j        k    s
J d            |}	t          j        ddddddt          j        ddddddt          j	        ddddddi}
|
|         }|t          j        ||d                   t          j        ||d                   z  f}t          |         | ||	|||||                     d          |                     d          |                     d          |                    d          |                    d          |                    d          |	                    d          |	                    d          |	                    d          f|                                 dk    |                                dk    |	                                dk    d| |	S )Nr   z3bmm_batch_invariant expects 3D tensors, got shapes r   r   z0Batch dimensions of tensors must match, but got .r   r   z.Incompatible inner dimensions for matmul: got zIncompatible dtypes: got rz   zout tensor has incorrect shapezout tensor mismatchr~   r   r   )r6   r7   r8   r   r   r   r   r6   r7   r   )r9   r:   r;   )r   r   r   rA   r   r   r{   r   r   rQ   r   rH   r   r   r   )rn   ro   r   r   r   r   r  r   rA   rv   r   cfgr   s                r$   r   r     sR   FaKKAFaKK2'2 2()2 2
 
 	

 	wqzQWQZ6wqz6 6()
6 6 6
 
 	
 	wqzQWQZUQWUU17UUU
 
 	
 	w!'MQWMM17MMMNNNgGAq!gGAq!GE
{KAq	!(%@@@yQ1I%%%'G%%%yE!!cjAH&<&<&<>S&<&<< 	
 
 	
 
 	
 
G0 %.C 	
As>*++fk!S=P.Q.QQD
 t																!" 		E!		E!		E!' ( )  . Hr&   c                 &    t          |||           S )N)ru   r   )ru   rn   ro   s      r$   addmm_batch_invariantr    s    Q----r&   c                 <    |r
J d            t          | |          S )Nznot implemented)r   )r   )r   r   _half_to_floats      r$   _log_softmax_batch_invariantr    s*    00000u#&&&&r&   c                     t          j        | |d          }| |z
  } t          j        |           }t          j        ||d          }||z  S )NTr   r   )r   amaxr   r   )r   r   rA   	input_maxexp_x	sum_exp_xs         r$   softmax_batch_invariantr    sT     
5c4888IIEIeE	%S$777I9r&   c                     ||t           j        k    sJ d|                                  t           j                  }t          |          dk    r+d t	          t           j                            D             }t           fd|D             d          }|D ]}t          ||d          }|s|D ]}|                    |          }|S )Nzunsupported dtype: r   c                     g | ]}|S  r  ).0r   s     r$   
<listcomp>z(mean_batch_invariant.<locals>.<listcomp>  s    222Qq222r&   c                 $    g | ]}|j         z  S r  )r   )r  dr   s     r$   r  z(mean_batch_invariant.<locals>.<listcomp>	  s    666Q!ej.666r&   T)reverser  )	r   rQ   rK   r   rJ   r   sortedr   r   )r   r   r   rA   r   sorted_dimsr  s   `      r$   mean_batch_invariantr"    s    =EU]2224Q%4Q4Q222XXem$$F
3xx1}}22%EK 0 011222 6666#666EEEK  7 7&a666 ' 	' 	'A^^A&&FFMr&   c           	         t          j        d                              t           j                  }| ||z  z   }	|||z  z   }
t          j        dgt           j                  }t          d||          D ]}|t          j        d|          z   }||k     }t          j        |	|z   |d          }|                    t           j                  }||z  }|t          j	        t          j
        ||d                    z  }||z  }t          j        ||z             }d|z  }t          d||          D ]}|t          j        d|          z   }||k     }t          j        |	|z   |d          }t          j        ||z   |d          }|                    t           j                  }|                    t           j                  }||z  |z  }|                    |j                  }t          j        |
|z   ||           dS )	z
    Compute RMS normalization along the last dimension of a 2D tensor.
    RMS Norm: y = x / sqrt(mean(x^2) + eps) * weight
    Each block handles one row of the input tensor.
    r   r   r@   rB   rC   g      ?rF   N)r   rG   rK   rL   rP   rQ   rJ   rI   rR   r   rM   sqrtrA   rU   )r   
weight_ptrr   r   r   r   epsr   r   r   r   sum_sqr   r   rD   r   vals_f32sq_valsmean_sqrmsinv_rmsweight
weight_f32
output_f32r   s                            r$   _rms_norm_kernelr0    s     mA!!"(++G*: ::M%2C(CC Xqc,,,FAvz22 7 7
ryJ777w}w.TEEE772:&&X%"&$55666 voG
''C-
 
 CCiG Avz22 
D 
D
ryJ777w}w.TEEEg-DDDD772:&&YYrz**
'*4
tz**
%/dCCCCC
D 
Dr&   ư>r-  r&  c           
      b   |                                 dk    s
J d            | j        d         |j        d         k    s'J d| j        d          d|j        d          d            | j        }|                     d| j        d                   }|                                }|                                }|j        \  }}t	          j        |          }d}|f}	t          |	         ||||                    d          |                    d          |||	           |                    |          S )
a  
    Compute RMS normalization using Triton kernel.

    RMS Norm normalizes the input by the root mean square and scales by weight:
    output = input / sqrt(mean(input^2) + eps) * weight

    Args:
        input: Input tensor of shape (..., hidden_size)
        weight: Weight tensor of shape (hidden_size,)
        eps: Small constant for numerical stability

    Returns:
        Tensor with RMS normalization applied along the last dimension
    r   zWeight must be 1-dimensionalr   r   zInput last dimension (z) must match weight dimension ()r   r   )r   r   r   r   r   r   r0  r   )
r   r-  r&  r   r   r   r   r   r   r   s
             r$   rms_normr4  J  sE   " ::<<1<;r?fl1o---	0R 	0 	0#\!_	0 	0 	0 .-- [N}}RR11H""$$H  F^NFFh''FJ9DTa	 	 	 	 >>.)))r&   c                 &    t          | ||          S )a  
    Batch-invariant wrapper for RMS normalization.

    This function provides a deterministic, batch-invariant implementation
    of RMS normalization for use with the batch_invariant mode.

    Args:
        input: Input tensor of shape (..., hidden_size)
        weight: Weight tensor of shape (hidden_size,)
        eps: Small constant for numerical stability

    Returns:
        RMS normalized tensor
    )r&  )r4  )r   r-  r&  s      r$   rms_norm_batch_invariantr6  y  s    " E6s++++r&   c                 X    t          | |                                          }|||z   }|S r(   )r  t)r   r-  ru   r   s       r$   linear_batch_invariantr9    s-    #E688::66F$Mr&   c                  f   t           rd S da t          j                            dd          at          j        d          s(t          j        d          st          j        d          rt                              dt          d           t                              d	t          d           t                              d
t          d           t                              dt          d           n^t          j                            dd           at          j                            dd           adt          j        d<   dt          j        d<   t                              dt$          d           t                              dt&          d           t                              dt&          d           t                              dt(          d           t                              dt*          d           t          j        at*          t          _        t          j        j        j        j        at          j        j        j        j        at?          d          rdnd} | t          j        j        j        _        | t          j        j        j        _        t          j        j                             d           d S )NTatenIMPLd   P   Y   zaten::mmCUDAzaten::addmmzaten::matmulzaten::linearCUBLAS_WORKSPACE_CONFIGCUBLASLT_WORKSPACE_SIZEz:16:81zaten::_log_softmaxzaten::softmaxzaten::_softmaxzaten::mean.dimz	aten::bmmz
2.10.0.dev)FFFcublaslt)backend)!_batch_invariant_MODEr   libraryLibrary_batch_invariant_LIBr   is_device_capability_familyis_device_capabilityimplr   r  r  r9  osenvironget_original_cublas_workspace_cfg!_original_cublaslt_workspace_sizer  r  r"  r   bmm_original_torch_bmmbackendsry   matmul&allow_bf16_reduced_precision_reduction"_original_bf16_reduction_precision&allow_fp16_reduced_precision_reduction"_original_fp16_reduction_precisionr	   preferred_blas_library)reduced_precision_vals    r$   enable_batch_invariant_moder\    sR       =00@@ 	4S9940444 0444 	!!*.@&III!!-1FOOO!!.2H&QQQ!!.2H&QQQQ *,8QSW)X)X&,.JNN%t-
 -
) 18
,-03
,-:F   o/FOOO.0GPPP.0DfMMM k+>GGG)#EI 	"I ' 	"I '
 2,??JU  	 
NE 	 
NE 
N..z.BBBBBr&   c                  v    t          j        dd          } 	 t          |           dk    S # t          $ r Y dS w xY w)NVLLM_BATCH_INVARIANT0r   F)rM  getenvintr   )vals    r$   _read_vllm_batch_invariantrc    sK    
)*C
0
0C3xx1}   uus   * 
88r^  c                      t           S r(   )r^  r  r&   r$   vllm_is_batch_invariantre    s    r&   attention_backendc                    t           j        t           j        t           j        t           j        g}| |vr/d |D             }| r| j        nd }d| d| d}t          |          | |d         k    rd}t                              |d           d	t          j
        d
<   dt          j
        d<   dt          j
        d<   d	t          j
        d<   d	t          j
        d<   dt          j
        d<   dt          j
        d<   dt          j
        d<   dt          j
        d<   dt          j
        d<   dt          j
        d<   dt          j
        d<   d	t          j
        d<   d S )Nc                     g | ]	}|j         
S r  )r   )r  ro   s     r$   r  z0override_envs_for_invariance.<locals>.<listcomp>  s    >>>a16>>>r&   z;VLLM batch_invariant mode requires an attention backend in z, but got 'z{'. Please use --attention-backend or attention_config to set one of the supported backends before enabling batch_invariant.r   zqYou are using a decode-invariant form of batch invariance. This will not be invariant between prefill and decode.local)scoper_  VLLM_ALLREDUCE_USE_SYMM_MEMz:4096:8rA  GROUPNCCL_LAUNCH_MODENCCL_COLLNET_ENABLENCCL_NVLS_ENABLErC  NCCL_P2P_NET_DISABLENCCL_MIN_NCHANNELSNCCL_MAX_NCHANNELSSimple
NCCL_PROTOzallreduce:tree	NCCL_ALGONCCL_NTHREADSNCCL_SOCKET_NTHREADSVLLM_USE_AOT_COMPILE)r
   
FLASH_ATTN
FLASHINFERFLASH_ATTN_MLA
TRITON_MLAr   RuntimeErrorloggerwarning_oncerM  rN  )rf  supported_backendssupported_namesbackend_nameerrorwarnings         r$   override_envs_for_invariancer    s    	''+'		  222>>+=>>>1BL(--MM M+7M M M 	 5!!!.q111E 	 	G733303BJ,-,5BJ() &-BJ!"(+BJ$%%(BJ!"),BJ%&'*BJ#$'*BJ#$'BJ|.BJ{"%BJ),BJ%& *-BJ%&&&r&   c                    t                      rpt          |            t                       dt          j        j        j        _        dt          j        j        j	        _        dt          j        j        j
        _        d S d S )Nieee)re  r  r\  r   rT  ry   rU  fp32_precisioncudnnconvrnn)rf  s    r$   init_batch_invariancer    so        9$%6777#%%% 5;"139!028 ///9 9r&   r(   )r   )FN)r1  )>rM  collections.abcr   typingr   r   vllm.loggerr   vllm.platformsr   vllm.triton_utilsr   r   vllm.utils.torch_utilsr	   #vllm.v1.attention.backends.registryr
   __name__r~  dictstrr%   jitr4   	constexprrw   Tensorr   r   r   ra  r   r   boolrA   r   r   r  r   r  r  r  r"  r0  r   r4  r6  r9  rF  rI  rS  rY  rW  rP  rQ  r\  rc  r^  __annotations__re  r  r  r  r&   r$   <module>r     s   
				 $ $ $ $ $ $        # # # # # # + + + + + + ( ( ( ( ( ( ( ( : : : : : : D D D D D D	X		
38
&)15c3h	#s(^   (    3444Y) ,Y) ,Y)  ,!Y)" ,#Y)$ \%Y)& \'Y)( \)Y)* \+Y), l-Y) Y) Y) 54Y)z CGJ J|JJ,1L4,?J J J JZ A%" ,#A%$ ,%A%& ,'A%( \)A%* \+A%, \-A% A% A% A%H ;D ;D ;D ;D ;D|)* )*u| )*# )*u| )* )* )* )*X .0 .0 .0 .0 .0h  $	W W<W	W W ;	W
 \W W W Wt# # # )- 7
 7
 7
 7
 7
t &* U U U U Up. . .' ' '
    5;;M    0 /D /D /D /D /Df =A,* ,*<,*!&,*49,*
\,* ,* ,* ,*` =A, ,<,!&,49,
\, , , ,(       %) "%) "!% $( !;C ;C ;C|D     8799 d 9 9 9         .-+d2.- .- .- .-b9+d29 9 9 9 9 9r&   