
    )`ip                     l   d Z ddlZddlZddlZddlmZ ddlmZ ddlm	Z	m
Z
mZmZ ddlZddlmZ ddlmZmZ dd	lmZ dd
lmZmZmZmZmZmZ ddlmZ ddlmZ ddl m!Z!m"Z"m#Z#m$Z$m%Z% ddl&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0 ddl1m2Z2 ddl3m4Z4 ddl5m6Z6m7Z7 ddl8m9Z9 ddl:m;Z; ddl<m=Z= ddl>m?Z? ddl@mAZA ddlBmCZC ddlDmEZE ddlFmGZGmHZH ddlImJZJmKZKmLZLmMZMmNZNmOZOmPZPmQZQmRZR ddlSmTZTmUZU ddlSmVZW ddlXmYZY ddlZm[Z[ dej\        d ej\        d!e]d"e]d#e^d$e^d%eeT         fd&Z_d'ej\        d ej\        d(ej\        d!e]d"e]d#e^d$e^d%eeT         fd)Z`d*e	ej\                 d+e	ej\                 d,e	e
e]e]f                  d-e	e
e]e]f                  d.e	e^         d/e	e^         d0e^d1e^d2e^d3e^d%eeT         fd4Zad5e	ej\                 d6e	e^         d7e	e]         d8e	e]         d9e	e]         d.e	e^         d0e^d1e^d:e^d;e^d%eeT         fd<Zbd*e	ej\                 d+e	ej\                 d,e	e
e]e]f                  d-e	e
e]e]f                  d.e	e^         d/e	e^         d=ecd>e^d2e^d3e^d?e^d@e^dAe^dBe^d%e	eT         fdCZddDe	eT         dEed%dfdFZe	 	 	 dXdEee         dIedJedKecdLe^dMe^d%dfdNZfdOegd%e^fdPZhdQegd%e
e]e]f         fdRZidS ZjdT Zkd%e]fdUZldV ZmendWk    r em             dS dS )Ya  
Copyright (c) 2025 by FlashInfer team.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

AOT build script for FlashInfer.

NOTE (Zihao): The following modules are intentionally excluded from the AOT build:
- gen_pod_module
- gen_deepgemm_sm100_module (it doesn't involve host-side compilation)
    N)product)Path)ListTupleIteratorOptional)Version   )act_func_def_strgen_act_and_mul_module)gen_cascade_module) gen_fp4_quantization_sm90_module!gen_fp4_quantization_sm100_module!gen_fp4_quantization_sm103_module!gen_fp4_quantization_sm110_module!gen_fp4_quantization_sm120_module!gen_fp4_quantization_sm121_module)#gen_mxfp8_quantization_sm100_module)gen_gdn_prefill_sm90_module)"gen_cutlass_fused_moe_sm120_module"gen_cutlass_fused_moe_sm103_module"gen_cutlass_fused_moe_sm100_module!gen_cutlass_fused_moe_sm90_module%gen_trtllm_gen_fused_moe_sm100_module)
gen_gemm_modulegen_gemm_sm90_modulegen_gemm_sm100_module!gen_gemm_sm100_module_cutlass_fp4!gen_gemm_sm100_module_cutlass_fp8gen_tgv_gemm_sm10x_modulegen_gemm_sm120_module!gen_gemm_sm120_module_cutlass_fp4gen_trtllm_gen_gemm_module"gen_trtllm_low_latency_gemm_module)gen_spdlog_module)gen_mla_module)!gen_selective_state_update_module&gen_selective_state_update_sm90_module)gen_norm_module)gen_page_module)gen_quantization_module)gen_rope_module)gen_sampling_module)gen_topk_module)gen_trtllm_utils_module)gen_xqa_modulegen_xqa_module_mla)	gen_batch_attention_modulegen_batch_decode_modulegen_batch_mla_modulegen_batch_prefill_modulegen_cudnn_fmha_modulegen_fmha_cutlass_sm100a_modulegen_single_decode_modulegen_single_prefill_modulegen_trtllm_gen_fmha_module)JitSpecbuild_jit_specs)env)get_cuda_version)CompilationContextdtype_qodtype_kvhead_dim_qkhead_dim_vouse_sliding_windowuse_logits_soft_capreturnc              #   P  K   | j         |j         k    r| |k    rd S | j         dk    rd S t          d| || ||d||d
  
        V  t          d| || t          j        ||d||d          V  t          | || ||d||          V  t          | || t          j        ||d||	  	        V  d S )	Nr
   fa2r   F)
backenddtype_qrA   dtype_orB   rC   pos_encoding_moderD   rE   use_fp16_qk_reductionrI   rJ   rA   rK   	dtype_idxrB   rC   rL   rD   rE   rM   )rJ   rA   rK   rB   rC   rL   rD   rE   	rJ   rA   rK   rO   rB   rC   rL   rD   rE   )itemsizer9   r5   torchint32r8   r3   r@   rA   rB   rC   rD   rE   s         b/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/flashinfer/aot.pygen_fa2rV   _   sE      H---(h2F2FA
#-/#      #+-/#      #-/	 	 	 	 	 	 "+-/
 
 
 
 
 
 
 
    rJ   rK   c              #      K   | |k    rd S | j         dk    r| |k    rd S |j         dk    r|dk    s|dk    rd S t          d| ||t          j        ||d||d          V  d S )	N   r
      @   fa3r   FrN   )rQ   r5   rR   rS   rJ   rA   rK   rB   rC   rD   rE   s          rU   gen_fa3r^      s       (1gFA#!2!2F
"+-/#       rW   
f16_dtype_	f8_dtype_fa2_head_dim_fa3_head_dim_use_sliding_window_use_logits_soft_cap_has_sm90	has_sm100	add_gemmaadd_oai_ossc
              #     K   d}
d}t          || | |z   ||          D ]J\  \  }}}}}}t          ||||||          E d {V  t          |||t          j        ||d|d	  	        V  K|r>t          || |z   | ||          D ]'\  \  }}}}}}t          |||||||          E d {V  (|rwt          | | |z   dg          D ]$\  }}\  }}t          ||d	d	||          E d {V  %|r;t          | |z   | dg          D ]%\  }}\  }}t          |||d	d	||          E d {V  &|	r5d
dlm} | D ],}dD ]'}dD ]"} |||||t          j        ddd|	  	        V  #(-|rRt          t          j	        t          j	        t          j	        t          j        ddddd	  	        V  t                      V  dg|rdgng z   }| D ]*}|D ]%}t          ||||t          j        |
|d          V  &+|rt                      V  d S d S )Ni   r[   rT   r   F)	rJ   rA   rK   rO   rB   rC   rL   rE   use_profilerr]   )TT   r
   )'gen_batch_prefill_attention_sink_module)rH   r\   )TF)	rI   rJ   rA   rK   rO   rB   rC   rL   rD      rP   rH   r\   )rI   rJ   rA   rK   rO   head_dim_ckvhead_dim_kperj   )r   rV   r2   rR   rS   r^   jit.attentionrl   r7   bfloat16r:   r4   r&   )r_   r`   ra   rb   rc   rd   re   rf   rg   rh   rn   ro   rB   rC   r@   rA   rD   rE   	dtype_qkvrK   rl   dtyperI   use_swamla_backend_s                            rU   gen_attentionrv      sU      LL 
Y
 
 
  
 	"k ##1 3
 
 
 	
 	
 	
 	
 	
 	
 	
 )k## 3
 
 
 	
 	
 	
 	
   " 
 
	 	 
&[+ !"''#5$7            $
 "N
 
		 	 
5!4 !!#5$7            	
 Y& 	  9#%8 #%&# # #'9(;            JJJJJJ 	 	E)  ,  GAA ' %!& %"'+$&$&*++2
 
 
 
 
 
 
   +,N^Nk$ %

 

 

 
	
 
	
 
	
 )***** 79uggr:L  # 
	 
	G& ! +))"	 	 	 	 	 	 	
	   rW   input_type_fp8_kv_cache_token_per_page_
head_size_head_grp_size_	has_sm120	has_sm121c
           
   #   N  K   |s|s|s|	sdS t          | |||||          D ]M\  }
}}}}}|dz  dk    s|dk    s|dk     r|dvr$|rt          j        }n|
}t          |
||||||
          V  N|s|	r0|D ]/}t	          t          j        t          j        |ddd	
          V  .dS dS )z0Generate XQA modules for various configurations.N   r   rk   r       r[   rm   )input_dtypekv_cache_dtype	page_sizehead_dimhead_group_ratiorD   output_dtypei@  rm   F)r   r   r   r   r   rD   )r   rR   float8_e4m3fnr0   r1   )rw   rx   ry   rz   r{   rc   re   rf   r|   r}   
input_typefp8_kv_cachetoken_per_page	head_sizehead_grp_sizerD   r   s                    rU   gen_xqar   m  sc       I i 	  

 
"
 "
 	 r>Q)c//Y^^!222 	("0NN'N")$*1#
 
 
 	
 	
 	
 	
  	I 	- 	 	N$!/$2(!$#(      	 		 	rW   sm_capabilitiesadd_commadd_moeadd_actadd_miscadd_xqac                 4   g }|                     t                                 |                    dd          }|                    dd          }|                    dd          }|                    dd          }|                    dd          }|                    dd          }|                    dd          }|t          t	          | |||||||||	
  
                  z  }|r,t
          D ]$}|                     t          |                     %|
r||                     t                                 |rc|                     t                                 |                     t                                 |                     t                                 |r|                     t                                 |                     t                                 |                     t                                 |                     t                                 |                     t                                 |                     t!          t"          j        d	                     |                     t!          t"          j        d	                     |                     t)                                 |                     t+                                 |                     t-                                 |                     t/                                 |r\|                     t!          t"          j        d
	                     |                     t!          t"          j        d
	                     |rB|                     t1                                 |                     t3                                 |r!|                     t5                                 |r|                     t7                                 |                     t9                                 |                     t;                                 |                     t=                                 |r!|                     t?                                 |rddl m!}m"} ddl m#} ddl m$} ddl m%} ddl m&} |                      |                       |                      |                       |rW|                      |                       |                      |                       |                      |                       |                      |                       |r|tO                      tQ                      tS                      tU                      tW                      tY                      t[                      t]                      gz  }|rc|                     t_                                 |                     ta                                 |                     tc                                 |rote                      tg          d          k    rPt"          j        t"          j        g}d
dg}g d}g d} g d}!|t          ti          |||| |!|||||
  
                  z  }|                     tk                                 tm                      }"g }#|D ]:}$|$j7        |"vr/|"8                    |$j7                   |#                     |$           ;|#S )Nsm90Fsm100sm100fsm103sm110sm120sm121)use_sm_100fTr
   )gen_trtllm_comm_modulegen_vllm_comm_module)gen_nvshmem_module)gen_comm_alltoall_module)gen_trtllm_mnnvl_comm_module)gen_moe_alltoall_module12.8r   )r[   rm   rk   )r
   rY         )9appendr%   getlistrv   r   r   r   r   r   r   r   r   r   r   r   r    rR   rq   float16r   r#   r$   r   r   r   r   r   r   r!   r"   r   jit.commr   r   r   r   r   r   r   r)   r*   r+   r,   r-   r.   r'   r(   r/   r   r>   r	   r   r6   setnameadd)%r_   r`   ra   rb   rc   rd   r   r   rg   rh   r   r   r   r   	jit_specsre   rf   
has_sm100f	has_sm103	has_sm110r|   r}   act_namer   r   r   r   r   r   xqa_input_type_xqa_fp8_kv_cache_xqa_token_per_page_xqa_head_size_xqa_head_grp_size_namesretjit_specs%                                        rU   gen_all_modulesr     sF      "I&(()))""6511H##GU33I $$Xu55J##GU33I##GU33I##GU33I##GU33I 	
 	
  I  ?( 	? 	?H3H==>>>> (B**+++ 	B133444=??@@@>@@AAA 	F>@@AAA?AABBB244555>@@AAA>@@AAA)%.eLLL   )%-UKKK   @BBCCC799:::?AABBBBDDEEE 	Y)%.dKKK   6u}RVWWWXXX 	C>@@AAA?AABBB 	B>@@AAA 	B>@@AAA?AABBB244555>@@AAA 	B>@@AAA 1JJJJJJJJ000000666666::::::555555++--...1133444 	8335566699;;<<<4466777--//000 <  #%%!!-//	
 		
	  	<CEEFFF4667778::;;; 	
$&&88 !=%.9!5M///')\\T!#"# 
 
 	
	  *,,--- EEEC ! !=%%IIhm$$$JJx   JrW   r   out_dirc                 X   |                                 rt          j        |           |                    dd           | D ]g}t          j        |j        z  |j         dz  }||j        z  |j         dz  }|j                            dd           t          j        ||           hd S )NTFparentsexist_okz.so)r   r   )	existsshutilrmtreemkdirjit_envFLASHINFER_JIT_DIRr   parentcopy2)r   r   r   srcdsts        rU   copy_built_kernelsr   I  s     ~~ gMM$M///  (8=8hm;P;P;PP%8=(=(=(==
%777S#	 rW   FT	build_dirproject_rootconfigverboseskip_prebuiltc                 B   t                      }||                    |           |}dt          j        vrt	          d          t                      }|dz  t          _        |dz  t          _        |dz  dz  dz  |dz  dz  dz  d	z  dz  gt          _	        |dz  d
z  dz  t          _
        |t          _        |dz  t          _        |dz  t          _        t          j                            dd           t          j                            dd           |r2t          d           | t          d|            t          d|           t          d|d                    t          d|d                    t          d|d                    t          d|d                    t          d|d                    t          d|d                    t          dt          j        d                    t          d           |                                D ]\  }}	|	rt          d | d!           d"D ]}
t          d#|
 d$||
                    |rt          d%           t#          |d         |d         |d         |d         |d         |d         ||d&         |d'         |d(         |d)         |d*         |d+         |d,                   }|rt          d-t%          |                     t'          |||.           | t)          ||            |rt          d/|            dS dS )0a  
    Compile and package modules based on the provided configuration.

    Args:
        out_dir: Output directory for packaged modules
        build_dir: Build directory for compilation
        project_root: Project root directory
        config: Configuration dictionary to override defaults (optional)
        verbose: Whether to print verbose build output
        skip_prebuilt: Whether to skip pre-built modules
    NFLASHINFER_CUDA_ARCH_LISTz8Please explicitly set env var FLASHINFER_CUDA_ARCH_LIST.csrcinclude3rdpartycutlasstoolsutilspdlog
cached_ops	generatedTr   zAOT build summary:z
  out_dir:z  build_dir:z  fa2_head_dim:fa2_head_dimz  fa3_head_dim:fa3_head_dimz  f16_dtype:	f16_dtypez  f8_dtype:f8_dtypez  use_sliding_window:rD   z  use_logits_soft_cap:rE   z  FLASHINFER_CUDA_ARCH_LIST:z  SM capabilities detected:z    z: Truer   rg   rh   r   r   r   r   z  :zGenerating JIT specs...r   rg   rh   r   r   r   r   z
Total ops:)r   r   zAOT kernels saved to:)get_default_configupdateosenvironRuntimeErrordetect_sm_capabilitiesr   FLASHINFER_CSRC_DIRFLASHINFER_INCLUDE_DIRCUTLASS_INCLUDE_DIRSSPDLOG_INCLUDE_DIRFLASHINFER_WORKSPACE_DIRr   FLASHINFER_GEN_SRC_DIRr   printitemsr   lenr<   r   )r   r   r   r   r   r   final_configr   sm_namehas_smkeyr   s               rU   compile_and_package_modulesr   W  s   ( &''LF###F""*44UVVV,..O #/"7G%1I%=G"z!I-	9z!I-7&@9L$G  ".
!:X!E	!QG (1G$!*\!9G%.%<G"$$TD$AAA"(((EEE  ,"###,(((ni(((!7888!7888nf[1222mVJ/000%v.B'CDDD&/D(EFFF,bj9T.UVVV+,,,.4466 	. 	.OGV .,W,,,---
 		, 		,C +s+++vc{++++  )'((({z~~#$$%z{}yyzy I   ,lC	NN+++ IwmLLLL 9g... 0%w/////0 0rW   sc                     |                                  dv rdS |                                  dv rdS t          d|            )N)true1T)false0FzInvalid boolean value: )lower
ValueError)r   s    rU   
parse_boolr     sI    wwyyM!!t	
n	$	$u6166777rW   r   c                 `    t          t          |                     d                    \  }}||fS )N,)mapintsplit)r   qokvs      rU   parse_head_dimr    s*    hnnS))**FBr6MrW   c                  t    g dg dt           j        t           j        gt           j        gddgddgddddddddS )zGet default AOT configuration)r[   r[   rm   rm   rk   rk   ))rZ   rm   r
  r	  r  FT)r   r   r   r   rD   rE   r   rg   rh   r   r   r   r   )rR   r   rq   r    rW   rU   r   r     s]     ;::FFFmU^4()$dm %t}  rW   c            
      "   t                      } |                     d          dt          dt          dt          ffd} |dd           |d	d
           |d	d           |dd           |dd           |dd
           |dd          dS )zDetect SM capabilitiesN)supported_major_versionscomputeversionrF   c                 ~     t           fdD                       sdS t                      t          |          k    S )Nc              3       K   | ]}|v V  	d S )Nr  ).0flagr  s     rU   	<genexpr>z9detect_sm_capabilities.<locals>.has_sm.<locals>.<genexpr>  s'      BBt7d?BBBBBBrW   F)anyr>   r	   )r  r  gencode_flags_lists   ` rU   r   z&detect_sm_capabilities.<locals>.has_sm  sH    BBBB/ABBBBB 	5!!WW%5%555rW   
compute_90z12.3compute_100r   z12.9compute_103compute_110z13.0compute_120compute_121)r   r   r   r   r   r   r   )r?   get_nvcc_flags_liststrbool)compilation_contextr   r  s     @rU   r   r     s    ,..,@@!% A  6 6c 6d 6 6 6 6 6 6 |V,,v..&//v..v..v..v..  rW   c                  ,   t                      } t                      }t          | d         | d         | d         | d         | d         | d         || d         | d         | d	         | d
         | d         | d         | d                   }t          |          S )z#Register the default set of modulesr   r   r   r   rD   rE   r   rg   rh   r   r   r   r   )r   r   r   r   )r   r   r   s      rU   register_default_modulesr#    s    !!F,..O{z~~#$$%z{}yyzy I  y>>rW   c                  <   t          j        d          } |                     dt          d           |                     dt          d           |                     dd	d
           |                     dd	d           |                     dd	ddgd           |                     dd	ddgd           |                     dd	d           |                     dd	d           |                     dt          d           |                     dt          d           |                     dt          d            |                     d!t          d"           |                     d#t          d$           |                     d%t          d&           |                     d't          d(           |                                 }t          t                                                    j        d)         }t                      }t          j        }d }|j        rt          |j                  }|j        rt          |j                  }|j        rd* |j        D             |d+<   |j        rd, |j        D             |d-<   |j        rd. |j        D             |d/<   |j        rd0 |j        D             |d1<   |j        rd2 |j        D             |d3<   |j        rd4 |j        D             |d5<   d6D ]}t)          ||d           }||||<   t+          ||||d7d89           d S ):Nz%Ahead-of-Time (AOT) build all modules)descriptionz	--out-dirzOutput directory)typehelpz--build-dirzBuild directoryz--fa2-head-dim*z2FA2 head dim pair of qk and vo, separated by comma)nargsr'  z--fa3-head-dimz2FA3 head dim pair of qk and vo, separated by commaz--f16-dtyper   rq   z16-bit data type)r)  choicesr'  z
--f8-dtyper   float8_e5m2z8-bit data typez--use-sliding-windowzUse sliding window attentionz--use-logits-soft-capzUse logits soft capz
--add-commz2Add communication kernels (trtllm_comm, vllm_comm)z--add-gemmazSAdd kernels for Gemma Model (head_dim=256, use_sliding_window, use_logits_soft_cap)z--add-oai-ossz?Add kernels for OAI OSS Model (head_dim=64, use_sliding_window)z	--add-moezAdd MoE kernelsz	--add-actzAdd activation kernelsz
--add-misczAdd miscellaneous kernelsz	--add-xqaz'Add XQA (Cross-Query Attention) kernelsr
   c                 ,    g | ]}t          |          S r  r  r  dims     rU   
<listcomp>zmain.<locals>.<listcomp>V       !S!S!S#."5"5!S!S!SrW   r   c                 ,    g | ]}t          |          S r  r-  r.  s     rU   r0  zmain.<locals>.<listcomp>X  r1  rW   r   c                 8    g | ]}t          t          |          S r  getattrrR   r  rs   s     rU   r0  zmain.<locals>.<listcomp>Z  s"    QQQwue44QQQrW   r   c                 8    g | ]}t          t          |          S r  r4  r6  s     rU   r0  zmain.<locals>.<listcomp>\  s"    OOOgeU33OOOrW   r   c                 ,    g | ]}t          |          S r  r   r  r   s     rU   r0  zmain.<locals>.<listcomp>^  s    'W'W'W!
1'W'W'WrW   rD   c                 ,    g | ]}t          |          S r  r9  r:  s     rU   r0  zmain.<locals>.<listcomp>`  s+     )
 )
 )
JqMM)
 )
 )
rW   rE   r   TF)r   r   r   r   r   r   )argparseArgumentParseradd_argumentr   r   
parse_args__file__resolver   r   r   r   r   r   r   r   r   r   rD   rE   r5  r   )parserargsr   r   r   r   r   	arg_values           rU   mainrE    s   $;  F $5GHHH
D7HIII
A    
 A    
 J'	      -0	     c0N     /sAVWWW
A    
 b    
 N    
 *;LMMM
*;STTT
:<WXXX
*+T     D >>))++3A6L!!F0I"G | %t|$$~ )((	 T!S!SAR!S!S!S~ T!S!SAR!S!S!S~~ RQQ$.QQQ{} POOOOOz X'W'Wt?V'W'W'W#$ 
)
 )
#'#;)
 )
 )
$% $ $ D#t,,	 #F3K  !     rW   __main__)NFT)o__doc__r<  r   r   	itertoolsr   pathlibr   typingr   r   r   r   rR   packaging.versionr	   jit.activationr   r   jit.cascader   jit.fp4_quantizationr   r   r   r   r   r   jit.fp8_quantizationr   jit.gdnr   jit.fused_moer   r   r   r   r   jit.gemmr   r   r   r   r   r    r!   r"   r#   r$   
jit.spdlogr%   jit.mlar&   	jit.mambar'   r(   jit.normr)   jit.pager*   jit.quantizationr+   jit.roper,   jit.samplingr-   jit.topkr.   jit.tllm_utilsr/   jit.xqar0   r1   rp   r2   r3   r4   r5   r6   r7   r8   r9   r:   jitr;   r<   r=   r   jit.cpp_extr>   r!  r?   rs   r  r   rV   r^   rv   r   dictr   r   r   r  r   r  r   r   r#  rE  __name__r  rW   rU   <module>rb     s   ,  				              2 2 2 2 2 2 2 2 2 2 2 2  % % % % % % D D D D D D D D + + + + + +                F E E E E E 0 0 0 0 0 0                                     * ) ) ) ) ) # # # # # #        & % % % % % % % % % % % 5 5 5 5 5 5 % % % % % % - - - - - - % % % % % % 3 3 3 3 3 3 7 7 7 7 7 7 7 7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 * ) ) ) ) ) ) )       ) ) ) ) ) ) 3 3 3 3 3 3=k=k= = 	=
 = = g= = = =@[k [ 	
    g   DiU[!iEK i c3h(i c3h(	i
 di t*i i i i i gi i i iX=ek"=:= #Y= S		=
 I= d= = = = = g= = = =@YU[!YEK Y c3h(Y c3h(	Y
 dY t*Y Y Y Y Y Y Y Y Y 
']Y Y Y YxG} 
   $ g0 g0d^g0g0 g0 	g0
 g0 g0 
g0 g0 g0 g0T8# 8$ 8 8 8 8S U38_    
  &  2#    0f f fR zDFFFFF rW   