
    )`i6                     ~    d dl Z d dlmZ d dlmZ ddlmZmZmZm	Z	m
Z
 edefd            Zded	efd
Zded	efdZdS )    N)contextmanager)Path   )InputLayoutencode_nameenumerate_hmma_flash_kernelsenumerate_qmma_flash_kernelsgenerate_filespathc              #      K   t          j                    }t          j        |            	 dV  t          j        |           dS # t          j        |           w xY w)z8Context manager to temporarily change working directory.N)osgetcwdchdir)r   original_dirs     /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/flashinfer/jit/attention/fmha_v2/generate_kernels.pyworking_directoryr      sZ       9;;LHTNNN
s   A A
src_targetgen_dirc                 H   |                     dd           |dz  }|                                s|                                r|                                 |                    | d           |dz                       d           |dz                       d           dS )	zHSetup output directory with symlinks to TensorRT-LLM source directories.T)parentsexist_oksrc)target_is_directory	generated)r   binN)mkdir
is_symlinkexistsunlink
symlink_to)r   r   src_links      r   _setup_output_directoryr"      s    MM$M... H  1 1 
==={!!4!000u_T*****    c                 P  	
 t          | |           t          |          5  g }t          |ddd           t          |dddg           t          |dddgd           g }d	 }|D ]~		j        }	j        }	j        } ||          r	fd
|D             n	g
 ||          r
fd|D             n

 ||          r
fd|D             n

|                    
           d |D             }d |D             }t          |           d d d            d S # 1 swxY w Y   d S )Nx   bf16   )smdtypehead_size_v	e4m3_fp32   )r(   r)   
head_sizes)r(   r)   r-   output_dtypec                 :    t          | t          t          f          S )N)
isinstancelisttuple)xs    r   <lambda>z#enumerate_kernels.<locals>.<lambda>7   s    jT5M:: r#   c                 <    g | ]}                     |           S ))seq_len_replace).0skspecs     r   
<listcomp>z%enumerate_kernels.<locals>.<listcomp>=   s'    :::q**:::r#   c                 F    g | ]}D ]}|                     |           S ))	head_sizer7   )r9   dtmp_kstmp_exps      r   r<   z%enumerate_kernels.<locals>.<listcomp>B   s4    SSS!7SS1--SSSSr#   c                 F    g | ]}D ]}|                     |           S ))r)   r7   )r9   dtr@   rA   s      r   r<   z%enumerate_kernels.<locals>.<listcomp>G   s4    UUUrWUU6r**UUUUr#   c                 2    g | ]}|j         |j        k    |S  )r(   sm_mmar9   r;   s     r   r<   z%enumerate_kernels.<locals>.<listcomp>N   s&    XXXEux5<?W?W%?W?W?Wr#   c                    g | ]}|j         d k    rU|j        dv rL|j        dk    rA|j        dk    r6|j        /|j        dk    r$|j        s|j        r|j        t          j
        k    sG|j         dk    r5|j        dv r,|j        dk    r!|j        r|j        dk    r|j        s|j        r|j         dk    rU|j        dv rL|j        d k    rA|j        dk    r6|j        /|j        dk    r$|j        s|j        r|j        t          j
        k    s|j         d	v rc|j        d
v rZ|j        dk    rO|j        dk    rD|j        t          j        k    r/|j        (|j        dk    r|j        s|j        r|j        s|j        s;|j         d	v rw|j        dv rn|j        dk    rc|j        dk    rX|j        t          j
        k    rC|j        <|j        dk    r1|j        s*|j        r#|j        r|j        r|j        s|j        r|j        r|j         dk    rU|j        dv rL|j        dk    rA|j        dv r8|j        s1|j        r*|j        r#|j        t          j        k    r|j        s|j        r[|j         dk    r|j        dv rx|j        dv ro|j        dv rf|j        dk    r[|j        sT|j        rM|j        sF|j        t          j        k    r1|j        dk    s|j        dk    s|j        |gt'          |          R S )P   )fp16r&   	fp16_fp32e4m3r+      r   N   Z   )rJ   r&   rK   d   )rO   rP   r%   )r&   r+   i@  i   )r&   rL   r+   r,   r'   )rI   r'   ))@   rQ   rM   Y   ))rQ       rS   )rJ   r&   )r(   r)   r>   r*   sage_block_sizesversion	cross_mhaflash_attentioninput_layoutr   SEPARATE_Q_K_Vldgsts_q
Q_PAGED_KVwarp_specializationtiledalibienable_attn_logit_softcapping
PACKED_QKVr.   r   rG   s     r   r<   z%enumerate_kernels.<locals>.<listcomp>P   s    c
 c
 c
 BK#UUUOs**%***2MQ&& ') ' &+*DDDHNN'DDD3.. /**!O +!1 + HOOIJ J2--)Q...6**!O +- + *k.HHH H..'<<<3..)S00*k.DDD.6**!O +- + "5 + 	 + H..'DDD3..)S00*k.HHH.6**!O +- + 2	 + <A;	 +
 !& 9 +
 ?Dk + "? + HNN944**./AA!O B- B 1 B *k.DDD!K E!? E HNN944..@@*.>>>**!O +- + "5 + *k.DDD
 3&&?c)): * ([''((~ *))r#   )	r"   r   r   r	   r6   r>   r)   extendr
   )r   r   specsspecs_expanded	list_liketmp_stmp_d	tmp_dtypespecs_namesr;   rA   s            @@r   enumerate_kernelsri   (   s2   J000 
7	#	# I$ I$$Us&cRRRR$Us+SVRWXXXX$c#V	
 	
 	
 	

 ::	 	+ 	+EMEOEI 9U##::::E::::W  9U##SSSSuSSSS  9Y''UUUUYUUUU 
 !!'**** YX^XXXc
 c
'c
 c
 c
L 	{###SI$ I$ I$ I$ I$ I$ I$ I$ I$ I$ I$ I$ I$ I$ I$ I$ I$ I$s   C,DD"D)r   
contextlibr   pathlibr   generator_utilsr   r   r   r	   r
   r   r"   ri   rE   r#   r   <module>rm      s    				 % % % % % %                    D    + +t + + + +N$$ N$ N$ N$ N$ N$ N$ N$r#   