
    .`i                     r   d Z ddlZddlmZ ddlmZmZ ddlmZ ej	        dej
        dej
        dej
        d	ej
        d
ej
        dej
        dej
        dej
        fd            Zdej        defdZ	 	 	 	 ddej        dej        dej        dej        dej        dej        dedededz  dedz  dedz  fdZdS ) zD
Memory-efficient attention for prefill.
It supports page size = 1.
    N)current_platform)tltriton)RCP_LN2kv_group_numBLOCK_MBLOCK_DMODELBLOCK_N	IS_CAUSALSLIDING_WINDOW_QSLIDING_WINDOW_KLkc           	      
	   t          j        d          }t          j        d          }t          j        d          }||z  }t          j        ||z             }t          j        ||z             }||z  }t          j        d|          }t          j        d|          }||z  t          j        d|          z   } || d d d f         z   |z  ||z  z   |d d d f         z   }!|d d d f         |	z  ||
z  z   |d d d f         z   }"|d d d f         |z  ||z  z   |d d d f         z   }#||k     }$t          j        | |!z   | d d d f         |k     |$d d d f         z  d          }%||"z   }&||#z   }'t          j        |gt           j                  t          d          z
  }(t          j        |gt           j                  })t          j        ||gt           j                  }*t          j        ||k     dd          }+|},|rt          j        |,|dz   |z            n|,},d}-|+|,z  }.t          |-|.|          D ]}/| d d d f         }0|/|d d d f         z   }1|1|k     }2|r	|2|0|1k    z  }2|dk    r	|0|1z
  |k    nd }3|dk    r	|1|0z
  |k    nd }4|3|2|3z  }2|4|2|4z  }2t          j
        |/|          }/t          j        |&||/z   |	z  z   |1|k     |$d d d f         z  d          }5t          j        |%|5          }6t          j        |2|6|z  d          }6t          j        |(t          j        |6d                    }7|6|7d d d f         z  }6t           j                            |6          }8t          j        |8d          }9t           j                            |(|7z
            }:|)|:z  |9z   })|*|:d d d f         z  }*t          j        |'||/z   |z  z   |/|d d d f         z   |k     |$d d d f         z  d          };|8                    |;j                  }8t          j        |8|;|*          }*|7}(|*|)d d d f         z  }*|| d d d f         z   |z  ||z  z   |d d d f         z   }<||<z   }=t          j        |=|*| d d d f         |k     |$d d d f         z  	           d S )
Nr         g        )maskotherdtypeinfg    ח)r   )r   
program_idloadarangezerosfloat32floatwhereminimumrangemultiple_ofdotmaximummaxmathexp2sumtor   store)>QKVsm_scaleB_Start_LocB_SeqlenOut
stride_qbs	stride_qh
stride_kbs	stride_kh
stride_vbs	stride_vh
stride_obs	stride_ohr   r   r	   r
   r   r   r   r   	cur_batchcur_headstart_mcur_kv_headcur_batch_seq_lencur_batch_in_all_start_indexblock_start_locoffs_noffs_doffs_moff_qoff_koff_vmask_dqk_ptrsv_ptrsm_il_iacc
block_maskend_nstart_n_limitend_n_limitstart_npos_qpos_kr   sliding_mask_qsliding_mask_kkqkm_ijpl_ijalphavoff_oout_ptrss>                                                                 /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/v1/attention/ops/triton_prefill_attention.py_fwd_kernelr_   $   s~   4 a  I}QHmAGl*K9 455#%7;+B#C#C 'O Yq'""FYq,''Fw1g!6!66F	%qqq$w	7:E
Y
	
qqq/	 

 47Oj(;+BBVAAAtG_TE111d7Oj(;+BBVDRSRSRSG_TEb[F
	E	QQQWo 11fT111WoF	 	 	A YFYF (G9BJ
/
/
/%,,
>C
(G9BJ
/
/
/C
(G\*"*
=
=
=C/,==q!DDJ E ;DNBJuw{g5666E Mu$KW== 5 5 qqq$w&qqq/) (( 	#EUN"D 2BA1E1EEEM---4 	 2BA1E1EEEM---4 	 %N"D%N"D.'22G2W<
JJ++qqq$w@
 
 
 VAq\\XdBM622z#rvb!}}--
d111d7mGLLva|| S4Z((EkD E!!!T'N"G2W<
JJVAAAtG_,0AAfTSTSTSTWoV
 
 

 DDMMfQ3
AAAtG
C	%qqq$w	7:E
Y
	
qqq/	 

 U{HH#VAAAtG_/@@VDRSRSRSG_U         r   returnc                 |    | t           j        k    rdS t          j                    rt          j        d          rdS dS )N    P      @   )torchr   r   is_cuda_alikehas_device_capabilityr   s    r^   get_block_sizerj      sI    r		'	)	) .>.T
/ /  srr`   TrF   rU   r[   ob_start_loc	b_seq_lenmax_input_len	is_causalsoftmax_scalesliding_window_qsliding_window_kc                    t          | j                  }| j        d         |j        d         |j        d         }}}|d|dz  z  n|}|t          z  }|j        d         | j        d         }}| j        d         |j        d         z  }||t	          j        ||          f}|dk    rdnd	}|	|	nd}	|
|
nd}
t          |         | |||||||                     d          |                     d          |                    d          |                    d          |                    d          |                    d          |                    d          |                    d          f||t	          j        |          |||	|
|d|d

 dS )zs
    q, k, v: [b * s, head, head_dim]
    b_start_loc: [b]
    b_seq_len: [b]
    out: [b * s, head, head_dim]
    Ng      ?g      ?r   r   rf         )
r   r   r	   r
   r   r   r   	num_warps
num_stagesr   )	rj   r   shaper   r   cdivr_   stridenext_power_of_2)rF   rU   r[   rk   rl   rm   rn   ro   rp   rq   rr   BLOCKLqr   _r,   batchheadr   gridrw   s                        r^   context_attention_fwdr      s   & 17##EQWR[!'"+AB"/"7sb#g]HH/!$agaj4E71:+L4]E::;D2XX1I+;+G''Q+;+G''Q												  "+B//))3     r`   )TNNN)__doc__rg   vllm.platformsr   vllm.triton_utilsr   r   vllm.utils.math_utilsr   jit	constexprr_   r   intrj   Tensorboolr   r    r`   r^   <module>r      s  ,   + + + + + + ( ( ( ( ( ( ( ( ) ) ) ) ) ) L  ,!L" \#L$ ,%L& \'L( |)L* l+L, l-L. 	/L L L L^%+ #    & "&#'#'> >|>|> |> |	>
 > |> > > 4<> Dj> Dj> > > > > >r`   