
    .`i@                         d Z ddlmZ ddlZddlmZ ddlmZmZm	Z	m
Z
 ddlmZmZmZmZ ddlmZmZ  G d d	e          Ze G d
 d                      Z G d de	e                   ZdS )z$Backend for GatedDeltaNet attention.    )	dataclassN)
VllmConfig)AttentionBackendAttentionCGSupportAttentionMetadataBuilderCommonAttentionMetadata)PAD_SLOT_IDcompute_causal_conv1d_metadatamamba_get_block_table_tensorsplit_decodes_and_prefills)AttentionSpec	MambaSpecc                   R    e Zd Zedefd            Zeded         fd            ZdS )GDNAttentionBackendreturnc                      dS )NGDN_ATTN r       w/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/v1/attention/backends/gdn_attn.pyget_namezGDNAttentionBackend.get_name   s    zr   GDNAttentionMetadataBuilderc                      t           S )N)r   r   r   r   get_builder_clsz#GDNAttentionBackend.get_builder_cls   s    **r   N)__name__
__module____qualname__staticmethodstrr   typer   r   r   r   r   r      sf        c    \ +T"?@ + + + \+ + +r   r   c                      e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   dZej        dz  ed	<   dZej        dz  ed
<   dZ	ej        dz  ed<   dZ
ej        dz  ed<   dZej        dz  ed<   dZej        dz  ed<   dZej        dz  ed<   dZej        dz  ed<   dZej        dz  ed<   dZedz  ed<   dZej        dz  ed<   dZej        dz  ed<   dS )GDNAttentionMetadatanum_prefillsnum_prefill_tokensnum_decodesnum_decode_tokensnum_spec_decodesnum_spec_decode_tokensnum_actual_tokensNhas_initial_statespec_query_start_locnon_spec_query_start_locspec_state_indices_tensornon_spec_state_indices_tensorspec_sequence_masksspec_token_indxnon_spec_token_indxnum_accepted_tokens	nums_dict	batch_ptrtoken_chunk_offset_ptr)r   r   r   int__annotations__r*   torchTensorr+   r,   r-   r.   r/   r0   r1   r2   r3   dictr4   r5   r   r   r   r"   r"   #   s        -1u|d*11104%,-444 elT1    6:u|d2999 "5<$#6    04,333+/OU\D(////3,333/3,333 "Itd{!!!%)Iu|d")))26EL4/66666r   r"   c                       e Zd ZU ej        ZdZeed<   de	de
e         dedej        fdZ	 	 	 dd
ededej        dz  dej        dz  dedefdZdefdZdS )r      reorder_batch_thresholdkv_cache_speclayer_namesvllm_configdevicec                 $   t          |t                    sJ || _        |j        | _        |j        | _        || _        | j        r | j        j        J | j        j        | _        nd| _        | j        dk    | _        | 	                    d| j                   | j        j
                                        | _        | j        j        j        | j        dz   z  | _        | j        j        $t#          | j        | j        j                  | _        t%          j        | j        | j        dz   ft$          j        |          | _        t%          j        | j        ft$          j        |          | _        t%          j        | j        ft$          j        |          | _        t%          j        | j        | j        dz   z  ft$          j        |          | _        t%          j        | j        | j        dz   z  ft$          j        |          | _        t%          j        | j        dz   ft$          j        |          | _        t%          j        | j        dz   ft$          j        |          | _        t%          j        | j        ft$          j        |          | _        d S )Nr   r<   dtyperA   )
isinstancer   r@   compilation_configspeculative_configr>   num_speculative_tokensnum_specuse_spec_decode_init_reorder_batch_thresholdcudagraph_modehas_full_cudagraphsuse_full_cuda_graphscheduler_configmax_num_seqsdecode_cudagraph_max_bsmax_cudagraph_capture_sizeminr8   emptyint32r-   r.   boolr/   r0   r1   r+   r,   r2   )selfr>   r?   r@   rA   s        r   __init__z$GDNAttentionMetadataBuilder.__init__I   s    -33333&"-"@"-"@*" 	*AMMM!%!8!ODMMDM#}q0**1d.BCCC #2FFHH 	 
 -:dma>OP 	$ "=I+.,'B, ,D(
 */)4=1+<=+*
 *
 *
&
 .3[)++.
 .
 .
*
 $);)+*$
 $
 $
 
  %{)T]Q->?A+ 
  
  

 $);)T]Q->?A+$
 $
 $
 
 %*K)A-/+%
 %
 %
!
 ).)A-/+)
 )
 )
%
 $);)++$
 $
 $
   r   NFcommon_prefix_lencommon_attn_metadatar2   num_decode_draft_tokens_cpu
fast_buildr   c                 z   |}|j         }|j        }|                                }	d\  }
}}t          |j        |j        | j        | j        j        j	                  }d }| j
        r6|4||dk                                                                             dk    rd }d}nS|dk    }|                                                                }|dk    rd }d }n|                    |j        d          }|4t          |d          \  }}}}d}d }d }d }|d d df         }d }|}|}d }n|dd          |d d         z
  }|J |dd          |d d         z
  }||          }|dk                                                                    }|                    d          |z
  }|}|                                                                |z
  }|                                                                |z
  |z
  }|dk    r|dk    rt#          || j        dz   z  |d                                                   } t'          j        | t&          j        |j                  }t'          j        dt&          j        |j                  }|d d d | j        dz   f         }d }|}d }d }nut'          j        ||          }!t'          j        |!d	          }"||z   }#|"d |#         }|"|#d          }||d | j        dz   f         }|| df         }t'          j        |dz   t&          j        |j                  }t'          j        ||         d|dd          
           t'          j        |                    d          |z
  dz   t&          j        |j                  }t'          j        ||          d|dd          
           t'          j        |                    d          |z
  dz   t&          j                  }t'          j        ||          d|dd          
           |J ||         }|dk    r0|	dk    }$||$|          }$|J t7          ||j                  \  }
}}nd }$|j        }%| j        r4|dk    r-|dk    r&|| j        k    r|| j        k    r| j        d |                              |d           | j        d |%         }||d          !                    tD                     | j#        d |                              |d           | j#        d |%         }||d          !                    d           ||J | j$        d |                    d                                        |d           | j$        d |                    d                   }| j%        d |                    d                                        |d           | j%        d |                    d                   }| j&        d |dz                                 |d           |d         }&| j&        d |%dz            }||dz   d          !                    |&           | j'        d |                              |d           | j'        d |%         }||d          !                    d           | j        r|dk    r|dk    r|| j        k    r| j(        d |                              |d           | j(        d |%         }||d          !                    tD                     | j)        d |dz                                 |d           |d         }'| j)        d |%dz            }||dz   d          !                    |'           tU          d!i d|d|d|d|d|d|d|j        d|$d|d|d|d|d|d|d|d|d|
d|d |}(|(S )"N)NNNr   T)non_blockingr<   )decode_thresholdrC   )stable)dimout)rD   )rA   Fr#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r   )+query_start_locquery_start_loc_cpucompute_num_computed_tokensr   block_table_tensorseq_lensr>   r@   cache_configmamba_cache_moderJ   sumitemtorA   r   sizerS   rI   r8   arangerU   rT   repeat_interleaveargsortzeroscumsumr
   r)   rN   rQ   r-   copy_fill_r	   r/   r1   r0   r+   r2   r.   r,   r"   ))rW   rY   rZ   r2   r[   r\   mrd   re   context_lens_tensorr3   r4   r5   rg   spec_sequence_masks_cpur/   r'   r%   r#   r&   r$   r(   r0   r1   r-   r.   r+   r,   non_spec_query_start_loc_cpu
query_lensquery_lens_cpunon_spec_query_lensspec_token_sizespec_token_masksindexnum_non_spec_tokensr*   
batch_sizespec_num_query_tokensnon_spec_num_query_tokensattn_metadatas)                                            r   buildz!GDNAttentionMetadataBuilder.build   s
    !+3;;==7G4	949 J):	
 
 8<$	*2*+F!+KLSUUTVV 
 #' &AQ&F#6::<<AACC1$$&*#*.''&=&@&@#* 'A ' '# &*1qAAA MK'8:L &'""O"&(,%,>qqq!t,D)#' '6$+>("&(,ss/CCJ*666047J3B37OON",.A-A"B.!388::??AAK.33A66DL +!4!8!8!:!:!?!?!A!ADU!U  %%''*<<?PP # q  [A%5%5"%$(9:#B',,..# # #(,#+*1# # #
 ',kU[1G' ' '# -?qqqBUDMTUDUBU?U,V)04-'6$+/(/3,,#(#:'$ $  &6tDDD&8;L&L#&+,@-@,@&A#"'(;(<(<"=,>')<4=1+<)<<-) 1C((!+1- (-{$q(+*1( ( ($
 23@TUVUWUW@X    ,1;OOA&&)99A=+*1, , ,(
  33404   
 05{"''**-==A+0 0 0, "$;#;<4QRR8    '222"56I"J! 3a 7".$57J6J$K!3???.0*1   9Iy"8"8 !% (
 $+	<!!q   D$@@@&$*FFF*+<,<+<=CC) D    )-(F{
{(S%%&6&7&78>>{KKK$%6&6%67==#$ >    #'":;J;"G 0 1 1288???&27R7RR$%B':'?'?'B'B%BCII#$ J    #'":-%**1---#  !:?#7#7#:#:!:;AAd B    #23L_5I5I!5L5L3LMO%&<(81(<&<=CC$4 D    %9$<!#'#<=MzA~=M#N  !1A!5!7!78>>?TUUU$%6&6%67==#$ >    #'":;J;"G 0 1 1288;;; $	Y!! A%%t;;;.||<BB-D C    -1,N-) *+,,7==kJJJ)*;K!O*;<BB(t C    )A(D%'+'DEUzTU~EU'V$$[1_%6%67==>WXXX, 
 
 
%
11
 $
 0/	

 .-
 $:#9
  11
 0/
 "6!5
 &>%=
 '@&?
 +H*G
 !4 3
 ,O
 !4 3
  !4 3!
"  i#
$  i%
& $:#9'
* r   c           
      ,   |}|j         | j        k    r|j        | j        k    s+J d|j          d| j         d|j         d| j         d	            t          j        |j                  }|dz
                                  }|                     d|||          S )z
        This method builds the metadata for full cudagraph capture.
        Currently, only decode is supported for full cudagraphs with Mamba.
        zLGDN only supports decode-only full CUDAGraph capture. Make sure batch size (z) <= cudagraph capture sizes (z), and number of tokens (z).r<   r   )num_reqsrQ   r)   r8   diffrd   cpur   )rW   rZ   rv   r2   r[   s        r   build_for_cudagraph_capturez7GDNAttentionMetadataBuilder.build_for_cudagraph_capture  s     ! J$666#t'CCCCI%&ZI I(,(DI I &'%8I I )-(D	I I I DCD $j):;;':Q'>&C&C&E&E#zz!Q 35PQQQr   )NNF)r   r   r   r   UNIFORM_BATCH_cudagraph_supportr=   r6   r7   r   listr   r   r8   rA   rX   r   r9   rV   r"   r   r   r   r   r   r   r   D   s        +9#$S$$$I
$I
 #YI
  	I

 I
 I
 I
 I
^ 48;? p pp 6p #\D0	p
 &+\D%8p p 
p p p pdR$;R R R R R Rr   r   )__doc__dataclassesr   r8   vllm.configr   vllm.v1.attention.backendr   r   r   r    vllm.v1.attention.backends.utilsr	   r
   r   r   vllm.v1.kv_cache_interfacer   r   r   r"   r   r   r   r   <module>r      s|   + * ! ! ! ! ! !  " " " " " "                       @ ? ? ? ? ? ? ?+ + + + +* + + + 7 7 7 7 7 7 7 7@YR YR YR YR YR":;O"P YR YR YR YR YRr   