
    .`i$                     h   d dl Z d dlmZ d dlZd dlmZ d dlmZ d dlm	Z	m
Z
 d dlmZ d dlmZmZ d dlmZ d d	lmZ e
j        d
e	j        fd            Zd Zdedeee         ef         fdZdee         dee         dee         dedeedf         dee         dededededeeef         fdZdee         dee         dee         fdZdedededeeef         d ed!eeef         deeef         deedf         fd"Z deded ed!eeef         deeef         deeef         deedf         fd#Z!dS )$    N)Any)CacheConfig)MambaStateCopyFunc)tltriton)SchedulerOutput)KVCacheConfig	MambaSpec)CachedRequestState)GPUInputBatch
BLOCK_SIZEc                 R   t          j        d          }t          j        | |z             }t          j        ||z             }t          j        ||z             }t          j        d|          }t	          d||          D ]}	|	|z   |k     }
||	z   |z                       t          j        t           j                            }||	z   |z                       t          j        t           j                            }t          j        ||
          }t          j        |||
           d S )Nr   )mask)	r   
program_idloadarangerangetopointer_typeuint8store)src_ptrsdst_ptrssizesr   pidsrc_ptrdst_ptrsizeoffsetsir   curr_src_ptrcurr_dst_ptrdatas                 n/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/v1/worker/mamba_utils.pybatch_memcpy_kernelr%      s   
-

Cghn%%Gghn%%G753;Di:&&G1dJ'' 0 0Gt#!g-11"/"(2K2KLL!g-11"/"(2K2KLLw|$///
t$/////0 0    c                     | j         d         }|j         d         |k    sJ |j         d         |k    sJ |f}d}t          |         | |||           d S )Nr   i   )r   )shaper%   )r   r   r   batchgridr   s         r$   batch_memcpyr+   &   sk    N1E>!%%%%;q>U""""8DJh%JOOOOOOr&   kv_cache_configreturnc                 ~   g }g t          t          | j                            D ]S}| j        |         j        }t	          |t
                    r*|                    |                               |           Tt          |          dk    s
J d            t          fdD                       sJ |d         fS )Nr   zno mamba layers in the modelc              3   0   K   | ]}d          |k    V  dS )r   N ).0specmamba_specss     r$   	<genexpr>z#get_mamba_groups.<locals>.<genexpr>9   s,      >>${1~%>>>>>>r&   )r   lenkv_cache_groupskv_cache_spec
isinstancer
   appendall)r,   mamba_group_idsr    r7   r3   s       @r$   get_mamba_groupsr<   0   s    !#O#%K367788 . .'7:HmY// 	.""1%%%}---!###%C###>>>>+>>>>>>>>KN**r&   src_state_listdest_state_listnum_elements_listmamba_state_copy_funcs.r;   src_block_idxdest_block_idxaccept_token_bias	req_stateforward_contextc           
         ||k    r|dk    rd S |D ]}|	j         |         }||         }|j        |         j        }|D ]}|
|         }|j        d         }t	          ||          D ]\  }} |||||dz             }|                     |j                   |                    ||                                                    |                    |j        |	                                z             d S )Nr      )
	block_idsr6   layer_nameskv_cachezipr9   
start_addrdata_ptrnum_elementselement_size)r=   r>   r?   r,   r@   r;   rA   rB   rC   rD   rE   mamba_group_idrH   dest_block_idrI   
layer_name	attention	kv_cachesstatestate_copy_func	copy_specs                        r$   collect_mamba_copy_metarX   =   sA    &&+<+A+A) X X'7	!.1%5nEQ% 
	X 
	XJ'
3I,5,>q,AI*-i9O*P*P X X&+O9m5F5J 	 %%i&:;;;&&u]';'D'D'F'FGGG!(()?%BTBTBVBV)VWWWWX
	X	X Xr&   c                    t          |           dk    rd S t          |           t          |          k    sJ t          |           t          |          k    sJ t          j        | dt          j                  }t          j        |dt          j                  }t          j        |dt          j                  }t          |||           d S )Nr   cuda)devicedtype)r5   torchtensorint64int32r+   )r=   r>   r?   src_state_ptrsdst_state_ptrsrN   s         r$   do_mamba_copy_blockrc   ^   s    
 >a~#o"6"66666~#&7"8"88888\.u{SSSN\/&TTTN< 1&TTTL>>>>>r&   scheduler_outputcache_configmamba_state_idxinput_batchrequestsc                 z   t          |          \  }}	|	j        }
|j        sJ |	j        }| j        }| j        pt                      }t          j        ||          D ]}|	                    |d           g }g }g }t          |j                  D ]\  }}||         }|                    |          }||j        dz
  |z  }t          |j        |d                            }|dz
  |
z
  }|||<   |dk    r7||k    r1t!          |||||||||j        |         dz
  ||           d|j        |<   t%          |||           dS )zc
    Copy the mamba state of previous step to the last
    (1 + num_speculative_blocks) block.
    NrG   r   )r<   num_speculative_blocksenable_prefix_caching
block_sizefinished_req_idspreempted_req_idsset	itertoolschainpop	enumeratereq_idsgetnum_computed_tokensr5   rH   rX   num_accepted_tokens_cpurc   )rd   r,   re   rf   rg   rh   rE   r@   r;   
mamba_specrk   rm   rn   ro   req_idr=   r>   r?   r    rD   prev_state_idx
num_blockscurr_state_idxs                          r$   preprocess_mambar~   n   s    #3?"C"COZ'>----&J'8(:Ccee/"24EFF * *FD)))) "N!#O#%{233 $7 $7	6V$	(,,V44! (;a?JNN,_Q-?@AA
 $a*@@"0RNn$D$D#!&3A6:   67K/29JKKKKKr&   c                    | j         }| j        }|j        }	t          |          \  }
}g }g }g }t	          |j                  D ]\  }}||         }|j        }t          |                    |g                     }||         }|	|         }||z   |z
  }||z   dz
  }||j	        z  |j	        z  }||k    r>||z
  }||         }||j	        z  dz
  }t          ||||||
|||||           ||k    rd|	|<   t          |||           dS )z
    If a blocks is converted from partial block to full block in this step, copy the
    state from the block for running state to the new full block.
    rG   N)num_scheduled_tokensscheduled_spec_decode_tokensrx   r<   rt   ru   rw   r5   rv   rm   rX   rc   )rd   r,   rg   rh   rf   rE   r@   num_scheduled_tokens_dict!scheduled_spec_decode_tokens_dictrx   r;   ry   r=   r>   r?   r    rz   rD   rw   num_draft_tokensr   num_accepted_tokensnum_tokens_running_statenew_num_computed_tokensaligned_new_computed_tokensrC   rA   rB   s                               r$   postprocess_mambar      s    !1 E(8(U%)A"2?"C"COZ "N!#O#%{233  /  /	6V$	';@DDVRPPQQ8@5a8"669II 	! #;=P"PST"T#z'<<z?TT 	$ '*BBB ;>V V+F3M8J<QQTUUN#!&!   ..-.'*9JKKKKKr&   )"rq   typingr   r]   vllm.configr   ,vllm.model_executor.layers.mamba.mamba_utilsr   vllm.triton_utilsr   r   vllm.v1.core.sched.outputr   vllm.v1.kv_cache_interfacer	   r
   vllm.v1.worker.gpu_input_batchr   &vllm.v1.worker.lora_model_runner_mixinr   jit	constexprr%   r+   tuplelistintr<   dictstrrX   rc   r~   r   r0   r&   r$   <module>r      sP              # # # # # #      ) ( ( ( ( ( ( ( 5 5 5 5 5 5 ? ? ? ? ? ? ? ? = = = = = = @ @ @ @ @ @ 0r| 0 0 0 0$P P P
+m 
+d3i>R8S 
+ 
+ 
+ 
+XIX#YX CyX #	X
 ""4c"9:X #YX X X X "X #s(^X X X XB?I?#Y? Cy? ? ? ? @L%@L"@L @L #s(^	@L
 @L 3**+@L #s(^@L ""4c"9:@L @L @L @LF6L%6L"6L 6L 3**+	6L
 #s(^6L #s(^6L ""4c"9:6L 6L 6L 6L 6L 6Lr&   