
    )`i                     V   d dl Z d dlZd dlmZ d dlZddlmZ ddlm	Z	 ej
        d             Zdej        fdZdefd	Zdej        fd
Zdej        dededefdZdej        dej        ddfdZddZdefdZdefdZdee         dej        dej        dej        fdZddZddZdS )    N)Sequence   )env)gen_nvshmem_modulec                  6   t          j                    } d }ddg}| D ](}|D ]}||z  }|                                r|} n | n)|t          d|            t	          j        |t          j                   t                                                      }|S )Nzlibnvshmem_host.sozlibnvshmem_host.so.3z=Could not find libnvshmem_host.so or libnvshmem_host.so.3 in )mode)	jit_envget_nvshmem_lib_dirsexistsFileNotFoundErrorctypesCDLLRTLD_GLOBALr   build_and_load)lib_dirslib_path	lib_nameslib_dirlib_namecandidate_pathmodules          k/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/flashinfer/comm/nvshmem.pyget_nvshmem_moduler      s     +--HH%'=>I  ! 	 	H$x/N$$&& ) E   VHVV
 
 	
 Kv12222!!0022FM    returnc                  d    t                      } t                                          |            | S N)alloc_empty_unique_idr   nvshmem_get_unique_id)uids    r   get_unique_idr!   &   s,    

!
!C..s333Jr   c                  B    t                                                      S r   )r   nvshmem_unique_id_size r   r   unique_id_sizer%   ,   s    66888r   c                  \    t          j        t                      t           j        d          S )Ncpu)dtypedevice)torchzerosr%   uint8r$   r   r   r   r   0   s!    ;~''u{5IIIIr   r    rank
world_sizec                     t                                          | ||          }t          j                                         |S r   )r   nvshmem_initr*   cudasynchronize)r    r-   r.   statuss       r   initr4   4   s8    !!..sD*EEF	JMr   destsourcec                 F    t                                          | |          S r   )r   nvshmem_alltoall)r5   r6   s     r   alltoallr9   :   s    00v>>>r   c                      t           j                                         t                                                       d S r   )r*   r1   r2   r   nvshmem_finalizer$   r   r   finalizer<   >   s4    	J))+++++r   c                  B    t                                                      S r   )r   nvshmem_my_per$   r   r   my_per?   C       --///r   c                  B    t                                                      S r   )r   nvshmem_n_pesr$   r   r   n_pesrC   G   r@   r   shaper(   r)   c                 p    t                                          | ||          }t          j        |          S )a  Allocates memory using NVSHMEM collective malloc operation.

    This is a collective operation that requires participation by all PEs (Processing Elements).
    All participants must call this function with the same parameters.

    Note: This tensor should be explicitly deleted (del tensor) to ensure proper ordering
    of nvshmem_free operations rather than relying on garbage collection.

    Args:
        shape: The shape of the tensor to allocate.
        dtype: The data type of the tensor.
        device: The device to allocate the tensor on.

    Returns:
        A tensor allocated using NVSHMEM collective malloc.

    Reference:
        https://docs.nvidia.com/nvshmem/api/gen/api/memory.html#nvshmem-malloc-nvshmem-free-nvshmem-align
    )r   nvshmem_mallocr*   from_dlpack)rD   r(   r)   outputs       r   mallocrI   K   s2    2  !!00vFFFV$$$r   c                  F    t                                                       d S r   )r   nvshmem_barrier_allr$   r   r   barrier_allrL   h   s     ,,.....r   c                  F    t                                                       d S r   )r   %nvshmem_barrier_all_on_current_streamr$   r   r   barrier_all_on_current_streamrO   l   s     >>@@@@@r   )r   N)r   	functoolstypingr   r*   jitr   r	   jit.commr   cacher   Tensorr!   intr%   r   r4   r9   r<   r?   rC   r(   r)   rI   rL   rO   r$   r   r   <module>rW      s                           ) ) ) ) ) )   4u|    9 9 9 9 9Ju| J J J Jel # 3 3    ?5< ? ?$ ? ? ? ?, , , ,
0s 0 0 0 00s 0 0 0 0%C=%;% L% \	% % % %:/ / / /A A A A A Ar   