
    &`i#                        d Z ddlZ	 ddlZddlmZmZ ddlmZmZm	Z	m
Z
mZ n# e$ r  ed          w xY wddlmZmZ ej        ej        ej        ej        ej        ej        ej        ej        iZej        ej        ej        ej        ej        ej        ej        ej         ej!        ej"        ej#        ej$        ej%        ej        ej&        ej'        ej(        ej)        ej*        ej+        ej,        ej-        ej.        ej/        iZ0 e            rGddl1Z1ddl2Z1e1j3        ej"        e1j4        ej5        e1j        ej        e1j!        ej"        e1j#        ej$        e1j%        ej        e1j6        ej        e1j&        ej'        e1j7        ej8        e1j(        ej)        e1j*        ej+        e1j,        ej-        e1j.        ej/        iZ9 e:ed          rej;        e9e1j<        <   e1j4        ej#        e1j        ej        e1j!        ej!        e1j#        ej#        e1j%        ej%        e1j6        ej%        e1j&        ej&        e1j7        ej*        e1j(        ej(        e1j*        ej*        e1j,        ej,        iZ=d Z>d	 Z?d
 Z@d ZAd ZBd ZCd ZDd ZEd ZFd ZGd ZHd ZId ZJd ZKd ZLdS )z!Code to wrap some NCCL API calls.    N)Devicenccl)NcclCommunicatorget_build_versionget_versiongroupEnd
groupStartz*NCCL in Ray requires Cupy being available!)ReduceOptorch_availableNCCL_BFLOAT16c                  H    t           j        j                                        S )z+Returns the number of compute-capable GPUs.)cupycudaruntimegetDeviceCount     /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/util/collective/collective_group/nccl_util.pyget_num_gpusr   Z   s    9++---r   c                      t                      S N)r   r   r   r   get_nccl_build_versionr   _   s    r   c                      t                      S r   )r   r   r   r   get_nccl_runtime_versionr   c   s    ==r   c                  (    t          j                    S r   )r   get_unique_idr   r   r   get_nccl_unique_idr   g   s    r   c                 (    t          | ||          }|S )a(  Create an NCCL communicator using NCCL APIs.

    Args:
        world_size: the number of processes of this communicator group.
        nccl_unique_id: the NCCLUniqueID for this group.
        rank: the rank of this process.
    Returns:
        comm (nccl.ncclComm_t): an NCCL communicator.
    )r   )
world_sizenccl_unique_idrankcomms       r   create_nccl_communicatorr#   k   s     J==DKr   c                 r    | t           vr"t          d                    |                     t           |          S )zMap the reduce op to NCCL reduce op type.

    Args:
        reduce_op: ReduceOp Enum (SUM/PRODUCT/MIN/MAX).
    Returns:
        (nccl.ncclRedOp_t): the mapped NCCL reduce op.
    z&NCCL does not support reduce op: '{}'.)NCCL_REDUCE_OP_MAPRuntimeErrorformat)	reduce_ops    r   get_nccl_reduce_opr)   y   s8     ***CJJ9UUVVVi((r   c                 6   t          | t          j                  rt          | j        j                 S t                      r,t          | t          j                  rt          | j                 S t          d                    t          |                               )z2Return the corresponded NCCL dtype given a tensor.]Unsupported tensor type. Got: {}. Supported GPU tensor types are: torch.Tensor, cupy.ndarray.)
isinstancer   ndarrayNUMPY_NCCL_DTYPE_MAPdtypetyper   torchTensorTORCH_NCCL_DTYPE_MAP
ValueErrorr'   tensors    r   get_nccl_tensor_dtyper7      s    &$,'' 7#FL$566 6fel++ 	6'55
	tF||,,  r   c                     t          | t          j                  r| j        j        S t                      r,t          | t          j                  rt          | j                 S t          d
                    t	          |                               )z2Return the corresponded Cupy dtype given a tensor.r+   )r,   r   r-   r/   r0   r   r1   r2   TORCH_NUMPY_DTYPE_MAPr4   r'   r5   s    r   get_cupy_tensor_dtyper:      sz    &$,'' !|   7fel++ 	7(66
	tF||,,  r   c                    t          | t          j                  r| j        j        S t          | t
          j                  r| j        S t                      rDt          | t          j                  r*| j	        st          d          |                                 S t          d                    t          |                               )z@Return the pointer to the underlying memory storage of a tensor.z8Torch tensor must be on GPU when using NCCL collectives.r+   )r,   r   r-   dataptrnumpyr   r1   r2   is_cudar&   data_ptrr4   r'   r0   r5   s    r   get_tensor_ptrrA      s    &$,'' {&%-(( { %fel++ 	%> "N   ??$$$
	tF||,,  r   c                 N   t          | t          j                  st          | t          j                  r| j        S t                      r.t          | t          j                  rt          j        |           S t          d
                    t          |                               )z*Return the number of elements in a tensor.r+   )r,   r   r-   r>   sizer   r1   r2   numelr4   r'   r0   r5   s    r   get_tensor_n_elementsrE      s    &$,'' :fem+L+L { 'fel++ 	';v&&&
	tF||,,  r   c                 N   t          | t          j                  rt          | j                  S t                      r;t          | t          j                  r!t          |                                           S t          d
                    t          |                               )z)Return the shape of the tensor as a list.r+   )r,   r   r-   listshaper   r1   r2   rC   r4   r'   r0   r5   s    r   get_tensor_shaperI      s    &$,'' "FL!!! 'fel++ 	'&&&
	tF||,,  r   c                 N    t           t          j                  r fd j        D             S t	                      r;t           t
          j                  r!t                                                     S t          d
                    t                                         )z+Return the strides of the tensor as a list.c                 H    g | ]}t          |j        j        z            S r   )intr/   itemsize).0strider6   s     r   
<listcomp>z&get_tensor_strides.<locals>.<listcomp>   s+    QQQFV\2233QQQr   r+   )r,   r   r-   stridesr   r1   r2   rG   rO   r4   r'   r0   r5   s   `r   get_tensor_stridesrR      s    &$,'' RQQQQ&.QQQQ )fel++ 	)(((
	tF||,,  r   c                    t          | t          j                  r0	 | j        j        }n# t
          $ r}t          d          |d}~ww xY wt                      rKt          | t          j	                  r1| j        j
        }t          |t                    st          d          n/t          d                    t          |                               |S )z!Return the GPU index of a tensor.z!The tensor is not on a valid GPU.Nz!Unsupported tensor type. Got: {}.)r,   r   r-   deviceidAttributeErrorr&   r   r1   r2   indexrL   r4   r'   r0   )r6   rT   execs      r   get_tensor_devicerY      s    &$,'' 
S	N]%FF 	N 	N 	NBCCM	N			 Sz&%,?? S$&#&& 	DBCCC	D <CCDLLQQRRRMs   ) 
AAAc                    d}t          | t          j                  r1t          |t          j                  rt          j        | |           n_t	                      rNt          | t
          j                  r1t          |t
          j                  r|                     |           nt          | t
          j                  rft          |t          j                  rLt
          j        j	        
                    |                                          }|                     |           nt          | t          j                  rft          |t
          j                  rLt          j        t
          j        j	                            |                    }t          j        | |           nd}nd}|s=t          d                    t!          |           t!          |                              dS )zCopy the content from src_tensor to dst_tensor.

    Args:
        dst_tensor: the tensor to copy from.
        src_tensor: the tensor to copy to.

    Returns:
        None
    TFzdUnsupported tensor type. Got: {} and {}. Supported GPU tensor types are: torch.Tensor, cupy.ndarray.N)r,   r   r-   copytor   r1   r2   copy_utilsdlpackfrom_dlpacktoDlpack
fromDlpack	to_dlpackr4   r'   r0   )
dst_tensor
src_tensorcopiedts       r   copy_tensorrg      s    F*dl++ 
:t|0T0T J
++++			 j%,// 	J5
 5
 	 Z((((
EL11 	j7
 7
 	 "..z/B/B/D/DEEAQ
DL11 	j7
 7
 	  2 < <Z H HIIAK
A&&&&FF 
@@FZ  $z"2"2A A
 
 	

 
r   c                     t          | t                    s/t          d                    t	          |                               d | D             }|S )zReturns the gpu devices of the list of input tensors.

    Args:
        tensors: a list of tensors, each locates on a GPU.

    Returns:
        list: the list of GPU devices.

    zAExpect a list of tensors each locates on a GPU device. Got: '{}'.c                 ,    g | ]}t          |          S r   )rY   )rN   rf   s     r   rP   z*get_tensor_device_list.<locals>.<listcomp>(  s!    555 ##555r   )r,   rG   r&   r'   r0   )tensorsdevicess     r   get_tensor_device_listrl     sZ     gt$$ 
W..
 
 	
 65W555GNr   )M__doc__r>   r   	cupy.cudar   r   cupy.cuda.ncclr   r   r   r   r	   ImportErrorray.util.collective.typesr
   r   SUMNCCL_SUMPRODUCT	NCCL_PRODMINNCCL_MINMAXNCCL_MAXr%   int_
NCCL_INT64uint8
NCCL_UINT8uint32NCCL_UINT32uint64NCCL_UINT64int8	NCCL_INT8int32
NCCL_INT32int64half	NCCL_HALFfloat16NCCL_FLOAT16float32NCCL_FLOAT32float64NCCL_FLOAT64doubleNCCL_DOUBLEr.   r1   torch.utils.dlpackboolrL   NCCL_INTlongfloat
NCCL_FLOATr3   hasattrr   bfloat16r9   r   r   r   r   r#   r)   r7   r:   rA   rE   rI   rR   rY   rg   rl   r   r   r   <module>r      s   ' ' DKKK                       D D D
+B
C
CCD @ ? ? ? ? ? ? ? L$-dnL$-L$-	  
J	K	L$"	L$"	J	K	K	J	M4$	M4$	M4$	L$" " ? (LLL 	
DN	4=T_
DNT_T_
DO
DNT_t(t(t(d&& wt_%% B/3/AU^, 		5;U[
EJU[U[
EK
EJU]u}u}u}". . .
           
) 
) 
)      (         &
 &
 &
R    s   # 4