
    -`iN6                         d dl Z d dlmZ d dlmZmZ d dlmZ d dlm	Z	m
Z
mZmZmZmZmZ d dlmZ d dlmZ d dlmZ  ee          Zdad Z G d	 d
          ZdS )    N)ProcessGroupReduceOp)NCCLLibrarybuffer_typecudaStream_t
ncclComm_tncclDataTypeEnumncclRedOpTypeEnumncclUniqueId)StatelessProcessGroup)init_logger)current_streamFc                      ddl m ddlm} t          rd S dadt
          j        dt
          j        f fd}dt
          j        dt
          j        fd} |d	||
           d S )Nr   )nccl_symm_mem_context)direct_register_custom_opTinput_tensorreturnc                                5  t          j        |           }t          j        |           }d d d            n# 1 swxY w Y   |                    |                                ||          }|S N)torch
empty_likecopy_
all_reduce)r   
symm_inputsymm_outputr   pynccl_comms      /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/distributed/device_communicators/pynccl.py#all_reduce_symmetric_with_copy_implzHregister_nccl_symmetric_ops.<locals>.all_reduce_symmetric_with_copy_impl(   s    "";// 	9 	9),77J*<88K	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	&&&!,,ZEEs   )AA	Ac                 *    t          j        |           S r   )r   r   )r   s    r   #all_reduce_symmetric_with_copy_fakezHregister_nccl_symmetric_ops.<locals>.all_reduce_symmetric_with_copy_fake0   s    ---    all_reduce_symmetric_with_copy)op_nameop_func	fake_impl)6vllm.distributed.device_communicators.pynccl_allocatorr   vllm.utils.torch_utilsr   _NCCL_SYMM_OPS_REGISTEREDr   Tensor)r   r   r   r    r   s   `   @r   register_nccl_symmetric_opsr*      s          A@@@@@ !  $%, 5<       .%, .5< . . . . 035     r!   c            	          e Zd Z	 ddeez  deez  ej        z  dedz  fdZ	de
j        dfdej        dej        de
d	ej        fd
Z	 ddej        dej        fdZ	 ddej        dej        dee         fdZe
j        dfdej        dej        de
fdZe
j        dfdej        dej        dee         de
fdZddej        defdZddej        defdZddej        defdZd Zd Zdej        fdZdedefdZd ZdS ) PyNcclCommunicatorNgroupdevicelibrary_pathc                 F   t          |t                    syt          j                    sJ t          j        |          t          j        j        k    s
J d            t          j        |          | _        t          j	        |          | _
        n|j        | _        |j
        | _
        || _        | j
        dk    st          j        rd| _        d| _        dS 	 t!          |          | _        n# t$          $ r d| _        d| _        Y dS w xY wd| _        d| _        | j                                        | _        | j        dk    rS| j                                        | _        t.                              d| j                                        d	           nt5                      | _        t          |t                    st7          j        t;          | j        j                            }t          j        |          }t          j         ||d         |
           |!                                }tE          |          D ]\  }}|| j        j        |<   n!|#                    | j        d          | _        t          |tH                    rt7          j%        d|           }n)t          |tL                    rt7          j%        |          }t          |t6          j%                  sJ || _%        t6          j'        %                    |          5  | j        (                    | j
        | j        | j                  | _)        tU                      }	t7          j+        d|          }
| ,                    |
           |	-                                 ~
ddd           dS # 1 swxY w Y   dS )a  
        Args:
            group: the process group to work on. If None, it will use the
                default process group.
            device: the device to bind the PyNcclCommunicator to. If None,
                it will be bound to f"cuda:{local_rank}".
            library_path: the path to the NCCL library. If None, it will
                use the default library path.
        It is the caller's responsibility to make sure each communicator
        is bind to a unique device.
        z:PyNcclCommunicator should be attached to a non-NCCL group.   FTNr   zvLLM is using nccl==%slocal)scope)srcr-   )r4   zcuda:)r.   ).
isinstancer   distis_initializedget_backendBackendNCCLget_rankrankget_world_size
world_sizer-   envsVLLM_DISABLE_PYNCCL	availabledisabledr   nccl	ExceptionncclGetRawVersionnccl_versionncclGetUniqueId	unique_idlogger	info_oncencclGetVersionr   r   
ByteTensorlistinternalget_process_group_ranks	broadcasttolist	enumeratebroadcast_objintr.   strcudancclCommInitRankcommr   zerosr   synchronize)selfr-   r.   r/   tensorranks	byte_listibytestreamdatas              r   __init__zPyNcclCommunicator.__init__;   s   " %!677 
	/&(((((#E**dl.????L @?? e,,DI"1%88DOO
DI#.DO
 ?a4#;"DN DMF	#L11DII 	 	 	 #DN DMFF	  I77999>>!Y6688DN($)*B*B*D*DG     
 *^^DN%!677 		H%d4>+B&C&CDDF077EN6uQxu====I$Y// 2 24-1'**2 #00Q0GGDNfc"" 	*\"2&"2"233FF$$ 	*\&))F&%,///// Zv&& 
	 
	$(I$>$>% %DI $%%F;q000DOOD!!!   
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	s%   C+ +DD
A?NNN	in_tensor
out_tensoropr   c                    | j         rd S |j        | j        k    sJ d| j         d|j                     |t          j        |          }|t	                      }| j                            t          |                                          t          |                                          |	                                t          j        |j                  t          j        |          | j        t          |j                             |S N-this nccl communicator is created to work on , but the input tensor is on )rB   r.   r   r   r   rC   ncclAllReducer   data_ptrnumelr	   
from_torchdtyper
   rX   r   cuda_stream)r[   rd   re   rf   ra   s        r   r   zPyNcclCommunicator.all_reduce   s    = 	4 4;...=DK = =*3*:= = /..
 ))44J>#%%F		**,,--
++--..OO'	88(,,I+,,	
 	
 	
 r!   output_tensorr   c           
         | j         rd S |j        | j        k    sJ d| j         d|j                     |t                      }| j                            t          |                                          t          |                                          |                                t          j	        |j
                  | j        t          |j                             d S rh   )rB   r.   r   rC   ncclAllGatherr   rl   rm   r	   rn   ro   rX   r   rp   )r[   rq   r   ra   s       r   
all_gatherzPyNcclCommunicator.all_gather   s     = 	F "dk111@DK @ @*6*=@ @ 211 >#%%F	--//00..0011  '(:;;I+,,	
 	
 	
 	
 	
r!   sizesc                    | j         rd S |j        | j        k    sJ d| j         d|j                     |t                      }|j        d         t	          |          k    sJ d}| j                                         t          |          D ]\  }}||||z            }| j                            t          |
                                          t          |
                                          |                                t          j        |j                  || j        t!          |j                             ||z  }| j                                         d S )Nri   rj   r   )rB   r.   r   shapesumrC   ncclGroupStartrR   ncclBroadcastr   rl   rm   r	   rn   ro   rX   r   rp   ncclGroupEnd)	r[   rq   r   ru   ra   split_offsetroot
split_size	dst_slices	            r   all_gathervzPyNcclCommunicator.all_gatherv   sy    = 	F "dk111@DK @ @*6*=@ @ 211 >#%%F"1%U3333	  """ )% 0 0 	' 	'D*%l\J5N&NOII##L113344I..0011!! +L,>??	V/00   J&LL	     r!   c                    | j         rd S |j        | j        k    sJ d| j         d|j                     |t                      }| j                            t          |                                          t          |                                          |                                t          j	        |j
                  t          j	        |          | j        t          |j                             d S rh   )rB   r.   r   rC   ncclReduceScatterr   rl   rm   r	   rn   ro   r
   rX   r   rp   )r[   rq   r   rf   ra   s        r   reduce_scatterz!PyNcclCommunicator.reduce_scatter   s     = 	F "dk111@DK @ @*6*=@ @ 211 >#%%F	##--//00..0011!!'(:;;(,,I+,,	
 	
 	
 	
 	
r!   c                    | j         rd S |j        | j        k    sJ d| j         d|j                     |t                      }d}| j                                         t          |          D ]\  }}||||z   df         }	| j                            t          |	                                          t          |                                          |		                                t          j        |j                  t          j        |          || j        t          |j                             ||z  }| j                                         d S )Nri   rj   r   .)rB   r.   r   rC   ry   rR   
ncclReducer   rl   rm   r	   rn   ro   r
   rX   r   rp   r{   )
r[   rq   r   ru   rf   ra   r|   r}   r~   chunks
             r   reduce_scattervz"PyNcclCommunicator.reduce_scatterv  si    = 	F "dk111@DK @ @*6*=@ @ 211 >#%%F	  """ )% 0 0 	' 	'D* z0I!I3!NOEI  ENN,,--M224455 +L,>??!,R00	V/00	 	 	 J&LL	     r!   r\   dstc           
         | j         rd S |j        | j        k    sJ d| j         d|j                     |t                      }| j                            t          |                                          |                                t          j	        |j
                  || j        t          |j                             d S rh   )rB   r.   r   rC   ncclSendr   rl   rm   r	   rn   ro   rX   r   rp   )r[   r\   r   ra   s       r   sendzPyNcclCommunicator.send2      = 	F}+++:DK : :*0-: : ,++ >#%%F	))**LLNN'55I+,,	
 	
 	
 	
 	
r!   r4   c           
         | j         rd S |j        | j        k    sJ d| j         d|j                     |t                      }| j                            t          |                                          |                                t          j	        |j
                  || j        t          |j                             d S rh   )rB   r.   r   rC   ncclRecvr   rl   rm   r	   rn   ro   rX   r   rp   )r[   r\   r4   ra   s       r   recvzPyNcclCommunicator.recvD  r   r!   c                 F   | j         rd S |j        | j        k    sJ d| j         d|j                     |t                      }|| j        k    rCt	          |                                          }t	          |                                          }n/t	                      }t	          |                                          }| j                            |||                                t          j
        |j                  || j        t          |j                             d S rh   )rB   r.   r   r<   r   rl   rC   rz   rm   r	   rn   ro   rX   r   rp   )r[   r\   r4   ra   sendbuffrecvbuffs         r   rP   zPyNcclCommunicator.broadcastV  s   = 	F}+++:DK : :*0-: : ,++ >#%%F$)"6??#4#455H"6??#4#455HH"}}H"6??#4#455H	LLNN'55I+,,	
 	
 	
 	
 	
r!   c                 8    | j                                          d S r   )rC   ry   r[   s    r   group_startzPyNcclCommunicator.group_startp  s    	  """""r!   c                 8    | j                                          d S r   )rC   r{   r   s    r   	group_endzPyNcclCommunicator.group_ends  s    	     r!   c                     | j                             | j        t          |                                          |                                |                                z  d          S Nr1   )rC   ncclCommWindowRegisterrX   r   rl   rm   element_size)r[   r\   s     r   register_comm_windowz'PyNcclCommunicator.register_comm_windowv  sU    y//I))**LLNNV00222	
 
 	
r!   ptrsizec                 `    | j                             | j        t          |          |d          S r   )rC   r   rX   r   )r[   r   r   s      r   register_comm_window_rawz+PyNcclCommunicator.register_comm_window_raw~  s)    y//	;s;K;KTSTUUUr!   c                 B    | j                             | j        |          S r   )rC   ncclCommWindowDeregisterrX   )r[   windows     r   deregister_comm_windowz)PyNcclCommunicator.deregister_comm_window  s    y11$)VDDDr!   r   )__name__
__module____qualname__r   r   rT   rU   r   r.   rc   r   SUMr)   r   rt   rM   r   r   r   r   r   rP   r   r   r   r   r    r!   r   r,   r,   :   s       
 $(	Y Y33Y c	EL(Y Dj	Y Y Y Y| $(| < L 	 
   D OS
 
"\
9>
 
 
 
8 !! !!|!! l!! Cy	!! !! !! !!N  |
 
|
 l
 	
 
 
 
B  |#! #!|#! l#! Cy	#!
 #! #! #! #!J
 
5< 
c 
 
 
 
$
 
5< 
c 
 
 
 
$
 
 
3 
 
 
 
4# # #! ! !
5< 
 
 
 
VC Vs V V V VE E E E Er!   r,   )r   torch.distributeddistributedr6   r   r   	vllm.envsr?   4vllm.distributed.device_communicators.pynccl_wrapperr   r   r   r   r	   r
   r   vllm.distributed.utilsr   vllm.loggerr   r'   r   r   rI   r(   r*   r,   r   r!   r   <module>r      sH                4 4 4 4 4 4 4 4                        9 8 8 8 8 8 # # # # # # 1 1 1 1 1 1	X		!   :HE HE HE HE HE HE HE HE HE HEr!   