
    `i                     L    d dl Z d dlmZ d dlmZ d dlmZ deiZddddddZdS )	    N)nccl)_store)NCCLBackendr   F)backendhostportuse_mpic                   | dk    rt          d|            d|cxk    r| k     sn t          d| d|            |t          vrt          | d          |dk    rt          j        st	          d          |*t
          j                            d	t          j	                  }|7t          t
          j                            d
t          j                            }t          |         | ||||          S )a	  Start `cupyx.distributed` and obtain a communicator.

    This call initializes the distributed environment, it needs to be
    called for every process that is involved in the communications.

    A single device per returned communication is only allowed. It is the user
    responsibility of setting the appropiated gpu to be used before creating
    and using the communicator.

    Currently the user needs to specify each process rank and the total
    number of processes, and start all the processes in different hosts
    manually.

    The process with rank 0 will spawn a TCP server using a
    subprocess that listens in the port indicated by
    the env var `CUPYX_DISTRIBUTED_PORT`, the rank 0 must be executed
    in the host determined by the env var `CUPYX_DISTRIBUTED_HOST`.
    In case their values are not specified, `'127.0.0.1'` and `13333` will be
    used by default.

    Note that this feature is expected to be used within a trusted cluster
    environment.

    Example:

        >>> import cupy
        >>> def process_0():
        ...     import cupyx.distributed
        ...     cupy.cuda.Device(0).use()
        ...     comm = cupyx.distributed.init_process_group(2, 0)
        ...     array = cupy.ones(1)
        ...     comm.broadcast(array, 0)
        ...
        >>> def process_1():
        ...     import cupyx.distributed
        ...     cupy.cuda.Device(1).use()
        ...     comm = cupyx.distributed.init_process_group(2, 1)
        ...     array = cupy.zeros(1)
        ...     comm.broadcast(array, 0)
        ...     cupy.equal(array, cupy.ones(1))

    Args:
        n_devices (int): Total number of devices that will be used in the
            distributed execution.
        rank (int): Unique id of the GPU that the communicator is associated to
            its value needs to be `0 <= rank < n_devices`.
        backend (str): Backend to use for the communications. Optional,
            defaults to `"nccl"`.
        host (str): host address for the process rendezvous on initialization
            defaults to `None`.
        port (int): port for the process rendezvous on initialization
            defaults to `None`.
        use_mpi (bool): if ``False``, it avoids using MPI for synchronization
            and uses the provided TCP server for exchanging CPU only
            information.
            defaults to `False`.
    Returns:
        Backend: object used to perform communications, adheres to the
            :class:`~cupyx.distributed.Backend` specification:
    r   zInvalid number of devices zInvalid number of rank  z is not supportedr   zNCCL is not availableNCUPYX_DISTRIBUTED_HOSTCUPYX_DISTRIBUTED_PORT)
ValueError	_backendsr   	availableRuntimeErrorosenvirongetr   _DEFAULT_HOSTint_DEFAULT_PORT)	n_devicesrankr   r   r   r	   s         k/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/cupyx/distributed/_init.pyinit_process_groupr      s   ~ A~~AiAABBB!!!!	!!!!E4EE)EEFFFiG666777&2333|z~~68LMM|2:>>$f&:< < = = WitT7CCC    )	r   	cupy.cudar   cupyx.distributedr   cupyx.distributed._nccl_commr   r   r    r   r   <module>r!      s    				       $ $ $ $ $ $ 4 4 4 4 4 4 [!	 %+DMD MD MD MD MD MD MDr   