
    Pi                         d dl Z d dlmZ d dlmZmZ d dlZd dlmZ d dl	m
Z
  e
d          Zerd dlmZ nej        Zdeeef         fd	Zdefd
Z e            Zdee         fdZdej        dej        fdZdefdZdej        ddfdZddee         dej        fdZdedej        ddfdZ G d de          ZdefdZdefdZ dS )    N)Enum)OptionalTuple)_SUPPORTS_FLEX_ATTENTION)
get_loggerDEBUG)	BlockMaskreturnc                      t           j                                        rZt           j                                        r<t           j                                        t           j                                        fS dS )zFunction that gets the current world size (aka total number
    of ranks) and rank number of the current process in the default process group.

    Returns:
        Tuple[int, int]: world size, rank
    )   r   )torchdistributedis_availableis_initializedget_world_sizeget_rank     k/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchtune/utils/_device.pyget_world_size_and_rankr      s_     %%'' E,=,L,L,N,N  //1153D3M3M3O3OOOtr   c                  j    	 ddl } t          j                                        S # t          $ r Y dS w xY w)zCheck the availability of NPUr   NF)	torch_npur   npur   ImportError)r   s    r   is_torch_npu_availabler   %   sI    y%%'''   uus   !$ 
22c                  f    t           j                            d          } | t          |           } | S )zuFunction that gets the local rank from the environment.

    Returns:
        local_rank int or None if not set.
    
LOCAL_RANK)osenvirongetint)
local_ranks    r   _get_local_rankr#   2   s.     --J__
r   devicec                    t                      pd}t                      }|j        }|j        }t	                      }| j        t          j        ||          } | j        |                                k    rt          d| d          t          |d          st          d| d          |                    |            | S )	aU  Function that sets the CUDA-like device and infers the device
    index if not set.

    Args:
        device (torch.device): The device to set.

    Raises:
        RuntimeError: If device index is not available.
        AttributeError: If ``set_device`` is not supported for the device type (e.g. on MPS).

    Returns:
        device
    r   N)typeindexz6The local rank is larger than the number of available zs.
set_devicezThe device type z* does not support the `set_device` method.)r#   get_device_supportdevice_typedevice_nameget_torch_device_namespacer'   r   r$   device_countRuntimeErrorhasattrAttributeErrorr(   )r$   r"   device_supportr*   r+   torch_devices         r   _setup_devicer3   >   s     !""'aJ'))N ,K ,K-//L|;jAAA ||002222T[TTT
 
 	
 <.. 
V{VVV
 
 	
 F###Mr   c                      t           j                                        rd} n-t          rd} n#t           j                                        rd} nd} | S )zFunction that gets the torch.device based on the current machine.

    This currently only supports CPU, CUDA, NPU.

    Returns:
        device
    cudar   xpucpu)r   r5   r   is_npu_availabler6   r$   s    r   _get_device_type_from_envr:   b   sW     z   	 				!	! Mr   c                     t                      }| j        dk    r"| | j        |k    rt          d|  d|           	 t	          j        d|            dS # t          $ r}t          d|  d          |d}~ww xY w)	a  Function that validates the device is correct given the current machine.
    This will raise an error if the device is not available or doesn't match the
    assigned process device on distributed runs.

    Args:
        device (torch.device): The device to validate.

    Raises:
        RuntimeError: If the device is not available or doesn't match the assigned process device.

    Returns:
        device
    r7   NzVYou can't specify a device index when using distributed training. Device specified is z but local rank is:r   r9   zThe device z" is not available on this machine.)r#   r&   r'   r.   r   empty)r$   r"   es      r   _validate_device_from_envr>   u   s     !""J {e
 6<:%%O'-O OBLO O  Af%%%%%%   D&DDD
 
	s   A 
A8A33A8c                     | t                      } t          j        |           } | j        dv rt	          |           } t          |            | S )ax  Function that takes an optional device string, verifies it's correct and available given the machine and
    distributed settings, and returns a :func:`~torch.device`. If device string is not provided, this function will
    infer the device based on the environment.

    If CUDA-like is available and being used, this function also sets the CUDA-like device.

    Args:
        device (Optional[str]): The name of the device to use, e.g. "cuda" or "cpu" or "npu" or "xpu".

    Example:
        >>> device = get_device("cuda")
        >>> device
        device(type='cuda', index=0)

    Returns:
        torch.device: Device
    N)r5   r   r6   )r:   r   r$   r&   r3   r>   r9   s    r   
get_devicer@      sQ    $ ~*,,\&!!F{,,,v&&f%%%Mr   batchc           	         |                                  D ]\  }}t          |t                    rt          ||           +t          |t          j                  r|                    |          | |<   ^t          r.t          |t                    r|                    |          | |<   t          d| dt          |                     dS )a  Function that takes a dictionary (or nested dictionary) of tensors and sets them
    all to the same device. This utility is intended to be used for batches of data to be
    moved to device, the update is inplace.

    Args:
        batch (dict): dict of Tensors or more nested dicts of tensors.
        device (torch.device): torch device to move the tensors to.

    Raises:
        ValueError: if batch dict contains anything other than ``torch.Tensor``.

    zUTo use batch_to_device, all elements in the batch must be a dict or Tensor.
Got key "z" with value of type N)items
isinstancedictbatch_to_devicer   Tensortor   r	   
ValueErrorr&   )rA   r$   kvs       r   rF   rF      s       1a 
	Av&&&&5<(( 	ttF||E!HH% 	*Q	*B*B 	ttF||E!HH.
. ."&q''. .   r   c                   R    e Zd ZdZdZdZdZdZdededefd	Z	e
defd
            ZdS )DeviceSupporta  
    This is a simple enum for compute devices,
    This currently only supports CPU, CUDA, NPU, and XPU.
    The following enumeration defines various device configurations with attributes:
    1. `device_type` (str): The type of device (e.g., "cpu", "cuda", "npu", "xpu").
    2. `device_name` (str): A user-friendly name for the device (e.g., "CPU", "GPU", "NPU", "XPU").
    3. `communication_backend` (str): Specifies the backend used for communication on this device
    (e.g., "gloo", "nccl", "hccl", "ccl").
    )r7   CPUgloo)r5   GPUnccl)r   NPUhccl)r6   XPUcclr*   r+   communication_backendc                 0    || _         || _        || _        d S N)r*   r+   rV   )selfr*   r+   rV   s       r   __init__zDeviceSupport.__init__   s"     '&%:"""r   c                 Z    t           D ]}|j        | k    r|c S t          d|  d          )NzUnknown device type: .)rM   r*   rI   )r*   members     r   	from_typezDeviceSupport.from_type   sG    # 	 	F![00 1????@@@r   N)__name__
__module____qualname____doc__rN   CUDArR   rT   strrZ   staticmethodr^   r   r   r   rM   rM      s          !C"D
 C
C;; ;  #	; ; ; ; As A A A \A A Ar   rM   c                  R    t                      } t                              |           S )zfunction that gets the DeviceSupport with compute devices based on the current machine.

    This currently only supports CPU, CUDA, NPU, XPU.

    Returns:
        device_support: DeviceSupport
    )r:   rM   r^   r*   s    r   r)   r)      s#     ,--K"";///r   c                      t                      j        } 	 t          t          |           S # t          $ r- t
                              d|  d           t          j        cY S w xY w)zReturn the corresponding torch attribute based on the device type string.

    Returns:
        module: The corresponding torch device namespace, or torch.cuda if not found.
    zDevice namespace 'z-' not found in torch, try to load torch.cuda.)r)   r*   getattrr   r0   loggerwarningr5   rg   s    r   r,   r,      sv     %&&2Kuk***   [[[[	
 	
 	
 z	s   * 4A! A!rX   )!r   enumr   typingr   r   r   torchtune.utils._import_guardr   torchtune.utils._loggingr   rj   !torch.nn.attention.flex_attentionr	   rG   r!   r   boolr   r8   r#   r$   r3   rd   r:   r>   r@   rE   rF   rM   r)   anyr,   r   r   r   <module>rs      s:   
			       " " " " " " " "  B B B B B B / / / / / /	G		 ;;;;;;;I
sCx 
 
 
 
     *)++ 	# 	 	 	 	!%, !5< ! ! ! !H3    &el t    D x}     64  $    6A A A A AD A A AD	0M 	0 	0 	0 	0C      r   