
    Pi0                     |   d dl Z d dlmZ d dlmZmZmZmZmZ d dl	Z	d dl
mZ  G d de	j        j                  Z G d de	j        j                  Z G d	 d
ee          ZdedededefdZdej        ddfdee	j        j        ee	j        j                 f         dedee         dee         dee         ddfdZdS )    N)Enum)AnyCallableIterableOptionalUnion)slice_str_to_arrayc            
            e Zd ZdZ	 	 	 	 ddedee         dee         d	ee         f fd
Zde	e
ej        j        f         dej        dej        fdZ xZS )LayerDropouta  
    A module that applies layer dropout to the input tensor of an underlying module.
    It drops a portion of an input tensor, applies the underlying module on the
    remaining parts of the tensor, and then concatenates with the dropped portion of the tensor.
    When applied during training, it can have a regularization effect, and can potentially speedup training.

    Args:
        prob (float): The probability of dropping an input. Defaults to 0.0.
        dim (Optional[int]): The dimension of input tensor along which to drop layers. Defaults to 0 (i.e., batch size).
        disable_on_eval (Optional[bool]): Whether to disable layer dropout during evaluation. Defaults to True.
        seed (Optional[int]): The seed for the random number generator. Defaults to None.
    Examples:
        >>> import torch
        >>> # Apply layer dropout to a lambda function
        >>> layer_dropout = LayerDropout(prob=0.5)
        >>> output = layer_dropout(lambda x: x**2, torch.randn(1))
        >>> # Apply layer dropout to a torch.nn.Linear module
        >>> linear = torch.nn.Linear(5, 3)
        >>> layer_dropout = LayerDropout(prob=0.5)
        >>> output = layer_dropout(linear, torch.randn(1, 5))
            r   TNprobdimdisable_on_evalseedc                     t                                                       || _        || _        || _        t          j        d          | _        d | _        || j        	                    |           d S d S )Ncpu)device)
super__init__r   r   r   torch	Generator	generatorinferredmanual_seed)selfr   r   r   r   	__class__s        s/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchtune/modules/layer_dropout.pyr   zLayerDropout.__init__'   sv     		%4666#N&&t,,,,,     functioninputreturnc                    |j         | j                 }| j        dk    s| j        r| j        du rd| _         ||g|R i |S t          j        t          j        || j        gz            | j	                  
                    |j                  
                    |j                  }dt          j        |          z
  | _        |dk                                                                    }|                                dk    r't          j        || j        |          } ||g|R i |}	|                                }
| j        dk    s
J d            |                                dk    r|	|
|<   |
S )a  
        Apply layer dropout to the input tensor.

        Args:
            function (Union[Callable, torch.nn.Module]): The function or module to apply to the input tensor.
            input (torch.Tensor): The input tensor.
            *args: Additional positional arguments passed to the function.
            **kwargs: Additional keyword arguments passed to the function.
        Returns:
            torch.Tensor: The output tensor after applying layer dropout.
        r   F      ?)r      zCCurrently only supporting dropping elements along the 0th dimension)shaper   r   r   trainingr   r   	bernoulliTensorr   tor   dtypemeannonzerosqueezenumelindex_selectclone)r   r   r    argskwargsnskipind_selected
x_selectedout_selectedouts              r   forwardzLayerDropout.forward8   sr   $ K!9>>d2>t}7M7MDM8E3D333F333 OEL!	{):;;t~VVVRR__ 	
 EJt,,,	**,,4466!##+E48\JJJ#8J@@@@@@LkkmmHMMMP MM!## ,C
r   )r   r   TN)__name__
__module____qualname____doc__floatr   intboolr   r   r   r   nnModuler(   r9   __classcell__r   s   @r   r   r      s         0 *."- -- c]- "$	-
 sm- - - - - -"*%(/12* |* 
* * * * * * * *r   r   c                        e Zd ZdZdej        j        def fdZdej	        fdZ
dedef fd	Zded
edef fdZdedefdZd Zd Z xZS )ModuleLayerDropoutWrappera<  
    A wrapper module that adds layer dropout functionality to a given module.
    This class wraps a given module and applies layer dropout to it. It also
    provides getter and setter methods for the wrapped module's attributes.

    Args:
        module (torch.nn.Module): The module to wrap.
        dropout (LayerDropout): The layer dropout object.
    Examples:
        >>> import torch
        >>> from torch import nn
        >>> # Define a simple model
        >>> class MyModel(nn.Module):
        ...     def __init__(self):
        ...         super().__init__()
        ...         self.fc1 = nn.Linear(5, 3)
        ...         self.fc2 = nn.Linear(3, 2)
        ...
        ...     def forward(self, x):
        ...         return self.fc2(self.fc1(x))
        >>> model = MyModel()
        >>> fc1 = model.fc1
        >>> fc2 = model.fc2
        >>> # Apply layer dropout to the model
        >>> layer_dropout = LayerDropout(prob=0.5)
        >>> model = ModuleLayerDropoutWrapper(model, layer_dropout)
        >>> # Accessing attributes of the wrapped model
        >>> assert model.dropout.prob == 0.5
        >>> assert model.fc1 == fc1
        >>> assert model.fc2 == fc2
        >>> # Pass an input to the wrapped model as if you are passing it to the original model
        >>> output = model(torch.randn(1, 5))
    moduledropoutc                 d    t                                                       || _        || _        d S N)r   r   rG   rH   )r   rG   rH   r   s      r   r   z"ModuleLayerDropoutWrapper.__init__   s+    r   r    c                 0     | j         | j        |g|R i |S rJ   )rH   rG   )r   r    r1   r2   s       r   r9   z!ModuleLayerDropoutWrapper.forward   s(    t|DK@@@@@@@r   namer!   c                     	 t                                          |          S # t          $ r t          | j        |          cY S w xY wz-Forward missing attributes to wrapped module.)r   __getattr__AttributeErrorgetattrrG   )r   rL   r   s     r   rO   z%ModuleLayerDropoutWrapper.__getattr__   sS    	.77&&t,,, 	. 	. 	.4;-----	.s    $ AAvaluec                     	 t                                          ||          S # t          $ r t          | j        ||          cY S w xY wrN   )r   __setattr__rP   setattrrG   )r   rL   rR   r   s      r   rT   z%ModuleLayerDropoutWrapper.__setattr__   sW    	577&&tU333 	5 	5 	54;e44444	5s   !%  AAkeyc                 6    | j                             |          S )z=Forward indexing calls in case the module is a nn.Sequential.)rG   __getitem__)r   rV   s     r   rX   z%ModuleLayerDropoutWrapper.__getitem__   s    {&&s+++r   c                 &     | j         j        |i |S )z2Return the state dictionary of the wrapped module.)rG   
state_dict)r   r1   r2   s      r   rZ   z$ModuleLayerDropoutWrapper.state_dict   s    %t{%t6v666r   c                 2     | j         j        |g|R i | dS )z2Load the state dictionary into the wrapped module.N)rG   load_state_dict)r   rZ   r1   r2   s       r   r\   z)ModuleLayerDropoutWrapper.load_state_dict   s,    ##J@@@@@@@r   )r:   r;   r<   r=   r   rA   rB   r   r   r(   r9   strr   rO   rT   r?   rX   rZ   r\   rC   rD   s   @r   rF   rF   e   s          Dux       
AU\ A A A A. . . . . . . .5 5C 5C 5 5 5 5 5 5,s ,s , , , ,7 7 7      r   rF   c                   *    e Zd ZdZdZdZdZdZdZdZ	dS )		ScaleTypeuniformexplinearlogsinsigmoidstepN)
r:   r;   r<   UNIFORMEXPLINEARLOGSINSIGMOIDSTEP r   r   r_   r_      s1        G
CF
C
CGDDDr   r_   
scale_typescale_periodvalr!   c                    |dk    rdS ||k    rdS t           j        dt           j        t          j        d||z            dz
  t           j        ||z  t           j        t          j        |dz   |dz             t           j        t          j	        dt          j
        z  |z  |z            t           j        ddt          j        d||z  dz
  z            z   z  i|          }t          t          |d          d          S )a  
    Compute a scaling factor based on the provided scale type, period, and value.
    The scaling factor is designed to be 0 when the value is 0 and 1 when the value
    reaches or is larger than the scale period.

    Args:
        scale_type (ScaleType): The type of scaling to use.
        scale_period (int): The period over which the scaling factor increases from 0 to 1.
        val (int): The current value used to compute the scaling factor.
    Returns:
        float: The computed scaling factor.
    Examples:
        >>> get_scale(ScaleType.LINEAR, 10, 5)
        0.5
        >>> get_scale(ScaleType.LINEAR, 10, 0)
        0.0
        >>> get_scale(ScaleType.LOG, 10, 10)
        1.0
    r   r#      r$   g      ?ir   )r_   rg   rh   mathpowri   rj   rc   rk   rd   pirl   ra   maxmin)ro   rp   rq   scales       r   	get_scalerz      s    0 qs
ls 	3tx3#566:#,txa)9::txdg 3l BCC1DHSC,4F4L-M$N$N NO E s5#$$$r   r   Tlayersprob_maxprob_layer_scale
layers_strr   c                 d   t          |           }|rt          ||          ndg|z  }t          t          |                     D ]k}||         r|t          ||dz
  |          z  nd}|dk    r||k    sJ d| d|             t	          |||          }	t          | |         |	          | |<   ldS )	a  
    Prepare a model's layers for layer dropout by wrapping each layer with a ModuleLayerDropoutWrapper.
    This function takes in a list of layers, the maximum probability of dropping a layer,
    the scaling type for the layer dropout probability, a string specifying which
    layers to apply dropout to, and a boolean indicating whether to disable dropout
    during evaluation. It then wraps each layer of the model inplace with a
    ModuleLayerDropoutWrapper, which applies layer dropout to the input tensor.

    Args:
        layers (Union[torch.nn.ModuleList, Iterable[torch.nn.Module]]): The list of layers to prepare for layer dropout.
        prob_max (float): The maximum probability of dropping a layer. Defaults to 0.0.
        prob_layer_scale (Optional[ScaleType]): The scaling type for the dropout probability across layers. Defaults to
            ScaleType.UNIFORM.
        layers_str (Optional[str]): A string specifying which layers to apply dropout to. Defaults to None which means
            apply to all layers.
        disable_on_eval (Optional[bool]): Whether to disable dropout during evaluation. Defaults to True.
    Returns:
        None
    Example:
        >>> import torch
        >>> from torch import nn
        >>> # Define a simple model
        >>> class MyModel(nn.Module):
        ...     def __init__(self):
        ...         super().__init__()
        ...         self.layers = nn.ModuleList([
        ...             nn.Linear(5, 3),
        ...             nn.Linear(3, 2),
        ...             nn.Linear(2, 1),
        ...             nn.Linear(1, 2),
        ...             nn.Linear(2, 3),
        ...         ])
        ...
        ...     def forward(self, x):
        ...         for layer in self.layers:
        ...             x = layer(x)
        ...         return x
        >>> model = MyModel()
        >>> # Apply layer dropout uniformly to all layers
        >>> prepare_layer_dropout(model.layers, prob_max=0.2, prob_layer_scale=ScaleType.UNIFORM)
        >>> # Apply layer dropout every other layer, as described in LayerDrop paper
            (Fan et al., https://arxiv.org/abs/1909.11556v1)
        >>> prepare_layer_dropout(model.layers, prob_max=0.2, prob_layer_scale=ScaleType.UNIFORM, layers_str="::2")
        >>> # Apply layer dropout that increases linearly across layers, as described in Progressive Layer
            Dropout paper (Zhang et al., https://arxiv.org/abs/2010.13369)
        >>> prepare_layer_dropout(model.layers, prob_max=0.2, prob_layer_scale=ScaleType.LINEAR)
        >>> # Apply layer dropout that increases exponentially across layers, as described in
            LayerSkip paper (Elhoushi et al., https://arxiv.org/abs/2404.16710)
        >>> prepare_layer_dropout(model.layers, prob_max=0.2, prob_layer_scale=ScaleType.EXP)
    Tr$   )ro   rp   rq   r   zprob=z should be between 0 and )r   r   N)lenr	   rangerz   r   rF   )
r{   r|   r}   r~   r   
num_layershas_dropoutlayer_idr   layer_dropouts
             r   prepare_layer_dropoutr      s   r VJ 	!:z222Vj  
 #f++&& V V 8$H+'!^     	 CKKDH,,,<4<<(<< -,, %/
 
 
 5VH5E}UUx'V Vr   )rt   enumr   typingr   r   r   r   r   r   torchtune.modules.common_utilsr	   rA   rB   r   rF   r]   r_   r?   r>   rz   rg   
ModuleListr@   r   rn   r   r   <module>r      s          ; ; ; ; ; ; ; ; ; ; ; ; ; ;  = = = = = =R R R R R58? R R RjD D D D D D D DN    T   (%(%(% 
(% 	(% (% (% (%Z ,5,= $&*RV RV%(%x'@@ARVRV y)RV 	RV
 d^RV 
RV RV RV RV RV RVr   