
    Pi"                     d    d Z ddlZ G d d          Z G d de          Z G d de          ZdS )	z'Stochastic optimization methods for MLP    Nc                   ,    e Zd ZdZddZd Zd Zd ZdS )	BaseOptimizera9  Base (Stochastic) gradient descent optimizer

    Parameters
    ----------
    learning_rate_init : float, default=0.1
        The initial learning rate used. It controls the step-size in updating
        the weights

    Attributes
    ----------
    learning_rate : float
        the current learning rate
    皙?c                 <    || _         t          |          | _        d S N)learning_rate_initfloatlearning_rate)selfr   s     /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/sklearn/neural_network/_stochastic_optimizers.py__init__zBaseOptimizer.__init__   s!    "4"#566    c                 z    |                      |          }t          d |D             |          D ]
\  }}||z  }dS )a  Update parameters with given gradients

        Parameters
        ----------
        params : list of length = len(coefs_) + len(intercepts_)
            The concatenated list containing coefs_ and intercepts_ in MLP
            model. Used for initializing velocities and updating params

        grads : list of length = len(params)
            Containing gradients with respect to coefs_ and intercepts_ in MLP
            model. So length should be aligned with params
        c              3      K   | ]}|V  d S r    ).0ps     r   	<genexpr>z.BaseOptimizer.update_params.<locals>.<genexpr>*   s"      !4!4!!4!4!4!4!4!4r   N)_get_updateszip)r   paramsgradsupdatesparamupdates         r   update_paramszBaseOptimizer.update_params   sX     ##E** !4!4V!4!4!4g>> 	 	ME6VOEE	 	r   c                     dS )zhPerform update to learning rate and potentially other states at the
        end of an iteration
        Nr   r   	time_steps     r   iteration_endszBaseOptimizer.iteration_ends-   s	     	r   c                 .    |rt          |dz              dS )aH  Decides whether it is time to stop training

        Parameters
        ----------
        msg : str
            Message passed in for verbose output

        verbose : bool
            Print message to stdin if True

        Returns
        -------
        is_stopping : bool
            True if training needs to stop
        
 Stopping.T)printr   msgverboses      r   trigger_stoppingzBaseOptimizer.trigger_stopping3   s$       	&#$%%%tr   N)r   )__name__
__module____qualname____doc__r   r   r    r'   r   r   r   r   r   	   s_         7 7 7 7  "      r   r   c                   @     e Zd ZdZ	 	 	 	 	 d fd	Zd Zd	 Zd
 Z xZS )SGDOptimizera  Stochastic gradient descent optimizer with momentum

    Parameters
    ----------
    params : list, length = len(coefs_) + len(intercepts_)
        The concatenated list containing coefs_ and intercepts_ in MLP model.
        Used for initializing velocities and updating params

    learning_rate_init : float, default=0.1
        The initial learning rate used. It controls the step-size in updating
        the weights

    lr_schedule : {'constant', 'adaptive', 'invscaling'}, default='constant'
        Learning rate schedule for weight updates.

        -'constant', is a constant learning rate given by
         'learning_rate_init'.

        -'invscaling' gradually decreases the learning rate 'learning_rate_' at
          each time step 't' using an inverse scaling exponent of 'power_t'.
          learning_rate_ = learning_rate_init / pow(t, power_t)

        -'adaptive', keeps the learning rate constant to
         'learning_rate_init' as long as the training keeps decreasing.
         Each time 2 consecutive epochs fail to decrease the training loss by
         tol, or fail to increase validation score by tol if 'early_stopping'
         is on, the current learning rate is divided by 5.

    momentum : float, default=0.9
        Value of momentum used, must be larger than or equal to 0

    nesterov : bool, default=True
        Whether to use nesterov's momentum or not. Use nesterov's if True

    power_t : float, default=0.5
        Power of time step 't' in inverse scaling. See `lr_schedule` for
        more details.

    Attributes
    ----------
    learning_rate : float
        the current learning rate

    velocities : list, length = len(params)
        velocities that are used to update params
    r   constant?T      ?c                     t                                          |           || _        || _        || _        || _        d |D             | _        d S )Nc                 6    g | ]}t          j        |          S r   np
zeros_liker   r   s     r   
<listcomp>z)SGDOptimizer.__init__.<locals>.<listcomp>   s"    DDDE2=//DDDr   )superr   lr_schedulemomentumnesterovpower_t
velocities)r   r   r   r9   r:   r;   r<   	__class__s          r   r   zSGDOptimizer.__init__x   sT     	+,,,&  DDVDDDr   c                 n    | j         dk    r)t          | j                  |dz   | j        z  z  | _        dS dS )a  Perform updates to learning rate and potential other states at the
        end of an iteration

        Parameters
        ----------
        time_step : int
            number of training samples trained on so far, used to update
            learning rate for 'invscaling'
        
invscaling   N)r9   r	   r   r<   r
   r   s     r   r    zSGDOptimizer.iteration_ends   sF     |++d-..)a-DL1PP  ,+r   c                     | j         dk    r|rt          |dz              dS | j        dk    r|rt          |dz              dS | xj        dz  c_        |rt          |d| j        z  z              dS )	Nadaptiver"   Tgư>z# Learning rate too small. Stopping.g      @z Setting learning rate to %fF)r9   r#   r
   r$   s      r   r'   zSGDOptimizer.trigger_stopping   s    z)) *cL()))4%% CcAABBB4c! 	M#69KKKLLLur   c                       fdt           j        |          D             }| _         j        r! fdt           j        |          D             }|S )  Get the values used to update params with given gradients

        Parameters
        ----------
        grads : list, length = len(coefs_) + len(intercepts_)
            Containing gradients with respect to coefs_ and intercepts_ in MLP
            model. So length should be aligned with params

        Returns
        -------
        updates : list, length = len(grads)
            The values to add to params
        c                 @    g | ]\  }}j         |z  j        |z  z
  S r   r:   r
   r   velocitygradr   s      r   r7   z-SGDOptimizer._get_updates.<locals>.<listcomp>   sA     
 
 
$ MH$t'9D'@@
 
 
r   c                 @    g | ]\  }}j         |z  j        |z  z
  S r   rG   rH   s      r   r7   z-SGDOptimizer._get_updates.<locals>.<listcomp>   sA       "Hd (4+=+DD  r   )r   r=   r;   r   r   r   s   `  r   r   zSGDOptimizer._get_updates   s    
 
 
 
"%dou"="=
 
 
 "= 	   &)$/5&A&A  G
 r   )r   r.   r/   Tr0   )	r(   r)   r*   r+   r   r    r'   r   __classcell__r>   s   @r   r-   r-   H   s        - -d E E E E E E"           r   r-   c                   ,     e Zd ZdZ	 d fd	Zd Z xZS )	AdamOptimizera  Stochastic gradient descent optimizer with Adam

    Note: All default values are from the original Adam paper

    Parameters
    ----------
    params : list, length = len(coefs_) + len(intercepts_)
        The concatenated list containing coefs_ and intercepts_ in MLP model.
        Used for initializing velocities and updating params

    learning_rate_init : float, default=0.001
        The initial learning rate used. It controls the step-size in updating
        the weights

    beta_1 : float, default=0.9
        Exponential decay rate for estimates of first moment vector, should be
        in [0, 1)

    beta_2 : float, default=0.999
        Exponential decay rate for estimates of second moment vector, should be
        in [0, 1)

    epsilon : float, default=1e-8
        Value for numerical stability

    Attributes
    ----------
    learning_rate : float
        The current learning rate

    t : int
        Timestep

    ms : list, length = len(params)
        First moment vectors

    vs : list, length = len(params)
        Second moment vectors

    References
    ----------
    :arxiv:`Kingma, Diederik, and Jimmy Ba (2014) "Adam: A method for
        stochastic optimization." <1412.6980>
    MbP?r/   +?:0yE>c                     t                                          |           || _        || _        || _        d| _        d |D             | _        d |D             | _        d S )Nr   c                 6    g | ]}t          j        |          S r   r3   r6   s     r   r7   z*AdamOptimizer.__init__.<locals>.<listcomp>   "    <<<E2=''<<<r   c                 6    g | ]}t          j        |          S r   r3   r6   s     r   r7   z*AdamOptimizer.__init__.<locals>.<listcomp>   rV   r   )r8   r   beta_1beta_2epsilontmsvs)r   r   r   rX   rY   rZ   r>   s         r   r   zAdamOptimizer.__init__   sh     	+,,,<<V<<<<<V<<<r   c                      xj         dz  c_          fdt           j        |          D              _         fdt           j        |          D              _         j        t          j        d j         j         z  z
            z  d j         j         z  z
  z   _	         fdt           j         j                  D             }|S )rE   rA   c                 F    g | ]\  }}j         |z  d j         z
  |z  z   S )rA   )rX   )r   mrJ   r   s      r   r7   z.AdamOptimizer._get_updates.<locals>.<listcomp>  sC     
 
 
4 K!Oq4;$66
 
 
r   c                 L    g | ] \  }}j         |z  d j         z
  |dz  z  z   !S )rA      )rY   )r   vrJ   r   s      r   r7   z.AdamOptimizer._get_updates.<locals>.<listcomp>  sG     
 
 
4 K!Oq4;47;;
 
 
r   c                 f    g | ]-\  }}j          |z  t          j        |          j        z   z  .S r   )r
   r4   sqrtrZ   )r   r`   rc   r   s      r   r7   z.AdamOptimizer._get_updates.<locals>.<listcomp>  sL     
 
 
1 !#rwqzzDL'@A
 
 
r   )
r[   r   r\   r]   r   r4   re   rY   rX   r
   rL   s   `  r   r   zAdamOptimizer._get_updates   s    	!
 
 
 
tw..
 
 

 
 
 
tw..
 
 

 #ga$+tv--../4;&&( 	

 
 
 
DGTW--
 
 
 r   )rQ   r/   rR   rS   )r(   r)   r*   r+   r   r   rM   rN   s   @r   rP   rP      s^        + +\ SW
= 
= 
= 
= 
= 
=             r   rP   )r+   numpyr4   r   r-   rP   r   r   r   <module>rg      s    - -
    < < < < < < < <~z z z z z= z z zzZ Z Z Z ZM Z Z Z Z Zr   