
    Pip                    L   d dl mZ d dlmZ d dlmZ d dlmZ d dlZd dlm	Z	 d dl
mZ d dlm	c mZ d dlmZmZ d d	lmZmZmZmZ d d
lmZmZ d dlmZmZ 	 d Zd Zd ZddZ ddZ! G d de          Z G d de          Z" e#e           e"_          e#e!          e"_!        dS )    )annotations)Callable)partial)	randrangeN)nn)Module)tree_flattentree_unflatten)	rearrangerepeatreduceeinsum)Reduce	Rearrange)ResidualRMSNormc                
    | d uS N )vs    /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/hyper_connections/hyper_connections_channel_first.pyexistsr      s    D=    c                (    t          |           r| n|S r   )r   )r   ds     r   defaultr   "   s    q		 11q r   c                    | S r   r   )ts    r   identityr   %   s    Hr   Fc                    |r&t          j                    t          j                    fS t          dd|           }t          dd|           }||fS )Nzb ... -> (b s) ...r   )pattern	reductionsz(b s) ... -> b ...sum)r   Identityr   )num_streamsdisable	expand_fn	reduce_fns       r   "get_expand_reduce_stream_functionsr*   *   sW     .r{}}--!58Q\]]]I!55kZZZIir   c                    t          || dk              }|st          nt          }t          ||           }t	          | |          }|g|R S )N   )r'   )r   HyperConnectionsr   r   r*   )r&   r'   hyper_conn_klassinit_hyper_conn_fnexpand_reduce_fnss        r   +get_init_and_expand_reduce_stream_functionsr1   4   s^    g{a/00G/6D''H !1;??:;RYZZZ3!2333r   c                  $     e Zd Z fdZd Z xZS )r   c                    t                                                       |dz  | _        t          j        t          j        |dd                    | _        d S )Ng      ?r,   )super__init__scaler   	Parametertorchzerosgamma)selfdim	__class__s     r   r5   zRMSNorm.__init__B   sG    CZ
\%+c1a"8"899


r   c                T    t          j        |d          | j        z  | j        dz   z  S )Nr,   r<   )F	normalizer6   r:   )r;   xs     r   forwardzRMSNorm.forwardG   s)    {1A&&&3tzA~FFr   )__name__
__module____qualname__r5   rC   __classcell__r=   s   @r   r   r   A   sN        : : : : :
G G G G G G Gr   r   c                  H     e Zd Zddddddd fdZd Zd	 ZddZd Z xZS )r-   NTg        )branchlayer_indextanhchannel_firstdropoutrJ   Module | Nonec                  t                                                       || _        |rt          j                    nt          j                    | _        t          |          | _        |dk    s
J d            || _	        t          |t          |                    |z  }t          j        t          j        |                    | _        t          j        |df          }	d|	|df<   t          j        t          j        |	t          j        |          gd                    | _        t          j        ||dz   dd          | _        t          j                            | j        j                   t          j        t          j        |ddd          t5          d                    | _        t          j                            | j        d         j                   t          j        t          j        d	          d
z            | _        t          j        t          j        d	          d
z            | _        t          j        |          | _        dS )zN
        Appendix J, Algorithm2 in - https://arxiv.org/abs/2409.19606
        r   z-`num_residual_streams` must be greater than 0r,   g      ?r?   F)biaszb 1 ... -> b ...r   g{Gz?N) r4   r5   rJ   r   Tanhr%   actr   normnum_residual_streamsr   r   r7   r8   onesstatic_betar9   cateyestatic_alphaConv2ddynamic_alpha_fninitzeros_weight
Sequentialr   dynamic_beta_fndynamic_alpha_scaledynamic_beta_scaleDropoutrN   )r;   rU   r<   rJ   rK   rL   rM   rN   init_residual_indexinit_alpha0r=   s             r   r5   zHyperConnections.__init__M   s    	 !%727999"+--CLL	#a''')X'''$8!%k9=Q3R3RSSVjj<
3G(H(HIIk#7";<<.0'*+LKK_A`A`3aij)k)k)kll "	#/Ca/GSX Y Y Y
t,3444!}Ic1a...()) 
  

 	t+A.5666#%<
20E#F#F "$,uz"~~/D"E"E z'**r   c                ~   |                      |          }|                     |                     |                    }|| j        z  }t	          |d| j                  }|t	          | j        d          z   }|                     |                     |                    }|| j        z  }t	          |d| j                  }|t	          | j	        d          z   }t	          |d| j                  }t          ||d          }	|	d d ddf         |	d d dd df         }}
t	          |d	          }|
|t          |
          fS )Nz(b s) ... -> b s ...)r#   zs t -> s t 1 1z
s -> s 1 1z!b s t ..., b s d ... -> b t d ...r   .r,   b s d ... -> (b s) d ...)beta)rT   rS   r\   rb   r   rU   rZ   ra   rc   rW   r   dict)r;   	residualsnormed	wc_weightdynamic_alphaalpha	dc_weightdynamic_betari   mix_hbranch_inputs              r   width_connectionz!HyperConnections.width_connection   sM    9%% HHT226::;;	!D$<<!-1GTMfggg	$*;=M N NN HHT11&99::	 4#:: /E4Kdeeei(8,GGGi)?TE^___	ui)LMM"'1c	"2E!!!QRR*4Eii)CDD	YD(9(9(999r   c               x    t          ||d          }t          |d          }||z   }|                     |          S )Nzb d ..., b s ... -> b s d ...rh   )r   r   rN   )r;   branch_outputrk   ri   outputs        r   depth_connectionz!HyperConnections.depth_connection   sB     t-LMM6#=>>&	||I&&&r   r   c                R     t           j                  r
J d             fd}|S )Nz"branch was already wrapped on initc                f                         |           \  }} |g|R i |} ||          } | S r   )rC   )residualargskwargsrs   add_residualrv   rJ   r;   s         r   forward_and_add_residualzBHyperConnections.decorate_branch.<locals>.forward_and_add_residual   sL    )-h)?)?&L,"F<A$AAA&AAM#|M22HOr   )r   rJ   )r;   rJ   r   s   `` r   decorate_branchz HyperConnections.decorate_branch   sI    $+&&LL(LLL&	 	 	 	 	 	 ('r   c                                                     \  } fd}t           j                  s||fS   j        |g|R i |} ||          S )Nc                r    t          |           \  ^} }} j        | fi } t          | g|R |          S r   )r	   rx   r
   )
branch_outrest	tree_specresidual_kwargsrk   r;   s      r   add_residual_fnz1HyperConnections.forward.<locals>.add_residual_fn   sQ    -9*-E-E*Z$..z9XXXXJ!:"5"5"5yAAAr   )rt   r   rJ   )r;   rk   branch_argsbranch_kwargsrs   r   rv   r   s   ``     @r   rC   zHyperConnections.forward   s    373H3H3S3S0i	B 	B 	B 	B 	B 	B 	B dk"" 	100#LP;PPP-PP}---r   )rJ   rO   )rJ   r   )	rD   rE   rF   r5   rt   rx   r   rC   rG   rH   s   @r   r-   r-   L   s         !%3+ 3+ 3+ 3+ 3+ 3+ 3+ 3+j: : :<' ' '( ( ( (. . . . . . .r   r-   )Fr   )$
__future__r   typingr   	functoolsr   randomr   r8   r   torch.nnr   torch.nn.functional
functionalr@   torch.utils._pytreer	   r
   einopsr   r   r   r   einops.layers.torchr   r   #hyper_connections.hyper_connectionsr   r   r   r   r   r*   r1   r-   staticmethodr   r   r   <module>r      s   " " " " " "                                         < < < < < < < < 4 4 4 4 4 4 4 4 4 4 4 4 1 1 1 1 1 1 1 1       
  ! ! !  
       	4 	4 	4 	4G G G G Gf G G G|. |. |. |. |.v |. |. |.| 7ClCe6f6f  3?K|Lw?x?x  < < <r   