
    Pi$:                       d dl mZ d dlmZ d dlmZ d dlmZ d dlZd dlm	Z	m
Z
 d dlm	c mZ d dlmZmZ d dlmZmZ d d	lmZmZmZmZ d d
lmZmZ 	 d Zd Zd Zd Zd Z 	 	 	 ddZ!	 	 	 	 ddZ" G d de          Z# G d de          Z$ G d de          Z% e&e!          e%_!         e&e"          e%_"         G d de          Z' G d de          Z(dS )     )annotations)Callable)partial)	randrangeN)nncat)Module
Sequential)tree_flattentree_unflatten)	rearrangerepeatreduceeinsum)	RearrangeReducec                
    | d uS N )vs    w/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/hyper_connections/hyper_connections.pyexistsr      s    D=    c                    | |z  dk    S )Nr   r   )numdens     r   divisible_byr      s    #I!r   c                (    t          |           r| n|S r   )r   )r   ds     r   defaultr    "   s    q		 11q r   c                    | S r   r   )ts    r   identityr#   %   s    Hr   c                    | |z   S r   r   )xys     r   addr'   (   s    q5Lr   Fc                    |r&t          j                    t          j                    fS |r,t          |          s
J d            t          | |d          }nt	          dd|           }t	          dd|           }||fS )	Nz`dim` must be passed into get_init_and_expand_reduce_stream_functions for returning an expansion function with stream embeddings addedT)expand_to_streamsb ... -> (b s) ...r   )pattern	reductionsz(b s) ... -> b ...sum)r   Identityr   StreamEmbedr   )num_streamsadd_stream_embeddimdisable	expand_fn	reduce_fns         r   "get_expand_reduce_stream_functionsr7   -   s      .r{}}-- bc{{  	e  	e  e  	e  	e{SdKKK		%9xU`aaa	!55kZZZIir      c                    t          || dk    o|dk              }|st          nt          }t          || |          }t	          | |||          }t          |          rt          ||          }|g|R S )Nr8   )	num_fracs)r2   r3   r4   r3   )r    HyperConnectionsResidualr   r7   r   )r1   r:   r3   r2   r4   hyper_conn_klassinit_hyper_conn_fnexpand_reduce_fnss           r   +get_init_and_expand_reduce_stream_functionsrA   A   s     g{a/BINCCG/6D''H !1;IVVV:;[ksv  CJ  K  K  Kc{{ D$%7sCCC3!2333r   c                  $     e Zd Z fdZd Z xZS )RMSNormc                    t                                                       |dz  | _        t          j        t          j        |                    | _        d S )Ng      ?)super__init__scaler   	Parametertorchzerosgamma)selfr3   	__class__s     r   rF   zRMSNorm.__init__W   sC    CZ
\%+c"2"233


r   c                T    t          j        |d          | j        z  | j        dz   z  S )Nr;   r8   )F	normalizerG   rK   )rL   r%   s     r   forwardzRMSNorm.forward\   s)    {1B'''$*4
QGGr   __name__
__module____qualname__rF   rR   __classcell__rM   s   @r   rC   rC   V   sN        4 4 4 4 4
H H H H H H Hr   rC   c                  B     e Zd Zdddd fdZd Zd Zdd
Zd Z xZS )r=   N)branchresidual_transformrZ   Module | Noner[   c                   t                                                       || _        t          |t	          j                              | _        d S r   )rE   rF   rZ   r    r   r/   r[   )rL   rZ   r[   argskwargsrM   s        r   rF   zResidual.__init__d   sA     	")*<bkmm"L"Lr   c                $    ||t                      fS r   )dictrL   	residualss     r   width_connectionzResidual.width_connectiono   s     )TVV++r   c                2    ||                      |          z   S r   )r[   )rL   branch_outputrc   s      r   depth_connectionzResidual.depth_connectionu   s     t66yAAAAr   r   c                R     t           j                  r
J d             fd}|S )N"branch was already wrapped on initc                f                         |           \  }} |g|R i |} ||          } | S r   rR   residualr^   r_   branch_inputadd_residualrf   rZ   rL   s         r   forward_and_add_residualz:Residual.decorate_branch.<locals>.forward_and_add_residual   L    )-h)?)?&L,"F<A$AAA&AAM#|M22HOr   r   rZ   rL   rZ   rp   s   `` r   decorate_branchzResidual.decorate_branch}   K     $+&&LL(LLL&	 	 	 	 	 	 ('r   c                                                     \  } fd}t           j                  s||fS   j        |g|R i |} ||          S )Nc                r    t          |           \  ^} }} j        | fi } t          | g|R |          S r   )r   rg   r   
branch_outrest	tree_specresidual_kwargsrc   rL   s      r   add_residual_fnz)Residual.forward.<locals>.add_residual_fn   sQ    -9*-E-E*Z$..z9XXXXJ!:"5"5"5yAAAr   rd   r   rZ   rL   rc   branch_argsbranch_kwargsrn   r}   rf   r|   s   ``     @r   rR   zResidual.forward   s     483H3H3S3S0i	B 	B 	B 	B 	B 	B 	B dk"" 	100#LP;PPP-PP}---r   )rZ   r\   r[   r\   rZ   r   )	rT   rU   rV   rF   rd   rg   rt   rR   rW   rX   s   @r   r=   r=   c   s         !%,0		M 	M 	M 	M 	M 	M 	M 	M, , ,B B B( ( ( (". . . . . . .r   r=   c            
      P     e Zd Zdddddddedd	d fd	Zd
 Zd ZddZd Z xZ	S )r<   NTFg        r8   )	rZ   layer_indextanhchannel_firstdropoutadd_branch_out_to_residualnum_input_viewsdepth_residual_fnr:   rZ   r\   c       
        >   t                                                       || _        |rt          j                    nt          j                    | _        |dk    sJ || _        |dk    | _        t          d|          | _
        t          d          | _        t          ||          sJ d| d| d            ||z  }t          |          | _        |dk    s
J d	            || _        t!          |t#          |                    |z  }||z  }|	|z  }|	dk    sJ |	| _        t'          j        ||f          }d
||ddf<   t          j        t-          |t'          j        |          fd                    | _        t          j        t'          j        |||z                       | _        t          j        t'          j        d          dz            | _        || _        |rt          j        t'          j        |                    | _        |dk    r|fn||f}t          j        t'          j        |                    | _        t          j        t'          j        d          dz            | _        t          j         |          | _!        || _"        |
| _#        dS )zN
        Appendix J, Algorithm2 in - https://arxiv.org/abs/2409.19606
        r8   zb ... (f d) -> b ... f d)fzb ... f d -> b ... (f d)zfeature dimension (z() must be divisible by the `num_fracs` ()r   z-`num_residual_streams` must be greater than 0g      ?Nr;   r   g{Gz?)$rE   rF   rZ   r   Tanhr/   actr:   	has_fracsr   split_fracsmerge_fracsr   rC   normnum_residual_streamsr    r   r   rI   rJ   rH   r   eyestatic_alphadynamic_alpha_fnonesdynamic_alpha_scaler   static_betadynamic_beta_fndynamic_beta_scaleDropoutr   r   r   )rL   r   r3   rZ   r   r   r   r   r   r   r   r:   init_residual_indexnum_residual_streams_fracsnum_input_views_fracsinit_alpha0dynamic_beta_shaperM   s                    r   rF   zHyperConnections.__init__   s   $ 	 !%727999"+-- A~~~~""Q$%?YOOO$%?@@C++||-|3-|-|py-|-|-|||+	 CLL	#a''')X'''$8!%k9=Q3R3RSSVjj &:I%E" /) ; !####. k#=?T"UVV.0'*+Lk59E_;`;`-aij)k)k)kll "U[>X[p>p-q-q r r#%<
20E#F#F  +E'% 	J!|EJ7Q,R,RSSD+4>>#Y?O#%<<N0O0O#P#PD &(l5:b>>D3H&I&ID# z'** + "3r   c                   | j         }| j        rt          |d          }|                     |          }t          |d|          }|                     |          }|                     || j        z            }|| j        z  }t          | j        d|          }||z   }|                     |          }d }| j	        rZ|                     || j
        z            }	| j        st          |	d          }	|	| j        z  }
t          | j        d|          }|
|z   }t          ||d          }| j        dk    r|d	d
d d f         |d	dd d d f         }}n8|d	d | j        d d f         |d	| j        d d d f         }}t          |d          }| j        rt          |d          }|                     |          }| j        rt          |d          }nt          |d          }||t#          |          fS )Nb d ... -> b ... d(b s) ... d -> b ... s dr-   z(f s) d -> f s dz... -> ... 1z... (s f) -> ... s fz'... f1 s f2 t, ... f1 s d -> ... f2 t dr8   .r   zb ... v d -> v b ... db ... d -> b d ...zb ... f s d -> (b s) (f d) ...zb ... f s d -> (b s) ... (f d))beta)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   ra   )rL   rc   streamsnormed	wc_weightdynamic_alphar   alphar   	dc_weightdynamic_betar   mix_hrn   s                 r   rd   z!HyperConnections.width_connection  sQ    +  	C!)-ABBI $$Y//	 i)CQQQ	 9%% HHVd&;;<<	!D$<< !24FGTTT,  '' * 
	.$*>!>??I> A%i@@	$t'>>L#D$46LRYZZZK+-Dui)RSS1$$&+CAAAI&6c122qqqj8I)LL&+C1F$2F1F,I&JERUW[WkWlWlnononoRoLp)L$\3KLLL 	I$\3GHHL ''55  	O!)-MNNII!)-MNNIYD(9(9(999r   c               \   | j         sJ |                     |          }| j        rt          |d          }t	          ||d          }t          |d          }|                     |          }| j        rt          |d          }|                     ||          }|                     |          S )Nr   z)b ... f1 d, b ... f1 s f2 -> b ... f2 s db ... s d -> (b s) ... dr   )r   r   r   r   r   r   r   r   )rL   rf   rc   r   outputs        r   rg   z!HyperConnections.depth_connectionS  s     .... ((77  	K%m5IJJMt-XYY6#=>> !!&))  	=v';<<F**69==	||I&&&r   r   c                R     t           j                  r
J d             fd}|S )Nri   c                f                         |           \  }} |g|R i |} ||          } | S r   rk   rl   s         r   rp   zBHyperConnections.decorate_branch.<locals>.forward_and_add_residual|  rq   r   rr   rs   s   `` r   rt   z HyperConnections.decorate_branchv  ru   r   c                                                     \  } fd}t           j                  s||fS   j        |g|R i |} ||          S )Nc                    j         s| S t          |           \  ^} }} j        | fi } t          | g|R |          S r   )r   r   rg   r   rx   s      r   r}   z1HyperConnections.forward.<locals>.add_residual_fn  sb    2 "!!-9*-E-E*Z$..z9XXXXJ!:"5"5"5yAAAr   r~   r   s   ``     @r   rR   zHyperConnections.forward  s     483H3H3S3S0i		B 		B 		B 		B 		B 		B 		B dk"" 	100#LP;PPP-PP}---r   )rZ   r\   r   )
rT   rU   rV   r'   rF   rd   rg   rt   rR   rW   rX   s   @r   r<   r<      s         !%%)]3 ]3 ]3 ]3 ]3 ]3 ]3 ]3~J: J: J:X!' !' !'F( ( ( (". . . . . . .r   r<   c                  *     e Zd Z	 	 d fd	Zd Z xZS )r0   Fc                    t                                                       || _        || _        || _        t          j        t          j        ||                    | _	        d S r   )
rE   rF   r   r1   r)   r   rH   rI   rJ   stream_embed)rL   r1   r3   r   r)   rM   s        r   rF   zStreamEmbed.__init__  sW     	*&!2L[#)F)FGGr   c                .   | j         rt          |d| j                  }| j        rt	          |d| j                  }nt	          |d| j                  }|| j        z   }| j        rt	          |d| j                  }nt	          |d| j                  }|S )Nr*   r   (b s) d ... -> b ... s dr   zb ... s d -> (b s) d ...r   )r)   r   r1   r   r   r   rb   s     r   rR   zStreamEmbed.forward  s    ! 	Vy*>DDTUUUI 	_!)-GTM]^^^II!)-GTM]^^^I 11	 	_!)-GTM]^^^II!)-GTM]^^^Ir   )FFrS   rX   s   @r   r0   r0     sZ        
 !H H H H H H      r   r0   c                  (     e Zd Z	 d fd	Zd Z xZS )AttentionPoolReduceStreamFc                   t                                                       || _        || _        t	          j        ||d          | _        | j        j        j        	                    t          j        |                     d S )NF)bias)rE   rF   r1   r   r   Linearto_attn_logitsweightdatacopy_rI   r   )rL   r1   r3   r   rM   s       r   rF   z"AttentionPoolReduceStream.__init__  so     	&* iS???"'--einn=====r   c                   | j         rt          |d| j                  }nt          |d| j                  }|                     |          }|                    d          }t          ||z  dd          }| j         rt          |d          }|S )	Nr   r   r   r;   zb ... s d -> b ... dr.   r   )r   r   r1   r   softmaxr   )rL   rc   attn_logitsattns       r   rR   z!AttentionPoolReduceStream.forward  s     	_!)-GTM]^^^II!)-GTM]^^^I)))44""",,9t+-CUKK	 	C!)-ABBIr   )FrS   rX   s   @r   r   r     sQ        
 	> > > > > >      r   r   )FNF)r8   NFN))
__future__r   typingr   	functoolsr   randomr   rI   r   r   torch.nn.functional
functionalrP   torch.nnr	   r
   torch.utils._pytreer   r   einopsr   r   r   r   einops.layers.torchr   r   r   r   r    r#   r'   r7   rA   rC   r=   r<   staticmethodr0   r   r   r   r   <module>r      s   " " " " " "                                     ' ' ' ' ' ' ' ' < < < < < < < < 4 4 4 4 4 4 4 4 4 4 4 4 1 1 1 1 1 1 1 1    ! ! !     
	       , 
4 4 4 4*H H H H Hf H H H@. @. @. @. @.v @. @. @.Hy. y. y. y. y.v y. y. y.v 7ClCe6f6f  3?K|Lw?x?x  <         &      H         r   