
    Pi/                    H   d dl mZ d dlmZ d dlmZ d dlZd dlmZ d dlmc m	Z
 d dlmZmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZ d Zd Zd Zd ZddZ G d de          Z G d de          Zd dZ G d de          Z edddg          Z  G d de          Z!dS )!    )annotations)deepcopy)
namedtupleN)nn)Module
ModuleList)	rearrangeeinsum)LocalAttention)apply_rotary_pos_emb)+get_init_and_expand_reduce_stream_functionsc                
    | d uS N )vals    o/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/local_attention/transformer.pyexistsr      s    d?    c                (    t          |           r| n|S r   )r   )r   ds     r   defaultr      s    ++$331$r   c                .    t          j        | d          S )Ndim)F	normalize)ts    r   l2normr      s    ;q####r   c                      fd}|S )Nc                    | j         }|                                   | g|R i |}|                     |           |S r   )trainingevaltrain)modelargskwargswas_trainingoutfns        r   innerzeval_decorator.<locals>.inner   sM    ~

b((((((L!!!
r   r   )r*   r+   s   ` r   eval_decoratorr,      s#         Lr   ?c                    t          d|z
  | j        d         z            }t          j        | |          \  }}t          j        | t          d                    }|                    d||           |S )N   r   z-inf)intshapetorchtopk	full_likefloatscatter_)logitsthreskr   indprobss         r   top_kr<   '   sf    QY&,r**++Az&!$$HCOFE&MM22E	NN1c3Lr   c                  H     e Zd Zdddddddddddd fd
Z	 	 	 	 d	dZ xZS )
LocalMHA@              FN)dim_headheadsdropoutcausalprenorm
qk_rmsnormqk_scaleuse_xposxpos_scale_baseexact_windowsizegate_values_per_headc                  t                                                       ||z  }|rt          j        |          nd | _        || _        t          j        ||dz  d          | _        || _        |rVt          j	        t          j        |                    | _        t          j	        t          j        |                    | _        || _        || _        t!          |d          | _        t%          d|||d|r|	nd | j        |
|d|| _        d | _        |r,t          j        t          j        ||                    | _        t          j        ||d          | _        d S )N   FbiasT)r   window_sizerE   autopadscalerK   rI   rJ   r   )super__init__r   	LayerNormnormrC   Linearto_qkvrG   	Parameterr2   onesq_scalek_scalerE   rQ   r   rK   r   attn_fn	to_v_gate
Sequentialto_out)selfr   rQ   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   r'   	inner_dim	__class__s                   r   rU   zLocalMHA.__init__1   sd   $ 	u$	)0:BL%%%d	
iY]5AAA$ 	><
8(<(<==DL<
8(<(<==DL& '(8$ ? ?% 

%!+5XX#4-

 

 

 

  	]	#u%% DN i	3u===r   c                &    |j         d         }t           j                  r                     |          }                     |                              dd          \  }}}	t           fd|||	f          \  }}}	 j        r.t          t          ||f          \  }}| j        z  }| j	        z  }t          |          r|dk    sJ  j
        rt          |          r
J d            |\  }
}||j         d         dz  z  }t          j        |
|fd          }t          j        ||	fd          }	 j        j         j        z  } j        r|dz    n|j         d         }|| j        z  z    t#          fd	||	fD                       \  }}	t           j        j                  r0 j        j        } ||          \  }}t'          ||||
          \  }}t)          ||d          }t          |          r@|j         d         }|ddd | d f         }|j         d         |j         d         k    sJ ||z   }|                    d          }t)          ||	d          }n                     |||	||          }|rt          j        ||	f          }t           j                  r<                     |          }t1          |d          }||                                z  }t1          |d          }                     |          }|s|S ||fS )NrN   r   r   c                2    t          | dj                  S )Nzb n (h d) -> b h n d)h)r	   rC   )r   rb   s    r   <lambda>z"LocalMHA.forward.<locals>.<lambda>x   s    	!-C T T T r   r/   z-only allow caching for specific configurationg      c              3  6   K   | ]}|d dddf         V  dS ).Nr   ).0r   kv_start_indexs     r   	<genexpr>z#LocalMHA.forward.<locals>.<genexpr>   s7      DD323DDDDDDr   )rS   zb h i d, b h j d -> b h i j.zb h i j, b h j d -> b h i d)mask	attn_biaszb n h -> b h n 1zb h n d -> b n (h d))r1   r   rW   rY   chunkmaprG   r   r\   r]   rE   r2   catr^   look_backwardrQ   rK   tuplerel_posr   r
   softmaxstackr_   r	   sigmoidra   )rb   xrn   ro   cachereturn_cacheseq_lenqr9   vckcveffective_window_sizeru   pos_emb
xpos_scalesimk_lenattnr)   kvgatesrl   s   `                     @r   forwardzLocalMHA.forwardj   sV    '"+$) 			!A++a..&&q&331aTTTTWXZ[]^V_``1a? 	!v1v&&DAqDL ADL A%== '	La<<<<;dvd||dd5ddd3FBQWR[D()A	2q',,,A	2q',,,A$(L$>AQ$Q!$ Y#81#<!='"+#8GdFV<V#W!XDDDDaVDDDDDDAqdl*++ O,.&-gajj#+Aq':NNN1A<==Ci   &%c233&78	 r*cim;;;;Io;;R;((Dq"?@@CC ,,q!Qt,KKC 	%aV$$B$.!! 	(NN1%%Ee%788E'C344kk# 	JBwr   )NNNF)__name__
__module____qualname__rU   r   __classcell__rd   s   @r   r>   r>   0   s         $7> 7> 7> 7> 7> 7> 7>x L L L L L L L Lr   r>   c                      e Zd Zd ZdS )GEGLUc                d    |                     dd          \  }}|t          j        |          z  S )N   r   r   )rp   r   gelu)rb   ry   gates      r   r   zGEGLU.forward   s.    ''!2'&&416$<<r   N)r   r   r   r   r   r   r   r   r      s#                 r   r      rA   c                   t          | |z  dz  dz            }t          j        t          j        |           t          j        | |dz  d          t                      t          j        |          t          j        || d                    S )Nr   rN   FrO   )r0   r   r`   rV   rX   r   Dropout)r   multrD   rc   s       r   FeedForwardr      s}    C$JNQ&''I=
S
	#y1}U333

7
	)S///  r   c                  :     e Zd Z fdZed             Zd Z xZS )DynamicPositionBiasc           
     8   t                                                       t          j        t          j        d|          t          j                    t          j        ||          t          j                    t          j        ||                    | _        d S )Nr/   )rT   rU   r   r`   rX   SiLUmlp)rb   r   rC   rd   s      r   rU   zDynamicPositionBias.__init__   sq    
 	=IaGIIIc3GIIIc5!!
 
r   c                N    t          |                                           j        S r   )next
parametersdevice)rb   s    r   r   zDynamicPositionBias.device   s    DOO%%&&--r   c                   | j         }||k    sJ t          j        |t          j        |          }|                     t          |d                    }t          j        ||z
  ||          }t          j        ||          }t          |d          t          |d          z
                                  }t          ||         d          }|S )N)dtyper   z... -> ... 1r   zi -> i 1zj -> 1 jzi j h -> h i j)r   r2   aranger5   r   r	   abs)	rb   ijr   rel_distrP   i_seqj_seqrel_dist_indicess	            r   r   zDynamicPositionBias.forward   s    Avvvv<5;HHHxx	(N;;<<QUA777Q000%eZ889UJ;W;WW\\^^./1ABBr   )r   r   r   rU   propertyr   r   r   r   s   @r   r   r      sb        
 
 
 
 
 . . X.      r   r   Cachecache_kvmaybe_cached_attn_biasc                       e Zd Zdddddddddd	dd	d	dd
d fdZ ej                    e	 	 	 dd                        Z	 	 	 	 ddZ xZ	S )LocalTransformerTi   r?   r@   r   rA   r   FN)rE   local_attn_window_sizerB   rC   ff_multattn_dropout
ff_dropoutignore_indexrI   rJ   use_dynamic_pos_biasglobal_attn_layerlayers_insert_global_attnnum_residual_streamsr   Module | Noner   tuple[int, ...] | Nonec                  t                                                       t          |          | _        | j        r4t	          j        ||          | _        t	          j        ||          | _        || _        t          g           | _
        || _        d | _        |rt          |dz  |          | _        t          ||dk              \  }| _        | _        t#          |t%          t'          ddz                                 }t)          fd|D                       sJ t+          |          }t          g           | _        t'                    D ]}|dz   }| j                            t          |          r||v r ||t1          |                    nd            | j
                            t	          j	         ||t3          d||||
||||| dd
|           ||t5          ||	|	                    g                     || _        | j        rCt	          j        t	          j        |          t	          j        ||d
                    | _        d S d S )Nr   )r   rC   r/   )disablec                0    g | ]}d |cxk     ok    nc S )r   r   )rk   layerdepths     r   
<listcomp>z-LocalTransformer.__init__.<locals>.<listcomp>  s5    NNN5A&&&&&&&&NNNr   )r   branchT)
r   rB   rC   rD   rE   rQ   rI   rJ   use_rotary_pos_embrF   )r   r   rD   FrO   r   ) rT   rU   r   has_embed_unembedr   	Embedding	token_embr   max_seq_lenr   layersr   dynamic_pos_biasr   r   expand_streamsreduce_streamsr   rt   rangeallsetglobal_layersappendr   r>   r   r   r`   rV   rX   	to_logits)rb   
num_tokensr   r   r   rE   r   rB   rC   r   r   r   r   rI   rJ   r   r   r   r   r'   init_hyper_connglobal_attn_layersindexr   rd   s       `                   r   rU   zLocalTransformer.__init__   s$   . 	!'
!3!3! 	:\*c::DN<S99DL& nn&<# $ 	W$7cQhPU$V$V$VD!Do  qE  Qe  ij  Qj  Ek  Ek  EkA,d.A %,,EuUSTV[^_V_M`M`GaGa$b$b!NNNN4MNNNOOOOO !:;;'^^ 5\\ 	 	EAIE%%jp  rC  kD  kD  'n  IN  Rd  Id  IdooC(SdJeJe&f&f&f&f  jn  o  o  oKr}cH  5|3S[ej  wC  NT  dz  GO  cr  Ma  Ia  mq  5|  5|  u{  5|  5|  }  }  }cKcRYeo4p4p4pqqq.        
 )! 	]S!!	#z%888 DNNN	 	r         ?r-   c                   | j         sJ |dk    sJ |j        d         |j        }}|}	d }
t          |          D ]} | j        |	d d | j         d f         f|
dd|\  }}|r|}
t          |d d df         |          }|dk    r|                    dd          }n.t          j	        ||z  d          }t          j        |d          }t          j        |	|fd          }	|	d d |d f         S )	NrA   r/   T)rz   r{   r   )r8   )r   keepdimr   )r   r1   r   r   r   r   r<   argmaxr   rv   r2   multinomialrr   )rb   primer|   temperaturefilter_thresuse_kv_cacher'   nr   r)   rz   _r7   	new_cachefiltered_logitssampledr;   s                    r   generatezLocalTransformer.generate9  sP    %%%%b    KNEL6w 	6 	6A ,AAA(()))*!#! ! 	! !FI  "!#F111b5M<HHHOb  )00rT0JJ	/K"?rJJJ+E155)S'N"555CC111abb5zr   c                @   |r|d d d df         |d d dd f         }}|j         d         |j        }}| j        rN|                     |          }|| j        k    sJ ||                     t          j        ||                    z   }t          |          }	d x}
}|	r|\  }
}g }t          t          |
g                     }|	r|d d dd f         }|}t          |          s4t          | j                  r | j        }|                     ||dz            }|                     |          }t          | j        | j                  D ]c\  \  }}}t          |          r ||          } ||||dt#          |d                     \  }}|                    |            ||          }d|                     |          }| j        s|S |                     |          }|s|s|S |t+          ||          fS t-          j        t1          |d          || j                  }|S )	Nr   r/   r   r   T)rn   ro   r{   rz   zb n c -> b c n)r   )r1   r   r   r   r   r   r2   r   r   iterr   r   r   r   zipr   r   r   r   r   r   r   r   cross_entropyr	   r   )rb   ry   rn   rz   return_lossr{   labelsr   r   	has_cache	cached_kvcached_attn_biasnew_cached_kviter_cached_kvro   wr   ffglobal_layerlayer_cached_kvr7   losss                         r   r   zLocalTransformer.forwarde  s     	,!!!SbS&	1QQQU8vAGAJ6! 	Cq!!A(((((DLLa&!A!A!ABBBA 5MM	'++	$ 	0*/'I'gi4455 	!!!RSS&	A %	i   	8VD,A%B%B 	8+A--aQ77I ""(+DK9K(L(L 	 	$JT2l## $ LOO!%%#^T22" " "A   1111AA""% 	H "" 	; 5	:::: f.//,
 
 
 r   )r   r   r   r   )r   r-   T)NNFF)
r   r   r   rU   r2   no_gradr,   r   r   r   r   s   @r   r   r      s         !$$+/<@ )D D D D D D D DL U]__
 ( ( ( ^ _(Z V V V V V V V Vr   r   )r-   )r   rA   )"
__future__r   copyr   collectionsr   r2   r   torch.nn.functional
functionalr   torch.nnr   r   einopsr	   r
   local_attention.local_attentionr   local_attention.rotaryr   hyper_connectionsr   r   r   r   r,   r<   r>   r   r   r   r   r   r   r   r   <module>r     s   " " " " " "       " " " " " "                 ' ' ' ' ' ' ' ' $ $ $ $ $ $ $ $ : : : : : : 7 7 7 7 7 7 I I I I I I  % % %$ $ $     F F F F Fv F F FT         F      
	 	 	 	         &      H 	
7Z)ABCCI I I I Iv I I I I Ir   