
     `iQ                        d dl mZmZmZmZ d dlZd dlmc mZ	 d dlmZ ddl
mZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZ ddlmZ ddlmZ ddlmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z% ddl&m'Z'  e            r	d dl(m)Z)m*Z* nd\  Z)Z*e)e*fZ+ e,e+          Z- ej.        e/          Z0 G d de"          Z1 G d de#          Z2 G d dej3                  Z4 G d d          Z5 G d de          Z6 G d dej3                  Z7 G d  d!e          Z8 G d" d#e!          Z9 G d$ d%e           Z: G d& d'e          Z;g d(Z<dS ))    )AnyCallableOptionalUnionN)nn   )create_causal_mask)GradientCheckpointingLayer)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargslogging)deprecate_kwarg)is_causal_conv1d_available   )apply_mask_to_padding_states)LlamaAttentionLlamaForCausalLM
LlamaModelLlamaPreTrainedModelLlamaRMSNormLlamaRotaryEmbeddingapply_rotary_pos_embeager_attention_forward   )
Lfm2Config)causal_conv1d_fncausal_conv1d_updateNNc                       e Zd ZdS )Lfm2RMSNormN__name__
__module____qualname__     y/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/lfm2/modular_lfm2.pyr"   r"   7           Dr(   r"   c                       e Zd ZdS )Lfm2RotaryEmbeddingNr#   r'   r(   r)   r,   r,   ;   r*   r(   r,   c                   *     e Zd Zdef fdZd Z xZS )Lfm2MLPconfigc                    t                                                       |j        }|j        rPt	          d|z  dz            }|j        4t	          |j        |z            }|j        ||j        z   dz
  |j        z  z  }t          j        |j	        |d          | _
        t          j        |j	        |d          | _        t          j        ||j	        d          | _        d S )Nr   r   r   Fbias)super__init__intermediate_sizeblock_auto_adjust_ff_dimintblock_ffn_dim_multiplierblock_multiple_ofr   Linearhidden_sizew1w3w2)selfr/   r5   	__class__s      r)   r4   zLfm2MLP.__init__@   s    "4* 	 #A(9$9A$= > >.:$'(GJ[([$\$\!$*$<&)AAAE&Jbb%! )F.0ANNN)F.0ANNN)-v/ANNNr(   c                     |                      t          j        |                     |                    |                     |          z            S N)r>   Fsilur<   r=   )r?   xs     r)   forwardzLfm2MLP.forwardO   s7    wwqvdggajj))DGGAJJ6777r(   )r$   r%   r&   r   r4   rF   __classcell__r@   s   @r)   r.   r.   ?   sZ        Oz O O O O O O8 8 8 8 8 8 8r(   r.   c                      e Zd ZdZdZdZdZdZej	        dfde
dedej        deej        edf         fdZ	 dd	ej        d
ej        dedeeeef                  deej        ej        f         f
dZdej        fdZddee         defdZdej        dedeeef         fdZdefdZdefdZdedeej        ej        f         fdZd ZdS )Lfm2HybridConvCachea  
    Attention and conv cache for Lfm2.

    It stores the Key and Value states as a list of tensors, one for each layer.
    Attention layer cache shape: `[batch_size, num_heads, seq_len, head_dim]`.
    Conv layer cache shape: `[batch_size, hidden_size, L_cache-1]`.
    NFr/   max_batch_sizedtypedevicec                    g | _         g | _        || _        |j        | _        | j                            d          | _        |j        | _        || _        g | _        |t          j
        |          nd }t          |j                  D ]h}t          j        | j        |j        | j        | j        |          }t          j                            |           | j                            |           id S )Nfull_attention)rL   rM   )	key_cachevalue_cacherK   layer_typesindexfirst_attention_layerconv_L_cache_dtype
conv_cachetorchrM   rangenum_hidden_layerszerosr;   _dynamomark_static_addressappend)r?   r/   rK   rL   rM   _
conv_states          r)   r4   zLfm2HybridConvCache.__init__b   s     ,!-%)%5%;%;<L%M%M""/.0)/);f%%%v/00 		/ 		/A#"!k  J M--j999O"":....		/ 		/r(   
key_statesvalue_states	layer_idxcache_kwargsreturnc                    |Tt          | j                  |k    rt          t          | j                  |          D ]Z}| j                            t	          j        g                      | j                            t	          j        g                      [| j                            |           | j                            |           n| j        |                                         s|| j        |<   || j        |<   nVt	          j        | j        |         |gd          | j        |<   t	          j        | j        |         |gd          | j        |<   | j        |         | j        |         fS )a  
        Updates the cache with the new `key_states` and `value_states` for the layer `layer_idx`.

        Parameters:
            key_states (`torch.Tensor`):
                The new key states to cache.
            value_states (`torch.Tensor`):
                The new value states to cache.
            layer_idx (`int`):
                The index of the layer to cache the states for.
            cache_kwargs (`Dict[str, Any]`, `optional`):
                Additional arguments for the cache subclass. No additional arguments are used in `DynamicCache`.

        Return:
            A tuple containing the updated key and value states.
        Ndim)	lenrP   rY   r^   rX   tensorrQ   numelcat)r?   ra   rb   rc   rd   r_   s         r)   updatezLfm2HybridConvCache.update   s^   0 !4>""i//s4>22I>> > >AN))%,r*:*:;;;$++EL,<,<====%%j111 ''5555N9-3355m -7y).: ++,1It~i7PR\6]ce,f,f,fy).3i9I)9TVb8cik.l.l.l +~i($*:9*EEEr(   beam_idxc                 "   t          t          | j                            D ]}| j        |         j        }| j        |                             d|                    |                    | j        |<   | j        |         j        }| j        |                             d|                    |                    | j        |<   | j        |         j        }| j        |                             d|                    |                    | j        |<   dS )zDReorders the cache for beam search, given the selected beam indices.r   N)rY   rj   rP   rM   index_selecttorQ   rW   )r?   ro   rc   rM   s       r)   reorder_cachez!Lfm2HybridConvCache.reorder_cache   s    s4>2233 	i 	iI^I.5F(,y(A(N(NqRZR]R]^dReRe(f(fDN9%%i07F*.*:9*E*R*RSTV^VaVabhViVi*j*jDY'_Y/6F)-)C)P)PQRT\T_T_`fTgTg)h)hDOI&&	i 	ir(   r   c                     | j         |         dk    r| j        n|}t          | j                  |k    s#| j        |                                         dk    rdS | j        |         j        d         S )zYReturns the sequence length of the cached states. A layer index can be optionally passed.rO   r   rg   )rR   rT   rj   rP   rl   shaper?   rc   s     r)   get_seq_lengthz"Lfm2HybridConvCache.get_seq_length   sr     372B92MQa2a2aD..gp	t~)++t~i/H/N/N/P/PTU/U/U1~i(.r22r(   cache_positionc                 Z    d}|j         d         }|                                 }||z   }||fS )aB  
        Return a tuple (kv_length, kv_offset) corresponding to the length and offset that will be returned for
        the given layer at `layer_idx`.
        The masks are then prepared according to the given lengths (kv_length, kv_offset) and patterns (i.e. sliding_window, chunk_size),
        for each layer.
        r   )ru   rw   )r?   rx   rc   full_mask_kv_offsetquery_lengthpast_seen_tokens	kv_lengths          r)   get_mask_sizesz"Lfm2HybridConvCache.get_mask_sizes   s@      %+A...00 #33	---r(   
max_lengthc                    |dk     r$|                                  t          |          z
  }|                                  |k    rdS t          t          | j                            D ]e}| j        |                                         rD| j        |         dd|ddf         | j        |<   | j        |         dd|ddf         | j        |<   fdS )z"Crop the cache to the given lengthr   N.)rw   absrY   rj   rP   rl   rQ   )r?   r   idxs      r)   cropzLfm2HybridConvCache.crop   s    >>,,..Z@J  J..FT^,,-- 	S 	SC~c"((** S&*nS&9#{
{AAA:M&Ns#(,(8(=c;J;PQPQPQ>Q(R %	S 	Sr(   c                 *    t          | j                  S rB   )rj   rP   )r?   s    r)   __len__zLfm2HybridConvCache.__len__   s    4>"""r(   c                 6    | j         |         | j        |         fS rB   )rP   rQ   rv   s     r)   __getitem__zLfm2HybridConvCache.__getitem__   s    ~i($*:9*EEEr(   c                     t          t          | j                            D ]!}| j        |                                          "d S rB   )rY   rj   rW   zero_rv   s     r)   resetzLfm2HybridConvCache.reset   sI    s4?3344 	/ 	/IOI&,,....	/ 	/r(   rB   )r   )r$   r%   r&   __doc__rK   is_compileablerP   rQ   rX   float32r   r7   rL   r   rM   strr4   Tensorr   dictr   tuplern   
LongTensorrs   rw   r~   r   r   r   r   r'   r(   r)   rJ   rJ   S   s
         NNIK #]15/ // / {	/
 elC-./ / / /D 26)F )FL)F l)F 	)F
 tCH~.)F 
u|U\)	*)F )F )F )FV	ie&6 	i 	i 	i 	i3 3 3c 3 3 3 3.U\ .c .eTWY\T\o . . . .Ss S S S S# # # # #FS FU5<3M-N F F F F/ / / / /r(   rJ   c                       e Zd Zdedef fdZ eddd          	 	 dd	ej        d
e	ej        ej        f         de
ej                 de
e         de
ej                 de	ej        e
ej                 f         fd            Z xZS )Lfm2Attentionr/   rc   c                 D   t                                          ||           t          j        |j        |j        | j        z  d          | _        t          j        |j        |j        | j        z  d          | _	        t          j        |j        |j        | j        z  d          | _
        t          j        |j        | j        z  |j        d          | _        t          | j        |j                  | _        t          | j        |j                  | _        | `| `d S )NFr1   eps)r3   r4   r   r:   r;   num_attention_headshead_dimq_projnum_key_value_headsk_projv_projout_projr"   norm_epsq_layernormk_layernormo_projattention_dropoutr?   r/   rc   r@   s      r)   r4   zLfm2Attention.__init__   s    +++i 2F4NQUQ^4^ejkkki 2F4NQUQ^4^ejkkki 2F4NQUQ^4^ejkkk	&"<t}"LfN`glmmm&t}&/JJJ&t}&/JJJK"""r(   past_key_valuepast_key_values4.58new_nameversionNhidden_statesposition_embeddingsattention_maskrx   re   c                 2   |j         d d         }g |d| j        R }|                      |                     |          j        |                               dd          }	|                      |                     |          j        |                               dd          }
 |                     |          j        |                     dd          }|\  }}t          |	|
||          \  }	}
|&|||d}|
                    |
|| j        |          \  }
}t          }| j        j        dk    rt          | j        j                 } || |	|
||fd| j        d|\  }} |j        g |dR                                  }|                     |          }||fS )Nr   r   )sincosrx   eagerg        )dropoutscaling)ru   r   r   r   view	transposer   r   r   r   rn   rc   r   r/   _attn_implementationr   r   reshape
contiguousr   )r?   r   r   r   r   rx   kwargsinput_shapehidden_shapequery_statesra   rb   r   r   rd   attention_interfaceattn_outputattn_weightsoutputs                      r)   rF   zLfm2Attention.forward   s    $)#2#.88b8$-88''(GM(B(B(G(VWWaabcefgg%%&Edkk-&@&@&E|&TUU__`acdee
6t{{=116EOOPQSTUU&S#7jRUWZ#[#[ j&#&snUUL'6'='=j,X\Xfht'u'u$J(?;+w66"9$+:Z"[$7$7	%
 L	%
 	%
 	%
 	%
!\ *k);;;;;;FFHH{++|##r(   r    )r$   r%   r&   r   r7   r4   r   rX   r   r   r   rJ   r   rF   rG   rH   s   @r)   r   r      s        	#z 	#c 	# 	# 	# 	# 	# 	# _%0A6RRR :>59'$ '$|'$ #5<#=>'$ !.	'$
 ""56'$ !!12'$ 
u|Xel33	4'$ '$ '$ SR'$ '$ '$ '$ '$r(   r   c                       e Zd Zdedef fdZ eddd          	 	 	 dd	ej        de	e
         d
e	ej                 de	ej                 fd            Z eddd          	 	 	 dd	ej        de	e
         d
e	ej                 de	ej                 fd            Z eddd          	 	 	 ddej        de	e
         d
e	ej                 de	ej                 fd            Z xZS )Lfm2ShortConvr/   rc   c           	         t                                                       || _        || _        |j        | _        |j        | _        t          j	        |j
        |j
        | j        |j
        | j        | j        dz
            | _        t          j        |j
        d|j
        z  | j                  | _        t          j        |j
        |j
        | j                  | _        d S )Nr   )in_channelsout_channelskernel_sizegroupsr2   paddingr   r1   )r3   r4   r/   rc   rU   L_cache	conv_biasr2   r   Conv1dr;   convr:   in_projr   r   s      r)   r4   zLfm2ShortConv.__init__  s    
 	"*$	I*+%L1$
 
 
	 y!3Q9K5KRVR[\\\	&"4f6HtyYYYr(   r   r   r   r   NrE   rx   r   c                    t          ||          }|                     |                              dd          }|                    dd          \  }}}||z  }| j        j                            | j        j                            d          | j        j                            d                    }	|b|d         dk    rVt          |	                    d          |j
        | j                 |	| j        j        d           }
|
                    d          }
ny|Zt          j                            || j        |j        d         z
  df          }|j
        | j                                     |           t)          ||	| j        j        d           }
||
z  }|                     |                    dd                                                    }|S )Nr   rg   r   rh   r   r   )
activation)r   r   r   chunkr   weightr   sizer   squeezerW   rc   r2   	unsqueezer   
functionalpadr   ru   copy_r   r   r   )r?   rE   r   rx   r   BCxBCBxconv_weightsconv_outr`   ys                r)   cuda_kernels_forwardz"Lfm2ShortConv.cuda_kernels_forward1  s    )N;;ll1oo''B//))A2)&&1aUy',,TY-=-B-B1-E-EtyGWG\G\]^G_G_``&>!+<q+@+@+

2*4>:	 H  ))"--HH*]..rDL28B<4OQR3STT
*4>:@@LLL'L$).UYZZZHLMM!++b"--88::;;r(   c                    |j         d         }t          ||          }|                     |                              dd          }|                    dd          \  }}}||z  }	||d         dk    r|j        | j                 }
|                    d| j        dz
            }|
	                    dd          }
|	
                    |
j        |
j                  |
d d d d |f<   |j        | j                                     |
           t          j        |

                    |	j                  | j        j        d d dd d f         z  d          }| j        r|| j        j        z  }|                    d          }n{|Zt(          j                            |	| j        |	j         d         z
  df          }
|j        | j                                     |
           |                     |	          d	d |f         }||z  }|                    dd                                          }|                     |          }|S )
Nr   r   rg   r   rh   r   )shiftsdims)rM   rL   .)ru   r   r   r   r   rW   rc   clampr   rollrr   rM   rL   r   rX   sumr   r   r2   r   r   r   r   r   r   )r?   rE   r   rx   r   seqlenr   r   r   r   r`   r   r   s                r)   slow_forwardzLfm2ShortConv.slow_forwardT  s    (N;;ll1oo''B//))A2)&&1aU&>!+<q+@+@(3DNCJ+11!T\A5EFFN#<<J/1uuJ<MU_Ueu/f/fJqqq!!!^+,&t~6<<ZHHHyry!9!9DI<LQQQPQSTSTSTW<U!U[]^^^Hy +DIN*))"--HH*]..rDL28B<4OQR3STT
*4>:@@LLLyy}}S'6'\2HLKKB**,,MM!r(   r   c                     t           rDd|j        j        v r6t          j                                        s|                     ||||          S |                     ||||          S )Ncuda)is_fast_path_availablerM   typerX   r\   is_compilingr   r   )r?   r   r   rx   r   s        r)   rF   zLfm2ShortConv.forward{  sh     " 	mf0D0I&I&IRWR_RlRlRnRn&I,,]O^]klll  Q_```r(   )NNN)r$   r%   r&   r   r7   r4   r   rX   r   r   rJ   r   r   r   rF   rG   rH   s   @r)   r   r     s       ZZ Z Z Z Z Z Z, _%0A6RRR :>5915   <  ""56  !!12	 
 !.      SR D _%0A6RRR :>5915$ $<$ ""56$ !!12	$
 !.$ $ $ SR$L _%0A6RRR :>5915	a 	a|	a ""56	a !!12		a
 !.	a 	a 	a SR	a 	a 	a 	a 	ar(   r   c                       e Zd Zdedef fdZ eddd          	 	 	 	 dd	ej        d
e	ej        ej        f         de
ej                 de
ej                 de
e         de
ej                 dej        fd            Z xZS )Lfm2DecoderLayerr/   rc   c                    t                                                       |j        |         dk    | _        | j        rt	          ||          | _        nt          ||          | _        t          |          | _	        t          |j        |j                  | _        t          |j        |j                  | _        d S )NrO   r   )r3   r4   rR   is_attention_layerr   	self_attnr   r   r.   feed_forwardr"   r;   r   operator_normffn_normr   s      r)   r4   zLfm2DecoderLayer.__init__  s    "("4Y"?CS"S" 	9*69==DNN%fi88DI#FOO();QQQ#F$6FOLLLr(   r   r   r   r   Nr   r   r   position_idsrx   re   c           
      &   |}| j         r+ | j        d|                     |          |||||d|\  }}	n,|                     |                     |          |||          }||z   }||                     |                     |                    z   }|S )N)r   r   r   r   r   rx   )r   r   rx   r   r'   )r   r   r   r   r   r   )
r?   r   r   r   r   r   rx   r   residualr_   s
             r)   rF   zLfm2DecoderLayer.forward  s     !" 	-t~  "00??$7-) /-       M11 !II"00?? /--	 &  M &0%(9(9$--:V:V(W(WWr(   )NNNN)r$   r%   r&   r   r7   r4   r   rX   r   r   r   r   rJ   rF   rG   rH   s   @r)   r   r     s       
Mz 
Mc 
M 
M 
M 
M 
M 
M _%0A6RRR
 26379=59 | #5<#=> !.	
 u/0 ""56 !!12 
   SR    r(   r   c                       e Zd ZdZdS )Lfm2PreTrainedModelFN)r$   r%   r&   _can_compile_fullgraphr'   r(   r)   r   r     s        "r(   r   c                        e Zd Zdef fdZ	 	 	 	 	 	 	 ddeej                 deej                 deej                 dee	         deej
                 d	ee         d
eej                 dee         defdZ xZS )	Lfm2Modelr/   c                     t                                          |           t          |          | _        t	          |j        |j                  | _        | `| `	d S )Nr   )
r3   r4   r,   pos_embr"   r;   r   embedding_normnorm
rotary_emv)r?   r/   r@   s     r)   r4   zLfm2Model.__init__  sR       *622)&*<&/RRRIOOOr(   N	input_idsr   r   r   inputs_embeds	use_cacherx   r   re   c           
         |d u |d uz  rt          d          ||                     |          }|r1|/|j        d         }	t          | j        |	| j        | j                  }|B||                                nd}
t          j	        |
|
|j        d         z   |j                  }||
                    d          }t          | j        |||||          }|}|                     ||          }| j        d | j        j                 D ]} ||f|||||d|}|                     |          }t!          ||          S )	Nz:You must specify exactly one of input_ids or inputs_embedsr   )r/   rK   rL   rM   r   )rM   )r/   input_embedsr   rx   r   r   )r   r   r   rx   r   )last_hidden_stater   )
ValueErrorembed_tokensru   rJ   r/   rL   rM   rw   rX   aranger   r	   r   layersrZ   r   r   )r?   r  r   r   r   r  r  rx   r   
batch_sizer|   causal_maskr   r   decoder_layers                  r)   rF   zLfm2Model.forward  s    -t";< 	[YZZZ  --i88M 	0&,Q/J1{:TZX\Xc  O !CRC^==???de"\ "2]5H5K"KTaTh  N )33A66L(;&))+%
 
 
 &"ll=,GG "[)H4;+H)HI 		 		M)M*) /-$7   MM ++M::&++
 
 
 	
r(   )NNNNNNN)r$   r%   r&   r   r4   r   rX   r   r   rJ   FloatTensorboolr   r   r   rF   rG   rH   s   @r)   r   r     s       z       1515379=59$(59=
 =
E,-=
 !.=
 u/0	=

 ""56=
   12=
 D>=
 !!12=
 +,=
 
!=
 =
 =
 =
 =
 =
 =
 =
r(   r   c                       e Zd ZdS )Lfm2ForCausalLMNr#   r'   r(   r)   r  r    r*   r(   r  )r  r   r   )=typingr   r   r   r   rX   torch.nn.functionalr   r   rC   masking_utilsr	   modeling_layersr
   modeling_outputsr   modeling_utilsr   processing_utilsr   utilsr   r   utils.deprecationr   utils.import_utilsr   bamba.modeling_bambar   llama.modeling_llamar   r   r   r   r   r   r   r   configuration_lfm2r   causal_conv1dr   r   kernel_modulesallr   
get_loggerr$   loggerr"   r,   Moduler.   rJ   r   r   r   r   r   r  __all__r'   r(   r)   <module>r(     s   2 1 1 1 1 1 1 1 1 1 1 1                 / / / / / / 9 9 9 9 9 9 7 7 7 7 7 7 5 5 5 5 5 5 & & & & & & 0 0 0 0 0 0 0 0 0 0 0 0 0 0 < < < < < < ? ? ? ? ? ?	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 + * * * * *  8DDDDDDDDD-7** #$89^,,  
	H	%	%	 	 	 	 	, 	 	 		 	 	 	 	. 	 	 	8 8 8 8 8bi 8 8 8(M/ M/ M/ M/ M/ M/ M/ M/`4$ 4$ 4$ 4$ 4$N 4$ 4$ 4$nka ka ka ka kaBI ka ka ka\- - - - -1 - - -`# # # # #. # # #E
 E
 E
 E
 E

 E
 E
 E
P	 	 	 	 	& 	 	 	 B
A
Ar(   