
     `i0                     <   d dl mZmZmZ d dlZd dlmZ ddlmZ ddl	m
Z
mZmZ ddlmZ ddlmZmZ ddlmZ dd	lmZ dd
lmZmZmZmZmZmZ ddlmZmZ ddl m!Z!m"Z" ddl#m$Z$ ddl%m&Z&m'Z'm(Z(m)Z) ddl*m+Z+ ddl,m-Z-m.Z. ddl/m0Z0m1Z1  e)j2        e3          Z4 G d dej5                  Z6 G d dej5                  Z7 G d dej5                  Z8d Z9dNdZ:dej;        de<dej;        fdZ=	 	 	 dOd ej5        d!ej;        d"ej;        d#ej;        d$eej;                 d%e>d&ee>         d'ee>         de?ej;        ej;        f         fd(Z@ G d) d*ej5                  ZA G d+ d,ej5                  ZB G d- d.e          ZC G d/ d0eC          ZD G d1 d2ej5                  ZE G d3 d4ej5                  ZF G d5 d6ej5                  ZGe' G d7 d8e"                      ZHd$eej;                 defd9ZId:e<defd;ZJd<eejK                 dej;        d=ee<         dej;        fd>ZL G d? d@eH          ZM G dA dBeM          ZNe' G dC dDeH                      ZOe' G dE dFeH                      ZP G dG dHeHe          ZQe' G dI dJeH                      ZRe' G dK dLeH                      ZSg dMZTdS )P    )CallableOptionalUnionN   )ACT2FN)CacheDynamicCacheEncoderDecoderCache)GenerationMixin)create_causal_mask!create_sliding_window_causal_mask)FlashAttentionKwargs)GradientCheckpointingLayer)BaseModelOutput)BaseModelOutputWithPastAndCrossAttentionsSeq2SeqLMOutputSeq2SeqModelOutputSequenceClassifierOutputTokenClassifierOutput)ROPE_INIT_FUNCTIONSdynamic_rope_update)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)TransformersKwargsauto_docstringcan_return_tuplelogging)deprecate_kwarg)OutputRecordercheck_model_inputs   )T5GemmaConfigT5GemmaModuleConfigc                   <     e Zd Zddedef fdZd Zd Zd Z xZ	S )	T5GemmaRMSNormư>dimepsc                     t                                                       || _        t          j        t          j        |                    | _        d S N)super__init__r)   nn	Parametertorchzerosweight)selfr(   r)   	__class__s      /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/t5gemma/modeling_t5gemma.pyr-   zT5GemmaRMSNorm.__init__6   s?    l5;s#3#344    c                     |t          j        |                    d                              dd          | j        z             z  S )N   T)keepdim)r0   rsqrtpowmeanr)   )r3   xs     r5   _normzT5GemmaRMSNorm._norm;   s8    5;quuQxx}}R}>>IJJJJr6   c                     |                      |                                          }|d| j                                        z   z  }|                    |          S )Ng      ?)r?   floatr2   type_as)r3   r>   outputs      r5   forwardzT5GemmaRMSNorm.forward>   sL    AGGII&& 3!2!2!4!445~~a   r6   c                 H    t          | j        j                   d| j         S )Nz, eps=)tupler2   shaper)   r3   s    r5   
extra_reprzT5GemmaRMSNorm.extra_reprE   s%    )**<<$(<<<r6   )r'   )
__name__
__module____qualname__intrA   r-   r?   rD   rI   __classcell__r4   s   @r5   r&   r&   5   s        5 5C 5e 5 5 5 5 5 5
K K K! ! != = = = = = =r6   r&   c                   $     e Zd Z fdZd Z xZS )
T5GemmaMLPc                    t                                                       || _        |j        | _        |j        | _        t          j        | j        | j        d          | _        t          j        | j        | j        d          | _        t          j        | j        | j        d          | _	        t          |j                 | _        t          j        |j                  | _        d S )NFbias)r,   r-   confighidden_sizeintermediate_sizer.   Linear	gate_projup_proj	down_projr   hidden_activationact_fnDropoutdropout_ratedropoutr3   rU   r4   s     r5   r-   zT5GemmaMLP.__init__J   s    !-!'!94#3T5KRWXXXy!143IPUVVV4#94;KRWXXXV56z&"566r6   c                     |                      |                     |                    |                     |          z  }|                     |          }|                     |          }|S r+   )r]   rY   rZ   r`   r[   )r3   r>   hidden_statesr[   s       r5   rD   zT5GemmaMLP.forwardU   sV    DNN1$5$566aH]33NN=11	r6   )rJ   rK   rL   r-   rD   rN   rO   s   @r5   rQ   rQ   I   sG        	7 	7 	7 	7 	7      r6   rQ   c                   v     e Zd ZU ej        ed<   d fd	Z ej                    ed                         Z	 xZ
S )T5GemmaRotaryEmbeddinginv_freqNc                    t                                                       t          |d          rSt          |j        t
                    r9|j                            d|j                            d                    | _        nd| _        |j        | _	        |j        | _
        || _        t          | j                 | _        |                     | j        |          \  }| _        |                     d|d           | j        | _        d S )Nrope_scaling	rope_typetypedefaultrf   F)
persistent)r,   r-   hasattr
isinstancerh   dictgetri   max_position_embeddingsmax_seq_len_cachedoriginal_max_seq_lenrU   r   rope_init_fnattention_scalingregister_bufferrf   original_inv_freq)r3   rU   devicerf   r4   s       r5   r-   zT5GemmaRotaryEmbedding.__init___   s    6>** 	'z&:Mt/T/T 	'#044[&BUBYBYZ`BaBabbDNN&DN"("@$*$B!/?+/+<+<T[&+Q+Q($(ZeDDD!%r6   c                 X   | j         d d d d f                                                             |j        d         dd                              |j                  }|d d d d d f                                         }t          |j        j        t                    r|j        j        dk    r|j        j        nd}t          j
        |d          5  |                                |                                z                      dd          }t          j        ||fd	          }|                                | j        z  }|                                | j        z  }	d d d            n# 1 swxY w Y   |                    |j        
          |	                    |j        
          fS )Nr   r9   r"   mpscpuF)device_typeenabledr8   r(   dtype)rf   rA   expandrG   torx   rn   rj   strr0   autocast	transposecatcosru   sinr   )
r3   r>   position_idsinv_freq_expandedposition_ids_expandedr|   freqsembr   r   s
             r5   rD   zT5GemmaRotaryEmbedding.forwardp   s    !M$4-8>>@@GGHZ[\H]_acdeehhijiqrr ,QQQaaaZ 8 > > @ @'1!(-'E'Ek!(-[`J`J`ahmmfk^UCCC 	5 	5&,,..1F1L1L1N1NNYYZ[]^__E)UEN333C''))d44C''))d44C		5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 vvAGv$$cff17f&;&;;;s   BE++E/2E/r+   )rJ   rK   rL   r0   Tensor__annotations__r-   no_gradr   rD   rN   rO   s   @r5   re   re   \   s{         l/ / / / / /" U]__< <  _< < < < <r6   re   c                     | dd| j         d         dz  f         }| d| j         d         dz  df         }t          j        | |fd          S )z*Rotates half the hidden dims of the input..Nr9   r8   r~   )rG   r0   r   )r>   x1x2s      r5   rotate_halfr      s]    	
3"!'"+"""	#B	
3q """	#B9rc2YB''''r6   c                     |                     |          }|                     |          }| |z  t          |           |z  z   }||z  t          |          |z  z   }||fS )a  Applies Rotary Position Embedding to the query and key tensors.

    Args:
        q (`torch.Tensor`): The query tensor.
        k (`torch.Tensor`): The key tensor.
        cos (`torch.Tensor`): The cosine part of the rotary embedding.
        sin (`torch.Tensor`): The sine part of the rotary embedding.
        position_ids (`torch.Tensor`, *optional*):
            Deprecated and unused.
        unsqueeze_dim (`int`, *optional*, defaults to 1):
            The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
            sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
            that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
            k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
            cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
            the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
    Returns:
        `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
    )	unsqueezer   )qkr   r   r   unsqueeze_dimq_embedk_embeds           r5   apply_rotary_pos_embr      sc    ( --
&
&C
--
&
&C3w;q>>C/0G3w;q>>C/0GGr6   rc   n_repreturnc                     | j         \  }}}}|dk    r| S | dddddddddf                             |||||          } |                     |||z  ||          S )z
    This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
    num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
    r"   N)rG   r   reshape)rc   r   batchnum_key_value_headsslenhead_dims         r5   	repeat_kvr      s    
 2?1D.Ehzz!!!!QQQaaa"23::5BUW\^bdlmmM  (;e(CT8TTTr6           modulequerykeyvalueattention_maskr`   scalingsoftcapc                    |
| j         dz  }t          || j                  }	t          || j                  }
t          j        ||	                    dd                    |z  }|||z  }t          j        |          }||z  }|$|d d d d d d d |	j        d         f         }||z   }t          j	        
                    |dt          j                                      |j                  }t          j	                            ||| j                  }t          j        ||
          }|                    dd                                          }||fS )	N      r8   r   r9   )r(   r   )ptrainingr"   )r   r   num_key_value_groupsr0   matmulr   tanhrG   r.   
functionalsoftmaxfloat32r   r   r`   r   
contiguous)r   r   r   r   r   r`   r   r   kwargs
key_statesvalue_statesattn_weightscausal_maskattn_outputs                 r5   eager_attention_forwardr      sR    /4'3 ;<<JUF$?@@L<z';';Aq'A'ABBWLL#g-z,//#g-!$QQQ111.D
0@0D.D%DE#k1 =((2U](SSVVW\WbccL=((6?([[L,|\::K''1--88::K$$r6   c                   D    e Zd ZdZdedef fdZ eddd          	 	 dd
ej	        de
ej	        ej	        f         deej	                 dee         deej                 dee         de
ej	        eej	                 ee
ej	                          f         fd            Z xZS )T5GemmaSelfAttention=Multi-headed attention from 'Attention Is All You Need' paperrU   	layer_idxc                 "   t                                                       || _        || _        t	          |d|j        |j        z            | _        |j        |j        z  | _	        |j
        dz  | _        | j        j        | _        |j        | _        t          j        |j        |j        | j        z  |j                  | _        t          j        |j        |j        | j        z  |j                  | _        t          j        |j        |j        | j        z  |j                  | _        t          j        |j        | j        z  |j        |j                  | _        | j        j        | _        |j        |         dk    r|j        nd | _        d S )Nr   r   rS   sliding_attention)r,   r-   rU   r   getattrrV   num_attention_headsr   r   r   query_pre_attn_scalarr   attention_dropout
is_decoder	is_causalr.   rX   attention_biasq_projk_projv_projo_projattn_logit_softcappinglayer_typessliding_windowr3   rU   r   r4   s      r5   r-   zT5GemmaSelfAttention.__init__   s}   "
F4F&Jd4dee$*$>&B\$\!3T9!%!>*i :T] JQWQf
 
 
 i :T] JQWQf
 
 
 i :T] JQWQf
 
 
 i&68JQWQf
 
 
 '+k&H#7=7I)7TXk7k7kf33qur6   past_key_valuepast_key_values4.58new_nameversionNrc   position_embeddingsr   cache_positionr   r   c                 \   |j         d d         }g |d| j        R }|                     |                              |                              dd          }	|                     |                              |                              dd          }
|                     |                              |                              dd          }|\  }}t          |	|
||          \  }	}
|&|||d}|                    |
|| j	        |          \  }
}t          }| j        j        dk    rt          | j        j                 } || |	|
||f| j        r| j        nd| j        | j        | j        d|\  }} |j        g |dR                                  }|                     |          }||fS Nr9   r"   r8   )r   r   r   eagerr   r`   r   r   r   rG   r   r   viewr   r   r   r   updater   r   rU   _attn_implementationr   r   r   r   r   r   r   r   r   r3   rc   r   r   r   r   r   input_shapehidden_shapequery_statesr   r   r   r   cache_kwargsattention_interfacer   r   s                     r5   rD   zT5GemmaSelfAttention.forward       $)#2#.88b8$-88{{=1166|DDNNqRSTT[[//44\BBLLQPQRR
{{=1166|DDNNqRSTT&S#7jRUWZ#[#[ j&#&snUUL'6'='=j,X\Xfht'u'u$J(?;+w66"9$+:Z"[$7$7%
 /3mDD**L./%
 %
 %
 %
!\ *k);;;;;;FFHHkk+..L((r6   NN)rJ   rK   rL   __doc__r$   rM   r-   r   r0   r   rF   r   r   
LongTensorr   r   rD   rN   rO   s   @r5   r   r      s*       GGv2 vs v v v v v v4 _%0A6RRR ,059+) +)|+) #5<#=>+) !.	+)
 "%+) !!12+) -.+) 
u|Xel3XeEL>Q5RR	S+) +) +) SR+) +) +) +) +)r6   r   c                       e Zd ZdZdedef fdZ eddd          	 dd
ej	        de
ej	                 de
ej	                 de
e         dee         deej	        e
ej	                 e
eej	                          f         fd            Z xZS )T5GemmaCrossAttentionr   rU   r   c                    t                                                       || _        || _        t	          |d|j        |j        z            | _        |j        |j        z  | _	        |j
        dz  | _        | j        j        | _        d| _        t          j        |j        |j        | j        z  |j                  | _        t          j        |j        |j        | j        z  |j                  | _        t          j        |j        |j        | j        z  |j                  | _        t          j        |j        | j        z  |j        |j                  | _        | j        j        | _        |j        t/          d          d S )Nr   r   FrS   zBCross-attention needs cross_attention_hidden_size to be specified.)r,   r-   rU   r   r   rV   r   r   r   r   r   r   r   r   r.   rX   r   r   cross_attention_hidden_sizer   r   r   r   
ValueErrorr   s      r5   r-   zT5GemmaCrossAttention.__init__   sp   "
F4F&Jd4dee$*$>&B\$\!3T9!%!>i :T] JQWQf
 
 
 i.0JT]0Zagav
 
 
 i.0JT]0Zagav
 
 
 i&68JQWQf
 
 
 '+k&H#-5abbb 65r6   r   r   r   r   Nrc   r   encoder_hidden_statesr   r   c                 @   |t          d          |j        d d         }g |d| j        R }|                     |                              |                              dd          }|&|j                            | j                  }	|j	        }
||	s|j        d d         }g |d| j        R }| 
                    |                              |                              dd          }|                     |                              |                              dd          }|.|
                    ||| j                  \  }}d|j        | j        <   n.|
j        | j                 j        }|
j        | j                 j        }t           }| j        j        dk    rt&          | j        j                 } || ||||f| j        r| j        nd| j        d | j        d|\  }} |j        g |dR                                  }|                     |          }||fS )	Nz5Encoder hidden state is required for cross attention.r9   r"   r8   Tr   r   r   )r   rG   r   r   r   r   
is_updatedrp   r   cross_attention_cacher   r   r   layerskeysvaluesr   rU   r   r   r   r   r   r   r   r   r   )r3   rc   r   r   r   r   r   r   r   r   curr_past_key_valueencoder_input_shapeencoder_hidden_shaper   r   r   r   r   s                     r5   rD   zT5GemmaCrossAttention.forward<  s\    !(TUUU#)#2#.88b8$-88{{=1166|DDNNqRSTT&(377GGJ"1"G"*""7"=crc"B#L%8#L"#Ldm#L#L %:;;@@AUVV``abdeffJ;;'<==BBCWXXbbcdfghhL*+>+E+EjR^`d`n+o+o(
L=A*4>:,3DNCHJ.5dnELL(?;+w66"9$+:Z"[$7$7%
 /3mDD**L/%
 %
 %
 %
!\ *k);;;;;;FFHHkk+..L((r6   r+   )rJ   rK   rL   r   r$   rM   r-   r   r0   r   r   r   r   r   rF   rD   rN   rO   s   @r5   r   r     s       GGc2 cs c c c c c c8 _%0A6RRR ,03) 3)|3) !.3)  (5	3)
 "%3) -.3) 
u|Xel3XeEL>Q5RR	S3) 3) 3) SR3) 3) 3) 3) 3)r6   r   c                        e Zd ZdZdef fdZ	 	 ddej        deej        ej        f         de	ej                 de	ej
                 d	eej        f         f
d
Z xZS )T5GemmaEncoderLayerzEncoder sub-layer.r   c                 0   t                                                       |j        | _        || _        || _        |j        |         | _        t          ||          | _        t          |j        |j
                  | _        t          |j        |j
                  | _        t          |          | _        t          |j        |j
                  | _        t          |j        |j
                  | _        t#          j        |j                  | _        d S N)rU   r   r)   )r,   r-   rV   rU   r   r   attention_typer   	self_attnr&   rms_norm_epspre_self_attn_layernormpost_self_attn_layernormrQ   mlppre_feedforward_layernormpost_feedforward_layernormr.   r^   r_   r`   r   s      r5   r-   zT5GemmaEncoderLayer.__init__v  s    !-"$0;-
 
 
 (6f6HfNa'b'b'b$(6v7IvOb(c(c(c%f%%)78JPVPc)d)d)d&*89KQWQd*e*e*e'z&"566r6   Nrc   r   r   r   r   c           	      l   |}|                      |          } | j        d||||d d|\  }}|                     |          }||                     |          z   }|}|                     |          }|                     |          }|                     |          }||                     |          z   }|S )N)rc   r   r   r   r    )r  r  r  r`   r
  r	  r  )r3   rc   r   r   r   r   residual_s           r5   rD   zT5GemmaEncoderLayer.forward  s     !44]CC)4> 
' 3)% 
 
 
 
q 55mDD 4<<#>#>> 66}EE//77FF 4<<#>#>>r6   r   )rJ   rK   rL   r   rM   r-   r0   r   rF   r   r   FloatTensorrD   rN   rO   s   @r5   r   r   s  s        7# 7 7 7 7 7 70 2637 | #5<#=> !.	
 u/0 
u !	"       r6   r   c                   N    e Zd ZdZdef fdZ eddd          	 	 	 	 	 	 	 dd
ej        de	ej        ej        f         de
ej                 de
ej                 de
e         de
e         de
ej                 de
ej                 de
ej                 dej        fd            Z xZS )T5GemmaDecoderLayerz2Decoder sub-layer: an extra cross-attention layer.r   c                     t                                          ||           t          ||          | _        t	          |j        |j                  | _        t	          |j        |j                  | _        d S r  )	r,   r-   r   
cross_attnr&   rV   r  pre_cross_attn_layernormpost_cross_attn_layernormr   s      r5   r-   zT5GemmaDecoderLayer.__init__  sn    +++/vSSS(6v7IvOb(c(c(c%)78JPVPc)d)d)d&&&r6   r   r   r   r   NFrc   r   r   r   	use_cacher   r   encoder_attention_maskr   c
                 6   |}|                      |          } | j        d||||||j        nd ||d|
\  }}|                     |          }||                     |          z   }|}|                     |          } | j        d|||	||d|
\  }}|                     |          }||                     |          z   }|}|                     |          }| 	                    |          }| 
                    |          }||                     |          z   }|S )N)rc   r   r   r   r   r  r   )rc   r   r   r   r  r  )r  r  self_attention_cacher  r`   r  r  r  r
  r	  r  )r3   rc   r   r   r   r   r  r   r   r  r   r  r  s                r5   rD   zT5GemmaDecoderLayer.forward  sl    !44]CC)4> 	
' 3)%DSD_O@@ei)	
 	
 	
 	
q 55mDD 4<<#>#>> 55mDD*4? 
'"71+
 
 
 
q 66}EE 4<<#>#>> 66}EE//77FF 4<<#>#>>r6   )NNNFNNN)rJ   rK   rL   r   rM   r-   r   r0   r   rF   r   r   r
   boolr  rD   rN   rO   s   @r5   r  r    sF       <<e# e e e e e e _%0A6RRR
 26379=$)598<9=. .|. #5<#=>. !.	.
 u/0. ""56. D>. !!12.  (5. !) 6. 
	. . . SR. . . . .r6   r  c                   V     e Zd ZdZd
dededef fdZdej        dej        fd	Z	 xZ
S )T5GemmaClassificationHeadz-Head for sentence-level classification tasks.r   rV   
num_labelsclassifier_dropout_ratec                     t                                                       t          j        |          | _        t          j        ||          | _        d S )N)r   )r,   r-   r.   r^   r`   rX   out_proj)r3   rV   r  r  r4   s       r5   r-   z"T5GemmaClassificationHead.__init__  sE    z$;<<<	+z::r6   rc   r   c                 Z    |                      |          }|                     |          }|S r+   )r`   r!  )r3   rc   s     r5   rD   z!T5GemmaClassificationHead.forward  s*    ]33m44r6   )r   )rJ   rK   rL   r   rM   rA   r-   r0   r   rD   rN   rO   s   @r5   r  r    s        77; ;C ;S ;SX ; ; ; ; ; ;
U\ el        r6   r  c                   V     e Zd ZdZd
dededef fdZdej        dej        fd	Z	 xZ
S )T5GemmaLMHeadz.Head for language modeling (generation) tasks.FrV   
vocab_sizerT   c                     t                                                       t          j        |||          | _        d S )NrS   )r,   r-   r.   rX   r!  )r3   rV   r%  rT   r4   s       r5   r-   zT5GemmaLMHead.__init__  s5    	+zEEEr6   rc   r   c                 0    |                      |          }|S r+   )r!  )r3   rc   logitss      r5   rD   zT5GemmaLMHead.forward  s    }--r6   )F)rJ   rK   rL   r   rM   r  r-   r0   r   rD   rN   rO   s   @r5   r$  r$    s        88F FC FS F F F F F F FU\ el        r6   r$  c                   D    e Zd ZdZdedef fdZ eddd          	 	 dd
ej	        de
ej	        ej	        f         deej	                 dee         deej                 dee         de
ej	        eej	                 ee
ej	                          f         fd            Z xZS )T5GemmaAttentionr   rU   r   c                    t                                                       || _        || _        t	          |d|j        |j        z            | _        |j        |j        z  | _	        |j
        dz  | _        | j        j        | _        d| _        t          j        |j        |j        | j        z  |j                  | _        t          j        |j        |j        | j        z  |j                  | _        t          j        |j        |j        | j        z  |j                  | _        t          j        |j        | j        z  |j        |j                  | _        | j        j        | _        |j        |         dk    r|j        nd | _        d S )Nr   r   TrS   r   )r,   r-   rU   r   r   rV   r   r   r   r   r   r   r   r   r.   rX   r   r   r   r   r   r   r   r   r   s      r5   r-   zT5GemmaAttention.__init__  sz   "
F4F&Jd4dee$*$>&B\$\!3T9!%!>i :T] JQWQf
 
 
 i :T] JQWQf
 
 
 i :T] JQWQf
 
 
 i&68JQWQf
 
 
 '+k&H#7=7I)7TXk7k7kf33qur6   r   r   r   r   Nrc   r   r   r   r   r   c                 \   |j         d d         }g |d| j        R }|                     |                              |                              dd          }	|                     |                              |                              dd          }
|                     |                              |                              dd          }|\  }}t          |	|
||          \  }	}
|&|||d}|                    |
|| j	        |          \  }
}t          }| j        j        dk    rt          | j        j                 } || |	|
||f| j        r| j        nd| j        | j        | j        d|\  }} |j        g |dR                                  }|                     |          }||fS r   r   r   s                     r5   rD   zT5GemmaAttention.forward  r   r6   r   )rJ   rK   rL   r   r#   rM   r-   r   r0   r   rF   r   r   r   r   r   rD   rN   rO   s   @r5   r*  r*    s)       GGv} v v v v v v v2 _%0A6RRR ,059+) +)|+) #5<#=>+) !.	+)
 "%+) !!12+) -.+) 
u|Xel3XeEL>Q5RR	S+) +) +) SR+) +) +) +) +)r6   r*  c                   d     e Zd ZU eed<   dZdZddgZdgZdZ	dZ
dZdZdZeedZ fdZd	 Z xZS )
T5GemmaPreTrainedModelrU   modelTr   r  r   )rc   
attentionsc                    t                                          |           | j        j        }t	          |t
                    r|j        j        j        d         dz  }|j        j        j	        
                    d||z             t          |j        d          r1|j        j        '|j        j        j	                                         d S d S d S t	          |t                    rS| j        j        sE|j        j        j        d         dz  }|j        j        j	        
                    d||z             d S d S d|j        j        v r |j        j	                                         d S d S )Nr   r   r   )r=   stdrT   RMSNorm)r,   _init_weightsrU   initializer_rangern   r  r!  r2   rG   datanormal_rm   rT   zero_r$  tie_word_embeddingsr4   rJ   )r3   r   r2  scaler4   s       r5   r4  z$T5GemmaPreTrainedModel._init_weightsY  sh   f%%%k+f788 	'O*03t;EO"'//ScEk/JJJv// 2FO4H4T$)//111112 24T4T.. 	';2 O.4Q74?&+33#+3NNNNNO O &*333M$$&&&&& 43r6   c                 J   | j         j        j        }| j         j        j        }|t	          d          |                    |j                  }|dddf                                         |dddf<   ||d<   |t	          d          |                    |dk    |           |S )	z
        Shifts input_ids to the right, prepends the decoder_start_token_id, and handles
        pad_token_id replacement for labels that were -100.
        This is a common preparation step for decoder inputs in sequence-to-sequence models.
        Nz:self.model.config.decoder.bos_token_id has to be defined. .r9   r"   ).r   z9self.model.config.decoder.pad_token_id has to be defined.i)	rU   decoderbos_token_idpad_token_idr   	new_zerosrG   clonemasked_fill_)r3   	input_idsdecoder_start_token_idr>  shifted_input_idss        r5   _shift_rightz#T5GemmaPreTrainedModel._shift_rightj  s     "&!4!A{*7!)YZZZ &//	@@%.sCRCx%8%>%>%@%@#qrr'"$:&!XYYY 	&&'8D'@,OOO  r6   )rJ   rK   rL   r#   r   base_model_prefixsupports_gradient_checkpointing_no_split_modules_skip_keys_device_placement_supports_flash_attn_supports_sdpa_supports_flex_attn_can_compile_fullgraph_supports_attention_backendr  r*  _can_record_outputsr4  rE  rN   rO   s   @r5   r.  r.  G  s         &*#.0EF#4"5N!"&,& 
' ' ' ' '"! ! ! ! ! ! !r6   r.  c           
      Z     dt           dt           dt           dt           dt          f
 fd}|S )z4
    This creates bidirectional attention mask.
    	batch_idxhead_idxq_idxkv_idxr   c                      t          j        dt           j                  S | |f                             t           j                  S )Nr  r   )r0   onesr  r   )rQ  rR  rS  rT  r   s       r5   
inner_maskz/bidirectional_mask_function.<locals>.inner_mask  s@    !:b
3333i/033EJ???r6   rM   r  )r   rW  s   ` r5   bidirectional_mask_functionrY    sW    
@c @S @ @c @d @ @ @ @ @ @
 r6   r   c           
      Z     dt           dt           dt           dt           dt          f
 fd}|S )zH
    This creates bidirectional attention mask with sliding window.
    rQ  rR  rS  rT  r   c                 *    |z
  |k     ||z   k     z  S r+   r  )rQ  rR  rS  rT  r   s       r5   rW  z>sliding_window_bidirectional_mask_function.<locals>.inner_mask  s"    &/FU^=S4STTr6   rX  )r   rW  s   ` r5   *sliding_window_bidirectional_mask_functionr\    sW    
Uc US U Uc Ud U U U U U U r6   	token_idsr>  c                     | ;|t          d          | |k                        |j        t          j                  }n>t          j        |j        d         |j        d         f|j        t          j                  }|S )z%Construct the default attention mask.Nz3`pad_token_id` is required for padding information.r   r"   rx   r   )r   r   rx   r0   longrV  rG   )r]  rc   r>  r   s       r5   make_default_2d_attention_maskra    s     RSSS#|3778LejYY #]%8%;<]EYafak
 
 
 r6   c                        e Zd ZeedZ fdZe	 	 	 	 ddee	j
                 dee	j                 dee	j
                 dee	j                 dee         d	efd
            Z xZS )T5GemmaEncoder)r0  rc   c                    t                                                     j        | _        j        | _        t          j        j        j        | j                  | _        t          j        j
                  | _        t                    | _        d| _        t          j        fdt!          j                  D                       | _        t          j        j                  | _        |                                  d S )Nr  rU   Fc                 0    g | ]}t          |          S r  )r   .0r   rU   s     r5   
<listcomp>z+T5GemmaEncoder.__init__.<locals>.<listcomp>  $    eee	 33eeer6   )r,   r-   r>  padding_idxr%  r.   	EmbeddingrV   embed_tokensr&   r  normre   
rotary_embgradient_checkpointing
ModuleListrangenum_hidden_layersr   r^   r_   r`   	post_initra   s    `r5   r-   zT5GemmaEncoder.__init__  s       !. +L):F<NPTP`aa"6#56;NOOO	0???&+#meeeeU6KcEdEdeee
 
 z&"566 	r6   NrB  r   r   inputs_embedsr   r   c           	         |d u |d uz  rt          d          |                    dd            ||                     |          }t          j        d|j        d         |j                  }||                    d          }|t          ||| j	        j
                  }t          |x}t                    sa| j	        |||d |d}t          di |dt          |          it          di |t!          | j	        j                  t          |          dd	}|}	|                     |	|          }
t          j        | j	        j        d
z  |	j                  }|	|z  }	|                     |	          }	| j        d | j	        j                 D ]} ||	|
||j                 |fi |}	|                     |	          }	|                     |	          }	t7          |	          S )N:You must specify exactly one of input_ids or inputs_embedsr   r   r"   rx   rU   input_embedsr   r   r   r   or_mask_function)r{  and_mask_functionfull_attentionr         ?r   )last_hidden_stater  )r   poprm  r0   arangerG   rx   r   ra  rU   r>  rn   ro   r   rY  r   r\  r   ro  tensorrV   r   r`   r   rs  r  rn  r   )r3   rB  r   r   ru  r   r   self_attn_mask_mappingmask_kwargsrc   r   
normalizerlayer_modules                r5   rD   zT5GemmaEncoder.forward  sc    -t";< 	[YZZZ 	

$d+++  --i88Ma)<Q)?H\]]])33A66L!;I}VZVaVnooNNB0DII 	+ -"0"0#' , K #5 # #!# #%@%P%P# # # &G & &!&%OPTP[Pj%k%k&A.&Q&Q& & & &
& 
&" &"oom\JJ\$+"93">mFYZZZ
%
2]33 K(G$+*G(GH 	 	L(L#&|'BC	 
  MM 		-00]33+
 
 
 	
r6   NNNN)rJ   rK   rL   r   r   rO  r-   r!   r   r0   r   r   r  r   r   r   rD   rN   rO   s   @r5   rc  rc    s        *, 
    $  15153759A
 A
E,-A
 !.A
 u/0	A

   12A
 +,A
 
A
 A
 A
 A
 A
 A
 A
 A
r6   rc  c                   l    e Zd Z eed           eed          edZ fdZe		 	 	 	 	 	 	 	 	 dde
ej                 de
ej                 de
ej                 d	e
e         d
e
ej                 de
e         de
ej                 de
ej                 de
ej                 dee         defd            Z xZS )T5GemmaDecoderr"   )index)r0  cross_attentionsrc   c                     t                                                     t          j        fdt	          j                  D                       | _        |                                  d S )Nc                 0    g | ]}t          |          S r  )r  rg  s     r5   ri  z+T5GemmaDecoder.__init__.<locals>.<listcomp>  rj  r6   )r,   r-   r.   rq  rr  rs  r   rt  ra   s    `r5   r-   zT5GemmaDecoder.__init__  si       meeeeU6KcEdEdeee
 
 	r6   NrB  r   r   r   ru  r  r   r   r  r   r   c
                    |d u |d uz  rt          d          |t          d          ||                     |          }| j        s:|r8|6t          t	          | j                  t	          | j                            }|B||                                nd}t          j        |||j	        d         z   |j
                  }||                    d          }||t          ||| j        j                  }t          |x}t                    s0| j        |||||j        nd |d}t#          di |t%          di |d}t          |	x}t                    s-| j        ||	|d d d}d	t#          di |d
t'          |	          ii}|}|                     ||          }t          j        | j        j        dz  |j                  }||z  }|                     |          }| j        d | j        j                 D ]$} |||||j                 ||||||d	         f	i |
}%|                     |          }|                     |          }t;          ||          S )Nrw  z0`encoder_hidden_states` must be given in decoderre  r   r"   rx  ry  r}  r~  r{  r  r   )r  r   r  )r   rm  r   r
   r	   rU   get_seq_lengthr0   r  rG   rx   r   ra  r>  rn   ro   r  r   r   rY  ro  r  rV   r   r`   r   rs  r  rn  r   )r3   rB  r   r   r   ru  r  r   r   r  r   past_seen_tokensr  r  cross_attn_mask_mappingrc   r   r  r  s                      r5   rD   zT5GemmaDecoder.forward  s%    -t";< 	[YZZZ (OPPP  --i88M} 	v 	v/F1,dk2R2R2RT`hlhsTtTtTtuuO!CRC^==???de"\ "2]5H5K"KTaTh  N )33A66L!o&=;I}VZVaVnooNNB0DII 	+ -"0"0KZKf?#G#Glp , K #5"C"C{"C"C%F%U%U%U%U& &"
 5KK1TRR 	+ 5"8"0#' $ K !"4 # #!# #%@AW%X%X# # #'# &"oom\JJ\$+"93">mFYZZZ
%
2]33 K(G$+*G(GH 	 	L(L#&|'BC%'(89   MM 		-00]338++
 
 
 	
r6   )	NNNNNNNNN)rJ   rK   rL   r    r   r   r  rO  r-   r!   r   r0   r   r   r
   r  r  r   r   r   rD   rN   rO   s   @r5   r  r    sv       $n%9CCC*N+@JJJ,       1515379=59$(598<9=Z
 Z
E,-Z
 !.Z
 u/0	Z

 ""56Z
   12Z
 D>Z
 !!12Z
  (5Z
 !) 6Z
 +,Z
 
3Z
 Z
 Z
 Z
 Z
 Z
 Z
 Z
r6   r  c                       e Zd Zdef fdZd Zd Zd Zee		 	 	 	 	 	 	 	 	 	 	 	 dde
ej                 de
ej                 d	e
ej                 d
e
ej                 de
ej                 de
ej                 de
e         de
e         de
ej                 de
ej                 de
e         de
ej                 dee         defd                        Z xZS )T5GemmaModelrU   c                    t                                          |           |j        st          d          t	          |j                  | _        t          |j                  | _        |                                  d S )NzVT5GemmaModel only support encoder-decoder modeling. Use `T5GemmaEncoderModel` instead.)	r,   r-   is_encoder_decoderr   rc  encoderr  r<  rt  ra   s     r5   r-   zT5GemmaModel.__init__z  sn       ( 	wuvvv%fn55%fn55r6   c                     | j         S r+   r  rH   s    r5   get_encoderzT5GemmaModel.get_encoder  s
    |r6   c                 4    | j                                         S r+   r  get_input_embeddingsrH   s    r5   r  z!T5GemmaModel.get_input_embeddings      |00222r6   c                 6    | j                             |          S r+   r  set_input_embeddingsr3   new_embeddingss     r5   r  z!T5GemmaModel.set_input_embeddings      |00@@@r6   NrB  r   r   decoder_input_idsdecoder_attention_maskdecoder_position_idsencoder_outputsr   ru  decoder_inputs_embedsr  r   r   r   c                     | | j         d||||	d|}|j        } | j        d||||
|||||d	|}t          |j        |j        |                    dd          r|j        n|j        f|j        |j        |j        |j        |j                  S )aX  
        decoder_position_ids (`torch.LongTensor` of shape `(batch_size, decoder_sequence_length)`, *optional*):
            Indices of positions of each decoder input sequence tokens in the position embeddings. Selected in the range `[0,
            config.decoder.n_positions - 1]`. [What are position IDs?](../glossary#position-ids)
        NrB  r   r   ru  )	rB  r   r   ru  r   r   r  r  r   output_hidden_statesF)r  r   decoder_hidden_statesdecoder_attentionsr  encoder_last_hidden_stater   encoder_attentionsr  )	r  r  r<  r   r   rp   rc   r0  r  )r3   rB  r   r   r  r  r  r  r   ru  r  r  r   r   r   decoder_outputss                   r5   rD   zT5GemmaModel.forward  s    . "*dl #-)+	 
  O !0 A&$, 
'1-/+"7#1)
 
 
 
 "-?+;zz0%88#6/"?"?!35.9,=&5&G"1"?.9
 
 
 	
r6   )NNNNNNNNNNNN)rJ   rK   rL   r#   r-   r  r  r  r   r   r   r0   r   r  
BoolTensorr   r
   r   r  r   r   r   rD   rN   rO   s   @r5   r  r  x  s       	} 	 	 	 	 	 	  3 3 3A A A  156:378<=A;?599=048<$(598
 8
E,-8
 !!238
 u/0	8

 $E$458
 !))9 :8
 'u'788
 "/28
 ""568
  -8
  (58
 D>8
 !!128
 +,8
 
8
 8
 8
 ^ 8
 8
 8
 8
 8
r6   r  c                        e Zd Zdef fdZd Zd Zee	 	 	 	 dde	e
j                 de	e
j                 de	e
j                 d	e	e
j                 d
ee         defd                        Z xZS )T5GemmaEncoderModelrU   c                     t                                          |           |j        rt          d          t	          |j                  | _        |                                  d S )NzQT5GemmaEncoderModel only supports encoder-only model. Use `T5GemmaModel` instead.)r,   r-   r  r   rc  r  rt  ra   s     r5   r-   zT5GemmaEncoderModel.__init__  s]       $ 	rpqqq%fn55r6   c                 4    | j                                         S r+   r  rH   s    r5   r  z(T5GemmaEncoderModel.get_input_embeddings  r  r6   c                 6    | j                             |          S r+   r  r  s     r5   r  z(T5GemmaEncoderModel.set_input_embeddings  r  r6   NrB  r   r   ru  r   r   c                 *     | j         d||||d|}|S )Nr  r  r  )r3   rB  r   r   ru  r   r  s          r5   rD   zT5GemmaEncoderModel.forward  s?     '$, 
)%'	
 

 
 
 r6   r  )rJ   rK   rL   r#   r-   r  r  r   r   r   r0   r   r  r   r   r   r   rD   rN   rO   s   @r5   r  r    s       }      3 3 3A A A  156:3704 E,- !!23 u/0	
  - +, 
   ^     r6   r  c            %       J    e Zd ZddgZddiZddgdgfiZdef fdZd	 Zd
 Z	d Z
d Zd Zee	 	 	 	 	 	 	 	 	 	 	 	 	 	 d"deej                 deej                 deej                 deej                 deej                 deej                 dee         dee         deej                 deej                 deej                 dee         deej                 deeej        f         dee         deeej                 ef         f d                         Zdej        fd!Z xZ S )#T5GemmaForConditionalGenerationz!model.decoder.embed_tokens.weightzlm_head.out_proj.weightzlm_head.out_projcolwise_reprc   r(  rU   c                     d|_         t                                          |           t          |          | _        |j        j        | _        t          |j        j        | j                  | _	        d| _
        |                                  d S )NTForMaskedLM)r  r,   r-   r  r/  r<  r%  r$  rV   lm_head	loss_typert  ra   s     r5   r-   z(T5GemmaForConditionalGeneration.__init__  ss    $(!   !&))
 .3$V^%?QQ&r6   c                     || j         _        d S r+   r  r!  r  s     r5   set_output_embeddingsz5T5GemmaForConditionalGeneration.set_output_embeddings   s     .r6   c                     | j         j        S r+   r  rH   s    r5   get_output_embeddingsz5T5GemmaForConditionalGeneration.get_output_embeddings  s    |$$r6   c                     | j         j        rF|                     | j        j        |                                                                            d S d S r+   )rU   r9  _tie_or_clone_weightsr  r!  get_decoderr  rH   s    r5   _tie_weightsz,T5GemmaForConditionalGeneration._tie_weights  sU    ;* 	i&&t|'<d>N>N>P>P>e>e>g>ghhhhh	i 	ir6   c                     | j         j        S r+   )r/  r  rH   s    r5   r  z+T5GemmaForConditionalGeneration.get_encoder      z!!r6   c                     | j         j        S r+   )r/  r<  rH   s    r5   r  z+T5GemmaForConditionalGeneration.get_decoder  r  r6   Nr   rB  r   r   r  r  r  r  r   ru  r  labelsr  r   logits_to_keepr   r   c                 F   |||
|                      |          } | j        d|||||||||	|
||d|}|j        }t          |t                    rt          | d          n|}|                     |dd|ddf                   }|                                 j        }|j	        (||j	        z  }t          j        |          }||j	        z  }d}| | j        ||| j        fi |}t          |||j        |j        |j        |j        |j        |j        |j        	  	        S )a  
        decoder_position_ids (`torch.LongTensor` of shape `(batch_size, decoder_sequence_length)`, *optional*):
            Indices of positions of each decoder input sequence tokens in the position embeddings. Selected in the range `[0,
            config.decoder.n_positions - 1]`. [What are position IDs?](../glossary#position-ids)
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
            config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
            (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
        N)rB  r   r   r  r  r  r  r   ru  r  r  r   )	lossr(  r   r  r  r  r  r   r  r  )rE  r/  r  rn   rM   slicer  r  rU   final_logit_softcappingr0   r   loss_functionr%  r   r   r  r  r  r  r   r  )r3   rB  r   r   r  r  r  r  r   ru  r  r  r  r   r  r   r  rc   slice_indicesr(  decoder_configr  s                         r5   rD   z'T5GemmaForConditionalGeneration.forward  s   < "3";@U@] $ 1 1& 9 9.8dj /
)%/#9!5++'"7)/
 /
 /
 /
  (98B>SV8W8Wk~ot444]kmAAA}aaa,?@AA))++21=nDDFZ''FnDDF%4%ffdoPPPPD+;"1"G.A,=&5&O"1"G.A

 

 

 
	
r6   c                 ,    |                      |          S r+   )rE  )r3   r  s     r5   %prepare_decoder_input_ids_from_labelszET5GemmaForConditionalGeneration.prepare_decoder_input_ids_from_labels^  s      (((r6   )NNNNNNNNNNNNNr   )!rJ   rK   rL   _tied_weights_keys_tp_plan_pp_planr#   r-   r  r  r  r  r  r   r   r   r0   r   r  r  r   r
   r  r   rM   r   r   r   rF   r   rD   r  rN   rO   s   @r5   r  r    sl       =?XY"M2H"o%6
$CDH	} 	 	 	 	 	 	/ / /% % %i i i
" " "" " "  156:378<=A;?599=59=A-1$(5934I
 I
E,-I
 !!23I
 u/0	I

 $E$45I
 !))9 :I
 'u'78I
 "/2I
 ""56I
   12I
  ((9:I
 )*I
 D>I
 !!12I
 c5</0I
  +,!I
" 
uU&'8	9#I
 I
 I
 ^ I
V)EL ) ) ) ) ) ) ) )r6   r  c                       e Zd Zddedee         f fdZd Zd Ze	e
	 	 	 	 	 	 	 	 	 	 ddeej                 deej                 d	eej                 d
eej                 deej                 deej                 dee         deej                 deej                 deej                 dee         defd                        Z xZS ) T5GemmaForSequenceClassificationNrU   r  c                    |||_         t                                          |           |j        | _        |j         rt	          |          | _        nt          |          | _        |j        j        }|j         r|j	        j        }t          |dd          }t          || j        |          | _        |                                  dS )z
        is_encoder_decoder (`Optional`, *optional*):
            Whether use encoder_decoder for sequence classification. When set to False, only encoder is used.
        Nr  皙?r  r,   r-   r  r  r/  r  r  rV   r<  r   r  scorert  r3   rU   r  rV   classifier_dropoutr4   s        r5   r-   z)T5GemmaForSequenceClassification.__init__d  s    
 )(:F%    +$ 	5%f--DJJ,V44DJn0$ 	5 .4K$V-FLL.{DOM_``
r6   c                 4    | j                                         S r+   r/  r  rH   s    r5   r  z5T5GemmaForSequenceClassification.get_input_embeddings{      z..000r6   c                 :    | j                             |           d S r+   r/  r  r3   r   s     r5   r  z5T5GemmaForSequenceClassification.set_input_embeddings~      
''.....r6   rB  r   r   r  r  r  r  ru  r  r  r   r   c                    | j         j        r!||t          d| j        j         d          | j         j        r*|(|	&|t          d          |                     |          }| j         j        r. | j        |f||||||||	dd	|}|j        }|j	        }|j
        }n' | j        |f|||d|}|j        }|j        }|j        }|                     |          }||j        d         }n|j        d         }| j         j        |d	k    rt          d
          | j         j        d}n||| j         j        k                        |j        t$          j                  }t%          j        |j        d         |j        t$          j                  }||z                      d          }| j         j        r)|d	z  }t%          j        ||j        d         d	z
            }n)d}t.                              | j        j         d           |t%          j        ||j                  |f         }d}|
|                     ||
|| j                   }t5          ||||          S )  
        decoder_position_ids (`torch.LongTensor` of shape `(batch_size, decoder_sequence_length)`, *optional*):
            Indices of positions of each decoder input sequence tokens in the position embeddings. Selected in the range `[0,
            config.decoder.n_positions - 1]`. [What are position IDs?](../glossary#position-ids)
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        N8Passing input embeddings is currently not supported for  in encoder-decoder mode.If no `decoder_input_ids` or `decoder_inputs_embeds` are passed, `input_ids` cannot be `None`. Please pass either `input_ids` or `decoder_input_ids` or `decoder_inputs_embeds`.F	r   r   r  r  r  r  ru  r  r  r   r   ru  r   r"   z=Cannot handle batch sizes > 1 if no padding token is defined.r9   r_  )maxz will not detect padding tokens in `inputs_embeds`. Results may be unexpected if using padding tokens in conjunction with `inputs_embeds.`rx  )r(  r  pooled_logitsrU   r  r(  rc   r0  )rU   r  NotImplementedErrorr4   rJ   r   rE  r/  r  r  r  rc   r0  r  rG   r>  r   rx   r0   int32r  argmaxclamploggerwarning_oncer  r   )r3   rB  r   r   r  r  r  r  ru  r  r  r   outputsr  rc   r0  r(  
batch_sizelast_non_pad_tokennon_pad_masktoken_indicesr  r  s                          r5   rD   z(T5GemmaForSequenceClassification.forward  s   2 ;) 	y/@]E^%}4>Kb}}}  
 ;) 	=/@/HMbMj  U  
 !% 1 1) < <;) 	,*4$*+-)"3'=%9 /+&;+ + + +G !( 9#9M 3JJ'1tz(-)+	( (
 ( (G !( 9#1M +J-.. "+JJ&,Q/J;#+
a\]]];#+!#"%)AAEEfmUZU`aaL!L)<V]Z_ZefffM"/,">!F!Fr!J!J{- j"a'"%*[1CIZI`acIdghIh%i%i%i"!#>* Z Z Z  
 u|Jv}MMMOaab%%VFR_hlhs%ttD' '!	
 
 
 	
r6   r+   
NNNNNNNNNN)rJ   rK   rL   r#   r   r  r-   r  r  r   r   r0   r   r   r   r  r   r   r   rD   rN   rO   s   @r5   r  r  b  s        } (4.      .1 1 1/ / /  1515378<9=;?5959=A-1i
 i
E,-i
 !.i
 u/0	i

 $E$45i
 !) 6i
 'u'78i
 "/2i
   12i
  ((9:i
 )*i
 +,i
 
"i
 i
 i
 ^ i
 i
 i
 i
 i
r6   r  c                       e Zd Zddedee         f fdZd Zd Ze	e
	 	 	 	 	 	 	 	 	 	 ddeej                 deej                 d	eej                 d
eej                 deej                 deej                 dee         deej                 deej                 deej                 dee         defd                        Z xZS )T5GemmaForTokenClassificationNrU   r  c                    |||_         t                                          |           |j        | _        |j         rt	          |          | _        nt          |          | _        |j        j        }|j         r|j	        j        }t          |dd          }t          || j        |          | _        |                                  dS )z
        is_encoder_decoder (`Optional`, *optional*):
            Whether use encoder_decoder for token classification. When set to False, only encoder is used.
        Nr  r  r  r  s        r5   r-   z&T5GemmaForTokenClassification.__init__  s    
 )(:F%    +$ 	5%f--DJJ,V44DJn0$ 	5 .4K$V-FLL.{DOM_``
r6   c                 4    | j                                         S r+   r  rH   s    r5   r  z2T5GemmaForTokenClassification.get_input_embeddings	  r  r6   c                 :    | j                             |           d S r+   r  r  s     r5   r  z2T5GemmaForTokenClassification.set_input_embeddings  r  r6   rB  r   r   r  r  r  r  ru  r  r  r   r   c                    | j         j        r!||t          d| j        j         d          | j         j        r*|(|	&|t          d          |                     |          }| j         j        r. | j        |f||||||||	dd	|}|j        }|j	        }|j
        }n' | j        |f|||d|}|j        }|j        }|j        }|                     |          }d}|
|                     ||
| j                   }t          ||||          S )	r  Nr  r  r  Fr  r  r  )rU   r  r  r4   rJ   r   rE  r/  r  r  r  rc   r0  r  r  r   )r3   rB  r   r   r  r  r  r  ru  r  r  r   r  r  rc   r0  r(  r  s                     r5   rD   z%T5GemmaForTokenClassification.forward  s   4 ;) 	y/@]E^%}4>Kb}}}   ;) 	=/@/HMbMj  U  
 !% 1 1) < <;) 	,*4$*+-)"3'=%9 /+&;+ + + +G !( 9#9M 3JJ'1tz(-)+	( (
 ( (G !( 9#1M +J-..%%ffdkBBD$'!	
 
 
 	
r6   r+   r  )rJ   rK   rL   r#   r   r  r-   r  r  r   r   r0   r   r   r   r  r   r   r   rD   rN   rO   s   @r5   r   r     s        } (4.      01 1 1/ / /  1515378<9=;?5959=A-1N
 N
E,-N
 !.N
 u/0	N

 $E$45N
 !) 6N
 'u'78N
 "/2N
   12N
  ((9:N
 )*N
 +,N
 
N
 N
 N
 ^ N
 N
 N
 N
 N
r6   r   )r  r  r  r.  r  r   )Nr"   )r   NN)Utypingr   r   r   r0   torch.nnr.   activationsr   cache_utilsr   r	   r
   
generationr   masking_utilsr   r   modeling_flash_attention_utilsr   modeling_layersr   modeling_outputsr   r   r   r   r   r   modeling_rope_utilsr   r   modeling_utilsr   r   processing_utilsr   utilsr   r   r   r   utils.deprecationr   utils.genericr    r!   configuration_t5gemmar#   r$   
get_loggerrJ   r  Moduler&   rQ   re   r   r   r   rM   r   rA   rF   r   r   r   r   r  r  r$  r*  r.  rY  r\  r   ra  rc  r  r  r  r  r  r   __all__r  r6   r5   <module>r     s  , - , , , , , , , , ,        ! ! ! ! ! ! C C C C C C C C C C ) ) ) ) ) ) R R R R R R R R B B B B B B 9 9 9 9 9 9                L K K K K K K K F F F F F F F F & & & & & & R R R R R R R R R R R R 0 0 0 0 0 0 ? ? ? ? ? ? ? ? E E E E E E E E 
	H	%	%= = = = =RY = = =(       &!< !< !< !< !<RY !< !< !<H( ( (   6	UU\ 	U# 	U%, 	U 	U 	U 	U$ ## %  %I %< % 
 % <	 %
 U\* %  % e_ % e_ % 5<%& %  %  %  %FI) I) I) I) I)29 I) I) I)XS) S) S) S) S)BI S) S) S)l1 1 1 1 14 1 1 1h8 8 8 8 8- 8 8 8v    	   	 	 	 	 	BI 	 	 	H) H) H) H) H)ry H) H) H)V :! :! :! :! :!_ :! :! :!z
0F 
8 
 
 
 
s x    ()< 3- \	   "Z
 Z
 Z
 Z
 Z
+ Z
 Z
 Z
zj
 j
 j
 j
 j
^ j
 j
 j
Z O
 O
 O
 O
 O
) O
 O
 O
d ! ! ! ! !0 ! ! !Ho) o) o) o) o)&<o o) o) o)d I
 I
 I
 I
 I
'= I
 I
 I
X o
 o
 o
 o
 o
$: o
 o
 o
d  r6   