
     `iM                        d dl Z d dlmZ d dlmZ d dlmZmZ d dlZd dl	m
Z
 ddlmZ ddlmZmZ ddlmZ dd	lmZ dd
lmZmZmZ ddlmZ ddlmZmZ ddlmZmZm Z  ddl!m"Z"m#Z# ddl$m%Z%m&Z& ddl'm(Z( ddl)m*Z*m+Z+m,Z,m-Z-m.Z. ddl/m0Z0 ddl1m2Z2 ddl3m4Z4 ddl5m6Z6m7Z7  e.j8        e9          Z:e e,d           G d de                                  Z;e e,d           G d de*                                  Z< G d d e
j=                  Z> G d! d"e
j?                  Z@ G d# d$e
j?                  ZA G d% d&e
j?                  ZBd' ZCdSd(ZDd)ejE        d*eFd+ejE        fd,ZG	 	 	 dTd.e
j?        d/ejE        d0ejE        d1ejE        d2eejE                 d3eHd4eeH         d5eeH         d+eIejE        ejE        f         fd6ZJ G d7 d8e
j?                  ZK G d9 d:e          ZLe, G d; d<e&                      ZMd=eFd+eeFeFeFeFgeNf         fd>ZOe, G d? d@eM                      ZPe, G dA dBeMe                      ZQ G dC dDe
j?                  ZRdEeejE                 dFeejE                 dGeFd+ee         fdHZS e,dI           G dJ dKeM                      ZT e,dI           G dL dMeMe                      ZU G dN dOeM          ZV G dP dQeeM          ZWg dRZXdS )U    N)Callable)	dataclass)OptionalUnion   )ACT2FN)CacheDynamicCache)PretrainedConfig)GenerationMixin)create_causal_maskcreate_masks_for_generate!create_sliding_window_causal_mask)FlashAttentionKwargs) GenericForSequenceClassificationGradientCheckpointingLayer)BaseModelOutputWithPastCausalLMOutputWithPast SequenceClassifierOutputWithPast)ROPE_INIT_FUNCTIONSdynamic_rope_update)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)ModelOutputTransformersKwargsauto_docstringcan_return_tuplelogging)deprecate_kwarg)check_model_inputs   )	AutoModel   )Gemma3ConfigGemma3TextConfigzK
    Base class for Gemma3 outputs, with hidden states and attentions.
    )custom_introc                   8    e Zd ZU dZdZeej                 ed<   dS )Gemma3ModelOutputWithPasta  
    image_hidden_states (`torch.FloatTensor`, *optional*):
        A `torch.FloatTensor` of size `(batch_size, num_images, sequence_length, hidden_size)`.
        image_hidden_states of the model produced by the vision encoder and after projecting the last hidden state.
    Nimage_hidden_states)	__name__
__module____qualname____doc__r*   r   torchFloatTensor__annotations__     ~/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/gemma3/modeling_gemma3.pyr)   r)   3   s7           8<%"34;;;;;r3   r)   zR
    Base class for Gemma3 causal language model (or autoregressive) outputs.
    c                       e Zd ZU dZdZeej                 ed<   dZ	eej                 ed<   dZ
ee         ed<   dZeeej                          ed<   dZeeej                          ed<   dZeej                 ed<   dS )	Gemma3CausalLMOutputWithPasta8  
    loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
        Language modeling loss (for next-token prediction).
    logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.text_config.vocab_size)`):
        Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
    past_key_values (`Cache`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
        It is a [`~cache_utils.Cache`] instance. For more details, see our [kv cache guide](https://huggingface.co/docs/transformers/en/kv_cache).

        Contains pre-computed hidden-states (key and values in the self-attention blocks) that can be used (see
        `past_key_values` input) to speed up sequential decoding.
    image_hidden_states (`torch.FloatTensor`, *optional*):
        A `torch.FloatTensor` of size `(batch_size, num_images, sequence_length, hidden_size)`.
        image_hidden_states of the model produced by the vision encoder after projecting last hidden state.
    Nlosslogitspast_key_valueshidden_states
attentionsr*   )r+   r,   r-   r.   r7   r   r/   r0   r1   r8   r9   r	   r:   tupler;   r*   r2   r3   r4   r6   r6   C   s           )-D(5$
%,,,*.FHU&'...'+OXe_+++8<M8E%"345<<<59Ju0129997;%"34;;;;;r3   r6   c            	       P     e Zd ZdZd
dedededef fdZdej        f fd	Z	 xZ
S )Gemma3TextScaledWordEmbeddingz\
    This module overrides nn.Embeddings' forward by multiplying with embeddings scale.
          ?num_embeddingsembedding_dimpadding_idxembed_scalec                     t                                          |||           |                     dt          j        |          d           d S )NrC   F
persistent)super__init__register_bufferr/   tensor)selfr@   rA   rB   rC   	__class__s        r4   rH   z&Gemma3TextScaledWordEmbedding.__init__f   sK    DDD]EL,E,ERWXXXXXr3   	input_idsc                     t                                          |          | j                            | j        j                  z  S N)rG   forwardrC   toweightdtype)rK   rM   rL   s     r4   rP   z%Gemma3TextScaledWordEmbedding.forwardj   s4    wwy))D,<,?,?@Q,R,RRRr3   )r?   )r+   r,   r-   r.   intfloatrH   r/   TensorrP   __classcell__rL   s   @r4   r>   r>   a   s         Y Ys Y3 YS Y_d Y Y Y Y Y YS S S S S S S S S S Sr3   r>   c                   *     e Zd Zdef fdZd Z xZS )	Gemma3MLPconfigc                    t                                                       || _        |j        | _        |j        | _        t          j        | j        | j        d          | _        t          j        | j        | j        d          | _        t          j        | j        | j        d          | _	        t          |j                 | _        d S NFbias)rG   rH   r[   hidden_sizeintermediate_sizennLinear	gate_projup_proj	down_projr   hidden_activationact_fnrK   r[   rL   s     r4   rH   zGemma3MLP.__init__o   s    !-!'!94#3T5KRWXXXy!143IPUVVV4#94;KRWXXXV56r3   c                     |                      |                     |                     |                    |                     |          z            }|S rO   )rf   rh   rd   re   )rK   xrf   s      r4   rP   zGemma3MLP.forwardy   sA    NN4;;t~~a/@/@#A#ADLLQROO#STT	r3   )r+   r,   r-   r&   rH   rP   rW   rX   s   @r4   rZ   rZ   n   sT        7/ 7 7 7 7 7 7      r3   rZ   c                   <     e Zd Zddedef fdZd Zd Zd Z xZ	S )	Gemma3RMSNormư>dimepsc                     t                                                       || _        t          j        t          j        |                    | _        d S rO   )rG   rH   rp   rb   	Parameterr/   zerosrR   )rK   ro   rp   rL   s      r4   rH   zGemma3RMSNorm.__init__   s?    l5;s#3#344r3   c                     |t          j        |                    d                              dd          | j        z             z  S )Nr"   T)keepdim)r/   rsqrtpowmeanrp   )rK   rk   s     r4   _normzGemma3RMSNorm._norm   s8    5;quuQxx}}R}>>IJJJJr3   c                     |                      |                                          }|d| j                                        z   z  }|                    |          S )Nr?   )rz   rU   rR   type_as)rK   rk   outputs      r4   rP   zGemma3RMSNorm.forward   sL    AGGII&& 3!2!2!4!445~~a   r3   c                 H    t          | j        j                   d| j         S )Nz, eps=)r<   rR   shaperp   rK   s    r4   
extra_reprzGemma3RMSNorm.extra_repr   s%    )**<<$(<<<r3   )rn   )
r+   r,   r-   rT   rU   rH   rz   rP   r   rW   rX   s   @r4   rm   rm   ~   s        5 5C 5e 5 5 5 5 5 5
K K K! ! != = = = = = =r3   rm   c                   |     e Zd ZU ej        ed<   ddef fdZ ej                    e	d                         Z
 xZS )Gemma3RotaryEmbeddinginv_freqNr[   c                    t                                                       t          |d          rSt          |j        t
                    r9|j                            d|j                            d                    | _        nd| _        |j        | _	        |j        | _
        || _        t          | j                 | _        |                     | j        |          \  }| _        |                     d|d           | j        | _        d S )Nrope_scaling	rope_typetypedefaultr   FrE   )rG   rH   hasattr
isinstancer   dictgetr   max_position_embeddingsmax_seq_len_cachedoriginal_max_seq_lenr[   r   rope_init_fnattention_scalingrI   r   original_inv_freq)rK   r[   devicer   rL   s       r4   rH   zGemma3RotaryEmbedding.__init__   s    6>** 	'z&:Mt/T/T 	'#044[&BUBYBYZ`BaBabbDNN&DN"("@$*$B!/?+/+<+<T[&+Q+Q($(ZeDDD!%r3   c                 X   | j         d d d d f                                                             |j        d         dd                              |j                  }|d d d d d f                                         }t          |j        j        t                    r|j        j        dk    r|j        j        nd}t          j
        |d          5  |                                |                                z                      dd          }t          j        ||fd	          }|                                | j        z  }|                                | j        z  }	d d d            n# 1 swxY w Y   |                    |j        
          |	                    |j        
          fS )Nr   ru   r$   mpscpuF)device_typeenabledr"   ro   rS   )r   rU   expandr   rQ   r   r   r   strr/   autocast	transposecatcosr   sinrS   )
rK   rk   position_idsinv_freq_expandedposition_ids_expandedr   freqsembr   r   s
             r4   rP   zGemma3RotaryEmbedding.forward   s    !M$4-8>>@@GGHZ[\H]_acdeehhijiqrr ,QQQaaaZ 8 > > @ @'1!(-'E'Ek!(-[`J`J`ahmmfk^UCCC 	5 	5&,,..1F1L1L1N1NNYYZ[]^__E)UEN333C''))d44C''))d44C		5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 vvAGv$$cff17f&;&;;;s   BE++E/2E/rO   )r+   r,   r-   r/   rV   r1   r&   rH   no_gradr   rP   rW   rX   s   @r4   r   r      s         l/ // / / / / / /" U]__< <  _< < < < <r3   r   c                     | dd| j         d         dz  f         }| d| j         d         dz  df         }t          j        | |fd          S )z*Rotates half the hidden dims of the input..Nru   r"   r   )r   r/   r   )rk   x1x2s      r4   rotate_halfr      s]    	
3"!'"+"""	#B	
3q """	#B9rc2YB''''r3   c                     |                     |          }|                     |          }| |z  t          |           |z  z   }||z  t          |          |z  z   }||fS )a  Applies Rotary Position Embedding to the query and key tensors.

    Args:
        q (`torch.Tensor`): The query tensor.
        k (`torch.Tensor`): The key tensor.
        cos (`torch.Tensor`): The cosine part of the rotary embedding.
        sin (`torch.Tensor`): The sine part of the rotary embedding.
        position_ids (`torch.Tensor`, *optional*):
            Deprecated and unused.
        unsqueeze_dim (`int`, *optional*, defaults to 1):
            The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
            sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
            that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
            k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
            cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
            the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
    Returns:
        `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
    )	unsqueezer   )qkr   r   r   unsqueeze_dimq_embedk_embeds           r4   apply_rotary_pos_embr      sc    ( --
&
&C
--
&
&C3w;q>>C/0G3w;q>>C/0GGr3   r:   n_repreturnc                     | j         \  }}}}|dk    r| S | dddddddddf                             |||||          } |                     |||z  ||          S )z
    This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
    num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
    r$   N)r   r   reshape)r:   r   batchnum_key_value_headsslenhead_dims         r4   	repeat_kvr      s    
 2?1D.Ehzz!!!!QQQaaa"23::5BUW\^bdlmmM  (;e(CT8TTTr3           modulequerykeyvalueattention_maskdropoutscalingsoftcapc                    |
| j         dz  }t          || j                  }	t          || j                  }
t          j        ||	                    dd                    |z  }|||z  }t          j        |          }||z  }|$|d d d d d d d |	j        d         f         }||z   }t          j	        
                    |dt          j                                      |j                  }t          j	                            ||| j                  }t          j        ||
          }|                    dd                                          }||fS )	N      r"   r   ru   )ro   rS   )ptrainingr$   )r   r   num_key_value_groupsr/   matmulr   tanhr   rb   
functionalsoftmaxfloat32rQ   rS   r   r   
contiguous)r   r   r   r   r   r   r   r   kwargs
key_statesvalue_statesattn_weightscausal_maskattn_outputs                 r4   eager_attention_forwardr      sR    /4'3 ;<<JUF$?@@L<z';';Aq'A'ABBWLL#g-z,//#g-!$QQQ111.D
0@0D.D%DE#k1 =((2U](SSVVW\WbccL=((6?([[L,|\::K''1--88::K$$r3   c                   *    e Zd ZdZdedef fdZ eddd          	 	 dd
ej	        dej	        de
ej	                 de
e         de
ej                 dee         deej	        e
ej	                 e
eej	                          f         fd            Z xZS )Gemma3Attentionz=Multi-headed attention from 'Attention Is All You Need' paperr[   	layer_idxc                    t                                                       |j        |         dk    | _        || _        || _        t          |d|j        |j        z            | _	        |j        |j
        z  | _        |j        dz  | _        | j        j        | _        | j        j         | _        t#          j        |j        |j        | j	        z  |j                  | _        t#          j        |j        |j
        | j	        z  |j                  | _        t#          j        |j        |j
        | j	        z  |j                  | _        t#          j        |j        | j	        z  |j        |j                  | _        | j        j        | _        | j        r|j        nd | _        t5          |j	        |j                  | _        t5          |j	        |j                  | _        d S )Nsliding_attentionr   r   r^   )ro   rp   )rG   rH   layer_types
is_slidingr[   r   getattrr`   num_attention_headsr   r   r   query_pre_attn_scalarr   attention_dropoutuse_bidirectional_attention	is_causalrb   rc   attention_biasq_projk_projv_projo_projattn_logit_softcappingsliding_windowrm   rms_norm_epsq_normk_normrK   r[   r   rL   s      r4   rH   zGemma3Attention.__init__
  s    ,Y7;NN"
F4F&Jd4dee$*$>&B\$\!3T9!%!>![DDi :T] JQWQf
 
 
 i :T] JQWQf
 
 
 i :T] JQWQf
 
 
 i&68JQWQf
 
 
 '+k&H#7;Pf33D#V=PQQQ#V=PQQQr3   past_key_valuer9   4.58new_nameversionNr:   position_embeddingsr   cache_positionr   r   c                    |j         d d         }g |d| j        R }|                     |                              |                              dd          }	|                     |                              |                              dd          }
|                     |                              |                              dd          }|                     |	          }	|                     |
          }
|\  }}t          |	|
||          \  }	}
|&|||d}|
                    |
|| j        |          \  }
}t          }| j        j        dk    rt          | j        j                 } || |	|
||f| j        r| j        nd| j        | j        d|\  }} |j        g |dR                                  }|                     |          }||fS )Nru   r$   r"   )r   r   r   eagerr   )r   r   r   )r   r   r   viewr   r   r   r   r   r   updater   r   r[   _attn_implementationr   r   r   r   r   r   r   r   )rK   r:   r   r   r9   r   r   input_shapehidden_shapequery_statesr   r   r   r   cache_kwargsattention_interfacer   r   s                     r4   rP   zGemma3Attention.forward'  s    $)#2#.88b8$-88{{=1166|DDNNqRSTT[[//44\BBLLQPQRR
{{=1166|DDNNqRSTT{{<00[[,,
&S#7jRUWZ#[#[ j&#&snUUL'6'='=j,X\Xfht'u'u$J(?;+w66"9$+:Z"[$7$7
%
 /3mDD**L.
%
 
%
 
%
 
%
!\ *k);;;;;;FFHHkk+..L((r3   )NN)r+   r,   r-   r.   r&   rT   rH   r    r/   rV   r   r	   
LongTensorr   r   r<   rP   rW   rX   s   @r4   r   r     s       GGR/ RC R R R R R R: _%0A6RRR ,059-) -)|-) #\-) !.	-)
 "%-) !!12-) -.-) 
u|Xel3XeEL>Q5RR	S-) -) -) SR-) -) -) -) -)r3   r   c                   \    e Zd Zdedef fdZ eddd          	 	 	 	 	 	 dd
ej        dej        dej        de	ej                 de	ej
                 de	e         de	e         de	e         de	ej
                 deej        e	eej        ej        f                  f         fd            Z xZS )Gemma3DecoderLayerr[   r   c                    t                                                       || _        |j        | _        || _        |j        |         | _        t          ||          | _        t          |          | _
        t          | j        |j                  | _        t          | j        |j                  | _        t          | j        |j                  | _        t          | j        |j                  | _        d S )N)r[   r   rp   )rG   rH   r[   r`   r   r   attention_typer   	self_attnrZ   mlprm   r   input_layernormpost_attention_layernormpre_feedforward_layernormpost_feedforward_layernormr   s      r4   rH   zGemma3DecoderLayer.__init__Y  s    !-"$0;()LLLV$$,T-=6CVWWW(5d6FFL_(`(`(`%)6t7GVM`)a)a)a&*78HfNa*b*b*b'''r3   r   r9   r   r   NFr:   position_embeddings_globalposition_embeddings_localr   r   output_attentions	use_cacher   r   c
                 ^   |}|                      |          }| j        j        r|}n|} | j        d||||||||	d|
\  }}|                     |          }||z   }|}|                     |          }|                     |          }|                     |          }||z   }|f}|r||fz  }|S )N)r:   r   r   r   r9   r  r  r   r2   )r  r	  r   r  r  r
  r  )rK   r:   r  r  r   r   r9   r  r  r   r   residualr   self_attn_weightsoutputss                  r4   rP   zGemma3DecoderLayer.forwardf  s    !,,];; >$ 	=";"<+94> 
,
' 3)%+/)
,
 
,
 
,
 
,
(( 55mDD =0 66}EE//77FF =0 " 	,)++Gr3   )NNNFFN)r+   r,   r-   r&   rT   rH   r    r/   rV   r   r  r	   boolr<   r0   rP   rW   rX   s   @r4   r  r  X  sO       c/ cC c c c c c c _%0A6RRR 2637+/,1$)590 0|0 %*L0 $)<	0
 !.0 u/00 "%0 $D>0 D>0 !!120 
u (51BEDU1U+V"WW	X0 0 0 SR0 0 0 0 0r3   r  c                   ^     e Zd ZU eed<   dZdZg dZdgZdZ	dZ
dZdZdZeedZ fdZ xZS )Gemma3PreTrainedModelr[    T)r  SiglipVisionEmbeddingsSiglipEncoderLayer#SiglipMultiheadAttentionPoolingHeadr9   )r:   r;   c                    t                                          |           t          |t                    r |j        j                                         d S d|j        j        v r |j	        j                                         d S d S )NRMSNorm)
rG   _init_weightsr   Gemma3MultiModalProjectormm_input_projection_weightdatazero_rL   r+   rR   )rK   r   rL   s     r4   r   z#Gemma3PreTrainedModel._init_weights  s    f%%%f788 	'-288:::::&*333M$$&&&&& 43r3   )r+   r,   r-   r%   r1   base_model_prefixsupports_gradient_checkpointing_no_split_modules_skip_keys_device_placement_supports_flash_attn_supports_sdpa_supports_flex_attn_can_compile_fullgraph_supports_attention_backendr  r   _can_record_outputsr   rW   rX   s   @r4   r  r    s         &*#   $5"5N!"&+% 
' ' ' ' ' ' ' ' 'r3   r  r   c           
      Z     dt           dt           dt           dt           dt          f
 fd}|S )zA
    Enables a bidirectional mask within the sliding window.
    	batch_idxhead_idxq_idxkv_idxr   c                 0    t          ||z
            k     S )zA token can attend to any other token if their absolute distance is within
        the (exclusive) sliding window size (distance < sliding_window).)abs)r0  r1  r2  r3  r   s       r4   
inner_maskz1_bidirectional_window_overlay.<locals>.inner_mask  s     56>""^33r3   rT   r  )r   r6  s   ` r4   _bidirectional_window_overlayr8    sL    
4c 4S 4 4c 4d 4 4 4 4 4 4
 r3   c                   B    e Zd ZU eed<   def fdZee	 	 	 	 	 	 	 	 	 ddee	j
                 dee	j                 dee	j
                 dee         dee	j                 d	ee         d
ee         dee         dee	j
                 dee         defd                        Z xZS )Gemma3TextModelr[   c                 n   t                                                     j        | _        j        | _        t          j        j        | j        | j        j        dz            | _        t          j
        fdt          j                  D                       | _        t          j        j                  | _        t#                    | _        d| _        t)          j                  j        _        ddi_        t#                    | _        |                                  d S )	N      ?)rC   c                 0    g | ]}t          |          S r2   )r  ).0r   r[   s     r4   
<listcomp>z,Gemma3TextModel.__init__.<locals>.<listcomp>  s$    dddy	22dddr3   r  r[   Fr   r   )rG   rH   pad_token_idrB   
vocab_sizer>   r`   r[   embed_tokensrb   
ModuleListrangenum_hidden_layerslayersrm   r   normr   
rotary_embgradient_checkpointingcopydeepcopyrope_local_base_freq
rope_thetar   rotary_emb_local	post_initri   s    `r4   rH   zGemma3TextModel.__init__  s*      !. + :v143CQUQ\QhjmQm
 
 
 mddddE&JbDcDcddd
 
 "&"4&:MNNN	/v>>>&+# v&&"7*I6 5V D D D 	r3   NrM   r   r   r9   inputs_embedsr  r  output_hidden_statesr   r   r   c
                    ||n| j         j        }||n| j         j        }||n| j         j        }|d u |d uz  rt	          d          | j        r%| j        r|rt                              d           d}|| 	                    |          }|r|| j        st          | j                   }|	B||                                nd}t          j        |||j        d         z   |j                  }	||	                    d          }t#          |x}t$                    si| j         |||	||d}|                                }| j         j        r"d	 |d
<   t+          | j         j                  |d
<   t/          di |t1          di |d}|}|                     ||          }|                     ||          }|rdnd }|rdnd }| j        d | j         j                 D ]=}|r||fz  } ||f||||j                 |||||	d|
}|d         }|r||d         fz  }>|                     |          }|r||fz  }t?          ||||          S )N:You must specify exactly one of input_ids or inputs_embedszX`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.Fr@  r   r$   r   r[   input_embedsr   r   r9   r   c                  B    t          j        dt           j                  S )NTr   )r/   rJ   r  )argss    r4   <lambda>z)Gemma3TextModel.forward.<locals>.<lambda>"  s    TY^Yc@d@d@d r3   or_mask_functionfull_attentionr   r2   )r  r  r   r   r9   r  r  r   )last_hidden_stater9   r:   r;   ) r[   r  rR  r  
ValueErrorrJ  r   loggerwarning_oncerC  r
   get_seq_lengthr/   aranger   r   r   r   r   rK  r   r8  r   r   r   rI  rO  rG  rF  r  rH  r   )rK   rM   r   r   r9   rQ  r  r  rR  r   r   past_seen_tokenscausal_mask_mappingmask_kwargssliding_mask_kwargsr:   r  r  all_hidden_statesall_self_attnsdecoder_layerlayer_outputss                         r4   rP   zGemma3TextModel.forward  sU    2C1N--TXT_Tq$8$D  $+Jj 	 "+!6IIDK<Q	-t";< 	[YZZZ& 	4= 	Y 	j   I  --i88M 	?00*$+>>>O!CRC^==???de"\  =#6q#99$+  N )33A66L ?-FF 	 + -"0"0#2 , K #."2"2"4"4{6 t2d2d./:WX\XcXr:s:s#$67 #5"C"C{"C"C%F%]%]I\%]%]# # & &*__]L%Q%Q"$($9$9-$V$V! #7@BBD0:d![)H4;+H)HI 	6 	6M# 6!m%55!)M+E*C2=3OP) /"3#-   M *!,M  6=#3"55		-00 	2-!11&+++%	
 
 
 	
r3   	NNNNNNNNN)r+   r,   r-   r&   r1   rH   r!   r   r   r/   r  rV   r	   r0   r  r   r   r   rP   rW   rX   s   @r4   r:  r:    s]        /      4  151537+/59$(,0/359o
 o
E,-o
 !.o
 u/0	o

 "%o
   12o
 D>o
 $D>o
 'tno
 !!12o
 +,o
 
!o
 o
 o
 ^ o
 o
 o
 o
 o
r3   r:  c                       e Zd ZU dgZddiZddgdgfiZeed<   dZdef fdZ	e
e	 	 	 	 	 	 	 	 	 	 	 ddeej                 deej                 deej                 dee         deej                 deej                 dee         dee         dee         deej                 deeej        f         defd                        Z xZS )Gemma3ForCausalLMlm_head.weightlm_headcolwise_repr:   r8   r[   language_modelc                     t                                          |           t          |          | _        |j        | _        t          j        |j        |j        d          | _        | 	                                 d S r]   )
rG   rH   r:  modelrB  rb   rc   r`   rp  rP  ri   s     r4   rH   zGemma3ForCausalLM.__init__a  sj       $V,,
 +y!3V5FUSSS 	r3   Nr   rM   r   r   r9   rQ  labelsr  r  rR  r   logits_to_keepr   c                    ||n| j         j        }|	|	n| j         j        }	 | j        d||||||||	|
d	|}|j        }t          |t                    rt          | d          n|}|                     |dd|ddf                   }| j         j	        2|| j         j	        z  }t          j        |          }|| j         j	        z  }d}| | j        ||| j        fi |}t          |||j        |j        |j                  S )a  
        Example:

        ```python
        >>> from transformers import AutoTokenizer, Gemma3ForCausalLM

        >>> model = Gemma3ForCausalLM.from_pretrained("google/gemma-2-9b")
        >>> tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-9b")

        >>> prompt = "What is your favorite condiment?"
        >>> inputs = tokenizer(prompt, return_tensors="pt")

        >>> # Generate
        >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
        >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "What is your favorite condiment?"
        ```N)	rM   r   r   r9   rQ  r  r  rR  r   r7   r8   r9   r:   r;   r2   )r[   r  rR  rt  r^  r   rT   slicerp  final_logit_softcappingr/   r   loss_functionrB  r   r9   r:   r;   )rK   rM   r   r   r9   rQ  ru  r  r  rR  r   rv  r   r  r:   slice_indicesr8   r7   s                     r4   rP   zGemma3ForCausalLM.forwardj  sb   F 2C1N--TXT_Tq$8$D  $+Jj 	 ,64: ,
)%+'/!5),
 ,
 ,
 ,
  18B>SV8W8Wk~ot444]kmAAA}aaa,?@AA;.:dkAAFZ''FdkAAF%4%ffdoPPPPD%#3!/)
 
 
 	
r3   )NNNNNNNNNNr   )r+   r,   r-   _tied_weights_keys_tp_plan_pp_planr&   r1   r%  rH   r   r   r   r/   r  rV   r	   r0   r  r   rT   r   rP   rW   rX   s   @r4   rn  rn  Y  s        *+=)H_-z:;H(/        151537+/59-1$(,0/35934F
 F
E,-F
 !.F
 u/0	F

 "%F
   12F
 )*F
 D>F
 $D>F
 'tnF
 !!12F
 c5</0F
 
 F
 F
 F
 ^ F
 F
 F
 F
 F
r3   rn  c                   :     e Zd Zdef fdZdej        fdZ xZS )r!  r[   c                    t                                                       t          j        t	          j        |j        j        |j        j                            | _	        t          |j        j        |j        j                  | _        t          |j        j        |j        j        z            | _        t          |j        dz            | _        | j        | j        z  | _        t          j        | j        | j                  | _        d S )Nr  r<  )kernel_sizestride)rG   rH   rb   rr   r/   rs   vision_configr`   text_configr"  rm   layer_norm_epsmm_soft_emb_normrT   
image_size
patch_sizepatches_per_imagemm_tokens_per_imagetokens_per_sider  	AvgPool2davg_poolri   s     r4   rH   z"Gemma3MultiModalProjector.__init__  s    *,,K,8&:L:XYY+
 +
' !. ,&2F2U!
 !
 !
 "%V%9%DH\Hg%g!h!h"6#=s#BCC1T5II1A$JZ[[[r3   vision_outputsc                    |j         \  }}}|                    dd          }|                    ||| j        | j                  }|                                }|                     |          }|                    d          }|                    dd          }|                     |          }t          j	        || j
                  }|                    |          S )Nr$   r"   )r   r   r   r  r   r  flattenr  r/   r   r"  r|   )	rK   r  
batch_size_
seq_lengthreshaped_vision_outputspooled_vision_outputsnormed_vision_outputsprojected_vision_outputss	            r4   rP   z!Gemma3MultiModalProjector.forward  s    $2$8!
Az"0":":1a"@"@"9"A"A
D$:D<R#
 #
 #:"D"D"F"F $.E F F 5 = =a @ @ 5 ? ?1 E E $ 5 56K L L#(<0EtGf#g#g '//???r3   )	r+   r,   r-   r%   rH   r/   rV   rP   rW   rX   s   @r4   r!  r!    sq        \| \ \ \ \ \ \ @el @ @ @ @ @ @ @ @r3   r!  token_type_idsimage_group_idstokens_per_imagec           
      f      dS dt           dt           dt           dt           dt          f
 fd}|S )z
    This function adds the correct offsets to the `q_idx` and `kv_idx` as the torch API can only accept lengths,
    not start and end indices.
    Nr0  r1  r2  r3  r   c                 Z   t          j        |
j        d         k     |d          }
| |f         }t          j        |
j        d         k     |d          }	| |f         }t          j        |	j        d         k     |d          }
| |f         dk    |dk    z  }	| |f         |k    }||z  S )Nr$   r   ru   )r/   wherer   )r0  r1  r2  r3  safe_idxtoken_type_ids_at_kv_idximage_group_ids_at_kv_idxis_image_blocksame_image_blockr  r  s            r4   r6  z0token_type_ids_mask_function.<locals>.inner_mask  s     ;v(<Q(??KK#1)X2E#F #(;v8LQ8O/OQikl#m#m $3Ix4G$H!$)K9Nq9Q0QSlnp$q$q!(E)9:a?D\`aDab*9e+;<@YY  000r3   r7  )r  r  r  r6  s   ``  r4   token_type_ids_mask_functionr    s_     t1c 1S 1 1c 1d 1 1 1 1 1 1 1" r3   zy
    The Base Gemma3 model which consists of a vision backbone and a language model without language modeling head.,
    c            !           e Zd ZddiZdZdef fdZd Zd Zd Z	d	 Z
d
ej        dej        fdZdej        dej        dej        fdZee	 	 	 	 	 	 	 	 	 	 	 	 	 ddeej                 d
eej                 deej                 deej                 dee         deej                 deej                 deej                 deej                 dee         dee         dee         dee         deeef         fd                        Z xZS )Gemma3Modelzlanguage_model.modelrr  Fr[   c                 z   t                                          |           t          j        |j                  | _        t          |          | _        |j        j	        | _	        t          j        |j                  }|| _
        | j        j        | j        j        nd| _        |                                  d S )Nr@  ru   )rG   rH   r#   from_configr  vision_towerr!  multi_modal_projectorr  rB  rr  r[   rA  rP  )rK   r[   rr  rL   s      r4   rH   zGemma3Model.__init__  s       %19MNNN%>v%F%F" ,7".f6HIII,8<8P8\DK44bdr3   c                 4    | j                                         S rO   )rr  get_input_embeddingsr   s    r4   r  z Gemma3Model.get_input_embeddings  s    "77999r3   c                 :    | j                             |           d S rO   )rr  set_input_embeddingsrK   r   s     r4   r  z Gemma3Model.set_input_embeddings  s    0077777r3   c                     || _         d S rO   rr  rK   decoders     r4   set_decoderzGemma3Model.set_decoder  s    %r3   c                     | j         S rO   r  r   s    r4   get_decoderzGemma3Model.get_decoder  s    ""r3   pixel_valuesr   c                 f    |                      |          j        }|                     |          }|S )a  
        Projects the last hidden state from the vision model into language model space.

        Args:
            pixel_values (`torch.FloatTensor]` of shape `(batch_size, channels, height, width)`)
               The tensors corresponding to the input images.
        Returns:
            image_features (`torch.Tensor`): Image feature tensor of shape `(num_images, image_length, embed_dim)`).
        )r  )r  r^  r  )rK   r  r  image_featuress       r4   get_image_featureszGemma3Model.get_image_features  s6     ***EEW33NCCr3   rM   rQ  r  c                 \   |e| |                                  t          j        | j        j        t          j        |j                            k    }|                    d          }n|| j        j        k    }|                                }|	                    d          
                    |                              |j                  }|j        d         |j        d         z  }||                                         |                                k    rt          d| d|           |S )z
        Obtains multimodal placeholder mask from `input_ids` or `inputs_embeds`, and checks that the placeholder token count is
        equal to the length of multimodal features. If the lengths are different, an error is raised.
        N)rS   r   ru   r   r$   z6Image features and image tokens do not match: tokens: z, features )r  r/   rJ   r[   image_token_idlongr   allsumr   	expand_asrQ   r   numelr_  )rK   rM   rQ  r  special_image_maskn_image_tokensn_image_featuress          r4   get_placeholder_maskz Gemma3Model.get_placeholder_mask*  s/    !.2M$2K2K2M2MT[7uzR_Rfggg3 3 " "4!7!7!;!;!*dk.H!H+//11/99"==GGVVYYZgZnoo)/2^5I!5LL+,22448L8L8N8NNNvvvdtvv   "!r3   Nr   r   r9   r  r   ru  r  r  rR  return_dictc                    |du |duz  rt          d          ||n| j        j        }||n| j        j        }||n| j        j        }|?| j        j        | j        k    r*|| j        j        k    }|                                }d||<   n|}| |                                 |          }|B||	                                nd}t          j        |||j        d         z   |j                  }|c|                     |          }|                    |j        |j                  }|                     |||          }|                    ||          }t'          |x}t(                    s1| j                                        |||||d}|
 p|du p|j         p|du}||r|dk                        |j                  }|t.          j                            |dd	          dddd
f          z  }t          j        |                                d          dz
  }t          j        ||t          j        |d
|j                            }t=          |                    |j                  || j        j                  |d<   tA          di |tC          di |d} | j"        d|||||
||d|d	|}tG          |j$        |
r|j%        nd|j&        |j'        ||nd          S )a]  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
            config.text_config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
            (masked), the loss is only computed for the tokens with labels in `[0, ..., config.text_config.vocab_size]`.

        Example:

        ```python
        >>> from PIL import Image
        >>> import requests
        >>> from transformers import AutoProcessor, Gemma3ForConditionalGeneration

        >>> model = Gemma3ForConditionalGeneration.from_pretrained("google/gemma32-3b-mix-224")
        >>> processor = AutoProcessor.from_pretrained("google/gemma32-3b-mix-224")

        >>> prompt = "Where is the cat standing?"
        >>> url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> inputs = processor(images=image, text=prompt,  return_tensors="pt")

        >>> # Generate
        >>> generate_ids = model.generate(**inputs,)
        >>> processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "Where is the cat standing?\nsnow"
        ```NrT  r   r$   rU  )rQ  r  rV  r$   r   r   ru   r   r[  r\  T)	r   r   r9   rQ  r  r  rR  r  r   )r^  r9   r:   r;   r*   r2   )(r_  r[   r  rR  use_return_dictr  rB  cloner  rb  r/   rc  r   r   r  rQ   rS   r  masked_scatterr   r   get_text_configis_initializedrb   r   padcumsumrT   r  	full_liker  r  r   r   rr  r)   r^  r9   r:   r;   )rK   rM   r  r   r   r9   r  r   rQ  ru  r  r  rR  r  	lm_kwargsr  llm_input_idsrd  r  re  rf  
is_prefillis_imagenew_image_startr  r  s                             r4   rP   zGemma3Model.forwardB  s   \ -t";< 	[YZZZ1B1N--TXT_Tq$8$D  $+Jj 	 &1%<kk$+B]  T[%?4?%R%R!*dk.H!H%OO--M01M,--%M 7D5577FFM!CRC^==???de"\ "2]5H5K"KTaTh  N
 #!44\BBN+..}/C]EXYYN!%!:!:~ "; " " *889K^\\M ?-FF '	 +5577 -"0"0#2 , K  ,"d*,&55,  t+	  )j) +a/33N4IJJ"*bm.?.?&XY.?.Z.Z[\[\[\^a_a^a[a.b-b"b"',/B/B/D/D!"L"L"Lq"P"'+ou~rZbZi/j/j/j# # 3O"%%n&;<<ot{On3 3./ #5"C"C{"C"C%F%U%U%U%U# #
 &$% 
.%+'/!5)
 
 
 
 )%77@JG33d!/)2>2JPT
 
 
 	
r3   )NNNNNNNNNNNNN)r+   r,   r-   _checkpoint_conversion_mappingaccepts_loss_kwargsr%   rH   r  r  r  r  r/   rV   r  r  r0   r  r   r   r   r	   r  r   r<   r)   rP   rW   rX   s   @r4   r  r    sG        '=>N%O"
| 
 
 
 
 
 
: : :8 8 8& & &# # #u|     ")":?:K"]b]n" " " "0  15481537+/595959-1$(,0/3&*L
 L
E,-L
 u01L
 !.	L

 u/0L
 "%L
 !!12L
 !!12L
   12L
 )*L
 D>L
 $D>L
 'tnL
 d^L
  
u//	0!L
 L
 L
 ^ L
 L
 L
 L
 L
r3   r  c            "           e Zd ZdddddZdgZdZdef fd	Zd
 Zd Z	d Z
d Zd Zed             Zed             Zed             Ze	 	 	 	 	 	 	 	 	 	 	 	 	 	 d(deej                 deej                 deej                 deej                 dee         deej                 deej                 deej                 deej                 dee         dee         dee         d ee         d!eeej        f         d"eeef         fd#            Z	 	 	 	 	 	 	 	 	 	 d) fd%	Ze	 d*de d&ej        deej                 dej        dee         deej                 deej                 d"e!fd'            Z" xZ#S )+Gemma3ForConditionalGenerationmodel.language_modelmodel.vision_towermodel.multi_modal_projectorrp  )^language_model.model^vision_tower^multi_modal_projectorz^language_model.lm_headro  Fr[   c                     t                                          |           t          |          | _        t	          j        |j        j        |j        j        d          | _	        | 
                                 d S r]   )rG   rH   r  rt  rb   rc   r  r`   rB  rp  rP  ri   s     r4   rH   z'Gemma3ForConditionalGeneration.__init__  se        ((
y!3!?ASA^ejkkkr3   c                 4    | j                                         S rO   rt  r  r   s    r4   r  z3Gemma3ForConditionalGeneration.get_input_embeddings      z..000r3   c                 :    | j                             |           d S rO   rt  r  r  s     r4   r  z3Gemma3ForConditionalGeneration.set_input_embeddings      
''.....r3   c                 :    | j                             |           d S rO   )rt  r  r  s     r4   r  z*Gemma3ForConditionalGeneration.set_decoder  s    
w'''''r3   c                 4    | j                                         S rO   )rt  r  r   s    r4   r  z*Gemma3ForConditionalGeneration.get_decoder  s    z%%'''r3   c                 6    | j                             |          S rO   )rt  r  )rK   r  s     r4   r  z1Gemma3ForConditionalGeneration.get_image_features  s    z,,\:::r3   c                     | j         j        S rO   )rt  rr  r   s    r4   rr  z-Gemma3ForConditionalGeneration.language_model  s    z((r3   c                     | j         j        S rO   )rt  r  r   s    r4   r  z+Gemma3ForConditionalGeneration.vision_tower  s    z&&r3   c                     | j         j        S rO   )rt  r  r   s    r4   r  z4Gemma3ForConditionalGeneration.multi_modal_projector  s    z//r3   Nr   rM   r  r   r   r9   r  r   rQ  ru  r  r  rR  r  rv  r   c                    ||n| j         j        }||n| j         j        }||n| j         j        } | j        d||||||||
|	||||d|}|d         }t          |t                    rt          | d          n|}|                     |dd|ddf                   }d}|	i|	                                }|dddddf         }|	dddf         }||dd|j
        d          df                             |j                  }||                    |j                  dk                                             }||                    |j                  dk                                             }n(|                                }|                                }t          j                    }|                    d| j         j        j                  }|                    d                              |j                  } |||          }|s|f|dd         z   }||f|z   n|S t'          |||j        |j        |j        |j                  S )	a  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
            config.text_config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
            (masked), the loss is only computed for the tokens with labels in `[0, ..., config.text_config.vocab_size]`.

        Example:

        ```python
        >>> from PIL import Image
        >>> import requests
        >>> from transformers import AutoProcessor, Gemma3ForConditionalGeneration

        >>> model = Gemma3ForConditionalGeneration.from_pretrained("google/gemma-3-4b-it")
        >>> processor = AutoProcessor.from_pretrained("google/gemma-3-4b-it")

        >>> messages = [
        ...     {
        ...         "role": "system",
        ...         "content": [
        ...             {"type": "text", "text": "You are a helpful assistant."}
        ...         ]
        ...     },
        ...     {
        ...         "role": "user", "content": [
        ...             {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"},
        ...             {"type": "text", "text": "Where is the cat standing?"},
        ...         ]
        ...     },
        ... ]

        >>> inputs = processor.apply_chat_template(
        ...     messages,
        ...     tokenize=True,
        ...     return_dict=True,
        ...     return_tensors="pt",
        ...     add_generation_prompt=True
        ... )
        >>> # Generate
        >>> generate_ids = model.generate(**inputs)
        >>> processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "user\nYou are a helpful assistant.\n\n\n\n\n\nWhere is the cat standing?\nmodel\nBased on the image, the cat is standing in a snowy area, likely outdoors. It appears to"
        ```
        N)rM   r  r  r   r   r9   rQ  r  ru  r  rR  r  r   r   .ru   r$   )r7   r8   r9   r:   r;   r*   r2   )r[   r  rR  r  rt  r   rT   ry  rp  rU   r   rQ   r   r   rb   CrossEntropyLossr   r  rB  r6   r9   r:   r;   r*   )rK   rM   r  r   r   r9   r  r   rQ  ru  r  r  rR  r  rv  r  r  r:   r|  r8   r7   shift_logitsshift_labelsshift_attention_maskloss_fctflat_logitsflat_labelsr}   s                               r4   rP   z&Gemma3ForConditionalGeneration.forward  s   @ 2C1N--TXT_Tq$8$D  $+Jj 	 &1%<kk$+B]$* 
%))%+'/!5#)
 
 
 
"  
8B>SV8W8Wk~ot444]kmAAA}aaa,?@AA\\^^F!#ssAAA+.L!#qrr'?L) (6aaa,:LQ:O9O9Q9Q6Q'R'U'UV\Vc'd'd$+,@,C,CFM,R,RVW,WXccee+,@,C,CLDW,X,X\],]^iikk+6688+6688*,,H&++B0G0RSSK&++B//22<3FGGK8K55D 	DY,F'+'7D7V##VC+#3!/) ' ;
 
 
 	
r3   Tc                 p     t                      j        |f||||||	|
|d|}|d         dk    r||d<   |S )N)r9   rQ  r   r   r   r  rv  r  r   r  )rG   prepare_inputs_for_generation)rK   rM   r9   rQ  r   r   r  r   r  r  rv  ru  r   model_inputsrL   s                 r4   r  z<Gemma3ForConditionalGeneration.prepare_inputs_for_generation  sn      =uww<
+')%)))
 
 
 
 !!!+7L(r3   rW  c                     |                                  |||||d}||j        d         dk    r|dk                        |j                  }	|	t          j                            |	dd          d d d df          z  }
t          j        |
	                                d          dz
  }t          j
        |	|t          j        |d                    }t          |                    |j                  || j                  |d<   t          d	i |S )
NrV  r$   r  r   r  ru   r   r[  r2   )r  r   rQ   r   rb   r   r  r/   r  rT   r  r  r  r  r   )r[   rW  r   r   r9   r   r  r   rf  r  r  r  s               r4   r   z8Gemma3ForConditionalGeneration.create_masks_for_generate  s.    ,,..(,,.(
 
 %,*<Q*?1*D*D
 '!+//0EFFH&"-*;*;HfTU*;*V*VWXWXWXZ][]Z]W]*^)^^O#l?+>+>+@+@aHHH1LO#k(OU_UcegEhEhiiO.J!!."788/6Ke/ /K*+ )77;777r3   )NNNNNNNNNNNNNr   )
NNNNNNNTNNrO   )$r+   r,   r-   r  r}  r  r%   rH   r  r  r  r  r  propertyrr  r  r  r   r   r/   r  r0   rV   r	   r  r   rT   r<   r6   rP   r  staticmethodr   r   r   rW   rX   s   @r4   r  r    sD        "8-"?#,	& &" ++  |      1 1 1/ / /( ( (( ( (; ; ; ) ) X) ' ' X' 0 0 X0  15481537+/595959-1$(,0/3&*34|
 |
E,-|
 u01|
 !.	|

 u/0|
 "%|
 !!12|
 !!12|
   12|
 )*|
 D>|
 $D>|
 'tn|
 d^|
 c5</0|
" 
u22	3#|
 |
 |
 ^|
B " " " " " "H  26!8 !8 !8l!8 !.!8 	!8
 "%!8 u|,!8 !.!8 
!8 !8 !8 \!8 !8 !8 !8 !8r3   r  c                   \    e Zd ZddddZ fdZd Zd Zee	 	 	 	 	 	 	 	 	 dd	e	e
j                 d
e	e
j                 de	e
j                 de	e
j                 de	e         de	e
j                 de	e
j                 de	e
j                 de	e         dee         defd                        Z xZS )Gemma3ForSequenceClassificationr  r  r  )r  r  r  c                    t                                          |           |j        | _        t          |          | _        t          j        |j        j        | j        d          | _	        | 
                                 d S r]   )rG   rH   
num_labelsr  rt  rb   rc   r  r`   scorerP  ri   s     r4   rH   z(Gemma3ForSequenceClassification.__init__  sm        + ((
Yv1=tUZ[[[
 	r3   c                 4    | j                                         S rO   r  r   s    r4   r  z4Gemma3ForSequenceClassification.get_input_embeddings  r  r3   c                 :    | j                             |           d S rO   r  r  s     r4   r  z4Gemma3ForSequenceClassification.set_input_embeddings  r  r3   NrM   r  r   r   r9   rQ  r  ru  r  r   r   c
                 >    | j         |f|||||||	d|
}|j        }|                     |          }||j        d         }n|j        d         }| j        j        j        |dk    rt          d          | j        j        j        d}n||| j        j        j        k                        |j	        t          j                  }t          j        |j        d         |j	        t          j                  }||z                      d          }n)d}t                              | j        j         d           |t          j        ||j	        	          |f         }d}||                     |||| j        
          }t'          |||j        |j        |j                  S )a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        )r   r  r   r9   rQ  r  r  Nr   r$   z=Cannot handle batch sizes > 1 if no padding token is defined.ru   )r   rS   z will not detect padding tokens in `inputs_embeds`. Results may be unexpected if using padding tokens in conjunction with `inputs_embeds.`rU  )r8   ru  pooled_logitsr[   rx  )rt  r^  r   r   r[   r  rA  r_  rQ   r   r/   int32rc  argmaxr`  ra  rL   r+   r{  r   r9   r:   r;   )rK   rM   r  r   r   r9   rQ  r  ru  r  r   transformer_outputsr:   r8   r  last_non_pad_tokennon_pad_masktoken_indicesr  r7   s                       r4   rP   z'Gemma3ForSequenceClassification.forward  s   , )dj

)%%+')

 

 

 

 ,=M** "+JJ&,Q/J;"/7J!OO\]]];"/7!#"%)@)MMQQRXR_afalmmL!L)<V]Z_ZefffM"/,">!F!Fr!J!J!#>* Z Z Z  
 u|Jv}MMMOaab%%VFR_hlhs%ttD/ /?-;*5
 
 
 	
r3   rl  )r+   r,   r-   r  rH   r  r  r   r   r   r/   r  r0   rV   r	   r  r   r   r   rP   rW   rX   s   @r4   r  r    s~       !7-"?& &"    1 1 1/ / /  15481537+/5959-1$(C
 C
E,-C
 u01C
 !.	C

 u/0C
 "%C
   12C
 !!12C
 )*C
 D>C
 +,C
 
*C
 C
 C
 ^ C
 C
 C
 C
 C
r3   r  c                       e Zd ZU dZeed<   dS )#Gemma3TextForSequenceClassificationz
    Gemma3TextForSequenceClassification is a text-only sequence classification model that works with Gemma3TextConfig.
    It uses the generic sequence classification implementation for efficiency and consistency.
    r[   N)r+   r,   r-   r.   r&   r1   r2   r3   r4   r  r  ,  s*          
 r3   r  )r  r:  rn  r  r  r  r  )Nr$   )r   NN)YrK  collections.abcr   dataclassesr   typingr   r   r/   torch.nnrb   activationsr   cache_utilsr	   r
   configuration_utilsr   
generationr   masking_utilsr   r   r   modeling_flash_attention_utilsr   modeling_layersr   r   modeling_outputsr   r   r   modeling_rope_utilsr   r   modeling_utilsr   r   processing_utilsr   utilsr   r   r   r   r   utils.deprecationr    utils.genericr!   autor#   configuration_gemma3r%   r&   
get_loggerr+   r`  r)   r6   	Embeddingr>   ModulerZ   rm   r   r   r   rV   rT   r   rU   r<   r   r   r  r  r  r8  r:  rn  r!  r  r  r  r  r  __all__r2   r3   r4   <module>r%     sH  ,  $ $ $ $ $ $ ! ! ! ! ! ! " " " " " " " "        ! ! ! ! ! ! . . . . . . . . 3 3 3 3 3 3 ) ) ) ) ) ) m m m m m m m m m m B B B B B B [ [ [ [ [ [ [ [ q q q q q q q q q q K K K K K K K K F F F F F F F F & & & & & & _ _ _ _ _ _ _ _ _ _ _ _ _ _ 0 0 0 0 0 0 / / / / / /       @ @ @ @ @ @ @ @ 
	H	%	%   
< < < < < 7 < <  <   
< < < < <; < <  <0
S 
S 
S 
S 
SBL 
S 
S 
S    	    = = = = =BI = = =(!< !< !< !< !<BI !< !< !<H( ( (   6	UU\ 	U# 	U%, 	U 	U 	U 	U$ ## %  %I %< % 
 % <	 %
 U\* %  % e_ % e_ % 5<%& %  %  %  %FN) N) N) N) N)bi N) N) N)b? ? ? ? ?3 ? ? ?D ' ' ' ' 'O ' ' '>
# 
(CcSVCWY]C]:^ 
 
 
 
 N
 N
 N
 N
 N
+ N
 N
 N
b X
 X
 X
 X
 X
- X
 X
 X
v!@ !@ !@ !@ !@	 !@ !@ !@HU\*el+  h	   B   
Q
 Q
 Q
 Q
 Q
' Q
 Q
 
Q
h   
s8 s8 s8 s8 s8%:O s8 s8 
s8l[
 [
 [
 [
 [
&; [
 [
 [
|    *JLa     r3   