
    ePiF                       d dl Z d dlZd dlZd dlZd dlZd dlmZmZ d dlZ		 d dl
Z
d dlZd dlZn# e$ r Y nw xY wd dlmZ d dlmZ d dlmZmZmZmZmZmZmZ ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        d	Z ej!        j"        ej!        j#        ej!        j$        ej!        j#        dZ%ej&        j'        ej&        j(        dZ)i Z*d	 Z+ G d
 de          Z, G d de j-                  Z. e+d           G d de.                      Z/ e+d           G d de/                      Z0 e+d           G d de/                      Z1 e+d           G d de/                      Z2 e+d           G d de/                      Z3 e+d           G d de/                      Z4 e+d            G d! d"e.                      Z5 e+d#           G d$ d%e.                      Z6 e+d&           G d' d(e.                      Z7 e+d)           G d* d+e.                      Z8 e+d,           G d- d.e.                      Z9 e+d/           G d0 d1e/                      Z: e+d2           G d3 d4e/                      Z; e+d5           G d6 d7e/                      Z< e+d8           G d9 d:e.                      Z= e+d;           G d< d=e=                      Z> e+d>           G d? d@e.                      Z? e+dA           G dB dCe.                      Z@ e+dD           G dE dFe.                      ZA e+dG           G dH dIe.                      ZB e+dJ           G dK dLe.                      ZC e+dM           e+dN           G dO dPe.                                  ZD e+dQ           G dR dSe.                      ZE e+dT           G dU dVe.                      ZF e+dW           G dX dYe.                      ZG e+dZ           G d[ d\e.                      ZH e+d]           G d^ d_e.                      ZI e+d`           G da dbe.                      ZJ e+dc           G dd dee.                      ZK e+df           G dg dheK                      ZL e+di           G dj dke.                      ZM e+dl           G dm dne.                      ZN e+do           G dp dqe.                      ZO e+dr           G ds dte.                      ZP e+du           G dv dwe.                      ZQdx ZReSdyk    r
 eR             g dzg d{g d|g d}g d~g dg dg dg dg dg ddZT e+d           G d de.                      ZUdS )    N)ListOptional)utils)	Converter)attention_speccommon_spec
model_spectransformer_specwav2vec2_specwav2vec2bert_specwhisper_spec)	gelu	gelu_fastgelu_newgelu_pythongelu_pytorch_tanh
quick_gelurelusiluswish)linearsullama3longrope)gemmgemvc                       fd}|S )z5Registers a model loader for this configuration name.c                 ,     |             t           <   | S N)_MODEL_LOADERS)clsconfig_names    w/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ctranslate2/converters/transformers.py	decoratorz"register_loader.<locals>.decorator<   s    &)cee{#
     )r"   r$   s   ` r#   register_loaderr'   9   s$         r%   c                       e Zd ZdZ	 	 	 	 	 	 ddedee         deee                  dedee         d	ed
efdZd Z	d Z
d Zd ZdS )TransformersConverterz/Converts models from Hugging Face Transformers.NFmodel_name_or_pathactivation_scales
copy_filesload_as_float16revisionlow_cpu_mem_usagetrust_remote_codec                 h    || _         || _        || _        || _        || _        || _        || _        dS )a  Initializes the converter.

        Arguments:
          model_name_or_path: Name of the pretrained model to download, or path to the
            directory containing the pretrained model.
          activation_scales: Path to the pre-computed activation scales. Models may
            use them to rescale some weights to smooth the intermediate activations
            and improve the quantization accuracy. See
            https://github.com/mit-han-lab/smoothquant.
          copy_files: List of filenames to copy from the Hugging Face model to the
            converted model directory.
          load_as_float16: Load the model weights as float16. More precisely, the model
            will be loaded with ``from_pretrained(..., dtype=torch.float16)``.
          revision: Revision of the model to download from the Hugging Face Hub.
          low_cpu_mem_usage: Enable the flag ``low_cpu_mem_usage`` when loading the model
            with ``from_pretrained``.
          trust_remote_code: Allow converting models using custom code.
        N)_model_name_or_path_activation_scales_copy_files_load_as_float16	_revision_low_cpu_mem_usage_trust_remote_code)selfr*   r+   r,   r-   r.   r/   r0   s           r#   __init__zTransformersConverter.__init__F   sA    8 $6 "3% /!"3"3r%   c                 0   t          j                    5  t          j                            | j        | j                  }|j        j        }t          
                    |          }|Mt          d|dd                    t          t                                                              d          t          t          |j                  }t          j        }d| j        rt           j        n!t          |dd           pt          |dd           i}| j        r
| j        |d<   | j        r
| j        |d	<   | j        r
| j        |d
<    | j        || j        fi |}i }| j        r
| j        |d
<    | j        || j        fi |}	 |||	          }
| j        r1t          j        | j        d          }|                    |
|           | j        r2| j        D ]*}|
                    |                     |                     +|
cd d d            S # 1 swxY w Y   d S )N)r0   z8No conversion is registered for the model configuration z  (supported configurations are: , )dtypetorch_dtyper.   r/   r0   cpu)map_location)torchno_gradtransformers
AutoConfigfrom_pretrainedr2   r8   	__class____name__r    get
ValueErrorjoinsortedkeysgetattrarchitecture_nameAutoTokenizerr5   float16r6   r7   
load_modelload_tokenizerr3   loadsmooth_activationr4   register_fileget_model_file)r9   configr"   loadermodel_classtokenizer_classkwargsmodeltokenizer_kwargs	tokenizerspecr+   filenames                r#   _loadzTransformersConverter._loadj   s   ]__ 8	 8	!,<<(D<S =  F !*3K#''44F~ j #{{DIIf^5H5H5J5J.K.K$L$L$L$LN   ",0HIIK*8O ,<EMM $77 <v}d;;F ~ 4%)^z"& F.2.E*+& F.2.E*+#DOK1ITTVTTE!& P8<8O !45++!9 =M I 6%++D& B$)J+%% % %! ((/@AAA F $ 0 F FH&&t':':8'D'DEEEEq8	 8	 8	 8	 8	 8	 8	 8	 8	 8	 8	 8	 8	 8	 8	 8	 8	 8	s   G*HHHc                      |j         |fi |S r   rF   )r9   rZ   r*   r\   s       r#   rR   z TransformersConverter.load_model   s    *{*+=HHHHHr%   c                      |j         |fi |S r   rd   )r9   r[   r*   r\   s       r#   rS   z$TransformersConverter.load_tokenizer   s    ../ALLVLLLr%   c                    t           j                            | j                  r&t           j                            | j        |          }n9	 t          j        | j        |          }n# t
          j        j        $ r d }Y nw xY w|t           j        	                    |          st          d|d| j                  |S )N)repo_idra   zFile z does not exist in model )ospathisdirr2   rK   huggingface_hubhf_hub_downloadr   EntryNotFoundErrorisfilerJ   )r9   ra   ri   s      r#   rW   z$TransformersConverter.get_model_file   s    7==122 	7<< 8(CCDD&6 4x   #(;    <rw~~d33<*88T557  
 s   A( (B B)NNFNFF)rH   
__module____qualname____doc__strr   r   boolr:   rb   rR   rS   rW   r&   r%   r#   r)   r)   C   s        99
 ,0*. %"&"'"'"4 "4"4 $C="4 T#Y'	"4
 "4 3-"4  "4  "4 "4 "4 "4H9 9 9vI I IM M M    r%   r)   c                       e Zd ZdZed             Zej        d             Zd Z	d Z
d Zd Zd Zej        j        fd	Zd
 Zd Zd Zd ZdS )ModelLoaderzRBase class for loading Transformers models into a CTranslate2 model specification.c                     d S r   r&   r9   s    r#   rO   zModelLoader.architecture_name   s    tr%   c                     t                      r   NotImplementedErrorr9   r]   s     r#   get_model_speczModelLoader.get_model_spec   s    !###r%   c                     |                      |          }|                     |j        ||           |                     ||          }|                     ||           |S r   )r|   
set_configrX   get_vocabularyset_vocabulary)r9   r]   r_   r`   tokenss        r#   __call__zModelLoader.__call__   s]    ""5))UI666$$UI66D&)))r%   c                     d t          |                                                                d           D             S )Nc                     g | ]\  }}|S r&   r&   ).0token_s      r#   
<listcomp>z.ModelLoader.get_vocabulary.<locals>.<listcomp>   s,     
 
 
q 
 
 
r%   c                     | d         S N   r&   )items    r#   <lambda>z,ModelLoader.get_vocabulary.<locals>.<lambda>   s
    Q r%   )key)rL   	get_vocabitemsr9   r]   r_   s      r#   r   zModelLoader.get_vocabulary   sS    
 
"##%%++--3G3G  
 
 
 	
r%   c                     d S r   r&   r9   r`   r   s      r#   r   zModelLoader.set_vocabulary       r%   c                     d S r   r&   r9   rX   r]   r_   s       r#   r~   zModelLoader.set_config   r   r%   c                 6    |j         |_        |j        |_        d S r   weightgammabiasbetar9   r`   modules      r#   set_layer_normzModelLoader.set_layer_norm   s    ]
K			r%   c                 T   |t           j        j        k    r|j        |_        n$|j        |_        |j        |_        |j        |_        t          |t          j                  r |j                            dd          |_        t          |d          r|j        |j        |_        d S d S d S )Nr   r   r   )r   QuantizationCT2r   qweightscalesweight_scaleqzerosweight_zero
isinstancerD   Conv1D	transposehasattrr   )r9   r`   r   
quant_types       r#   
set_linearzModelLoader.set_linear   s    1555 -DKK .DK &D%}Dfl122 	6+//155DK66"" 	$v{'>DIII	$ 	$'>'>r%   c                     |j         |_         d S r   )r   r   s      r#   set_embeddingszModelLoader.set_embeddings   s    mr%   c                 x    |j         |_        t          |dd          }|dk    r|j        |d          |_        d S d S )Noffsetr   r   	encodingsrN   r9   r`   r   r   s       r#   set_position_encodingsz"ModelLoader.set_position_encodings   sB    1--A::!^FGG4DNNN :r%   c                      t          d          )Nz7No activation smoothing logic is defined for this modelry   )r9   r`   r+   s      r#   rU   zModelLoader.smooth_activation   s    !E
 
 	
r%   c           	         t          |dd           }|r|                    d          p|                    d          }|dk    rd }n[t                              |          }|?t          d|dd                    t                                                              |                    dd	          }|                    d
|          }nd }d	}t          |d
|          }|||fS )Nrope_scalingtype	rope_typedefaultRoPE scaling type 'T' is not yet implemented. The following RoPE scaling types are currently supported: r<   factorr   
rope_theta)rN   rI   _SUPPORTED_ROPE_SCALINGrz   rK   rM   )r9   rX   default_rope_thetar   r   rotary_scaling_typerotary_scaling_factorr   s           r#   get_rotary_paramszModelLoader.get_rotary_params   s   v~t<< 	K$((00QL4D4D[4Q4QII%%&*##&=&A&A)&L&L#&.-- %99dii0G0L0L0N0N&O&O&OQ  
 %1$4$4Xq$A$A!%)),8JKKJJ"&$%! 7IJJJ"$9:EEr%   N)rH   ro   rp   rq   propertyrO   abcabstractmethodr|   r   r   r   r~   r   r   r   r   r   r   r   rU   r   r&   r%   r#   ru   ru      s        \\  X 	$ $ $  
 
 
          3>2J2N $ $ $ $$ $ $5 5 5
 
 

F F F F Fr%   ru   
BartConfigc                   `     e Zd Zed             Zd Z fdZd Zd Zd Z	d Z
dd	Zd
 Z xZS )
BartLoaderc                     dS )NBartForConditionalGenerationr&   rw   s    r#   rO   zBartLoader.architecture_name  s    --r%   c                    t           j                            |j        j        |j        j        f|j        j        |j        j        t          |j        j	                 t          |j        dd                    }|                     |j        |j        j                   |                     |j        |j        j                   |                     |j        j        |j                   t          |dd           }|M|                                                                dk    r#|                                |j        j        _        |S )Nnormalize_embeddingTpre_norm
activationlayernorm_embeddingfinal_logits_biasr   )r
   TransformerSpecfrom_configrX   encoder_layersdecoder_layersencoder_attention_headsnormalize_before_SUPPORTED_ACTIVATIONSactivation_functionrN   set_encoderencoderr]   set_decoderdecoderr   
projectionlm_headnonzeronumelsqueezer   )r9   r]   r`   r   s       r#   r|   zBartLoader.get_model_spec  s   /;;\(%,*EFL0\2-el.NO '6KT R R < 
 
 	u{':;;;u{':;;;/???#E+>EE(->-F-F-H-H-N-N-P-PTU-U-U+<+D+D+F+FDL#(r%   c                     t                                          ||          }|j        j        t	          |          k     r|d |j        j                 }|S r   )superr   rX   
vocab_sizelenr9   r]   r_   r   rG   s       r#   r   zBartLoader.get_vocabulary2  sL    ''y99<"S[[005el556Fr%   c                 Z    |                     |           |                    |           d S r   register_source_vocabularyregister_target_vocabularyr   s      r#   r   zBartLoader.set_vocabulary8  0    ''///''/////r%   c                     |j         |_         |j        |_        |j        |_        |                    |j        j                  |_        d S r   )	bos_token	eos_token	unk_tokenconvert_ids_to_tokensrX   decoder_start_token_iddecoder_start_tokenr   s       r#   r~   zBartLoader.set_config<  sI    $.$.$.%.%D%DL/&
 &
"""r%   c                    |                      ||           t          |j        |j                  D ]\  }}|                     |j        |j        d           |                     |j        j        |j	                   | 
                    |j        j        |j                   | 
                    |j        j        |j                   |                     |j        j        |j                   d S NTself_attention)set_common_layersziplayerlayersset_attentionr   	self_attnr   
layer_normself_attn_layer_normr   ffnlinear_0fc1linear_1fc2final_layer_norm)r9   r`   r   
layer_specr   s        r#   r   zBartLoader.set_encoderD  s    tW---!$TZ!@!@ 	S 	SJ)#    
 )4*  
 OOJN3UY???OOJN3UY???
 95;QRRRR	S 	Sr%   c                    |                      ||           t          |j        |j                  D ]\  }}|                     |j        |j        d           |                     |j        j        |j	                   t          |d          rG|                     |j        |j        d           |                     |j        j        |j                   |                     |j        j        |j                   |                     |j        j        |j                   |                     |j        j        |j                   d S )NTr   encoder_attnF)r   r   r   r   r   r   r   r   r   r  r   	attentionr
  encoder_attn_layer_normr   r  r  r  r  r  r  )r9   r`   r   r  r   s        r#   r   zBartLoader.set_decoderV  s^   tW---!$TZ!@!@ 	S 	SJ)#    
 )4*  
 un-- 	""(&#( #   
 ##(31  
 OOJN3UY???OOJN3UY???
 95;QRRRR1	S 	Sr%   Fc                 0   d t          d          D             }|                     |d         |j                   |                     |d         |j                   |                     |d         |j                   |r!t          j        |j        d         |           nPt          j        |j        d         |d d                    t          j        |j        d         |dd                     |                     |j        d         |j                   d S )Nc                 4    g | ]}t          j                    S r&   r   
LinearSpecr   r   s     r#   r   z,BartLoader.set_attention.<locals>.<listcomp>t  !    CCCQ.00CCCr%      r   r      )	ranger   q_projk_projv_projr   fuse_linearr   out_projr9   r`   r  r   split_layerss        r#   r   zBartLoader.set_attentions  s    CC%((CCCQ)9:::Q)9:::Q)9::: 	@dk!nl;;;;dk!nl2A2.>???dk!nl122.>???B);<<<<<r%   c                 8   dd l }t          |d          s.|j        j        r|                    |j        j                  nd}n|j        }||_        |                     |j	        |j
                   |                     t          |j        t                    r|j        d         n|j        |j                   t          |d          r |                     |j        |j                   t          |d          r"|                     |j        |j                   d S d S )Nr   embed_scale      ?r   r   )mathr   rX   scale_embeddingsqrtd_modelr  scale_embeddingsr   position_encodingsembed_positionsr   r   
embeddingslistembed_tokensr   r   r   )r9   r`   r   r!  r  s        r#   r   zBartLoader.set_common_layers  s1   v}-- 	- =0		&-/000 K !,K +##D$;V=STTT dot44%""_	
 	
 	
 6<(( 	D1BCCC6011 	V 8&:TUUUUU	V 	Vr%   F)rH   ro   rp   r   rO   r|   r   r   r~   r   r   r   r   __classcell__rG   s   @r#   r   r     s        . . X.  &    0 0 0
 
 
S S S$S S S:= = = =V V V V V V Vr%   r   MarianConfigc                   T     e Zd Zed             Z fdZd Z fdZ fdZd Z	 xZ
S )MarianMTLoaderc                     dS )NMarianMTModelr&   rw   s    r#   rO   z MarianMTLoader.architecture_name  s    r%   c                     d|j         _        d|j         _        t                                          |          }|                     |           |S NF)rX   r   r   r   r|   _remove_pad_weights)r9   r]   r`   rG   s      r#   r|   zMarianMTLoader.get_model_spec  sG    (-%+0(ww%%e,,  &&&r%   c                 N    |j         |_         |j        |_        |j         |_        d S r   )r   r   r   r   s       r#   r~   zMarianMTLoader.set_config  s+    $.$. &/%8"""r%   c                 Z    d|_         t                                          ||           d S NT)start_from_zero_embeddingr   r   r9   r`   r   rG   s      r#   r   zMarianMTLoader.set_decoder  s+    )-&D'*****r%   c                     t                                          ||          }|d         dk    r|                                 |S )Nr  z<pad>)r   r   popr   s       r#   r   zMarianMTLoader.get_vocabulary  s?     ''y99":  JJLLLr%   c                    |j         j        d         |j        j        |j        j        g}|d         j        j        d         dz
  }|D ]}|j        j        d         |dz   k    r|j        d d         |_        t          |t          j                  rA|	                                r-|j
        j        d         |dz   k    r|j
        d d         |_
        d S )Nr   r   r  )r   r(  r   r   r   shaper   r   r  has_biasr   )r9   r`   vocab_specsnew_vocab_size
vocab_specs        r#   r5  z"MarianMTLoader._remove_pad_weights  s    L#A&L#L#
 %Q.4Q7!;% 	7 	7J &q)^a-???$.$5crc$:
!:{'=>>7''))7 O)!,0BBB",/#2#"6
	7 	7r%   )rH   ro   rp   r   rO   r|   r~   r   r   r5  r,  r-  s   @r#   r0  r0    s          X    9 9 9+ + + + +    7 7 7 7 7 7 7r%   r0  M2M100Configc                   D     e Zd Zed             Z fdZd Z fdZ xZS )M2M100Loaderc                     dS )NM2M100ForConditionalGenerationr&   rw   s    r#   rO   zM2M100Loader.architecture_name  s    //r%   c                 v    d|j         _        d|j         _        t                                          |          S )NTF)rX   r   r   r   r|   )r9   r]   rG   s     r#   r|   zM2M100Loader.get_model_spec  s/    (,%+0(ww%%e,,,r%   c                 8    |j         |j        d          |_        d S r   )weightsr   r   r   s      r#   r   z#M2M100Loader.set_position_encodings  s    8r%   c                    t                                          ||          }|d         |j        k    r-|                    |j        |                                           |j                            dg           D ]}||vr|                    |           t          |d|j
        j        t          |          z
            }|dk    r|d t          |          D             z  }|S )Nr  additional_special_tokensnum_madeup_wordsr   c                     g | ]}d |z  S )zmadeupword%dr&   r   is     r#   r   z/M2M100Loader.get_vocabulary.<locals>.<listcomp>  s    KKKa~)KKKr%   )r   r   r   insertunk_token_idr<  special_tokens_maprI   appendrN   rX   r   r   r  )r9   r]   r_   r   r   rM  rG   s         r#   r   zM2M100Loader.get_vocabulary  s    ''y99 ":,,,MM)0&**,,???1556QSUVV 	% 	%EF""e$$$")5<+BS[[+P
 
 aKK59I3J3JKKKKFr%   )	rH   ro   rp   r   rO   r|   r   r   r,  r-  s   @r#   rE  rE    sy        0 0 X0- - - - -
9 9 9        r%   rE  MBartConfigc                   *    e Zd Zed             Zd ZdS )MBartLoaderc                     dS )NMBartForConditionalGenerationr&   rw   s    r#   rO   zMBartLoader.architecture_name  s    ..r%   c                     |j         |_         |j        |_        |j        |_        t          |j        dd           dv r	d |_        d S |j        |_        d S )Nr[   )MBartTokenizerN)r   r   r   rN   rX   r   r   s       r#   r~   zMBartLoader.set_config  s_    $.$.$. 5<!2D99=UUU)-F&&&)2)<F&&&r%   NrH   ro   rp   r   rO   r~   r&   r%   r#   rW  rW    s<        / / X/	= 	= 	= 	= 	=r%   rW  PegasusConfigc                   *    e Zd Zed             Zd ZdS )PegasusLoaderc                     dS )NPegasusForConditionalGenerationr&   rw   s    r#   rO   zPegasusLoader.architecture_name      00r%   c                 f    |j         |_        |j        |_        |j        |_        |j         |_        d S r   )	pad_tokenr   r   r   r   r   s       r#   r~   zPegasusLoader.set_config  s4    $.$.$.%.%8"""r%   Nr\  r&   r%   r#   r_  r_  	  s<        1 1 X19 9 9 9 9r%   r_  	OPTConfigc                   \     e Zd Zed             Zd Zd Zd Zd Z fdZ	d Z
 fdZ xZS )		OPTLoaderc                     dS )NOPTForCausalLMr&   rw   s    r#   rO   zOPTLoader.architecture_name      r%   c                 x   t           j                            |j        j        |j        j        |j        j        t          |j        j                 |j        j	        |j        j
        k              }|                     |j        |j        j                   |                     |j        j        |j                   |S )N)r   r   project_in_out)r
   TransformerDecoderModelSpecr   rX   num_hidden_layersnum_attention_headsdo_layer_norm_beforer   r   word_embed_proj_dimhidden_sizer   r   r]   r   r   r   r9   r]   r`   s      r#   r|   zOPTLoader.get_model_spec  s    ;GGL*L,\6-el.NO <;u|?WW H 
 
 	u{':;;;/???r%   c                 &   t          |j        j                  D ]v\  }}d|z  }t          j        |j        j        |j        j        d         |d|z                      t          j        |j        j        |j        j	        |d|z                      wd S )Nzmodel.decoder.layers.%dr   z%s.self_attn.q_projz%s.fc1)
	enumerater   r   r   rU   r   r   r   r  r  )r9   r`   r+   rP  r   layer_scopes         r#   rU   zOPTLoader.smooth_activation)  s    !$,"455 	 	HAu3a7K#$/$+A.!"7+"EF   #	$	"!(["89   	 	r%   c                 0    |                     |           d S r   register_vocabularyr   s      r#   r   zOPTLoader.set_vocabulary9        (((((r%   c                 N    |j         |_         |j        |_        |j        |_        d S r   r   r   r   r   s       r#   r~   zOPTLoader.set_config<  )    $.$.$.r%   c                 :   t                                          ||           |j         |                     |j        |j                   |j         |                     |j        |j                   |j        "|                     |j        |j                   d S d S r   )r   r   
project_inr   project_outr  r   r   r:  s      r#   r   zOPTLoader.set_decoderA  s    D'***)OODOW-?@@@*OOD,g.ABBB#/1IJJJJJ 0/r%   c                     d|_         |                     |j        |j                   |                     |j        |j                   d S r4  )r%  r   r&  r'  r   r(  r*  r   s      r#   r   zOPTLoader.set_common_layersK  sG     %##D$;V=STTTDOV-@AAAAAr%   c                    t                                          ||          }d}t          |          dz  dk    rId                    |          }||vr|                    |           |dz  }t          |          dz  dk    I|S )Nr      zmadeupword{:04d}r   )r   r   r   formatrT  )r9   r]   r_   r   rP  symbolrG   s         r#   r   zOPTLoader.get_vocabularyP  s    ''y99&kkAo""'..q11FV##f%%%FA	 &kkAo"" r%   )rH   ro   rp   r   rO   r|   rU   r   r~   r   r   r   r,  r-  s   @r#   rg  rg    s            X      ) ) )/ / /
K K K K KB B B

 
 
 
 
 
 
 
 
r%   rg  GPTBigCodeConfigc                   L     e Zd Zed             Zd Zd Z fdZd Zd Z	 xZ
S )GPTBigCodeMHALoaderc                     dS )NGPTBigCodeForCausalLMr&   rw   s    r#   rO   z%GPTBigCodeMHALoader.architecture_name_  s    &&r%   c                 *   t           j                            |j        j        |j        j        dt          |j        j                 d          }|                     |j	        |j
                   |                     |j	        j        |j                   |S )NT)r   r   multi_query_attentionr
   rm  r   rX   n_layern_headr   r   r   r   transformerr   r   r   rs  s      r#   r|   z"GPTBigCodeMHALoader.get_model_specc  s    ;GGL L-el.NO"& H 
 
 	u'8999/???r%   c                 0    |                     |           d S r   rx  r   s      r#   r   z"GPTBigCodeMHALoader.set_vocabularyp  rz  r%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S Nz<extra_id_%d>r   r   rX   r   r   r  rT  r9   r]   r_   r   	extra_idsrP  rG   s         r#   r   z"GPTBigCodeMHALoader.get_vocabularys  g    ''y99L+c&kk9	y!! 	/ 	/AMM/A-....r%   c                 N    |j         |_         |j        |_        |j        |_        d S r   r|  r   s       r#   r~   zGPTBigCodeMHALoader.set_config|  r}  r%   c                    d|_         |                     |j        |j                   |                     |j        |j                   |                     |j        |j	                   t          |j        |j                  D ]\  }}|                     |j        j        |j                   |                     |j        j        d         |j        j                   |                     |j        j        d         |j        j                   |                     |j        j        |j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   d S NFr   r   r%  r   r(  wter   r&  wper   r   ln_fr   r   hr   ln_1r   r   attnc_attnc_projr  ln_2r  mlpc_fcr  r9   r`   r   r  r   s        r#   r   zGPTBigCodeMHALoader.set_decoder  J    %DOVZ888##D$;VZHHHDOV[999!$TZ!:!: 	G 	GJ
 9 DejQQQOOJ5<Q?ARSSSOOJ5<Q?ARSSS
 95:FFFOOJN3UY^DDDOOJN3UY5EFFFF	G 	Gr%   )rH   ro   rp   r   rO   r|   r   r   r~   r   r,  r-  s   @r#   r  r  ]  s        ' ' X'  ) ) )    / / /
G G G G G G Gr%   r  
GPT2Configc                   <    e Zd Zed             Zd Zd Zd Zd ZdS )
GPT2Loaderc                     dS )NGPT2LMHeadModelr&   rw   s    r#   rO   zGPT2Loader.architecture_name        r%   c                 (   t           j                            |j        j        |j        j        dt          |j        j                           }|                     |j	        |j
                   |                     |j	        j        |j                   |S )NT)r   r   r  rs  s      r#   r|   zGPT2Loader.get_model_spec  s}    ;GGL L-el.NO	 H 
 
 	u'8999/???r%   c                 0    |                     |           d S r   rx  r   s      r#   r   zGPT2Loader.set_vocabulary  rz  r%   c                 N    |j         |_         |j        |_        |j        |_        d S r   r|  r   s       r#   r~   zGPT2Loader.set_config  r}  r%   c                    d|_         |                     |j        |j                   |                     |j        |j                   |                     |j        |j	                   t          |j        |j                  D ]\  }}|                     |j        j        |j                   |                     |j        j        d         |j        j                   |                     |j        j        d         |j        j                   |                     |j        j        |j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   d S r  r  r  s        r#   r   zGPT2Loader.set_decoder  r  r%   N	rH   ro   rp   r   rO   r|   r   r~   r   r&   r%   r#   r  r    sn        ! ! X!
 
 
) ) )/ / /
G G G G Gr%   r  
GPTJConfigc                   <    e Zd Zed             Zd Zd Zd Zd ZdS )
GPTJLoaderc                     dS )NGPTJForCausalLMr&   rw   s    r#   rO   zGPTJLoader.architecture_name  r  r%   c           
      p   t           j                            |j        j        |j        j        dt          |j        j                 |j        j        ddd          }| 	                    |j
        |j        |j        j        |j        j                   |                     |j
        j        |j                   |S NTFr   r   
rotary_dimrotary_interleaveparallel_residualshared_layer_norm)r
   rm  r   rX   r  r  r   r   r  r   r   r  r   r   r   rs  s      r#   r|   zGPTJLoader.get_model_spec  s    ;GGL L-el.NO|.#"" H 	
 	
 	LL#L		
 	
 	
 	/???r%   c                 0    |                     |           d S r   rx  r   s      r#   r   zGPTJLoader.set_vocabulary  rz  r%   c                 N    |j         |_         |j        |_        |j        |_        d S r   r|  r   s       r#   r~   zGPTJLoader.set_config  r}  r%   c                 6   d|_         |                     |j        |j                   |                     |j        |j                   t          |j        |j	                  D ]5\  }}|                     |j
        |j                   |j        j        j        }|j        j        j        }|j        j        j        }	t#          j        |||          }t#          j        |||          }t'          j        |||	f          |j        j        d         _        |                     |j        j        d         |j        j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   7d S r  )r%  r   r(  r  r   r   r  r   r   r  r  r  r  r  r   r  r  r   permute_for_sliced_rotaryrB   catr   r   r   r  r  r  r  fc_inr  fc_out)
r9   r`   r   r  	num_headsr  r   qwkwvws
             r#   r   zGPTJLoader.set_decoder  s]    %DOVZ888DOV[999!$TZ!:!: 	G 	GJ
 <ejIII")B")B")B0Y
KKB0Y
KKB9>BB<9P9PJ%,Q/6OOJ5<Q?ATUUUOOJN3UY_EEEOOJN3UY5EFFFF	G 	Gr%   Nr  r&   r%   r#   r  r    sn        ! ! X!  *) ) )/ / /
G G G G Gr%   r  CodeGenConfigc                   L     e Zd Zed             Zd Z fdZd Zd Zd Z	 xZ
S )CodeGenLoaderc                     dS )NCodeGenForCausalLMr&   rw   s    r#   rO   zCodeGenLoader.architecture_name      ##r%   c           
         t           j                            |j        j        |j        j        dt          |j        j                 |j        j        ddd          }d}t          |j        d          r|j        j
        dv rd}|                     |j        |j        |j        j        |j        j        |j        j        |           |                     |j        j        |j                   |S )	NTFr     head_dim)      r  )mp_num)r
   rm  r   rX   r  r  r   r   r  r   r  r   r   r  n_embdr   r   r   )r9   r]   r`   r  s       r#   r|   zCodeGenLoader.get_model_spec  s    ;GGL L-el.NO|.#"" H 	
 	
 5<,, 	1F*1T1T FLL#LL 	 	
 	
 	
 	/???r%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r   zCodeGenLoader.get_vocabulary  sg    ''y99L+c&kk9	y!! 	/ 	/AMM/A-....r%   c                 0    |                     |           d S r   rx  r   s      r#   r   zCodeGenLoader.set_vocabulary   rz  r%   c                 N    |j         |_         |j        |_        |j        |_        d S r   r|  r   s       r#   r~   zCodeGenLoader.set_config#  r}  r%   c                 6   d|_         |                     |j        |j                   |                     |j        |j                   t          j        d|dz            	                    dd          j
                                                                        }||z  t          j        fd|D                       }t          |j        |j                  D ]:\  }	}
|                     |	j        |
j                   |
j        j        j        }||d d f         }|                    dd          \  }}}t1          j        |||          }t1          j        |||          }t          j        |||f          |	j        j        d         _        |                     |	j        j        d         |
j        j                   |                     |	j        j        |
j         j!                   |                     |	j        j"        |
j         j#                   <d S )NFr   r  r  c                 L    g | ] }t          j        |z  |d z   z            !S )r   )rB   arange)r   rP  	local_dims     r#   r   z-CodeGenLoader.set_decoder.<locals>.<listcomp>0  s2    XXX!U\!i-!a%9)<==XXXr%   dimr   )$r%  r   r(  r  r   r   r  npr  reshapeTflattentolistrB   r  r   r   r  r  r  r  qkv_projr   chunkr   r  r   r   r   r  r  r  r  r  r  r  )r9   r`   r   r  r  	embed_dimr  base_permutationpermutationr  r   r  new_qkv_projr  r  r  r  s                   @r#   r   zCodeGenLoader.set_decoder(  s    %DOVZ888DOV[9999Q
33;;BBBDLLNNUUWW'	iXXXXGWXXX
 
 "%TZ!:!: 	G 	GJ
 <ejIII z*1H $KN3L%++A1+55JBB 0Y
KKB0Y
KKB9>BB<9P9PJ%,Q/6OOJ5<Q?ATUUUOOJN3UY_EEEOOJN3UY5EFFFF-	G 	Gr%   rH   ro   rp   r   rO   r|   r   r   r~   r   r,  r-  s   @r#   r  r    s        $ $ X$  <    ) ) )/ / /
!G !G !G !G !G !G !Gr%   r  GPTNeoXConfigc                   L     e Zd Zed             Zd Z fdZd Zd Zd Z	 xZ
S )GPTNeoXLoaderc                     dS )NGPTNeoXForCausalLMr&   rw   s    r#   rO   zGPTNeoXLoader.architecture_nameN  r  r%   c                    t           j                            |j        j        |j        j        dt          |j        j                 t          |j        j	        |j        j
        |j        j        z  z            d|j        j        d          }|                     |j        |j        |j        j                   |                     |j        j        |j                   |S r  )r
   rm  r   rX   rn  ro  r   
hidden_actint
rotary_pctrr  use_parallel_residualr   r   gpt_neoxr   r   	embed_outrs  s      r#   r|   zGPTNeoXLoader.get_model_specR  s    ;GGL*L,-el.EF'<+u|/OOQ  $#l@# H 
 
 	u~u|7WXXX/AAAr%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r   zGPTNeoXLoader.get_vocabularye  r  r%   c                 0    |                     |           d S r   rx  r   s      r#   r   zGPTNeoXLoader.set_vocabularyn  rz  r%   c                 N    |j         |_         |j        |_        |j        |_        d S r   r|  r   s       r#   r~   zGPTNeoXLoader.set_configq  r}  r%   c                    d|_         |                     |j        |j                   |                     |j        |j                   t          |j        |j	                  D ]	\  }}t          |d          rA|                     |j        |j                   |                     |j        |j                   nJ|                     |j        j        |j                   |                     |j        j        |j                   |j        j        j        }|j        j        j        }|                    |dd|j        d                                       dd                              d|j        d                   }|                    |dd                              dd                              d          }||j        j        d         _        ||j        j        d         _        |                     |j        j        d         |j        j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   d S )NFinput_layer_normr  r  r   r   ) r%  r   r(  embed_inr   r   r  r   r   r   r   r  input_layernormpost_attention_layer_normpost_attention_layernormr   r  r  query_key_valuer   r   r  r>  swapaxesr   r   denser  r  dense_h_to_4hr  dense_4h_to_h)r9   r`   r   r  r  r   qkv_wqkv_bs           r#   r   zGPTNeoXLoader.set_decoderv  s3    %DOV_===DOV-DEEE!$TZ!?!? 	N 	NJz#566 ##J$?AVWWW##8%:X    ##-8%:O   ##N-u/M   O3:EO38E iBB@@!QU[_-- 
 MM)Q33<<QBBJJ2NNE9>J%,Q/67<J%,Q/4OOJ5<Q?AVWWWOOJN3UY5LMMMOOJN3UY5LMMMM=	N 	Nr%   r  r-  s   @r#   r  r  L  s        $ $ X$  &    ) ) )/ / /
#N #N #N #N #N #N #Nr%   r  WhisperConfigc                   l     e Zd Zed             Zd Zd Zd Z fdZd Z	 fdZ
 fdZd	 Zd
 Z xZS )WhisperLoaderc                     dS )NWhisperForConditionalGenerationr&   rw   s    r#   rO   zWhisperLoader.architecture_name  rb  r%   c                 b   t          j        |j        j        |j        j        |j        j        |j        j                  }|                     |j        |j	        j                   | 
                    |j        |j	        j                   |                     |j        j        |j                   |S r   )r   WhisperSpecrX   r   r   r   decoder_attention_headsr   r   r]   r   r   r   r   proj_outrs  s      r#   r|   zWhisperLoader.get_model_spec  s    'L'L0L'L0	
 
 	u{':;;;u{':;;;/@@@r%   c                 V    g dt          dg           }|sg S fd|D             S )N)z<|endoftext|>z<|startoftranscript|>z<|translate|>z<|transcribe|>z<|startoflm|>z<|startofprev|>z<|nocaptions|>z<|notimestamps|>rL  c                 B    g | ]}|v                     |          S r&   )convert_tokens_to_ids)r   r   non_lang_special_tokensr_   s     r#   r   z>WhisperLoader._get_lang_ids_from_tokenizer.<locals>.<listcomp>  s=     
 
 
333 ++E22333r%   )rN   )r9   r_   additional_tokensr  s    ` @r#   _get_lang_ids_from_tokenizerz*WhisperLoader._get_lang_ids_from_tokenizer  sh    	#
 	#
 	#
 $I/JBOO  	I
 
 
 
 
*
 
 
 	
r%   c                    t          |dd           }|p|j        |_        |j        |_        t          |d          r|j        |_        t          |d          r+t          |j        	                                          |_
        nF|j        j        |_        |j        j        |_        t                              |j                  |_        t          |dd           |                     |          |_
        |j        _|j        j        }|j        j        }t%          t'          j        t+          |dz  |          t+          |                              |_        d S d S )Ngeneration_configalignment_heads
lang_to_idlang_idsr  )rN   suppress_tokenssuppress_idsbegin_suppress_tokenssuppress_ids_beginr   r  rL   r  valuesr  rX   _WHISPER_ALIGNMENT_HEADSrI   name_or_pathr  r   r  r)  	itertoolsproductr  )r9   rX   r]   r_   
gen_config
num_layersr  s          r#   r~   zWhisperLoader.set_config  sD   U$7>>
!","<F(2(HF%z#455 D)3)C&z<00 I"()>)E)E)G)G"H"H"',">F(-(JF%%=%A%A%BT%U%UF"6:t,,4"??	JJFO!)4J<I%)!*/:66)$$ & &F"""	 *)r%   c           	          t                                          ||          }|                    d t          |j        j        t          |          z
            D                        |S )Nc              3   &   K   | ]}d |dz  z  V  dS )z<|%.2f|>g{Gz?Nr&   rO  s     r#   	<genexpr>z/WhisperLoader.get_vocabulary.<locals>.<genexpr>  s?       
 
 !d(#
 
 
 
 
 
r%   )r   r   extendr  rX   r   r   r   s       r#   r   zWhisperLoader.get_vocabulary  so    ''y99 	 
 
5<2S[[@AA
 
 
 	
 	
 	

 r%   c                 0    |                     |           d S r   rx  r   s      r#   r   zWhisperLoader.set_vocabulary  rz  r%   c                     |                      |j        |j                   |                      |j        |j                   t                                          ||           d S r   )
set_conv1dconv1conv2r   r   )r9   r`   r   rG   s      r#   r   zWhisperLoader.set_encoder  sS    
GM222
GM222D'*****r%   c                     |                      |j        |j                   t                                          ||           d S r   )r   r(  r*  r   r   r:  s      r#   r   zWhisperLoader.set_decoder  s>    DOW-ABBBD'*****r%   c                     |                      |j        |j                   |                     |j        |j                   d S r   )r   r&  r'  r   r   r   s      r#   r   zWhisperLoader.set_common_layers  s?    ##D$;V=STTTDOV->?????r%   c                 6    |j         |_         |j        |_        d S r   r   r   r   s      r#   r.  zWhisperLoader.set_conv1d  s    mK			r%   )rH   ro   rp   r   rO   r|   r  r~   r   r   r   r   r   r.  r,  r-  s   @r#   r  r    s        1 1 X1  
 
 
,  :	 	 	 	 	) ) )+ + + + +
+ + + + +@ @ @             r%   r  Wav2Vec2Configc                   d     e Zd Zed             Zd Zd Zd Zd Zd Z	d Z
d Z fd	Zd
 Z xZS )Wav2Vec2Loaderc                     dS )NWav2Vec2ForCTCr&   rw   s    r#   rO   z Wav2Vec2Loader.architecture_name  rj  r%   c                    t          |j        j        dd          }t          j        |j        j        j        |j        j        j        j        |j        j        j        j        |j	        j
        j        d         |          }|j        j        j        D ]M}|j        |_        |j        |_        |j        j        |_        |j        j        |_        |j        j        |_        N|                     |j        ||j        j                   |S Nreturn_hiddenFr   )rN   wav2vec2rX   r   Wav2Vec2Specnum_feat_extract_layersr   rn  ro  r   r   r>  r   r  r   r   r  feed_forwardintermediate_act_fnactivation_fnintermediate_denser  output_denser  r   )r9   r]   r<  r`   r   s        r#   r|   zWav2Vec2Loader.get_model_spec	  s     5NN)N!9N");N")=M &q)
 
 ^+2 	8 	8E#oEO).)9E&"'"4"HE*=EI*7EIIuen.CDDDr%   c                     d S r   r&   r   s       r#   r~   zWav2Vec2Loader.set_config      r%   c                 *    |                                 S r   r   r   s      r#   r   zWav2Vec2Loader.get_vocabulary!      ""$$$r%   c                 0    |                     |           d S r   rx  r   s      r#   r   zWav2Vec2Loader.set_vocabulary$  rz  r%   c                    |j         d         j        j        |j        j        _        |j         d         j        j        |j        j        _        |                     |j        j        |j         d         j                   t          |j        |j         dd                    D ]Q\  }}|j        j        |j        _        |j        j        |j        _        |                     |j        |j                   Rd S )Nr   r   )	conv_layersconvr   feat_layer0r   r   r   r   
feat_layer)r9   r`   feature_extractor
spec_layermodule_layers        r#   set_feature_extractorz$Wav2Vec2Loader.set_feature_extractor'  s    '8'DQ'G'L'S$%6%B1%E%J%O"'):)Fq)I)T	
 	
 	
 ),O.:122>)
 )
 	P 	P$J &2%6%=JO"#/#4#9JO 
 5|7NOOOO	P 	Pr%   c                     |                      |j        |j                   |                     |j        |j                   d S r   r   fp_layer_normr   r   fp_projectionr   r9   r`   feature_projections      r#   set_feature_projectionz%Wav2Vec2Loader.set_feature_projection4  @    D.0B0MNNN*,>,IJJJJJr%   c                 >   |j         j        j        j                                        |j         j        j        _        |j         j        j                                        |j         j        j        _        |j                                         D ] }|j                                        |_        !|                     t          j        dd|j	        f                     |j         j        j        |j         j        _        |j         j        j        |j         j        _        d S r   )
pos_conv_embedrM  r   datafloatr   
parametersrB   randnrr  )r9   r`   r   rX   params        r#   set_pos_conv_embedz!Wav2Vec2Loader.set_pos_conv_embed8  s     "'.399;; 	#*/ 180F0K0P0V0V0X0X#(-+6688 	, 	,E))++EJJu{Aq&2D+EFFGGG*1*@*E*L '(/(>(C(H %%%r%   c                    |                      ||j        j                   |                     ||j        j                   |                     ||j        j        |           t                                          ||j        j                   t          |j        j
        dd          }|s"|                     |j        |j                   d S d S Nr<  F)rS  r=  rP  rZ  rY  rc  r   r   r   rN   rX   r   r   )r9   r`   r]   rX   r<  rG   s        r#   r   zWav2Vec2Loader.set_encoderE  s    ""4)IJJJ##D%.*KLLLen&<fEEED%."8999 5NN 	9OODL%-88888	9 	9r%   c                 F    |                      |j        |j                   d S r   )r   r   r   s      r#   r   z Wav2Vec2Loader.set_common_layersN  s#    DOV->?????r%   )rH   ro   rp   r   rO   r|   r~   r   r   rS  rZ  rc  r   r   r,  r-  s   @r#   r7  r7    s            X   *  % % %) ) )P P PK K KI I I9 9 9 9 9@ @ @ @ @ @ @r%   r7  Wav2Vec2BertConfigc                   j    e Zd Zed             Zd Zd Zd Zd Zd Z		 ddZ
d	 Zd
 Zd Zd Zd ZdS )Wav2Vec2BertLoaderc                     dS )NWav2Vec2BertForCTCr&   rw   s    r#   rO   z$Wav2Vec2BertLoader.architecture_nameT  r  r%   c                    t          |j        j        dd          }t          j        |j        j        j        |j        j        j        |j        j        j	        d         |          }| 
                    |j        |           |S r;  )rN   wav2vec2_bertrX   r   Wav2Vec2BertSpecnum_adapter_layersrn  r   r   r>  r   r   )r9   r]   r<  r`   s       r#   r|   z!Wav2Vec2BertLoader.get_model_specX  su     3 :OUSS 1&9&8M &q)	
 
 	u---r%   c                     d S r   r&   r   s       r#   r~   zWav2Vec2BertLoader.set_configc  rF  r%   c                 *    |                                 S r   rH  r   s      r#   r   z!Wav2Vec2BertLoader.get_vocabularyf  rI  r%   c                 0    |                     |           d S r   rx  r   s      r#   r   z!Wav2Vec2BertLoader.set_vocabularyi  rz  r%   c                     |                      |j        |j                   |                     |j        |j                   d S r   rU  rX  s      r#   rZ  z)Wav2Vec2BertLoader.set_feature_projectionl  r[  r%   Nc                 h   d t          d          D             }|                     |d         |j                   |                     |d         |j                   |                     |d         |j                   t          j        |j        d         |           |                     |j        d         |j                   |s|rk|j	        j
        |_        t          j        d                              |          |_        t          j        d                              |          |_        d S d S )Nc                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z4Wav2Vec2BertLoader.set_attention.<locals>.<listcomp>s  r  r%   r  r   r   r  r  int32)r  r   linear_qlinear_klinear_vr   r  r   
linear_outdistance_embeddingr   !relative_asymmetric_position_keysr  r>   r   relative_left_max_positionrelative_right_max_position)r9   r`   r  left_max_positionright_max_positionr  s         r#   r   z Wav2Vec2BertLoader.set_attentionp  s    DC%((CCCQ);<<<Q);<<<Q);<<<$+a.,777B)=>>> 	 2 	5>5Q5XD2.0hw.?.?.D.DEV.W.WD+/1x/@/@/E/E"0 0D,,,	 	r%   c                 <   t          ||          D ]	\  }}|                     |j        |j                   |                     |j        j        |j        j                   |                     |j        j	        |j        j
                   |                     |j        |j        ||           |                     |j        |j                   |                     |j        |j        j                   |                     |j        |j        j                   |                     |j        |j        j                   |                     |j        |j        j                   |                     |j        |j        j                   |                     |j        |j                   |                     |j        j        |j        j                   |                     |j        j	        |j        j
                   |                     |j         |j!                   d S r   )"r   r   enc_ffn1_layer_normffn1_layer_normr   enc_ffn1r  ffn1rC  r  rD  r   enc_attnr   enc_attn_layer_normr  enc_conv_layer_normconv_moduler   r.  enc_conv_pointwise_conv1pointwise_conv1enc_conv_depthwise_convdepthwise_convenc_conv_depthwise_layer_normdepthwise_layer_normenc_conv_pointwise_conv2pointwise_conv2enc_ffn2_layer_normffn2_layer_normenc_ffn2ffn2enc_final_layer_normr  )r9   spec_layersr   r  r  slayerr   s          r#   set_wav2vec2bert_encoderz+Wav2Vec2BertLoader.set_wav2vec2bert_encoder  s    !f55 	U 	UMFE :E<QRRROOFO4ej6STTTOOFO4ej6MNNN2CEW    :E<VWWW*E,=,H   OO/1B1R   OO.0A0P   4!6   OO/1B1R    :E<QRRROOFO4ej6STTTOOFO4ej6MNNN ;U=STTTT7	U 	Ur%   c                 ^   t          ||          D ]\  }}|                     |j        |j                   |                     |j        |j                   |                     |j        |j                   |                     |j	        |j
                   |                     |j        |j                   |                     |j        |j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   d S r   )r   r   adpt_residual_layer_normresidual_layer_normr.  adpt_residual_convresidual_convadpt_attn_layer_normr  adpt_attn_convself_attn_convr   adpt_attn_layerr   adpt_ffn_layer_normffn_layer_normr   adpt_ffnr  r  rC  r  rD  )r9   r  r   r  r   s        r#   set_wav2vec2bert_adapterz+Wav2Vec2BertLoader.set_wav2vec2bert_adapter  s    f55 
	N 
	NMFE/1J   OOF5u7JKKK ;U=WXXXOOF153GHHHv5uGGG :E<PQQQOOFO4ei6RSSSOOFO4ei6LMMMM
	N 
	Nr%   c                    |                      ||j        j                   |                     |j        |j        j        j        |j        j        j        |j        j        j	                   | 
                    |j        |j        j        j                   t          |j        j        dd          }|s"|                     |j        |j                   d S d S re  )rZ  rm  rY  r  r   r   r   rX   left_max_position_embeddingsright_max_position_embeddingsr  adapter_layersadapterrN   r   r   )r9   r`   r]   r<  s       r#   r   zWav2Vec2BertLoader.set_encoder  s    ##D%*=*PQQQ%%'.&C&D		
 	
 	
 	%%!4!<!C	
 	
 	
   3 :OUSS 	9OODL%-88888	9 	9r%   c                 H    |j         |_         |j        |j        |_        d S d S r   r4  r   s      r#   r.  zWav2Vec2BertLoader.set_conv1d  s(    m;"DIII #"r%   c                 H    |j         |_        |j        |j        |_        d S d S r   r   r   s      r#   r   z!Wav2Vec2BertLoader.set_layer_norm  s(    ]
;"DIII #"r%   )NN)rH   ro   rp   r   rO   r|   r~   r   r   rZ  r   r  r  r   r.  r   r&   r%   r#   ri  ri  R  s        $ $ X$	 	 	  % % %) ) )K K K
 KO    U U U@N N N9 9 9$ $ $
$ $ $ $ $r%   ri  T5Configc                   n     e Zd Zed             Zd Z fdZd Zd ZddZ	d Z
d	 Zd
 ZddZd Z xZS )T5Loaderc                     dS )NT5ForConditionalGenerationr&   rw   s    r#   rO   zT5Loader.architecture_name  s    ++r%   c           	         t           j                            |j        j        |j        j        f|j        j        dt          |j        j                 |j        j	        dd          }| 
                    |j        |j                   | 
                    |j        |j        d           |                     |j        j        |j                   |j        j        r|j        j        dz  |j        _        |S )NT)r   r   ffn_glurelative_attention_biasrms_norm)
is_decoderg      )r
   r   r   rX   r'  num_decoder_layersr  r   dense_act_fnis_gated_act	set_stackr   r   r   r   r   tie_word_embeddingsr$  scale_outputsrs  s      r#   r|   zT5Loader.get_model_spec  s    /;;\$el&EFL"-el.GHL-$( < 
 
 	t|U]333t|U]tDDD/???<+ 	D).)=t)CDL&r%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r   zT5Loader.get_vocabulary  r  r%   c                 Z    |                     |           |                    |           d S r   r   r   s      r#   r   zT5Loader.set_vocabulary  r   r%   c                     |j         |_        |j        |_        |j        |_        t	          |j        d          r&|                    |j        j                  |_        d S |j         |_        d S )Nr   )	rd  r   r   r   r   rX   r   r   r   r   s       r#   r~   zT5Loader.set_config  sq    $.$.$.5<!9:: 	=)2)H)H3* *F&&& *3)<F&&&r%   Fc                    |                      |j        |j                   |                     t	          |j        t                    r|j        d         n|j        |j                   d|_        t          t          |j        |j                            D ]\  }\  }}|                     |j        |j        d                    |dk    r4|j        d         j        }|j        |j        _        |j        |j        _        |r&|                     |j        |j        d                    |                     |j        |j        d                    d S )Nr   Fr   r  )r   r   r  r   r   r(  r)  r*  r%  ru  r   r   blockset_self_attentionr   r  relative_attention_max_distanceset_cross_attentionr  set_ffnr  )r9   r`   r   r  rP  r  r  first_self_attentions           r#   r  zT5Loader.set_stack  sI   DOV-DEEE dot44%""_	
 	
 	
 !&&/DJ0M0M&N&N 	: 	:"A"
E##J$=u{1~NNN1uu'+z!}'C$(@ )A )H )I  O(()=u{1~NNNLLR9999!	: 	:r%   c                    t          |d          rK|                     |j        |j        j                   |                     |j        |j        j                   n%|                     |j        |j        j                   |                     |j        |j        j	                   | 
                    |j        |j                   d S )Nlinear_0_noact)r   r   r  DenseReluDensewi_0r  wi_1wir  wor   r   r   s      r#   r  zT5Loader.set_ffn  s    4)** 	EOODM6+@+EFFFOOD/1F1KLLLLOODM6+@+CDDDv'<'?@@@DOV->?????r%   c                     |                      ||j        d           |                     |j        |j                   d S r   )r   SelfAttentionr   r   r   s      r#   r  zT5Loader.set_self_attention"  sA    4!5dKKKDOV->?????r%   c                 |    |                      ||j                   |                     |j        |j                   d S r   )r   EncDecAttentionr   r   r   s      r#   r  zT5Loader.set_cross_attention&  s<    4!7888DOV->?????r%   c                    d|_         d t          d          D             }|                     |d         |j                   |                     |d         |j                   |                     |d         |j                   |r!t          j        |j        d         |           nPt          j        |j        d         |d d                    t          j        |j        d         |dd                     |                     |j        d         |j	                   |j
        rD|j        j        |_        t          j        d                              |j                  |_        d S d S )	Nr   c                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z*T5Loader.set_attention.<locals>.<listcomp>-  r  r%   r  r   r   r  r  rv  )queries_scaler  r   qkvr   r  r   ohas_relative_attention_biasr  r   r  r>   r   r  r  s        r#   r   zT5Loader.set_attention*  sG    CC%((CCCQ555Q555Q555 	@dk!nl;;;;dk!nl2A2.>???dk!nl122.>???B5550 	+4+L+SD(358G3D3D3I3I94 4D000	 	r%   c                     |j         |_        d S r   r   r   r9   r`   r   s      r#   r   zT5Loader.set_layer_norm@      &


r%   r+  )rH   ro   rp   r   rO   r|   r   r   r~   r  r  r  r  r   r   r,  r-  s   @r#   r  r    s        , , X,  (    0 0 0	= 	= 	=: : : :>@ @ @@ @ @@ @ @   ,' ' ' ' ' ' 'r%   r  	MT5Configc                   $    e Zd Zed             ZdS )	MT5Loaderc                     dS )NMT5ForConditionalGenerationr&   rw   s    r#   rO   zMT5Loader.architecture_nameF  s    ,,r%   N)rH   ro   rp   r   rO   r&   r%   r#   r  r  D  s-        - - X- - -r%   r  BloomConfigc                   R     e Zd Zed             Zd Z fdZd Zd Zd Z	d Z
 xZS )BloomLoaderc                     dS )NBloomForCausalLMr&   rw   s    r#   rO   zBloomLoader.architecture_nameM      !!r%   c           	      "   t           j                            |j        j        |j        j        dt          j        j        ddd          }| 	                    |j
        |j                   |                     |j
        j        |j                   |S )NT)r   r   r   alibialibi_use_positive_positions)r
   rm  r   rX   r  r  r   
ActivationGELUTanhr   r   r  r   r   r   rs  s      r#   r|   zBloomLoader.get_model_specQ  s    ;GGL L"-6 $)- H 
 
 	u'8999/???r%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r   zBloomLoader.get_vocabulary`  r  r%   c                 0    |                     |           d S r   rx  r   s      r#   r   zBloomLoader.set_vocabularyi  rz  r%   c                 N    |j         |_         |j        |_        |j        |_        d S r   r|  r   s       r#   r~   zBloomLoader.set_configl  r}  r%   c                 *   d|_         |                     |j        |j                   |                     |j        |j                   |                     |j        |j                   t          |j
        |j                  D ]\  }}|                     |j        j        |j                   |                     |j        j        d         |j        j        |j        j                   |                     |j        j        d         |j        j                   |                     |j        j        |j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   d S r  )r%  r   r(  word_embeddingsr   r   word_embeddings_layernormr   r  r   r   r  r   r  set_qkv_linearr   r  r  r   r  r  r  r  r  r  r  r  r  s        r#   r   zBloomLoader.set_decoderq  s    %DOV-CDDDD4f6VWWWDOV[999!$TZ!:!: 	N 	NJ)4e6K   )03$4$.  
 OO)03U5I5O   )5+I   OOJN3UY5LMMMOOJN3UY5LMMMM#	N 	Nr%   c                 v   |j         }|                    |dd|j        d                   }|                    dd          }|                    d|j        d                   }|j        }|                    |dd          }|                    dd          }|                    d          }||_         ||_        d S )Nr  r  r   r   )r   r  r>  r   r   )r9   r`   r   r  r   r   s         r#   r  zBloomLoader.set_qkv_linear  s    	1b&,r2BCC!!!Q''FL$455{||Iq"--~~a##||B			r%   )rH   ro   rp   r   rO   r|   r   r   r~   r   r  r,  r-  s   @r#   r  r  K  s        " " X"      ) ) )/ / /
N N N2      r%   r  	MPTConfigc                   R     e Zd Zed             Zd Z fdZd Zd Zd Z	d Z
 xZS )	MPTLoaderc                     dS NAutoModelForCausalLMr&   rw   s    r#   rO   zMPTLoader.architecture_name      %%r%   c                     t           j                            |j        j        |j        j        dt          j        j        d          }| 	                    |j
        |j                   |S )NT)r   r   r  )r
   rm  r   rX   n_layersn_headsr   r  GELUr   r   r  rs  s      r#   r|   zMPTLoader.get_model_spec  sa    ;GGL!L "-2 H 
 
 	u'8999r%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r   zMPTLoader.get_vocabulary  r  r%   c                 0    |                     |           d S r   rx  r   s      r#   r   zMPTLoader.set_vocabulary  rz  r%   c                 N    |j         |_         |j        |_        |j        |_        d S r   r|  r   s       r#   r~   zMPTLoader.set_config  r}  r%   c                     |                      |j        |j                   |                     |j        |j                   d|_        |j        j        |j        _        t          |j
        |j                  D ]\  }}|                     |j        j        |j                   |                     |j        j        d         |j        j                   |                     |j        j        d         |j        j                   |                     |j        j        |j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   d S r  )r   r(  r  r   r   norm_fr%  r   r   r   r   blocksr   norm_1r   r   r  Wqkvr  r  norm_2r  up_projr  	down_projr  s        r#   r   zMPTLoader.set_decoder  s>   DOVZ888DOV];;; %!%!7!$TZ!?!? 	J 	JJ
 9 DelSSSOOJ5<Q?QQQOOJ5<Q?ATUUU
 95<HHHOOJN3UY5FGGGOOJN3UY5HIIII	J 	Jr%   c                 Z    |j         |_        t          j        |j                  |_        d S r   )r   r   rB   
zeros_liker   r   s      r#   r   zMPTLoader.set_layer_norm  s#    ]
$TZ00			r%   )rH   ro   rp   r   rO   r|   r   r   r~   r   r   r,  r-  s   @r#   r  r    s        & & X&
 
 
    ) ) )/ / /
J J J 1 1 1 1 1 1 1r%   r  GemmaConfigc                   R     e Zd Zed             Zd Z fdZd Zd Zd Z	d Z
 xZS )GemmaLoaderc                     dS )NGemmaForCausalLMr&   rw   s    r#   rO   zGemmaLoader.architecture_name  r  r%   c                 @   |j         j        }|j         j        }t          |j         d|          }||k    rd }t          |j         dd          }t          j                            |||dk    rt          j        j	        nt          j        j
        dddddt          |j         dd	          ||j         j        
          }|                     |j        |j                   |                     |j        j        |j                   |j         j        dz  |j        j        _        |S )Nnum_key_value_headshidden_activationr   r   Tr   Fr   '  )	r   r   r  r  r  r  rotary_basenum_heads_kvr        ?rX   rn  ro  rN   r
   rm  r   r   r  r  r  r  r   r   r]   r   r   r   rr  r(  multiply_by_sqrt_depthr9   r]   r'  r  r  activation_configr`   s          r#   r|   zGemmaLoader.get_model_spec  s   \3
L4	u|-BINN9$$L#L-/B
 
  ;GG %.. &++ +4#lEBB%\* H 
 
$ 	u{333/???9>9QSV9V6r%   c                 :   t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |j        j        t	          |          k     r|d |j        j                 }|S r  r  r  s         r#   r   zGemmaLoader.get_vocabulary      ''y99L+c&kk9	y!! 	/ 	/AMM/A-....<"S[[005el556Fr%   c                 0    |                     |           d S r   rx  r   s      r#   r   zGemmaLoader.set_vocabulary  rz  r%   c                 p    |j         |_         |j        |_        |j        |_        |j        j        |_        d S r   r   r   r   rX   rms_norm_epslayer_norm_epsilonr   s       r#   r~   zGemmaLoader.set_config  6    $.$.$.$)L$=!!!r%   c                 ,    |j         |_        d|_        d S r8  r   r   layer_norm_use_residualr  s      r#   r   zGemmaLoader.set_layer_norm      &
'+$$$r%   c                    d|_         d|_        |                     |j        |j                   |                     |j        |j                   t          |j	        |j
                  D ]\  }}|                     |j        j        |j                   |                     |j        j        |j                   |j        j        j        }|j        j        j        }|j        j        j        }|j        j        j        }t+          j        |||g          |j        j        d         _        ||j        j        d         _        |                     |j        j        |j        j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   tA          |d           tA          |d           tC          j"                     d S NTFr   r   r   r  )#r%  r9  r   r(  r*  r   r   normr   r   r   r   r  r  r  r   r  r   r  r  o_projrB   r  r   r   r  r  	gate_projr  r  r  r  delattrgccollect	r9   r`   r   r  r   wqwkwvr  s	            r#   r   zGemmaLoader.set_decoder  s    $).&DOV-@AAADOV[999!$TZ!?!? 	 	J)4e6K   )5+I   '.B'.B'.B'.B9>BB<9P9PJ%,Q/69;J%,Q/6OOJN3UY5HIIIOOJN959;LMMMOOJN3UY5HIIIE;'''E5!!!JLLLL-	 	r%   rH   ro   rp   r   rO   r|   r   r   r~   r   r   r,  r-  s   @r#   r  r    s        " " X"! ! !F	 	 	 	 	) ) )> > >, , ,      r%   r  Gemma2Configc                   R     e Zd Zed             Zd Z fdZd Zd Zd Z	d Z
 xZS )Gemma2Loaderc                     dS )NGemma2ForCausalLMr&   rw   s    r#   rO   zGemma2Loader.architecture_name3      ""r%   c                 B   |j         j        }|j         j        }t          |j         d|          }||k    rd }t          |j         dd          }t          j                            |||dk    rt          j        j	        nt          j        j
        dddddt          |j         dd	          ||j         j        d
          }|                     |j        |j                   |                     |j        j        |j                   |j         j        dz  |j        j        _        |S )Nr  r  r   r   Tr   Fr   r  )
r   r   r  r  r  r  r  r  r  pre_post_layer_normr  r  r  s          r#   r|   zGemma2Loader.get_model_spec7  s!   \3
L4	u|-BINN9$$L#L-/B
 
  ;GG %.. &++ +4#lEBB%\* $! H 
 
& 	u{333/???9>9QSV9V6r%   c                 :   t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |j        j        t	          |          k     r|d |j        j                 }|S r  r  r  s         r#   r   zGemma2Loader.get_vocabulary[  r   r%   c                 0    |                     |           d S r   rx  r   s      r#   r   zGemma2Loader.set_vocabularyf  rz  r%   c                 p    |j         |_         |j        |_        |j        |_        |j        j        |_        d S r   r#  r   s       r#   r~   zGemma2Loader.set_configi  r&  r%   c                 ,    |j         |_        d|_        d S r8  r(  r  s      r#   r   zGemma2Loader.set_layer_normo  r*  r%   c                 V   d|_         d|_        |                     |j        |j                   |                     |j        |j                   t          |j	        |j
                  D ]\  }}|                     |j        |j                   |                     |j        |j                   |                     |j        |j                   |                     |j        |j                   |j        j        j        }|j        j        j        }|j        j        j        }|j        j        j        }t3          j        |||g          |j        j        d         _        ||j        j        d         _        |                     |j        j        |j         j!                   |                     |j        j"        |j         j#                   |                     |j        j$        |j         j%                   tM          |d           tM          |d           tO          j(                     d S r,  ))r%  r9  r   r(  r*  r   r   r-  r   r   r   r  r  r   r  pre_feedforward_layer_normpre_feedforward_layernormpost_feedforward_layer_normpost_feedforward_layernormr   r  r   r  r  r.  rB   r  r   r   r   r  r  r  r/  r  r  r  r  r0  r1  r2  r3  s	            r#   r   zGemma2Loader.set_decoders  s    $).&DOV-@AAADOV[999!$TZ!?!? 	 	J
 ;U=RSSS4e6T   5u7V   68X   '.B'.B'.B'.B9>BB<9P9PJ%,Q/69;J%,Q/6OOJN3UY5HIIIOOJN959;LMMMOOJN3UY5HIIIE;'''E5!!!JLLLL;	 	r%   r7  r-  s   @r#   r:  r:  1  s        # # X#" " "H	 	 	 	 	) ) )> > >, , ,# # # # # # #r%   r:  LlamaConfigc                   j     e Zd Zed             Zd Z fdZd Zd Zd Z	e
j        j        fdZ xZS )LlamaLoaderc                     dS )NLlamaForCausalLMr&   rw   s    r#   rO   zLlamaLoader.architecture_name  r  r%   c                    |j         j        }|j         j        }t          |j         d|          }||k    rd }|                     |j         d          \  }}}t          |j         dd           }|rd }	|j        dk    rt                              |j                  }	|	Dt          d|j        dd
                    t                                                              |j        }
|j        }nt          j        j        }	d }
d }t"          j                            ||t          j        j        dddd	d
|||||	|
|          }|                     |j        |j        |	           |                     |j        j        |j                   t          |j         dd           }|t8          j        j        k    r3|j        j        D ]&}|d         |j         _!        |d         |j         _"        '|S )Nr  r  quantization_configawqQuantization type 'T' is not yet implemented. The following Quantization types are currently supported: r<   Tr   Fr   r   r  r  r  r  r   r   r  r  r   quant_group_size
quant_bitsr   low_freq_factorhigh_freq_factor)#rX   rn  ro  rN   r   quant_method_SUPPORTED_QUANTIZATIONrI   versionrz   rK   rM   
group_sizebitsr   r   r   r
   rm  r   r  SWISHr   r   r]   r   r   r   r   RotaryScalingTypeLlama3r   r   rotary_low_freq_factorrotary_high_freq_factor)r9   r]   r'  r  r  r   r   r   rO  r   rT  rU  r`   r   r   s                  r#   r|   zLlamaLoader.get_model_spec  s   \3
L4	u|-BINN9$$LAEAWAWL&B
 B
>2J &el4I4PP 	J"/5884889L9TUU
!)) ,888		"9">">"@"@AAA	    3=,1JJ$15J#J;GG"-3# 3"7"%!-! H 
 
$ 	u{J???/??? u|^TBB."B"III+  >J%?$; @L&@$<< r%   c                 :   t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |j        j        t	          |          k     r|d |j        j                 }|S r  r  r  s         r#   r   zLlamaLoader.get_vocabulary  r   r%   c                 0    |                     |           d S r   rx  r   s      r#   r   zLlamaLoader.set_vocabulary  rz  r%   c                     |j         |_         |j        |_        |j        |j        nd|_        |j        j        |_        d S N r#  r   s       r#   r~   zLlamaLoader.set_config  sH    $.$.#,#6#BI 	 %*L$=!!!r%   c                     |j         |_        d S r   r  r  s      r#   r   zLlamaLoader.set_layer_norm  r  r%   c                 T   d|_         |                     |j        |j                   |                     |j        |j                   t          |j        |j	                  D ]D\  }}|                     |j
        j        |j                   |                     |j        j        |j                   d t          d          D             }|                     |d         |j        j        |           |                     |d         |j        j        |           |                     |d         |j        j        |           |t(          j        j        k    r&t/          j        |j
        j        d         |           n?|t(          j        j        k    rdnd}t/          j        |j
        j        d         ||           |                     |j
        j        d         |j        j        |           |                     |j        j        |j        j        |           |                     |j        j         |j        j!        |           |                     |j        j"        |j        j#        |           tI          |d           tI          |d	           tK          j&                     Fd S )
NFc                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z+LlamaLoader.set_decoder.<locals>.<listcomp>  !    GGGK244GGGr%   r  r   r   r   r  r   r  'r%  r   r(  r*  r   r   r-  r   r   r   r   r  r  r  r  r   r   r  r  r  r   r   r   r   r  r   AWQ_GEMMfuse_linear_prequantr.  r  r  r/  r  r  r  r  r0  r1  r2  r9   r`   r   r   r  r   r  cc_dims           r#   r   zLlamaLoader.set_decoder  s    %DOV-@AAADOV[999!$TZ!?!? ,	 ,	J)4e6K   )5+I   HGeAhhGGGLOOQ!7J     OOQ!7J     OOQ!7J     [5999!*";"B1"E|TTTT(K,D,MMMST*-4Q7v   OO)03&%     OO')<     OO-uy/@Z     OO')<     E;'''E5!!!JLLLLY,	 ,	r%   rH   ro   rp   r   rO   r|   r   r   r~   r   r   r   r   r   r,  r-  s   @r#   rK  rK    s        " " X"@ @ @D	 	 	 	 	) ) )> > >' ' ' 4?3K3O 1 1 1 1 1 1 1 1r%   rK  Gemma3TextConfigGemma3Configc                   j     e Zd Zed             Zd Z fdZd Zd Zd Z	e
j        j        fdZ xZS )Gemma3Loaderc                     dS )NGemma3ForCausalLMr&   rw   s    r#   rO   zGemma3Loader.architecture_name1  r=  r%   c                    |j         j        }|j         j        }t          |j         d|          }||k    rd }|j         j        }t          |j         dd          }t          |j         dd          }t          |j         dd          }t          |j         dd	          }	t          |j         d
d           }
t          |j         dd           }|rR|j        dk    rt                              |j                  }|t          d|j        z            |j
        }|j        }nt          j        j        }d }d }t          j                            |||dk    rt          j        j        nt          j        j        ddd|d||||	d|||d          }|
| _        t-          |
          D ]\  }}|j        j        |         }|dk    rct3          j        d                              |          |j        _        t3          j        d                              d          |j        _        |dk    rbt3          j        d                              |          |j        _        t3          j        d                              |	          |j        _        |                     |j        |j         |           | !                    |j        j"        |j#                   |S )Nr  r  r   r   @B rope_local_base_freqr  sliding_windowi   layer_typesrO  rP  z.Quantization type '%s' is not yet implemented.r   TF)r   r   r  r  r  r  r  r  r  r{  r?  r   rT  rU  qk_normfull_attentionfloat32rv  r   sliding_attention)$rX   rn  ro  rN   r  rX  rY  rI   rZ  rz   r[  r\  r   r   r   r
   rm  r   r  r  r  _layer_typesru  r   r   r  r>   r   r   r  r{  r   r]   r   r   r   )r9   r]   r'  r  r  r  r  r   rz  r{  r|  rO  r   rT  rU  r`   rP  
layer_typer   s                      r#   r|   zGemma3Loader.get_model_spec5  s   \3
L4	u|-BINN9$$L<(#L-/B
 

 U\<CC
&L0& 
  

 !/?FFelM4@@%el4I4PP 	"/5884889L9TUU
!)D)67    3=,1JJ$15J#J  ;GG %.. &++ +4#,%) $!-!+ H 
 
2 ( '{33 	 	MAzL&q)E---358I3F3F3K3KJ3W3W$068hw6G6G6L6LQ6O6O$33222358I3F3F3K3K(4 4$0 79hw6G6G6L6L"7 7$3 	u{J???/???r%   c                 :   t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |j        j        t	          |          k     r|d |j        j                 }|S r  r  r  s         r#   r   zGemma3Loader.get_vocabulary  r   r%   c                 0    |                     |           d S r   rx  r   s      r#   r   zGemma3Loader.set_vocabulary  rz  r%   c                     |j         |_         |j        |_        t          |d          r<t          |j        t
                    r"|j                                        r	d|_        d S |j        |_        d S )Nchat_templatez<end_of_turn>)r   r   r   r   r  rr   stripr   r   s       r#   r~   zGemma3Loader.set_config  s{    $.$. I//	392C88	3 '--//	3
  /F(2Fr%   c                 ,    |j         |_        d|_        d S r8  r(  r  s      r#   r   zGemma3Loader.set_layer_norm  r*  r%   c                 v   d|_         d|_        |                     |j        |j                   |                     |j        |j                   t          |j	        |j
                  D ]\  }}|                     |j        |j                   |                     |j        |j                   |                     |j        |j                   |                     |j        |j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   d t/          d          D             }|                     |d         |j        j        |           |                     |d         |j        j        |           |                     |d         |j        j        |           |t8          j        j        k    r&t?          j         |j        j!        d         |           n?|t8          j        j"        k    rdnd}t?          j#        |j        j!        d         ||           |                     |j        j!        d         |j        j$        |           |                     |j%        j&        |j'        j(        |           |                     |j%        j)        |j'        j*        |           |                     |j%        j+        |j'        j,        |           t[          |d	           t[          |d
           t]          j/                     d S )NTFc                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z,Gemma3Loader.set_decoder.<locals>.<listcomp>  rj  r%   r  r   rk  r   r  r   r  )0r%  r9  r   r(  r*  r   r   r-  r   r   r   r  r  r   r  rE  rF  rG  rH  r   q_normr   k_normr  r   r  r  r  r   r   r   r   r  r   rm  rn  r.  r  r  r  r/  r  r  r  r  r0  r1  r2  ro  s           r#   r   zGemma3Loader.set_decoder  sV    $).&DOV-@AAADOV[999!$TZ!?!? >	 >	J
 ;U=RSSS4e6T   5u7V   68X  
 )0%/2H   )0%/2H  
 HGeAhhGGGLOOQ!7J     OOQ!7J     OOQ!7J     [5999!*";"B1"E|TTTT(K,D,MMMST*-4Q7v   OO)03&%     OO')<     OO-uy/@Z     OO')<     E;'''E5!!!JLLLL}>	 >	r%   rq  r-  s   @r#   ru  ru  .  s         # # X#S S Sj	 	 	 	 	) ) )3 3 3, , , 4?3K3O D D D D D D D Dr%   ru  MistralConfigc                   j     e Zd Zed             Zd Z fdZd Zd Zd Z	e
j        j        fdZ xZS )MistralLoaderc                     dS )NMistralForCausalLMr&   rw   s    r#   rO   zMistralLoader.architecture_name  r  r%   c                 T   |j         j        }|j         j        }t          |j         d|          }||k    rd }t          |j         dd          }|                     |j         d          \  }}}t          |j         dd           }	|	r|	j        dk    rt                              |	j                  }
|
Dt          d|	j        dd	
                    t                                                              |	j        }|	j        }nt          j        j        }
d }d }t"          j                            ||t          j        j        d
d
d
dd||||||
|||j         j                  }|                     |j        |j        |
           |                     |j        j        |j                   |S )Nr  r{  r   r  rO  rP  rQ  rR  r<   TF)r   r   r  r  r  r  r   r   r  r  r{  r   rT  rU  r  rk  )rX   rn  ro  rN   r   rX  rY  rI   rZ  rz   rK   rM   r[  r\  r   r   r   r
   rm  r   r  r]  r  r   r   r]   r   r   r   )r9   r]   r'  r  r  r{  r   r   r   rO  r   rT  rU  r`   s                 r#   r|   zMistralLoader.get_model_spec  s   \3
L4	u|-BINN9$$L /?CCAEAWAWL&B
 B
>2J &el4I4PP 	"/5884889L9TUU
!)) ,888		"9">">"@"@AAA	    3=,1JJ$15J#J;GG"-3# 3"7"%)!-!\*# H 
 
( 	u{zJJJ/???r%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r   zMistralLoader.get_vocabulary0  r  r%   c                 0    |                     |           d S r   rx  r   s      r#   r   zMistralLoader.set_vocabulary9  rz  r%   c                 p    |j         |_         |j        |_        |j        |_        |j        j        |_        d S r   r#  r   s       r#   r~   zMistralLoader.set_config<  r&  r%   c                     |j         |_        d S r   r  r  s      r#   r   zMistralLoader.set_layer_normB  r  r%   c                 T   d|_         |                     |j        |j                   |                     |j        |j                   t          |j        |j	                  D ]D\  }}|                     |j
        j        |j                   |                     |j        j        |j                   d t          d          D             }|                     |d         |j        j        |           |                     |d         |j        j        |           |                     |d         |j        j        |           |t(          j        j        k    r&t/          j        |j
        j        d         |           n?|t(          j        j        k    rdnd}t/          j        |j
        j        d         ||           |                     |j
        j        d         |j        j        |           |                     |j        j        |j        j        |           |                     |j        j         |j        j!        |           |                     |j        j"        |j        j#        |           tI          |d           tI          |d	           tK          j&                     Fd S )
NFc                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z-MistralLoader.set_decoder.<locals>.<listcomp>Q  rj  r%   r  r   rk  r   r  r   r  rl  ro  s           r#   r   zMistralLoader.set_decoderE  s    %DOV-@AAADOV[999!$TZ!?!? +	 +	J)4e6K   )5+I   HGeAhhGGGLOOQ!7J     OOQ!7J     OOQ!7J     [5999!*";"B1"E|TTTT(K,D,MMMST*-4Q7v   OO)03&%     OO')<     OO-uy/@Z     OO')<     E;'''E5!!!JLLLLW+	 +	r%   rq  r-  s   @r#   r  r    s        $ $ X$8 8 8t    ) ) )> > >' ' ' 4?3K3O 0 0 0 0 0 0 0 0r%   r  Qwen2Configc                   j     e Zd Zed             Zd Z fdZd Zd Zd Z	e
j        j        fdZ xZS )Qwen2Loaderc                     dS )NQwen2ForCausalLMr&   rw   s    r#   rO   zQwen2Loader.architecture_namez  r  r%   c                    |j         j        }|j         j        }t          |j         d|          }||k    rd }|                     |j         d          \  }}}t          |j         dd           }|rd }	|j        dk    rt                              |j                  }	|	Dt          d|j        dd
                    t                                                              |j        }
|j        }nt          j        j        }	d }
d }t"          j                            ||t          j        j        dddd	d
|||||	|
|          }|                     |j        |j        |	           |                     |j        j        |j                   |S )Nr  r  rO  rP  rQ  rR  r<   Tr   FrS  )rX   rn  ro  rN   r   rX  rY  rI   rZ  rz   rK   rM   r[  r\  r   r   r   r
   rm  r   r  r]  r   r   r]   r   r   r   )r9   r]   r'  r  r  r   r   r   rO  r   rT  rU  r`   s                r#   r|   zQwen2Loader.get_model_spec~  s   \3
L4	u|-BINN9$$LAEAWAWL&B
 B
>2J
 &el4I4PP 	J"/5884889L9TUU
!)) ,888		"9">">"@"@AAA	    3=,1JJ$15J#J;GG"-3# 3"7"%!-! H 
 
$ 	u{J???/???r%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r   zQwen2Loader.get_vocabulary  sg    ''y99L+c&kk9	y!! 	/ 	/AMM/A-....r%   c                 0    |                     |           d S r   rx  r   s      r#   r   zQwen2Loader.set_vocabulary  rz  r%   c                     |j         |j         n|j        |_         |j        |_        |j        |j        nd|_        |j        j        |_        d S re  r   rd  r   r   rX   r$  r%  r   s       r#   r~   zQwen2Loader.set_config  c     ". $ 	
 %.#,#6#BI 	 %*L$=!!!r%   c                     |j         |_        d S r   r  r  s      r#   r   zQwen2Loader.set_layer_norm  r  r%   c                 T   d|_         |                     |j        |j                   |                     |j        |j                   t          |j        |j	                  D ]D\  }}|                     |j
        j        |j                   |                     |j        j        |j                   d t          d          D             }|                     |d         |j        j        |           |                     |d         |j        j        |           |                     |d         |j        j        |           |t(          j        j        k    r&t/          j        |j
        j        d         |           n?|t(          j        j        k    rdnd}t/          j        |j
        j        d         ||           |                     |j
        j        d         |j        j        |           |                     |j        j        |j        j        |           |                     |j        j         |j        j!        |           |                     |j        j"        |j        j#        |           tI          |d           tI          |d	           tK          j&                     Fd S )
NFc                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z+Qwen2Loader.set_decoder.<locals>.<listcomp>  rj  r%   r  r   rk  r   r  r   r  rl  ro  s           r#   r   zQwen2Loader.set_decoder  s    %DOV-@AAADOV[999!$TZ!?!? -	 -	J)4e6K   )5+I   HGeAhhGGGLOOQ!7J     OOQ!7J     OOQ!7J     [5999!*";"B1"E|TTTT(K,D,MMMST*-4Q7v   OO)03&%     OO')<     OO-uy/@Z     OO')<     E;'''E5!!!JLLLL[-	 -	r%   rq  r-  s   @r#   r  r  x  s        " " X"6 6 6p    ) ) )
> 
> 
>' ' ' 4?3K3O 2 2 2 2 2 2 2 2r%   r  Qwen3Configc                   j     e Zd Zed             Zd Z fdZd Zd Zd Z	e
j        j        fdZ xZS )Qwen3Loaderc                     dS )NQwen3ForCausalLMr&   rw   s    r#   rO   zQwen3Loader.architecture_name	  r  r%   c                 p   |j         j        }|j         j        }t          |j         d|          }t          |j         d|j         j        |z            }||k    rd }|                     |j         d          \  }}}t          |j         dd           }	|	rd }
|	j        dk    rt                              |	j	                  }
|
Dt          d|	j        dd                    t                                                              |	j        }|	j        }nt          j        j        }
d }d }t$          j                            ||t          j        j        d	d	d	|j         j        d
|||||d	|
||          }|                     |j        |j        |
           |                     |j        j        |j                   |S )Nr  r  ry  rO  rP  rQ  rR  r<   TF)r   r   r  r  r  r  r   r   r  r  r  r}  r   rT  rU  )rX   rn  ro  rN   rr  r   rX  rY  rI   rZ  rz   rK   rM   r[  r\  r   r   r   r
   rm  r   r  r]  r  r   r   r]   r   r   r   )r9   r]   r'  r  r  r  r   r   r   rO  r   rT  rU  r`   s                 r#   r|   zQwen3Loader.get_model_spec	  s   \3
L4	u|-BINNL*el&>)&K
 
 9$$LAEAWAWL)B
 B
>2J &el4I4PP 	J"/5884889L9TUU
!)) ,888		"9">">"@"@AAA	    3=,1JJ$15J#J;GG"-3|,# 3"7"%!-!# H 
 
( 	u{J???/???r%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r   zQwen3Loader.get_vocabularyG	  sg    ''y99L+c&kk9	y!! 	/ 	/AMM/A-....r%   c                 0    |                     |           d S r   rx  r   s      r#   r   zQwen3Loader.set_vocabularyN	  rz  r%   c                     |j         |j         n|j        |_         |j        |_        |j        |j        nd|_        |j        j        |_        d S re  r  r   s       r#   r~   zQwen3Loader.set_configQ	  r  r%   c                     |j         |_        d S r   r  r  s      r#   r   zQwen3Loader.set_layer_norm]	  r  r%   c                    d|_         |                     |j        |j                   |                     |j        |j                   t          t          |j	        |j
                            D ]\  }\  }}|                     |j        j        |j                   |                     |j        j        |j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   d t%          d          D             }|                     |d         |j        j        |           |                     |d         |j        j        |           |                     |d         |j        j        |           |t.          j        j        k    r&t5          j        |j        j        d         |           n?|t.          j        j        k    rdnd}t5          j        |j        j        d         ||           |                     |j        j        d         |j        j        |           |                     |j        j         |j!        j"        |           |                     |j        j#        |j!        j$        |           |                     |j        j%        |j!        j&        |           tO          |d           tO          |d	           tQ          j)                     d S )
NFc                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z+Qwen3Loader.set_decoder.<locals>.<listcomp>t	  rj  r%   r  r   rk  r   r  r   r  )*r%  r   r(  r*  r   r   r-  ru  r   r   r   r   r  r  r  r  r   r  r  r   r  r  r  r   r   r   r   r  r   rm  rn  r.  r  r  r/  r  r  r  r  r0  r1  r2  )	r9   r`   r   r   	layer_idxr  r   r  rp  s	            r#   r   zQwen3Loader.set_decoder`	  s    %DOV-@AAADOV[999.7DJ8V8V.W.W 4	 4	*I*
E)4e6K   )5+I   )0%/2H   )0%/2H   HGeAhhGGGLOOQ!7J     OOQ!7J     OOQ!7J     [5999!*";"B1"E|TTTT(K,D,MMMST*-4Q7v   OO)03&%     OO')<     OO-uy/@Z     OO')<     E;'''E5!!!JLLLLi4	 4	r%   rq  r-  s   @r#   r  r  	  s        " " X": : :x    ) ) )
> 
> 
>' ' ' 4?3K3O 9 9 9 9 9 9 9 9r%   r  MixFormerSequentialConfigc                   L     e Zd Zed             Zd Z fdZd Zd Zd Z	 xZ
S )MixFormerSequentialLoaderc                     dS r  r&   rw   s    r#   rO   z+MixFormerSequentialLoader.architecture_name	  r  r%   c           
      Z   t           j                            |j        j        |j        j        dt          |j        j                 |j        j        ddd          }| 	                    |j
        |j                   |                     |j
        j        |j        d         j                   |S )NTFr'  r  r   r   r  r  r  r  r  )r
   rm  r   rX   r  r  r   r   r  r   r   r   r   r   r   rs  s      r#   r|   z(MixFormerSequentialLoader.get_model_spec	  s    ;GG|+l)-el.NO|.#"" H 	
 	
 	u|444/b1A1HIIIr%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r   z(MixFormerSequentialLoader.get_vocabulary	  r  r%   c                 0    |                     |           d S r   rx  r   s      r#   r   z(MixFormerSequentialLoader.set_vocabulary	  rz  r%   c                 N    |j         |_         |j        |_        |j        |_        d S r   r|  r   s       r#   r~   z$MixFormerSequentialLoader.set_config	  r}  r%   c                    d|_         |                     |j        |d         j                   |                     |j        |d         j                   t          |j        |dd                   D ]\  }}|                     |j	        |j                   | 
                    |j        j        d         |j        j                   | 
                    |j        j        d         |j        j                   | 
                    |j        j        |j        j                   | 
                    |j        j        |j        j                   d S )NFr   r  r   )r%  r   r(  r  r   r   lnr   r   r  r   r   r   mixerr	  r  r  r  r  r  r  r  r  s        r#   r   z%MixFormerSequentialLoader.set_decoder	  s    %DOVAY];;;DOVBZ];;;!$TZ"!>!> 	D 	DJ
 <ehGGGOOJ5<Q?AQRRROOJ5<Q?AUVVVOOJN3UY]CCCOOJN3UY]CCCC	D 	Dr%   r  r-  s   @r#   r  r  	  s        & & X&       ) ) )/ / /

D 
D 
D 
D 
D 
D 
Dr%   r  	PhiConfigc                   L     e Zd Zed             Zd Z fdZd Zd Zd Z	 xZ
S )	PhiLoaderc                     dS r  r&   rw   s    r#   rO   zPhiLoader.architecture_name	  r  r%   c           
         t           j                            |j        j        |j        j        dt          |j        j                 |j        j        ddd          }| 	                    |j
        |j                   |                     |j
        j        |j        j                   |                     |j
        j        |j        j                   |S )NTFr  )r
   rm  r   rX   r  r  r   r   r  r   r   r  r   r   r   r   r   r   r  rs  s      r#   r|   zPhiLoader.get_model_spec	  s    ;GG|+l)-el.NO|.#"" H 	
 	
 	u'8999/1EFFFDL3U]5EFFFr%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r   zPhiLoader.get_vocabulary	  r  r%   c                 0    |                     |           d S r   rx  r   s      r#   r   zPhiLoader.set_vocabulary	  rz  r%   c                 N    |j         |_         |j        |_        |j        |_        d S r   r|  r   s       r#   r~   zPhiLoader.set_config	  r}  r%   c                 F   d|_         |                     |j        |j        j                   t          |j        |j                  D ]\  }}|                     |j	        |j
                   |                     |j        j        d         |j        j                   |                     |j        j        d         |j        j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   d S r  )r%  r   r(  embdr  r   r   r  r   r  r  r   r   r   r  r	  r  r  r  r  r  r  r  r  s        r#   r   zPhiLoader.set_decoder	  s     %DOV[_===!$TZ!:!: 	D 	DJ
 <ehGGGOOJ5<Q?AQRRROOJ5<Q?AUVVVOOJN3UY]CCCOOJN3UY]CCCC	D 	Dr%   r  r-  s   @r#   r  r  	  s        & & X&  "    ) ) )/ / /
	D 	D 	D 	D 	D 	D 	Dr%   r  
Phi3Configc                   p     e Zd Zed             Zd Z fdZd Zd Zd Z	d Z
ej        j        fdZ xZS )	
Phi3Loaderc                     dS r  r&   rw   s    r#   rO   zPhi3Loader.architecture_name
  r  r%   c                    |j         j        }|j         j        }t          |j         d|          }||k    rd }t          |j         dd          }t          |j         dd          }t          |j         dd           }|r~t                              |d                   }|                    dd          }	|Et          d	|d         d
d                    t                                                              nd }d}	t          |j         dd           }
|
rd }|
j	        dk    rt                              |
j                  }|Dt          d|
j	        dd                    t                                                              |
j        }|
j        }nt          j        j        }d }d }t"          j                            ||t          j        j        ddddd||	t          |j         dd          ||||||          }|                     |j        |j        |           |                     |j        j        |j                   |S )Nr   original_max_position_embeddingsr   max_position_embeddingsr   r   r   r   r   r   r<   rO  rP  rQ  rR  TFr   r  )r   r   r  r  r  r  r   r   r  r  r  r  r   rT  rU  )rX   rn  ro  rN   r   rI   rz   rK   rM   rX  rY  rZ  r[  r\  r   r   r   r
   rm  r   r  r]  r   r   r]   r   r   r   )r9   r]   r'  r  r  r  r  r   r   r   rO  r   rT  rU  r`   s                  r#   r|   zPhi3Loader.get_model_spec

  s|   \3
L4	u|-BINN9$$L+2L<a,
 ,
( #*%,8QST"U"Uu|^TBB 	&"9"="=l6>R"S"S$0$4$4Xq$A$A!"*)) $F+++TYY7N7S7S7U7U-V-V-VX   + #'$%! &el4I4PP 	J"/5884889L9TUU
!)) ,888		"9">">"@"@AAA	    3=,1JJ$15J#J;GG"-3# 3"7lEBB-M$;%!-!# H 
 
( 	u{J???/???r%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r   zPhi3Loader.get_vocabularyS
  r  r%   c                 0    |                     |           d S r   rx  r   s      r#   r   zPhi3Loader.set_vocabulary\
  rz  r%   c                 N    |j         |_         |j        |_        |j        |_        d S r   r|  r   s       r#   r~   zPhi3Loader.set_config_
  r}  r%   c                     |j         |_        d S r   r  r  s      r#   r   zPhi3Loader.set_layer_normd
  r  r%   c                     t          j        |t           j                  |_        t          j        |t           j                  |_        d S )N)r>   )rB   tensorr  rotary_scaling_long_factorrotary_scaling_short_factor)r9   r`   r  r  s       r#   set_rotary_embeddingsz Phi3Loader.set_rotary_embeddingsg
  sL     +0,&em+
 +
 +
' ,1<'u},
 ,
 ,
(((r%   c                 .   d|_         |                     |j        |j                   |                     |j        |j                   t          |j        |j	                  D ]\  }}|                     |j
        j        |j                   |                     |j        j        |j                   |                     |j
        j        d         |j        j        |           |                     |j
        j        d         |j        j        |           |j        j        j        K|j        j        j        :|                     |j
        |j        j        j        |j        j        j                   |t.          j        j        k    rL|j        j        j                            dd          \  }}||j        j        _        ||j        j        _        n|j        j        j                             dd          \  }}	|j        j        j!                            dd          \  }
}|j        j        j"                            dd          \  }}||j        j        _        |
|j        j        _#        ||j        j        _$        |	|j        j        _        ||j        j        _#        ||j        j        _$        |                     |j        j%        |j        j&        |           tO          |d           tO          |d           tQ          j)                     d S )	NFr   rk  r   r  r  r   r  )*r%  r   r(  r*  r   r   r-  r   r   r   r   r  r  r  r   r   r   r  r.  
rotary_emblong_factorshort_factorr  r   r   r   r  gate_up_projr   r  r  r  r   r   r   r   r   r  r  r0  r1  r2  )r9   r`   r   r   r  r   r/  r  gate_qweight
up_qweightgate_scales	up_scalesgate_qzeros	up_qzeross                 r#   r   zPhi3Loader.set_decoderq
  s    %DOV-@AAADOV[999!$TZ!?!? 7	 7	J)4e6K   )5+I   OO)03(%    
 OO)03&%     *6BO.;G**-O.:O.;   [5999%*Y%;%B%H%HPQ%H%R%R"	71:
'.7>
-44 ,19+A+I+O+O1 ,P , ,(j */)?)F)L)LQTU)L)V)V&Y).)?)F)L)LQTU)L)V)V&Y1=
'.7B
'46A
'37A
-4=F
-:<E
-9OO')<     E;'''E5!!!JLLLLo7	 7	r%   )rH   ro   rp   r   rO   r|   r   r   r~   r   r  r   r   r   r   r,  r-  s   @r#   r  r  
  s        & & X&G G GR    ) ) )/ / /
' ' '
 
 
 4?3K3O < < < < < < < <r%   r  RWConfigc                   Z     e Zd Zed             Zd Zd Z fdZd Zd Z	d Z
d
d	Z xZS )RWLoaderc                     dS r  r&   rw   s    r#   rO   zRWLoader.architecture_name
  r  r%   c                     |j         j        | _        |j         j        | _        t          |j         dd           | _        d| _        d S )N	n_head_kvnum_kv)rX   r  _num_layersr  
_num_headsrN   _num_heads_kv_num_kv_attrr{   s     r#   get_falcon_speczRWLoader.get_falcon_spec
  s?     </,-$U\;EE$r%   c                    |                      |           t          |j        dd          rd}n| j        }t          j                            | j        | j        dt          j
        j        |j        j        dd|j        j        rdnd d|j        j        |dk    |          }|                     |j        |j                   |                     |j        j        |j                   |S )Nmulti_queryFr   Tr   )
r   r   r  r  scale_alibir  r  r  r  r  )r  rN   rX   r  r
   rm  r   r  r  r   r  r  r  rotaryparallel_attnr   r   r  r   r   r   )r9   r]   r  r`   s       r#   r|   zRWLoader.get_model_spec
  s    U###5<66 	.LL-L;GGO"-2,$)-!L/9qqT##l8*a/% H 
 
 	u'8999/???r%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r   zRWLoader.get_vocabulary
  r  r%   c                 0    |                     |           d S r   rx  r   s      r#   r   zRWLoader.set_vocabulary
  rz  r%   c                 N    |j         |_        |j         |_         |j         |_        d S r   )r   r   r   r   s       r#   r~   zRWLoader.set_config
  r}  r%   c                    d|_         |                     |j        |j                   |                     |j        |j                   t          |j        |j	                  D ]\  }}t          |d          rA|                     |j        |j                   |                     |j        |j                   n{t          |d          r!|                     |j        |j                   nJ|                     |j        j        |j                   |                     |j        j        |j                   t)          |j        | j                  }|dk    r1|                     |j        j        d         |j        j                   nN|                     |j        j        d         |j        j        |j        j        ||j        j        k     r|nd            |                     |j        j        d         |j        j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                    d S )NFln_attnr  r   r   )!r%  r   r(  r  r   r   r  r   r   r  r   r  r  r   ln_mlpr  r  r   r  r  rN   r  r   r   r  r  r  r  r  r  r  r  r  )r9   r`   r   r  r   r  s         r#   r   zRWLoader.set_decoder
  sL    %DOV-CDDDDOV[999!$TZ!:!: !	N !	NJui(( ##J$?OOO##J$H%,WWWW%899 ##J$@%BWXXXX##-8%:O   ##N-u/M   U143DEEF{{-4Q7(8   
 ##-4Q7(8(2$u';'EEEFF4	   OO)03U5I5O   OOJN3UY5LMMMOOJN3UY5LMMMMC!	N !	Nr%   Nc                 :   |j         }|[|                    |dd|j        d                   }|                    dd          }|                    d|j        d                   }n|j        d         ||dz  z   z  }|                    d||z  dz   ||j        d                   }|                    ||z  ddgd          \  }}}	t          j        |                    ||z  d          |                    ||z  d          |	                    ||z  d          g          }||_         |j        |j        }
|C|
                    |dd          }
|
                    dd          }
|
                    d          }
n|
                    d||z  dz   |          }
|
                    ||z  ddgd          \  }}}	t          j        |                    ||z            |                    ||z            |	                    ||z            g          }
|
|_        d S d S )Nr  r  r   r   r  r  )r   r  r>  r   splitrB   r  r   )r9   r`   r   r  r  r   r  r  r  r  r   s              r#   r  zRWLoader.set_qkv_linear  s)   >^^Iq"fl26FGGF%%a++F^^BR(899FF|A9vz+ABH^^I'!+Xv|B7G F llI$7A#>AlFFGAq!YIIi(2B77IIfx/44IIfx/44 F ;";D~||Iq"55~~a++||B''||B	V(;a(?JJ**i6&91a%@a*HH1ay		)h"677		&8"344		&8"344  DIII% #"r%   r   )rH   ro   rp   r   rO   r  r|   r   r   r~   r   r  r,  r-  s   @r#   r  r  
  s        & & X&% % %  6    ) ) )/ / /
&N &N &NP) ) ) ) ) ) ) )r%   r  FalconConfigc                       e Zd Zd ZdS )FalconLoaderc                     |j         j        | _        |j         j        | _        t          |j         dd           | _        d| _        d S )Nnum_kv_heads)rX   rn  r  ro  r  rN   r  r  r{   s     r#   r  zFalconLoader.get_falcon_spec>  s?     <9,:$U\>4HH*r%   N)rH   ro   rp   r  r&   r%   r#   r   r   <  s#        + + + + +r%   r   DistilBertConfigc                   6    e Zd Zed             Zd Zd Zd ZdS )DistilBertLoaderc                     dS )NDistilBertModelr&   rw   s    r#   rO   z"DistilBertLoader.architecture_nameG  r  r%   c                 &   t          j        |j        j        |j        j        dt
          |j        j                 d          }t          j        |          }d|j        _	        | 
                    |j        j        d         |j        j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   t'          |j        j        |j        j                  D ]\  }}d t-          d          D             }|                     |d         |j        j                   |                     |d         |j        j                   |                     |d         |j        j                   t9          j        |j        j        d         |           |                     |j        j        d         |j        j                    |                     |j        j!        |j"                   |                     |j#        j$        |j#        j%                   |                     |j#        j&        |j#        j'                   |                     |j#        j!        |j(                   |S )	NFTr   r   c                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z3DistilBertLoader.get_model_spec.<locals>.<listcomp>d  rj  r%   r  r   r  ))r
   TransformerEncoderSpecrX   r  r   r   r   TransformerEncoderModelSpecr   r%  r   r(  r  r   r&  position_embeddingsr   r   	LayerNormr   r   r  r  r   r  q_link_linv_linr   r  r   r   out_linr   sa_layer_normr  r  lin1r  lin2output_layer_normr9   r]   encoder_specr`   r  r   r  s          r#   r|   zDistilBertLoader.get_model_specK  sP   '>L!L -el.EF $
 
 
  ;
 
 ).%L#A&(8(H	
 	
 	
 	##L+U-=-Q	
 	
 	
 	L,e.>.H	
 	
 	
 "%T\%79J9P!Q!Q 	T 	TJGGeAhhGGGLOOLOU_-BCCCOOLOU_-BCCCOOLOU_-BCCCj7>qA<PPPOO)03U_5L   )4e6I   OOJN3UY^DDDOOJN3UY^DDD
 95;RSSSSr%   c                 0    |                     |           d S r   rx  r   s      r#   r   zDistilBertLoader.set_vocabularyw  rz  r%   c                 ,    |j         |_         d|_        d S )Ng-q=)r   r%  r   s       r#   r~   zDistilBertLoader.set_configz  s    $.$)!!!r%   N)rH   ro   rp   r   rO   r|   r   r~   r&   r%   r#   r  r  E  s[        ! ! X!* * *X) ) )* * * * *r%   r  
BertConfigc                   F     e Zd Zed             Zd Z fdZd Zd Z xZ	S )
BertLoaderc                     dS )N	BertModelr&   rw   s    r#   rO   zBertLoader.architecture_name  s    {r%   c           	         |j         j        dk    sJ t          j        |j         j        |j         j        dt          |j         j                 ddt          j	        j
                  }t          j        |dt          j        j                  }d|j        _        |                     |j        j        d         |j        j                   |                     |j        j        d         |j        j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   |                     |j        |j        j                   t=          |j        j        |j        j                  D ]\  }}d	 tA          d
          D             }|                     |d         |j!        j"        j#                   |                     |d         |j!        j"        j$                   |                     |d         |j!        j"        j%                   tM          j'        |j(        j)        d         |           |                     |j(        j)        d         |j!        j*        j                   |                     |j(        j+        |j!        j*        j                   |                     |j,        j-        |j.        j                   |                     |j,        j/        |j*        j                   |                     |j,        j+        |j*        j                   |S )NabsoluteFTr  r   r   r   num_source_embeddingsembeddings_mergepooling_layerpooling_activationr   r   c                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z-BertLoader.get_model_spec.<locals>.<listcomp>  rj  r%   r  )0rX   position_embedding_typer
   r
  rn  ro  r   r  r   EmbeddingsMergeADDr  r  Tanhr   r%  r   r(  r  token_type_embeddingsr   r&  r  r   r   r  r   pooler_densepoolerr  r   r   r  r  r9   queryr   valuer   r  r   r   outputr   r  r  intermediater  r  s          r#   r|   zBertLoader.get_model_spec  s   |3zAAAA'>L*L,-el.EF $"#(8<
 
 
  ;*5:
 
 
 ).%L#A&(8(H	
 	
 	
 	L#A&(8(N	
 	
 	
 	##L+U-=-Q	
 	
 	
 	L,e.>.H	
 	
 	
 	)5<+=>>>!$T\%79L!M!M 	S 	SJGGeAhhGGGLOOLOU_-A-GHHHOOLOU_-A-EFFFOOLOU_-A-GHHHj7>qA<PPPOO)03U_5K5Q   )4eo6L6V   OOJN3U5G5MNNNOOJN3U\5GHHH
 95<;QRRRRr%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r   zBertLoader.get_vocabulary  r  r%   c                 0    |                     |           d S r   rx  r   s      r#   r   zBertLoader.set_vocabulary  rz  r%   c                 @    |j         |_         |j        j        |_        d S r   r   rX   layer_norm_epsr%  r   s       r#   r~   zBertLoader.set_config       $.$)L$?!!!r%   )
rH   ro   rp   r   rO   r|   r   r   r~   r,  r-  s   @r#   r  r    s          X6 6 6p    ) ) )@ @ @ @ @ @ @r%   r  XLMRobertaConfigc                   <    e Zd Zed             Zd Zd Zd Zd ZdS )XLMRobertaLoaderc                     dS )N#XLMRobertaForSequenceClassificationr&   rw   s    r#   rO   z"XLMRobertaLoader.architecture_name  s    44r%   c           	         |j         j        dk    sJ t          j        |j         j        |j         j        dt          |j         j                 ddt          j	        j
                  }|j        j        d}nd}t          j        ||t          j        j                  }d|j        _        |                     |j        j        d         |j        j        j                   |                     |j        j        d         |j        j        j                   |                     |j        j        |j        j        j                   |                     |j        j        |j        j        j                   |r*|                     |j        |j        j        j                   t?          |j        j         |j        j        j                   D ]\  }}d	 tC          d
          D             }|                     |d         |j"        j#        j$                   |                     |d         |j"        j#        j%                   |                     |d         |j"        j#        j&                   tO          j(        |j)        j*        d         |           |                     |j)        j*        d         |j"        j+        j                   |                     |j)        j,        |j"        j+        j                   |                     |j-        j.        |j/        j                   |                     |j-        j0        |j+        j                   |                     |j-        j,        |j+        j                   |S )Nr   FTr  r!  r$  r   r   c                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z3XLMRobertaLoader.get_model_spec.<locals>.<listcomp>  rj  r%   r  )1rX   r(  r
   r
  rn  ro  r   r  r   r)  r*  robertar.  r  r  r+  r   r%  r   r(  r  r,  r   r&  r  r   r   r  r   r-  r  r   r   r  r  r9   r/  r   r0  r   r  r   r   r1  r   r  r  r2  r  r9   r]   r  r%  r`   r  r   r  s           r#   r|   zXLMRobertaLoader.get_model_spec  s   |3zAAAA'>L*L,-el.EF $"#(8<
 
 
 ='!MM M;'*5:
 
 
 ).%L#A&(@(P	
 	
 	
 	L#A&(@(V	
 	
 	
 	##L+M$8	
 	
 	
 	L,em.F.P	
 	
 	
  	KOOD-u}/C/IJJJ!$T\%79N9T!U!U 	S 	SJGGeAhhGGGLOOLOU_-A-GHHHOOLOU_-A-EFFFOOLOU_-A-GHHHj7>qA<PPPOO)03U_5K5Q   )4eo6L6V   OOJN3U5G5MNNNOOJN3U\5GHHH
 95<;QRRRRr%   c                 0    |                     |           d S r   rx  r   s      r#   r   zXLMRobertaLoader.set_vocabulary  rz  r%   c                 @    |j         |_         |j        j        |_        d S r   r6  r   s       r#   r~   zXLMRobertaLoader.set_config  r8  r%   c                 ~    |j         |_        t          |dd          }|dk    r|j        |dz   d          |_        d S d S Npadding_idxr   r   r   r   s       r#   r   z'XLMRobertaLoader.set_position_encodings  F    22A::!^FQJLL9DNNN :r%   N	rH   ro   rp   r   rO   r|   r   r~   r   r&   r%   r#   r;  r;    sm        5 5 X5< < <|) ) )@ @ @: : : : :r%   r;  RobertaConfigc                   <    e Zd Zed             Zd Zd Zd Zd ZdS )RobertaLoaderc                     dS )NRobertaModelr&   rw   s    r#   rO   zRobertaLoader.architecture_name"  s    ~r%   c           	         |j         j        dk    sJ t          j        |j         j        |j         j        dt          |j         j                 ddt          j	        j
                  }|j        d}nd}t          j        ||t          j        j                  }d|j        _        |                     |j        j        d         |j        j                   |                     |j        j        d         |j        j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   |r%|                     |j        |j        j                   t=          |j        j        |j        j                  D ]\  }}d	 tA          d
          D             }|                     |d         |j!        j"        j#                   |                     |d         |j!        j"        j$                   |                     |d         |j!        j"        j%                   tM          j'        |j(        j)        d         |           |                     |j(        j)        d         |j!        j*        j                   |                     |j(        j+        |j!        j*        j                   |                     |j,        j-        |j.        j                   |                     |j,        j/        |j*        j                   |                     |j,        j+        |j*        j                   |S )Nr   FTr  r!  r$  r   r   c                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z0RobertaLoader.get_model_spec.<locals>.<listcomp>Q  rj  r%   r  0rX   r(  r
   r
  rn  ro  r   r  r   r)  r*  r.  r  r  r+  r   r%  r   r(  r  r,  r   r&  r  r   r   r  r   r-  r  r   r   r  r  r9   r/  r   r0  r   r  r   r   r1  r   r  r  r2  r  rA  s           r#   r|   zRobertaLoader.get_model_spec&     |3zAAAA'>L*L,-el.EF $"#(8<
 
 
 <!MM M;'*5:
 
 
 ).%L#A&(8(H	
 	
 	
 	L#A&(8(N	
 	
 	
 	##L+0	
 	
 	
 	L,e.>.H	
 	
 	
  	COOD-u|/ABBB!$T\%79L!M!M 	S 	SJGGeAhhGGGLOOLOU_-A-GHHHOOLOU_-A-EFFFOOLOU_-A-GHHHj7>qA<PPPOO)03U_5K5Q   )4eo6L6V   OOJN3U5G5MNNNOOJN3U\5GHHH
 95<;QRRRRr%   c                 0    |                     |           d S r   rx  r   s      r#   r   zRobertaLoader.set_vocabularyd  rz  r%   c                 @    |j         |_         |j        j        |_        d S r   r6  r   s       r#   r~   zRobertaLoader.set_configg  r8  r%   c                 ~    |j         |_        t          |dd          }|dk    r|j        |dz   d          |_        d S d S rE  r   r   s       r#   r   z$RobertaLoader.set_position_encodingsk  rG  r%   NrH  r&   r%   r#   rK  rK     sm          X< < <|) ) )@ @ @: : : : :r%   rK  CamembertConfigc                   <    e Zd Zed             Zd Zd Zd Zd ZdS )CamembertLoaderc                     dS )NCamembertModelr&   rw   s    r#   rO   z!CamembertLoader.architecture_namet  rj  r%   c           	         |j         j        dk    sJ t          j        |j         j        |j         j        dt          |j         j                 ddt          j	        j
                  }|j        d}nd}t          j        ||t          j        j                  }d|j        _        |                     |j        j        d         |j        j                   |                     |j        j        d         |j        j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   |r%|                     |j        |j        j                   t=          |j        j        |j        j                  D ]\  }}d	 tA          d
          D             }|                     |d         |j!        j"        j#                   |                     |d         |j!        j"        j$                   |                     |d         |j!        j"        j%                   tM          j'        |j(        j)        d         |           |                     |j(        j)        d         |j!        j*        j                   |                     |j(        j+        |j!        j*        j                   |                     |j,        j-        |j.        j                   |                     |j,        j/        |j*        j                   |                     |j,        j+        |j*        j                   |S )Nr   FTr  r!  r$  r   r   c                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z2CamembertLoader.get_model_spec.<locals>.<listcomp>  rj  r%   r  rP  rA  s           r#   r|   zCamembertLoader.get_model_specx  rQ  r%   c                 0    |                     |           d S r   rx  r   s      r#   r   zCamembertLoader.set_vocabulary  rz  r%   c                 @    |j         |_         |j        j        |_        d S r   r6  r   s       r#   r~   zCamembertLoader.set_config  r8  r%   c                 ~    |j         |_        t          |dd          }|dk    r|j        |dz   d          |_        d S d S rE  r   r   s       r#   r   z&CamembertLoader.set_position_encodings  rG  r%   NrH  r&   r%   r#   rW  rW  r  sm            X < < <|) ) )@ @ @: : : : :r%   rW  c            	      P   t          j        t           j                  } |                     ddd           |                     dd           |                     d	d
d           |                     dd           |                     ddd           |                     ddd           t	          j        |            |                                 }t          |j        |j	        |j
        |j        dv |j        |j        |j                  }|                    |           d S )N)formatter_classz--modelTzaName of the pretrained model to download, or path to a directory containing the pretrained model.)requiredhelpz--activation_scaleszPath to the pre-computed activation scales. Models may use them to rescale some weights to smooth the intermediate activations and improve the quantization accuracy. See https://github.com/mit-han-lab/smoothquant.)rb  z--copy_files+zWList of filenames to copy from the Hugging Face model to the converted model directory.)nargsrb  z
--revisionz<Revision of the model to download from the Hugging Face Hub.z--low_cpu_mem_usage
store_truezNEnable the flag low_cpu_mem_usage when loading the model with from_pretrained.)actionrb  z--trust_remote_codez*Allow converting models using custom code.)rQ   int8_float16)r+   r,   r-   r.   r/   r0   )argparseArgumentParserArgumentDefaultsHelpFormatteradd_argumentr   declare_arguments
parse_argsr)   r]   r+   r,   quantizationr.   r/   r0   convert_from_args)parserargs	converters      r#   mainrs    s   $ >  F F	     :     	     K     ]    
 9     '''D%
0?)-HH00  I %%%%%r%   __main__))r   r   )r  r   )r     r  r   r  r   r  r  r  r  r  r  ))r  r  rv  rx  ry  rz  )r  ru  )ry  r     ru  r   )ru  ru  )ru  r|  )rw  )r  r  )r  r  r{  r}  )ru  r  )ru  r  )ru     ))r~  r~  r|  r   )r|  r  )r|  r  )r  r  )r  ru  r  r|  	   r   )r  r  )r  r  )r  
   )r  r   )r  r   )r  r  )r  r  )r  r~  )r     r  r  r  r  )
)ru  r  )ru  r  )r  r   )r  r  r  )r  r  r  )r  r|  )r  r  )r  ru  )r  )   r   )r     )r  r     r  )   r   )r  r  )r  r  )   r  r  r  )   r|  )r  r  )r  r     r   )r  r  )r  r  )r  r  )   r  ))   r  r  )r  r  r  r   r  )   r  )	)r     r  r  )r  r  )   r|  )r  r  )r  r  )r  r  )r  r  ))r  r  )r  r  )r  r  )r  r  )r  r  )r  r  )r  r  )r  r  )r  r  )r  r  r  r  )r  r  )r  r  )r  r  )r  r  )r  r  )r  ru  )r  r|  )r  r  )   ru  )   r   )r  r  )   r  )
r  )r  r  )r  r  )r  r  r  r  r  )r  r  )   r   )r  r~  )zopenai/whisper-tiny.enzopenai/whisper-tinyzopenai/whisper-base.enzopenai/whisper-basezopenai/whisper-small.enzopenai/whisper-smallzopenai/whisper-medium.enzopenai/whisper-mediumzopenai/whisper-largezopenai/whisper-large-v2zopenai/whisper-large-v3T5GemmaConfigc                   x    e Zd Zed             Zd Zd Zd Zd Ze	j
        j        fdZe	j
        j        fdZdS )	T5GemmaLoaderc                     dS )NT5GemmaForConditionalGenerationr&   rw   s    r#   rO   zT5GemmaLoader.architecture_name  rb  r%   c                 .    |j         j        dz   |_        d S )Nr   )r   r^  r   r  s      r#   r   zT5GemmaLoader.set_layer_norm  s    &+c1


r%   c                 v   |j         j        }|j         j        }t          |j         dd          }|j        }t          |d|          }||k    rd }t          j        |j        |j        dt          |j	                 dd|j
        dt          |dd          |d||j
                  }|j        }t          |d|          }	|	|k    rd }	t          j        |j        |j        dt          |j	                 ddd|j
        dt          |dd          |dd|	|j
        	          }
t          j        ||
          }|                     |j        |j        j        |           |                     |j        |j        j        |t           j        j                   |                     |j        j        |j        j        j                   |S )
Nr{  i   r  TFr   r  )r   r   r  r  r  r  r  r{  r?  r  r  )r   r   r  r  with_encoder_attentionr  r  r  r{  r?   external_pre_post_encoder_layersr  r  )rX   r   r   rN   ro  r
   r
  rn  r   r  r  TransformerDecoderSpecr   r   r]   r   r   r   r   r   r   r*  )r9   r]   encoder_configdecoder_configr{  encoder_num_headsencoder_num_heads_kvr   decoder_num_headsdecoder_num_heads_kvr   r`   s               r#   r|   zT5GemmaLoader.get_model_spec  s   -- /?FF*>&13D 
  
  #444#' "9,.-n.NO%.#eDD) $-#,
 
 
  +>&13D 
  
  #444#' "9,.-n.NO#'%.#eDD) $-1-#,
 
 
$  /AAu{':NKKKLK$(		
 	
 	
 	/1D1QRRRr%   c                 Z    |                     |           |                    |           d S r   r   r   s      r#   r   zT5GemmaLoader.set_vocabulary  r   r%   c                    |j         |_         |j        |_        |j        |_        t          |j        d          r|j        j        j        |_        n.t          |j        d          r|j        j        |_        nd|_        |j         |_        d S )Nr   r$  gư>)	r   r   r   r   rX   r   r$  r%  r   r   s       r#   r~   zT5GemmaLoader.set_config  s    $.$.$.5<++ 	-(-(<(IF%%U\>22 	-(-(AF%%(,F%%.%8"""r%   c                    d|_         t          |j        t                    r|j        d         n|j        }|                     ||j                   |j        dz  |_        |                     |j	        |j
                   |}t          t          |j        |j                            D ](\  }\  }}	|                     |j        |	j                   |                     |j        |	j                   d t'          d          D             }
|                     |
d         |	j        j        |           |                     |
d         |	j        j        |           |                     |
d         |	j        j        |           t3          j        |j        j        d         |
           |                     |j        j        d         |	j        j        |           |                     |j        |	j                   |                     |j         |	j!                   |                     |j"        j#        |	j$        j%        |           |                     |j"        j&        |	j$        j'        |           |                     |j"        j(        |	j$        j)        |           tU          |	d	           tU          |	d
           tW          j,                     *d S )NTr   r  c                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z-T5GemmaLoader.set_encoder.<locals>.<listcomp>  !    KKKQ 6 8 8KKKr%   r  rk  r   r  r   r  )-r%  r   r(  r)  r   r*  rr  r  r   r   r-  ru  r   r   r   r  pre_self_attn_layernormr   post_self_attn_layernormr  r   r   r  r  r  r   r  r   r   r.  rE  rF  rG  rH  r  r  r  r/  r  r  r  r  r0  r1  r2  )r9   r`   r   r  r   encoder_emb_specr   rP  r  r   qkv_split_layerss              r#   r   zT5GemmaLoader.set_encoder  s     !% #-T_d"C"CXDOA 	 	,g.BCCC2@2Lc2Q/DOW\:::&/DJ0N0N&O&O 1	 1	"A"
E+U-J   4e6T  
  LK%((KKKOO #U_%;
     OO #U_%;
     OO #U_%;
     j7>qACSTTTOO)03&%     5u7V   68X  
 OO')<     OO-uy/@Z     OO')<    
 E;'''E5!!!JLLLLc1	 1	r%   c                    d|_         d|_        |                     |j        |j                   |j        dz  |j        _        |                     |j        |j	                   t          t          |j        |j                            D ]j\  }\  }}|                     |j        |j                   |                     |j        |j                   d t%          d          D             }|                     |d         |j        j        |           |                     |d         |j        j        |           |                     |d	         |j        j        |           t1          j        |j        j        d         |           |                     |j        j        d         |j        j        |           |                     |j        |j                   |                     |j        |j                    |                     |j!        j        d         |j"        j        |           d
 t%          d	          D             }	|                     |	d         |j"        j        |           |                     |	d         |j"        j        |           t1          j        |j!        j        d         |	           |                     |j!        j        d	         |j"        j        |           |                     |j#        |j$                   |                     |j%        |j&                   |                     |j'        j(        |j)        j*        |           |                     |j'        j+        |j)        j,        |           |                     |j'        j-        |j)        j.        |           t_          |d           t_          |d           t_          |d           ta          j1                     ld S )NTFr  c                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z-T5GemmaLoader.set_decoder.<locals>.<listcomp>:  r  r%   r  r   rk  r   r  c                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z-T5GemmaLoader.set_decoder.<locals>.<listcomp>^  s!    JJJA{577JJJr%   r   
cross_attnr  )2r%  r9  r   r(  r*  rr  r  r   r   r-  ru  r   r   r   r  r  r   r  r  r   r   r  r  r  r   r  r   r   r.  )external_pre_encoder_attention_layer_normpre_cross_attn_layernorm*external_post_encoder_attention_layer_normpost_cross_attn_layernormr  r  rE  rF  rG  rH  r  r  r  r/  r  r  r  r  r0  r1  r2  )
r9   r`   r   r  r   rP  r  r   r  kv_split_layerss
             r#   r   zT5GemmaLoader.set_decoder&  sD    !%).&DOV-@AAA1?1KS1P.DOV[999&/DJ0N0N&O&O Y	 Y	"A"
E+U-J   4e6T  
  LK%((KKKOO #U_%;
     OO #U_%;
     OO #U_%;
     j7>qACSTTTOO)03&%     D.  
 E/   OO$+A. '%     KJqJJJOOO" '%    
 OO" '%    
 j29!<oNNN OO$+A. '%     5u7V   68X  
 OO')<     OO-uy/@Z     OO')<    
 E;'''E<(((E5!!!JLLLLsY	 Y	r%   N)rH   ro   rp   r   rO   r   r|   r   r~   r   r   r   r   r   r&   r%   r#   r  r    s        1 1 X12 2 2B B BH0 0 09 9 9 9D8P8T? ? ? ?D 8C7O7Sc c c c c cr%   r  )Vr   rh  r1  r$  rh   typingr   r   numpyr  rk   rB   rD   ImportErrorctranslate2.convertersr    ctranslate2.converters.converterr   ctranslate2.specsr   r   r	   r
   r   r   r   r  r  r  GELUSigmoidRELUr]  r   r^  LinearSur_  r   r   rm  AWQ_GEMVrY  r    r'   r)   ABCru   r   r0  rE  rW  r_  rg  r  r  r  r  r  r  r7  ri  r  r  r  r  r  r:  rK  ru  r  r  r  r  r  r  r  r   r  r  r;  rK  rW  rs  rH   r"  r  r&   r%   r#   <module>r     s   



  				     				 ! ! ! ! ! ! ! !    	LLL 	 	 	D	 ) ( ( ( ( ( 6 6 6 6 6 6                  "''0&/).$/8(4"'"(#)
 
  .5

*
-.503	   $-$-  
   y y y y yI y y yxWF WF WF WF WF#' WF WF WFt @V @V @V @V @V @V @V @VF   47 47 47 47 47Z 47 47 ! 47n       :   ! B = = = = =* = =  =" !!	9 	9 	9 	9 	9J 	9 	9 "!	9 C C C C C
 C C CL #$$/G /G /G /G /G+ /G /G %$/Gd %G %G %G %G %G %G %G %GP 5G 5G 5G 5G 5G 5G 5G 5Gp !!VG VG VG VG VGK VG VG "!VGr !!LN LN LN LN LNK LN LN "!LN^ !!c  c  c  c  c J c  c  "!c L !""K@ K@ K@ K@ K@Z K@ K@ #"K@\ %&&q$ q$ q$ q$ q$ q$ q$ '&q$h y' y' y' y' y'{ y' y' y'x - - - - - - - - J J J J J+ J J  JZ 41 41 41 41 41 41 41 41n \ \ \ \ \+ \ \  \~   d d d d d; d d ! dN Q Q Q Q Q+ Q Q  Qh #$$  } } } } }; } } !  %$}@ !!D D D D DK D D "!DN I I I I I+ I I  IX S S S S S+ S S  Sl ,--0D 0D 0D 0D 0D 0D 0D .-0Df 0D 0D 0D 0D 0D 0D 0D 0Df h h h h h h h hV H H H H H{ H H HV   + + + + +8 + + ! + #$$6* 6* 6* 6* 6*{ 6* 6* %$6*r K@ K@ K@ K@ K@ K@ K@ K@\ #$$N: N: N: N: N:{ N: N: %$N:b !!N: N: N: N: N:K N: N: "!N:b "##N: N: N: N: N:k N: N: $#N:b7& 7& 7&t zDFFF	 	 	 LKKFFF	 	 	     *  ! ! !( VUU
 
 
     2     c} } B !!B B B B BK B B "!B B Bs   / 77