
    `i                    X    d dl mZ d dlmZ ddlmZmZmZmZ  G d d          Z	ddZ
dS )    )annotations)Sequence   )
MODEL_ARCHMODEL_TENSORMODEL_TENSORSTENSOR_NAMESc                     e Zd ZU ej        dej        dej        dej        dej        dej	        dej
        dej        dej        dej        d	i
Zd
ed<   i ej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej         di ej!        dej"        dej#        dej$        d ej%        d!ej&        d"ej'        d#ej(        d$ej)        d%ej*        d&ej+        d'ej
        d(ej,        d)ej-        d*ej.        d+ej/        d,ej0        d-i ej1        d.ej2        d/ej3        d0ej4        d1ej5        d2ej6        d3ej7        d4ej8        d5ej9        d6ej:        d7ej;        d8ej<        d9ej=        d:ej>        d;ej?        d<ej@        d=ejA        d>i ejB        d?ejC        d@ejD        dAejE        dBejF        dCejG        dDejH        dEejI        dFejJ        dGejK        dHejL        dIejM        dJejN        dKejO        dLejP        dMejQ        dNejR        dOi ejS        dPejT        dQejU        dRejV        dSejW        dTejX        dUejY        dVejZ        dWej[        dXej\        dYej]        dZej^        d[ej_        d\ej`        d]eja        d^ejb        d_ejc        d`i ejd        daeje        dbejf        dcejg        ddejh        deeji        dfejj        dgejk        dhejl        diejm        djejn        dkejo        dlejp        dmejq        dnejr        doejs        dpejt        dqi eju        drejv        dsejw        dtejx        duejy        dvejz        dwej{        dxej|        dyej}        dzej~        d{ej        d|ej        d}ej        d~ej        dej        dej        dej        di ej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        di ej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        di ej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        diZd
ed<   ej        ej        dej        diiZded<   ded<   ddʄZdddфZdddӄZdddՄZddքZdd؄ZddلZdS )TensorNameMap)zgpt_neox.embed_inztransformer.wteztransformer.word_embeddingsword_embeddingszmodel.embed_tokenstok_embeddingszembeddings.word_embeddingsz(language_model.embedding.word_embeddingswteztransformer.embd.wtezmodel.tok_embeddingszmodel.embeddingzbackbone.embeddingzbackbone.embeddingsztransformer.in_out_embedzembedding.word_embeddingsztransformer.token_embeddingssharedzrwkv.embeddingszmodel.embeddingszmodel.word_embeddingsz!language_model.model.embed_tokensencoder)z embeddings.token_type_embeddings)	word_embeddings_layernormzembeddings.LayerNormemb_lntransformer.normrwkv.blocks.0.pre_lnr   zmodel.pre_lnzmodel.layers.0.pre_normzbackbone.norm)ztransformer.wpezembeddings.position_embeddingswpe)		embed_outlm_headoutputword_embeddings_for_headzlm_head.linearoutput_layerheadzhead.outr   )zgpt_neox.final_layer_normztransformer.ln_f
model.normnormztransformer.norm_fln_fz&language_model.encoder.final_layernormzmodel.final_layernormz
lm_head.lnzmodel.norm_fzbackbone.norm_fztransformer.rms_normzencoder.final_layernormr   r   zrwkv.ln_outzmodel.ln_outzbackbone.final_layer_normr   )z
rope.freqszrotary_pos_emb.inv_freq )zbackbone.embedz#dict[MODEL_TENSOR, tuple[str, ...]]mappings_cfg)z%gpt_neox.layers.{bid}.input_layernormztransformer.h.{bid}.ln_1ztransformer.blocks.{bid}.norm_1z#transformer.h.{bid}.input_layernormzh.{bid}.input_layernormztransformer.h.{bid}.ln_mlp"model.layers.{bid}.input_layernormzlayers.{bid}.attention_normz3language_model.encoder.layers.{bid}.input_layernormmodel.layers.{bid}.ln1zh.{bid}.ln_1ztransformer.h.{bid}.lnzmodel.layers.layers.{bid}.normz!model.layers.{bid}.attention_normzmodel.layers.{bid}.normzbackbone.layers.{bid}.normz(transformer.decoder_layer.{bid}.rms_normz.transformer.blocks.{bid}.norm_attn_norm.norm_1z$encoder.layers.{bid}.input_layernormz"transformer.layers.{bid}.attn_normzrwkv.blocks.{bid}.ln1r"   r!   z(transformer_encoder.{bid}.attention_norm)ztransformer.h.{bid}.ln_attnz encoder.layer.{bid}.layer_norm_1zrwkv.blocks.{bid}.ln2model.layers.{bid}.ln2)z/gpt_neox.layers.{bid}.attention.query_key_valueztransformer.h.{bid}.attn.c_attnz"transformer.blocks.{bid}.attn.Wqkvz1transformer.blocks.{bid}.norm_attn_norm.attn.Wqkvz2transformer.h.{bid}.self_attention.query_key_valuez&h.{bid}.self_attention.query_key_valuezBlanguage_model.encoder.layers.{bid}.self_attention.query_key_valuez,model.layers.{bid}.self_attn.query_key_valuezh.{bid}.attn.c_attnztransformer.h.{bid}.mixer.Wqkvzencoder.layers.{bid}.attn.Wqkvzencoder.layers.{bid}.mixer.Wqkvz%model.layers.{bid}.self_attn.qkv_projz3encoder.layers.{bid}.self_attention.query_key_valuez&transformer.layers.{bid}.attn.qkv_projztransformer_encoder.{bid}.qkv)#model.layers.{bid}.self_attn.q_projz+model.layers.{bid}.self_attn.q_proj_no_permzlayers.{bid}.attention.wqz(encoder.layer.{bid}.attention.self.queryz'transformer.layer.{bid}.attention.q_linztransformer.h.{bid}.attn.q_projz*model.layers.layers.{bid}.self_attn.q_projzmodel.layers.{bid}.attention.wqz:transformer.decoder_layer.{bid}.multi_head_attention.queryz)transformer.h.{bid}.attn.attention.q_projr$   )#model.layers.{bid}.self_attn.k_projz+model.layers.{bid}.self_attn.k_proj_no_permzlayers.{bid}.attention.wkz&encoder.layer.{bid}.attention.self.keyz'transformer.layer.{bid}.attention.k_linztransformer.h.{bid}.attn.k_projztransformer.h.{bid}.attn.kz*model.layers.layers.{bid}.self_attn.k_projzmodel.layers.{bid}.attention.wkz8transformer.decoder_layer.{bid}.multi_head_attention.keyz)transformer.h.{bid}.attn.attention.k_projr%   )#model.layers.{bid}.self_attn.v_projzlayers.{bid}.attention.wvz(encoder.layer.{bid}.attention.self.valuez'transformer.layer.{bid}.attention.v_linztransformer.h.{bid}.attn.v_projztransformer.h.{bid}.attn.vz*model.layers.layers.{bid}.self_attn.v_projzmodel.layers.{bid}.attention.wvz:transformer.decoder_layer.{bid}.multi_head_attention.valuez)transformer.h.{bid}.attn.attention.v_projr&   )z%gpt_neox.layers.{bid}.attention.denseztransformer.h.{bid}.attn.c_projz&transformer.blocks.{bid}.attn.out_projz(transformer.h.{bid}.self_attention.densezh.{bid}.self_attention.dense#model.layers.{bid}.self_attn.o_projz(model.layers.{bid}.self_attn.linear_attnzlayers.{bid}.attention.woz*encoder.layer.{bid}.attention.output.densez)transformer.layer.{bid}.attention.out_linz!transformer.h.{bid}.attn.out_projz8language_model.encoder.layers.{bid}.self_attention.densez"model.layers.{bid}.self_attn.densezh.{bid}.attn.c_projz"transformer.h.{bid}.mixer.out_projz*model.layers.layers.{bid}.self_attn.o_projzmodel.layers.{bid}.attention.woz"encoder.layers.{bid}.attn.out_projz#encoder.layers.{bid}.mixer.out_projz;transformer.decoder_layer.{bid}.multi_head_attention.linearz5transformer.blocks.{bid}.norm_attn_norm.attn.out_projz)encoder.layers.{bid}.self_attention.densez&transformer.layers.{bid}.attn.out_projz+transformer.h.{bid}.attn.attention.out_projr'   ztransformer_encoder.{bid}.wo)z.encoder.layer.{bid}.attention.output.LayerNormz%transformer.layer.{bid}.sa_layer_normzencoder.layers.{bid}.norm1z*transformer.decoder_layer.{bid}.rms_norm_1z.transformer.blocks.{bid}.norm_attn_norm.norm_2)+model.layers.{bid}.post_attention_layernormz+model.layers.{bid}.post_self_attn_layernorm)z0model.layers.{bid}.self_attn.rotary_emb.inv_freqz1layers.{bid}.attention.inner_attention.rope.freqsz7model.layers.layers.{bid}.self_attn.rotary_emb.inv_freqz,transformer.h.{bid}.attn.rotary_emb.inv_freq)z.gpt_neox.layers.{bid}.post_attention_layernormztransformer.h.{bid}.ln_2z h.{bid}.post_attention_layernormztransformer.blocks.{bid}.norm_2r(   zlayers.{bid}.ffn_normz<language_model.encoder.layers.{bid}.post_attention_layernormr#   zh.{bid}.ln_2zmodel.layers.{bid}.ffn_normz*transformer.decoder_layer.{bid}.rms_norm_2z-encoder.layers.{bid}.post_attention_layernormz!transformer.layers.{bid}.ffn_normr(   z"transformer_encoder.{bid}.ffn_norm)z,model.layers.{bid}.pre_feedforward_layernorm)z-model.layers.{bid}.post_feedforward_layernormz%model.layers.{bid}.post_mlp_layernorm)zlayers.{bid}.feed_forward.gatez(model.layers.{bid}.block_sparse_moe.gatezmodel.layers.{bid}.mlp.gatez&transformer.decoder_layer.{bid}.routerz)transformer.blocks.{bid}.ffn.router.layerz0model.layers.{bid}.block_sparse_moe.router.layerz&model.layers.{bid}.feed_forward.routerz%encoder.layers.{bid}.mlp.router.layer)z)model.layers.{bid}.mlp.shared_expert_gate)z.model.layers.{bid}.mlp.gate.e_score_correction)z'gpt_neox.layers.{bid}.mlp.dense_h_to_4hztransformer.h.{bid}.mlp.c_fcz$transformer.blocks.{bid}.ffn.up_projz%transformer.h.{bid}.mlp.dense_h_to_4hzh.{bid}.mlp.dense_h_to_4hzmodel.layers.{bid}.mlp.up_projzlayers.{bid}.feed_forward.w3z&encoder.layer.{bid}.intermediate.densez transformer.layer.{bid}.ffn.lin1ztransformer.h.{bid}.mlp.fc_inz transformer.h.{bid}.mlp.linear_3z5language_model.encoder.layers.{bid}.mlp.dense_h_to_4hz$model.layers.{bid}.mlp.dense_h_to_4hztransformer.h.{bid}.mlp.w1zh.{bid}.mlp.c_fcztransformer.h.{bid}.mlp.fc1zmodel.layers.{bid}.mlp.fc1z#model.layers.{bid}.mlp.gate_up_projz%model.layers.layers.{bid}.mlp.up_projz"model.layers.{bid}.feed_forward.w3zencoder.layers.{bid}.mlp.fc11zencoder.layers.{bid}.mlp.fc1zmodel.layers.{bid}.mlp.c_fcz&encoder.layer.{bid}.mlp.gated_layers_vz$encoder.layer.{bid}.mlp.gated_layersz&encoder.layer.{bid}.mlp.up_gated_layerz"model.layers.{bid}.residual_mlp.w3z&encoder.layers.{bid}.mlp.dense_h_to_4hztransformer.h.{bid}.mlp.c_fc_1z'model.layers.{bid}.feed_forward.up_projz!transformer_encoder.{bid}.ffn.w12)z$layers.{bid}.feed_forward.experts.w3z,transformer.decoder_layer.{bid}.moe.linear_vz+transformer.blocks.{bid}.ffn.experts.mlp.v1z&model.layers.{bid}.mlp.experts.up_projz.model.layers.{bid}.block_sparse_moe.experts.w3z/model.layers.{bid}.feed_forward.experts.up_projz'encoder.layers.{bid}.mlp.experts.mlp.w1)z,model.layers.{bid}.mlp.shared_expert.up_projz-model.layers.{bid}.mlp.shared_experts.up_projz5model.layers.{bid}.feed_forward.shared_expert.up_proj)z transformer.blocks.{bid}.ffn.act)z model.layers.{bid}.mlp.gate_projzlayers.{bid}.feed_forward.w1ztransformer.h.{bid}.mlp.w2ztransformer.h.{bid}.mlp.c_fc2z'model.layers.layers.{bid}.mlp.gate_projz"model.layers.{bid}.feed_forward.w1zencoder.layers.{bid}.mlp.fc12z&encoder.layer.{bid}.mlp.gated_layers_wz transformer.h.{bid}.mlp.linear_1z"model.layers.{bid}.residual_mlp.w1ztransformer.h.{bid}.mlp.c_fc_0z)model.layers.{bid}.feed_forward.gate_proj)z$layers.{bid}.feed_forward.experts.w1z*transformer.decoder_layer.{bid}.moe.linearz+transformer.blocks.{bid}.ffn.experts.mlp.w1z(model.layers.{bid}.mlp.experts.gate_projz.model.layers.{bid}.block_sparse_moe.experts.w1z1model.layers.{bid}.feed_forward.experts.gate_proj)z.model.layers.{bid}.mlp.shared_expert.gate_projz/model.layers.{bid}.mlp.shared_experts.gate_projz7model.layers.{bid}.feed_forward.shared_expert.gate_proj)z'gpt_neox.layers.{bid}.mlp.dense_4h_to_hztransformer.h.{bid}.mlp.c_projz&transformer.blocks.{bid}.ffn.down_projz%transformer.h.{bid}.mlp.dense_4h_to_hzh.{bid}.mlp.dense_4h_to_hz model.layers.{bid}.mlp.down_projzlayers.{bid}.feed_forward.w2z encoder.layer.{bid}.output.densez transformer.layer.{bid}.ffn.lin2ztransformer.h.{bid}.mlp.fc_outz5language_model.encoder.layers.{bid}.mlp.dense_4h_to_hz$model.layers.{bid}.mlp.dense_4h_to_hzh.{bid}.mlp.c_projztransformer.h.{bid}.mlp.fc2zmodel.layers.{bid}.mlp.fc2z'model.layers.layers.{bid}.mlp.down_projz"model.layers.{bid}.feed_forward.w2zencoder.layers.{bid}.mlp.fc2zmodel.layers.{bid}.mlp.c_projzencoder.layer.{bid}.mlp.woz#transformer.layers.{bid}.ffn.proj_2z"model.layers.{bid}.residual_mlp.w2z"encoder.layer.{bid}.mlp.down_layerz&encoder.layers.{bid}.mlp.dense_4h_to_hzmodel.layers.h.{bid}.mlp.c_projz)model.layers.{bid}.feed_forward.down_projz transformer_encoder.{bid}.ffn.w3)z$layers.{bid}.feed_forward.experts.w2z,transformer.decoder_layer.{bid}.moe.linear_1z+transformer.blocks.{bid}.ffn.experts.mlp.w2z(model.layers.{bid}.mlp.experts.down_projz1model.layers.{bid}.block_sparse_moe.output_linearz.model.layers.{bid}.block_sparse_moe.experts.w2z1model.layers.{bid}.feed_forward.experts.down_projz'encoder.layers.{bid}.mlp.experts.mlp.w2)z.model.layers.{bid}.mlp.shared_expert.down_projz/model.layers.{bid}.mlp.shared_experts.down_projz7model.layers.{bid}.feed_forward.shared_expert.down_projz+model.layers.{bid}.shared_mlp.output_linear)z>language_model.encoder.layers.{bid}.self_attention.q_layernormz(model.layers.{bid}.self_attn.q_layernormz#model.layers.{bid}.self_attn.q_normz"transformer.blocks.{bid}.attn.q_lnz/encoder.layer.{bid}.attention.self.layer_norm_qz$transformer.layers.{bid}.attn.q_norm)z>language_model.encoder.layers.{bid}.self_attention.k_layernormz(model.layers.{bid}.self_attn.k_layernormz#model.layers.{bid}.self_attn.k_normz"transformer.blocks.{bid}.attn.k_lnz/encoder.layer.{bid}.attention.self.layer_norm_kz$transformer.layers.{bid}.attn.k_norm)zFlanguage_model.encoder.layers.{bid}.self_attention.rotary_emb.inv_freq)z$encoder.layer.{bid}.output.LayerNormz)transformer.layer.{bid}.output_layer_normzencoder.layers.{bid}.norm2z*transformer.decoder_layer.{bid}.rms_norm_3z!encoder.layer.{bid}.mlp.layernormz encoder.layer.{bid}.layer_norm_2)zmodel.layers.{bid}.in_projz#backbone.layers.{bid}.mixer.in_proj)zmodel.layers.{bid}.conv1dz"backbone.layers.{bid}.mixer.conv1d)zmodel.layers.{bid}.x_projz"backbone.layers.{bid}.mixer.x_proj)zmodel.layers.{bid}.dt_projz#backbone.layers.{bid}.mixer.dt_proj)zmodel.layers.{bid}.A_logz!backbone.layers.{bid}.mixer.A_log)zmodel.layers.{bid}.Dzbackbone.layers.{bid}.mixer.D)zmodel.layers.{bid}.out_projz$backbone.layers.{bid}.mixer.out_proj)zmodel.layers.{bid}.attention.w0)z'rwkv.blocks.{bid}.attention.time_maa_w1z(model.layers.{bid}.self_attn.time_maa_w1zmodel.layers.{bid}.attention.w1)z'rwkv.blocks.{bid}.attention.time_maa_w2z(model.layers.{bid}.self_attn.time_maa_w2zmodel.layers.{bid}.attention.w2)zmodel.layers.{bid}.attention.a0)zmodel.layers.{bid}.attention.a1)zmodel.layers.{bid}.attention.a2)zmodel.layers.{bid}.attention.v0)zmodel.layers.{bid}.attention.v1)zmodel.layers.{bid}.attention.v2)zmodel.layers.{bid}.attention.g1)zmodel.layers.{bid}.attention.g2)z model.layers.{bid}.attention.k_k)z model.layers.{bid}.attention.k_a)z model.layers.{bid}.attention.r_k)z&rwkv.blocks.{bid}.attention.time_maa_xz'model.layers.{bid}.self_attn.time_maa_x)z&rwkv.blocks.{bid}.attention.time_maa_kz'model.layers.{bid}.self_attn.time_maa_k)z&rwkv.blocks.{bid}.attention.time_maa_vz'model.layers.{bid}.self_attn.time_maa_v)z&rwkv.blocks.{bid}.attention.time_maa_rz'model.layers.{bid}.self_attn.time_maa_r)z&rwkv.blocks.{bid}.attention.time_maa_gz'model.layers.{bid}.self_attn.time_maa_g)z&rwkv.blocks.{bid}.attention.time_maa_wz'model.layers.{bid}.self_attn.time_maa_w)z&rwkv.blocks.{bid}.attention.time_faaaa)z&rwkv.blocks.{bid}.attention.time_decayz'model.layers.{bid}.self_attn.time_decay)z)rwkv.blocks.{bid}.attention.time_decay_w1z*model.layers.{bid}.self_attn.time_decay_w1)z)rwkv.blocks.{bid}.attention.time_decay_w2z*model.layers.{bid}.self_attn.time_decay_w2)zrwkv.blocks.{bid}.attention.keyr%   z model.layers.{bid}.attention.keyz#model.layers.{bid}.attention.k_proj)z!rwkv.blocks.{bid}.attention.valuer&   z"model.layers.{bid}.attention.valuez#model.layers.{bid}.attention.v_proj)z&rwkv.blocks.{bid}.attention.receptancer$   z'model.layers.{bid}.attention.receptancez#model.layers.{bid}.attention.r_proj)z rwkv.blocks.{bid}.attention.gatez!model.layers.{bid}.self_attn.gate)z rwkv.blocks.{bid}.attention.ln_xz!model.layers.{bid}.attention.ln_x)z"rwkv.blocks.{bid}.attention.outputr'   z#model.layers.{bid}.attention.outputz#model.layers.{bid}.attention.o_proj)z)rwkv.blocks.{bid}.feed_forward.time_maa_kz#model.layers.{bid}.feed_forward.x_k)z)rwkv.blocks.{bid}.feed_forward.time_maa_r)z"rwkv.blocks.{bid}.feed_forward.keyz#model.layers.{bid}.feed_forward.key)z)rwkv.blocks.{bid}.feed_forward.receptance)z$rwkv.blocks.{bid}.feed_forward.valuez%model.layers.{bid}.feed_forward.value)z%model.layers.{bid}.self_attn.q_a_proj)z%model.layers.{bid}.self_attn.q_b_proj)z/model.layers.{bid}.self_attn.kv_a_proj_with_mqa)z&model.layers.{bid}.self_attn.kv_b_proj)z%model.layers.{bid}.self_attn.k_b_proj)z%model.layers.{bid}.self_attn.v_b_proj)z*model.layers.{bid}.self_attn.q_a_layernorm)z+model.layers.{bid}.self_attn.kv_a_layernorm)z*model.layers.{bid}.self_attn.inner_attn_ln)z$model.layers.{bid}.mlp.ffn_layernorm)z&decoder.block.{bid}.layer.0.layer_norm)z+decoder.block.{bid}.layer.0.SelfAttention.q)z+decoder.block.{bid}.layer.0.SelfAttention.k)z+decoder.block.{bid}.layer.0.SelfAttention.v)z+decoder.block.{bid}.layer.0.SelfAttention.o)zAdecoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias)z&decoder.block.{bid}.layer.1.layer_norm)z-decoder.block.{bid}.layer.1.EncDecAttention.q)z-decoder.block.{bid}.layer.1.EncDecAttention.k)z-decoder.block.{bid}.layer.1.EncDecAttention.v)z-decoder.block.{bid}.layer.1.EncDecAttention.o)zCdecoder.block.{bid}.layer.1.EncDecAttention.relative_attention_bias)z&decoder.block.{bid}.layer.2.layer_norm)z/decoder.block.{bid}.layer.2.DenseReluDense.wi_0)z-decoder.block.{bid}.layer.2.DenseReluDense.wiz/decoder.block.{bid}.layer.2.DenseReluDense.wi_1)z-decoder.block.{bid}.layer.2.DenseReluDense.wo)zdecoder.final_layer_norm)z&encoder.block.{bid}.layer.0.layer_norm)z+encoder.block.{bid}.layer.0.SelfAttention.q)z+encoder.block.{bid}.layer.0.SelfAttention.k)z+encoder.block.{bid}.layer.0.SelfAttention.v)z+encoder.block.{bid}.layer.0.SelfAttention.o)zAencoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias)z&encoder.block.{bid}.layer.1.layer_norm)z/encoder.block.{bid}.layer.1.DenseReluDense.wi_0)z-encoder.block.{bid}.layer.1.DenseReluDense.wiz/encoder.block.{bid}.layer.1.DenseReluDense.wi_1)z-encoder.block.{bid}.layer.1.DenseReluDense.wo)zencoder.final_layer_norm
layer_norm)
classifierzclassifier.densepre_classifierdense)zclassifier.out_proj)zbackbone.convnext.{bid}.dwconv)zbackbone.convnext.{bid}.norm)zbackbone.convnext.{bid}.pwconv1)zbackbone.convnext.{bid}.pwconv2)zbackbone.convnext.{bid}.gamma)zbackbone.posnet.{bid}.conv1)zbackbone.posnet.{bid}.conv2)zbackbone.posnet.{bid}.norm)zbackbone.posnet.{bid}.norm1)zbackbone.posnet.{bid}.norm2)zbackbone.posnet.{bid}.q)zbackbone.posnet.{bid}.k)zbackbone.posnet.{bid}.v)zbackbone.posnet.{bid}.proj_out)z"multi_modal_projector.linear_{bid}zvisual.merger.mlp.{bid})z(model.connector.modality_projection.proj)z model.mm_projector.mlp.mlp.{bid}z'vision_model.vision_adapter.mlp.fc{bid}z
mlp1.{bid})z model.mm_projector.peg.peg.{bid})z4vision_tower.vision_model.embeddings.class_embeddingzvision_model.class_embedding)z4vision_tower.vision_model.embeddings.patch_embeddingzvpm.embeddings.patch_embeddingz-model.vision_model.embeddings.patch_embeddingzvision_tower.patch_convz#vision_model.patch_embedding.linearzvisual.patch_embed.proj)z7vision_tower.vision_model.embeddings.position_embeddingz!vpm.embeddings.position_embeddingz0model.vision_model.embeddings.position_embeddingz%vision_model.positional_embedding_vlm)z?vision_tower.vision_model.encoder.layers.{bid}.self_attn.q_projz)vpm.encoder.layers.{bid}.self_attn.q_projz8model.vision_model.encoder.layers.{bid}.self_attn.q_projz0vision_model.model.layers.{bid}.self_attn.q_projz6vision_tower.transformer.layers.{bid}.attention.q_projzvisual.blocks.{bid}.attn.q)z:vision_tower.vision_model.encoder.layers.{bid}.attn.q_norm)z?vision_tower.vision_model.encoder.layers.{bid}.self_attn.k_projz)vpm.encoder.layers.{bid}.self_attn.k_projz8model.vision_model.encoder.layers.{bid}.self_attn.k_projz0vision_model.model.layers.{bid}.self_attn.k_projz6vision_tower.transformer.layers.{bid}.attention.k_projzvisual.blocks.{bid}.attn.k)z:vision_tower.vision_model.encoder.layers.{bid}.attn.k_norm)z?vision_tower.vision_model.encoder.layers.{bid}.self_attn.v_projz)vpm.encoder.layers.{bid}.self_attn.v_projz8model.vision_model.encoder.layers.{bid}.self_attn.v_projz0vision_model.model.layers.{bid}.self_attn.v_projz6vision_tower.transformer.layers.{bid}.attention.v_projzvisual.blocks.{bid}.attn.v)z:vision_tower.vision_model.encoder.layers.{bid}.layer_norm1z4vision_tower.vision_model.encoder.layers.{bid}.norm1z$vpm.encoder.layers.{bid}.layer_norm1z3model.vision_model.encoder.layers.{bid}.layer_norm1z4vision_tower.transformer.layers.{bid}.attention_normz/vision_model.model.layers.{bid}.input_layernormzvisual.blocks.{bid}.norm1)zAvision_tower.vision_model.encoder.layers.{bid}.self_attn.out_projz8vision_tower.vision_model.encoder.layers.{bid}.attn.projz+vpm.encoder.layers.{bid}.self_attn.out_projz:model.vision_model.encoder.layers.{bid}.self_attn.out_projz0vision_model.model.layers.{bid}.self_attn.o_projz6vision_tower.transformer.layers.{bid}.attention.o_projzvisual.blocks.{bid}.attn.proj)z:vision_tower.vision_model.encoder.layers.{bid}.layer_norm2z4vision_tower.vision_model.encoder.layers.{bid}.norm2z$vpm.encoder.layers.{bid}.layer_norm2z3model.vision_model.encoder.layers.{bid}.layer_norm2z8vision_model.model.layers.{bid}.post_attention_layernormz.vision_tower.transformer.layers.{bid}.ffn_normzvisual.blocks.{bid}.norm2)z6vision_tower.vision_model.encoder.layers.{bid}.mlp.fc1z vpm.encoder.layers.{bid}.mlp.fc1z/model.vision_model.encoder.layers.{bid}.mlp.fc1z:vision_tower.transformer.layers.{bid}.feed_forward.up_projz'vision_model.model.layers.{bid}.mlp.fc1zvisual.blocks.{bid}.mlp.fc1zvisual.blocks.{bid}.mlp.up_proj)z<vision_tower.transformer.layers.{bid}.feed_forward.gate_projz!visual.blocks.{bid}.mlp.gate_proj)z6vision_tower.vision_model.encoder.layers.{bid}.mlp.fc2z vpm.encoder.layers.{bid}.mlp.fc2z/model.vision_model.encoder.layers.{bid}.mlp.fc2z<vision_tower.transformer.layers.{bid}.feed_forward.down_projz'vision_model.model.layers.{bid}.mlp.fc2zvisual.blocks.{bid}.mlp.fc2z!visual.blocks.{bid}.mlp.down_proj)z2vision_tower.vision_model.encoder.layers.{bid}.ls1)z2vision_tower.vision_model.encoder.layers.{bid}.ls2)z&vision_tower.vision_model.pre_layrnormzvision_tower.ln_prezvision_model.layernorm_pre)z(vision_tower.vision_model.post_layernormz!model.vision_model.post_layernormzvision_model.layernorm_postzvisual.merger.ln_q)z)multi_modal_projector.mm_input_projection)zmulti_modal_projector.norm)z&multi_modal_projector.mm_soft_emb_norm)zresampler.pos_embed_k)zresampler.attn.in_proj_q)zresampler.attn.in_proj_k)zresampler.attn.in_proj_v)zresampler.attn.out_proj)zresampler.kv_proj)zresampler.ln_post)zresampler.ln_kv)zresampler.ln_q)zresampler.proj)zresampler.query)zv.token_embd.img_break)z0multi_modal_projector.patch_merger.merging_layer)zaudio_tower.embed_positions)zaudio_tower.conv{bid})zaudio_tower.layer_normzaudio_tower.ln_post)z)audio_tower.layers.{bid}.self_attn.q_proj)z)audio_tower.layers.{bid}.self_attn.k_proj)z)audio_tower.layers.{bid}.self_attn.v_proj)z-audio_tower.layers.{bid}.self_attn_layer_norm)z+audio_tower.layers.{bid}.self_attn.out_proj)z)audio_tower.layers.{bid}.final_layer_norm)zaudio_tower.layers.{bid}.fc1)zaudio_tower.layers.{bid}.fc2)z(audio.multi_modal_projector.linear_{bid})z"audio.multi_modal_projector.linearzaudio_tower.proj)z"audio.multi_modal_projector.ln_pre)z"audio.multi_modal_projector.ln_midblock_mappings_cfg)z%model.layers.{bid}.residual_layernorm)r(   z5dict[MODEL_ARCH, dict[MODEL_TENSOR, tuple[str, ...]]]arch_block_mappings_cfgz#dict[str, tuple[MODEL_TENSOR, str]]mappingarchr   n_blocksintc                L   i | _         | j                                        D ]?\  }}|t          |         vrt          |         }||f| j         |<   |D ]}||f| j         |<   @|| j        v r%| j                            | j        |                    t          |          D ]}| j                                        D ]i\  }}|t          |         vrt          |         	                    |          }||f| j         |<   |D ]$}|	                    |          }||f| j         |<   %jd S )N)bid)
r/   r    itemsr   r	   r.   r-   updaterangeformat)selfr0   r1   tensorkeystensor_namekeyr4   s           g/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/gguf/tensor_mapping.py__init__zTensorNameMap.__init__  sn    -3355 	: 	:LFD]4000&v.K)/(=DL% : :%+[$9S!!:4///#**4+G+MNNN?? 		> 		>C $ 7 = = ? ? > >t!444*62999DD-3[,A[) > >C**3*//C)/(=DL%%>>		> 		>    r=   strtry_suffixesSequence[str]returntuple[MODEL_TENSOR, str] | Nonec                   | j                             |          }||S |D ]^}|                    |          rG| j                             |d t          |                              }||d         |d         |z   fc S _d S )Nr   r   )r/   getendswithlen)r9   r=   rB   resultsuffixs        r>   get_type_and_namezTensorNameMap.get_type_and_name  s    !!#&&M" 	9 	9F||F## 9))#mF|m*<==%!!9fQi&&88888tr@   
str | Nonec                H    |                      ||          }|d S |d         S )NrB   r   rL   r9   r=   rB   rJ   s       r>   get_namezTensorNameMap.get_name  .    ''L'II>4ayr@   MODEL_TENSOR | Nonec                H    |                      ||          }|d S |d         S )NrO   r   rP   rQ   s       r>   get_typezTensorNameMap.get_type  rS   r@   c                d    	 | j         |         d         S # t          $ r t          |          w xY w)Nr   )r/   KeyErrorr9   r=   s     r>   __getitem__zTensorNameMap.__getitem__  s@    	 <$Q'' 	  	  	 3--	 s    /boolc                    || j         v S N)r/   rY   s     r>   __contains__zTensorNameMap.__contains__  s    dl""r@   c                *    t          | j                  S r]   )reprr/   )r9   s    r>   __repr__zTensorNameMap.__repr__  s    DL!!!r@   N)r0   r   r1   r2   )r   )r=   rA   rB   rC   rD   rE   )r=   rA   rB   rC   rD   rM   )r=   rA   rB   rC   rD   rT   )r=   rA   rD   rA   )r=   rA   rD   r[   )rD   rA   )__name__
__module____qualname__r   
TOKEN_EMBDTOKEN_TYPESTOKEN_EMBD_NORMPOS_EMBDOUTPUTOUTPUT_NORM
ROPE_FREQSROPE_FACTORS_LONGROPE_FACTORS_SHORTCONV1Dr    __annotations__	ATTN_NORMATTN_NORM_2ATTN_QKVATTN_QATTN_KATTN_VATTN_OUTATTN_OUT_NORMATTN_POST_NORMATTN_ROT_EMBDFFN_NORMFFN_PRE_NORMFFN_POST_NORMFFN_GATE_INPFFN_GATE_INP_SHEXPFFN_EXP_PROBS_BFFN_UP
FFN_UP_EXPFFN_UP_SHEXPFFN_ACTFFN_GATEFFN_GATE_EXPFFN_GATE_SHEXPFFN_DOWNFFN_DOWN_EXPFFN_DOWN_SHEXPATTN_Q_NORMATTN_K_NORMLAYER_OUT_NORMSSM_IN
SSM_CONV1DSSM_XSSM_DTSSM_ASSM_DSSM_OUTTIME_MIX_W0TIME_MIX_W1TIME_MIX_W2TIME_MIX_A0TIME_MIX_A1TIME_MIX_A2TIME_MIX_V0TIME_MIX_V1TIME_MIX_V2TIME_MIX_G1TIME_MIX_G2TIME_MIX_K_KTIME_MIX_K_ATIME_MIX_R_KTIME_MIX_LERP_XTIME_MIX_LERP_KTIME_MIX_LERP_VTIME_MIX_LERP_RTIME_MIX_LERP_GTIME_MIX_LERP_WTIME_MIX_FIRSTTIME_MIX_DECAYTIME_MIX_DECAY_W1TIME_MIX_DECAY_W2TIME_MIX_KEYTIME_MIX_VALUETIME_MIX_RECEPTANCETIME_MIX_GATETIME_MIX_LNTIME_MIX_OUTPUTCHANNEL_MIX_LERP_KCHANNEL_MIX_LERP_RCHANNEL_MIX_KEYCHANNEL_MIX_RECEPTANCECHANNEL_MIX_VALUEATTN_Q_AATTN_Q_BATTN_KV_A_MQA	ATTN_KV_BATTN_K_BATTN_V_BATTN_Q_A_NORMATTN_KV_A_NORMATTN_SUB_NORMFFN_SUB_NORMDEC_ATTN_NORM
DEC_ATTN_Q
DEC_ATTN_K
DEC_ATTN_VDEC_ATTN_OUTDEC_ATTN_REL_BDEC_CROSS_ATTN_NORMDEC_CROSS_ATTN_QDEC_CROSS_ATTN_KDEC_CROSS_ATTN_VDEC_CROSS_ATTN_OUTDEC_CROSS_ATTN_REL_BDEC_FFN_NORMDEC_FFN_GATE
DEC_FFN_UPDEC_FFN_DOWNDEC_OUTPUT_NORMENC_ATTN_NORM
ENC_ATTN_Q
ENC_ATTN_K
ENC_ATTN_VENC_ATTN_OUTENC_ATTN_REL_BENC_FFN_NORMENC_FFN_GATE
ENC_FFN_UPENC_FFN_DOWNENC_OUTPUT_NORMCLSCLS_OUTCONVNEXT_DWCONVNEXT_NORMCONVNEXT_PW1CONVNEXT_PW2CONVNEXT_GAMMAPOSNET_CONV1POSNET_CONV2POSNET_NORMPOSNET_NORM1POSNET_NORM2POSNET_ATTN_NORMPOSNET_ATTN_QPOSNET_ATTN_KPOSNET_ATTN_VPOSNET_ATTN_OUTV_MMPROJV_MMPROJ_FCV_MMPROJ_MLPV_MMPROJ_PEGV_ENC_EMBD_CLSV_ENC_EMBD_PATCHV_ENC_EMBD_POSV_ENC_ATTN_QV_ENC_ATTN_Q_NORMV_ENC_ATTN_KV_ENC_ATTN_K_NORMV_ENC_ATTN_VV_ENC_INPUT_NORMV_ENC_ATTN_OV_ENC_POST_ATTN_NORMV_ENC_FFN_UPV_ENC_FFN_GATEV_ENC_FFN_DOWNV_LAYER_SCALE_1V_LAYER_SCALE_2
V_PRE_NORMV_POST_NORMV_MM_INP_PROJV_MM_INP_NORMV_MM_SOFT_EMB_NORMV_RESMPL_POS_EMBD_KV_RESMPL_ATTN_QV_RESMPL_ATTN_KV_RESMPL_ATTN_VV_RESMPL_ATTN_OUTV_RESMPL_KVV_RESMPL_POST_NORMV_RESMPL_KV_NORMV_RESMPL_Q_NORMV_RESMPL_PROJV_RESMPL_QUERYV_TOK_EMBD_IMG_BREAKV_MM_PATCH_MERGERA_ENC_EMBD_POSA_ENC_CONV1D
A_PRE_NORMA_POST_NORMA_ENC_ATTN_QA_ENC_ATTN_KA_ENC_ATTN_VA_ENC_INPUT_NORMA_ENC_OUTPUTA_ENC_OUTPUT_NORMA_ENC_FFN_UPA_ENC_FFN_GATEA_ENC_FFN_DOWNA_MMPROJA_MMPROJ_FCA_MM_NORM_PREA_MM_NORM_MIDr-   r   ARCTICFFN_NORM_EXPr.   r?   rL   rR   rV   rZ   r^   ra   r   r@   r>   r   r      s         	 "
6 	  #

 	$ 
'
 	  
 	 

 	  #
. 	 "

 	&' 
Ee9L e e e eNE? !
E?< 	  #
=E?L 	  
ME?t 	 
uE?R 	 
SE?r 	 
sE?P 	  
QE?L 	" %
ME?\ 	# &
]E?h 	" %
iE?x 	  
yE?^ 	! $
_E?h 	" %
iE?r 	! 	$
sE?H 	' *
IE?P 	$ '
QE?Z 	  
[E? E?^ 	 "
_E?r 	! $
sE?@ 	 
AE?J 	  
KE?h 	! $
iE?z 	# &
{E?H	 	  
I	E?D
 	! 	$
E
E?Z
 	# &
[
E?h
 	  #
i
E?z
 	  #
{
E?L 	 "
ME?T 	# &
UE?f 	 
gE?p 	 "
qE?z 	 
{E?D 	 
EE? E? E?N 	 
OE?X 	 
YE?b 	 
cE?l 	  #
mE?t 	  #
uE?@ 	  #
AE?L 	  #
ME?T 	  #
UE?\ 	  #
]E?d 	  #
eE?l 	  #
mE?t 	  #
uE?| 	  #
}E?D 	  #
EE?L 	! $
ME?T 	! $
UE?\ 	! $
]E? E? E?d 	$ '
eE?n 	$ '
oE?x 	$ '
yE?B 	$ '
CE?L 	$ '
ME?V 	$ '
WE?` 	# &
aE?h 	# &
iE?r 	& )
sE?| 	& )
}E?F 	! $
GE?T 	# &
UE?b 	( +
cE?p 	" %
qE?z 	  #
{E?D 	$ '
EE?R 	' *
SE? E? E?\ 	' *
]E?d 	$ '
eE?n 	+ .
oE?v 	& )
wE?@ 	  
AE?H 	  
IE?P 	" %
QE?X 	 !
YE?` 	  
aE?h 	  
iE?p 	" %
qE?x 	# &
yE?@ 	" %
AE?H 	! $
IE?P 	" %
QE?X 	 "
YE?` 	 "
aE? E? E?h 	 "
iE?p 	! $
qE?x 	# &
yE?@ 	( +
AE?H 	% (
IE?P 	% (
QE?X 	% (
YE?` 	' *
aE?h 	) ,
iE?p 	! $
qE?x 	! $
yE?@ 	 "
AE?J 	! $
KE?R 	$ '
SE?Z 	" %
[E?b 	 "
cE?j 	 "
kE? E? E?r 	 "
sE?z 	! $
{E?B 	# &
CE?J 	! $
KE?R 	! $
SE?Z 	 "
[E?d 	! $
eE?p 	$ '
qE?z 	 
{E?H 	 
IE?R 	  #
SE?Z 	" %
[E?b 	! $
cE?j 	! $
kE?r 	# &
sE?z 	! $
{E?B 	! $
CE? E? E?J 	  #
KE?R 	! $
SE?Z 	! $
[E?b 	% (
cE?j 	" %
kE?r 	" %
sE?z 	" %
{E?B 	$ '
CE?P 	  
QE?Z 	  #
[E?b 	! $
cE?n 	! $
oE?v 	# &
wE?@ 	% (
AE?R 	# &
SE?` 	! $
aE?r 	& )
sE? E? E?z 	! $
{E?L 	& )
ME?T 	! $
UE?f 	% (
gE?z 	! $
{E?N 	) ,
OE?b 	! $
cE?v 	# &
wE?@ 	# &
AE?T 	$ '
UE?\ 	$ '
]E?d 	 "
eE?p 	  #
qE?~ 	" %
E?F 	" %
GE?N 	' *
OE?V 	( +
WE? E? E?^ 	$ '
_E?f 	$ '
gE?n 	$ '
oE?v 	& )
wE?~ 	  #
E?F 	' *
GE?N 	% (
OE?V 	$ '
WE?^ 	" %
_E?f 	# &
gE?n 	) ,
oE?v 	& )
wE?B  	# &
C E?J  	! $
K E?R  	S E?V  	  #
W E?`  	! $
a E? E?h  	! $
 	! $
 	% (
 	! $
 	& )
 	! $
 	#R# &
 	  
 	  #

 	" %
 	" %
E"E? E? E E E ER" 	! $ % (	
	V 	 	 	 	 1000> > > >,	 	 	 	 	               # # # #" " " " " "r@   r   r0   r   r1   r2   rD   c                "    t          | |          S r]   )r   )r0   r1   s     r>   get_tensor_name_mapr(    s    x(((r@   N)r0   r   r1   r2   rD   r   )
__future__r   typingr   	constantsr   r   r   r	   r   r(  r   r@   r>   <module>r,     s    " " " " " "       L L L L L L L L L L L Lt" t" t" t" t" t" t" t"n') ) ) ) ) )r@   