
    .`iW                       d Z ddlmZ ddlmZ ddlmZ ddlmZ ddl	Z	ddl	m
Z
 ddlmZ dd	lmZ dd
lmZ ddlmZmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ ddl m!Z! ddl"m#Z#m$Z$ ddl%m&Z&m'Z' ddl(m)Z) ddl*m+Z+ ddl,m-Z- ddl.m/Z/m0Z0m1Z1m2Z2  G d de
j3                  Z4 G d de
j3                  Z5 G d de
j3                  Z6 eddddd !           G d" d#e
j3                              Z7 G d$ d%e
j3                  Z8dS )&zCInference-only LoopCoder model compatible with HuggingFace weights.    )annotations)Iterable)replace)AnyN)nn)PretrainedConfig)	Attention)support_torch_compile)CacheConfig
VllmConfig)$get_tensor_model_parallel_world_size)RMSNorm)ColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)LlamaMLP)IntermediateTensors)AttentionType   )AutoWeightsLoaderextract_layer_indexmake_layersmaybe_prefixc                  F     e Zd Zddddej        ddfd  fdZ	 d!d"dZ xZS )#LoopCoderAttentioni   N r   configr   hidden_sizeint	num_headsnum_kv_headsmax_positioncache_configCacheConfig | Nonequant_configQuantizationConfig | Noneprefixstr	attn_typedual_chunk_attention_configdict[str, Any] | None	layer_idxreturnNonec                6   t                                                       || _        || _        t	                      }|| _        | j        |z  dk    sJ | j        |z  | _        || _        | j        |k    r| j        |z  dk    sJ n|| j        z  dk    sJ t          d| j        |z            | _	        || j        z  | _
        | j        | j
        z  | _        | j	        | j
        z  | _        | j
        dz  | _        |
| _        t          |dd          | _        t          |dd          | _        |j        }t'          || j
        | j        | j        d|| d	
          | _        t+          | j        | j
        z  |d|| d
          | _        t/          | j
        ||j        |
          | _        t5          j                    | _        |}t;          | j                  D ]}t=          |          }||z  |z   }|                    d| d|           }|dk    r|}n/|t?          || j                  }ntA          | j        d          }| j        !                    tE          | j        | j
        | j        f| j	        |||	| dd|
r|dk    r||
dni            d S )Nr   r   g      loop_num   loop_window_size@   Fz	.qkv_proj)biasr,   r.   z.o_proj)r)   rope_parametersr1   zlayers.)sliding_windowauto)r=   cache_dtypez.attn)r(   r*   r,   r0   r.   )r3   r1   )#super__init__r3   r%   r   total_num_headsr'   total_num_kv_headsmaxr(   head_dimq_sizekv_sizescalingr1   getattrr7   r9   num_hidden_layersr   qkv_projr   o_projr   r<   
rotary_embr   
ModuleListattnranger   r   r   appendr	   )selfr$   r%   r'   r(   r)   r*   r,   r.   r0   r1   r3   tp_sizetotal_layersbase_cache_configloop_idxbase_layer_idxunique_layer_idxunique_prefixloop_cache_config	__class__s                       /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/iquest_loopcoder.pyrA   zLoopCoderAttention.__init__=   s\    	"&688(#g-2222-8"."g-- *W499999 T4499994#:g#EFF#t';;nt}4(4=8}d*+F(  
A66 '0BB G G /)M #%'''
 
 
 ( 4=0%%%%
 
 
 #M%"2(C	
 
 
 MOO	(dm,, '	 '	H088N',6G"NN*.**,H6F,H,H M 1}}$0!!$0(/)'+'<) ) )%%
 )4'+'<$*) ) )%
 INML "&!2!2!-'+222  3	 8@1}} &67R  
     -'	 '	    	positionstorch.Tensorhidden_statesrV   	gate_projLoopGateProjection | Nonec                ^   |dk    r| j         d         }|                     |          \  }}|                    | j        | j        | j        gd          \  }}	}
|                     |||	          \  }}	 |||	|
          }|                     |          \  }}|S | j         d         }| j         |         }|                     |          \  }}|                    | j        | j        | j        gd          \  }}	}
|                     |||	          \  }}	|j        \  }}| j        }| j	        }|
                    |||                              dd          } ||d d           } |||	|
          }|
J d             ||          }||z  |d|z
  z  z   }|                     |          \  }}|S )Nr   )dimr   z+gate_proj must be provided for loop_idx > 0)rO   rK   splitrF   rG   rM   rL   shaper'   rE   view	transpose)rR   r^   r`   rV   ra   rO   qkv_qkvattn_outputoutputglobal_attn
local_attn
num_tokensr'   rE   
q_reshapedglobal_attn_outputlocal_attn_outputgates                         r\   forwardzLoopCoderAttention.forward   s    q==9Q<D]]=11FCiidlDL IriRRGAq!??9a33DAq$q!Q--KK00IFAM)A,K8,J]]=11FCiidlDL IriRRGAq!??9a33DAqGMJI}H
Ix@@JJ1aPPJ!,Qd!;!; *
1a 3 3((*W(((9Z((D'$.1Ba$h1OOFF++IFAMr]   )r$   r   r%   r&   r'   r&   r(   r&   r)   r&   r*   r+   r,   r-   r.   r/   r0   r/   r1   r2   r3   r&   r4   r5   N
r^   r_   r`   r_   rV   r&   ra   rb   r4   r_   )__name__
__module____qualname__r   DECODERrA   rx   __classcell__r[   s   @r\   r"   r"   <   s         &+/26&.=Ao o o o o o ol 04! ! ! ! ! ! ! ! !r]   r"   c                  6     e Zd Z	 	 	 	 dd fdZ	 dddZ xZS )LoopCoderDecoderLayerNr#   r   r$   r   r*   r+   r,   r-   r.   r/   r3   r&   r4   r5   c                0   t                                                       |j        | _        t          |dd           }|| _        t          |dd          rt
          j        }nt
          j        }t          || j        |j	        |j
        |j        ||| d||| j                  | _        t          | j        |j        |j        || d          | _        t#          |j        |j                  | _        t#          |j        |j                  | _        d S )	Nr1   	is_causalTz
.self_attn)r$   r%   r'   r)   r(   r*   r,   r.   r0   r1   r3   z.mlp)r%   intermediate_size
hidden_actr,   r.   eps)r@   rA   r%   rI   r3   r   r~   ENCODER_ONLYr"   num_attention_headsmax_position_embeddingsnum_key_value_heads	self_attnr   r   r   mlpr   rms_norm_epsinput_layernormpost_attention_layernorm)	rR   r$   r*   r,   r.   r3   r1   r0   r[   s	           r\   rA   zLoopCoderDecoderLayer.__init__   s6    	!-&-14'
 '
# #6;-- 	3%-II%2I+(073%%((((Cn
 
 
 ($6(%???
 
 
  'v'9v?RSSS(/F$7)
 )
 )
%%%r]   r^   r_   r`   rV   ra   rb   c                    |}|                      |          }|                     ||||          }||z   }|}|                     |          }|                     |          }||z   }|S )N)r^   r`   rV   ra   )r   r   r   r   )rR   r^   r`   rV   ra   residuals         r\   rx   zLoopCoderDecoderLayer.forward   s     !,,];;'	 ' 
 
 &0 55mDD//%0r]   )NNr#   r   )r$   r   r*   r+   r,   r-   r.   r/   r3   r&   r4   r5   ry   rz   )r{   r|   r}   rA   rx   r   r   s   @r\   r   r      so         ,026*
 *
 *
 *
 *
 *
 *
b 04        r]   r   c                  2     e Zd ZdZ	 	 dd fdZddZ xZS )LoopGateProjectiona}  Gate projection for mixed attention in Loop 2+.

    Computes: g = sigmoid(linear(Q)) for each head independently.
    This gate determines how much to use Loop1's KV (global) vs current
    loop's KV (local).

    Supports tensor parallelism: each GPU handles a subset of heads.
    The weight matrix has shape [num_heads, head_dim] and is split along
    the head dimension.
    Nr#   rB   r&   rE   r,   r-   r.   r/   c           	        t                                                       || _        || _        t	                      }| j        |z  dk    sJ | j        |z  | _        t          || j        dd|| d          | _        d S )Nr   TFz
.gate_proj)r;   gather_outputr,   r.   )r@   rA   rB   rE   r   r'   r   ra   )rR   rB   rE   r,   r.   rS   r[   s         r\   rA   zLoopGateProjection.__init__#  s     	. 688#g-2222-8- %(((
 
 
r]   queryr_   r4   c                (   |j         \  }}}|| j        k    sJ d| j         d|             |                    d|          }|                     |          \  }}|                    ||| j                  }t	          j        |dd          }|                    dd          }|                    d          }t	          j        |          }	|	                    dd          }	|		                    dd|          }	|	                    |||z            }	|	S )an  Compute gate values from query tensor.

        Args:
            query: [num_heads, num_tokens, head_dim] (vLLM flattened format)
                where num_heads is the number of heads on this TP rank
                and num_tokens = batch * seq_len

        Returns:
            gate: [num_tokens, num_heads * head_dim] (flattened format matching q shape)
        z	Expected z heads, got rd   r   r8   )dim1dim2r   )
rg   r'   reshapera   torchdiagonalri   	unsqueezesigmoidexpand)
rR   r   r'   rs   rE   
query_flatgate_logits_flatrk   gate_logitsrw   s
             r\   rx   zLoopGateProjection.forward:  s4    +0+'	:xDN***???I?? +** ]]2x00
"nnZ88!&..z4>
 
 naa
 
 
 "++Aq11!++B// }[)) ~~a##{{2r8,,||	H,
 
 r]   )Nr#   )rB   r&   rE   r&   r,   r-   r.   r/   )r   r_   r4   r_   )r{   r|   r}   __doc__rA   rx   r   r   s   @r\   r   r     sh        	 	 37
 
 
 
 
 
 
.+ + + + + + + +r]   r   rd   )	input_idsr^   intermediate_tensorsinputs_embeds)dynamic_arg_dimsc                  F     e Zd Zdedd fd	ZddZ	 	 dddZddZ xZS )IQuestLoopCoderModelr#   )r.   decoder_layer_typevllm_configr   r.   r/   r   type[nn.Module]c                  t                                                       |j        j        |j        |j        j        Ht          d          r8j        j	        k    s(J d
                    j        j	                              | _        | _        j        | _        t          j        j        | d          | _        t!          | j        dd          | _        t!          | j        dd          | _        j        j        z  t)          j	        fd	| d
          \  }}| _        t)          j	        fd| d          \  | _        | _        | _        t3          j        j                  | _        d S )Nmax_window_layerszSliding window for some but all layers is not supported. This model uses sliding window but `max_window_layers` = {} is less than `num_hidden_layers` = {}. Please open an issue to discuss this feature.z.embed_tokensr,   r.   r7   r8   r9   r:   c                4    t          j        |           S )N)rB   rE   r,   r.   )r   r   )r.   r$   rE   r,   s    r\   <lambda>z/IQuestLoopCoderModel.__init__.<locals>.<lambda>  s&    - & :!)	   r]   z.gate_projectionsr.   c           	     F    t          | t          |                     S )N)r$   r*   r,   r.   r3   )r   r   )r.   r*   r$   r,   s    r\   r   z/IQuestLoopCoderModel.__init__.<locals>.<lambda>  s.    0))-f55   r]   z.layersr   )r@   rA   model_config	hf_configr*   r,   r=   hasattrr   rJ   formatr$   
vocab_sizer   r%   embed_tokensrI   r7   window_sizer   r   gate_projectionsstart_layer	end_layerlayersr   r   norm)
rR   r   r.   r   rk   r*   r$   rE   r,   r[   s
        @@@@r\   rA   zIQuestLoopCoderModel.__init__q  s    	)3"/"/ &2w'8
 8
2 +v/GGGG+ ,26,,, ,	 HGG ( +2%+++	
 
 
  Z;;"4;0BBGG %)CC&1$      ///	'
 	'
 	'
#1d# 9D$      %%%
9
 
9
 
9
5$.$+ F.F4GHHH			r]   r   r_   r4   c                ,    |                      |          S ry   )r   rR   r   s     r\   embed_input_idsz$IQuestLoopCoderModel.embed_input_ids  s      +++r]   Nr^   r   IntermediateTensors | Noner   torch.Tensor | None"torch.Tensor | IntermediateTensorsc                D   ||}n|                      |          }t          | j                  D ][}t          | j        | j        | j                           D ]2\  }}| j        |z   }	|dk    r| j        |	         nd }
 |||||
          }3\|                     |          }|S )Nr   )	r   rP   r7   	enumerater   r   r   r   r   )rR   r   r^   r   r   r`   rV   r3   layeractual_layer_idxra   s              r\   rx   zIQuestLoopCoderModel.forward  s     $)MM 00;;Mdm,, 
	U 
	UH$-D,t~=>% % 	U 	U 	5 $(#3i#?  @H!||D)*:;;QU  !&i) T T	U 		-00r]   weights"Iterable[tuple[str, torch.Tensor]]set[str]c                @   g d}t          |                     d                    }t                      }|D ]d\  }}d|v r| j        ~| j                            |          x}rb||         }t          |dt                    }	|                                dk    r|n|d         } |	||           |                    |           |D ]\  }
}}d|v r||vr|	                    ||
          }|
                    d          r||vr@|
                    d	          rt          ||          }|h||         }t          |dt                    }	|	t          k    r |	||           n |	|||            n|                    d
          r|
                    d          r|	                    dd          }n.|
                    d          r|	                    dd          }n||v rA||         }t          |dt                    }	 |	||           |                    |           |
                    d          r||vrt          ||          }|%||         }t          |dt                    }	 |	||           |                    |           f|S )N))rK   q_projrl   )rK   k_projrm   )rK   v_projrn   )gate_up_projra   r   )r   up_projr   F)remove_duplicatezrotary_emb.inv_freqweight_loaderr   r   z.biasscalezgate_projections.z.weightz.gate_proj.weightz.gate_proj.bias)dictnamed_parameterssetr,   get_cache_scalerI   r   re   addr   endswithr   
startswith)rR   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
scale_nameparamr   
param_nameweight_nameshard_id	vllm_names                 r\   load_weightsz!IQuestLoopCoderModel.load_weights  s'   "
 "
 "
 400%0HHII"%%%#* ?	$ ?	$D-$,, ,"/??EEE
 - $J/ '@U V V%2%6%6%8%8A%=%=MM=QRCS  e]333!!*---5K /4 /41
K%--d**||K<<==)) d+.E.E==)) !4T;GGD| #D) '@U V V $999!M%7777!M%AAA??#677 }}Y// !$(LL<O$P$P		w// !$(LL:K$L$L		  K// +I 6(/!?4I) ) &e];;;%)))444 ==)) d+.E.E0{CC<#D) '@U V Ve]333d####r]   )r   r   r.   r/   r   r   r   r_   r4   r_   NN
r   r_   r^   r_   r   r   r   r   r4   r   r   r   r4   r   )	r{   r|   r}   r   rA   r   rx   r   r   r   s   @r\   r   r   h  s         .CAI AI AI AI AI AI AI AIF, , , , <@-1    4K K K K K K K Kr]   r   c                  L     e Zd Zddd fdZddZ	 	 dddZddZddZ xZS ) IQuestLoopCoderForCausalLMr#   r   r   r   r.   r/   c          	        t                                                       |j        j        }|j        }|| _        || _        t          |t          |d                    | _        |j	        r| j        j
        | _        n0t          |j        |j        |t          |d                    | _        t          |j                  | _        d S )Nmodel)r   r.   lm_headr   )r@   rA   r   r   r,   r$   r   r    r   tie_word_embeddingsr   r   r   r   r%   r   logits_processor)rR   r   r.   r$   r,   r[   s        r\   rA   z#IQuestLoopCoderForCausalLM.__init__   s    )3"/()#L,I,I
 
 

 % 	:2DLL)!")#FI66	  DL !00A B Br]   r   r_   r4   c                6    | j                             |          S ry   )r   r   r   s     r\   r   z*IQuestLoopCoderForCausalLM.embed_input_ids8  s    z)))444r]   Nr^   r   r   r   r   r   c                6    |                      ||||          }|S ry   )r   )rR   r   r^   r   r   r`   s         r\   rx   z"IQuestLoopCoderForCausalLM.forward;  s)     

y"6
 
 r]   r`   c                <    |                      | j        |          }|S ry   )r   r   )rR   r`   logitss      r\   compute_logitsz)IQuestLoopCoderForCausalLM.compute_logitsG  s      &&t|]CCr]   r   r   r   c                l    t          | | j        j        rdgnd           }|                    |          S )Nzlm_head.)skip_prefixes)r   r$   r   r   )rR   r   loaders      r\   r   z'IQuestLoopCoderForCausalLM.load_weightsN  sC    "+/;+JTJ<<PT
 
 
 ""7+++r]   )r   r   r.   r/   r   r   r   )r`   r_   r4   r   r   )	r{   r|   r}   rA   r   rx   r   r   r   r   s   @r\   r   r     s        AC C C C C C C C C05 5 5 5 <@-1
 
 
 
 
   , , , , , , , ,r]   r   )9r   
__future__r   collections.abcr   dataclassesr   typingr   r   r   transformersr   vllm.attention.layerr	   vllm.compilation.decoratorsr
   vllm.configr   r   vllm.distributedr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   r    vllm.model_executor.models.llamar   vllm.sequencer   vllm.v1.attention.backendr   utilsr   r   r   r    Moduler"   r   r   r   r    r]   r\   <module>r     sS   J I " " " " " " $ $ $ $ $ $                    ) ) ) ) ) ) * * * * * * = = = = = = / / / / / / / / A A A A A A 8 8 8 8 8 8         
 H G G G G G F F F F F F @ @ @ @ @ @               6 5 5 5 5 5 - - - - - - 3 3 3 3 3 3           S S S S S S S SlB B B B BBI B B BJN N N N N N N Nb  !	   l l l l l29 l l l^4, 4, 4, 4, 4, 4, 4, 4, 4, 4,r]   