
    .`iXI                        d Z ddlmZ ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZmZ dd
lmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZ ddl m!Z!m"Z" ddl#m$Z$m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+ ddl,m-Z-m.Z.m/Z/m0Z0m1Z1  G d dej2                  Z3 G d dej2                  Z4 G d dej2                  Z5 eddddd            G d! d"ej2                              Z6 G d# d$ej2        e+          Z7dS )%z>Inference-only Ouro model compatible with HuggingFace weights.    )Iterable)AnyN)nn)PretrainedConfig)	Attention)support_torch_compile)CacheConfig
VllmConfig)$get_tensor_model_parallel_world_size)
SiluAndMul)RMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)IntermediateTensors)AttentionType   )SupportsLoRA)AutoWeightsLoaderextract_layer_index'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                   J     e Zd Z	 	 ddededededz  deddf fd	Zd
 Z xZS )OuroMLPN hidden_sizeintermediate_size
hidden_actquant_configprefixreturnc                    t                                                       t          ||gdz  d|| d          | _        t	          ||d|| d          | _        |dk    rt          d| d          t                      | _        d S )	N   Fz.gate_up_projbiasr'   r(   z
.down_projsiluzUnsupported activation: z!. Only silu is supported for now.)	super__init__r   gate_up_projr   	down_proj
ValueErrorr   act_fn)selfr$   r%   r&   r'   r(   	__class__s         s/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/ouro.pyr0   zOuroMLP.__init__H   s     	6!#%+++
 
 
 +%(((
 
 
 X:XXX   !ll    c                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)r1   r4   r2   )r5   xgate_up_s       r7   forwardzOuroMLP.forwarde   sD    &&q))
KK  ~~a  1r8   )Nr#   )	__name__
__module____qualname__intstrr   r0   r>   __classcell__r6   s   @r7   r"   r"   G   s         37# ## # 	#
 )4/# # 
# # # # # #:      r8   r"   c                        e Zd Zddddej        dfdededededed	edz  d
edz  de	de	de
e	ef         dz  ddf fdZdej        dej        dedej        fdZ xZS )OuroAttentioni   Nr#   configr$   	num_headsnum_kv_headsmax_positioncache_configr'   r(   	attn_typedual_chunk_attention_configr)   c                 h   t                                                       || _        t                      }|| _        | j        |z  dk    sJ | j        |z  | _        || _        | j        |k    r| j        |z  dk    sJ n|| j        z  dk    sJ t          d| j        |z            | _        || j        z  | _	        | j        | j	        z  | _
        | j        | j	        z  | _        | j	        dz  | _        |
| _        t          |dd          }|j        }t!          || j	        | j        | j        d|| d          | _        t%          | j        | j	        z  |d|| d	          | _        t)          | j	        ||j        |

          | _        t/          j                    | _        t5          |          D ]}t7          |          }||z  |z   }|                    d| d|           }| j                            t=          | j        | j	        | j        f| j        |||	| dd|
r||
dni            d S )Nr   r   g      total_ut_steps   Fz	.qkv_projr,   z.o_proj)rK   rope_parametersrN   zlayers.z.attn)rJ   rL   r'   rM   r(   )	layer_idxrN   )r/   r0   r$   r   total_num_headsrI   total_num_kv_headsmaxrJ   head_dimq_sizekv_sizescalingrN   getattrnum_hidden_layersr   qkv_projr   o_projr   rR   
rotary_embr   
ModuleListattnranger   replaceappendr   )r5   rH   r$   rI   rJ   rK   rL   r'   r(   rM   rN   tp_sizerP   total_layersut_stepbase_layer_idxunique_layer_idxunique_prefixr6   s                     r7   r0   zOuroAttention.__init__m   s    	&688(#g-2222-8"."g-- *W499999 T4499994#:g#EFF#t';;nt}4(4=8}d*+F( !)91== /)M #%'''
 
 
 ( 4=0%%%%
 
 
 #M%"2(C	
 
 
 MOO	^,, 	 	G088N&5F"NN*.**,H6F,H,H M INML "&!2!-!-'+222  3	%57R  
     	 	r8   	positionshidden_states
current_utc                 $   |                      |          \  }}|                    | j        | j        | j        gd          \  }}}|                     |||          \  }} | j        |         |||          }	|                     |	          \  }
}|
S )N)dim)r]   splitrX   rY   r_   ra   r^   )r5   rk   rl   rm   qkvr=   qkvattn_outputoutputs              r7   r>   zOuroAttention.forward   s     }--Q))T[$,E2)NN1ay!Q//1+di
+Aq!44KK,,	r8   )r?   r@   rA   r   DECODERr   rB   r	   r   rC   dictr   r0   torchTensorr>   rD   rE   s   @r7   rG   rG   l   s/        &+/26&.=AZ Z Z Z 	Z
 Z Z "D(Z )4/Z Z Z &*#s(^d%:Z 
Z Z Z Z Z Zx< | 	
 
       r8   rG   c                        e Zd Z	 	 	 ddededz  dedz  deddf
 fdZ	 dd	ej	        d
ej	        de
dej	        dz  deej	        ej	        f         f
dZ xZS )OuroDecoderLayerNr#   rH   rL   r'   r(   r)   c                    t                                                       |j        | _        t          |dd           }t          |dd          rt          j        }nt          j        }t          || j        |j        |j	        |j
        ||| d||
  
        | _        t          | j        |j        |j        || d          | _        t!          |j        |j                  | _        t!          |j        |j                  | _        t!          |j        |j                  | _        t!          |j        |j                  | _        d S )	NrN   	is_causalTz
.self_attn)
rH   r$   rI   rK   rJ   rL   r'   r(   rM   rN   z.mlp)r$   r%   r&   r'   r(   eps)r/   r0   r$   r[   r   rx   ENCODER_ONLYrG   num_attention_headsmax_position_embeddingsnum_key_value_heads	self_attnr"   r%   r&   mlpr   rms_norm_epsinput_layernorminput_layernorm_2post_attention_layernormpost_attention_layernorm_2)r5   rH   rL   r'   r(   rN   rM   r6   s          r7   r0   zOuroDecoderLayer.__init__   si    	!-&-14'
 '
# 6;-- 	3%-II%2I&(073%%((((C
 
 
 ($6(%???
 
 
  'v'9v?RSSS!();AT!U!U!U(/F$7)
 )
 )
% +2F$7+
 +
 +
'''r8   rk   rl   rm   residualc                 P   ||}|                      |          }n|                      ||          \  }}|                     |||          }|                     |          }|                     ||          \  }}|                     |          }|                     |          }||fS )N)rk   rl   rm   )r   r   r   r   r   r   )r5   rk   rl   rm   r   s        r7   r>   zOuroDecoderLayer.forward  s     $H 00??MM&*&:&:=(&S&S#M8} ' 
 
 ..}=="&"?"?x"X"Xx//77FFh&&r8   )NNr#   r:   )r?   r@   rA   r   r	   r   rC   r0   rz   r{   rB   tupler>   rD   rE   s   @r7   r}   r}      s         ,026-
 -
 -
 "D(-
 )4/	-

 -
 
-
 -
 -
 -
 -
 -
h )-' '<' |' 	'
 ,%' 
u|U\)	*' ' ' ' ' ' ' 'r8   r}   ro   )	input_idsrk   intermediate_tensorsinputs_embeds)dynamic_arg_dimsc                       e Zd Zdeddededeej                 f fdZ	de
j        de
j        fd	Z	 	 dde
j        de
j        ded
z  de
j        d
z  de
j        ez  f
dZdeeee
j        f                  dee         fdZ xZS )	OuroModelr#   )r(   decoder_layer_typevllm_configr(   r   c                   t                                                       |j        j        |j        |j        j        Ht          d          r8j        j	        k    s(J d
                    j        j	                              | _        | _        j        | _        t          j        j        | d          | _        pt           t#          j	        fd| d          \  | _        | _        | _        t+          dd	gj                  | _        t/          j        j        
          | _        t5          j        dd          | _        t9          | j        dd          | _        d S )Nmax_window_layerszSliding window for some but all layers is not supported. This model uses sliding window but `max_window_layers` = {} is less than `num_hidden_layers` = {}. Please open an issue to discuss this feature.z.embed_tokensr'   r(   c                 "     |           S )N)rH   rL   r'   r(    )r(   rL   rH   r   r'   s    r7   <lambda>z$OuroModel.__init__.<locals>.<lambda>R  s&    --))	   r8   z.layersr(   rl   r   r   r   T)r-   rP   rQ   )r/   r0   model_config	hf_configrL   r'   sliding_windowhasattrr   r\   formatrH   
vocab_sizer   r$   embed_tokensr}   r   start_layer	end_layerlayersr   make_empty_intermediate_tensorsr   r   normr   early_exit_gater[   rP   )r5   r   r(   r   rL   rH   r'   r6   s      `@@@r7   r0   zOuroModel.__init__(  s    	)3"/"/ &2w'8
 8
2 +v/GGGG+ ,26,,, ,	 HGG ( +2%+++	
 
 
 0C3C8C$       %%%	9
 	9
 	9
5$.$+ 0Wj)6+=0
 0
, F.F4GHHH	01CQTRRR%dk3CQGGr8   r   r)   c                 ,    |                      |          S r:   )r   r5   r   s     r7   embed_input_idszOuroModel.embed_input_idsc  s      +++r8   Nrk   r   r   c                     ||}n|                      |          }t          | j                  D ]J}d }| j        | j        | j                 D ]} |||||          \  }}|                     ||          \  }}	K|S r:   )r   rb   rP   r   r   r   r   )
r5   r   rk   r   r   rl   rm   r   layerr=   s
             r7   r>   zOuroModel.forwardf  s     $)MM 00;;M 344 	B 	BJHT%5%FG  */%}j(+ +'xx  $yyAAM11r8   weightsc                    g d}t          |                     d                    }t                      }|D ]\  }}d|v r| j        ~| j                            |          x}rb||         }t          |dt                    }	|                                dk    r|n|d         } |	||           |                    |           |D ]\  }
}}||vr|	                    ||
          }|
                    d          r||vr;|
                    d          rt          ||          }|c||         }t          |dt                    }	|	t          k    r |	||           n |	|||            nY|
                    d          r||vrUt          ||          }|i||         }t          |dt                    }	 |	||           |                    |           |S )	N))r]   q_projrs   )r]   k_projrt   )r]   v_projru   )r1   	gate_projr   )r1   up_projr   F)remove_duplicatezrotary_emb.inv_freqweight_loaderr   z.biasscale)ry   named_parameterssetr'   get_cache_scaler[   r   rp   addrc   endswithr   )r5   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
scale_nameparamr   
param_nameweight_nameshard_ids                r7   load_weightszOuroModel.load_weights{  sU   "
 "
 "
 400%0HHII"%%%#* -	$ -	$D-$,, ,"/??EEE
 - $J/ '@U V V%2%6%6%8%8A%=%=MM=QRCS  e]333!!*---5K 4 41
Kd**||K<<==)) d+.E.E==)) !4T;GGD| #D) '@U V V $999!M%7777!M%AAA ==)) d+.E.E0{CC<#D) '@U V Ve]333d####r8   NN)r?   r@   rA   r}   r
   rC   typer   Moduler0   rz   r{   r   r   r>   r   r   r   r   rD   rE   s   @r7   r   r     sF        .>9H 9H 9H  9H 	9H
 !O9H 9H 9H 9H 9H 9Hv, ,%, , , , , <@-1 < < 2D8	
 |d* 
+	+   *9HU33D-E$F 93s8 9 9 9 9 9 9 9 9r8   r   c                   ,    e Zd Zg dddgdZdddedef fd	Zd
ej        dej        fdZ		 	 dd
ej        dej        de
dz  dej        dz  dej        e
z  f
dZdej        dej        dz  fdZdeeeej        f                  dee         fdZ xZS )OuroForCausalLM)r   r   r   r   r   )r]   r1   r#   r   r   r(   c          	         t                                                       |j        j        }|j        }|| _        || _        t          |t          |d                    | _        |j	        r| j        j
        | _        n0t          |j        |j        |t          |d                    | _        t          |j                  | _        | j        j        | _        d S )Nmodel)r   r(   lm_headr   )r/   r0   r   r   r'   rH   r   r    r   tie_word_embeddingsr   r   r   r   r$   r   logits_processorr   )r5   r   r(   rH   r'   r6   s        r7   r0   zOuroForCausalLM.__init__  s    )3"/(#L,I,I
 
 

 % 	:2DLL)!")#FI66	  DL !00A B B J6 	,,,r8   r   r)   c                 6    | j                             |          S r:   )r   r   r   s     r7   r   zOuroForCausalLM.embed_input_ids  s    z)))444r8   Nrk   r   r   c                 6    |                      ||||          }|S r:   )r   )r5   r   rk   r   r   rl   s         r7   r>   zOuroForCausalLM.forward  s)     

y"6
 
 r8   rl   c                 <    |                      | j        |          }|S r:   )r   r   )r5   rl   logitss      r7   compute_logitszOuroForCausalLM.compute_logits  s      &&t|]CCr8   r   c                 l    t          | | j        j        rdgnd           }|                    |          S )Nzlm_head.)skip_prefixes)r   rH   r   r   )r5   r   loaders      r7   r   zOuroForCausalLM.load_weights  sC    "+/;+JTJ<<PT
 
 
 ""7+++r8   r   )r?   r@   rA   packed_modules_mappingr
   rC   r0   rz   r{   r   r   r>   r   r   r   r   r   rD   rE   s   @r7   r   r     s{       
 
 
 

 
 BD 
 
 
z 
3 
 
 
 
 
 
85 5%, 5 5 5 5 <@-1
 
<
 <
 2D8	

 |d*
 
+	+
 
 
 
| 
	   ,HU33D-E$F ,3s8 , , , , , , , ,r8   r   )8__doc__collections.abcr   typingr   rz   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr	   r
   vllm.distributedr   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   r   vllm.sequencer   vllm.v1.attention.backendr   
interfacesr   utilsr   r   r   r   r    r   r"   rG   r}   r   r   r   r8   r7   <module>r      sO  6 E D $ $ $ $ $ $              ) ) ) ) ) ) * * * * * * = = = = = = / / / / / / / / A A A A A A < < < < < < 8 8 8 8 8 8         
 H G G G G G F F F F F F @ @ @ @ @ @               . - - - - - 3 3 3 3 3 3 $ $ $ $ $ $             " " " " "bi " " "Jh h h h hBI h h hVE' E' E' E' E'ry E' E' E'P  !	   M M M M M	 M M M`D, D, D, D, D,bi D, D, D, D, D,r8   