
    .`iC                     r   d Z ddlmZ ddlmZ ddlZddlmZ ddlmZm	Z	 ddl
mZ ddlmZ dd	lmZmZ dd
lmZmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZ ddl m!Z! ddl"m#Z#m$Z$m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+ ddl,m-Z-m.Z.m/Z/ ddl0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6  ej7        e)j8                  d             Z9 G d dej:                  Z; G d dej:                  Z< G d dej:                  Z= G d  d!ej:                  Z>e G d" d#ej:                              Z? G d$ d%ej:        e-e.e/          Z@dS )&zPyTorch Cohere model.    )Iterable)isliceN)nn)Cohere2ConfigCohereConfig)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)
SiluAndMul)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)VocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_namerow_parallel_weight_loader)set_weight_attrs)current_platform)IntermediateTensors   )SupportsLoRA
SupportsPPSupportsQuant)AutoWeightsLoaderextract_layer_indexis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefix)backendc                 ~   | j         }|                     t          j                  } |                     dd          }| |z
                      d                              dd          }| |z
  t          j        ||z             z  } |                    t          j                  | z  } |                     |          S )NT)keepdim   )dtypetotorchfloat32meanpowrsqrt)hidden_statesweightvariance_epsiloninput_dtyper/   variances         w/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/commandr.pylayer_norm_funcr8   C   s    %K!$$U]33Mb$//D$))!,,11"d1CCH"T)U[DT9T-U-UUMIIem,,}<MK(((    c                   (     e Zd Zd fd	ZddZ xZS )	LayerNormNh㈵>c                     t                                                       t          j        t	          j        |                    | _        || _        t          | j        dt          i           d S )Nweight_loader)
super__init__r   	Parameterr-   onesr3   r4   r   r   )selfparam_shapeeps	__class__s      r7   r@   zLayerNorm.__init__O   sZ    l5:k#:#:;; #8R&STTTTTr9   c                 @    t          || j        | j                  }||fS N)r8   r3   r4   )rC   r2   	residualss      r7   forwardzLayerNorm.forwardU   s*    '4;(=
 
 i''r9   )Nr<   rH   )__name__
__module____qualname__r@   rJ   __classcell__rF   s   @r7   r;   r;   N   sW        U U U U U U( ( ( ( ( ( ( (r9   r;   c                   D     e Zd Z	 	 ddeez  dedz  def fdZd Z xZ	S )		CohereMLPN configquant_configprefixc                 L   t                                                       || _        |j        | _        |j        | _        t          | j        | j        gdz  d|| d          | _        t          | j        | j        d|| d          | _        t                      | _
        d S )Nr*   Fz.gate_up_projbiasrT   rU   z
.down_proj)r?   r@   rS   hidden_sizeintermediate_sizer   gate_up_projr   	down_projr   act_fn)rC   rS   rT   rU   rF   s       r7   r@   zCohereMLP.__init__^   s     	!-!'!96#$q(%+++
 
 
 +"%(((
 
 
 !llr9   c                     |                      |          \  }}|                     |          }|                     |          \  }}|S rH   )r[   r]   r\   )rC   xgate_up_s       r7   rJ   zCohereMLP.forwardx   sD    &&q))
KK  ~~a  1r9   )NrR   )
rK   rL   rM   r   r   r   strr@   rJ   rN   rO   s   @r7   rQ   rQ   ]   s         37	# #},# )4/# 	# # # # # #4      r9   rQ   c            	            e Zd Z	 	 	 ddeez  dedz  dedz  def fdZd Z	d	e
j        d
e
j        de
j        fdZ xZS )CohereAttentionNrR   rS   cache_configrT   rU   c                 f   t                                                       t                      }|| _        |j        | _        |j        | _        |j        | _        | j        |z  | _        | j        | j        z  | _	        |j
        | _        | j        |k    r| j        |z  dk    sJ n|| j        z  dk    sJ t          d| j        |z            | _        | j        | j	        z  | _        | j        | j	        z  | _        | j	        dz  | _        t#          |dd           pt#          |dd          | _        t#          |dd          | _        t)          | j        | j	        | j        | j        d|| d	
          | _        t-          | j        | j	        z  | j        d|| d
          | _        t1          | j	        | j        |j        d          | _        t7          |t8                    | _        d | _        | j        s,t?          |          }|j         |         dk    r|j        | _        tC          | j        | j	        | j        | j        ||| j        | d          | _"        | j        rPtG          | j        | j	        f|j$                  | _%        tG          | j        | j	        f|j$                  | _&        d S d S )Nr   r   g      model_max_lengthmax_position_embeddingsi    use_qk_normFz	.qkv_projrW   z.o_proj)max_positionrope_parametersis_neox_stylesliding_attentionz.attn)num_kv_headsre   rT   per_layer_sliding_windowrU   rD   rE   )'r?   r@   r   rS   attention_dropoutrY   num_attention_headstotal_num_heads	num_headshead_dimnum_key_value_headstotal_num_kv_headsmaxrn   q_sizekv_sizescalinggetattrrh   ri   r   qkv_projr   o_projr   rk   
rotary_emb
isinstancer   v1sliding_windowr!   layer_typesr   attnr;   layer_norm_epsq_normk_norm)rC   rS   re   rT   rU   tp_size	layer_idxrF   s          r7   r@   zCohereAttention.__init__   s    	688!'!9!-%9-8(D,@@"("<"g-- *W499999 T4499994#:g#EFFnt}4(4=8}d*'.&(
 (
 (>V6== 	$ #6=%@@)M #%'''
 
 
 ( 4=0%%%%
 
 
 #M5"2	
 
 
 V\22"w 	<+F33I!),0CCC&,&;#NML*%%%)%8###	
 	
 	
	  	#!^T];AV  DK $!.>)  DKKK		 	r9   c                 f    |j         g |j        d d         d| j        R  } |j         g |j        d d         d| j        R  }|                     |          \  }}|                     |          \  }} |j         g |j        d d         dR  } |j         g |j        d d         dR  }||fS )Nr(   )viewshaperu   r   r   )rC   qkra   s       r7   _apply_qk_normzCohereAttention._apply_qk_norm   s    AF4AGCRCL4"4dm444AF4AGCRCL4"4dm444{{1~~1{{1~~1AF%AGCRCL%"%%%AF%AGCRCL%"%%%!tr9   	positionsr2   returnc                 ~   |                      |          \  }}|                    | j        | j        | j        gd          \  }}}| j        r|                     ||          \  }}| j        s| j        r|                     |||          \  }}| 	                    |||          }| 
                    |          \  }	}|	S )Nr(   )dim)r}   splitry   rz   ri   r   r   r   r   r   r~   )
rC   r   r2   qkvra   r   r   vattn_outputoutputs
             r7   rJ   zCohereAttention.forward   s    
 }--Q))T[$,E2)NN1a 	-&&q!,,DAq7 	4d) 	4??9a33DAqii1a((KK,,	r9   NNrR   )rK   rL   rM   r   r   r
   r   rb   r@   r   r-   TensorrJ   rN   rO   s   @r7   rd   rd      s         ,026Q Q},Q "D(Q )4/	Q
 Q Q Q Q Q Qf  < | 
	       r9   rd   c            
            e Zd Z	 	 	 ddeez  dedz  dedz  def fdZde	j
        d	e	j
        d
e	j
        dz  dee	j
        e	j
        f         fdZ xZS )CohereDecoderLayerNrR   rS   re   rT   rU   c                 
   t                                                       |j        | _        t          |||| d          | _        t          ||| d          | _        t          |j        |j                  | _	        d S )Nz
.self_attn)rT   rU   z.mlprp   )
r?   r@   rY   rd   	self_attnrQ   mlpr;   r   input_layernorm)rC   rS   re   rT   rU   rF   s        r7   r@   zCohereDecoderLayer.__init__   s     	!-(%(((	
 
 
 V,&WWW(+&2G 
  
  
r9   r   r2   residualr   c                     |}|                      ||          \  }}|                     ||          }|                     |          }||z   |z   }||fS )N)r   r2   )r   r   r   )rC   r   r2   r   hidden_states_attentionhidden_states_mlps         r7   rJ   zCohereDecoderLayer.forward  sr     !"&"6"6}h"O"Ox"&..' #1 #
 #
 !HH]33 #::=NNh&&r9   r   )rK   rL   rM   r   r   r
   r   rb   r@   r-   r   tuplerJ   rN   rO   s   @r7   r   r      s         ,026
 
},
 "D(
 )4/	

 
 
 
 
 
 
,'<' |' ,%	'
 
u|U\)	*' ' ' ' ' ' ' 'r9   r   c                        e Zd Zdddedef fdZdej        dej        fdZ	 ddej        d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
deeeej        f                  dee         fdZ xZS )CohereModelrR   rU   vllm_configrU   c                   t                                                       |j        j        |j        |j        | _        | _        j        | _        t          j        j	                  | _
        t          j        fd| d          \  | _        | _        | _        t!          j	        j                  | _        t'          ddgj	                  | _        d S )Nc                 *    t          |           S )Nr   )r   )rU   re   rS   rT   s    r7   <lambda>z&CohereModel.__init__.<locals>.<lambda>*  s     -l6   r9   z.layersr   rp   r2   r   )r?   r@   model_config	hf_configre   rT   rS   
vocab_sizer   rY   embed_tokensr$   num_hidden_layersstart_layer	end_layerlayersr;   r   normr#   make_empty_intermediate_tensors)rC   r   rU   re   rS   rT   rF   s      @@@r7   r@   zCohereModel.__init__  s   )3"/"/( +2v1
 
 9D$      %%%9
 9
 9
5$.$+ +&2G
 
 
	 0Wj)6+=0
 0
,,,r9   	input_idsr   c                 ,    |                      |          S rH   )r   rC   r   s     r7   embed_input_idszCohereModel.embed_input_ids6  s      +++r9   Nr   intermediate_tensorsinputs_embedsc                 p   t                      j        r||}n|                     |          }d }n|J |d         }|d         }t          | j        | j        | j                  D ]} ||||          \  }}t                      j        st          ||d          S | 	                    ||          \  }}|S )Nr2   r   )r2   r   )
r   is_first_rankr   r   r   r   r   is_last_rankr   r   )	rC   r   r   r   r   r2   r   layerra   s	            r7   rJ   zCohereModel.forward9  s     >>' 		8( - $ 4 4Y ? ?HH'3330AM+J7HDK)94>JJ 	 	E&+e' '#M88
 ~~* 	&"/XFF    99]H==qr9   weightsc                 b   g d}t          |                                           }t                      }|D ]w\  }}| j        ~| j                            |          x}rb||         }t          |dt                    }	|                                dk    r|n|d         } |	||           |                    |           |D ]i\  }
}}||vr|	                    ||
          }|
                    d          r||vr;t          ||           rL||         }|j        }	 |	|||            nk|
                    d          r||vrt          ||          }|&t          ||           r8||         }t          |dt                    }	 |	||           |                    |           y|S )N))r}   q_projr   )r}   k_projr   )r}   v_projr   )r[   	gate_projr   )r[   up_projr   r>   r   z.bias)dictnamed_parameterssetrT   get_cache_scaler|   r   r   addreplaceendswithr"   r>   r   )rC   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
scale_nameparamr>   
param_name
shard_nameshard_ids                r7   load_weightszCohereModel.load_weightsW  s   "
 "
 "
 4002233"%%%#* )	$ )	$D- ,"/??EEE
 - $J/ '@U V V%2%6%6%8%8A%=%=MM=QRCS  e]333!!*---4J 4 40
JT))||J
;;==)) d+.E.E*466 #D) % 3e]H=== ==)) d+.E.E0{CC<*466 #D) '@U V Ve]333d####r9   rH   )rK   rL   rM   r   rb   r@   r-   r   r   r   rJ   r   r   r   r   rN   rO   s   @r7   r   r     s       AC 
 
 
z 
3 
 
 
 
 
 
:, ,%, , , , , .2 < < 2D8	
 |d* 
+	+   <5HU33D-E$F 53s8 5 5 5 5 5 5 5 5r9   r   c                   ^    e Zd Zg dddgdZddiZddd	ed
ef fdZdej	        dej	        fdZ
 ej                    	 	 ddej	        dej	        dedz  dej	        dz  dej	        ez  f
d            Zdej	        dej	        dz  fdZdeeeej	        f                  dee         fdZ xZS )CohereForCausalLM)r   r   r   r   r   )r}   r[   r   input_embeddingsrR   r   r   rU   c                F   t                                                       |j        j        }|j        }|| _        |j        sJ || _        t          |j        |j	                  | _
        t          |t          |d                    | _        | j        j        | _        d S )N)scalemodel)r   rU   )r?   r@   r   r   rT   rS   tie_word_embeddingsr   r   logit_scalelogits_processorr   r%   r   r   )rC   r   rU   rS   rT   rF   s        r7   r@   zCohereForCausalLM.__init__  s    )3"/ ))))( /V%7!
 !
 !
 !#L,I,I
 
 

 J6 	,,,r9   r   r   c                 6    | j                             |          S rH   )r   r   r   s     r7   r   z!CohereForCausalLM.embed_input_ids  s    z)))444r9   Nr   r   r   c                 6    |                      ||||          }|S rH   )r   )rC   r   r   r   r   r2   s         r7   rJ   zCohereForCausalLM.forward  s)     

y"6
 
 r9   r2   c                     t          | j        j        d          }|r!|                     | j        j        |          }n%|                     | j        j        j        |          }|S )Nr3   )hasattrr   r   r   
base_layer)rC   r2   is_not_loralogitss       r7   compute_logitsz CohereForCausalLM.compute_logits  se     dj5x@@ 	**4:+BMRRFF**
'2M F r9   r   c                 R    t          | ddg          }|                    |          S )Nlm_headzrotary_emb.inv_freq)skip_prefixes)r    r   )rC   r   loaders      r7   r   zCohereForCausalLM.load_weights  s7    ",A B
 
 
 ""7+++r9   )NN)rK   rL   rM   packed_modules_mappingembedding_modulesr   rb   r@   r-   r   r   no_gradr   rJ   r   r   r   r   r   rN   rO   s   @r7   r   r     s       
 
 
 

 
 ();<AC 
 
 
z 
3 
 
 
 
 
 
*5 5%, 5 5 5 5 U]__
 <@-1
 
<
 <
 2D8	

 |d*
 
+	+
 
 
 _
| 
	   ,HU33D-E$F ,3s8 , , , , , , , ,r9   r   )A__doc__collections.abcr   	itertoolsr   r-   r   transformersr   r   vllm.attention.layerr   vllm.compilation.decoratorsr	   vllm.configr
   r   vllm.distributedr   r   %vllm.model_executor.layers.activationr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   -vllm.model_executor.model_loader.weight_utilsr   r   r   vllm.model_executor.utilsr   vllm.platformsr   vllm.sequencer   
interfacesr   r   r   utilsr    r!   r"   r#   r$   r%   compilesimple_compile_backendr8   Moduler;   rQ   rd   r   r   r    r9   r7   <module>r     s  0   $ $ $ $ $ $              4 4 4 4 4 4 4 4 * * * * * * = = = = = = / / / / / / / / O O O O O O O O < < < < < <         
 H G G G G G F F F F F F @ @ @ @ @ @ V V V V V V         
 7 6 6 6 6 6 + + + + + + - - - - - - ? ? ? ? ? ? ? ? ? ?                '>???) ) @?)( ( ( ( (	 ( ( (    	   Dj j j j jbi j j jZ(' (' (' (' (' (' (' ('V t t t t t") t t tnF, F, F, F, F,	<] F, F, F, F, F,r9   