
    .`i;                        d dl mZ d dlmZ d dlZd dlmZ d dlmc mZ	 d dl
mZ d dlmZmZmZmZmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZmZ d dlmZ d dl m!Z! ddl"m#Z# eej$        dZ% G d dej&                  Z' G d dej&                  Z( G d dej&                  Z) G d dej&                  Z* G d dej&                  Z+ G d dej&                  Z, G d dej&                  Z-dS )    )Iterable)partialN)PretrainedConfig)divideget_tensor_model_parallel_rank$get_tensor_model_parallel_world_sizesplit_tensor_along_last_dim tensor_model_parallel_all_gather)
get_act_fn)MMEncoderAttention)Conv2dLayer)RMSNorm)ColumnParallelLinearQKVParallelLinearRowParallelLinear)QuantizationConfig)default_weight_loader   )run_dp_sharded_vision_model)rms_norm
layer_normc                        e Zd Zdef fdZdej        dedefdZdededej        fdZ	d	ej
        dej        fd
Z xZS )InternVisionEmbeddingsconfigc                 
   t                                                       || _        |j        | _        |j        | _        |j        | _        t          j        t          j
        dd| j                            | _        t          d| j        | j        | j                  | _        | j        | j        z  dz  | _        | j        dz   | _        t          j        t          j
        d| j        | j                            | _        d S )Nr      )in_channelsout_channelskernel_sizestride   )super__init__r   hidden_size	embed_dim
image_size
patch_sizenn	Parametertorchrandnclass_embeddingr   patch_embeddingnum_patchesnum_positionsposition_embeddingselfr   	__class__s     y/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/intern_vit.pyr#   zInternVisionEmbeddings.__init__.   s    + + +!|EK1dn,M,MNN*?	 
  
  
 !Ot>1D!-1"$,K4-t~>>#
 #
    	pos_embedHWc                 |   |j         }|                                                    d| j        | j        z  | j        | j        z  d                              dddd          }t          j        |||fdd          }|                    dd||z                                ddd                              |          S )	Nr   r   r   r!   bicubicF)sizemodealign_corners)	dtypefloatreshaper&   r'   permuteFinterpolateto)r2   r6   r7   r8   target_dtypes        r4   _get_pos_embedz%InternVisionEmbeddings._get_pos_embedE   s     OOW4?24?2	  WQ1a   	 MQF%
 
 
	   BA..66q!Q??BB<PPPr5   returnc           
          | j         }| j        ||z  k    r|S t          j        |d d d dd d f         |                     |d d dd d d f         ||          gd          S )Nr   dim)r0   r.   r*   catrG   )r2   r7   r8   r0   s       r4   _get_position_embeddingz.InternVisionEmbeddings._get_position_embeddingV   s    !4q1u$$%%y"111bqb!!!8,##$6qqq!""aaax$@!QGG 
 
 
 	
r5   pixel_valuesc                    | j         j        j        }|                      |                    |                    }|j        \  }}}}|                    d                              dd          }| j                            |dd                              |          }t          j
        ||gd          }	|                     ||          }
|	|
                    |          z   }	|	S )Nr!   r   r:   rJ   )r-   weightr?   rE   shapeflatten	transposer,   expandr*   rL   rM   )r2   rN   rF   patch_embeds
batch_size_heightwidthclass_embeds
embeddingsr0   s              r4   forwardzInternVisionEmbeddings.forwardc   s    +28++OOL))
 
 (4'9$
Avu#++A..88A>>+22:q"EEHHVVYl;CCC
!99&%HH"4"7"7"E"EE
r5   )__name__
__module____qualname__r   r#   r*   TensorintrG   rM   FloatTensorr\   __classcell__r3   s   @r4   r   r   -   s        
/ 
 
 
 
 
 
.Q Q Q Q Q Q Q"
 
 
 
 
 
 
E$5 %,        r5   r   c                   n     e Zd Zdef fdZd Z	 	 d	dej        dz  dej        dz  dej        fdZ	 xZ
S )
InternVisionPatchModelr   c                 ~    t                                                       || _        t          |          | _        d S N)r"   r#   r   r   r[   r1   s     r4   r#   zInternVisionPatchModel.__init__r   s3    088r5   c                     | j         S rh   r[   r2   s    r4   get_input_embeddingsz+InternVisionPatchModel.get_input_embeddingsw   
    r5   NrN   pixel_embedsrH   c                     ||t          d          ||}n:|8|j        dk    r|                     |          }nt          d|j                   |S )N0You have to specify pixel_values or pixel_embeds   wrong pixel_values size: )
ValueErrorndimr[   rQ   )r2   rN   rn   hidden_statess       r4   r\   zInternVisionPatchModel.forwardz   sr    
 L$8OPPP#(MM% A%% $ = = !Q\=O!Q!QRRRr5   NN)r]   r^   r_   r   r#   rl   r*   r`   rb   r\   rc   rd   s   @r4   rf   rf   q   s        9/ 9 9 9 9 9 9
  
 -1,0 lT) lT) 
		       r5   rf   c                        e Zd ZdZ	 ddddddededz  d	ed
ededdf fdZ	de
j        de
j        fdZde
j        de
j        fdZ xZS )InternParallelAttentionz=Multi-headed attention from 'Attention Is All You Need' paperNr    Fnum_dummy_headsprefixuse_data_parallelr   quant_configr{   r|   r}   rH   c          	         t                                                       || _        |j        | _        |j        | _        | j        | j        z  | _        | j        | j        z  | j        k    r t          d| j         d| j         d          |rdnt                      | _
        |rdnt                      | _        || j        z   | j        z  | _        t          || j        z   | j
                  | _        | j        dz  | _        t#          | j        | j        || j        z   |j        || d|          | _        |j        | _        | j        rLt+          | j        |j        | j        	          | _        t+          | j        |j        | j        	          | _        t3          | j        | j        || d
|          | _        t7          | j        | j        | j                  | _        d S )Nz;embed_dim must be divisible by num_heads (got `embed_dim`: z and `num_heads`: z).r   r   g      z.qkvbiasr~   r|   
disable_tp)epsvar_hidden_sizez.proj)r~   r|   r   )r"   r#   r   r$   r%   num_attention_heads	num_headshead_dimrs   r   tp_sizer   tp_rank	dummy_dimr   num_heads_per_partitionscaler   qkv_biasqkvqk_normalizationr   layer_norm_epsq_normk_normr   projr   attn)r2   r   r~   r{   r|   r}   r3   s         r4   r#   z InternParallelAttention.__init__   s    	+3$.8=4>)T^;;'%)^' 'N' ' '   #NAA(L(N(N 	 .Sqq3Q3S3S *DN:dmK'-dn,dl(
 (
$ ]D(
$NMdn,%???(
 
 
 !' 7  
	!) $  DK
 ") $  DK &NN%###(
 
 
	 '($-
 
			r5   qkc                    | j         dk    rBt          |                                          }t          |                                          }|                     |          }|                     |          }| j         dk    rGt          t          | j                   } ||          | j                 } ||          | j                 }||fS )Nr   )num_partitions)r   r
   
contiguousr   r   r   r	   r   )r2   r   r   splitters       r4   _apply_qk_normz&InternParallelAttention._apply_qk_norm   s    <!0@@A0@@AKKNNKKNN<!:4<XXXHDL)ADL)A!tr5   xc                     |j         \  }}}|                     |          \  }}|                    dd          \  }}}| j        r|                     ||          \  }}|                     |||          }	|                     |	          \  }	}|	S )Nr   r:   rJ   )rQ   r   chunkr   r   r   r   )
r2   r   BNrW   r   r   r   vouts
             r4   r\   zInternParallelAttention.forward   s    '1a!Q))A2)&&1a  	-&&q!,,DAqii1a  3Q
r5   rh   )r]   r^   r_   __doc__r   r   ra   strboolr#   r*   r`   r   r\   rc   rd   s   @r4   rx   rx      s        GG
 37D

  !"'D
 D
 D
 D
 )4/D

 D
 D
  D
 
D
 D
 D
 D
 D
 D
L
 
 
 
 
 

 
%, 
 
 
 
 
 
 
 
r5   rx   c                   f     e Zd Z	 	 	 ddededz  dededdf
 fd	Zd
ej	        dej	        fdZ
 xZS )	InternMLPNry   Fr   r~   r|   r}   rH   c                 $   t                                                       || _        t          |j                  | _        t          |j        |j        d|| d|          | _	        t          |j        |j        d|| d|          | _        d S )NTz.fc1r   z.fc2)r"   r#   r   r   
hidden_actactivation_fnr   r$   intermediate_sizefc1r   fc2)r2   r   r~   r|   r}   r3   s        r4   r#   zInternMLP.__init__   s     	'(9::'$%???(
 
 
 %$%???(
 
 
r5   ru   c                     |                      |          \  }}|                     |          }|                     |          \  }}|S rh   )r   r   r   )r2   ru   rW   s      r4   r\   zInternMLP.forward  sG    88M22q**=9988M22qr5   )Nry   F)r]   r^   r_   r   r   r   r   r#   r*   r`   r\   rc   rd   s   @r4   r   r      s         37"'
 
 
 )4/
 	

  
 

 
 
 
 
 
8U\ el        r5   r   c                        e Zd Z	 ddddeddededz  ded	ed
ede	e         ddf fdZ
ddddededz  ded	ed
ef
dZdej        fdZ xZS )InternVisionEncoderLayerNr   ry   F)r{   r|   r}   attn_clsr   r~   r{   r|   r}   r   rH   c                   t                                                       |j        | _        |j        | _        |j        | _        || _        |                     |||| d|          | _        t          ||| d|          | _
        t          | j                 | j        |j                  | _        t          | j                 | j        |j                  | _        t          j        |j        t%          j        | j                  z            | _        t          j        |j        t%          j        | j                  z            | _        d S )Nz.attnrz   z.mlp)r~   r|   r}   )r   )r"   r#   r$   r%   r   	norm_typer   
_init_attnr   r   mlpNORM2FNr   norm1norm2r(   r)   initializer_factorr*   onesls1ls2)r2   r   r~   r{   r|   r}   r   r3   s          r4   r#   z!InternVisionEncoderLayer.__init__  s-    	+!'!9) OO+###/ $ 
 
	 %???/	
 
 
 T^,T^AVWWW
T^,T^AVWWW
< 9EJt~<V<V VWW< 9EJt~<V<V VWWr5   )r|   r}   c                    |rdnt                      }|j        }|p||z   |z  dk    }|                     |||||          S )Nr   r   )r~   r{   r|   r}   )r   r   r   )r2   r   r~   r{   r|   r}   r   r   s           r4   r   z#InternVisionEncoderLayer._init_attn:  sk     )T!!.R.T.T.	
 M)o"=!HA!M 	 }}%+/  
 
 	
r5   ru   c                     ||                      |                     |                    | j        z  z   }||                     |                     |                    | j        z  z   }|S rh   )r   r   r   r   r   r   )r2   ru   s     r4   r\   z InternVisionEncoderLayer.forwardT  s\     &		$**]2K2K(L(Ltx(WW%M1J1J(K(Kdh(VVr5   rh   )r]   r^   r_   rx   r   r   ra   r   r   typer#   r   r*   r`   r\   rc   rd   s   @r4   r   r     s8        37#X
  !"'2I#X #X #X #X )4/#X
 #X #X  #X ./#X 
#X #X #X #X #X #XV "'
 
 
 
 )4/

 
 
  
 
 
 
4|       r5   r   c                   |     e Zd Z	 dddddeddededz  dedz  d	ed
edede	e         f fdZ
dej        fdZ xZS )InternVisionEncoderNr   ry   F)num_hidden_layers_overrider{   r|   r}   	layer_clsr   r~   r   r{   r|   r}   r   c                     t                                                        _        | _        |j        }n|}t          j         fdt          |          D                        _        d S )Nc           
      N    g | ]!}                      d |           "S )z.layers.rz   )r   ).0	layer_idxr   r{   r|   r~   r2   r}   s     r4   
<listcomp>z0InternVisionEncoder.__init__.<locals>.<listcomp>v  s[     	 	 	   $3$99i99&7   	 	 	r5   )	r"   r#   r   r   num_hidden_layersr(   
ModuleListrangelayers)
r2   r   r~   r   r{   r|   r}   r   r   r3   s
   ``` ```  r4   r#   zInternVisionEncoder.__init__`  s     	"%- & 8 :m	 	 	 	 	 	 	 	 	 "''8!9!9	 	 	
 
r5   inputs_embedsc                 4    |}| j         D ]} ||          }|S rh   )r   )r2   r   ru   encoder_layers       r4   r\   zInternVisionEncoder.forward  s/    %![ 	9 	9M)M-88MMr5   rh   )r]   r^   r_   r   r   r   ra   r   r   r   r#   r*   r`   r\   rc   rd   s   @r4   r   r   _  s         37 

 26 "'4L 
  
  
  
 )4/ 

 %($J 
  
  
   
 01 
  
  
  
  
  
DU\        r5   r   c                        e Zd ZddgiZ	 dddddddededz  d	edz  d
edededdf fdZ	d Z
	 	 ddej        dz  dej        dz  dej        fdZdeeeej        f                  dee         fdZ xZS )InternVisionModelr   Nr   ry   F)r   r{   r|   r}   r   r~   r   r{   r|   r}   rH   c                    t                                                       || _        || _        t	          |          | _        t          ||||| d|          | _        d S )Nz.encoder)r   r~   r   r{   r|   r}   )r"   r#   r   r}   r   r[   r   encoder)r2   r   r~   r   r{   r|   r}   r3   s          r4   r#   zInternVisionModel.__init__  sn     	!2088*%'A+&&&/
 
 
r5   c                     | j         S rh   rj   rk   s    r4   rl   z&InternVisionModel.get_input_embeddings  rm   r5   rN   rn   c                    ||t          d          ||}n:|8|j        dk    r|                     |          }nt          d|j                   | j        rt          || j                  }n|                     |          }|S )Nrp   rq   rr   )r   )rs   rt   r[   rQ   r}   r   r   )r2   rN   rn   ru   encoder_outputss        r4   r\   zInternVisionModel.forward  s    
 L$8OPPP#(MM% A%% $ = = !Q\=O!Q!QRRR! 	H9-VVOO"lllGGOr5   weightsc                     t          |                                           }t                      }|D ]D\  }}||         }t          |dt                    } |||           |                    |           E|S )Nweight_loader)dictnamed_parameterssetgetattrr   add)r2   r   params_dictloaded_paramsnameloaded_weightparamr   s           r4   load_weightszInternVisionModel.load_weights  s    4002233"%%%#* 	$ 	$D-%E#E?<QRRMM%///d####r5   rh   rv   )r]   r^   r_   packed_modules_mappingr   r   ra   r   r   r#   rl   r*   r`   rb   r\   r   tupler   r   rc   rd   s   @r4   r   r     sO       w 37

 26 "'
 
 
 
 )4/

 %($J
 
 
  
 

 
 
 
 
 
2  
 -1,0 lT) lT) 
		   .HU33D-E$F 3s8        r5   r   ).collections.abcr   	functoolsr   r*   torch.nnr(   torch.nn.functional
functionalrC   transformersr   vllm.distributedr   r   r   r	   r
   %vllm.model_executor.layers.activationr   9vllm.model_executor.layers.attention.mm_encoder_attentionr   vllm.model_executor.layers.convr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   'vllm.model_executor.layers.quantizationr   -vllm.model_executor.model_loader.weight_utilsr   visionr   	LayerNormr   Moduler   rf   rx   r   r   r   r    r5   r4   <module>r      s   % $ $ $ $ $                       ) ) ) ) ) )              = < < < < < X X X X X X 7 7 7 7 7 7 8 8 8 8 8 8         
 G F F F F F O O O O O O / / / / / / , A A A A ARY A A AH    RY   8_ _ _ _ _bi _ _ _D" " " " "	 " " "JH H H H Hry H H HV( ( ( ( (") ( ( (V@ @ @ @ @	 @ @ @ @ @r5   