
    .`imi              	          U d dl mZmZ d dlmZmZmZ d dlZd dlm	Z	 d dlm
Z
 d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlm Z m!Z!m"Z"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z) d dl*m+Z+m,Z,m-Z-m.Z. d dl/m0Z0 d dl1m2Z2 d dl3m4Z4m5Z5m6Z6m7Z7 d dl8m9Z9m:Z:m;Z;m<Z< d dl=m>Z> d dl?m@Z@ d dlAmBZB d dlCmDZD d dlEmFZFmGZG d dlHmIZImJZJ d dlKmLZL ddlMmNZNmOZO d ZP G d! d"eI          ZQ G d# d$eI          ZReQeRz  ZSeeTd%<    G d& d'e5          ZU G d( d)e7          ZV G d* d+e	jW                  ZX G d, d-e	jW                  ZY G d. d/e	jW                  ZZ G d0 d1e	jW                  Z[ G d2 d3e	jW                  Z\ G d4 d5e	jW                  Z] G d6 d7e	jW                  Z^ G d8 d9e	jW                  Z_ e@j`        e6eVeU:           G d; d<e	jW        e-e.e,                      ZadS )=    )IterableMapping)	AnnotatedLiteral	TypeAliasN)	LayerNorm)Qwen2VLProcessor)
VllmConfig)BaseDummyOptions)utils)get_tensor_model_parallel_rank$get_tensor_model_parallel_world_size)
SiluAndMul)MMEncoderAttention)Conv2dLayer)RMSNorm)ColumnParallelLinearMergedColumnParallelLinearQKVParallelLinearRowParallelLinear)QuantizationConfig)ApplyRotaryEmb)default_weight_loader)MultiModalEmbeddingsSupportsLoRASupportsMultiModal
SupportsPP)MultiModelKeys)Qwen2ForCausalLM)Qwen2VisionAttentionQwen2VLDummyInputsBuilderQwen2VLMultiModalProcessorQwen2VLProcessingInfo)AutoWeightsLoaderWeightsMapperinit_vllm_registered_modelmaybe_prefix)get_vit_attn_backend)MULTIMODAL_REGISTRY)MultiModalDataDict)IntermediateTensors)DotsOCRConfigDotsVisionConfig)TensorSchemaTensorShape)AttentionBackendEnum   )is_vit_use_data_parallel!run_dp_sharded_mrope_vision_modelz
<|imgpad|>c                       e Zd ZU dZed         ed<   eej         e	dd          f         ed<   eej         e	dd          f         ed<   d	S )
DotsOCRImagePixelInputsz
    Dimensions:
        - np: The total number of patches over each image over each prompt in
              the batch
        - ni: Number of images
        - cps: Number of channels * patch_size * patch_size
    pixel_valuestypenpcpsni   image_grid_thwN
__name__
__module____qualname____doc__r   __annotations__r   torchTensorr/        w/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/dots_ocr.pyr5   r5   D   sw           .
!!!!EL++dE*B*BBCCCCelKKa,@,@@AAAAAArF   r5   c                       e Zd ZU dZed         ed<   eej         e	dd          f         ed<   eej         e	dd          f         ed<   d	S )
DotsOCRImageEmbeddingInputszu
    Dimensions:
        - nf: Number of image features
        - hs: Hidden size
        - ni: Number of images
    image_embedsr7   nfhsr:   r;   r<   Nr=   rE   rF   rG   rI   rI   S   sw           .
!!!!EL++dD*A*AABBBBelKKa,@,@@AAAAAArF   rI   DotsOCRImageInputsc            	       p    e Zd Zdeeef         defdZ	 ddedeeef         deeef         dz  defdZ	dS )	DotsOCRDummyInputsBuilder	mm_countsreturnc                 B    |                     dd          }t          |z  S )Nimager   )getIMAGE_TOKEN)selfrP   
num_imagess      rG   get_dummy_textz(DotsOCRDummyInputsBuilder.get_dummy_texte   s     ]]7A..
Z''rF   Nseq_len
mm_optionsc                     |                     dd          }| j                                        \  }}|r|                     d          nd }d|                     ||||          iS )NrS   r   )widthheightrW   	overrides)rT   info!get_image_size_with_most_features_get_dummy_images)rV   rY   rP   rZ   rW   target_widthtarget_heightimage_overridess           rG   get_dummy_mm_dataz+DotsOCRDummyInputsBuilder.get_dummy_mm_datai   s     ]]7A..
&*i&Q&Q '
 '
#m 6@I*..111T T++"$%)	 ,  
 	
rF   N)
r>   r?   r@   r   strintrX   r   r*   re   rE   rF   rG   rO   rO   d   s        (S(9 (c ( ( ( ( =A	
 

 38$
 C!112T9	

 

 
 
 
 
 
rF   rO   c                       e Zd ZdefdZdeeedz  f         fdZdedeeef         deeef         fdZ	de
defd	ZdS )
DotsOCRProcessingInforQ   c                    | j                                         }|j        j        dk    st	          dt          |                     t          |d          r0t          |j        t                    rt          di |j        |_        |S )Nr,   zExpected DotsOCRConfig, got vision_configrE   )ctxget_hf_config	__class__r>   	TypeErrorr7   hasattr
isinstancerl   dictr-   )rV   configs     rG   rn   z#DotsOCRProcessingInfo.get_hf_config   s    ''))(O;;I4<<IIJJJ6?++ 	L
6;OQU0V0V 	L#3#K#Kf6J#K#KF rF   Nc                 
    dd iS NrS   rE   rV   s    rG   get_supported_mm_limitsz-DotsOCRProcessingInfo.get_supported_mm_limits   s    rF   rY   rP   c                 2    |                                  }d|iS rv   )get_max_image_tokens)rV   rY   rP   max_image_tokenss       rG   get_mm_max_tokens_per_itemz0DotsOCRProcessingInfo.get_mm_max_tokens_per_item   s"    
  4466)**rF   kwargsc                     t           |                                 _         | j        j        t
          fi |}t           |_        d|_        |S )Nz<|video_pad|>)rU   get_tokenizerimage_tokenrm   get_hf_processorr	   video_token)rV   r}   	processors      rG   r   z&DotsOCRProcessingInfo.get_hf_processor   sV     ,7(-DH-
 

 
	 !,	 /	rF   )r>   r?   r@   r,   rn   r   rg   rh   rx   r|   objectr	   r   rE   rF   rG   rj   rj      s        }    cDj)A    ++ 38$+ 
c		+ + + + 
     rF   rj   c                   H     e Zd Zd	dededdf fdZdedej        fdZ xZ	S )
VisionRotaryEmbedding     @dimthetarQ   Nc                     t                                                       d|t          j        d|dt          j                  |z  z  z  }|                     d|d           d S )Ng      ?r      dtypeinv_freqF)
persistent)super__init__rC   arangefloatregister_buffer)rV   r   r   r   ro   s       rG   r   zVisionRotaryEmbedding.__init__   sd    %ELC%+$N$N$NQT$TUVZeDDDDDrF   seqlenc                     t          j        || j        j        | j        j                  }t          j        || j                  }|S )Ndevicer   )rC   r   r   r   r   outer)rV   r   seqfreqss       rG   forwardzVisionRotaryEmbedding.forward   sC    l4=/t}7J
 
 
 C//rF   )r   )
r>   r?   r@   rh   r   r   rC   rD   r   __classcell__ro   s   @rG   r   r      s        E EC E ED E E E E E E
c el        rF   r   c                   `     e Zd Z	 	 	 ddededededd	f
 fd
Zdej        dej        fdZ xZ	S )PatchMergerr   	layernorm r   context_dimspatial_merge_sizeprefixrQ   Nc                    t                                                       t                      }||dz  z  | _        || _        | j        dk    rt          |d          | _        n!| j        dk    rt          |d          | _        t          j	        t          | j        | j        dd| d|	          t          j                    t          | j        |dd| d
|	                    | _        d S )Nr   r   gư>epsrmsnormTFz.0)biasreturn_biasr   
disable_tpz.2)r   r   r2   hidden_sizepre_normr   ln_qr   nn
Sequentialr   GELUr   mlp)rV   r   r   r   r   r   use_data_parallelro   s          rG   r   zPatchMerger.__init__   s    	466&*<a*?@ =K''!+4888DII]i''666DI=   ! }}},   GII ! }}},  
 
rF   xc                     | j         rB|                     |                     |                              d| j                            }n.|                     |                    d| j                            }|S )N)r   r   r   viewr   )rV   r   s     rG   r   zPatchMerger.forward   sc    = 	71**2t/?@@AAAAD$45566ArF   )r   r   r   )
r>   r?   r@   rh   rg   r   rC   rD   r   r   r   s   @rG   r   r      s        
 #$#
 #
#
 #
  	#
 #
 
#
 #
 #
 #
 #
 #
J %,        rF   r   c                        e Zd Z	 	 dddddededed	edz  d
eddf fdZ	 ddddej	        dej	        dej	        dz  dej	        dz  dej	        f
dZ
 xZS )DotsVisionAttention   TNr   quant_configr   r   	num_headsr   r   r   rQ   c          	      F   t                                                       t                      }|| _        |rdnt	                      | _        |rdnt                      | _        t          j	        ||          | _
        t          j	        || j                  | _        t          || j
        |||| d|          | _        t          ||||| d|          | _        t!          | j        | j
        | j
        dz  | d	          | _        t%          d
d
          | _        d S )Nr1   r   z.qkv)r   	head_sizetotal_num_headsr   r   r   r   z.proj)
input_sizeoutput_sizer   r   r   r   g      .attn)r   r   scaler   T)enforce_enableenable_fp32_compute)r   r   r2   	embed_dimr   tp_sizer   tp_rank
dist_utilsdividehidden_size_per_attention_head!num_attention_heads_per_partitionr   qkvr   projr   attnr   apply_rotary_emb)	rV   rt   r   r   r   r   r   r   ro   s	           rG   r   zDotsVisionAttention.__init__   s[    	466"NAA(L(N(N 	 .Sqq3Q3S3S.8.?Y.O.O+1;1Bt|2
 2
. %9%%???(
 
 
 &%###(
 
 
	 '<95t;###	
 
 
	 !/ $!
 !
 !
rF   
max_seqlenhidden_states
cu_seqlensrotary_pos_embr   c                   |                     d          }|                     |          \  }}t          j        | |          \  }}}	|j        d         }
|                    dddd                                          }|                    dddd                                          }|	                    dddd                                          }	|mt          j        ||gd          }| 	                    ||
                                |                                          }t          j        |dd          \  }}|                     |||	||          }|                    dddd                                          }|                    |j        d         |
d          }|                     |          \  }}|                    d          S )Nr1   r   r   r;   r   )querykeyvaluer   r   r   )	unsqueezer   r    	split_qkvshapepermute
contiguousrC   catr   cossinchunkr   r   r   squeeze)rV   r   r   r   r   r   _qkvbs	qk_concat
qk_rotatedcontext_layerouts                  rG   r   zDotsVisionAttention.forward  s    ##A&&xx{{1&0q991aWQZIIaAq!!,,..IIaAq!!,,..IIaAq!!,,..%	1a&a000I..""$$""$$ J
 ;z1!444DAq		!! " 
 
 &--aAq99DDFF%**=+>q+A2rJJ=))Q{{1~~rF   )r   Trf   )r>   r?   r@   rh   boolr   rg   r   rC   rD   r   r   r   s   @rG   r   r      s       
 3
 373
 3
 3
 3
 	3

 3
 )4/3
 3
 
3
 3
 3
 3
 3
 3
r /3	' +/' ' '|' L' t+	' L4'' 
' ' ' ' ' ' ' 'rF   r   c                        e Zd Zddddedz  def fdZdej        dej        fd	Zd
e	e
eej        f                  dee         fdZ xZS )DotsSwiGLUFFNNr   r   r   r   c                0   t                                                       |j        }|j        }|j        }t                      }t          ||gdz  ||| d|          | _        t          ||||| d|          | _	        t                      | _        d S )Nr   z.fc13)r   r   r   r   z.fc2)r   r   intermediate_sizer   use_biasr2   r   fc13r   fc2r   act_fn)	rV   rt   r   r   hidden_featuresin_featuresr   r   ro   s	           rG   r   zDotsSwiGLUFFN.__init__A  s     	 2&466.!%###(
 
 
	 %%???(
 
 
 !llrF   r   rQ   c                     |                      |          \  }}|                     |          }|                     |          \  }}|S rf   )r   r   r   )rV   r   r   s      rG   r   zDotsSwiGLUFFN.forwarda  s<    yy||1KKNNxx{{1rF   weightsc                    ddg}t          |                                           }t                      }|D ]\  }}|D ]X\  }}}	||vr|                    ||          }|                    d          r||vr;||         }
|
j        } ||
||	            nD|                    d          r||vrz||         }
t          |
dt                    } ||
|           |                    |           |S )N)r   fc1r   )r   fc3r1   z.biasweight_loader)	rs   named_parameterssetreplaceendswithr   getattrr   add)rV   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
param_nameweight_nameshard_idparamr   s               rG   load_weightszDotsSwiGLUFFN.load_weightsg  s9   "
 4002233"%%%#* 	$ 	$D-5K 4 41
Kd**||K<<==)) d+.E.E#D) % 3e]H=== ==)) d+.E.E#D) '@U V Ve]333d####rF   )r>   r?   r@   r   rg   r   rC   rD   r   r   tupler  r  r   r   s   @rG   r   r   @  s        
 37# # # )4/	#
 # # # # # #@ %,    HU33D-E$F 3s8        rF   r   c                   D     e Zd Z fdZddej        dej        fdZ xZS )DotsPatchEmbedc                 j   t                                                       |j        | _        |j        | _        |j        | _        |j        | _        || _        t          |j        |j        |j        |j        f|j        |j        f          | _        t          |j        |j
                  | _        d S )N)kernel_sizestrider   )r   r   num_channels
patch_sizetemporal_patch_sizer   rt   r   r   r   rms_norm_epsnormrV   rt   ro   s     rG   r   zDotsPatchEmbed.__init__  s    "/ +#)#= )*F,=>%v'89	
 
 
	 F,&2EFFF			rF   Nr   rQ   c                     |                     d| j        | j        | j        | j                  d d d d df         }|                     |                               d| j                  }|                     |          }|S )Nr   r   )r   r  r  r  r   r   r  )rV   r   grid_thws      rG   r   zDotsPatchEmbed.forward  s}    FF$OO
 
 !!QQQ' IIaLLb$.11IIaLLrF   rf   r>   r?   r@   r   rC   rD   r   r   r   s   @rG   r  r    sh        G G G G G
 
 
 
 
 
 
 
 
 
 
rF   r  c                   D     e Zd Z fdZddej        dej        fdZ xZS )DotsViTPreprocessorc                     t                                                       |j        | _        |j        | _        |j        | _        || _        t          |          | _        d S rf   )	r   r   r  patch_hpatch_wr   rt   r  
patchifierr  s     rG   r   zDotsViTPreprocessor.__init__  sQ    (()(00rF   Nr   rQ   c                 2    |                      ||          }|S rf   )r%  )rV   r   r  tokenss       rG   r   zDotsViTPreprocessor.forward  s    H--rF   rf   r  r   s   @rG   r!  r!    sc        1 1 1 1 1          rF   r!  c                        e Zd Zddddedz  def fdZdddej        d	ej        d
ej        dedz  dej        f
dZ	 xZ
S )DotsVisionBlockNr   r   r   r   c          	      T   t                                                       t          ||j        |j        |j        || d          | _        t          |j        |j                  | _	        t          ||| d          | _        t          |j        |j                  | _        d S )Nr   )r   r   r   r   r   z.mlpr   )r   r   r   r   num_attention_headsr   r   r   r  norm1r   r   norm2)rV   rt   r   r   ro   s       rG   r   zDotsVisionBlock.__init__  s     	'0%###
 
 
	 V-63FGGG
 %???
 
 

 V-63FGGG


rF   r   r   r   r   r   rQ   c                    ||                      |                     |          |||          z   }||                     |                     |                    z   }|S )Nr   r   r   )r   r,  r   r-  )rV   r   r   r   r   s        rG   r   zDotsVisionBlock.forward  se     &		JJ}%%!)!	 )2 )
 )
 
 &M1J1J(K(KKrF   )r>   r?   r@   r   rg   r   rC   rD   rh   r   r   r   s   @rG   r)  r)    s        
 37H H H )4/	H
 H H H H H H> "&  | L	
  $J 
       rF   r)  c                   t    e Zd Z	 ddddddededz  dedz  dedz  ded	df fd
Ze	d	e
j        fd            Ze	d	e
j        fd            Zdeee                  d	ee
j                 fdZdeee                  d	e
j        fdZde
j        d	edz  fdZde
j        deee                  d	e
j        fdZ xZS )DotsVisionTransformerNr   )num_hidden_layers_overriderequire_post_normr   rt   r   r2  r3  r   rQ   c                   t                                                       | _        j        | _        t	                    | _        j        j        z  }t          |dz            | _	        t          |t          j                              | _        j        | _        |j        n|}t#          j        fdt'          |          D                       | _        |t+          | j                  j        k    }|r-| j        j        r!t/          j        j                  | _        nd | _        t5          j        j        j                  | _        d S )Nr   )r   r   c           	      >    g | ]}t           d |           S )z.blocks.r   )r)  ).0irt   r   r   s     rG   
<listcomp>z2DotsVisionTransformer.__init__.<locals>.<listcomp>  sQ          !-$11a11    rF   r   )r   r   r   )r   r   rt   r   r!  patch_embedr   r+  r   r   r(   rC   get_default_dtypeattn_backendr   out_hidden_sizenum_hidden_layersr   
ModuleListrangeblockslen	post_normr   r  post_trunk_normr   merger)	rV   rt   r   r2  r3  r   head_dim
num_layersro   s	    ``  `  rG   r   zDotsVisionTransformer.__init__  s    	"(";.v66#v'AA3HMBB0)++
 
 
  &1 *1 $$+ 	
 m      z**  	
 	
 $ #DK 0 0F4L L 	(!6 	(#*6+;AT#U#U#UD  #'D !"(%8
 
 
rF   c                 8    | j         j        j        j        j        S rf   )r9  r%  r   weightr   rw   s    rG   r   zDotsVisionTransformer.dtype  s    */6<<rF   c                 8    | j         j        j        j        j        S rf   )r9  r%  r   rH  r   rw   s    rG   r   zDotsVisionTransformer.device  s    */6==rF   r  c                    g }|D ]x\  }}}t          j        |                              d                              d|          }|                    || j        z  | j        || j        z  | j                  }|                    dddd          }|                                }t          j        |                              d                              |d          }|                    || j        z  | j        || j        z  | j                  }|                    dddd          }|                                }|                    t          j	        ||gd          
                    |d                     z|S )Nr1   r   r   r   r;   r   )rC   r   r   expandreshaper   r   flattenappendstackrepeat)rV   r  pos_idsthwhpos_idswpos_idss           rG   get_pos_ids_by_gridz)DotsVisionTransformer.get_pos_ids_by_grid  sx    	S 	SGAq!|A0033::2qAAH''T,,'T,,'	 H  ''1a33H''))H|A0033::1bAAH''T,,'T,,'	 H  ''1a33H''))HNN5;(';DDDKKAqQQRRRRrF   c                     |                      |          }t          j        |d          }t          d |D                       }|                     |          }||                             d          }|S )Nr   r   c              3   >   K   | ]\  }}}t          ||          V  d S rf   )max)r6  r   rS  rT  s       rG   	<genexpr>z4DotsVisionTransformer.rot_pos_emb.<locals>.<genexpr>6  s0      >>'!QC1II>>>>>>rF   r1   )rW  rC   r   rZ  r   rM  )rV   r  rQ  max_grid_sizerotary_pos_emb_fullr   s         rG   rot_pos_embz!DotsVisionTransformer.rot_pos_emb3  sw    **844)G+++>>X>>>>>"11-@@,W5==a@@rF   r   c                     d }| j         t          j        k    s| j         t          j        k    r'|dd          |d d         z
                                  }|S )Nr1   r   )r;  r0   
FLASH_ATTNROCM_AITER_FArZ  )rV   r   r   s      rG   compute_attn_mask_seqlenz.DotsVisionTransformer.compute_attn_mask_seqlen;  sW    
!5!@@@ $8$FFF$QRR.:crc?:??AAJrF   r   c                    |                      |          }t          j        ||j        t          j                  }|                    | j                  }|                     ||          }t          j        |d d df         |d d df         z  |d d df                   	                    dt          j
                                        r|j        nt          j                  }t          j        |                    d          |g          }|                     |          }| j        D ]} |||||          }| j        |                     |          }|                     |          }|S )Nr   r1   r   r   )r   r   r/  )r^  rC   tensorr   longtor   r9  repeat_interleavecumsumjit
is_tracingint32r   	new_zerosrb  r@  rC  rD  )rV   r   r  r   r   r   blks          rG   r   zDotsVisionTransformer.forwardD  sw    ))(33 <1EUZXXX%((44((AA,QQQTNXaaad^+Xaaad^
 

&$)I$8$8$:$:K(..  
 
 	 Y
 4 4Q 7 7DEE
22:>>
; 	 	CC%-%	  MM + 00??MM22rF   rf   )r>   r?   r@   r-   r   rh   r   rg   r   propertyrC   r   r   listrD   rW  r^  rb  r   r   r   s   @rG   r1  r1    s        371

 26)-1
 1
 1
 1
 )4/1

 %($J1
  $;1
 1
 
1
 1
 1
 1
 1
 1
f =u{ = = = X= > > > > X>DcO U\@R    4DcO     5< C$J    "\59$s)_	       rF   r1  )r_   dummy_inputsc                       e Zd Z edddddd          Zg dd	d
gdgddgdZdZedede	dedz  fd            Z
dddedef fdZdededz  fdZdedeej        df         fdZde	de	fd Zd!e	de	fd"Zdedefd#Z	 	 d.d$ej        d%ej        d&edz  d'ej        dz  dej        ez  f
d(Zd)ej        dej        dz  fd*Zd+eeeej        f                  dee         fd,Zdefd-Z xZ S )/DotsOCRForCausalLMz
.attn.qkv.z.attn.proj.)z.attn.qkv_proj.z.attn.out_proj.zlanguage_model.lm_head.zlanguage_model.model.)zlm_head.zmodel.)orig_to_new_substrorig_to_new_prefix)q_projk_projv_proj	gate_projup_proj	.attn.qkvr   r   )qkv_projgate_up_projrz  r   Tmodalityr7  rQ   Nc                 4    |                     d          rdS d S )NrS   z<|img|><|imgpad|><|endofimg|>)
startswith)clsr}  r7  s      rG   get_placeholder_strz&DotsOCRForCausalLM.get_placeholder_str  s(    w'' 	322	3 	3rF   r   )r   vllm_configr   c          	         t                                                       |j        j        | _        |j        | _        |j        j        }|j        dk    | _        t          | j        j
        t                    r#t          di | j        j
        }|| j        _
        n| j        j
        }|                     |d          5  t          || j        t          |d                    | _        d d d            n# 1 swxY w Y   |                     |          5  t%          || j        t          |d          dg          | _        d d d            n# 1 swxY w Y   | j        j        | _        d S )	NdatarS   vision_towerr   language_modelr   )r  	hf_configr   architecturesrE   )r   r   model_configr  rt   r   multimodal_configmm_encoder_tp_moder   rr   rl   rs   r-   _mark_tower_modelr1  r'   r  _mark_language_modelr&   r  make_empty_intermediate_tensors)rV   r  r   r  rl   ro   s        rG   r   zDotsOCRForCausalLM.__init__  s   %0%=%G'4'4F!2!E!Odk/66 	6,IIt{/HIIM(5DK%% K5M##K99 	 	 5!.#FN;;! ! !D	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 &&{33 	 	4N'+#F,<==12	5 5 5D	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ? 	,,,s$   ?+C66C:=C:-EEEr}   c                     |                     dd           }|                     dd           }|                     dd           }||d S |t          d||          S |t          d||          S d S )Nr6   rJ   r<   )r7   r6   r<   )r7   rJ   r<   )popr5   rI   )rV   r}   r6   rJ   r<   s        rG   _parse_and_validate_image_inputz2DotsOCRForCausalLM._parse_and_validate_image_input  s     zz.$77zz.$77$4d;;L$84#*#)-    #.#)-    $#rF   image_input.c                 p   |d         }|j         dk    sJ |                                }|d         dk    r&|d                             | j        j                  }np|d                             | j        j                  }| j        rt          | j        ||d          S |                     ||          d d d | j        j        f         }| j        j	        }t          j        |t          j                                      d	          ||z  z                                  }|                    |          S )
Nr<   r   r7   rJ   r6   rope_3d)	rope_typer   r   )ndimtolistr7   r  r   r   r3   rt   r   r   rC   rd  re  prodsplit)rV   r  r  grid_thw_listrJ   r6   
merge_sizesizess           rG   _process_image_inputz'DotsOCRForCausalLM._process_image_input  sC    /0}!!!! ))v.00&~6;;D<M<STTLL&~6;;D<M<STTL% 
8% !'	     $00}MMAA0000 
 &9
Lej999>>rBBZ')
&(( 	
 !!%(((rF   num_image_tokensc                 *    | j         j        }||dz  z  S Nr   r  r   )rV   r  r  s      rG   get_num_mm_encoder_tokensz,DotsOCRForCausalLM.get_num_mm_encoder_tokens  s    &9
:q=11rF   num_vision_tokensc                 *    | j         j        }||dz  z  S r  r  )rV   r  r  s      rG   get_num_mm_connector_tokensz.DotsOCRForCausalLM.get_num_mm_connector_tokens  s    &9
 Z]33rF   c                 R     | j         di |}|g S |                     |          }|S )NrE   )r  r  )rV   r}   r  vision_embeddingss       rG   embed_multimodalz#DotsOCRForCausalLM.embed_multimodal  s?    :d:DDVDDI 55kBB  rF   	input_ids	positionsintermediate_tensorsinputs_embedsc                 @    |d }|                      ||||          }|S )N)r  r  r  r  )r  )rV   r  r  r  r  r}   r   s          rG   r   zDotsOCRForCausalLM.forward  s=      + M++!5'	 , 
 
 rF   r   c                 6    | j                             |          S rf   )r  compute_logits)rV   r   s     rG   r  z!DotsOCRForCausalLM.compute_logits  s     "11-@@@rF   r   c                 X    t          |           }|                    || j                  S )N)mapper)r$   r  hf_to_vllm_mapper)rV   r   loaders      rG   r  zDotsOCRForCausalLM.load_weights  s+    "4((""743I"JJJrF   c                 0    t          j        ddd          S )z<
        Get the module prefix in multimodal models
        r  zvision_tower.mergerzvision_tower.)r  	connectortower_model)r   from_string_fieldrw   s    rG   get_mm_mappingz!DotsOCRForCausalLM.get_mm_mapping  s'     /++'
 
 
 	
rF   )NN)!r>   r?   r@   r%   r  packed_modules_mappingsupports_encoder_tp_dataclassmethodrg   rh   r  r
   r   r   rM   r  r  rC   rD   r  r  r  r   r  r+   r   r  r   r  r  r   r  r   r   s   @rG   rr  rr  f  s        &+,
 

 2-
 
	 	 	
 
 
 
 "]   $33 33 33: 3 3 3 [3 BD 
 
 
z 
3 
 
 
 
 
 
@	d	"   0)-)	u|S 	!) ) ) )B2# 2# 2 2 2 24S 4S 4 4 4 4! !4H ! ! ! ! <@-1 < < 2D8	
 |d* 
+	+   (A|A 
	A A A AKHU33D-E$F K3s8 K K K K
 
 
 
 
 
 
 
 
rF   rr  )bcollections.abcr   r   typingr   r   r   rC   torch.nnr   r   transformers.models.qwen2_vlr	   vllm.configr
   vllm.config.multimodalr   vllm.distributedr   r   vllm.distributed.parallel_stater   r   %vllm.model_executor.layers.activationr   9vllm.model_executor.layers.attention.mm_encoder_attentionr   vllm.model_executor.layers.convr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   r   'vllm.model_executor.layers.quantizationr   2vllm.model_executor.layers.rotary_embedding.commonr   -vllm.model_executor.model_loader.weight_utilsr   %vllm.model_executor.models.interfacesr   r   r   r   )vllm.model_executor.models.module_mappingr    vllm.model_executor.models.qwen2r   #vllm.model_executor.models.qwen2_vlr    r!   r"   r#    vllm.model_executor.models.utilsr$   r%   r&   r'   !vllm.model_executor.models.visionr(   vllm.multimodalr)   vllm.multimodal.inputsr*   vllm.sequencer+   'vllm.transformers_utils.configs.dotsocrr,   r-   vllm.utils.tensor_schemar.   r/   #vllm.v1.attention.backends.registryr0   visionr2   r3   rU   r5   rI   rM   rB   rO   rj   Moduler   r   r   r   r  r!  r)  r1  register_processorrr  rE   rF   rG   <module>r     s   . - - - - - - - - 0 0 0 0 0 0 0 0 0 0              9 9 9 9 9 9 " " " " " " 3 3 3 3 3 3 0 0 0 0 0 0        = < < < < <      8 7 7 7 7 7 8 8 8 8 8 8            G F F F F F      P O O O O O            E D D D D D = = = = = =                       C B B B B B / / / / / / 5 5 5 5 5 5 - - - - - - S S S S S S S S > > > > > > > > D D D D D D O O O O O O O OB B B B Bl B B BB B B B B, B B B !8:U U I U U U
 
 
 
 
 9 
 
 
8! ! ! ! !1 ! ! !H    BI   + + + + +") + + +\] ] ] ] ]") ] ] ]@C C C C CBI C C CL    RY   :    ")   ) ) ) ) )bi ) ) )XF F F F FBI F F FR ('	*  
n
 n
 n
 n
 n
$6
L n
 n
 
n
 n
 n
rF   