
    .`iV                        d dl mZ d dlmZ d dlmZmZmZmZ d dl	Z	d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ ddlmZ ddlmZmZ erd dlmZmZ d dlmZ  ede ej!                           Z" ee#          Z$g dZ%dddej!        dz  fdZ&dej'        de(ddde)fdZ*de(de(de(fdZ+de"de"fdZ,de"de"fd Z-de"de"fd!Z. G d" d#e          Z/dej!        fd$Z0ed%e)fd&            Z1d'ee2e(e	j3        f                  fd(Z4d'ee2e(e	j3        f                  fd)Z5e4e5d*Z6d'ee2e(e	j3        f                  fd+Z7dS ),    )Iterable)contextmanager)TYPE_CHECKINGAnyTypeVarcastN)
VllmConfig)init_logger)
get_act_fn)VerifyAndUpdateConfig)try_get_dense_modules)get_hf_file_bytes   )supports_multimodal)VllmModelForPoolingis_pooling_model)ModelConfigr	   )Pooler_T)bound)ForCausalLMForConditionalGeneration	ChatModelLMHeadModelmodel_configr   returnc           	      @   t          | j        | j                  }|dS 	 g }|D ]}|d         }t          j        |d         |d         |                    dd          | j                  }t          |||           sY|                    |           |                    d	          x}r"|                    t          |                     t          j
        |                     | j        
          S # t          $ r t                              d           Y nw xY wdS )z3Load Sentence-Transformers Dense projection layers.)revisionNfolderin_featuresout_featuresbiasT)r"   dtypeactivation_function)r#   zST projector loading failed)r   modelr   nnLinearget
head_dtype_load_dense_weightsappendr   
Sequentialto	Exceptionlogger	exception)r   dense_moduleslayerslayer_configr   linearact_names          w/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/adapters.py_load_st_projectorr7   '   sR    *\%:  M 8) 	4 	4L!(+FY]+^,!%%fd33"-	  F 'vv|DD MM&!!!'++,ABBBx 4j22333}f%((|/F(GGG 8 8 86777778 4s   CC4 4$DDr4   r   c                    ddl m} dD ]6}|r| d| n|}	 t          ||j        |j                  }|s-|                    d          rddlm}  ||          }n.ddl}	t          j        |	
                    |          dd	
          }dD ]}
|
|v rt          | j        d|          } || j        ||
                    |
                    dd          }| j        1||v r-t          | j        d|          } || j        ||                      d	S # t          $ r t                               d|           Y 4w xY wdS )z0Load weights using vLLM's weight_loader pattern.r   default_weight_loader)zmodel.safetensorszpytorch_model.bin/z.safetensors)loadNcpuT)map_locationweights_only)weightzlinear.weightzdense.weightweight_loaderr@   r"   zFailed to load %sF)-vllm.model_executor.model_loader.weight_utilsr:   r   r%   r   endswithsafetensors.torchr<   iotorchBytesIOgetattrr@   replacer"   r.   r/   r0   )r4   r   r   r:   filename	file_path
file_bytesload_safetensors
state_dictrE   
weight_keyrA   bias_keybias_loaders                 r6   r*   r*   G   s    TSSSSS> % %.4Bv*****(	"	*<-|/D J    00 	FFFFFF--j99

			"ZJJz**T  
 J    
++$+8M% %M "M&-J1GHHH)11(FCCH{.8z3I3I&-"K:O' ' $FKH1EFFF444 ,   	 	 	0(;;;H	 5s   DCDD%D>=D>orig_model_namepooling_suffixc                 N    | }t           D ]}|                    |          }||z   S N)_GENERATE_SUFFIXESremovesuffix)rR   rS   
model_namegenerate_suffixs       r6   _get_pooling_model_namerZ   w   s6     J- > >,,_==

&&    orig_clsc                 p    ddl m ddlm ddlmmm  G fdd| t                    }|S )Nr   )LogitsProcessorParallelLMHeadr   )AutoWeightsLoaderStageMissingLayerno_init_weightsc            	            e Zd ZdZdddddededd	f fd
Z	 ddddeddfdZdee	ee
j        f                  f fdZ xZS )2_create_pooling_model_cls.<locals>.ModelForPoolingT prefixvllm_configr	   rh   kwargsr   Nc                ~    
| fdf          5   t                      j        d||d| d d d            n# 1 swxY w Y   || _        t          | dd           }|s:t	          |           r+|                                 }t          |d          r|j        }|s|                     ||          }|| _        d S )Nc                      d|           S )Noutput )modrb   s    r6   <lambda>zM_create_pooling_model_cls.<locals>.ModelForPooling.__init__.<locals>.<lambda>   s    --h<< r[   )targets)ri   rh   poolerrg   rn   )	super__init__ri   rH   r   get_language_modelhasattrrr   _init_pooler)selfri   rh   rj   rr   language_modelr^   r`   rb   	__class__rc   s         r6   rt   z;_create_pooling_model_cls.<locals>.ModelForPooling.__init__   sK    !<<<<(.9   S S
 ! R[RR6RRRS S S S S S S S S S S S S S S  +D T8T22F 31$77 3!%!8!8!:!:>844 3+2F G**;v*FF DKKKs   >AAr   c                     t           rU   )NotImplementedError)rx   ri   rh   s      r6   rw   z?_create_pooling_model_cls.<locals>.ModelForPooling._init_pooler   s
    
 &%r[   weightsc                    
 t                                                     ddg}dt          t          t          t
          j        f                              }|D ]J\  
}|                    
|f           	 t          
fd|D                        n# t          $ r Y Gw xY wrl }
                    d          D ]}|rt           |          }t                              d|                                                                            fdg ||R D             } fd}t          t                      d|          }	 |	|          S )	Nrf   zmodel.c              3   *   K   | ]}|z   v 	|V  d S rU   rn   ).0rh   nameparams_dicts     r6   	<genexpr>zR_create_pooling_model_cls.<locals>.ModelForPooling.load_weights.<locals>.<genexpr>   s>       ) )"!D=K77 7777) )r[   .zGMapping weights to %s as they are relative to this model instead of %s.c              3   ,   K   | ]\  }}|z   |fV  d S rU   rn   )r   r   r@   target_prefixs      r6   r   zR_create_pooling_model_cls.<locals>.ModelForPooling.load_weights.<locals>.<genexpr>   sE         D& %v.     r[   c                 D               }|                     |           S rU   )load_weights)r}   loaderra   rx   s     r6   default_load_weightsz]_create_pooling_model_cls.<locals>.ModelForPooling.load_weights.<locals>.default_load_weights   s'    **400**7333r[   r   )dictnamed_parameterslisttuplestrrF   Tensorr+   nextStopIterationsplitrH   r/   info	_get_namers   )rx   r}   candidate_prefixesseen_weightsloaded_weighttarget_modelattrmapped_weightsr   r   r   r   r   ra   rz   s   `         @@@r6   r   z?_create_pooling_model_cls.<locals>.ModelForPooling.load_weights   s   t446677K #%hMc5<&7 89;;L'.  #m##T=$9:::
$( ) ) ) ) )&8) ) ) % %M
 E$    D
  #)//44 ; ;D ;'.tT':':< **,,NN$$	     $=l$=W$=$=  N
4 4 4 4 4 4 #577N<PQQL<///s   <B
B'&B'rf   )__name__
__module____qualname__r   r   r   rt   rw   r   r   rF   r   r   __classcell__)rz   ra   r^   r`   rb   rc   s   @r6   ModelForPoolingre      s        		! 	! 	! &	! 		!
 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	!B 	& 	&%	& 	& 		& 	& 	& 	&.	0sEL7H1I(J .	0 .	0 .	0 .	0 .	0 .	0 .	0 .	0 .	0 .	0 .	0r[   r   )	+vllm.model_executor.layers.logits_processorr^   3vllm.model_executor.layers.vocab_parallel_embeddingr`   utilsra   rb   rc   r   )r\   r   ra   r^   r`   rb   rc   s     @@@@@r6   _create_pooling_model_clsr      s    KKKKKKRRRRRRLLLLLLLLLLV0 V0 V0 V0 V0 V0 V0 V0 V0 V0 V0($7 V0 V0 V0p r[   clsc                     t          |           r| S ddlm  G fddt          |                     }t	          | j        d          |_        |S )a\  
    Subclass an existing vLLM model to support embeddings.

    By default, the embeddings of the whole prompt are extracted from the
    normalized hidden state corresponding to the last token.

    Note:
        We assume that no extra layers are added to the original model;
        please implement your own model if this is not the case.
    r   DispatchPoolerc                   ,    e Zd Z	 d	dddeddf fdZdS )
-as_embedding_model.<locals>.ModelForEmbeddingrf   ri   r	   rh   r   r   c                 N    |j         j        }|J                     |          S rU   )r   pooler_configfor_embedding)rx   ri   rh   r   r   s       r6   rw   z:as_embedding_model.<locals>.ModelForEmbedding._init_pooler   s0    
 (4BM ,,,!//>>>r[   Nr   )r   r   r   r   rw   r   s   r6   ModelForEmbeddingr      sY         	? 	?%	? 	? 		? 	? 	? 	? 	? 	? 	? 	?r[   r   ForEmbedding)r   !vllm.model_executor.layers.poolerr   r   rZ   r   )r   r   r   s     @r6   as_embedding_modelr      s      
 A@@@@@	? 	? 	? 	? 	? 	? 	?5c:: 	? 	? 	? "9~!V!Vr[   c                     t          |           r| S ddlm ddlm ddlm} ddlm  G fddt          |           |          }t          | j        d	          |_        |S )
a  
    Subclass an existing vLLM model to support classify and score tasks.

    By default, the class probabilities are extracted from the softmaxed
    hidden state corresponding to the last token.

    Note:
        We assume that the classification head is a single linear layer
        stored as the attribute `score` of the top-level model;
        please implement your own model if this is not the case.
    r   )ReplicatedLinearr   )SupportsCrossEncodingr   )maybe_prefixc                   l     e Zd Z	 d
dddeddffdZdeeeej        f                  f fd	Z	 xZ
S )8as_seq_cls_model.<locals>.ModelForSequenceClassificationrf   ri   r	   rh   r   r   c                 8   |j         j                                        }|j         }|j        } |                                |j        d|j         j        |d 	|d                    | _        |j         j        }|J 	                    || j                  S )NFscore)r"   params_dtypequant_configreturn_biasrh   )
classifier)
r   	hf_configget_text_configr   get_hidden_size
num_labelsr)   r   r   for_seq_cls)
rx   ri   rh   text_configr   r   r   r   r   r   s
          r6   rw   zEas_seq_cls_model.<locals>.ModelForSequenceClassification._init_pooler  s    
 &2<LLNNK&3L&3L)),,..&(5@)!#|FG44  DJ (4BM ,,,!--m
-SSSr[   r}   c           	      F     j         }|                                }t          |dt          |dd                     }t          |dt          |dd                     } fd} ||          }|#|!t                                          |          S t           |          S )Nclassifier_from_tokenmethodc              3   4  K   | D ]\  }}|dk    rj         j        j        }j         j        j        }|                    |                              |          }t
          j                            |          j         _        dj         _	        ||fV  d S )Nz
score.biasF)
r   r@   devicer#   r-   rF   r&   	Parameterr"   skip_bias_add)r}   r   r@   r   r#   r"   rx   s         r6   auto_set_score_biaszbas_seq_cls_model.<locals>.ModelForSequenceClassification.load_weights.<locals>.auto_set_score_bias@  s      $+ + +LD&|++!%!2!9 $
 1 7%yy0033E::*/(*<*<T*B*B
38
00"Fl****+ +r[   )configr   rH   rs   r   seq_cls_model_loader)rx   r}   r   r   tokensr   r   rz   s   `      r6   r   zEas_seq_cls_model.<locals>.ModelForSequenceClassification.load_weights6  s    I#3355K'%<dCC F
 Y'+xQU2V2VWWF	+ 	+ 	+ 	+ 	+ *)'22G~&.ww++G444 ,D':::r[   r   )r   r   r   r   rw   r   r   rF   r   r   r   )rz   r   r   r   s   @r6   ModelForSequenceClassificationr     s         	T 	T%	T 	T 		T 	T 	T 	T 	T 	T 	T 	T0	;sEL7H1I(J 	; 	; 	; 	; 	; 	; 	; 	; 	; 	;r[   r   ForSequenceClassification)r   !vllm.model_executor.layers.linearr   r   r   %vllm.model_executor.models.interfacesr   r   r   r   rZ   r   )r   r   r   r   r   r   s      @@@r6   as_seq_cls_modelr     s      
 CBBBBB@@@@@@KKKKKK######6; 6; 6; 6; 6; 6; 6; 6; 6;!#&&(=6; 6; 6;p /F1/ /"+ *)r[   c                   &    e Zd Zedd            ZdS )SequenceClassificationConfigri   r	   r   Nc           	         | j         j        }|                                }t          |dt          |dd                     }t          |dt          |dd                     }|d S |J |t          v sJ d| d            |dk    r$t          |          dk    sJ d|_        d|_        n(t          |          |_        t          |          |_        t          |dd	          }||_        d S )
Nr   r   method  not supportedfrom_2_way_softmax   r   use_sep_tokenF)r   r   r   rH   SEQ_CLS_LOAD_METHODSlenr   r   )ri   r   r   r   r   r   s         r6   verify_and_update_configz5SequenceClassificationConfig.verify_and_update_config[  s   ,6	//11Hgk8T.R.RSS#K!8$??
 
 >F!!!----/O/O/O/O---)))v;;!#####$I %&K""#&v;;I %([[K"  _eDD$1!!!r[   )ri   r	   r   N)r   r   r   staticmethodr   rn   r[   r6   r   r   Z  s2        2 2 2 \2 2 2r[   r   c                    t          |           r,	 |                                 }|| ur|S n# t          $ r Y nw xY wdD ]T}t          | |          rBt	          | |          }t          |t          j                  r|| urt          |d          r|c S U|                                 D ]5\  }}t          |          j
        }d|v sd|v rt          |d          r|c S 6| S )z
    Get the language model component for sequence classification conversion.
    For VLMs, returns the inner language model. For standard LLMs, returns model itself.
    )ry   lm
text_modelr%   r   LMHead)r   ru   r.   rv   rH   
isinstancer&   Modulenamed_childrentyper   )r%   r   	attr_name	candidater   child
child_names          r6   _get_language_model_for_seq_clsr   y  s8   
 5!! 	))++B	  	 	 	D	 < ! !	5)$$ 	!y11I9bi00!U**Iw// + !   ++--  e%[[)
Z''8z+A+Aw7H
 H
+A LLLLs   , 
99is_vlmc              #     K   |sdV  dS t          | dd          }|dV  dS |                                }t          |dd          }t          |dd          }t          |dd          }	 |d|_        |d|_        |d|_        dV  |||_        |||_        |	||_        dS dS # |||_        |||_        |||_        w xY w)z
    Context manager to temporarily disable sequence classification loading
    on inner VLM models to prevent recursive seq_cls_model_loader calls.
    Nr   r   r   )rH   r   r   r   )ry   r   inner_hf_configinner_text_configoriginal_methodoriginal_tokensoriginal_hf_tokenss          r6   '_disable_seq_cls_loading_on_inner_modelr     s=       nh==O'7799/4@@O/1H$OOO 2I4PPG&'+$&6:3)48O1&'6$&6E3)4FO111 *)	 &'6$&6E3)4FO1FFFFs   ,B* *Cr}   c           	         ddl m} ddlm} | j        j        }| j        j        }| j        }|                                }t          |dt          |dg                     }t          t          t                   |          }t          |          dk    sJ t          |           }	|	| u}
 ||j        |j        |          |	_        |j        rQ|	j        }t)          |d          r|j        n|                                }|	j                            |          |	_        t1          |	|
          5  t3          d t5          |           j        D                       }|                    | |          }d d d            n# 1 swxY w Y   dd	lm}  ||j        |j         |j!        |j"        
          }|#                    |d                   }|#                    |d                   }|	j        j$        }|j%        |g         &                    tN          j(                  |j%        |g         &                    tN          j(                  z
  }|
r|	j)        n| j)        }|j$        }t          |d|          } |||           |	`|
rdnd}|*                    |           d}t          | dd           x}r|+                    |          }|,                    |           |S )Nr   r_   r9   r   r   r   embed_tokensc              3   0   K   | ]}|j         d k    |V  dS r   Nr   r   xs     r6   r   z8load_weights_using_from_2_way_softmax.<locals>.<genexpr>  8       !
 !
aj<M.M.MA.M.M.M.M!
 !
r[   get_tokenizerr   tokenizer_modetrust_remote_coder   rA   language_model.score.weightscore.weightlm_head.weighthf_to_vllm_mapper)-r   r`   rB   r:   ri   r   r   r   r   rH   r   r   intr   r   
vocab_sizehidden_sizelm_headtie_word_embeddingsr%   rv   r   get_input_embeddingstie_weightsr   r   r   __mro__r   vllm.tokenizersr   	tokenizertokenizer_revisionr  r  convert_tokens_to_idsr@   datar-   rF   float32r   add	_map_namediscard)r%   r}   r`   r:   r   r   r   r   r   ry   r   text_backboner   pooling_model_clsloaded_weightsr   r  false_idtrue_idlm_head_weightscore_weightscore_layerparamrA   score_weight_namelm_head_namer  s                              r6   %load_weights_using_from_2_way_softmaxr$    s    SRRRRRSSSSSS$1L$1LI++--K4b99 F
 $s)V$$Fv;;!4U;;N5(F+^ 7l  N & 
R ', }n556M&&3355 	
 "0!7!C!CL!Q!Q	0	H	H H H ! !
 !
E{{*!
 !
 !
 
 
 +77wGGH H H H H H H H H H H H H H H .-----0#2&8	  I ..vay99H--fQi88G#+2N!&y144 XJ'**5=99:L +1A.&&ekKEE?4IJJMM%&&&9?S55^()))#L#E+>EEE A(22<@@<(((s   -AE;;E?E?c                    ddl m} ddlm} | j        j        }| j        j        }| j                                        }t          |dg           }t          t          t                   |          }t          |          dk    sJ t          |           }|| u}	 ||j        |j        |          |_        |j        rQ|j        }
t)          |
d          r|
j        n|
                                }|j                            |          |_        t1          ||	          5  t3          d t5          |           j        D                       }|                    | |          }d d d            n# 1 swxY w Y   ddlm}  ||j        |j         |j!        |j"        	          fd
|D             }|j        j#        j$        |         }|	r|j%        n| j%        }|j#        }t          |d|          } |||           |`|	rdnd}|&                    |           d}t          | dd           x}r|'                    |          }|(                    |           |S )Nr   r_   r9   r   r   r   c              3   0   K   | ]}|j         d k    |V  dS r   r   r   s     r6   r   z2load_weights_no_post_processing.<locals>.<genexpr>-  r   r[   r   r  c                 :    g | ]}                     |          S rn   )r  )r   tr  s     r6   
<listcomp>z3load_weights_no_post_processing.<locals>.<listcomp><  s'    DDD0033DDDr[   rA   r  r  r  r  ))r   r`   rB   r:   ri   r   r   r   r   rH   r   r   r  r   r   r	  r
  r  r  r%   rv   r   r  r  r   r   r   r  r   r  r   r  r  r  r  r@   r  r   r  r  r  )r%   r}   r`   r:   r   r   r   r   ry   r   r  r   r  r  r   	token_idsr  r   r!  rA   r"  r#  r  r  s                          @r6   load_weights_no_post_processingr+    s   RRRRRRSSSSSS$1L$1L,..00K["92>>F$s)V$$Fv;;????4U;;N5(F+^ 7l  N & 
R ', }n556M&&3355 	
 "0!7!C!CL!Q!Q	0	H	H H H  !
 !
E{{*!
 !
 !
 
 
 +77wGGH H H H H H H H H H H H H H H .-----0#2&8	  I EDDDVDDDI!)05i@L*0A.&&ekKEE?4IJJMM%&&&9?S55^()))#L#E+>EEE A(22<@@<(((s   AE++E/2E/)r   no_post_processingc           	          | j         j        j        }|                                }t	          |dt	          |dd                     }|t
          v sJ d| d            t          |         | |          S )Nr   r   r   )ri   r   r   r   rH   r   )r%   r}   r   r   r   s        r6   r   r   V  s{     !.8I++--KY'+x*N*NOOF))))+KV+K+K+K)))'w777r[   )8collections.abcr   
contextlibr   typingr   r   r   r   rF   torch.nnr&   vllm.configr	   vllm.loggerr
   %vllm.model_executor.layers.activationr   !vllm.model_executor.models.configr   vllm.transformers_utils.configr   "vllm.transformers_utils.repo_utilsr   
interfacesr   interfaces_baser   r   r   r   r   r   r   r   r   r/   rV   r7   r'   r   boolr*   rZ   r   r   r   r   r   r   r   r   r$  r+  r   r   rn   r[   r6   <module>r;     s   % $ $ $ $ $ % % % % % % 4 4 4 4 4 4 4 4 4 4 4 4        " " " " " " # # # # # # < < < < < < C C C C C C      A @ @ @ @ @ + + + + + + B B B B B B B B 933333333888888WTbi)))	X		   ] ry47G    @-I-"-2?-	- - - -`'S '# '# ' ' ' '_ _r _ _ _ _DB 2    DS*" S* S* S* S* S*l2 2 2 2 2#8 2 2 2>bi    B !GD !G !G !G !GHLU3#456L L L L^?HU3CT=U4V ? ? ? ?F @9  8%U\8I2J)K 8 8 8 8 8 8r[   