
    .`ik9                     6   d dl Z d dlZd dlmZ d dlmZmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZ d dlmZ erd dlZd dlmZ d dlZd dlmZ d dl m!Z! n ed e"            d          Z ee#          Z$ G d d          Z%dS )    N)Iterable)FutureThreadPoolExecutor)TYPE_CHECKING)
VllmConfig)init_logger)ReasoningParserManager)cached_tokenizer_from_config)
LazyLoader)GuidanceBackend)StructuredOutputBackendStructuredOutputGrammar)XgrammarBackend)ReasoningParser)Requesttorchc            	       
   e Zd ZdZdefdZddZdddefd	Zd
e	e
eeef                  ddfdZd
ee
eeef                  defdZdeedf         dee         deeee         f         ddfdZdddefdZdddefdZddZdS )StructuredOutputManagerz4Engine-level manager for structured output requests.vllm_configc                    d | _         d | _        || _        |j        j        dk    | _        d | _        t          j        dt          j	                  | _
        | j        j        j        }d| _        | j        |k     rNd| _        t          dt!          t#          j                    dz  d                    }t'          |	          | _        | j        j        j        st          dt#          j                    dz   dz            }t'          |	          | _        t1          | j        j        
          | _        | j        j        j        }| j        j        j        }|r't;          |          dk    rt=          j        |           | j        j        j        }|r*t=          j         |          } || j                  | _        | j        j        j!        | _!        d S )Nexternal_launcher)dtype               )max_workers)model_config   )	tokenizer)"backendreasonerr   parallel_configdistributed_executor_backend_use_async_grammar_compilation_grammar_bitmaskr   tensorint32
_full_maskscheduler_configmax_num_seqsfill_bitmask_parallel_threshold fill_bitmask_parallel_batch_sizemaxminmultiprocessing	cpu_countr   executor_for_fillmaskr    skip_tokenizer_initexecutorr
   r"   structured_outputs_configreasoning_parserreasoning_parser_pluginlenr	   import_reasoning_parserget_reasoning_parserenable_in_reasoning)selfr   max_batch_sizer   r8   r9   reasoner_clss          v/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/v1/structured_output/__init__.py__init__z StructuredOutputManager.__init__&   s   7;04& 'D"# 	+
 6:,r===):G/2,/.@@46D1 a_%>%@%@A%Eq!I!IJJK);)T)T)TD&,@ 	G a/";"="="Aa!GHHK.;GGGDM9!-:  DN  :K   :R $ ' X3/F+G+G!+K+K&>?VWWW  :K    G5J$    !-t~ F F F 6J 	       requestr   returnNc                    |j         d S t          r|j        |j        j        J | j        |j        J |j        j        j        }| j        j                                        }|dk    r"t          | j        | j
        |          | _        n|dk    r"t          | j        | j
        |          | _        nf|dk    r$ddlm}  || j        | j
        |          | _        n<|dk    r$ddlm}  || j        | j
        |          | _        nt!          d	|           | j        r!| j                            | j        |          }n|                     |          }||j         _        d S )
Nxgrammar)r"   
vocab_sizeguidanceoutlinesr   )OutlinesBackendzlm-format-enforcer)LMFormatEnforcerBackendz'Unsupported structured output backend: )structured_output_requestr   sampling_paramsstructured_outputsr#   _backendr   r    get_vocab_sizer   r"   r   *vllm.v1.structured_output.backend_outlinesrK   4vllm.v1.structured_output.backend_lm_format_enforcerrL   
ValueErrorr'   r6   submit_create_grammargrammar)r>   rD   r#   rH   rK   rL   rW   s          rA   grammar_initz$StructuredOutputManager.grammar_initc   s   ,4F 	'3+>JJK <*666-@IG)6EEGGJ*$$.$"n)     
 J&&.$"n)     
 J&&VVVVVV.$"n)     
 000       76$"n)      !!T7!T!TUUU. 	4m**4+?IIGG**733G4;)111rC   c                 l    |j         j        }|\  }}| j        J | j                            ||          S N)rM   structured_output_keyr#   compile_grammar)r>   rD   keyrequest_typegrammar_specs        rA   rV   z'StructuredOutputManager._create_grammar   s?    /E &)"l|'''|++L,GGGrC   batchc                     | j         J |D ]]\  }}}|r0|                                s|                    | j         |           8| j         |                             | j                   ^d S rZ   )r(   is_terminatedfill_bitmaskfill_r+   )r>   r`   rW   indexapply_bitmasks        rA   _fill_bitmasksz&StructuredOutputManager._fill_bitmasks   s     $000-2 	D 	D)GUM DW%:%:%<%< D$$T%:EBBBB
 %e,224?CCCC	D 	DrC   c                 B    | j                             | j        |          S rZ   )r4   rU   rg   )r>   r`   s     rA   _async_submit_fill_bitmaskz2StructuredOutputManager._async_submit_fill_bitmask   s      )001DeLLLrC   requestsstructured_output_request_idsscheduled_spec_decode_tokensznpt.NDArray[np.int32] | Nonec                 B   |sd S d}| j         j        | j         j        j        }| j        ?| j        J | j         j        j        }| j                            |d|z   z            | _        d}t          |          | j	        k    r|dk    rg }g }|D ]}	||	         }
|
j
        }t          r|J |j        J |j        }|                     |
          }|                    |||f           t          |          | j        k    r*|                    |                     |                     g }|dz  }|r(|                    |                     |                     |D ]}|                                 n|D ]}	||	         }
|
j
        }t          r|J |j        J |j        }|                     |
          }d}|                    |	d          }t'          j        |d          D ]i}|                     |||ff           |dk    rd}|r?|                                s+|                    |	|g          }|sJ ||	|f            |dz  }|dz  }j|dk    r|                    |           | j        }||j        d         k     r
|d |         }|                                S )Nr   r    )r   r   F)r   speculative_confignum_speculative_tokensr(   r#   r,   r-   allocate_token_bitmaskr:   r.   rM   r   rW   should_fill_bitmaskappendr/   ri   resultget	itertoolschainrg   rb   accept_tokensrollbackshapenumpy)r>   rj   rk   rl   max_num_spec_tokensr?   cumulative_indexpromisesr`   req_idrD   rM   rW   rf   promisestate_advancements
req_tokenstokenacceptedbitmask_tensors                       rA   grammar_bitmaskz'StructuredOutputManager.grammar_bitmask   s\    - 	4.: 3J    (<+++!->KN
 %)L$G$G!&9"9:% %D! 
 -..1UUU#q((HE7 & &"6*,3,M)  I4@@@4<HHH3; $ 8 8 A Ag'7GHHHu::!FFFOOD$C$CE$J$JKKKE A%   H ? ? F FGGG $ ! !    ! 8 9 9"6*,3,M)  I4@@@4<HHH3; $ 8 8 A A%&"9==fbII
&_Z?? 	* 	*E'''3C])S(UVVV{{(-$ 0W-B-B-D-D 0#*#8#8%#I#I'VV%9U)VVVx*a/*$)$$%))$$%7888.n21555+,=-=,=>N
 ##%%%rC   c                     | j         U| j        rdS |j        J |j        j        +| j                             |j        pg           |j        _        |j        j        S dS )NT)r$   r=   rM   reasoning_endedis_reasoning_endprompt_token_ids)r>   rD   s     rA   rr   z+StructuredOutputManager.should_fill_bitmask  sp    
 =$' t4@@@0@HM2273K3QrRR 1A 4DDtrC   c                    |j         sdS t          r|j        J |j        j        J | j        dS | j        rdS |j        }|j        rdS |j        |j        z
  }|j	        }| j        
                    |||d                    rd|_        dS )NFT)use_structured_outputr   rM   rW   r$   r=   r   num_computed_tokensnum_output_placeholdersall_token_idsis_reasoning_end_streaming)r>   rD   structured_req
delta_fromr   s        rA   should_advancez&StructuredOutputManager.should_advance.  s    , 	5  	I4@@@4<HHH = 4 # 	4 :) 	4 073RR
-=33=5
 
 	2
 .2N*urC   c                 J    | j         | j                                          d S d S rZ   )r#   destroy)r>   s    rA   clear_backendz%StructuredOutputManager.clear_backendP  s,    <#L  """"" $#rC   )rD   r   rE   N)rE   N)__name__
__module____qualname____doc__r   rB   rX   r   rV   r   tupleintboolrg   listr   ri   dictstrr   rr   r   r   rn   rC   rA   r   r   #   s       >>;
J ;
 ;
 ;
 ;
z8< 8< 8< 8<tHy H5L H H H HDe$;S$$FGHD	D D D DM% 7d BCDM	M M M M
`&sI~&`& (,Cy`& '+3S	>&:	`&
 
(`& `& `& `&D9       i  D        D# # # # # #rC   r   )&rv   r2   collections.abcr   concurrent.futuresr   r   typingr   vllm.configr   vllm.loggerr   vllm.reasoningr	   vllm.tokenizersr
   vllm.utils.import_utilsr   *vllm.v1.structured_output.backend_guidancer   'vllm.v1.structured_output.backend_typesr   r   *vllm.v1.structured_output.backend_xgrammarr   r{   npnumpy.typingnptr   r   vllm.v1.requestr   globalsr   loggerr   rn   rC   rA   <module>r      s           $ $ $ $ $ $ 9 9 9 9 9 9 9 9             " " " " " " # # # # # # 1 1 1 1 1 1 8 8 8 8 8 8 . . . . . . F F F F F F        G F F F F F 4LLL......'''''''Jw		733E 
X		o# o# o# o# o# o# o# o# o# o#rC   