
    .`i;                         d dl mZ d dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ  ee          Z G d d	e          ZdS )
    )SequenceN)PreTrainedTokenizerBase)ChatCompletionRequest)DeltaMessage)init_logger)ReasoningParserc                   F    e Zd ZdZdef fdZdededeedz  edz  f         fdZ	d	ed
edede
e         de
e         de
e         dedz  fdZdedefdZdedefdZd
ededefdZd
edededefdZdededed
ededefdZd
edeedz  edz  edz  f         fdZ xZS )GraniteReasoningParserz
    Reasoning parser for IBM Granite.

    IBM granite models currently use "Here is my thought process:"
    and "Here is my response:" to separate its thinking / response outputs.
    	tokenizerc                 L    t                      j        |g|R i | d| _        d| _        t	          j        | j         d| j         dt          j                  | _        ddg| _        ddg| _	        d	| _
        d
| _        t          d | j        D                       | _        d S )Nz&(?:Here's|Here is) my thought process:z(?:Here's|Here is) my response:z(.*?)z(.*)zHere's my thought process:zHere is my thought process:zHere's my response:zHere is my response::Herec              3   4   K   | ]}t          |          V  d S N)len).0think_starts     {/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/reasoning/granite_reasoning_parser.py	<genexpr>z2GraniteReasoningParser.__init__.<locals>.<genexpr>3   s;       '
 '
!,C'
 '
 '
 '
 '
 '
    )super__init__think_start_exprresponse_start_exprrecompileDOTALLreasoning_regexvalid_think_startsvalid_response_startsseq_boundary_endseq_boundary_startmaxlongest_think_start)selfr   argskwargs	__class__s       r   r   zGraniteReasoningParser.__init__   s    4T444V444
 !J#E !z%JJD,DJJJBI 
  

 ))#
 '<=S%T" !$"( $' '
 '
040G'
 '
 '
 $
 $
   r   model_outputrequestreturnNc                 l    | j                             |          }|sd|fS |d         \  }}|s|dfS ||fS )a  Extract the reasoning content & content sections, respectively.
        If the sequence doesn't match what we expect, i.e., the model generates
        something else, all content is considered non-reasoning content.

        Args:
            model_output (str): Output of the model to be parsed.
            request (ChatCompletionRequest): Request being processed.

        Returns:
            tuple[Optional[str], Optional[str]]: Tuple pair containing the
            reasoning content and non-reasoning content.
        Nr   )r   findall)r%   r)   r*   re_match	reasoningresponse_contents         r   extract_reasoningz(GraniteReasoningParser.extract_reasoning7   sZ     '//== 	&%%&.qk#	# 	#d?"***r   previous_textcurrent_text
delta_textprevious_token_idscurrent_token_idsdelta_token_idsc                     |                      |          \  }}}	|s|                     ||          }
n7|	s|                     |||          }
n|J |                     |||	||          }
|
j        s	|
j        sdS |
S )a  Extract the reasoning content / content emitted by granite models;
        If the sequence doesn't match what we expect, i.e., the model generates
        something else, all content is considered non-reasoning content.

        NOTE: Granite models do not use a special token to start their reasoning
        and response sections; instead they have token sequences, e.g.,

                Here is my thought process: Foo Here is my response: Bar

        This increases the complexity of correctly handling streams, since we
        need to watch for specific sequences and correctly parse them without
        dropping content that is potentially overlapping & spanning multiple
        delta messages.

        Args:
            previous_text (str): Previous text outside of this delta message.
            current_text (str): Previous text + delta text.
            delta_text (str): Text to consider and parse content from.
            previous_token_ids (Sequence[int]): Token IDs of previous_text.
            current_token_ids (Sequence[int]): Token IDs of current_text.
            delta_token_ids (Sequence[int]): Token IDs of delta_text.

        Returns:
            Union[DeltaMessage, None]
                DeltaMessage with either reasoning content or content, or None.
        N)_get_content_sections+_get_delta_message_with_no_reasoning_bounds*_get_delta_message_with_no_response_bounds#_get_delta_message_with_both_boundscontentr/   )r%   r2   r3   r4   r5   r6   r7   r/   resp_seq_lenr=   delta_messages              r   extract_reasoning_streamingz2GraniteReasoningParser.extract_reasoning_streamingN   s    F ,0+E+El+S+S(	<  	 LLj MM
  
	 KKi MM  +++ DDIwl M $ 	]-D 	4r   textc                 D    t          fd| j        D                       S )zCheck if a text matches one of the possible start reasoning seqs.

        Args:
            text (str): Text to check for leading substr.

        Returns:
            bool: True if any of the possible reasoning start seqs match.
        c              3   B   K   | ]}|                               V  d S r   
startswith)r   r   rA   s     r   r   zDGraniteReasoningParser._is_reasoning_start_substr.<locals>.<genexpr>   sB       
 
-8K""4((
 
 
 
 
 
r   )anyr   r%   rA   s    `r   _is_reasoning_start_substrz1GraniteReasoningParser._is_reasoning_start_substr   s@      
 
 
 
<@<S
 
 
 
 
 	
r   c                 D    t          fd| j        D                       S )zCheck if a text matches one of the possible start response seqs.

        Args:
            text (str): Text to check for leading substr.

        Returns:
            bool: True if any of the possible response start seqs match.
        c              3   B   K   | ]}|                               V  d S r   rD   )r   response_startrA   s     r   r   zCGraniteReasoningParser._is_response_start_substr.<locals>.<genexpr>   sE       
 
 %%d++
 
 
 
 
 
r   )rF   r    rG   s    `r   _is_response_start_substrz0GraniteReasoningParser._is_response_start_substr   s@      
 
 
 
"&"<
 
 
 
 
 	
r   c                    t          |          t          |          z
  }|                     |          }|                     |d|                   }|r|st          d|          S |rt          dd          S t          d|          S )a^  Parse the delta message when the current text has not yet completed
        its start of reasoning sequence.

        Args:
            current_text (str): The full previous + delta text.
            delta_text (str): Text to consider and parse content from.

        Returns:
            DeltaMessage: Message containing the parsed content.
        Nr/   r=   )r   rH   r   )r%   r3   r4   prev_longest_length	is_substr
was_substrs         r   r:   zBGraniteReasoningParser._get_delta_message_with_no_reasoning_bounds   s     ",//#j//A33LAA	44\BVCVBV5WXX

  	i 	$     	>$==== dJ????r   r/   c                    t          fd| j        D                       }||rt          dd          S |dt          |                    }||                    | j                  }|                    | j                  }|dk    r|                     ||d                   nd}|dk    r|                     |d                   nd}	|dk    r|                     ||d                   nd}
|	rt          dd          S |s,|
rt          |d|         d          S t          |d          S |
r&||d         |d|         z   }t          |d          S t          ||d         |z   d          S )a5  Parse the delta message when the current text has both reasoning
        content with no (response) content. NOTE that we may have overlapping
        tokens with the start of reasoning / start of response sequences on
        either side of the delta text.

        Args:
            current_text (str): The full previous + delta text.
            reasoning (str): reasoning content from current_text.
            delta_text (str): Text to consider and parse content from.

        Returns:
            DeltaMessage: Message containing the parsed content.
        c              3   B   K   | ]}                     |          V  d S r   )endswith)r   rK   r3   s     r   r   zTGraniteReasoningParser._get_delta_message_with_no_response_bounds.<locals>.<genexpr>   sE       +
 +
 !!.11+
 +
 +
 +
 +
 +
r   NrN   r   F)rF   r    r   r   rfindr"   rL   )r%   r3   r/   r4   ends_with_start_response_seqr2   prev_idx	delta_idxprev_was_substrdelta_continues_substrdelta_new_substrs    `         r   r;   zAGraniteReasoningParser._get_delta_message_with_no_response_bounds   s   . (+ +
 +
 +
 +
"&"<+
 +
 +
 (
 (
$  <$==== ""4S__$4"45  !&&t'>??$$T%<==	
 1}} **=+CDDD 	 1}} **<		+BCCC 	 A~~ **:ijj+ABBB 	 " 	>$==== 	C T#j).DdSSSS*dCCCC  	C%hii0:jyj3III)TBBBB#HII.;
 
 
 	
r   r0   response_seq_lenc                 `   |t          |           d         }t          |          t          |          |z   z
  }|dk     rd}n[t          |          |z   t          |          z   dz
  }	t          |          t          |          z
  }
|	|
z
  }|dk     rd}|||         }t          ||          S )a  Parse the delta message when the current text has both reasoning
        content and normal (response) content.

        Args:
            delta_text: Text to consider and parse content from.
            reasoning: reasoning content from current_text.
            response_content: response content from current_text.
            current_text: The full previous + delta text.
            response_seq_len: Len of the complete response sequence used.

        Returns:
            DeltaMessage: Message containing the parsed content.
        Nr      rN   )r   r   )r%   r4   r/   r0   r3   r\   delta_contentreasoning_end_idxdelta_reasoningstart_reasoning_idxdelta_offsetstart_offsets               r   r<   z:GraniteReasoningParser._get_delta_message_with_both_bounds  s    , #C(8$9$9#9#;#;<
OOs3C/D/DGW/WXq  "OO I!11C8H4I4IIAM   |,,s:>L.=La (6G)GHO%!
 
 
 	
r   c                     d}d}d} fdt          |          D             }|D ]}|||         }|% j        D ]}||dd         k    r|dz   }|dz   } n3|sp j        D ]h}	|t          |	           dz   d         |	dd         k    r?|t          |	          z
  }
|||
         }||dz   d         }|t          |	          |fc c S i|r|s||d         ddfS dS )a  Parse the text to extract the reasoning content / content
        if we have them.

        Args:
            current_text (str): The full previous + delta text.

        Returns:
            tuple[Optional[str], Optional[int], Optional[str]]: Tuple of len 3
            containing the reasoning content, the length of the response seq
            (if there is one) and the non-reasoning content.
        r   NFc                 0    g | ]\  }}|j         k    |S  )r!   )r   idxcharr%   s      r   
<listcomp>z@GraniteReasoningParser._get_content_sections.<locals>.<listcomp>Q  s5     
 
 
Tt,,, ,,,r   r^   )NNN)	enumerater   r    r   )r%   r3   current_chunk_startstart_reasoningparsed_contentdelimiter_idxscurrent_chunk_endcurrent_chunkr   rK   end_reasoningr/   r0   s   `            r   r9   z,GraniteReasoningParser._get_content_sections@  s     
 
 
 
&|44
 
 
 "0 	P 	P()<=N)NOM&#'#:  K$CRC(888*;a*?.?!.C+ 9 $ P&*&@ P PN$c.&9&9%9A%=%?%?@NSVTVSVDWWW ):C<O<O(O$01N$O	+78IA8M8O8O+P((#n*=*=?OOOOOOO X  	>> 	> 0 014==r   )__name__
__module____qualname____doc__r   r   strr   tupler1   r   intr   r@   boolrH   rL   r:   r;   r<   r9   __classcell__)r(   s   @r   r
   r
      sA        
"9 
 
 
 
 
 
8++*?+	sTz3:%	&+ + + +.99 9 	9
 %SM9 $C=9 "#9 
	9 9 9 9x
s 
t 
 
 
 

c 
d 
 
 
 
 @ @  @ 
	 @  @  @  @DL
L
 L
 	L

 
L
 L
 L
 L
\)
)
 )
 	)

 )
 )
 
)
 )
 )
 )
V. . 	sTz3:sTz1	2.  .  .  .  .  .  .  . r   r
   )collections.abcr   regexr   transformersr   0vllm.entrypoints.openai.chat_completion.protocolr   'vllm.entrypoints.openai.engine.protocolr   vllm.loggerr   vllm.reasoningr   rt   loggerr
   rg   r   r   <module>r      s    % $ $ $ $ $     0 0 0 0 0 0      A @ @ @ @ @ # # # # # # * * * * * *	X		[  [  [  [  [ _ [  [  [  [  [ r   