§
    ÇPƒiÄ  ã                   ó:  — d dl mZ d dlZd dlmc mZ d dlmZ d dl	m
Z
 	 ddej        dej        dedeej        ej        f         fd	„Z	 ddej        dej        dedej        fd„Ze
dfdej        dej        dededej        f
d„Zdej        dedej        fd„ZdS )é    )ÚTupleN)Úrlhf)ÚCROSS_ENTROPY_IGNORE_IDXÚ	sequencesÚstop_tokensÚ
fill_valueÚreturnc                 óŒ   — t          j        | |¦  «        }t          j        |d¬¦  «        }|dk    |dk    | z  z  }|| |<   || fS )aq  
    Truncates sequence(s) after the first stop token and pads with ``fill_value``.

    Args:
        sequences (torch.Tensor): tensor of shape [batch_size, sequence_length] or [sequence_length].
        stop_tokens (torch.Tensor): tensor containing stop tokens.
        fill_value (int): value to pad the sequence with after the first stop token, usually ``pad_id``.

    Returns:
        Tuple[torch.Tensor, torch.Tensor]: A tuple of two tensors with the same shape as ``sequences``:
            - padding_mask (torch.Tensor): a bool tensor where True indicates the token has been truncated.
            - sequences (torch.Tensor) a tensor of truncated and padded sequences.

    Example:
        >>> stop_token_ids = torch.tensor([2, 869])
        >>> fill_value = 0
        >>> sequences = torch.tensor(
        >>>     [
        >>>         [869, 30, 869],
        >>>         [2, 30, 869],
        >>>         [869, 30, 2],
        >>>         [50, 30, 869],
        >>>         [13, 30, 2],
        >>>         [13, 30, 5],
        >>>         [13, 2, 20],
        >>>         [13, 2, 2],
        >>>         [2, 2, 2],
        >>>     ]
        >>> )
        >>> eos_mask, truncated_sequences = rlhf.truncate_sequence_at_first_stop_token(
        >>>     sequences, stop_token_ids, fill_value
        >>> )
        >>> eos_mask
        >>> torch.tensor([
        >>>         [False, True, True],
        >>>         [False, True, True],
        >>>         [False, True, True],
        >>>         [False, False, False],
        >>>         [False, False, False],
        >>>         [False, False, False],
        >>>         [False, False, True],
        >>>         [False, False, True],
        >>>         [False, True, True],
        >>>     ]
        >>> )
        >>> truncated_sequences
        >>> torch.tensor([
        >>>         [869, 0, 0],
        >>>         [2, 0, 0],
        >>>         [869, 0, 0],
        >>>         [50, 30, 869],
        >>>         [13, 30, 2],
        >>>         [13, 30, 5],
        >>>         [13, 2, 0],
        >>>         [13, 2, 0],
        >>>         [2, 0, 0],
        >>>     ]
        >>> )
    é   ©Údim)ÚtorchÚisinÚcumsum)r   r   r   Úeos_maskÚseq_lensÚpadding_masks         úv/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchtune/rlhf/sequence_processing.pyÚ%truncate_sequence_at_first_stop_tokenr      sX   € õ| Œz˜) [Ñ1Ô1€HÝŒ|˜H¨!Ð,Ñ,Ô,€HØ˜q’L h°!¢m¸°yÑ%@ÑA€LØ(€IˆlÑØ˜Ð"Ð"ó    ç      ð?ÚlogitsÚtemperaturec                 ó¨   — t          j        t          j        | |z  d¬¦  «        d|                     d¦  «        ¦  «                             d¦  «        S )aø  
    Converts logits corresponding to a generated sequence to logprobs over the generated tokens.

    Args:
        logits (torch.Tensor): The logits tensor of shape [b, response_length, vocab_size].
        sequences (torch.Tensor): The corresponding tokens of shape [b, response_length].
        temperature (float): The temperature to scale the logits. Default 1.0
    Returns:
        torch.Tensor: The log probabilities corresponding to each token in ``sequences``. Shape [b, response_length].
    éÿÿÿÿr   é   )r   ÚgatherÚFÚlog_softmaxÚ	unsqueezeÚsqueeze)r   r   r   s      r   Úlogits_to_logprobsr"   T   sP   € õ Œ<Ý	Œf˜{Ñ*°Ð3Ñ3Ô3Ø	Ø×Ò˜BÑÔñô ÷ ‚gˆbk„kð	r   FÚlabelsÚlabel_pad_token_idÚreturn_average_logprobsc                 óV  — | j         dd…         |j         k    rt          d¦  «        ‚|dd…dd…f                              ¦   «         }| dd…dd…dd…f         } ||k    }d|||k    <   t          | |d¬¦  «        }|rt	          j        ||d¬¦  «        S ||z                       d¦  «        S )	a(  
    Calculate log probabilities based on provided logits and labels.

    Args:
        logits (torch.FloatTensor): direct logits output of the model of shape (b, s, v)
        labels (torch.LongTensor): ground-truth labels to compute log probs with, shape (b, s).
            Label tokens with a value of label_pad_token_id are ignored.
        label_pad_token_id (int): token id to ignore in labels.
        return_average_logprobs (bool): If True, return the average log probs across the sequence. Default
            is False. See https://github.com/eric-mitchell/direct-preference-optimization/blob/f8b8c0f49dc92a430bae41585f9d467d3618fe2f/trainers.py#L96 # noqa

    Returns:
        Calculated log probs of shape (b, )

    Raises:
        ValueError: If logits and labels have different shapes.
    Nr   zKLogits (batch and sequence length dim) and labels must have the same shape.r   r   r   )r   r   )ÚshapeÚ
ValueErrorÚcloner"   r   Úmasked_meanÚsum)r   r#   r$   r%   Ú	loss_maskÚper_token_log_probss         r   Úget_batch_log_probsr.   h   sÚ   € ð0 „|CRCÔ˜FœLÒ(Ð(ÝØYñ
ô 
ð 	
ð AAAqrrEŒ]× Ò Ñ"Ô"€FØAAAs˜s˜A˜A˜AIÔ€FØÐ,Ò,€Ià+,€Fˆ6Ð'Ò'Ñ(å,¨V°VÈÐMÑMÔMÐàð 9ÝÔÐ 3°YÀBÐGÑGÔGÐGà# iÑ/×4Ò4°RÑ8Ô8Ð8r   Úquery_response_logitsÚcontext_lengthc                 ó$   — | dd…|dz
  d…f         S )a  
    Truncates logits generated over a sequence for estimating logprobs over the tokens in the sequence.
    This assumes the sequence is of the (query, response) format with length (context_length + response_length)
    Args:
        query_response_logits (torch.Tensor): The logits tensor of shape [b, context_length + response_length, vocab_size].
        context_length (int): The length of the context.

    Returns:
        torch.Tensor: The truncated logits for the response with shape [b, response_length, vocab_size].Nr   r   © )r/   r0   s     r   Útruncate_sequence_for_logprobsr3   “   s"   € ð !    N°QÑ$6¸Ð$;Ð!;Ô<Ð<r   )r   )r   )Útypingr   r   Útorch.nn.functionalÚnnÚ
functionalr   Ú	torchtuner   Útorchtune.datar   ÚTensorÚintr   Úfloatr"   ÚFloatTensorÚ
LongTensorÚboolr.   r3   r2   r   r   ú<module>r@      s–  ðð Ð Ð Ð Ð Ð à €€€Ø Ð Ð Ð Ð Ð Ð Ð Ð Ø Ð Ð Ð Ð Ð Ø 3Ð 3Ð 3Ð 3Ð 3Ð 3ð KLðB#ð B#ØŒ|ðB#Ø*/¬,ðB#ØDGðB#à
ˆ5Œ<˜œÐ%Ô&ðB#ð B#ð B#ð B#ðL ILðð ØŒLðØ%*¤\ðØ@Eðà
„\ðð ð ð ð. 7Ø$)ð	(9ð (9ØÔð(9àÔð(9ð ð(9ð "ð	(9ð
 Ôð(9ð (9ð (9ð (9ðV=Ø œ<ð=Ø9<ð=à
„\ð=ð =ð =ð =ð =ð =r   