
    Pi0?                        d dl mZmZmZmZ d dlZd dlmZ dej        dej        dej        fdZ	dddd	d
ej        de
dee         deej                 dej        f
dZ	 d#dddddedej        dej        deej                 deej                 de
dee         deej        ej        f         fdZdej        dej        dej        dej        fdZ	 d#dej        dee         dej        fdZdej        fdZ ej                    d dddddddedej        dedede
dee         deee                  d eej                 d!ee         deej        ej        f         fd"            ZdS )$    )CallableListOptionalTupleN)TransformerDecoderprobsqreturnc                 r    t          j        | |z  dd                              t           j                  S )z(Samples from a multinomial distribution.T)dimkeepdim)dtype)torchargmaxtoint)r   r	   s     t/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchtune/generation/_generation.pymultinomial_sample_oner      s0    <	r4888;;%);LLL    g      ?temperaturetop_kr	   logitsr   r   c          	         | t          |d          z  } |t          j        | t          ||                     d                              \  }}|                    dd                              d          }t          j        | |k     t          d           |           } t          j	        j
                            | d          }|'t          j        |                              d          }t          ||          S )a:  Generic sample from a probability distribution. Includes support for Top-K sampling
    and Temperature.

    Args:
        logits (torch.Tensor): logits from which to sample
        temperature (float): value to scale the predicted logits by, default 1.0.
        top_k (Optional[int]): If specified, we prune the sampling to only token ids within the top_k probabilities
        q (Optional[torch.Tensor]): randomly sampled tensor for softmax sampling trick. If None,
            we use the default softmax sampling trick. Default None.

    Example:
        >>> from torchtune.generation import sample
        >>> logits = torch.empty(3, 3).uniform_(0, 1)
        >>> sample(logits)
        tensor([[1],
                [2],
                [0]], dtype=torch.int32)

    Returns:
        torch.Tensor: sampled token id
    gh㈵>Nr   Infr      )maxr   topkminsizeselect	unsqueezewherefloatnn
functionalsoftmax
empty_likeexponential_r   )r   r   r   r	   v_pivotr   s           r   sampler/      s    : c+t,,,Fz&#eV[[__"="=>>1R  **2.. Ve^eEll]FCC H''B'77E 	yU##0033!%+++r   )maskr   r   model	input_posxr0   c                     | |||          dddf         }t          |                                |||          |                    d          fS )a  
    Generates the next tokens given a prompt, and also returns the corresponding logits.

    Args:
        model (TransformerDecoder): model used for generation
        input_pos (torch.Tensor): tensor with the positional encodings associated with the given prompt,
            with shape [bsz x seq_length].
        x (torch.Tensor): tensor with the token IDs associated with the given prompt,
            with shape [bsz x seq_length].
        q (Optional[torch.Tensor]): randomly sampled tensor for softmax sampling trick.
            See https://github.com/pytorch-labs/gpt-fast/blob/32971d3129541c5bfb4f715abc33d1c5f408d204/generate.py#L40
        mask (Optional[torch.Tensor]): attention mask with shape [bsz x seq_length x seq_length],
            default None.
        temperature (float): value to scale the predicted logits by, default 1.0.
        top_k (Optional[int]): Top-k value to use for sampling, default None.

    Returns:
        Tuple[torch.Tensor, torch.Tensor]: tuple of two tensors:
            - tokens (torch.Tensor): tensor with the generated tokens,
                with shape [bsz x 1].
            - logits (torch.Tensor): tensor with the logits associated with the generated tokens,
                with shape [bsz x 1 x vocab_size].

    )r2   r0   Nr   r   r   )r/   cloner$   )r1   r2   r3   r	   r0   r   r   r   s           r   generate_next_tokenr6   B   s_    H U1	555aaae<Fv||~~;eqIII r   tokensstop_tokensstop_token_reachedc                 ^    t          j        | |                                          }||z  }|S )z2Updates which sequences have reached a stop token.)r   isinflatten)r7   r8   r9   stop_token_reached_currs       r   update_stop_tokens_trackerr>   m   s4     $j==EEGG11r   padding_masktarget_seq_lenc                    | j         \  }}||n|}||k     rt          d          t          j        t          j        ||| j        t                    d                              |dd          }|                    dd|          	                    | dddddf         
                    d|d                     |                    dd	                              t          j        d
g                     |S )a  
    Converts a padding mask of shape ``[bsz, seq_len]`` to a ``[bsz, seq_len, seq_len]`` causal attention mask suitable for
    consumption by :func:`~torch.nn.functional.scaled_dot_product_attention`. If ``target_seq_len``
    is provided, this will return a mask of shape ``[bsz, seq_len, target_seq_len]``. This is useful
    when generating masks for static KV caches where the maximum length the caches have been setup with
    are longer than the current sequence.

    Args:
        padding_mask (torch.Tensor): Boolean tensor where False indicates the corresponding token in the sequence
            is a padding token and should be masked out in attention, with shape [bsz x seq_length]
        target_seq_len (Optional[int]): target sequence length to create attention mask with. Default None.

    Returns:
        torch.Tensor: Boolean causal mask with shape
            - [bsz, seq_length, seq_length] or
            - [bsz, seq_length, target_seq_len] if ``target_seq_len`` was specified.

    Raises:
        AssertionError: if ``target_seq_len < seq_len``, the sequence length of the padding mask.

    Example:
        >>> padding_mask = torch.tensor([[False, True, True, True]])
        >>> get_causal_mask_from_padding_mask(padding_mask, target_seq_len=5)
        tensor([[[ True, False, False, False, False],
                  [False,  True, False, False, False],
                  [False,  True,  True, False, False],
                  [False,  True,  True,  True, False]]])
        ])
    NzNtarget_seq_len cannot be shorter than the sequence length of the padding mask.devicer   r   )diagonalr      r   )dim1dim2T)shapeAssertionErrorr   trilonesrC   boolrepeatnarrowmul_expandrD   copy_Tensor)r?   r@   bszseq_lenr0   s        r   !get_causal_mask_from_padding_maskrU   y   s
   @  %LC . 6WWNN\
 
 	
 :
7N<3FdSSS   fS!Q 	 	KK1g##LD!!!$<$C$CBQS$T$TUUUMMqqM!!''dV(<(<===Kr   c                 r    |                      d          dz
  | z                      t          j                  S )a  
    Calculates position ids given a padding mask which right-shifts position ids to start
    from the first valid token.

    Args:
        padding_mask (torch.Tensor): Boolean tensor where False indicates the corresponding token in the sequence
            is a padding token and should be masked out in attention. Shape [bsz, seq_len]

    Returns:
        torch.Tensor: position ids which are appropriately shifted according to any padding values.

    Example:
        >>> padding_mask = torch.tensor([False, False, False, True, True, True, True, True])
        >>> get_position_ids_from_padding_mask(padding_mask)
        torch.Tensor([0, 0, 0, 0, 1, 2, 3, 4])
    r   r   )cumsumr   r   r   )r?   s    r   "get_position_ids_from_padding_maskrX      s2    &   $$q(L8<<UYGGGr   )pad_idr   r   r8   rngcustom_generate_next_tokenpromptmax_generated_tokensrY   rZ   r[   c          
         |j         dk    r|                    dd          n|}|t          }|                                \  }	}
|
|z   }|                                }|                                 }|s|n| j        }||k    }|                                sJt          j	        j
                            |d|fd          }t          ||          }t          |          }n{t          j        t          j        ||t          j        |j                                                d          }t          j        d||j        	                              d          }|r|ddd|
f         }n|ddd|
d|
f         }d}|<t          j        |	| j        j        f|j        	                              d|
          }t          | |ddd|
f                                         |||||          \  }}t          j        ||gd          }|
}t          j        |	t          j        |j                  }|r!t          j        ||j        |j                  nd}t          j        |	|
dz   ft          j        |j                  }|;t=          |||          }|                                                                r||fS tA          |dz
            D ]v}|-t          j        ||!                    |	d           gd          }|rA|dd|f         "                                }|dd|dddf         "                                }n<|                                }|ddd|dz   f         }|ddd|dz   d|dz   f         }d}|<t          j        |	| j        j        f|j        	                              d|
          } || ||                                ||||          \  }}t          j        ||gd          }t          j        ||gd          }|dz  }|'t=          |||          }|                                r nx|#||z  }||dd|j#        d          ddf         z  }||fS )a1	  
    Generates tokens from a model conditioned on a prompt, and also returns logits for the generations.

    Args:
        model (TransformerDecoder): model used for generation
        prompt (torch.Tensor): tensor with the token IDs associated with the given prompt,
            with shape either [seq_length] or [bsz x seq_length].
        max_generated_tokens (int): number of tokens to be generated
        pad_id (int): token ID to use for padding, default 0.
        temperature (float): value to scale the predicted logits by, default 1.0.
        top_k (Optional[int]): If specified, we prune the sampling to only token ids within the top_k probabilities,
            default None.
        stop_tokens (Optional[List[int]]): If specified, generation is stopped when any of these tokens are generated,
            default None.
        rng (Optional[torch.Generator]): random number generator, default None.
        custom_generate_next_token (Optional[Callable]): If specified, we'll use the
            ``custom_generate_next_token function``. This is generally only useful if
            you want to specify a ``torch.compile`` version of the generate next token for
            performance reasons. If None, we use the default :func:`generate_next_token`.
            Default is None.

    Note:
        This function has only been tested with decoder-only models.

    Examples:
        >>> model = torchtune.models.llama3.llama3_8b()
        >>> tokenizer = torchtune.models.llama3.llama3_tokenizer()
        >>> prompt = tokenizer.encode("Hi my name is")
        >>> rng.manual_seed(42)
        >>> output, logits = generate(model, torch.tensor(prompt), max_generated_tokens=100, pad_id=0)
        >>> print(tokenizer.decode(output[0].tolist()))
        Hi my name is Jeremy and I'm a friendly language model assistant!

    Returns:
        Tuple[torch.Tensor, torch.Tensor]: tuple of two tensors:
            - tokens (torch.Tensor): tensor with the generated tokens,
                with shape ``[bsz x seq_len + num_generated_tokens]`` where ``num_generated_tokens``
                may be less than ``max_generated_tokens`` if ``stop_tokens`` are provided.
            - logits (torch.Tensor): tensor with the logits associated with the generated tokens,
                with shape ``[bsz x num_generated_tokens x vocab_size]``.
    r   r   Nr   T)value)r@   )r   rC   )rC   )	generator)r2   r0   r3   r   r   r	   r   rB   )r2   r3   r0   r   r   r	   )$ndimviewr6   r"   r5   caches_are_enableddecoder_max_cache_seq_lenallr   r'   r(   padrU   rX   rJ   rK   rL   rC   r$   arangeemptytok_embeddingsnum_embeddingsr+   squeezecatzerostensorr   int32r>   itemrangereshape
contiguousrH   )r1   r\   r]   rY   r   r   r8   rZ   r[   rS   prompt_lengthtotal_response_lengthgenerated_tokensincremental_decodingmax_seq_lenpadding_masksmasksr2   
curr_masksr	   r7   generated_logitscurr_posr9   stop_token_maskr-   curr_input_posr   s                               r   generater      s   l $*;!#3#3V[[BF!)%8"C),@@||~~ 3355 $	-,  %.M  +//A34D 0 
 

 2+
 
 

 7}EE		 
J%j}	  
 
 )A,, 	 L$-=-D
 
 

)A,, 	  > 111n}n,-

 111n}nn}n<=
A
K%&56v}
 
 

,qC,
(
( 	
  3AAA~~-.6688

     F y"2F!;DDDH S
6=QQQ 	[flKKKK  j	ma FM  O
 7K!3
 
 !!##((** 	6#%555'!+,, * * "#i #5#=#=c1#E#E"EFB  O   	B&qqq({3>>@@Nqqq(D!!!34??AAJJ%++--F&qqq.HqL.'89Nqqq.HqL..HqL.@AJ?e*9:6=  l1l,,  43$llnn#
 
 
 !9&6%?RHHH 9&6%?QGGGA"!;%7" " "%%''  O+OAAA0@0Fq0I/I/K/KT,QRR---r   )N)typingr   r   r   r   r   torchtune.modules.transformerr   rR   r   r&   r   r/   r6   r>   rU   rX   no_grad	Generatorr    r   r   <module>r      s   3 2 2 2 2 2 2 2 2 2 2 2  < < < < < <M%, M5< MEL M M M M  $-, -, -,L-, -, C=	-,
 -, \-, -, -, -,h !%	( $(( ( ((|( |( 	( 5<
 ( ( C=( 5<%&( ( ( (V	L	',|	IN	
\	 	 	 	 AE. .,.08.
\. . . .bH,H H H H,  '+%)59J. J. J.J.LJ. 	J.
 J. J. C=J. $s)$J. 
%/	"J. !) 2J. 5<%&J. J. J. J. J. J.r   