
    Piw                      *   d dl Z d dlZd dlZd dlmZmZmZ d dlZd dlm	Z	m
Z
 d dlmZmZmZ d dlmZmZmZ d dlmZ d dlmZ  G d d	e          Z G d
 d          Zej        de	ddfd            Zedk    r ej         e                       dS dS )    N)AnyDictList)
DictConfig	OmegaConf)configtrainingutils)
load_imageMessage$padded_collate_tiled_images_and_mask)sample)	Transformc                   >    e Zd ZdZdeeef         dee         fdZ	dS )SingleTurnYAMLToMessagesa  
    Converts a single turn conversation in YAML format to a list of messages.

    Expects the YAML to look like:
        system: You are a helpful AI assistant.
        user: What is the capital of France?

    or if it includes an image:
        system: You are a helpful AI assistant.
        user:
            image: url or path_to_image
            text: Describe the image in detail.
    promptreturnc                    g }|                                 D ]\  }}|t          |t                    rd|dg}njd|                                v r(|d         }t	          |          }d|dd|d         dg}n,d|                                v s
J d            d|d         dg}|                    t          ||                     |                    t          dd                     |S )Ntext)typecontentimagez4Multiple entries per role expect at least a text key)roler   	assistant )items
isinstancestrkeysr   appendr   )selfr   messagesr   r   new_content	image_locr   s           k/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/recipes/dev/generate_v2.py__call__z!SingleTurnYAMLToMessages.__call__%   s"    $\\^^ 	E 	EMD'GS)) M(.7CCDGLLNN**#G,	"9--$77#@@ gllnn,,,I -,,(.76?KKLOOG{CCCDDDD 	["===>>>    N)
__name__
__module____qualname____doc__r   r   r   r   r   r&    r'   r%   r   r      sL         tCH~ $w-      r'   r   c                   |    e Zd ZdZdeddfdZdeddfdZdededdfd	Z	 e
j                    defd
            ZdS )InferenceRecipeaE  
    Recipe for generating tokens from a dense Transformer-based LLM.
    This works for text-only generation and image-text generation.

    This *does not* currently support the following features:
        - torch.compile
        - quantization through torchao
        - multi-GPU generation
        - batch generation
    cfgr   Nc                 (   t          j        |j                  | _        t	          j        |j        | j                  | _        t          j        |j	                  | _
        t	          j        |j        |                    dd                      d S )Ndevice)dtyper2   cudnn_deterministic_mode)seed
debug_mode)r
   
get_devicer2   _devicer	   	get_dtyper3   _dtype
get_logger	log_level_loggerset_seedr5   get)r!   r/   s     r%   __init__zInferenceRecipe.__init__M   s    'sz:::(syNNN'66cgg.H$&O&O	
 	
 	
 	
 	
 	
r'   c                 2   t          j        |j                  }|                                }t	          j        | j                  5  | j        5  t          j        |j                  }ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   |	                    |t          j
                            || _        | j                            d| j         d           t          j        |j                  | _        t                      | _        dS )zSetup the model and transforms.Nz%Model was initialized with precision .)r   instantiatecheckpointerload_checkpointr	   set_default_dtyper:   r8   modelload_state_dict	MODEL_KEYr=   info	tokenizermodel_transformr   to_messages)r!   r/   _checkpointer
_ckpt_dictrG   s        r%   setupzInferenceRecipe.setupU   sr    *3+;<<"2244
 '44 	2 	2dl 	2 	2&sy11E	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2j);<===
P$+PPPQQQ  &1#-@@355s6   BA5)B5A9	9B<A9	=BBB
total_timetokens_per_secondc                    t          d t          j        | j                                        | j                                                  D                       }| j                            d|dd|dd           | j                            d||z  dz  dd           | j        j	        d	k    rIt          j                    }| j                            d
|                                dz  dd           dS dS )zLogs the following metrics: total time for inference, tokens/sec,
        bandwidth achieved, and max memory allocated.

        Feel free to modify this function to log additional metrics.
        c                 P    g | ]#}|                                 |j        j        z  $S r,   )numelr3   itemsize).0ps     r%   
<listcomp>z/InferenceRecipe.log_metrics.<locals>.<listcomp>m   s:        		AG,,  r'   zTime for inference: z.02fz sec total, z tokens/seczBandwidth achieved: i   @z GiB/scpuzMax memory allocated: z GiBN)sum	itertoolschainrG   
parametersbuffersr=   rJ   r8   r   r
   get_torch_device_namespacemax_memory_allocated)r!   rQ   rR   
model_sizetorch_devices        r%   log_metricszInferenceRecipe.log_metricsf   s7     ")>)>)@)@$*BTBTBVBVWW  
 

 	c:ccc@Qcccc	
 	
 	
 	Z:0A#AW#MZZZZ	
 	
 	
 <%% ;==LLc)J)J)L)LPW)Xcccc     &%r'   c                 z   |                      t          j        |j                            }t	          d |D                       }|                     d|id          }t          |d                   }||j        z   }| j        5  | j	        
                    d| j        |r| j        j        nd|           ddd           n# 1 swxY w Y   t          j        t          j        ||ft          j        | j        	                    }t          j        |          }i }	|rct%          |gd
d| j        j                  }	|	d         ddd|f         |	d<   |	                    d                              | j                  }
n4t          j        |d         | j                                      d          }
|dd|f         |	d<   |dd|f         |	d<   t1          j        |	| j                   g }t5          j                    } | j	        |
fi |	dddf         }t9          ||j        |j                  }|                    |                                            |r,|	                    d           |	d         ddddf         |	d<   tC          |j                  D ]}|d|f         |	d<   |d|dddf         |	d<   |                                 | j        j"        v r na | j	        |fi |	dddf         }t9          ||j        |j                  }|                    |                                            |dz  }t5          j                    |z
  }| j        #                    |          }| j$        %                    d| d           t          |          |z  }| &                    ||           dS )z9The main entry point for generating tokens from a prompt.c                     g | ]	}|j         
S r,   )contains_media)rW   ms     r%   rY   z,InferenceRecipe.generate.<locals>.<listcomp>   s    "F"F"F1#3"F"F"Fr'   r"   T)	inferencetokens   N)
batch_sizer3   encoder_max_seq_lendecoder_max_seq_len)sizer3   r2   left)pad_directionpad_max_imagespad_max_tilesencoder_maskr1   r   mask	input_pos)temperaturetop_kencoder_inputz


)rQ   rR   )'rM   r   to_containerr   anyrL   lenmax_new_tokensr8   rG   setup_cachesr:   image_seq_lentorchtrilonesboolaranger   max_num_tilespoptotensor	unsqueezer
   batch_to_devicetimeperf_counterr   rx   ry   r    itemrangestop_tokensdecoder=   rJ   rd   )r!   r/   r"   is_multimodal_inputmodel_inputsseq_lentotal_response_lengthcausal_maskrv   batchr   generated_tokenst0logitstokenitdecodedrR   s                      r%   generatezInferenceRecipe.generate~   sY    ##I$:3:$F$FGG!"F"FX"F"F"FGG ++Z,Bd+SSl8,-- '#*< < \ 	 	J##k:MWD(66SW$9 $   	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 jJ+-BCj|  
 
 L!677	  	8$ "2@	  E %*.$9!!!XgX+$FE.!YYx((++DL99FF\X&t|  ill  $D(7(N3f&tXgX~6keT\222   F,,e,,QQQU3v3?#)LLL

--- 	B IIo&&&$).$9!!!RSS&$AE.! s)** 	 	A "+4=!9E+'gtQQQ(>?E&Mzz||t3???TZ////26F6sciPPPE##EJJLL111qLGG"$ &--.>??,,,,---   011A5A9JKKKKKs   2CC
C)r(   r)   r*   r+   r   r@   rP   intfloatrd   r   inference_moder   r,   r'   r%   r.   r.   A   s        	 	
J 
4 
 
 
 
6 6 6 6 6 6"c e     0 UWLJ WL WL WL WL WL WLr'   r.   r/   r   c                     t          j        d|            t          |           }|                    |            |                    |            d S )Nr.   )recipe_namer/   )r/   )r   
log_configr.   rP   r   )r/   recipes     r%   mainr      sW    
"3====%%%F
LLSL
OOOr'   __main__)r\   sysr   typingr   r   r   r   	omegaconfr   r   	torchtuner   r	   r
   torchtune.datar   r   r   torchtune.generationr   torchtune.modules.transformsr   r   r.   parser   r(   exitr,   r'   r%   <module>r      s       



  " " " " " " " " " "  + + + + + + + + - - - - - - - - - - T T T T T T T T T T ' ' ' ' ' ' 2 2 2 2 2 2( ( ( ( (y ( ( (VUL UL UL UL UL UL UL ULp j T     zCHTTVV r'   