
    Pi('                     B   d dl Z d dlZd dlZd dlmZmZmZ d dlZd dlm	Z
 d dlmZmZ d dlmZ d dlmZmZmZ d dlmZmZmZ d dlmZ d dlmZ  G d	 d
e          Z G d d          Zej        deddfd            Ze dk    r ej!         e                       dS dS )    N)AnyDictList)
DictConfig	OmegaConf)parallelize_module)configtrainingutils)
load_imageMessage$padded_collate_tiled_images_and_mask)sample)	Transformc                   >    e Zd ZdZdeeef         dee         fdZ	dS )SingleTurnYAMLToMessagesa  
    Converts a single turn conversation in YAML format to a list of messages.

    Expects the YAML to look like:
        system: You are a helpful AI assistant.
        user: What is the capital of France?

    or if it includes an image:
        system: You are a helpful AI assistant.
        user:
            image: url or path_to_image
            text: Describe the image in detail.
    promptreturnc                    g }|                                 D ]\  }}|t          |t                    rd|dg}njd|                                v r(|d         }t	          |          }d|dd|d         dg}n,d|                                v s
J d            d|d         dg}|                    t          ||                     |                    t          dd                     |S )Ntext)typecontentimagez4Multiple entries per role expect at least a text key)roler   	assistant )items
isinstancestrkeysr   appendr   )selfr   messagesr   r   new_content	image_locr   s           w/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/recipes/dev/generate_v2_distributed.py__call__z!SingleTurnYAMLToMessages.__call__'   s"    $\\^^ 	E 	EMD'GS)) M(.7CCDGLLNN**#G,	"9--$77#@@ gllnn,,,I -,,(.76?KKLOOG{CCCDDDD 	["===>>>    N)
__name__
__module____qualname____doc__r   r   r   r   r   r'    r(   r&   r   r      sL         tCH~ $w-      r(   r   c                   |    e Zd ZdZdeddfdZdeddfdZdededdfd	Z	 e
j                    defd
            ZdS )InferenceRecipea  
    Recipe for generating tokens from a dense Transformer-based LLM.
    This works for text-only generation and image-text generation.

    Supports distributed inference using Tensor Paralellism(TP) for
    large models that don't fit on a single GPU. For more information
    on TP, see: https://pytorch.org/docs/stable/distributed.tensor.parallel.html.

    This *does not* currently support the following features:
        - torch.compile
        - quantization through torchao
        - batch generation
    cfgr   Nc                    t          j        |j                  | _        t	          j        |j        | j                  | _        t          j        |j	                  | _
        t          j        d           t          j                    \  }}|dk    | _        t	          j        |j        |                    dd                      d S )Ndevice)dtyper3   nccl)backendr   cudnn_deterministic_mode)seed
debug_mode)r   
get_devicer3   _devicer
   	get_dtyper4   _dtype
get_logger	log_level_loggerdistinit_process_groupget_world_size_and_rank_is_rank_zeroset_seedr8   get)r"   r0   _ranks       r&   __init__zInferenceRecipe.__init__R   s    'sz:::(syNNN'66/////114!QYcgg.H$&O&O	
 	
 	
 	
 	
 	
r(   c                 \   t          j        |j                  }|                                }t	          j        | j                  5  t          j        d          5  t          j        |j	                  }ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   t          j                    }|f}t          j        d|          }t	          j        ||          }t          ||t          j        |j                             t	          j        | j                  5  | j        5  |                                D ]&}t%          |d          r|                                 '	 ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   t	          j        ||t          j                 | j        dd           || _	        | j        r&| j                            d	| j         d
| d           t          j        |j                  | _        t7                      | _        dS )zSetup the model and transforms.metaNcuda)parallelize_plan	rope_initTF)modelfull_sdr3   strictcpu_offloadz%Model was initialized with precision z and TP degree .)r	   instantiatecheckpointerload_checkpointr
   set_default_dtyper=   torchr3   rO   rA   get_world_sizeinit_device_meshprepare_mha_for_tpr   tensor_parallel_planr;   moduleshasattrrN   load_from_full_model_state_dict	MODEL_KEYrD   r@   info	tokenizermodel_transformr   to_messages)	r"   r0   _checkpointer
_ckpt_dictrO   	tp_degreetp_mesh_shapetp_device_meshms	            r&   setupzInferenceRecipe.setup^   s    *3+;<<"2244
 '44 	2 	2el66J6J 	2 	2&sy11E	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 '))	".v}EE +E>BB#/0HII	
 	
 	
 	
 '44 	" 	"dl 	" 	"]]__ " "1k** "KKMMM"	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	0x12<	
 	
 	
 	
 
 	L```T]```  
  &1#-@@355sl   BB6BB	B	B	
BB B&F.<E7+F7E;	;F>E;	?FFF
total_timetokens_per_secondc                    t          d t          j        | j                                        | j                                                  D                       }| j                            d|dd|dd           | j                            d||z  dz  dd           | j        j	        d	k    rIt          j                    }| j                            d
|                                dz  dd           dS dS )zLogs the following metrics: total time for inference, tokens/sec,
        bandwidth achieved, and max memory allocated.

        Feel free to modify this function to log additional metrics.
        c                 P    g | ]#}|                                 |j        j        z  $S r-   )numelr4   itemsize).0ps     r&   
<listcomp>z/InferenceRecipe.log_metrics.<locals>.<listcomp>   s:        		AG,,  r(   zTime for inference: z.02fz sec total, z tokens/seczBandwidth achieved: i   @z GiB/scpuzMax memory allocated: z GiBN)sum	itertoolschainrO   
parametersbuffersr@   ra   r;   r   r   get_torch_device_namespacemax_memory_allocated)r"   rl   rm   
model_sizetorch_devices        r&   log_metricszInferenceRecipe.log_metrics   s7     ")>)>)@)@$*BTBTBVBVWW  
 

 	c:ccc@Qcccc	
 	
 	
 	Z:0A#AW#MZZZZ	
 	
 	
 <%% ;==LLc)J)J)L)LPW)Xcccc     &%r(   c                    |                      t          j        |j                            }t	          d |D                       }|                     d|id          }t          |d                   }||j        z   }| j        5  | j	        
                    d| j        |r| j        j        nd|           ddd           n# 1 swxY w Y   t          j        t          j        ||ft          j        | j        	                    }t          j        |          }i }	|rct%          |gd
d| j        j                  }	|	d         ddd|f         |	d<   |	                    d                              | j                  }
n4t          j        |d         | j                                      d          }
|dd|f         |	d<   |dd|f         |	d<   t1          j        |	| j                   g }t5          j                    } | j	        |
fi |	dddf         }t9          ||j        |j                  }|                    |                                            |r,|	                    d           |	d         ddddf         |	d<   tC          |j                  D ]}|d|f         |	d<   |d|dddf         |	d<   |                                 | j        j"        v r na | j	        |fi |	dddf         }t9          ||j        |j                  }|                    |                                            |dz  }t5          j                    |z
  }| j        #                    |          }| j$        r| j%        &                    d| d           t          |          |z  }| j$        r| '                    ||           dS dS )z9The main entry point for generating tokens from a prompt.c                     g | ]	}|j         
S r-   )contains_media)rr   rj   s     r&   rt   z,InferenceRecipe.generate.<locals>.<listcomp>   s    "F"F"F1#3"F"F"Fr(   r#   T)	inferencetokens   N)
batch_sizer4   encoder_max_seq_lendecoder_max_seq_len)sizer4   r3   left)pad_directionpad_max_imagespad_max_tilesencoder_maskr2   r   mask	input_pos)temperaturetop_kencoder_inputz


)rl   rm   )(rd   r   to_containerr   anyrc   lenmax_new_tokensr;   rO   setup_cachesr=   image_seq_lenrX   trilonesboolaranger   max_num_tilespoptotensor	unsqueezer   batch_to_devicetimeperf_counterr   r   r   r!   itemrangestop_tokensdecoderD   r@   ra   r   )r"   r0   r#   is_multimodal_inputmodel_inputsseq_lentotal_response_lengthcausal_maskr   batchr   generated_tokenst0logitstokenitdecodedrm   s                      r&   generatezInferenceRecipe.generate   s|    ##I$:3:$F$FGG!"F"FX"F"F"FGG ++Z,Bd+SSl8,-- '#*< < \ 	 	J##k:MWD(66SW$9 $   	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 jJ+-BCj|  
 
 L!677	  	8$ "2@	  E %*.$9!!!XgX+$FE.!YYx((++DL99FF\X&t|  ill  $D(7(N3f&tXgX~6keT\222   F,,e,,QQQU3v3?#)LLL

--- 	B IIo&&&$).$9!!!RSS&$AE.! s)** 	 	A "+4=!9E+'gtQQQ(>?E&Mzz||t3???TZ////26F6sciPPPE##EJJLL111qLGG"$ &--.>?? 	2L0W000111   011A5 	P=NOOOOO	P 	Ps   2CC
C)r)   r*   r+   r,   r   rI   rk   intfloatr   rX   inference_moder   r-   r(   r&   r/   r/   C   s         

J 

4 

 

 

 

/6 /6 /6 /6 /6 /6bc e     0 UYPJ YP YP YP YP YP YPr(   r/   r0   r   c                     t          j        d|            t          |           }|                    |            |                    |            d S )Nr/   )recipe_namer0   )r0   )r	   
log_configr/   rk   r   )r0   recipes     r&   mainr     sW    
"3====%%%F
LLSL
OOOr(   __main__)"rw   sysr   typingr   r   r   rX   torch.distributeddistributedrA   	omegaconfr   r   !torch.distributed.tensor.parallelr   	torchtuner	   r
   r   torchtune.datar   r   r   torchtune.generationr   torchtune.modules.transformsr   r   r/   parser   r)   exitr-   r(   r&   <module>r      s       



  " " " " " " " " " "              + + + + + + + + @ @ @ @ @ @ - - - - - - - - - - T T T T T T T T T T ' ' ' ' ' ' 2 2 2 2 2 2( ( ( ( (y ( ( (V~P ~P ~P ~P ~P ~P ~P ~PB j T     zCHTTVV r(   