
    PiP                        d dl Z d dlZd dlmZmZmZmZ d dlZd dlZd dl	m
Z
 d dlmZ d dlmZ d dlmZmZ d dlmZ d dlmZ d d	lmZmZmZ d d
lmZmZmZmZ d dlm Z m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z) d dl*m+Z+ d dl,m-Z- d dl.m/Z/  G d de          Z0 G d de          Z1 G d de-          Z2ej3        deddfd            Z4e5dk    r e j6         e4                       dS dS )    N)DictListTupleUnion)evaluate)HFMultimodalLM)HFLM)get_task_dictTaskManager)
make_table)
DictConfig)configtrainingutils)format_content_with_imagesleft_pad_sequenceMessage$padded_collate_tiled_images_and_mask)generatesample)TransformerDecoder)local_kv_cache)DeepFusionModel)	Transform)ModelTokenizer)EvalRecipeInterface)FullModelTorchTuneCheckpointerc                   8   e Zd ZdZddej        ddddded	ed
ej        de	de	dej
        dedede	fdZed             Zed             Zed             Zed             Zed             Zed             Zed             Zed             Zed             Zed             Zed             Zdee	         fdZd)defdZ	 d*d!ee         d"eeej        j                          d#e	fd$Z ej                    d%e eej!        f         d&e	d'ee         fd(            Z"d S )+_VLMEvalWrappera  An EvalWrapper for EleutherAI's eval harness based on gpt-fast's
    EvalWrapper: https://github.com/pytorch-labs/gpt-fast/blob/main/eval.py.

    Note:
        This is ONLY for vision-language models.

    Args:
        model (DeepFusionModel): The VLM to evaluate.
        transform (Transform): The transform (tokenizer) to use for preprocessing.
        device (torch.device): The device to use.
        max_seq_length (int): The maximum sequence length.
        batch_size (int): The batch size.
        dtype (torch.dtype): dtype for the model caches during generation.
        enable_kv_cache (bool): Whether to enable KV cache for generation.
        image_tag (str): The string to use for the image token. Default is "<image>", which
            is the default used by the MMMU dataset.
        max_images_per_sample (int): The maximum number of images per sample. Defaults to
            the max number of images in MMMU.
          Tz<image>   )max_seq_length
batch_sizedtypeenable_kv_cache	image_tagmax_images_per_samplemodel	transformdevicer#   r$   r%   r&   r'   r(   c                    || _         || _        || _        || _        || _        || _        d| _        || _        |	| _        d S NT)	_model
_transform_device_max_seq_length_batch_size_dtype_enable_kv_cache
_image_tag_max_images_per_sample)
selfr)   r*   r+   r#   r$   r%   r&   r'   r(   s
             i/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/recipes/eleuther_eval.py__init__z_VLMEvalWrapper.__init__=   sM     #-% $#&;###    c                 2    | j         | j        _        | j        S N)r3   r.   r%   r7   s    r8   r)   z_VLMEvalWrapper.modelW   s     !K{r:   c                     | j         S r<   )r/   r=   s    r8   model_transformz_VLMEvalWrapper.model_transform^   s
    r:   c                     | j         S r<   r0   r=   s    r8   r+   z_VLMEvalWrapper.deviceb   
    |r:   c                 0     G d d          } |            S )Nc                       e Zd Zd ZdS )2_VLMEvalWrapper.cache_hook.<locals>.DummyCacheHookc                     d | _         d S )Nc                     dS r-    )xyzs      r8   <lambda>zM_VLMEvalWrapper.cache_hook.<locals>.DummyCacheHook.__init__.<locals>.<lambda>k   s    4 r:   )add_partialr=   s    r8   r9   z;_VLMEvalWrapper.cache_hook.<locals>.DummyCacheHook.__init__j   s    #7#7   r:   N)__name__
__module____qualname__r9   rH   r:   r8   DummyCacheHookrE   i   s#        8 8 8 8 8r:   rQ   rH   )r7   rQ   s     r8   
cache_hookz_VLMEvalWrapper.cache_hookf   s:    	8 	8 	8 	8 	8 	8 	8 	8 ~r:   c                     dS )Nr   rH   r=   s    r8   rankz_VLMEvalWrapper.ranko   	     qr:   c                     dS )N   rH   r=   s    r8   
world_sizez_VLMEvalWrapper.world_sizet   rU   r:   c                     | j         S r<   r2   r=   s    r8   r$   z_VLMEvalWrapper.batch_sizey       r:   c                 $    | j         j        j        S r<   )r/   	tokenizereos_idr=   s    r8   eos_token_idz_VLMEvalWrapper.eos_token_id}       (//r:   c                 $    | j         j        j        S r<   )r/   r]   eot_idr=   s    r8   eot_token_idz_VLMEvalWrapper.eot_token_id   r`   r:   c                     | j         S r<   r1   r=   s    r8   
max_lengthz_VLMEvalWrapper.max_length       ##r:   c                     dS r-   rH   r=   s    r8   
truncationz_VLMEvalWrapper.truncation   s    tr:   returnc                 F    | j         j                            |dd          S )NF)add_bosadd_eos)r/   r]   encode)r7   stringkwargss      r8   
tok_encodez_VLMEvalWrapper.tok_encode   s$     (//u/UUUr:   c                 t    t          |t                    r|g}| j        j                            ||          S )N)skip_special_tokens)
isinstanceintr/   r]   decode)r7   tokensrs   s      r8   
tok_decodez_VLMEvalWrapper.tok_decode   sC    fc"" 	XF(//(; 0 
 
 	
r:   N	all_texts
all_imagesleft_truncate_lenc                    g }t          ||          D ]\  }}g }	|D ]7}
|
j        dk    r|
                    d          }
|	                    |
           8g }t	          || j        |	          }|                    t          d|                     |                    t          dd                     |                     d|id	          }|                    |           t          |d
| j	        | j
        j                  }t          j        || j                   |                    d          |d<   ||d         d d | d f         |d<   |S )NRGB)r'   imagesuser)rolecontent	assistant messagesT)	inferenceleft)pad_directionpad_max_imagespad_max_tilesrw   	input_ids)zipmodeconvertappendr   r5   r   r?   r   r6   r/   max_num_tilesr   batch_to_devicer+   pop)r7   ry   rz   r{   argsrp   all_encoded_messagestextr~   proper_imagesimager   r   	tok_batchs                 r8   tok_batch_multimodal_encodez+_VLMEvalWrapper.tok_batch_multimodal_encode   s     "	:66 	3 	3LD&M , ,:&&!MM%00E$$U++++ H0  G OOGAAABBBOOGbAAABBB ,,j(-Ct,TTI ''	2222 9  6/7	
 
 
	 	i555 "+x!8!8	+ (%.{%;AAA@Q?Q?R?R<R%SIk"r:   batchrf   stopc                 \   |                     d          }|j        \  }}|                    dd          }|                    dd          }	|	s|dk    rt          d          |dk    rt	          d| d	          | j        j        | j        z  }
| j        5  t          j
        t          j        | j        | j        ft          j        
                    }t          j        | j                  }d d d            n# 1 swxY w Y   |d d |f         |d<   |d d |f         |d<   t          | j        | j        | j        | j        |
| j                  5  g } | j        |fi |d d df         }t'          |dd           }|                    |                                           |d         d d dd f         }t-          |          D ]}|                                | j        j        v r nw|                     ||d |d d d f         d ||d |f                   d d df         }t'          |dd           }|                    |                                           |dz  }d d d            n# 1 swxY w Y   t          j        |t          j                                      d          S )Nr   temperature        	do_sampleF9Any decoding strategy other than greedy is not supported.rW   zGot a batch size of 'zA'. Batch size > 1 is not yet supported for multimodal generation.)sizer%   	input_posmask)r$   r+   r%   encoder_max_seq_lendecoder_max_seq_len)r   top_kencoder_mask)r   encoder_inputr   r   )r%   r   )r   shapegetRuntimeError
ValueErrorr?   image_seq_lenr6   r+   torchtrilonesrf   boolaranger   r)   r$   r3   r   r   itemrangestop_tokenstensorint32	unsqueeze)r7   r   rf   r   generation_kwargspromptbszseq_lenr   r   r   causal_maskr   generated_tokenslogitstoken
cache_mask_s                     r8   _model_multimodal_generatez*_VLMEvalWrapper._model_multimodal_generate   s    ;''|W'++M3??%))+u==	 	s**K   77) ) ) )    .1LL 	 [ 	6 	6*
/4?;*   K T_55I	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 'tXgX~6k#D(7(N3f J;+ 3 $
 
 
 	 	  "TZ00%00B7F6s$???E##EJJLL111~.qqq"##v6J :&&  ::<<4#7#CCCE$T7D!!!%;<"&!+'g6 $   !!R% v3dCCC ''

5551;	 	 	 	 	 	 	 	 	 	 	 	 	 	 	@ |,EK@@@JJ1MMMs&   AC;;C?C?DI00I47I4)Tr<   )#rN   rO   rP   __doc__r   bfloat16r   r   r+   ru   r%   r   strr9   propertyr)   r?   rR   rT   rX   r$   r_   rc   rf   ri   r   rq   rx   PILImager   inference_moder   Tensorr   rH   r:   r8   r   r   (   s        4 #"^ $ #%&< < << <
 < < < {< < <  #< < < <4   X   X   X     X    X   X     X  0 0 X0 0 0 X0 $ $ X$   XVd3i V V V V

 
c 
 
 
 
 "&	1 191 cio./1 	1 1 1 1f UINC%&IN IN 3i	IN IN IN IN IN INr:   r   c                       e Zd ZdZddej        dddededej        d	e	d
e	dej
        def fdZed             Zed             Zed             Zed             Zed             Zed             Zed             Zdedee	         fdZ	 d!dee         de	deej        ej        f         fdZdeee	         e	f         defdZdej        dej        fdZ ej                    dej        dej        fd             Z xZS )"_LLMEvalWrappera  An EvalWrapper for EleutherAI's eval harness based on gpt-fast's
    EvalWrapper: https://github.com/pytorch-labs/gpt-fast/blob/main/eval.py.

    Note:
        This is for text-only decoder models.

    Args:
        model (TransformerDecoder): The model to evaluate.
        tokenizer (ModelTokenizer): Tokenizer associated with the model being evaluated.
            This should be the same tokenizer used when fine-tuning the model.
        device (torch.device): The device to use.
        max_seq_length (int): The maximum sequence length to use.
        batch_size (int): The batch size per GPU to use.
        dtype (torch.dtype): dtype for the model caches during generation.
        enable_kv_cache (bool): Whether to enable KV cache for generation.
    r    r!   T)r#   r$   r%   r&   r)   r]   r+   r#   r$   r%   r&   c                    t                                          dt          |                     || _        || _        || _        || _        || _        || _        d S )Ngpt2)
pretrainedr+   )	superr9   r   r.   
_tokenizerr1   r2   r3   r4   )	r7   r)   r]   r+   r#   r$   r%   r&   	__class__s	           r8   r9   z_LLMEvalWrapper.__init__+  s[     	F3v;;???#-% /r:   c                     | j         S r<   r.   r=   s    r8   r)   z_LLMEvalWrapper.model?  s
    {r:   c                     | j         j        S r<   )r   r^   r=   s    r8   rc   z_LLMEvalWrapper.eot_token_idC  s    %%r:   c                     | j         S r<   re   r=   s    r8   rf   z_LLMEvalWrapper.max_lengthG  rg   r:   c                     dS )N   rH   r=   s    r8   max_gen_toksz_LLMEvalWrapper.max_gen_toksK  s    sr:   c                     | j         S r<   rZ   r=   s    r8   r$   z_LLMEvalWrapper.batch_sizeO  r[   r:   c                     | j         S r<   rA   r=   s    r8   r+   z_LLMEvalWrapper.deviceS  rB   r:   c                     | j         S r<   )r4   r=   s    r8   r&   z_LLMEvalWrapper.enable_kv_cacheW  s    $$r:   r   rj   c                 <    | j                             |dd          S )NF)r   rl   rm   )r   rn   )r7   r   rp   s      r8   rq   z_LLMEvalWrapper.tok_encode[  s!     %%4%NNNr:   Nr{   c                       fd|D             }t          d |D             d j        j                  }||d d | d f         }|t          j        |          fS )Nc                 :    g | ]}                     |          S rH   )rq   ).0rI   r7   s     r8   
<listcomp>z4_LLMEvalWrapper.tok_batch_encode.<locals>.<listcomp>f  s%    ;;;$//!,,;;;r:   c                 6    g | ]}t          j        |          S rH   )r   r   )r   rI   s     r8   r   z4_LLMEvalWrapper.tok_batch_encode.<locals>.<listcomp>j  s     555U\!__555r:   T)batch_firstpadding_value)r   r   pad_idr   	ones_like)r7   r   r{   rp   tokenized_textrI   s   `     r8   tok_batch_encodez _LLMEvalWrapper.tok_batch_encodec  s     <;;;d;;; 55n555/0
 
 
 (!!!''((()A%/!$$$$r:   rw   c                 f    t          |t                    r|g}| j                            |          S r<   )rt   ru   r   rv   )r7   rw   rp   s      r8   rx   z_LLMEvalWrapper.tok_decodev  s1    fc"" 	XF%%f---r:   inpsc                 ,    |                      |          S r<   r   )r7   r   rp   s      r8   _model_callz_LLMEvalWrapper._model_call{  s    {{4   r:   contextc           
      "   |j         \  }}|                    dd          }|                    dd          }|s|dk    rt          d          t          j        j                            |ddd| j        |z
  f| j        j	                  }t          | j        | j        | j        | j        | j                  5  t!          | j        || j        |d | j        j        | j        j        	          \  }}	d d d            n# 1 swxY w Y   |d |         S )
Nr   r   r   Fr   r   )value)r$   r+   r%   r   )max_generated_tokensr   r   r   r   )r   r   r   r   nn
functionalpadr2   r   r^   r   r)   r$   r+   r3   rf   r   r   r   r   )
r7   r   r   r   r   r   r   maybe_padded_contexttoksr   s
             r8   _model_generatez_LLMEvalWrapper._model_generate~  st    }W'++M3??%))+u==	 	s**K    %x2661d&,-/(  7  
  

 J;+ $
 
 
 	 	 
$%)%6'- O7  GD!	 	 	 	 	 	 	 	 	 	 	 	 	 	 	" DSDzs   88C<<D D r<   ) rN   rO   rP   r   r   float32r   r   r+   ru   r%   r   r9   r   r)   rc   rf   r   r$   r&   r   r   rq   r   r   r   r   rx   r   r   r   __classcell__)r   s   @r8   r   r     so        . #"] $0 0 0!0 "0
 0 0 0 {0 0 0 0 0 0 0(   X & & X& $ $ X$   X     X    X % % X%Os Oc O O O O 9=% %I%25%	u|U\)	*% % % %&.tCy#~!6 .S . . . .
! !5< ! ! ! ! U&|&	& & & & & & & &r:   r   c                   :    e Zd ZdZdeddfdZdeddfdZddZdS )	EleutherEvalRecipea|  
    This recipe runs evaluation on a trained model using EleutherAI's eval harness.
    This assumes the user has the EleutherAI eval harness installed. See
    https://github.com/EleutherAI/lm-evaluation-harness for more details.

    Features:
        - Single GPU evaluation. Multi-GPU evaluation is currently not supported.
        - Quantization (for text-only models) is supported.
        - Any task from the EleutherAI eval harness

    We recommend launching evaluation using the tune CLI::

        tune run eleuther_eval --config eleuther_evaluation             tasks=["truthfulqa_mc2","hellaswag"]             limit=50     cfgrj   Nc                 \   ddl m}  |d          dk     rt          d          t          j        |j                  | _        t          j        |j        | j                  | _        t          j	        |
                    dd	                    | _        t          j        |j        |
                    d
d                      |j        | _        t          |j                  | _        |j        | _        |
                    dd          | _        |
                    dd           | _        d S )Nr   )versionzlm-evalz0.4.5zoThis recipe requires EleutherAI Eval Harness v0.4.5 or higher. Please install with `pip install lm-eval>=0.4.5`)r+   )r%   r+   	log_levelinfocudnn_deterministic_mode)seed
debug_moder&   Tinclude_path)importlib.metadatar   r   r   
get_devicer+   r   	get_dtyper%   
get_loggerr   loggerset_seedr  limitlisttasksr$   r&   r  )r7   r   r   s      r8   r9   zEleutherEvalRecipe.__init__  s   ......79''C   &cj999'ciLLL
&sww{F'C'CDDcgg.H$&O&O	
 	
 	
 	

 Y
#)__
."ww'8$??GGND99r:   c           	      R   t          j        |j                  }t          j        |          }t          j        |j                  }t          j        | j                  5  | j        5  t          j        |j	                  }d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   |t          |t                    st          d          d|v rt          d          |                    |          }|                    | j        | j                  }|                    d          t          j                 }|                                D ]"\  }}|                    | j                  ||<   #|                    |d           n9|                                t          j                 }|                    |           | j                            d	| j         d
           |                                 t          j        |j                  }	t          |t.                    r)t0          }
| j        s| j                            d           nt          |t6                    rt8          }
 |
||	| j        |j        | j        | j        | j                  | _        d S )NzQuantization is only supported for models quantized and saved with the FullModelTorchTuneCheckpointer - please ensure you have quantized your model and are using the quantized weights!qata  You have specified a quantizer with 'QAT' - QAT quantizers should only be used during quantization aware training and when quantizing models. Please use the corresponding post-training quantizer e.g. Int8DynActInt4WeightQuantizer for Int8DynActInt4WeightQATQuantizer.)r+   r%   F)weights_onlyT)assignz$Model is initialized with precision .zReceived enable_kv_cache=False, but KV cache is required for running multimodal generation in a timely manner. Setting enable_kv_cache=True.)r+   r#   r$   r%   r&   ) r   instantiate	quantizerr   get_quantizer_modecheckpointerset_default_dtyper%   r+   r)   rt   r   r   quantizetoload_checkpoint	MODEL_KEYitemsload_state_dictr	  r   evalr]   r   r   r&   debugr   r   r#   r$   eleuther_model_wrapper)r7   r   r  quantization_moder  r)   	ckpt_dictkvr?   r   s              r8   setupzEleutherEvalRecipe.setup  s&   &s}55	$7	BB )#*:;; '
33 	2 	2T[ 	2 	2&sy11E	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 (l,JKK  A  
 ))) i   &&u--EHHDKtzHBBE$44%4HH"I ")) 1 11 ttDK00	!!!)D!9999$4466x7IJI!!),,, 	M
MMMNNN
 	

 !,S];; e_-- 	5%4"' !!^   122 	5%4"&<&<;-* 0'
 '
 '
###s6    B%(BB%B	B%B	B%%B),B)c                 n   t          | j                  }t          | j        |          }t	          j                    }| j                            d| j                    t          | j        || j	                  }t	          j                    |z
  }| j                            d|dd           | j
        j        dk    rGt          j                    }| j                            d|                                d	z  dd
           t          |          }| j                            d| d           d S )N)r  z+Running evaluation on the following tasks: )r  zEval completed in z.02fz	 seconds.cpuzMax memory allocated: g    eAz GBz


)r   r  r
   r  timer	  r   r   r   r  r+   typer   get_torch_device_namespacemax_memory_allocatedr   )r7   task_manager	task_dictt0outputt1torch_deviceformatted_outputs           r8   r   zEleutherEvalRecipe.evaluate  sB   "0ABBB!$*l;;	 Y[[StzSSTTT'*
 
 

 Y[[2 	@b@@@@AAA;u$$ ;==LK\)J)J)L)Ls)R\\\\   &f--4 044455555r:   )rj   N)rN   rO   rP   r   r   r9   r%  r   rH   r:   r8   r   r     s}         ":J :4 : : : :2D
 D
 D
 D
 D
 D
L6 6 6 6 6 6r:   r   r   rj   c                     t          j        d|            t          |           }|                    |            |                                 dS )zEntry point for the recipe.r   )recipe_namer   )r   N)r   
log_configr   r%  r   )r   recipes     r8   recipe_mainr8  3  sU     "6C@@@@C(((F
LLSL
OOr:   __main__)7sysr)  typingr   r   r   r   r   r   lm_eval.evaluatorr   lm_eval.models.hf_vlmsr   lm_eval.models.huggingfacer	   lm_eval.tasksr
   r   lm_eval.utilsr   	omegaconfr   	torchtuner   r   r   torchtune.datar   r   r   r   torchtune.generationr   r   torchtune.modulesr   torchtune.modules.common_utilsr   torchtune.modules.model_fusionr   torchtune.modules.transformsr   'torchtune.modules.transforms.tokenizersr   torchtune.recipe_interfacesr   torchtune.trainingr   r   r   r   parser8  rN   exitrH   r:   r8   <module>rN     s   


  + + + + + + + + + + + + 



  & & & & & & 1 1 1 1 1 1 + + + + + + 4 4 4 4 4 4 4 4 $ $ $ $ $ $             - - - - - - - - - -            2 1 1 1 1 1 1 1 0 0 0 0 0 0 9 9 9 9 9 9 : : : : : : 2 2 2 2 2 2 B B B B B B ; ; ; ; ; ; = = = = = =nN nN nN nN nNn nN nN nNbL L L L Ld L L L^H6 H6 H6 H6 H6, H6 H6 H6V Z D     zCH[[]] r:   