
     `i'                         d dl Z d dlmZ ddlmZ ddlmZ ddlmZ ddl	m
Z
mZ ddlmZmZ dd	lmZmZ d
dlmZ  e            r
d dlZd dlmZ dZdZdZdZdZ	 ddZ G d d          ZdS )    N)Image   )create_causal_mask)_get_model_class)
AutoConfig)MODEL_FOR_PRETRAINING_MAPPINGMODEL_MAPPING)PROCESSOR_MAPPING_NAMESAutoProcessor)TOKENIZER_MAPPING_NAMESAutoTokenizer   )is_torch_availablez[92mz[93mz[0mu   ■u   ⬚<img>c                 ^	                                     j        dk    rdddddf         j        dk    rddddddf         t                     t          d  D                       }d}g }t	                     D ]F\  }	|	k    r|s	}df<   |dk    r*|	k    s	dz
  k    rdz
  k    rdz  d||f<   d}Gdfdt                    D             d	                    fd
t                    D                       }
||dk    }t          j        |	                    d          dz  |z   
                                |	                    d          d          t          j        d|dz   |          }t          j        |          t           t           t           dt            t           t           d}|                    d	|z              d	|dz   z  d                    t          |
          dz            z   }|dz  }|                    |           g }t	                     D ]\  }||f         dk    rg|                    d t'          t)          |                              t          t)                                                  D                        z|                    t'          t)          |                              t          t)                                                             t'          t-          t&          t/          |                     }|D ]P}|                    7|dz   d	z  d	                    |          z   dz   d	                    |          z   nd           Qt	                     D ]\  t1                                        |          }v rt            | t           n|}d	                     fdt                    D                       }d}3d	                     fdt                    D                       }|                    | dt)                                        d           d	| d|            d                    |          S )z
    Generates an attention matrix from a given attention mask.

    Optionally applies a sliding window mask (e.g., for Gemma2/3) and
    marks regions where image tokens occur based on the specified `img_token`.
       r   N   c              3   N   K   | ] }t          t          |                    V  !d S N)lenrepr).0words     {/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/utils/attention_visualizer.py	<genexpr>z6generate_attention_matrix_from_mask.<locals>.<genexpr>8   s.      <<d#d4jj//<<<<<<    r   r   c                 J    g | ]fd t                    D             S )c                 >    g | ]}d |z
  cxk    rk     rn ndnd S )r   r    )r   jisliding_windows     r   
<listcomp>zBgenerate_attention_matrix_from_mask.<locals>.<listcomp>.<listcomp>I   sB    \\\aa1q5&A&A&A&A>&A&A&A&A&A\\\r   )range)r   r!   nr"   s    @r   r#   z7generate_attention_matrix_from_mask.<locals>.<listcomp>I   s=    oooab\\\\\SXYZS[S[\\\ooor    c              3      K   | ]Z}d |f         rt            t           t           n4d |k    rt           t           t           nd |f         rt          nt          V  [dS r   NYELLOWBLACK_SQUARERESETGREENWHITE_SQUARE)r   r    masks     r   r   z6generate_attention_matrix_from_mask.<locals>.<genexpr>K   s       	 	  1:	6(<(((( 66 ,|,U,,, 1:\\	 	 	 	 	 	r      )
boundariesz: i == j (diagonal)   z: token_type_idszAttention MatrixzSliding Window Maskc                 2    g | ]}t            | t           S r   )r*   r,   )r   ks     r   r#   z7generate_attention_matrix_from_mask.<locals>.<listcomp>i   s(    #e#e#eav$9q$9%$9$9#e#e#er   z	    |     c              3      K   | ]h}|         v r%|f         rv rt            t           t           n4|k    rt           t           t           n|f         rt          nt          V  id S r   r)   )r   r    r!   	img_tokenr/   r   wordss     r   r   z6generate_attention_matrix_from_mask.<locals>.<genexpr>x   s       	
 	
  E!H$$ad$	T8I8I ,|,U,,, Avv 0<0000 AqDz	
 	
 	
 	
 	
 	
r   c              3      K   | ]x}|         v r1v r-d f         d |f         k    rt            t           t           n8|k    rt           t           t           n         |         rt          nt          V  ydS r(   r)   )r   r    r!   r7   sliding_window_masktoken_type_bucketsr   r8   s     r   r   z6generate_attention_matrix_from_mask.<locals>.<genexpr>   s       	* 	*  a((Y$->->CUVWYZVZC[_qrsuvrv_wCwCw 0<0000 66 4|4U444 'q)!,"\\!	* 	* 	* 	* 	* 	*r   z: 
)intndimr   max	enumerater$   jointorchwherecumsumboolarange	bucketizer-   r+   r,   r*   appendljustliststrrjustmapzipr   )r8   r/   r7   r"   token_type_idsimage_seq_lengthmax_word_lengthfirst_img_idxoutputr4   	row_dummy
is_specialr2   legendf_stringvertical_headeridxrow	word_reprcolored_wordrow_displaysliding_window_rowr!   r%   r:   r;   r   s   ````                  @@@@@r   #generate_attention_matrix_from_maskr_   (   sv    88::DyA~~Aqqq!!!G}yA~~Aq!!!QQQJE

A<<e<<<<<OMF%    1	>>->MDAJ1!y..AQJJAEzzQ56Dq-/12M !ooooofklmfnfnooo 	 	 	 	 q	 	 	 	 	I !#q(
"[""2&&*Z7==??AVAVWYAZAZ\]
 
 \!%5%9;KLL
"_-?JWWW o|oUoo&o,oX]oooF
MM#,o)*-?-E-Ec)nnXYFY-Z-ZZH!))
MM(Ou%% F F	TS>Q""#e#e4CWZ[^_`[a[aWbWbHcHcCdCd#e#e#effff""4Cs3q66{{(C(C#D#DEEEE3tS/%:;;<<O 
 
) q C'#((3--7+EQTUU	
 	
 	
 	

 U## g g4JJ$$_55	8AT8I8I&4)4U444yhh 	
 	
 	
 	
 	
 	
 	
 	
 1XX	
 	
 	
 	
 	
  %!$ 	* 	* 	* 	* 	* 	* 	* 	* 	* q	* 	* 	* 	" 	" 	eeQaee;eeQceeffff99Vr   c                   6    e Zd ZdefdZddefdZddefdZdS )	AttentionMaskVisualizer
model_namec                 .   t          j        |          }d| _        t          |                                d          r(t          |                                dd           | _        	 t          |t                    }n%# t          $ r t          |t                    }Y nw xY w|t          d| d          || _         G d d|t          j                  } |||          | _        | j                            |j                   || _        || _        d S )Nr   r"   zModel name z- is not supported for attention visualizationc                       e Zd Zd ZdS )7AttentionMaskVisualizer.__init__.<locals>._ModelWrapperc                     t           j                            |            t          j        dd          | _        || _        d S )Nr   )nnModule__init__Lineardummy_moduleconfig)selfrl   rb   s      r   ri   z@AttentionMaskVisualizer.__init__.<locals>._ModelWrapper.__init__   s5    	""4((($&IaOO!$r   N)__name__
__module____qualname__ri   r   r   r   _ModelWrapperre      s#        % % % % %r   rq   )r   from_pretrainedimage_tokenhasattrget_text_configgetattrr"   r   r	   	Exceptionr   
ValueError
mapped_clsrg   rh   modeltodtyperepo_idrl   )rm   rb   rl   ry   rq   s        r   ri   z AttentionMaskVisualizer.__init__   s:   +J77"6))++-=>> 	\")&*@*@*B*BDTVZ"["[D	Q)&-@@JJ 	Q 	Q 	Q)&2OPPJJJ	Q d:dddeee$	% 	% 	% 	% 	%J	 	% 	% 	% #]6:66

fl###!s   'A= =BBr5   input_sentencec                 4    |                      ||           d S )N)suffix)visualize_attention_mask)rm   r~   r   s      r   __call__z AttentionMaskVisualizer.__call__   s!    %%nV%DDDDDr   c           
          | j         }i }d }| j        j        t          v rd}t	          j        t          j        |d          j                  }d}t          j
        | j        |          }t          |d          r|j        }n&|j                            |j        g          d         }|r|                    d|          } ||||d	
          }	|j                            |j        g          d         | _        |	d         }
d|	v r|	d         |d<   |j                            |	d         d                   }nr| j        j        t$          v rBt'          j
        | j                  }|                    |          } ||d	          d         }
nt+          d|j        j         d          d|j        _        |                                 |
j        \  }}t3          j        |||j        j        f| j         j                  }t3          j        |          }t=          |j        ||
|d           }||                                 }
n>|
                     d                               d          !                    |d||          }
dtE          d| j        j         d| j#                   dz   z  }d}tI          d|            tI          dd| j        j         d| j         d| j#        j%         &                    tE          |                    z   dz   |z              tI          |            tO          ||
| j        tQ          | j        dd           |                    d          |          }tI          |           tI          |            d S ) Nzchttps://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg?download=trueT)streamr1   )rP   rs   r   r   pt)imagestextr   return_tensorsattention_maskrO   	input_ids)r   zModel type z) does not support attention visualizationeager)r|   )rl   input_embedsr   cache_positionpast_key_valuesr   z##zAttention visualization for z | r   r<   z"  Attention visualization for [1m:z[0m z    r"   )r7   r"   rO   rP   ))rz   rl   
model_typer
   r   openrequestsgetrawr   rr   r}   rt   rs   	tokenizerconvert_ids_to_tokensimage_token_idreplacer   r   tokenizerx   _attn_implementationtrainshaperB   zeroshidden_sizer|   rF   r   rE   	unsqueezeexpandr   ry   printrn   centerr_   rv   )rm   r~   r   rz   kwargsrP   img	processorrs   inputsr   tokensr   
batch_size
seq_lengthr   r   causal_masktop_bottom_borderside_borderrW   s                        r   r   z0AttentionMaskVisualizer.visualize_attention_mask   s   
;!%<<<wC*X\#d;;;?@@C %5dlUefffIy-00 g'3'1GGIaHbccdef N!/!7!7!M!MYcv^bcccF(2HH)JbIcddefgD#$45N6))+12B+C'((>>vk?RST?UVVFF[#'>>>%5dlCCI''77F&Y~dKKKL\]NNm5<+Bmmmnnn,3)!/!5
J{J
EL<T#U]a]g]mnnnj11(<%)) 
 
 
 ")..000NN+55a88BB1EELLZYZ\fhrssN [t{/E[[$/[[\\_``
 &$&&'''dk6Lt|eiete}  G  G%&&  	
 	
 	
 	
 	"$%%%6&"4;0@$GG!::&677-
 
 
 	h"$%%%%%r   N)r5   )rn   ro   rp   rK   ri   r   r   r   r   r   ra   ra      s}        3    2E Es E E E EJ& J&s J& J& J& J& J& J&r   ra   )r   NNN)r   PILr   masking_utilsr   models.auto.auto_factoryr   models.auto.configuration_autor   models.auto.modeling_autor   r	   models.auto.processing_autor
   r   models.auto.tokenization_autor   r   import_utilsr   rB   torch.nnrg   r-   r*   r,   r+   r.   r_   ra   r   r   r   <module>r      sW           . . . . . . 7 7 7 7 7 7 7 7 7 7 7 7 T T T T T T T T P P P P P P P P R R R R R R R R , , , , , ,  LLL 		 `di i i iXg& g& g& g& g& g& g& g& g& g&r   