
     `i-                         d Z ddlmZmZ ddlmZ ddlmZmZm	Z	m
Z
 ddlmZmZmZ ddlmZ  G d d	ed
          Z G d ded
          Z ej        e          Z G d de	          ZdgZdS )z!
Processor class for Pix2Struct.
    )OptionalUnion   )BatchFeature)ImagesKwargsProcessingKwargsProcessorMixinUnpack)BatchEncodingPreTokenizedInput	TextInput)loggingc                   h    e Zd ZU ee         ed<   eeeee	e         e	e         f                  ed<   dS )Pix2StructImagesKwargsmax_patchesheader_textN)
__name__
__module____qualname__r   int__annotations__r   r   r   list     /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/pix2struct/processing_pix2struct.pyr   r      sL         #%	+<d9otTeOf fghhhhhhr   r   F)totalc            
       <    e Zd ZU eed<   dddddddddd	ddidZd	S )
Pix2StructProcessorKwargsimages_kwargsTFr   )	add_special_tokenspaddingstridereturn_overflowing_tokensreturn_special_tokens_maskreturn_offsets_mappingreturn_token_type_idsreturn_lengthverboser   i   )text_kwargsr   N)r   r   r   r   r   	_defaultsr   r   r   r   r       s[         )))) #').*/&+%*"

 

 4
 IIIr   r   c            	            e Zd ZdZddgZdZdZ fdZ	 	 	 	 ddee	e
ee	         ee
         f         d	ee         d
eeef         fdZed             Z xZS )Pix2StructProcessora  
    Constructs a PIX2STRUCT processor which wraps a BERT tokenizer and PIX2STRUCT image processor into a single
    processor.

    [`Pix2StructProcessor`] offers all the functionalities of [`Pix2StructImageProcessor`] and [`T5TokenizerFast`]. See
    the docstring of [`~Pix2StructProcessor.__call__`] and [`~Pix2StructProcessor.decode`] for more information.

    Args:
        image_processor (`Pix2StructImageProcessor`):
            An instance of [`Pix2StructImageProcessor`]. The image processor is a required input.
        tokenizer (Union[`T5TokenizerFast`, `T5Tokenizer`]):
            An instance of ['T5TokenizerFast`] or ['T5Tokenizer`]. The tokenizer is a required input.
    image_processor	tokenizerPix2StructImageProcessor)T5TokenizerT5TokenizerFastc                 Z    d|_         t                                          ||           d S )NF)r&   super__init__)selfr-   r.   	__class__s      r   r4   zPix2StructProcessor.__init__J   s+    */	')44444r   Ntextkwargsreturnc                    ||t          d           | j        t          fd| j        j        i|}|d                             dd          }|>| j        j        s2||nd|d         d<   | j        | _         | j        dd|i|d         }|S | j        j        s | j        |fi |d         }	n0|d         	                    d	|            | j        |fi |d         }	|i| j        j        s]||nd
|d         d<    | j        dd|i|d         }d|v r|                    d          |d<   d|v r|                    d          |d<   nd}||	
                    |           |	S )a  
        This method uses [`Pix2StructImageProcessor.preprocess`] method to prepare image(s) for the model, and
        [`T5TokenizerFast.__call__`] to prepare text for the model.

        Please refer to the docstring of the above two methods for more information.
        Nz*You have to specify either images or text.tokenizer_init_kwargsr)   r    Tr7   r   r   Fattention_maskdecoder_attention_mask	input_idsdecoder_input_idsr   )
ValueError_merge_kwargsr   r.   init_kwargspopr-   is_vqacurrent_processor
setdefaultupdate)
r5   imagesr7   audiovideosr8   output_kwargsr    text_encodingencoding_image_processors
             r   __call__zPix2StructProcessor.__call__N   s     >dlIJJJ**%
 
"&."<
 
 

 +=9==>RTXYY>$"6"=>&8&D""$ -()=> &*^D"*DNUUUm8TUUM  #* 	f';t';F'e'emTcFd'e'e$$ /*55mTJJJ';t';F'e'emTcFd'e'e$D$8$?&8&D""% -()=> +DNUUUm8TUUM=00:G:K:KL\:]:]67m++5B5F5F{5S5S12 M$$++M:::''r   c                 ,    | j         j        }ddg}||z   S )Nr=   r?   )r-   model_input_names)r5   image_processor_input_namesdecoder_idss      r   rP   z%Pix2StructProcessor.model_input_names   s$    &*&:&L#/1DE*[88r   )NNNN)r   r   r   __doc__
attributesimage_processor_classtokenizer_classr4   r   r   r   r   r
   r   r   r   rN   propertyrP   __classcell__)r6   s   @r   r,   r,   7   s          $[1J68O5 5 5 5 5 ^b8( 8( I0$y/4HYCZZ[8( 238( 
}l*	+8( 8( 8( 8(t 9 9 X9 9 9 9 9r   r,   N)rS   typingr   r   feature_extraction_utilsr   processing_utilsr   r   r	   r
   tokenization_utils_baser   r   r   utilsr   r   r   
get_loggerr   loggerr,   __all__r   r   r   <module>ra      sP    # " " " " " " " 4 4 4 4 4 4 V V V V V V V V V V V V R R R R R R R R R R      i i i i i\ i i i i
     0    ( 
	H	%	%U9 U9 U9 U9 U9. U9 U9 U9p !
!r   