
     `iK                        U d Z ddlZddlZddlZddlZddlmZ ddlmZm	Z	m
Z
 ddlmZ ddlmZmZ ddlmZmZmZmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZmZm Z   ej!        e"          Z#er, e            Z$ee%e&e	e%         e	e%         f         f         e'd<   n eg d          Z$e$(                                D ]\  Z)Z*e*Z+ e            sdZ+e+e$e)<    eee$          Z,de%fdZ-	 	 	 	 	 	 	 d!de
e%ej.        f         de	e
e%ej.        f                  de/de	e/         de	e0e%e%f                  de	e
e/e%f                  de	e%         de/fdZ1 ed           G d d                      Z2d dgZ3dS )"zAutoVideoProcessor class.    N)OrderedDict)TYPE_CHECKINGOptionalUnion   )PretrainedConfig)get_class_from_dynamic_moduleresolve_trust_remote_code)CONFIG_NAMEVIDEO_PROCESSOR_NAMEcached_fileis_torchvision_availablelogging)requires)BaseVideoProcessor   )_LazyAutoMapping)CONFIG_MAPPING_NAMES
AutoConfigmodel_type_to_module_name!replace_list_option_in_docstringsVIDEO_PROCESSOR_MAPPING_NAMES))glm4vGlm4vVideoProcessor)instructblipInstructBlipVideoVideoProcessor)instructblipvideor   )internvlInternVLVideoProcessor)llava_next_videoLlavaNextVideoVideoProcessor)llava_onevisionLlavaOnevisionVideoProcessor)perception_lmPerceptionLMVideoProcessor)qwen2_5_omniQwen2VLVideoProcessor)
qwen2_5_vlr'   )qwen2_vlr'   )qwen3_omni_moer'   )qwen3_vlQwen3VLVideoProcessor)qwen3_vl_moer,   )
sam2_videoSam2VideoVideoProcessor)smolvlmSmolVLMVideoProcessor)video_llavaVideoLlavaVideoProcessor)vjepa2VJEPA2VideoProcessor
class_namec                    t                                           D ]S\  }}| |v rJt          |          }t          j        d| d          }	 t          ||           c S # t          $ r Y Ow xY wTt          j        	                                D ]}t          |dd           | k    r|c S t          j        d          }t          ||           rt          ||           S d S )N.ztransformers.models__name__transformers)r   itemsr   	importlibimport_modulegetattrAttributeErrorVIDEO_PROCESSOR_MAPPING_extra_contentvalueshasattr)r6   module_name
extractorsmodule	extractormain_modules         /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/auto/video_processing_auto.pyvideo_processor_class_from_namerJ   Q   s   #@#F#F#H#H  Z##3K@@K,->->->@UVVFvz22222!    $ -;BBDD  	9j$//:== >
 ).99K{J'' 0{J///4s   A
A*)A*Fpretrained_model_name_or_path	cache_dirforce_downloadresume_downloadproxiestokenrevisionlocal_files_onlyc                    |                     dd          }	|	-t          j        dt                     |t	          d          |	}t          | t          |||||||	  	        }
|
t                              d           i S t          |
d          5 }t          j        |          cddd           S # 1 swxY w Y   dS )	a  
    Loads the video processor configuration from a pretrained model video processor configuration.

    Args:
        pretrained_model_name_or_path (`str` or `os.PathLike`):
            This can be either:

            - a string, the *model id* of a pretrained model configuration hosted inside a model repo on
              huggingface.co.
            - a path to a *directory* containing a configuration file saved using the
              [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`.

        cache_dir (`str` or `os.PathLike`, *optional*):
            Path to a directory in which a downloaded pretrained model configuration should be cached if the standard
            cache should not be used.
        force_download (`bool`, *optional*, defaults to `False`):
            Whether or not to force to (re-)download the configuration files and override the cached versions if they
            exist.
        resume_download:
            Deprecated and ignored. All downloads are now resumed by default when possible.
            Will be removed in v5 of Transformers.
        proxies (`dict[str, str]`, *optional*):
            A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
            'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
        token (`str` or *bool*, *optional*):
            The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
            when running `hf auth login` (stored in `~/.huggingface`).
        revision (`str`, *optional*, defaults to `"main"`):
            The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
            git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
            identifier allowed by git.
        local_files_only (`bool`, *optional*, defaults to `False`):
            If `True`, will only try to load the video processor configuration from local files.

    <Tip>

    Passing `token=True` is required when you want to use a private model.

    </Tip>

    Returns:
        `Dict`: The configuration of the video processor.

    Examples:

    ```python
    # Download configuration from huggingface.co and cache.
    video_processor_config = get_video_processor_config("llava-hf/llava-onevision-qwen2-0.5b-ov-hf")
    # This model does not have a video processor config so the result will be an empty dict.
    video_processor_config = get_video_processor_config("FacebookAI/xlm-roberta-base")

    # Save a pretrained video processor locally and you can reload its config
    from transformers import AutoVideoProcessor

    video_processor = AutoVideoProcessor.from_pretrained("llava-hf/llava-onevision-qwen2-0.5b-ov-hf")
    video_processor.save_pretrained("video-processor-test")
    video_processor = get_video_processor_config("video-processor-test")
    ```use_auth_tokenNrThe `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.V`token` and `use_auth_token` are both specified. Please set only the argument `token`.)rL   rM   rN   rO   rP   rQ   rR   zbCould not locate the video processor configuration file, will try to use the model config instead.zutf-8)encoding)popwarningswarnFutureWarning
ValueErrorr   r   loggerinfoopenjsonload)rK   rL   rM   rN   rO   rP   rQ   rR   kwargsrT   resolved_config_filereaders               rI   get_video_processor_configre   i   s2   J ZZ 0$77N! A	
 	
 	
 uvvv&%%')
 
 
 #p	
 	
 	
 		"W	5	5	5 !y  ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! !s   B33B7:B7)visiontorchvision)backendsc                   j    e Zd ZdZd Ze ee          d                         Ze		 dd            Z
dS )AutoVideoProcessora%  
    This is a generic video processor class that will be instantiated as one of the video processor classes of the
    library when created with the [`AutoVideoProcessor.from_pretrained`] class method.

    This class cannot be instantiated directly using `__init__()` (throws an error).
    c                      t          d          )NzAutoVideoProcessor is designed to be instantiated using the `AutoVideoProcessor.from_pretrained(pretrained_model_name_or_path)` method.)OSError)selfs    rI   __init__zAutoVideoProcessor.__init__   s    d
 
 	
    c                    |                     dd          }|Ct          j        dt                     |                    d          t          d          ||d<   |                     dd          }|                     dd          }d|d	<   t          j        |fi |\  }}|                    d
d          }	d}
d|                    di           v r|d         d         }
|	|
|                     dd          }|3|                    dd          }|t          
                                v r|}	d|                    di           v r$|d         d         }|                    dd          }
|	b|
`t          |t                    st          j        |fd|i|}t          |d
d          }	t!          |d          rd|j        v r|j        d         }
|	t%          |	          }	|
du}|	dupt'          |          t(          v }t+          ||||          }|rJ|rH|
}t-          ||fi |}	|                     dd          }|	                                  |	j        |fi |S |	 |	j        |fi |S t'          |          t(          v r<t(          t'          |                   }	|	 |	j        |g|R i |S t          d          t          d| dt2           dt4           dt4           dd                    d t          D                        
          )a[  
        Instantiate one of the video processor classes of the library from a pretrained model vocabulary.

        The video processor class to instantiate is selected based on the `model_type` property of the config object
        (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible), or when it's
        missing, by falling back to using pattern matching on `pretrained_model_name_or_path`:

        List options

        Params:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                This can be either:

                - a string, the *model id* of a pretrained video_processor hosted inside a model repo on
                  huggingface.co.
                - a path to a *directory* containing a video processor file saved using the
                  [`~video_processing_utils.BaseVideoProcessor.save_pretrained`] method, e.g.,
                  `./my_model_directory/`.
                - a path or url to a saved video processor JSON *file*, e.g.,
                  `./my_model_directory/preprocessor_config.json`.
            cache_dir (`str` or `os.PathLike`, *optional*):
                Path to a directory in which a downloaded pretrained model video processor should be cached if the
                standard cache should not be used.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force to (re-)download the video processor files and override the cached versions if
                they exist.
            resume_download:
                Deprecated and ignored. All downloads are now resumed by default when possible.
                Will be removed in v5 of Transformers.
            proxies (`dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
            token (`str` or *bool*, *optional*):
                The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
                when running `hf auth login` (stored in `~/.huggingface`).
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
                git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
                identifier allowed by git.
            return_unused_kwargs (`bool`, *optional*, defaults to `False`):
                If `False`, then this function returns just the final video processor object. If `True`, then this
                functions returns a `Tuple(video_processor, unused_kwargs)` where *unused_kwargs* is a dictionary
                consisting of the key/value pairs whose keys are not video processor attributes: i.e., the part of
                `kwargs` which has not been used to update `video_processor` and is otherwise ignored.
            trust_remote_code (`bool`, *optional*, defaults to `False`):
                Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
                should only be set to `True` for repositories you trust and in which you have read the code, as it will
                execute code present on the Hub on your local machine.
            kwargs (`dict[str, Any]`, *optional*):
                The values in kwargs of any keys which are video processor attributes will be used to override the
                loaded values. Behavior concerning key/value pairs whose keys are *not* video processor attributes is
                controlled by the `return_unused_kwargs` keyword parameter.

        <Tip>

        Passing `token=True` is required when you want to use a private model.

        </Tip>

        Examples:

        ```python
        >>> from transformers import AutoVideoProcessor

        >>> # Download video processor from huggingface.co and cache.
        >>> video_processor = AutoVideoProcessor.from_pretrained("llava-hf/llava-onevision-qwen2-0.5b-ov-hf")

        >>> # If video processor files are in a directory (e.g. video processor was saved using *save_pretrained('./test/saved_model/')*)
        >>> # video_processor = AutoVideoProcessor.from_pretrained("./test/saved_model/")
        ```rT   NrU   rP   rV   configtrust_remote_codeT
_from_autovideo_processor_typerj   auto_mapimage_processor_typeImageProcessorVideoProcessorAutoImageProcessorcode_revisionz_This video processor cannot be instantiated. Please make sure you have `torchvision` installed.z Unrecognized video processor in z2. Should have a `video_processor_type` key in its z of z3, or one of the following `model_type` keys in its z: z, c              3      K   | ]}|V  d S )N ).0cs     rI   	<genexpr>z5AutoVideoProcessor.from_pretrained.<locals>.<genexpr>t  s"      @j@jq@j@j@j@j@j@jro   )rX   rY   rZ   r[   getr\   r   get_video_processor_dictreplacer   rB   
isinstancer   r   from_pretrainedr>   rC   ru   rJ   typer@   r
   r	   register_for_auto_class	from_dictr   r   join)clsrK   inputsrb   rT   rq   rr   config_dict_video_processor_classvideo_processor_auto_mapimage_processor_classvideo_processor_class_inferredimage_processor_auto_maphas_remote_codehas_local_code	class_refs                    rI   r   z"AutoVideoProcessor.from_pretrained   s   R  $4d;;%M E   zz'"". l   -F7OHd++"JJ':DAA#|+DEbmmflmmQ +0F M M#' ;??:r#B#BBB'2:'>?S'T$ !(-E-M$/OO4JD$Q$Q!$01F1N1NO_aq1r1r. 25R5Y5Y5[5[[[,J)#{z2'F'FFF+6z+BCW+X(+C+K+KL\^n+o+o( !(-E-Mf&677 #31 EVZ`  %,F4JD$Q$Q!vz** Q/Cv/V/V+1?;O+P( ,$CDY$Z$Z!2$>.d:ed6llNe>e5<no
 
  	0 	0I$A)Mj$u$unt$u$u!

?D11A!99;;;2(2;II&III".2(2;II&III&\\444$;DLL$I!$0<,<=Zn]cnnngmnnn u   m/L m m1Em mKVm m(3m m7;yy@j@jLi@j@j@j7j7jm m
 
 	
ro   Fc                 @    t                               | ||           dS )a7  
        Register a new video processor for this class.

        Args:
            config_class ([`PretrainedConfig`]):
                The configuration corresponding to the model to register.
            video_processor_class ([`BaseVideoProcessor`]):
                The video processor to register.
        )exist_okN)r@   register)config_classr   r   s      rI   r   zAutoVideoProcessor.registerw  s'     	 ((7LW_(`````ro   N)F)r9   
__module____qualname____doc__rn   classmethodr   r   r   staticmethodr   r|   ro   rI   rj   rj      s         
 
 
 &&'DEEW
 W
 FE [W
r  a a a \a a aro   rj   r@   )NFNNNNF)4r   r<   r`   osrY   collectionsr   typingr   r   r   configuration_utilsr   dynamic_module_utilsr	   r
   utilsr   r   r   r   r   utils.import_utilsr   video_processing_utilsr   auto_factoryr   configuration_autor   r   r   r   
get_loggerr9   r]   r   strtuple__annotations__r;   
model_typevideo_processorsfast_video_processor_classr@   rJ   PathLikebooldictre   rj   __all__r|   ro   rI   <module>r      s^            				  # # # # # # 1 1 1 1 1 1 1 1 1 1 4 3 3 3 3 3 \ \ \ \ \ \ \ \ f f f f f f f f f f f f f f * * * * * * 8 8 8 8 8 8 * * * * * *            
	H	%	%   \g[f[h[h!;sE(3-RU:V4W/W#Xhhhh$/K	
 	
 	
% %!, %B$G$G$I$I K K J !1 $#%% *%)"0J!*--**+?A^__     4 48 &*(,(,""a! a!#(bk)9#:a!c2;./0a! a! d^	a!
 d38n%a! E$)$%a! sma! a! a! a! a!H 
,---xa xa xa xa xa xa xa .-xav %&:
;ro   