
     `iZ                        d Z ddlZddlZddlmZ ddlmZ ddlmZm	Z	 ddl
mZ ddlmZmZ d	d
lmZmZmZ  e            rddlZ ej        e          ZdZ	 	 	 	 d/de	ee         df         dedee         fdZ e            r)edfdej        j        dedee         dej        j        fdZ	 	 	 	 d/dee         dedee         fdZ G d de          Z  G d de          Z! G d de          Z" G d de"          Z# G d de          Z$ G d de          Z% G d  d!e          Z& G d" d#e          Z' G d$ d%e          Z( G d& d'e          Z) G d( d)e          Z*d*dd*d*d	d*d*d*d*d+	Z+e$e"e#e!e%e&e'e(e)e*d,
Z,d-d-d-d-d-d.d-d-d-d-d,
Z-dS )0zGLUE processors and helpers    N)asdict)Enum)OptionalUnion   )PreTrainedTokenizer)is_tf_availablelogging   )DataProcessorInputExampleInputFeaturesu  This {0} will be removed from the library soon, preprocessing should be handled with the 🤗 Datasets library. You can have a look at this example script for pointers: https://github.com/huggingface/transformers/blob/main/examples/pytorch/text-classification/run_glue.pyexamplesztf.data.Dataset	tokenizer
max_lengthc                 2   t          j        t                              d          t                     t                      rCt          | t          j        j	                  r$|t          d          t          | |||          S t          | |||||          S )a=  
    Loads a data file into a list of `InputFeatures`

    Args:
        examples: List of `InputExamples` or `tf.data.Dataset` containing the examples.
        tokenizer: Instance of a tokenizer that will tokenize the examples
        max_length: Maximum example length. Defaults to the tokenizer's max_len
        task: GLUE task
        label_list: List of labels. Can be obtained from the processor using the `processor.get_labels()` method
        output_mode: String indicating the output mode. Either `regression` or `classification`

    Returns:
        If the `examples` input is a `tf.data.Dataset`, will return a `tf.data.Dataset` containing the task-specific
        features. If the input is a list of `InputExamples`, will return a list of task-specific `InputFeatures` which
        can be fed to the model.

    functionNzWWhen calling glue_convert_examples_to_features from TF, the task parameter is required.r   task)r   r   
label_listoutput_mode)warningswarnDEPRECATION_WARNINGformatFutureWarningr	   
isinstancetfdataDataset
ValueError%_tf_glue_convert_examples_to_features"_glue_convert_examples_to_features)r   r   r   r   r   r   s         u/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/data/processors/glue.py!glue_convert_examples_to_featuresr%   )   s    2 M%,,Z88-HHH lZ"'/BB l<vwww4XyU_fjkkkk-)
*bm       returnc                    t          |                     fd| D             } t          | |||          |dk    rt          j        nt          j        }fd}|j        }t          j        j                            |t          
                    |t          j                  |fd |D             t          j        g           f          S )zb
        Returns:
            A `tf.data.Dataset` containing the task-specific features.

        c                 `    g | ]*}                                         |                    +S  )tfds_mapget_example_from_tensor_dict).0example	processors     r$   
<listcomp>z9_tf_glue_convert_examples_to_features.<locals>.<listcomp>Z   s6    ppp\cI&&y'M'Mg'V'VWWpppr&   r   sts-bc               3      K   D ]H} d t          |                                           D             }|                    d          }||fV  Id S )Nc                     i | ]
\  }}|||S Nr*   )r-   kvs      r$   
<dictcomp>zF_tf_glue_convert_examples_to_features.<locals>.gen.<locals>.<dictcomp>`   s    JJJdaAMQMMMr&   label)r   itemspop)exdr8   featuress      r$   genz2_tf_glue_convert_examples_to_features.<locals>.gen^   sg       ! !JJfRjj&6&6&8&8JJJg%j    ! !r&   c                 :    i | ]}|t          j        d g          S r4   )r   TensorShape)r-   r5   s     r$   r7   z9_tf_glue_convert_examples_to_features.<locals>.<dictcomp>i   s&    ===Aa''===r&   )glue_processorsr%   r   float32int64model_input_namesr   r    from_generatordictfromkeysint32r@   )	r   r   r   r   
label_typer>   input_namesr=   r/   s	          @@r$   r"   r"   N   s     $D)++	ppppgoppp4XyU_fjkkk#'7??RZZ
	! 	! 	! 	! 	!  1w--]];11:>=====r~b?Q?QR
 
 	
r&   c                    ||j         }|yt          |                     }|4|                                }t                              d| d|            -t
          |         t                              d d|            d t          |          D             dt          dt          t          t          d f         ffdfd| D             } |d	 | D             |d
d          g }t          t          |                     D ]<fdD             }	t          di |	d|         i}
|                    |
           =t          | d d                   D ]d\  }t                              d           t                              d|j                    t                              d|                     e|S )NzUsing label list z
 for task zUsing output mode c                     i | ]\  }}||	S r*   r*   )r-   ir8   s      r$   r7   z6_glue_convert_examples_to_features.<locals>.<dictcomp>   s    @@@ha@@@r&   r.   r'   c                     | j         d S dk    r| j                  S dk    rt          | j                   S t                    )Nclassification
regression)r8   floatKeyError)r.   	label_mapr   s    r$   label_from_examplez>_glue_convert_examples_to_features.<locals>.label_from_example   sR    = 4***W]++L(('''{###r&   c                 &    g | ]} |          S r*   r*   )r-   r.   rT   s     r$   r0   z6_glue_convert_examples_to_features.<locals>.<listcomp>   s%    BBBg  ))BBBr&   c                 *    g | ]}|j         |j        fS r*   )text_atext_b)r-   r.   s     r$   r0   z6_glue_convert_examples_to_features.<locals>.<listcomp>   s!    BBBg'.'.	)BBBr&   r   T)r   padding
truncationc                 .    i | ]}||                  S r*   r*   )r-   r5   batch_encodingrM   s     r$   r7   z6_glue_convert_examples_to_features.<locals>.<dictcomp>   s%    BBBa!^A&q)BBBr&   r8      z*** Example ***zguid: z
features: r*   )model_max_lengthrA   
get_labelsloggerinfoglue_output_modes	enumerater   r   intrQ   rangelenr   appendguid)r   r   r   r   r   r   r/   labelsr=   inputsfeaturer.   r\   rM   rT   rS   s        `      @@@@r$   r#   r#   m   s/    /
#D)++	"--//JKKHJHH$HHIII+D1KKKJ[JJDJJKKK@@)J*?*?@@@I$L $U3t;K5L $ $ $ $ $ $ $ CBBBBBBFYBBBBB	  N H3x==!! ! !BBBBB>BBB::&::q	:::    !-- 0 0
7%&&&+W\++,,,.!..////Or&   c                       e Zd ZdZdZdS )
OutputModerO   rP   N)__name__
__module____qualname__rO   rP   r*   r&   r$   rm   rm      s        %NJJJr&   rm   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	MrpcProcessorz/Processor for the MRPC data set (GLUE version).c                      t                      j        |i | t          j        t                              d          t                     d S Nr/   super__init__r   r   r   r   r   selfargskwargs	__class__s      r$   rw   zMrpcProcessor.__init__   F    $)&))))00==}MMMMMr&   c           	      L   t          |d                                         |d                                                             d          |d                                                             d          t          |d                                                             S See base class.idx	sentence1utf-8	sentence2r8   r   numpydecodestrry   tensor_dicts     r$   r,   z*MrpcProcessor.get_example_from_tensor_dict       $$&&$**,,33G<<$**,,33G<<G$**,,--	
 
 	
r&   c                    t                               dt          j                            |d                      |                     |                     t          j                            |d                    d          S )r   zLOOKING AT 	train.tsvtrain)r`   ra   ospathjoin_create_examples	_read_tsvry   data_dirs     r$   get_train_examplesz MrpcProcessor.get_train_examples   s`    G"',,x"E"EGGHHH$$T^^BGLL;4W4W%X%XZabbbr&   c                     |                      |                     t          j                            |d                    d          S r   zdev.tsvdevr   r   r   r   r   r   s     r$   get_dev_exampleszMrpcProcessor.get_dev_examples   5    $$T^^BGLL94U4U%V%VX]^^^r&   c                     |                      |                     t          j                            |d                    d          S r   ztest.tsvtestr   r   s     r$   get_test_exampleszMrpcProcessor.get_test_examples   5    $$T^^BGLL:4V4V%W%WY_```r&   c                 
    ddgS r   01r*   ry   s    r$   r_   zMrpcProcessor.get_labels       Szr&   c           	          g }t          |          D ]Y\  }}|dk    r| d| }|d         }|d         }|dk    rdn|d         }	|                    t          ||||	                     Z|S )5Creates examples for the training, dev and test sets.r   -r      r   Nrh   rW   rX   r8   rc   rg   r   
ry   linesset_typer   rM   linerh   rW   rX   r8   s
             r$   r   zMrpcProcessor._create_examples   s     '' 	` 	`GAtAvv$$$$D!WF!WF$..DDDGEOOLd6&X]^^^____r&   rn   ro   rp   __doc__rw   r,   r   r   r   r_   r   __classcell__r|   s   @r$   rr   rr      s        99N N N N N
 
 
c c c
_ _ _a a a        r&   rr   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	MnliProcessorz3Processor for the MultiNLI data set (GLUE version).c                      t                      j        |i | t          j        t                              d          t                     d S rt   ru   rx   s      r$   rw   zMnliProcessor.__init__   r}   r&   c           	      L   t          |d                                         |d                                                             d          |d                                                             d          t          |d                                                             S )r   r   premiser   
hypothesisr8   r   r   s     r$   r,   z*MnliProcessor.get_example_from_tensor_dict   s    $$&&	"((**11'::%++--44W==G$**,,--	
 
 	
r&   c                     |                      |                     t          j                            |d                    d          S r   r   r   r   r   s     r$   r   z MnliProcessor.get_train_examples   5    $$T^^BGLL;4W4W%X%XZabbbr&   c                     |                      |                     t          j                            |d                    d          S )r   zdev_matched.tsvdev_matchedr   r   s     r$   r   zMnliProcessor.get_dev_examples   s6    $$T^^BGLLK\4]4]%^%^`mnnnr&   c                     |                      |                     t          j                            |d                    d          S )r   ztest_matched.tsvtest_matchedr   r   s     r$   r   zMnliProcessor.get_test_examples   s6    $$T^^BGLLK]4^4^%_%_aopppr&   c                 
    g dS )r   )contradiction
entailmentneutralr*   r   s    r$   r_   zMnliProcessor.get_labels   s    9999r&   c           	         g }t          |          D ]n\  }}|dk    r| d|d          }|d         }|d         }|                    d          rdn|d         }	|                    t          ||||	                     o|S )	r   r   r      	   r   Nr   )rc   
startswithrg   r   r   s
             r$   r   zMnliProcessor._create_examples   s     '' 	` 	`GAtAvv**a**D!WF!WF$//77EDDT"XEOOLd6&X]^^^____r&   r   r   s   @r$   r   r      s        ==N N N N N
 
 
c c co o oq q q: : :      r&   r   c                   .     e Zd ZdZ fdZd Zd Z xZS )MnliMismatchedProcessorz>Processor for the MultiNLI Mismatched data set (GLUE version).c                      t                      j        |i | t          j        t                              d          t                     d S rt   ru   rx   s      r$   rw   z MnliMismatchedProcessor.__init__	  r}   r&   c                     |                      |                     t          j                            |d                    d          S )r   zdev_mismatched.tsvdev_mismatchedr   r   s     r$   r   z(MnliMismatchedProcessor.get_dev_examples  s6    $$T^^BGLLK_4`4`%a%acstttr&   c                     |                      |                     t          j                            |d                    d          S )r   ztest_mismatched.tsvtest_mismatchedr   r   s     r$   r   z)MnliMismatchedProcessor.get_test_examples  s6    $$T^^BGLLK`4a4a%b%bduvvvr&   )rn   ro   rp   r   rw   r   r   r   r   s   @r$   r   r     sk        HHN N N N Nu u uw w w w w w wr&   r   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	ColaProcessorz/Processor for the CoLA data set (GLUE version).c                      t                      j        |i | t          j        t                              d          t                     d S rt   ru   rx   s      r$   rw   zColaProcessor.__init__  r}   r&   c           	          t          |d                                         |d                                                             d          dt          |d                                                             S r   r   sentencer   Nr8   r   r   s     r$   r,   z*ColaProcessor.get_example_from_tensor_dict  i    $$&&
#))++227;;G$**,,--	
 
 	
r&   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r$   r   z ColaProcessor.get_train_examples&  r   r&   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r$   r   zColaProcessor.get_dev_examples*  r   r&   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r$   r   zColaProcessor.get_test_examples.  r   r&   c                 
    ddgS r   r*   r   s    r$   r_   zColaProcessor.get_labels2  r   r&   c           	          |dk    }|r
|dd         }|rdnd}g }t          |          D ]F\  }}| d| }||         }	|rdn|d         }
|                    t          ||	d|
                     G|S )r   r   r   Nr   r   r   r   )ry   r   r   	test_mode
text_indexr   rM   r   rh   rW   r8   s              r$   r   zColaProcessor._create_examples6  s    &	 	!""IE#*QQ
 '' 	^ 	^GAt$$$$D*%F%2DD47EOOLd6$V[\\\]]]]r&   r   r   s   @r$   r   r     s        99N N N N N
 
 
c c c_ _ _a a a        r&   r   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	Sst2Processorz0Processor for the SST-2 data set (GLUE version).c                      t                      j        |i | t          j        t                              d          t                     d S rt   ru   rx   s      r$   rw   zSst2Processor.__init__H  r}   r&   c           	          t          |d                                         |d                                                             d          dt          |d                                                             S r   r   r   s     r$   r,   z*Sst2Processor.get_example_from_tensor_dictL  r   r&   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r$   r   z Sst2Processor.get_train_examplesU  r   r&   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r$   r   zSst2Processor.get_dev_examplesY  r   r&   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r$   r   zSst2Processor.get_test_examples]  r   r&   c                 
    ddgS r   r*   r   s    r$   r_   zSst2Processor.get_labelsa  r   r&   c           	          g }|dk    rdnd}t          |          D ]Q\  }}|dk    r| d| }||         }|dk    rdn|d         }	|                    t          ||d|	                     R|S )r   r   r   r   r   Nr   r   )
ry   r   r   r   r   rM   r   rh   rW   r8   s
             r$   r   zSst2Processor._create_examplese  s    "f,,QQ!
 '' 	^ 	^GAtAvv$$$$D*%F$..DDDGEOOLd6$V[\\\]]]]r&   r   r   s   @r$   r   r   E  s        ::N N N N N
 
 
c c c_ _ _a a a        r&   r   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	StsbProcessorz0Processor for the STS-B data set (GLUE version).c                      t                      j        |i | t          j        t                              d          t                     d S rt   ru   rx   s      r$   rw   zStsbProcessor.__init__v  r}   r&   c           	      L   t          |d                                         |d                                                             d          |d                                                             d          t          |d                                                             S r   r   r   s     r$   r,   z*StsbProcessor.get_example_from_tensor_dictz  r   r&   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r$   r   z StsbProcessor.get_train_examples  r   r&   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r$   r   zStsbProcessor.get_dev_examples  r   r&   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r$   r   zStsbProcessor.get_test_examples  r   r&   c                     dgS )r   Nr*   r   s    r$   r_   zStsbProcessor.get_labels  s	    vr&   c           	          g }t          |          D ]_\  }}|dk    r| d|d          }|d         }|d         }|dk    rdn|d         }	|                    t          ||||	                     `|S )	r   r   r      r   r   Nr   r   r   r   s
             r$   r   zStsbProcessor._create_examples       '' 	` 	`GAtAvv**a**D!WF!WF$..DDDHEOOLd6&X]^^^____r&   r   r   s   @r$   r   r   s  s        ::N N N N N
 
 
c c c_ _ _a a a        r&   r   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	QqpProcessorz.Processor for the QQP data set (GLUE version).c                      t                      j        |i | t          j        t                              d          t                     d S rt   ru   rx   s      r$   rw   zQqpProcessor.__init__  r}   r&   c           	      L   t          |d                                         |d                                                             d          |d                                                             d          t          |d                                                             S )r   r   	question1r   	question2r8   r   r   s     r$   r,   z)QqpProcessor.get_example_from_tensor_dict  r   r&   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r$   r   zQqpProcessor.get_train_examples  r   r&   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r$   r   zQqpProcessor.get_dev_examples  r   r&   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r$   r   zQqpProcessor.get_test_examples  r   r&   c                 
    ddgS r   r*   r   s    r$   r_   zQqpProcessor.get_labels  r   r&   c           	      (   |dk    }|rdnd}|rdnd}g }t          |          D ]m\  }}|dk    r| d|d          }		 ||         }
||         }|rdn|d	         }n# t          $ r Y Cw xY w|                    t          |	|
||
                     n|S )r   r   r   r      r   r   r   Nr]   r   )rc   
IndexErrorrg   r   )ry   r   r   r   q1_indexq2_indexr   rM   r   rh   rW   rX   r8   s                r$   r   zQqpProcessor._create_examples  s    &	!(11q!(11q '' 
	` 
	`GAtAvv**a**Dhh )6tAw   OOLd6&X]^^^____s   A
A('A(r   r   s   @r$   r   r     s        88N N N N N
 
 
c c c_ _ _a a a        r&   r   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	QnliProcessorz/Processor for the QNLI data set (GLUE version).c                      t                      j        |i | t          j        t                              d          t                     d S rt   ru   rx   s      r$   rw   zQnliProcessor.__init__  r}   r&   c           	      L   t          |d                                         |d                                                             d          |d                                                             d          t          |d                                                             S )r   r   questionr   r   r8   r   r   s     r$   r,   z*QnliProcessor.get_example_from_tensor_dict  s    $$&&
#))++227;;
#))++227;;G$**,,--	
 
 	
r&   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r$   r   z QnliProcessor.get_train_examples  r   r&   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r$   r   zQnliProcessor.get_dev_examples  r   r&   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r$   r   zQnliProcessor.get_test_examples  r   r&   c                 
    ddgS r   r   not_entailmentr*   r   s    r$   r_   zQnliProcessor.get_labels      .//r&   c           	          g }t          |          D ]_\  }}|dk    r| d|d          }|d         }|d         }|dk    rdn|d         }	|                    t          ||||	                     `|S 	r   r   r   r   r   r   Nr   r   r   r   s
             r$   r   zQnliProcessor._create_examples  r   r&   r   r   s   @r$   r  r    s        99N N N N N
 
 
c c c_ _ _a a a0 0 0      r&   r  c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	RteProcessorz.Processor for the RTE data set (GLUE version).c                      t                      j        |i | t          j        t                              d          t                     d S rt   ru   rx   s      r$   rw   zRteProcessor.__init__  r}   r&   c           	      L   t          |d                                         |d                                                             d          |d                                                             d          t          |d                                                             S r   r   r   s     r$   r,   z)RteProcessor.get_example_from_tensor_dict
  r   r&   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r$   r   zRteProcessor.get_train_examples  r   r&   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r$   r   zRteProcessor.get_dev_examples  r   r&   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r$   r   zRteProcessor.get_test_examples  r   r&   c                 
    ddgS r  r*   r   s    r$   r_   zRteProcessor.get_labels  r  r&   c           	          g }t          |          D ]_\  }}|dk    r| d|d          }|d         }|d         }|dk    rdn|d         }	|                    t          ||||	                     `|S r  r   r   s
             r$   r   zRteProcessor._create_examples#  r   r&   r   r   s   @r$   r  r    s        88N N N N N
 
 
c c c_ _ _a a a0 0 0      r&   r  c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	WnliProcessorz/Processor for the WNLI data set (GLUE version).c                      t                      j        |i | t          j        t                              d          t                     d S rt   ru   rx   s      r$   rw   zWnliProcessor.__init__4  r}   r&   c           	      L   t          |d                                         |d                                                             d          |d                                                             d          t          |d                                                             S r   r   r   s     r$   r,   z*WnliProcessor.get_example_from_tensor_dict8  r   r&   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r$   r   z WnliProcessor.get_train_examplesA  r   r&   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r$   r   zWnliProcessor.get_dev_examplesE  r   r&   c                     |                      |                     t          j                            |d                    d          S r   r   r   s     r$   r   zWnliProcessor.get_test_examplesI  r   r&   c                 
    ddgS r   r*   r   s    r$   r_   zWnliProcessor.get_labelsM  r   r&   c           	          g }t          |          D ]_\  }}|dk    r| d|d          }|d         }|d         }|dk    rdn|d         }	|                    t          ||||	                     `|S r  r   r   s
             r$   r   zWnliProcessor._create_examplesQ  r   r&   r   r   s   @r$   r  r  1  s        99N N N N N
 
 
c c c_ _ _a a a        r&   r  r   )	colamnlimrpcsst-2r1   qqpqnlirtewnli)
r$  r%  zmnli-mmr&  r'  r1   r(  r)  r*  r+  rO   rP   )NNNN).r   r   r   dataclassesr   enumr   typingr   r   tokenization_utilsr   utilsr	   r
   r   r   r   
tensorflowr   
get_loggerrn   r`   r   listrd   r%   r   r   r    r"   r#   rm   rr   r   r   r   r   r   r   r  r  r  glue_tasks_num_labelsrA   rb   r*   r&   r$   <module>r5     sK    " ! 				              " " " " " " " " 5 5 5 5 5 5 - - - - - - - - = = = = = = = = = = ? 		H	%	%m  !%	   D&(99: "         F ? 

 $(	
 
'/
&
 SM	

 

 
 
 
D !%	4 4< 4"4 4 4 4 4n       
, , , , ,M , , ,^+ + + + +M + + +\w w w w wm w w w , , , , ,M , , ,^+ + + + +M + + +\+ + + + +M + + +\1 1 1 1 1= 1 1 1h+ + + + +M + + +\+ + + + += + + +\+ + + + +M + + +^ 
 
  &      r&   