
import types
import warnings
from typing import Any, Optional, Union, overload

import numpy as np

from ..models.bert.tokenization_bert import BasicTokenizer
from ..utils import ExplicitEnum, add_end_docstrings, is_tf_available, is_torch_available
from .base import ArgumentHandler, ChunkPipeline, Dataset, build_pipeline_init_args


if is_tf_available():
    import tensorflow as tf

    from ..models.auto.modeling_tf_auto import TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES

if is_torch_available():
    import torch

    from ..models.auto.modeling_auto import MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES


class TokenClassificationArgumentHandler(ArgumentHandler):
    """
    Handles arguments for token classification.
    """

    def __call__(self, inputs: Union[str, list[str]], **kwargs):
        is_split_into_words = kwargs.get("is_split_into_words", False)
        delimiter = kwargs.get("delimiter")

        if inputs is not None and isinstance(inputs, (list, tuple)) and len(inputs) > 0:
            inputs = list(inputs)
            batch_size = len(inputs)
        elif isinstance(inputs, str):
            inputs = [inputs]
            batch_size = 1
        elif Dataset is not None and isinstance(inputs, Dataset) or isinstance(inputs, types.GeneratorType):
            return inputs, None, is_split_into_words, delimiter
        else:
            raise ValueError("At least one input is required.")

        offset_mapping = kwargs.get("offset_mapping")
        if offset_mapping:
            if isinstance(offset_mapping, list) and isinstance(offset_mapping[0], tuple):
                offset_mapping = [offset_mapping]
            if len(offset_mapping) != batch_size:
                raise ValueError("offset_mapping should have the same batch size as the input")

        return inputs, offset_mapping, is_split_into_words, delimiter


class AggregationStrategy(ExplicitEnum):
    """All the valid aggregation strategies for TokenClassificationPipeline"""

    NONE = "none"
    SIMPLE = "simple"
    FIRST = "first"
    AVERAGE = "average"
    MAX = "max"


@add_end_docstrings(
    build_pipeline_init_args(has_tokenizer=True),
    r"""
        ignore_labels (`list[str]`, defaults to `["O"]`):
            A list of labels to ignore.
        grouped_entities (`bool`, *optional*, defaults to `False`):
            DEPRECATED, use `aggregation_strategy` instead. Whether or not to group the tokens corresponding to the
            same entity together in the predictions.
        stride (`int`, *optional*):
            If stride is provided, the pipeline is applied to the whole text. The text is split into chunks of size
            model_max_length. This works only with fast tokenizers and an `aggregation_strategy` other than `NONE`. The
            value of this argument defines the number of overlapping tokens between chunks. In other words, the model
            will shift forward by `tokenizer.model_max_length - stride` tokens each step.
        aggregation_strategy (`str`, *optional*, defaults to `"none"`):
            The strategy to fuse (or not) tokens based on the model prediction.

                - "none" : Will not do any aggregation and simply return the raw results from the model.
                - "simple" : Will attempt to group entities following the default schema. (A, B-TAG), (B, I-TAG), (C,
                  I-TAG), (D, B-TAG2), (E, B-TAG2) will end up being [{"word": "ABC", "entity": "TAG"}, {"word": "D",
                  "entity": "TAG2"}, {"word": "E", "entity": "TAG2"}]. Notice that two consecutive B tags will end up as
                  different entities. On word-based languages, we might end up splitting words undesirably: imagine
                  Microsoft being tagged as [{"word": "Micro", "entity": "ENTERPRISE"}, {"word": "soft", "entity":
                  "NAME"}]. Look at FIRST, MAX and AVERAGE for ways to mitigate this and disambiguate words (on languages
                  that support that meaning, which is basically tokens separated by a space). These mitigations will
                  only work on real words; "New york" might still be tagged with two different entities.
                - "first" : (works only on word-based models) Will use the `SIMPLE` strategy, except that words cannot
                  end up with different tags. Words will simply use the tag of the first token of the word when there
                  is ambiguity.
                - "average" : (works only on word-based models) Will use the `SIMPLE` strategy, except that words
                  cannot end up with different tags. Scores are first averaged across tokens, and then the label with
                  the maximum score is applied.
                - "max" : (works only on word-based models) Will use the `SIMPLE` strategy, except that words cannot
                  end up with different tags. Word entity will simply be the token with the maximum score.""",
)
class TokenClassificationPipeline(ChunkPipeline):
    """
    Named Entity Recognition pipeline using any `ModelForTokenClassification`. See the [named entity recognition
    examples](../task_summary#named-entity-recognition) for more information.

    Example:

    ```python
    >>> from transformers import pipeline

    >>> token_classifier = pipeline(model="Jean-Baptiste/camembert-ner", aggregation_strategy="simple")
    >>> sentence = "Je m'appelle jean-baptiste et je vis à montréal"
    >>> tokens = token_classifier(sentence)
    >>> tokens
    [{'entity_group': 'PER', 'score': 0.9931, 'word': 'jean-baptiste', 'start': 12, 'end': 26}, {'entity_group': 'LOC', 'score': 0.998, 'word': 'montréal', 'start': 38, 'end': 47}]

    >>> token = tokens[0]
    >>> # Start and end provide an easy way to highlight words in the original text.
    >>> sentence[token["start"] : token["end"]]
    ' jean-baptiste'

    >>> # Some models use the same idea to do part-of-speech tagging.
    >>> syntaxer = pipeline(model="vblagoje/bert-english-uncased-finetuned-pos", aggregation_strategy="simple")
    >>> syntaxer("My name is Sarah and I live in London")
    [{'entity_group': 'PRON', 'score': 0.999, 'word': 'my', 'start': 0, 'end': 2}, {'entity_group': 'NOUN', 'score': 0.997, 'word': 'name', 'start': 3, 'end': 7}, {'entity_group': 'AUX', 'score': 0.994, 'word': 'is', 'start': 8, 'end': 10}, {'entity_group': 'PROPN', 'score': 0.999, 'word': 'sarah', 'start': 11, 'end': 16}, {'entity_group': 'CCONJ', 'score': 0.999, 'word': 'and', 'start': 17, 'end': 20}, {'entity_group': 'PRON', 'score': 0.999, 'word': 'i', 'start': 21, 'end': 22}, {'entity_group': 'VERB', 'score': 0.998, 'word': 'live', 'start': 23, 'end': 27}, {'entity_group': 'ADP', 'score': 0.999, 'word': 'in', 'start': 28, 'end': 30}, {'entity_group': 'PROPN', 'score': 0.999, 'word': 'london', 'start': 31, 'end': 37}]
    ```
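    If your text is already split into words (for example, by an upstream word tokenizer), it can be passed as a
    list of words together with `is_split_into_words=True`. A minimal sketch, reusing the part-of-speech model from
    the example above (the exact labels and scores depend on the model):

    ```python
    words = ["My", "name", "is", "Sarah", "and", "I", "live", "in", "London"]
    syntaxer(words, is_split_into_words=True)  # start/end offsets are computed against " ".join(words)
    ```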

    Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)

    This token recognition pipeline can currently be loaded from [`pipeline`] using the following task identifier:
    `"ner"` (for predicting the classes of tokens in a sequence: person, organisation, location or miscellaneous).

    The models that this pipeline can use are models that have been fine-tuned on a token classification task. See the
    up-to-date list of available models on
    [huggingface.co/models](https://huggingface.co/models?filter=token-classification).
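    Token classification models have a maximum input length. For longer texts, the `stride` argument (which requires
    a fast tokenizer and an `aggregation_strategy` other than `"none"`) makes the pipeline process the text in
    overlapping chunks and merge overlapping entities afterwards. A minimal sketch, reusing the NER checkpoint from
    the example above; the stride of 128 tokens is an arbitrary choice:

    ```python
    long_text_classifier = pipeline(
        model="Jean-Baptiste/camembert-ner",
        aggregation_strategy="simple",
        stride=128,
    )
    entities = long_text_classifier(very_long_text)  # `very_long_text` stands for any long input string
    ```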
    """

    default_input_names = "sequences"

    _load_processor = False
    _load_image_processor = False
    _load_feature_extractor = False
    _load_tokenizer = True

    def __init__(self, args_parser=TokenClassificationArgumentHandler(), **kwargs):
        super().__init__(**kwargs)
        self.check_model_type(
            TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES
            if self.framework == "tf"
            else MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES
        )

        self._basic_tokenizer = BasicTokenizer(do_lower_case=False)
        self._args_parser = args_parser

    def _sanitize_parameters(
        self,
        ignore_labels=None,
        grouped_entities: Optional[bool] = None,
        ignore_subwords: Optional[bool] = None,
        aggregation_strategy: Optional[AggregationStrategy] = None,
        offset_mapping: Optional[list[tuple[int, int]]] = None,
        stride: Optional[int] = None,
        is_split_into_words: Optional[bool] = False,
        delimiter: Optional[str] = None,
    ):
        preprocess_params = {}
        preprocess_params["is_split_into_words"] = is_split_into_words
        if is_split_into_words:
            preprocess_params["delimiter"] = " " if delimiter is None else delimiter
        if offset_mapping is not None:
            preprocess_params["offset_mapping"] = offset_mapping

        postprocess_params = {}
        if grouped_entities is not None or ignore_subwords is not None:
            if grouped_entities and ignore_subwords:
                aggregation_strategy = AggregationStrategy.FIRST
            elif grouped_entities and not ignore_subwords:
                aggregation_strategy = AggregationStrategy.SIMPLE
            else:
                aggregation_strategy = AggregationStrategy.NONE

            if grouped_entities is not None:
                warnings.warn(
                    "`grouped_entities` is deprecated and will be removed in version v5.0.0, defaulted to"
                    f' `aggregation_strategy="{aggregation_strategy}"` instead.'
                )
            if ignore_subwords is not None:
                warnings.warn(
                    "`ignore_subwords` is deprecated and will be removed in version v5.0.0, defaulted to"
                    f' `aggregation_strategy="{aggregation_strategy}"` instead.'
                )

        if aggregation_strategy is not None:
            if isinstance(aggregation_strategy, str):
                aggregation_strategy = AggregationStrategy[aggregation_strategy.upper()]
            if (
                aggregation_strategy
                in {AggregationStrategy.FIRST, AggregationStrategy.MAX, AggregationStrategy.AVERAGE}
                and not self.tokenizer.is_fast
            ):
                raise ValueError(
                    "Slow tokenizers cannot handle subwords. Please set the `aggregation_strategy` option"
                    ' to `"simple"` or use a fast tokenizer.'
                )
            postprocess_params["aggregation_strategy"] = aggregation_strategy

        if ignore_labels is not None:
            postprocess_params["ignore_labels"] = ignore_labels

        if stride is not None:
            if stride >= self.tokenizer.model_max_length:
                raise ValueError(
                    "`stride` must be less than `tokenizer.model_max_length` (or even lower if the tokenizer adds"
                    " special tokens)"
                )
            if aggregation_strategy == AggregationStrategy.NONE:
                raise ValueError(
                    "`stride` was provided to process all the text but `aggregation_strategy="
                    f'"{aggregation_strategy}"`, please select another one instead.'
                )
            if not self.tokenizer.is_fast:
                raise ValueError(
                    "`stride` was provided to process all the text but you're using a slow tokenizer."
                    " Please use a fast tokenizer."
                )
            preprocess_params["tokenizer_params"] = {
                "return_overflowing_tokens": True,
                "padding": True,
                "stride": stride,
            }

        return preprocess_params, {}, postprocess_params

    @overload
    def __call__(self, inputs: str, **kwargs: Any) -> list[dict[str, Any]]: ...

    @overload
    def __call__(self, inputs: list[str], **kwargs: Any) -> list[list[dict[str, Any]]]: ...

    def __call__(
        self, inputs: Union[str, list[str]], **kwargs: Any
    ) -> Union[list[dict[str, Any]], list[list[dict[str, Any]]]]:
        """
        Classify each token of the text(s) given as inputs.

        Args:
            inputs (`str` or `List[str]`):
                One or several texts (or one list of texts) for token classification. Can be pre-tokenized when
                `is_split_into_words=True`.

        Return:
            A list or a list of lists of `dict`: Each result comes as a list of dictionaries (one for each token in the
            corresponding input, or each entity if this pipeline was instantiated with an aggregation_strategy) with
            the following keys:

            - **word** (`str`) -- The token/word classified. This is obtained by decoding the selected tokens. If you
              want to have the exact string in the original sentence, use `start` and `end`.
            - **score** (`float`) -- The corresponding probability for `entity`.
            - **entity** (`str`) -- The entity predicted for that token/word (it is named *entity_group* when
              *aggregation_strategy* is not `"none"`).
            - **index** (`int`, only present when `aggregation_strategy="none"`) -- The index of the corresponding
              token in the sentence.
            - **start** (`int`, *optional*) -- The index of the start of the corresponding entity in the sentence. Only
              exists if the offsets are available within the tokenizer
            - **end** (`int`, *optional*) -- The index of the end of the corresponding entity in the sentence. Only
              exists if the offsets are available within the tokenizer
        """

        _inputs, offset_mapping, is_split_into_words, delimiter = self._args_parser(inputs, **kwargs)
        kwargs["is_split_into_words"] = is_split_into_words
        kwargs["delimiter"] = delimiter

        if is_split_into_words and not all(isinstance(input, list) for input in inputs):
            return super().__call__([inputs], **kwargs)

        if offset_mapping:
            kwargs["offset_mapping"] = offset_mapping

        return super().__call__(inputs, **kwargs)

    def preprocess(self, sentence, offset_mapping=None, **preprocess_params):
        tokenizer_params = preprocess_params.pop("tokenizer_params", {})
        truncation = True if self.tokenizer.model_max_length and self.tokenizer.model_max_length > 0 else False
        is_split_into_words = preprocess_params.pop("is_split_into_words", False)
        word_to_chars_map = None

        if is_split_into_words:
            delimiter = preprocess_params.pop("delimiter", " ")
            if not isinstance(sentence, list):
                raise ValueError("When `is_split_into_words=True`, `sentence` must be a list of tokens.")
            words = sentence
            sentence = delimiter.join(words)
            # Record where each word starts and ends in the joined string so that token offsets (which are
            # relative to each word) can be mapped back to character positions in `sentence`.
            word_to_chars_map = []
            char_offset = 0
            delimiter_len = len(delimiter)
            for word in words:
                word_to_chars_map.append((char_offset, char_offset + len(word)))
                char_offset += len(word) + delimiter_len
            text_to_tokenize = words
            tokenizer_params["is_split_into_words"] = True
        else:
            if not isinstance(sentence, str):
                raise ValueError("When `is_split_into_words=False`, `sentence` must be an untokenized string.")
            text_to_tokenize = sentence

        inputs = self.tokenizer(
            text_to_tokenize,
            return_tensors=self.framework,
            truncation=truncation,
            return_special_tokens_mask=True,
            return_offsets_mapping=self.tokenizer.is_fast,
            **tokenizer_params,
        )
        if is_split_into_words and not self.tokenizer.is_fast:
            raise ValueError("is_split_into_words=True is only supported with fast tokenizers.")
        inputs.pop("overflow_to_sample_mapping", None)
        num_chunks = len(inputs["input_ids"])

        for i in range(num_chunks):
            if self.framework == "tf":
                model_inputs = {k: tf.expand_dims(v[i], 0) for k, v in inputs.items()}
            else:
                model_inputs = {k: v[i].unsqueeze(0) for k, v in inputs.items()}
            if offset_mapping is not None:
                model_inputs["offset_mapping"] = offset_mapping
            model_inputs["sentence"] = sentence if i == 0 else None
            model_inputs["is_last"] = i == num_chunks - 1
            model_inputs["word_ids"] = inputs.word_ids(i) if is_split_into_words else None
            model_inputs["word_to_chars_map"] = word_to_chars_map

            yield model_inputs

    def _forward(self, model_inputs):
        # Forward
        special_tokens_mask = model_inputs.pop("special_tokens_mask")
        offset_mapping = model_inputs.pop("offset_mapping", None)
        sentence = model_inputs.pop("sentence")
        is_last = model_inputs.pop("is_last")
        word_ids = model_inputs.pop("word_ids", None)
        word_to_chars_map = model_inputs.pop("word_to_chars_map", None)
        if self.framework == "tf":
            logits = self.model(**model_inputs)[0]
        else:
            output = self.model(**model_inputs)
            logits = output["logits"] if isinstance(output, dict) else output[0]

        return {
            "logits": logits,
            "special_tokens_mask": special_tokens_mask,
            "offset_mapping": offset_mapping,
            "sentence": sentence,
            "is_last": is_last,
            "word_ids": word_ids,
            "word_to_chars_map": word_to_chars_map,
            **model_inputs,
        }

    def postprocess(self, all_outputs, aggregation_strategy=AggregationStrategy.NONE, ignore_labels=None):
        if ignore_labels is None:
            ignore_labels = ["O"]
        all_entities = []
        word_to_chars_map = all_outputs[0].get("word_to_chars_map")
        for model_outputs in all_outputs:
            if self.framework == "pt" and model_outputs["logits"][0].dtype in (torch.bfloat16, torch.float16):
                logits = model_outputs["logits"][0].to(torch.float32).numpy()
            else:
                logits = model_outputs["logits"][0].numpy()
            sentence = all_outputs[0]["sentence"]
            input_ids = model_outputs["input_ids"][0]
            offset_mapping = (
                model_outputs["offset_mapping"][0] if model_outputs["offset_mapping"] is not None else None
            )
            special_tokens_mask = model_outputs["special_tokens_mask"][0].numpy()
            word_ids = model_outputs.get("word_ids")

            # Numerically stable softmax over the label dimension
            maxes = np.max(logits, axis=-1, keepdims=True)
            shifted_exp = np.exp(logits - maxes)
            scores = shifted_exp / shifted_exp.sum(axis=-1, keepdims=True)

            if self.framework == "tf":
                input_ids = input_ids.numpy()
                offset_mapping = offset_mapping.numpy() if offset_mapping is not None else None

            pre_entities = self.gather_pre_entities(
                sentence,
                input_ids,
                scores,
                offset_mapping,
                special_tokens_mask,
                aggregation_strategy,
                word_ids=word_ids,
                word_to_chars_map=word_to_chars_map,
            )
            grouped_entities = self.aggregate(pre_entities, aggregation_strategy)
            # Filter anything that is in ignore_labels
            entities = [
                entity
                for entity in grouped_entities
                if entity.get("entity", None) not in ignore_labels
                and entity.get("entity_group", None) not in ignore_labels
            ]
            all_entities.extend(entities)
        num_chunks = len(all_outputs)
        if num_chunks > 1:
            all_entities = self.aggregate_overlapping_entities(all_entities)
        return all_entities

    def aggregate_overlapping_entities(self, entities):
        if len(entities) == 0:
            return entities
        entities = sorted(entities, key=lambda x: x["start"])
        aggregated_entities = []
        previous_entity = entities[0]
        for entity in entities:
            if previous_entity["start"] <= entity["start"] < previous_entity["end"]:
                current_length = entity["end"] - entity["start"]
                previous_length = previous_entity["end"] - previous_entity["start"]
                if current_length > previous_length:
                    previous_entity = entity
                elif current_length == previous_length and entity["score"] > previous_entity["score"]:
                    previous_entity = entity
            else:
                aggregated_entities.append(previous_entity)
                previous_entity = entity
        aggregated_entities.append(previous_entity)
        return aggregated_entities

    def gather_pre_entities(
        self,
        sentence: str,
        input_ids: np.ndarray,
        scores: np.ndarray,
        offset_mapping: Optional[list[tuple[int, int]]],
        special_tokens_mask: np.ndarray,
        aggregation_strategy: AggregationStrategy,
        word_ids: Optional[list[Optional[int]]] = None,
        word_to_chars_map: Optional[list[tuple[int, int]]] = None,
    ) -> list[dict]:
        """Fuse various numpy arrays into dicts with all the information needed for aggregation"""
        pre_entities = []
        for idx, token_scores in enumerate(scores):
            # Filter special tokens
            if special_tokens_mask[idx]:
                continue

            word = self.tokenizer.convert_ids_to_tokens(int(input_ids[idx]))
            if offset_mapping is not None:
                start_ind, end_ind = offset_mapping[idx]
                if word_ids is not None and word_to_chars_map is not None:
                    word_index = word_ids[idx]
                    if word_index is not None:
                        # Shift word-relative offsets back to character positions in the joined sentence
                        start_char, _ = word_to_chars_map[word_index]
                        start_ind += start_char
                        end_ind += start_char
                if not isinstance(start_ind, int) and self.framework == "pt":
                    start_ind = start_ind.item()
                    end_ind = end_ind.item()
                word_ref = sentence[start_ind:end_ind]
                if getattr(self.tokenizer, "_tokenizer", None) and getattr(
                    self.tokenizer._tokenizer.model, "continuing_subword_prefix", None
                ):
                    # This is a BPE, word-aware tokenizer, there is a correct way to fuse tokens
                    is_subword = len(word) != len(word_ref)
                else:
                    # Fallback heuristic for tokenizers that are not word-aware: a token is treated as a subword
                    # if it is not preceded by a space in the original sentence.
                    if aggregation_strategy in {
                        AggregationStrategy.FIRST,
                        AggregationStrategy.AVERAGE,
                        AggregationStrategy.MAX,
                    }:
                        warnings.warn(
                            "Tokenizer does not support real words, using fallback heuristic",
                            UserWarning,
                        )
                    is_subword = start_ind > 0 and " " not in sentence[start_ind - 1 : start_ind + 1]

                if int(input_ids[idx]) == self.tokenizer.unk_token_id:
                    word = word_ref
                    is_subword = False
            else:
                start_ind = None
                end_ind = None
                is_subword = False

            pre_entity = {
                "word": word,
                "scores": token_scores,
                "start": start_ind,
                "end": end_ind,
                "index": idx,
                "is_subword": is_subword,
            }
            pre_entities.append(pre_entity)
        return pre_entities

    def aggregate(self, pre_entities: list[dict], aggregation_strategy: AggregationStrategy) -> list[dict]:
        if aggregation_strategy in {AggregationStrategy.NONE, AggregationStrategy.SIMPLE}:
            entities = []
            for pre_entity in pre_entities:
                entity_idx = pre_entity["scores"].argmax()
                score = pre_entity["scores"][entity_idx]
                entity = {
                    "entity": self.model.config.id2label[entity_idx],
                    "score": score,
                    "index": pre_entity["index"],
                    "word": pre_entity["word"],
                    "start": pre_entity["start"],
                    "end": pre_entity["end"],
                }
                entities.append(entity)
        else:
            entities = self.aggregate_words(pre_entities, aggregation_strategy)

        if aggregation_strategy == AggregationStrategy.NONE:
            return entities
        return self.group_entities(entities)

    def aggregate_word(self, entities: list[dict], aggregation_strategy: AggregationStrategy) -> dict:
        word = self.tokenizer.convert_tokens_to_string([entity["word"] for entity in entities])
        if aggregation_strategy == AggregationStrategy.FIRST:
            scores = entities[0]["scores"]
            idx = scores.argmax()
            score = scores[idx]
            entity = self.model.config.id2label[idx]
        elif aggregation_strategy == AggregationStrategy.MAX:
            max_entity = max(entities, key=lambda entity: entity["scores"].max())
            scores = max_entity["scores"]
            idx = scores.argmax()
            score = scores[idx]
            entity = self.model.config.id2label[idx]
        elif aggregation_strategy == AggregationStrategy.AVERAGE:
            scores = np.stack([entity["scores"] for entity in entities])
            average_scores = np.nanmean(scores, axis=0)
            entity_idx = average_scores.argmax()
            entity = self.model.config.id2label[entity_idx]
            score = average_scores[entity_idx]
        else:
            raise ValueError("Invalid aggregation_strategy")
        new_entity = {
            "entity": entity,
            "score": score,
            "word": word,
            "start": entities[0]["start"],
            "end": entities[-1]["end"],
        }
        return new_entity

    def aggregate_words(self, entities: list[dict], aggregation_strategy: AggregationStrategy) -> list[dict]:
        """
        Override tokens from a given word that disagree to force agreement on word boundaries.

        Example: micro|soft| com|pany| B-ENT I-NAME I-ENT I-ENT will be rewritten with first strategy as microsoft|
        company| B-ENT I-ENT
        """
        if aggregation_strategy in {AggregationStrategy.NONE, AggregationStrategy.SIMPLE}:
            raise ValueError("NONE and SIMPLE strategies are invalid for word aggregation")

        word_entities = []
        word_group = None
        for entity in entities:
            if word_group is None:
                word_group = [entity]
            elif entity["is_subword"]:
                word_group.append(entity)
            else:
                word_entities.append(self.aggregate_word(word_group, aggregation_strategy))
                word_group = [entity]
        # Last item
        if word_group is not None:
            word_entities.append(self.aggregate_word(word_group, aggregation_strategy))
        return word_entities

    def group_sub_entities(self, entities: list[dict]) -> dict:
        """
        Group together the adjacent tokens with the same entity predicted.

        Args:
            entities (`dict`): The entities predicted by the pipeline.
        """
        # Get the first entity in the entity group
        entity = entities[0]["entity"].split("-", 1)[-1]
        scores = np.nanmean([entity["score"] for entity in entities])
        tokens = [entity["word"] for entity in entities]

        entity_group = {
            "entity_group": entity,
            "score": np.mean(scores),
            "word": self.tokenizer.convert_tokens_to_string(tokens),
            "start": entities[0]["start"],
            "end": entities[-1]["end"],
        }
        return entity_group

    def get_tag(self, entity_name: str) -> tuple[str, str]:
        if entity_name.startswith("B-"):
            bi = "B"
            tag = entity_name[2:]
        elif entity_name.startswith("I-"):
            bi = "I"
            tag = entity_name[2:]
        else:
            # It's not in B-, I- format, default to I- for continuation.
            bi = "I"
            tag = entity_name
        return bi, tag

    def group_entities(self, entities: list[dict]) -> list[dict]:
        """
        Find and group together the adjacent tokens with the same entity predicted.

        Args:
            entities (`dict`): The entities predicted by the pipeline.
        """

        entity_groups = []
        entity_group_disagg = []

        for entity in entities:
            if not entity_group_disagg:
                entity_group_disagg.append(entity)
                continue

            # If the current entity is similar and adjacent to the previous entity,
            # append it to the disaggregated entity group.
            # The split is meant to account for the "B" and "I" prefixes.
            # Shouldn't merge if both entities are B-type.
            bi, tag = self.get_tag(entity["entity"])
            last_bi, last_tag = self.get_tag(entity_group_disagg[-1]["entity"])

            if tag == last_tag and bi != "B":
                # Modify subword type to be previous_type
                entity_group_disagg.append(entity)
            else:
                # The current entity is different from the previous one:
                # aggregate the disaggregated entity group
                entity_groups.append(self.group_sub_entities(entity_group_disagg))
                entity_group_disagg = [entity]
        if entity_group_disagg:
            # It's the last entity, add it to the entity groups
            entity_groups.append(self.group_sub_entities(entity_group_disagg))

        return entity_groups


NerPipeline = TokenClassificationPipeline