
    eThc                      S r SSKJr  SSKrSSKrSSKrSSKrSSKrSSKrSSK	r	SSK
r
SSKJr  SSKJr  SSKJrJrJrJrJrJrJr  SSKrSSKrSSKrSSKJr  SS	KJrJ r   SS
K!J"r"  SSK#J$r$  SSK%J&r&  SSK'J(r(J)r)  SSK*J+r+J,r,J-r-J.r.J/r/  SSK0J1r1J2r2J3r3J4r4J5r5J6r6J7r7J8r8J9r9J:r:J;r;J<r<J=r=J>r>J?r?J@r@JArAJBrBJCrCJDrD  SSKEJFrFJGrG  \@" 5       (       a  SSKHJIrI  SSKJJKrL  \(       a  SSKJMrM  \BR                  " \O5      rPS\R                  ;  a  S\R                  S'   O$\R                  S   S:w  a  \PR                  S5         SSKSrTSSKSJUrV  \R                  " 5       r\\\\R                     \\R                     \\_\R                  4   \\_\R                  4   \R                  \R                  4   r`S ra " S S5      rbS rc " S S5      rd " S  S!5      re " S" S#5      rf " S$ S%5      rg " S& S'5      rh " S( S)\d5      ri " S* S+5      rjS, rkS- rlS. rmSBS/ jrnS0\44SCS1 jjroSDS2 jrpSES3 jrq SDS4 jrrSES5 jrsSES6 jrtSES7 jruS8 rv " S9 S:\TR                  \b\)\95      rx " S; S<\TR                  R                  5      r{ " S= S>\TR                  R                  5      r| " S? S@\TR                  R                  5      r}SFSGSA jjr~g! \W\X4 a6    SSKTrTSSKTJUrV  \" \TR                  5      R                  S:  a  \[" S5      e GNf = f)HzTF general model utils.    )annotationsN)Mapping)Path)TYPE_CHECKINGAnyCallableDictListOptionalUnion)parse   )DataCollatorWithPaddingDefaultDataCollator)get_tf_activation)PretrainedConfig)custom_object_save)GenerationConfigTFGenerationMixin)convert_batch_encoding	expand_1dload_attributes_from_hdf5_groupsave_attributes_to_hdf5_group
shape_list)SAFE_WEIGHTS_INDEX_NAMESAFE_WEIGHTS_NAMETF2_WEIGHTS_INDEX_NAMETF2_WEIGHTS_NAMETF_WEIGHTS_NAMEWEIGHTS_INDEX_NAMEWEIGHTS_NAMEModelOutputPushToHubMixincached_filedownload_urlfind_labelshas_fileis_offline_modeis_remote_urlis_safetensors_availableis_tf_symbolic_tensorloggingrequires_backendsworking_or_temp_dir)convert_file_size_to_intget_checkpoint_shard_files)	safe_open)	save_file)PreTrainedTokenizerBaseTF_USE_LEGACY_KERAS1zTransformers is only compatible with Keras 2, but you have explicitly set `TF_USE_LEGACY_KERAS` to `0`. This may result in unexpected behaviour or errors if Keras 3 objects are passed to Transformers models.)backend   zYour currently installed version of Keras is Keras 3, but this is not yet supported in Transformers. Please install the backwards-compatible tf-keras package with `pip install tf-keras`.c                    UR                   R                  S::  a  U$ [        [        SUR                   R                  5      5      n[        R
                  " XS9$ )Nr   axis)shaperanklistrangetfreduce_mean)y_truey_predreduction_axess      V/var/www/auris/envauris/lib/python3.13/site-packages/transformers/modeling_tf_utils.py
dummy_lossrE   v   sC    ||AeAv||'8'89:~~f::    c                  &    \ rS rSrSrSSS jjrSrg)TFModelUtilsMixin~   z;
A few utilities for `keras.Model`, to be used as a mixin.
c                z    U(       a%  [        [        S U R                   5       5      5      $ U R                  5       $ )a  
Get the number of (optionally, trainable) parameters in the model.

Args:
    only_trainable (`bool`, *optional*, defaults to `False`):
        Whether or not to return only the number of trainable parameters

Returns:
    `int`: The number of parameters.
c              3  ~   #    U  H3  n[         R                  " UR                  R                  5       5      v   M5     g 7fN)npprodr;   as_list).0ws     rD   	<genexpr>3TFModelUtilsMixin.num_parameters.<locals>.<genexpr>   s)     X?W!277177??#455?Ws   ;=)intsumtrainable_variablescount_params)selfonly_trainables     rD   num_parameters TFModelUtilsMixin.num_parameters   s2     sXt?W?WXXYY$$&&rF    N)F)rY   boolreturnrT   )__name__
__module____qualname____firstlineno____doc__rZ   __static_attributes__r\   rF   rD   rH   rH   ~   s    ' 'rF   rH   c                  ^ ^^ T R                   m[        T SS5      mTc  [        S5      e[        R                  " T5      UU4S j5       nUT l         [        T S5      (       d  [        S5      e[        T R                  S5      (       a  U 4S jnUT l        S	T l        [        [        R                  S
5      (       a$  [        R                  R                  5       " T 5      m T $ )aT  
Decorate a Keras Layer class to support Keras serialization.

This is done by:

1. Adding a `transformers_config` dict to the Keras config dictionary in `get_config` (called by Keras at
   serialization time.
2. Wrapping `__init__` to accept that `transformers_config` dict (passed by Keras at deserialization time) and
   convert it to a config object for the actual layer initializer.
3. Registering the class as a custom object in Keras (if the Tensorflow version supports this), so that it does not
   need to be supplied in `custom_objects` in the call to `keras.models.load_model`.

Args:
    cls (a `keras.layers.Layers subclass`):
        Typically a `TF.MainLayer` class in this project, in general must accept a `config` argument to its
        initializer.

Returns:
    The same class object, with modifications for Keras deserialization.
config_classNz2Must set `config_class` to use @keras_serializablec                  > U(       a  [        US   [        5      (       a  US   OUR                  SS 5      n[        U[        5      (       a  TR	                  U5      nT" X/UQ70 UD6  OI[        U[        5      (       a)  [        U5      S:  a  T" U /UQ70 UD6  OT" X/UQ70 UD6  O[        S5      eX0l        X l        g )Nr   configz?Must pass either `config` (PretrainedConfig) or `config` (dict))	
isinstancer   popdict	from_dictlen
ValueError_config_kwargs)rX   argskwargsrh   rf   initializers       rD   wrapped_init(keras_serializable.<locals>.wrapped_init   s     ZQ9I%J%JaPVPZPZ[ceiPjfd##!++F3F6t6v6 0114y1}D24262D:4:6:^__rF   
get_configz=Only use @keras_serializable on keras.layers.Layer subclasses_is_defaultc                   > [         TU ]  5       nU R                  R                  5       US'   UR	                  U R
                  5        U$ )Nrh   )superrv   ro   to_dictupdaterp   )rX   cfgclss     rD   rv   &keras_serializable.<locals>.get_config   s=    T-/C LL002CMJJt||$JrF   Tregister_keras_serializable)__init__getattrAttributeError	functoolswrapshasattr	TypeErrorrv   _keras_serializablekerasutilsr   )r}   rt   rv   rf   rs   s   `  @@rD   keras_serializabler      s    * ,,K35LQRR__[! ""  CL3%%WXXs~~}--	 $"Cu{{9::kk557<JrF   c                      \ rS rSrSrS rSrg)TFCausalLanguageModelingLoss   z
Loss function suitable for causal language modeling (CLM), that is, the task of guessing the next token.

<Tip>

Any label of -100 will be ignored (along with the corresponding logits) in the loss computation.

</Tip>
c           	        [         R                  R                  S[         R                  R                  R                  S9nU R
                  R                  (       a  [        R                  " [        R                  " US5      S5      n[        R                  " [        R                  " US[        U5      S   45      U5      n[        R                  " [        R                  " US5      U5      nU" X5      $ U" [        R                  R                  U5      U5      n[        R                  " US:g  UR                  S9nXg-  n[        R                   " U5      [        R                   " U5      -  n	[        R                  " U	S5      $ )	NTfrom_logits	reductionr   r7   dtyper   )r   lossesSparseCategoricalCrossentropy	ReductionNONErh   tf_legacy_lossr?   	not_equalreshapeboolean_maskr   nnrelucastr   
reduce_sum
rX   labelslogitsloss_fnactive_lossreduced_logitsunmasked_loss	loss_maskmasked_lossreduced_masked_losss
             rD   hf_compute_loss,TFCausalLanguageModelingLoss.hf_compute_loss   s   ,,<<Y^YeYeYoYoYtYt<u;;%%,,rzz&%'@$GK__RZZZPVEWXYEZ@[-\^ijN__RZZ%>LF622  

6 2F;GGFdN-2E2EF	#/ mmK82==;SSzz-t44rF   r\   Nr_   r`   ra   rb   rc   r   rd   r\   rF   rD   r   r      s    5rF   r   c                      \ rS rSrSrS rSrg)TFQuestionAnsweringLoss   z0
Loss function suitable for question answering.
c                    [         R                  R                  S[         R                  R                  R                  S9nU" US   US   5      nU" US   US   5      nXE-   S-  $ )NTr   start_positionr   end_positionr   g       @r   r   r   r   r   )rX   r   r   r   
start_lossend_losss         rD   r   'TFQuestionAnsweringLoss.hf_compute_loss   sg    ,,<<Y^YeYeYoYoYtYt<uV$45vayA
6.16!9=%,,rF   r\   Nr   r\   rF   rD   r   r      s    -rF   r   c                      \ rS rSrSrS rSrg)TFTokenClassificationLoss   z
Loss function suitable for token classification.

<Tip>

Any label of -100 will be ignored (along with the corresponding logits) in the loss computation.

</Tip>
c           	     F   [         R                  R                  S[         R                  R                  R                  S9n[
        R                  " 5       (       a=  [
        R                  R                  US:H  5      (       a  [
        R                  " S5        U R                  R                  (       a  [
        R                  R                  US:H  5      (       a1  [
        R                  " S5        [
        R                  " US5      S:g  nO[
        R                  " US5      S:g  n[
        R                  " [
        R                  " US[        U5      S   45      U5      n[
        R                  " [
        R                  " US5      U5      nU" X5      $ U" [
        R                  R!                  U5      U5      n[
        R"                  " US:  UR$                  S	9nXg-  n[
        R&                  " U5      [
        R&                  " U5      -  n	[
        R                  " U	S
5      $ )NTr   r   zSUsing `-1` to mask the loss for the token is deprecated. Please use `-100` instead.r   r   r7   r   r   r   )r   r   r   r   r   r?   executing_eagerlymath
reduce_anyprintrh   r   r   r   r   r   r   r   r   r   r   s
             rD   r   )TFTokenClassificationLoss.hf_compute_loss
  s   ,,<<Y^YeYeYoYoYtYt<u!!ww!!&B,//no;;%% ww!!&B,//no jj72= jj74?__RZZZPVEWXYEZ@[-\^ijN__RZZ%>LF622  

6 2F; GGFaK}/B/BC	 $/ mmK82==;SSzz-t44rF   r\   Nr   r\   rF   rD   r   r      s    5rF   r   c                      \ rS rSrSrS rSrg)TFSequenceClassificationLossi)  z5
Loss function suitable for sequence classification.
c                   UR                   R                  S:X  d  UR                   S   S:X  ao  [        R                  R	                  [        R                  R
                  R                  S9nUR                   R                  S:X  a  [        R                  " USS9nO@[        R                  R                  S[        R                  R
                  R                  S9nU" X5      $ )Nr   )r   r   r9   Tr   )
r;   r<   r   r   MeanSquaredErrorr   r   r?   expand_dimsr   rX   r   r   r   s       rD   r   ,TFSequenceClassificationLoss.hf_compute_loss.  s    <<!V\\!_%9ll33ell>T>T>Y>Y3ZG||  A%R8ll@@ ELL,B,B,G,G A G v&&rF   r\   Nr   r\   rF   rD   r   r   )  s    'rF   r   c                      \ rS rSrSrS rSrg)TFMultipleChoiceLossi<  z1Loss function suitable for multiple choice tasks.c                    [         R                  R                  S[         R                  R                  R                  S9nU" X5      $ )NTr   r   r   s       rD   r   $TFMultipleChoiceLoss.hf_compute_loss?  s9    ,,<<Y^YeYeYoYoYtYt<uv&&rF   r\   Nr   r\   rF   rD   r   r   <  s
    ;'rF   r   c                      \ rS rSrSrSrg)TFMaskedLanguageModelingLossiD  z
Loss function suitable for masked language modeling (MLM), that is, the task of guessing the masked tokens.

<Tip>

Any label of -100 will be ignored (along with the corresponding logits) in the loss computation.

</Tip>
r\   N)r_   r`   ra   rb   rc   rd   r\   rF   rD   r   r   D  s    rF   r   c                      \ rS rSrSrS rSrg)TFNextSentencePredictionLossiP  z
Loss function suitable for next sentence prediction (NSP), that is, the task of guessing the next sentence.

<Tip>

Any label of -100 will be ignored (along with the corresponding logits) in the loss computation.

</Tip>
c                h   [         R                  R                  S[         R                  R                  R                  S9nU R
                  R                  (       a  [        R                  " [        R                  " US5      S5      n[        R                  " [        R                  " US5      U5      n[        R                  " [        R                  " US5      U5      nU" Xe5      $ U" [        R                  R                  U5      US9n[        R                  " US:g  UR                  S9nXx-  n	U	$ )NTr   r   r   )r   r7   )rA   rB   r   )r   r   r   r   r   rh   r   r?   r   r   r   r   r   r   r   )
rX   r   r   r   next_sentence_active_lossnext_sentence_reduced_logitsnext_sentence_labelunmasked_ns_lossns_loss_maskmasked_ns_losss
             rD   r   ,TFNextSentencePredictionLoss.hf_compute_loss[  s    ,,<<Y^YeYeYoYoYtYt<u;;%% )+RZZ5NPT(U%+-??2::fg;VXq+r("$//"**VU2KMf"g.MM #"%%**V*<VLwwv~5E5K5KL)8rF   r\   Nr   r\   rF   rD   r   r   P  s    rF   r   c                    0 nSU;   a  US   b  US   OU R                   US'   US   b  US   OU R                  US'   US   b  US   OU R                  US'   SU;   a  US   b  US   O[        U SS5      US'   U$ )z
Process the input booleans of each model.

Args:
    config ([`PretrainedConfig`]):
        The config of the running model.
    **kwargs:
        The boolean parameters

Returns:
    A dictionary with the proper values for each boolean
output_attentionsNoutput_hidden_statesreturn_dict	use_cache)r   r   r   r   )rh   rr   final_booleanss      rD   booleans_processingr   r  s     N f$+12E+F+RF&'X^XpXp 	*+ +11G*H*T%&Z`ZuZu )* >DM=R=^F=$9djdvdvN=!f#)+#6#BF;PVXceiHj 	{# rF   c                   ^ ^ [         R                  " T 5      m[        R                  " T 5      U U4S j5       nTUl        U$ )a  
Decorator that processes the inputs to a Keras layer, passing them to the layer as keyword arguments. This enables
downstream use of the inputs by their variable name, even if they arrive packed as a dictionary in the first input
(common case in Keras).

Args:
    func (`callable`):
        The callable function of the TensorFlow model.


Returns:
    A callable that wraps the original `func` with the behavior described above.
c           	     
  > UR                  5        VVs0 s H"  u  p4U[        T
R                  5      ;  d  M   X4_M$     nnnUR                  5        VVs0 s H  u  p4X5;  d  M  X4_M     nnnUR                  SU05        UR                  [        [	        T	R
                  R                  SS  U5      5      5        SU R                  R                  ;   a  S nOU R                  n[        T	U40 UD6nT	" U 40 UD6$ s  snnf s  snnf )Nkwargs_callr   EncoderDecoder)itemsrk   
parametersr{   zip__code__co_varnames	__class__r_   rh   input_processing)rX   rq   rr   keyvalr   fn_args_and_kwargsrh   unpacked_inputsfuncoriginal_signatures            rD   run_call_with_unpacked_inputs4unpack_inputs.<locals>.run_call_with_unpacked_inputs  s     17qHC#TRdRoRoMpBpxsxq7=||~`~83I_hch~`!!=+">? 	!!$s4==+D+DQR+H$'O"PQ t~~666F[[F*4N;MND,O,, r`s   C9C9C?#C?)inspect	signaturer   r   __signature__)r   r   r   s   ` @rD   unpack_inputsr     sC     !**40__T- -* 3E!/((rF   c           
        [        [        R                  " U 5      R                  5      n[	        UR                  SS5      5      nUR                  SS5        [        UR                  5       5      nUS   nUR                  US5      n0 n[        R                  [        [        [        [        [        [         [        R                  4n	SUS   ;   a2  [        R                   " S["        5        US   R                  S5      US'   S	US   ;   a2  [        R                   " S
["        5        US   R                  S	5      US'   SUS   ;   a9  SU;   a3  [        R                   " S["        5        US   R                  S5      US'   O&SUS   ;   a  SU;   a  US   R                  S5      US'   U(       a  UR                  S0 5      US'   OL[%        US   5      S:  a)  ['        S[        US   R                  5       5       S35      eUR                  S5        UR)                  5        HU  u  p[+        X5      (       d  [        R,                  " U5      (       d  Uc  XU
'   M9  ['        S[/        U5       SU	 SU
 S35      e   [+        U[        [        45      (       a  [1        U5       H  u  p[3        U5      (       a1  UR4                  R7                  S5      S   nX;   a  XU'   M>  XX\   '   MF  [+        X5      (       d  Uc  XX\   '   Ma  ['        S[/        U5       SU	 SX\    S35      e   GOA[+        U[8        5      (       a  SU;   a/  [        R                   " S["        5        UR                  S5      US'   S	U;   a/  [        R                   " S
["        5        UR                  S	5      US'   [        U5      R)                  5        Hc  u  p[+        X5      (       d  Uc  XU
'   M  X;  a$  SU;  a  [:        R=                  SU
 SU S35        MG  ['        S[/        U5       SU	 SU
 S35      e   OA[        R,                  " U5      (       d  Uc  XxU'   O['        S[/        U5       SU	 SU S35      eU HJ  nU[        UR                  5       5      ;  d  M"  US:w  d  M*  UR                  XU   R>                  5      X'   ML     SU;   aM  US   b<  [3        US   5      (       a)  US   R4                  R7                  S5      S   nUS   X'   OUS   US'   US	 SU;   a  US	 0 nUR)                  5        H  u  nn[+        U[        R                  5      (       aH  UR@                  [        RB                  :X  a*  [        RD                  " U[        RF                  5      UU'   Mm  [+        U[        R                  5      (       aB  UR@                  [        RB                  :X  a$  URI                  [        RF                  5      UU'   M  UUU'   M     UnAUbH  UR)                  5        V
Vs0 s H  u  pU
S;   d  M  X_M     nn
nURK                  [M        SSU0UD65        U$ s  snn
f )av  
Process the input of each TensorFlow model including the booleans. In case of a list of symbolic inputs, each input
has to be named accordingly to the parameters name, i.e. `input_ids = keras.Input(shape=(128,), dtype='int32',
name="input_ids")` otherwise the order of the tensors will not be guaranteed during the training.

Args:
    func (`callable`):
        The callable function of the TensorFlow model.
    config ([`PretrainedConfig`]):
        The config of the running model.
    **kwargs:
        The inputs of the model.

Returns:
    Two lists, one for the missing layers, and another one for the unexpected layers.
rr   NrX   r   inputsr   zeThe `inputs` argument is deprecated and will be removed in a future version, use `input_ids` instead.	input_idsdecoder_cached_stateszzThe `decoder_cached_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.past_key_valuespastziThe `past` argument is deprecated and will be removed in a future version, use `past_key_values` instead.zAThe following keyword arguments are not supported by this model: .zData of type z is not allowed only z is accepted for :rq   zThe parameter z( does not belongs to the parameter list z and will be ignored.)r   r   r   r   rh   r\   )'rk   r   r   r   r]   rj   r=   keysr?   TensorrT   r"   tuplerM   ndarraywarningswarnFutureWarningrm   rn   r   ri   	is_tensortype	enumerater+   namesplitr   loggerwarningdefaultr   int64r   int32astyper{   r   )r   rh   rr   r   
has_kwargsparameter_namesmain_input_name
main_inputoutputallowed_typeskviinputtensor_namer  cast_outputr   r   boolean_dicts                       rD   r   r     s	   " W&&t,778IimmHd34JMM&$9>>+,O%a(OOT2JFYYc;tT2::VM6-((s	

 %]377A{&"77*	

 %+=$9$=$=>U$V !&&+<+O	

 %+=$9$=$=f$E !	f]3	3/8Q.223DEv!::mR8xvm$%).33567q:  	

=!a''2<<??ai1I}T!WI5J=/Yjkljmmnopp	  *udm,,!*-HA$U++ $jj..s3A61*/;'16?-.E11U]-2)* #DK=0Em_ U'*+1.  .$ 
J	(	(z!MM #-..":F;"j0MM.
 )37N(OF$%$**,DA!++qyq	)fO.K$QC'OP_O``uv  =a	9N}o]nopnqqr!stt - <<
##z'9&0?#Z 011F}o V#$A'   tFKKM**tv~!::ddO,C,CDFL   &>%*?v*O*O .--33C8;K"(.F #).F;6N68KLLNSc299%%#))rxx*?!wwsBHH5KRZZ((SYY"((-B"zz"((3K"K # F 
&]] AD& 	 
 	 	
 M
s   Y!Yc                   Ub?  U R                  U5      (       a)  U [        U5      S  n U R                  S5      (       a  U SS  n SU ;  aA  [        U R                  S5      5      S:  a#  SR                  U R                  S5      SS  5      n U $ )N/r   model.)
startswithrm   r  join)r  _prefixs     rD   strip_model_name_and_prefixr&  l  s~    tw77CLN#??38DtDJJsO 4q 8xx

3+,KrF   10GBc           
     x   [        U5      n/ n/ nSnSnU  Hj  nUR                  5       R                  UR                  R                  -  nXX-   U:  a  UR	                  U5        / nSnUR	                  U5        XX-  nXh-  nMl     UR	                  U5        [        U5      S:X  a  X#S   0S4$ 0 n	0 n
[        U5       Hp  u  pUR                  SSUS-   S S[        U5      S S35      nUR                  SSUS-   S S[        U5      S S35      nXU'   U H  nUR                  nXU'   M     Mr     S	U0nUU	S
.nU
U4$ )a  
Splits a model state dictionary in sub-checkpoints so that the final size of each sub-checkpoint does not exceed a
given size.

The sub-checkpoints are determined by iterating through the `state_dict` in the order of its keys, so there is no
optimization made to make each sub-checkpoint as close as possible to the maximum size passed. For example, if the
limit is 10GB and we have weights of sizes [6GB, 6GB, 2GB, 6GB, 2GB, 2GB] they will get sharded as [6GB], [6+2GB],
[6+2+2GB] and not [6+2+2GB], [6+2GB], [6GB].

<Tip warning={true}>

If one of the model's weight is bigger that `max_shard_size`, it will end up in its own sub-checkpoint which will
have a size greater than `max_shard_size`.

</Tip>

Args:
    weights (`Dict[str, tf.RessourceVariable]`): The list of tf.RessourceVariable of a model to save.
    max_shard_size (`int` or `str`, *optional*, defaults to `"10GB"`):
        The maximum size of each sub-checkpoint. If expressed as a string, needs to be digits followed by a unit
        (like `"5MB"`).
r   r   Nz.h5-05dz-of-.safetensors
total_size)metadata
weight_map)	r/   numpysizer   appendrm   r
  replacer  )weightsmax_shard_sizeweights_namesharded_state_dictscurrent_blockcurrent_block_sizer,  itemweight_sizer.  shardsidxshard
shard_fileweightweight_namer-  indexs                     rD   tf_shard_checkpointrB  v  s   . .n=NMJjjl''$**//9 +n<&&}5M!"T")!
  }- 1$!45t;; JF 34
!))%1S1WSMcJ]F^_bEccf1gh
''aa}D5H1I#0Nl[

 #zF ++K&0{#  5 j)H!<E5=rF   c           	     B   [        5       n[        5       n[        5       n[        5       n0 n	[        U R                  5       H  u  pUR                  nUb5  UR	                  U5      (       a  U[        U5      S nUR                  S5      nSU;   dA  [        UR                  S5      5      S:X  d#  SR                  UR                  S5      SS 5      nUR                  U5        XU'   M     U H[  n[        U U	UUUS9u  pnUR                  U5        UR                  U5        UR                  U5        [        R                  " 5         M]     X-
  nU(       a  [        U5      S:  d  [        U5      S:  a  SU R                  R                   3n[        U5      S:  a/  SR                  U Vs/ s H	  nS	U S	3PM     sn5      nUS
U S3-  n[        U5      S:  a/  SR                  U Vs/ s H	  nS	U S	3PM     sn5      nUS
U S3-  n[!        U5      eUXW4$ s  snf s  snf )a  
This is the same as `load_tf_weights` but for a sharded checkpoint. Detect missing and unexpected layers and load
the TF weights from the shard file accordingly to their names and shapes.

This load is performed efficiently: each checkpoint shard is loaded one by one in RAM and deleted after being
loaded in the model.

Args:
    model (`keras.models.Model`): The model in which to load the checkpoint.
    shard_files (`str` or `os.PathLike`): A list containing the sharded checkpoint names.
    ignore_mismatched_sizes`bool`, *optional`, defaults to `True`):
        Whether or not to ignore the mismatch between the sizes
    strict (`bool`, *optional*, defaults to `True`):
        Whether to strictly enforce that the keys in the model state dict match the keys in the sharded checkpoint.

Returns:
    Three lists, one for the missing layers, another one for the unexpected layers, and a last one for the
    mismatched layers.
Nr!  r"  r   ignore_mismatched_sizesr%  r   #Error(s) in loading state_dict for ,"
Missing key(s): r   )setr
  r3  r  r#  rm   lstripr  r$  addload_tf_shardr{   gccollectr   r_   RuntimeError)modelshard_filesrE  strictr%  unexpected_keys
saved_keysmismatched_keys
model_keysmodel_layer_mapr  r  
layer_namer>  saved_weight_names_setunexpected_keys_setmismatched_keys_setmissing_keyserror_messagestr_missing_keysstr_unexpected_keyss                        rD   load_tf_sharded_weightsra    s#   , eOJeO JO%--(VV
:#8#8#A#A#CLN3J#**3/JJ&#j.>.>s.C*D*I*"2"23"7";<Jz"&'
# ) "
KX$;L
H5H 	012323


 " *L3|$q(C,@1,D=eoo>V>V=WX|q "xx<(H<a1QCq<(HI12B1C1EEM!#"%((o+Noas!Ho+N"O12E1FaHHM=))99 )I ,Os   *H(Hc           
     x   [        5       n0 n[        5       n[        5       n [        R                  " US5       n	[        [        U	S5      5      n
/ nU
 H  nX   n[        R
                  " U5      Xl'   UR                  U5        X;  a  UR                  U5        MH  U R                  X      nXl   nUbS  [        R                  " U5      UR                  :w  a-   [        R                  " U[        R                  " U5      5      nOUnUR                  UW45        M     SSS5        [        R                  " W5        XXU4$ ! [         aF  nU(       a8  UR                  XR                  [        R                  " U5      45         SnAGM5  UeSnAff = f! , (       d  f       N{= f! [         a  n [!        U5       nUR#                  5       R%                  S5      (       a  ['        S5      e[        SU S35      Ue! , (       d  f        SnAg= f! [(        [        4 a    ['        SU S	U S
35      ef = fSnAff = f)a  
Loads a shard from a sharded checkpoint file. Can be either H5 or Safetensors.
Handles missing keys and unexpected keys.

Args:
    model (`keras.models.Model`): Model in which the weights are loaded
    model_layer_map (`Dict`): A dictionary mapping the layer name to the index of the layer in the model.
    resolved_archive_file (`str`): Path to the checkpoint file from which the weights will be loaded
    ignore_mismatched_sizes (`bool`, *optional*, defaults to `False`): Whether to ignore the mismatched keys

Returns:
    `keras.models.Model`: Three lists, one for the layers that were found and successfully restored (from the
    shard file), one for the mismatched layers, and another one for the unexpected layers.
rlayer_namesNversionYou seem to have cloned a repository without having git-lfs installed. Please install git-lfs and run `git lfs install` followed by `git lfs pull` in the folder you cloned.zUnable to locate the file z_ which is necessary to load this pretrained model. Make sure you have saved the model properly.z4Unable to load weights from TF checkpoint file for 'z' at 'z'. If you tried to load a TF model from a sharded checkpoint, you should try converting the model by loading it in pytorch and saving it locally. A conversion script should be released soon.)rJ  h5pyFiler   rM   asarrayrL  r3  K	int_shaper;   r   rn   r1  batch_set_value	Exceptionopenreadr#  OSErrorUnicodeDecodeError)rQ  rX  resolved_archive_filerE  r%  rZ  saved_weightsrV  rT  sharded_checkpoint_filesaved_h5_model_layers_nameweight_value_tuplesrY  h5_layer_objectsymbolic_weightsaved_weight_valuearrayefs                      rD   rM  rM    sA    !UMeOeOBYY,c26M),-LMdfs-t)u&"$ 9
"9"E,.JJ,G)&**:64#''
3&+mmO4O&PO)6)B&)5;;7;M;S;SS	,(*

3Eq{{SbGc(d %7E (../GHA 9 3R 	
-.%GG! $. ,#:$3$7$7)35M5Mq{{[jOk(l%& %-*+G,7 32Z  	+,668&&y11!&  %45J4K LO O  -,, #J/ 	FG\F] ^,- .oo 	s   F# BF+D?F!F# ?
F	8F
FF

FF
F F# #
H9.H9A G99
H	HH#H11H44H9c                   [        5       n/ n[        5       nU Hc  n[        U UUUS9u  pnUR                  [        U	5      5        UR                  U
5        UR                  U5        [        R
                  " 5         Me     [         R                  " U6 nU(       a  [        U5      S:  d  [        U5      S:  a  SU R                  R                   3n[        U5      S:  a/  SR                  U Vs/ s H	  nSU S3PM     sn5      nUSU S3-  n[        U5      S:  a/  SR                  U Vs/ s H	  nSU S3PM     sn5      nUSU S3-  n[        U5      eXU4$ s  snf s  snf )a  
This is the same as `load_tf_weights_from_safetensors` but for a sharded TF-format safetensors checkpoint.
Detect missing and unexpected layers and load the TF weights from the shard file accordingly to their names and
shapes.

This load is performed efficiently: each checkpoint shard is loaded one by one in RAM and deleted after being
loaded in the model.

Args:
    model (`keras.models.Model`): The model in which to load the checkpoint.
    shard_files (`str` or `os.PathLike`): A list containing the sharded checkpoint names.
    ignore_mismatched_sizes`bool`, *optional`, defaults to `True`):
        Whether or not to ignore the mismatch between the sizes
    strict (`bool`, *optional*, defaults to `True`):
        Whether to strictly enforce that the keys in the model state dict match the keys in the sharded checkpoint.

Returns:
    Three lists, one for the missing layers, another one for the unexpected layers, and a last one for the
    mismatched layers.
rD  r   rF  rG  rH  rI  r   )rJ   load_tf_weights_from_safetensorsr1  r{   rN  rO  intersectionrm   r   r_   r$  rP  )rQ  rR  rE  rS  r%  rT  all_missing_keysrV  r>  missing_layersunexpected_layersmismatched_layersr]  r^  r  r_  r`  s                    rD   (load_tf_sharded_weights_from_safetensorsr  W  st   2 eOeO!
?_$;	@
<+< 	N 340101


 " ##%56L3|$q(C,@1,D=eoo>V>V=WX|q "xx<(H<a1QCq<(HI12B1C1EEM!#"%((o+Noas!Ho+N"O12E1FaHHM=))/99 )I ,Os   /E-E!c                V    UR                  S5      (       a  [        nO[        nU" XX#S9$ )aT  
Detect missing and unexpected layers and load the TF weights from the shard file accordingly to their names and
shapes.

Args:
    model (`keras.models.Model`):
        The model to load the weights into.
    resolved_archive_file (`str`):
        The location of the H5 file.
    ignore_mismatched_sizes (`bool`, *optional*, defaults to `False`):
        Whether or not to ignore weights with shapes that don't match between the checkpoint of the model.

Returns:
    Three lists, one for the missing layers, another one for the unexpected layers, and a last one for the
    mismatched layers.
r+  rD  )endswithr~  load_tf_weights_from_h5)rQ  rr  rE  r%  load_functions        rD   load_tf_weightsr    s2    " %%n558/>U rF   c                .   / n[         R                  " US5       n[        [        US5      5      n[	        U R
                   Vs1 s H  owR                  iM     snU-
  5      n[	        X`R
                   Vs1 s H  owR                  iM     sn-
  5      n	[        5       n
[        5       n/ nU R
                   GH  nUR                  U;   d  M  XWR                     nUR                  UR                  -   n0 n[        US5       H^  nSR                  UR                  S5      SS  5      nUb  US-   U-   n[        R                  " UU   5      UU'   U
R                  U5        M`     U GH_  nUbj  [        UR                  S5      5      nSR                  UR                  R                  S5      S U UR                  R                  S5      US-   S  -   5      nO-SR                  UR                  R                  S5      SS  5      nUR                  US 5      nUc0  UR!                  S5      (       a  US S S-   nUR                  US 5      nUR                  U5        Uc  M  ["        R$                  " U5      UR&                  :w  a-   [        R(                  " U["        R$                  " U5      5      nOUnUR-                  UU45        GMb     GM     S S S 5        ["        R.                  " W5        WR1                  [	        WW
-
  5      5        W	R1                  [	        X-
  5      5        XU4$ s  snf s  snf ! [*         aG  nU(       a9  UR-                  UUR&                  ["        R$                  " U5      45         S nAGM  UeS nAff = f! , (       d  f       N= f)	Nrc  rd  weight_namesr!  r   zembeddings:0izweight:0)rg  rh  rJ  r   r=   layersr  trainable_weightsnon_trainable_weightsr$  r  rM   ri  rL  rm   getr  rj  rk  r;   r   rn   r1  rl  extend)rQ  rr  rE  r%  r  rt  ru  layerr  r  rZ  symbolic_weights_namesrv  rw  symbolic_weightsrs  r@  r  rx  	delimitersymbolic_weight_namery  rz  r{  s                           rD   r  r    sn    
(#	.2I%()HI`bo)p%q" u||D|ezz|DGaab !!;WcWc>dWcezzWc>d!de!$!$  \\Ezz77"9**"E#(#:#:U=X=X#X  " $C?Tb#cK88K$5$5c$:12$>?D*&}t3*,**_[5Q*RM$' +..t4 $d (8O*$'c(:$;	/2xx+0066s;JYG-2288=i!moNO0,
 03xx8L8L8R8RSV8WXYXZ8[/\,
 *7):):;OQU)V& *16J6S6STb6c6c/CDS/IJ/V,-:->->?SUY-Z* +../CD *5;;7;M;S;SS	,(*

3Eq{{SbGc(d %7E ,22OU3KL[ (81 " 
/n )* $58NNOPT"8"QRS.???q E ?e@ $. ,#:$5$<$<)=?Q?W?WYZYdYdetYu(v%& %-*+G,Q 
/	.sf   )NL(
N5L-
=NFN$N+L2- N(
N2
N	<9M>	5N<M>	>N	N
Nc           
        [        USS9 n/ nU R                   Vs/ s H  n[        UR                  US9PM     nn[	        UR                  5       5      n[	        [        U5      [        U5      -
  5      n	[	        [        U5      [        U5      -
  5      n
U R                   H  n[        UR                  US9nX;   d  M  UR                  U5      n[        R                  " U5      UR                  :w  a,   [        R                  " U[        R                  " U5      5      n[        R"                  " X5        M     S S S 5        W	W
W4$ s  snf ! [        [        R                  R                  4 aF  nU(       a8  UR!                  XR                  [        R                  " U5      45         S nAGM  UeS nAff = f! , (       d  f       N= f)Nr?   	framework)r%  )r1   r3  r&  r  r=   r  rJ  
get_tensorrj  rk  r;   r?   r   rn   errorsInvalidArgumentErrorr1  	set_value)rQ  rr  rE  r%  safetensors_archiver  rQ   r  loaded_weight_namesr  r  r?  r@  weight_valuer{  s                  rD   r~  r~    sw   	(D	9=PV[VcVcdVcQR3AFFGLVcd"#6#;#;#=>c,/#6I2JJK %8!9C<M!MNmmF5fkk7SK12==kJ;;v&,*<*<<$')zz,F@S'T F1# $ 
:6 ,.???3 e" '		(F(FG $2-44kCUCUWXWbWbciWj5kl$"#G$' 
:	9sR   F9EBF9?5F95+E F9F9$F608F1(F9/F11F66F99
Gc                   [        U 5      u  p#X-
  n[        R                  R                  US5      (       a  [        R                  " U R                  5       [        R                  " SU/SS//5      SS9n[        X!5      n[        R                  " [        R                  " US/5      S5      n[        R                  " U[        R                  " SU/SS//5      SS9nXu4$ [        R                  " U R                  5       [        R                  " SS/5      [        R                  " X/5      5      n[        R                  " [        R                  " US/5      S5      nXu4$ )a6  
This function aims to reduce the embeddings in case new_num_tokens < old_num_tokens or to pad with -1 in case
new_num_tokens > old_num_tokens. A mask is also computed in order to know which weight in the embeddings should be
kept or not. Example:

    - if new_num_tokens=5 and old_num_tokens=4 and old_embeddings=[w1,w2,w3,w4]

        -  mask=[True,True,True,True,False] and current_weights=[w1,w2,w3,w4,-1]
    - if new_num_tokens=4 and old_num_tokens=5 and old_embeddings=[w1,w2,w3,w4,w5]

        - mask=[True,True,True,True] and current_weights=[w1,w2,w3,w4]
r   r   constant_valuesr   TF)
r   r?   r   greaterpadvalueconvert_to_tensorminfillslice)old_embeddingsnew_num_tokensold_num_tokensold_embedding_dim	size_diffcurrent_weightsnum_tokens_to_copymasks           rD   init_copy_embeddingsr  -  s7    )3>(B%N/I 
wwy!$$ &&  "B$8$81i.1a&9Q$Rdf
 !@wwr++-?,CDdKvvdB001i.1a&1IJ\ab    ((  "  !Q(  .!DE

 wwr++^Q,?@$G  rF   c            
        ^  \ rS rSrSrSrSrSrSrSr	Sr
SrSrSr\S>S j5       rS r\S?S	 j5       rS@S
 jrU 4S jrS r\R,                  " \R0                  R2                  5      U 4S j5       r\R,                  " \R0                  R4                  5      U 4S j5       r\R,                  " \R0                  R6                  5      U 4S j5       r\R,                  " \R0                  R8                  5      U 4S j5       r\R,                  " \R0                  R:                  5      U 4S j5       r\R,                  " \R0                  R<                  5      U 4S j5       r\S 5       r \S 5       r!SAS jr"S r#\$RJ                  S 5       r&\SBS j5       r'S r(\SCS j5       r)SDS jr*S r+       SE               SFS jjr,       SGU 4S jjr-U 4S jr.S  r/S! r0S" r1        SH                 SIS# jjr2S$ r3SJS% jr4S& r5SJS' jr6SKS( jr7SLS) jr8S* r9SDS+ jr: S@   SMS, jjr;S@SNS- jjr<S. r=S/ r>S0 r?S1 r@      SOS2 jrAS3 rBS@SPS4 jjrC      SQS5 jrDS6 rE        SR       SSS7 jjrF\SSSSSSS8SS9.                 STS: jj5       rG       SU                 SVS; jjrH\SWS< j5       rIS=rJU =rK$ )XTFPreTrainedModeliU  a}  
Base class for all TF models.

[`TFPreTrainedModel`] takes care of storing the configuration of the models and handles methods for loading,
downloading and saving models as well as a few methods common to all models to:

    - resize the input embeddings,
    - prune heads in the self-attention heads.

Class attributes (overridden by derived classes):

    - **config_class** ([`PretrainedConfig`]) -- A subclass of [`PretrainedConfig`] to use as configuration class
      for this model architecture.
    - **base_model_prefix** (`str`) -- A string indicating the attribute associated to the base model in derived
      classes of the same architecture adding modules on top of the base model.
    - **main_input_name** (`str`) -- The name of the principal input to the model (often `input_ids` for NLP
      models, `pixel_values` for vision models and `input_values` for speech models).
N r   Fc                   0 nU R                   R                  5        H|  u  p#UR                   Vs/ s H	  oDb  UOSPM     nnUR                  S   c  SUS'   [        R                  " XSR
                  S9X'   US:X  d  Mb  [        R                  " X   5      X'   M~     U R                  R                  (       a  S[        R                  " U R                  5      R                  ;   aa  SU;  a[  U R                  S:X  a@  [        R                  " SSU R                  R                  4[        R                  SS9US'   U$ [!        S	5      eU$ s  snf )
z\
Dummy inputs to build the network.

Returns:
    `Dict[str, tf.Tensor]`: The dummy inputs.
r7   r   r   )r;   r   token_type_idsencoder_hidden_statesr   r;   r   r  zModel has cross-attention but we couldn't infer the shape for the encoder hidden states. Please manually override dummy_inputs!)input_signaturer   r;   r?   onesr   
zeros_likerh   add_cross_attentionr   r   callr   r  hidden_sizefloat32NotImplementedError)rX   dummiesr   specdimdummy_shapes         rD   dummy_inputsTFPreTrainedModel.dummy_inputsx  s(    --335ICDHJJOJS/3q8JKOzz!}$!"A77JJGGL&&!}}W\: 6 ;;**/F'J[J[\`\e\eJfJqJq/q&g5'';679ww !T[[%<%<=RZZVm8G34  . Z  % Ps   E	c                    [         R                  " U R                  5         U R                  S S9  S S S 5        g ! , (       d  f       g = f)N)input_shape)r?   
name_scoper  buildrX   s    rD   build_in_name_scope%TFPreTrainedModel.build_in_name_scope  s,    ]]499%JJ4J( &%%s	   :
Ac                    g)z3
:str: Identifies that this is a TensorFlow model.
r?   r\   r  s    rD   r  TFPreTrainedModel.framework  s    
 rF   c                    g rL   r\   rX   r  s     rD   r  TFPreTrainedModel.build  s    rF   c                  > [         TU ]  " U0 UD6  [        U[        5      (       d:  [	        SU R
                  R                   SU R
                  R                   S35      eXl        UR                  U l        U R                  5       (       a  [        R                  " U5      OS U l        U R                  U R                  5        g )NzParameter config in `zt(config)` should be an instance of class `PretrainedConfig`. To create a model from a pretrained model use `model = z(.from_pretrained(PRETRAINED_MODEL_NAME)`)ry   r   ri   r   r   r   r_   rh   name_or_pathcan_generater   from_model_configgeneration_config_set_save_specr  )rX   rh   r   rr   r   s       rD   r   TFPreTrainedModel.__init__  s    &+F+&"233'(?(?'@ A NN3344\^  "//OSO`O`ObOb!1!C!CF!KhlD001rF   c                6    U R                   R                  5       $ rL   )rh   rz   r  s    rD   rv   TFPreTrainedModel.get_config  s    {{""$$rF   c                >   > [        U0 UD6u  p[        TU ]  " U0 UD6$ rL   )r   ry   fitrX   rq   rr   r   s      rD   r  TFPreTrainedModel.fit  s(    -t>v>w{D+F++rF   c                >   > [        U0 UD6u  p[        TU ]  " U0 UD6$ rL   )r   ry   train_on_batchr  s      rD   r   TFPreTrainedModel.train_on_batch  s)    -t>v>w%t6v66rF   c                >   > [        U0 UD6u  p[        TU ]  " U0 UD6$ rL   )r   ry   test_on_batchr  s      rD   r  TFPreTrainedModel.test_on_batch  s)    -t>v>w$d5f55rF   c                >   > [        U0 UD6u  p[        TU ]  " U0 UD6$ rL   )r   ry   predict_on_batchr  s      rD   r  "TFPreTrainedModel.predict_on_batch  s)    -t>v>w'888rF   c                >   > [        U0 UD6u  p[        TU ]  " U0 UD6$ rL   )r   ry   predictr  s      rD   r  TFPreTrainedModel.predict  s(    -t>v>w///rF   c                >   > [        U0 UD6u  p[        TU ]  " U0 UD6$ rL   )r   ry   evaluater  s      rD   r  TFPreTrainedModel.evaluate  s)    -t>v>w000rF   c                    [        U[        5      (       a  U R                  " U40 UD6$ U R                  U R                  R                  " U40 UD65      $ rL   )ri   r   _from_configrf   rl   r}   rh   rr   s      rD   from_configTFPreTrainedModel.from_config  sM    f.//##F5f55 0 0 : :6 LV LMMrF   c                    U " U40 UD6$ )zJ
All context managers that the model should be initialized under go here.
r\   r  s      rD   r  TFPreTrainedModel._from_config  s    
 6$V$$rF   c                >    Ub  U R                  X5      nU$ S/U-  nU$ )a  
Prepare the head mask if needed.

Args:
    head_mask (`tf.Tensor` with shape `[num_heads]` or `[num_hidden_layers x num_heads]`, *optional*):
        The mask indicating if we should keep the heads or not (1.0 for keep, 0.0 for discard).
    num_hidden_layers (`int`):
        The number of hidden layers in the model.

Returns:
    `tf.Tensor` with shape `[num_hidden_layers x batch x num_heads x seq_length x seq_length]` or list with
    `[None]` for each layer.
N)_convert_head_mask_to_5drX   	head_masknum_hidden_layerss      rD   get_head_maskTFPreTrainedModel.get_head_mask  s7      55iSI  !22IrF   c                |   UR                   R                  S:X  a"  USSSS2SS4   n[        R                  " XSS9nO(UR                   R                  S:X  a  USS2SSS2SS4   nUR                   R                  S:X  d   SUR	                  5        35       e[        R
                  " U[        R                  5      nU$ )zD-> [num_hidden_layers x batch x num_heads x seq_length x seq_length]r   Nr   )repeatsr:   r7      zhead_mask.dim != 5, instead )r;   r<   r?   repeatr  r   r  r  s      rD   r  *TFPreTrainedModel._convert_head_mask_to_5d  s    ??1$!$at";<I		)QOI__!!Q&!!T1dD"89I##q(Z,HHY*ZZ(GGIrzz2	rF   c                F    U R                  U5      nU R                  U5      $ )a  
Args:
Method used for serving the model. Does not have a specific signature, but will be specialized as concrete
functions when saving with `save_pretrained`.
    inputs (`Dict[str, tf.Tensor]`):
        The input of the saved model as a dictionary of tensors.
)r  serving_output)rX   r   r  s      rD   servingTFPreTrainedModel.serving  s#     6"""6**rF   c                T   [        [        R                  " U R                  5      R                  5      n0 nSU;   ai  U R
                  R                  R                  S5      (       a  SnOSnS H4  nXA;   d  M
  [        R                  " S/U-  [        R                  US9X$'   M6     SU;   a  / S	Qn[        U R                  S
5      (       a  U R                  R                  nOU R                  n[        US5      (       a  UR                  US'   O[        S5      e[        US5      (       a  UR                   =US'   US'   O1[        US5      (       a  UR"                  =US'   US'   O[        S5      e[        R                  " U[        R$                  SS9US'   SU;   a  [        S5      eU$ )z
This property should return a dict mapping input names to tf.TensorSpec objects, representing the expected
shape and dtype for model inputs. It is used for both serving and for generating dummy inputs.
r   ForMultipleChoice   r7   )r   attention_maskr  decoder_input_idsdecoder_attention_maskNr  pixel_values)NNNNvision_confignum_channelsr   zhCould not infer number of channels from config, please override input_signature to specify input shapes.
image_size
input_sizezgCould not infer input image shape from config, please override input_signature to specify input shapes.input_featuresz4Audio models need a manually defined input_signature)r=   r   r   r  r   r   r_   r  r?   
TensorSpecr  r   rh   r  r  r  r  r  r  )rX   model_inputssig	text_dims
input_namepixel_values_shaper  s          rD   r  !TFPreTrainedModel.input_signature  s    G--dii8CCD,&~~&&//0CDD		
 -&(mmTFY4FWa&bCO \)!9t{{O44 $ 9 9 $}n55(5(B(B"1%)~  }l33@M@X@XX"1%(:1(=55@M@X@XX"1%(:1(=)}  #%--0BBJJUc"dC|+%&\]]
rF   c                   [        U[        5      (       d  U$ U GH  nUR                  S5      (       a!  [        U R                  SS5      (       d  SX'   OUR                  S5      (       a!  [        U R                  SS5      (       d  SX'   OiUS:X  a!  [        U R                  SS5      (       d  SX'   OBUS	:X  a<  [        U R                  SS5      (       a  [        U R                  S
S5      (       d  SX'   [        X   [
        [        45      (       d  M   [        R                  " X   5      X'   GM     U$ ! [        [        R                  R                  4 a     GMD  f = f)zj
Prepare the output of the saved model. Can be overridden if specific serving modifications are required.
hidden_statesr   FN
attentionsr   r   r   cross_attentionsr  )ri   r"   r  r   rh   r  r=   r?   r  rn   r  r  )rX   r  r   s      rD   r   TFPreTrainedModel.serving_output=  s%    &+..MC||O,,WT[[J`bg5h5h"l++GDKKI\^c4d4d"))'$++{TY2Z2Z"**%8%@@WT[[ZoqvEwEw"&+t}55"$"6"6v{"CFK    #BII$B$BC s   D66$EEc                j    S[        U R                  5      ;   a  S[        U R                  5      ;   a  gg)z
Returns whether this model can generate sequences with `.generate()`.

Returns:
    `bool`: Whether this model can generate sequences with `.generate()`.
GenerationMixinFT)strprepare_inputs_for_generationgenerate)r}   s    rD   r  TFPreTrainedModel.can_generateU  s2     C$E$E FFK\`cdgdpdp`qKqrF   c                b    [        X R                  U 5      nXLa  UR                  5       $ [        e)z
Returns the model's input embeddings layer.

Returns:
    `tf.Variable`: The embeddings layer mapping vocabulary to hidden states.
)r   base_model_prefixget_input_embeddingsr  )rX   
main_layers     rD   r(  &TFPreTrainedModel.get_input_embeddingsc  s1     T#9#94@
!2244%%rF   c                   [         R                  R                  U5      (       d  [         R                  " U5        [         R                  R	                  US5      nU R                  U5        X R                  R                  5       S.n[         R                  R	                  US5      n[        US5       n[        R                  " XF5        S S S 5        g ! , (       d  f       g = f)Nz
weights.h5)epochoptimizer_statezextra_data.picklewb)ospathisdirmkdirr$  save_weights	optimizerget_weightsrn  pickledump)rX   checkpoint_dirr,  weights_path
extra_dataextra_data_pathr|  s          rD   _save_checkpoint"TFPreTrainedModel._save_checkpointq  s    ww}}^,,HH^$ ww||NLA,'$9S9S9UV
'',,~7JK/4(AKK
& )((s   4C
C"c	                   [        U S/5        SSKn	Uc  Uc
  [        SS9nO
[        USS9nUc  0 n[	        XR
                  5      (       d  [        S5      e[        [        R                  " U R                  5      R                  5      n
[        U R                  5      nS[        [        R                  " UR                  5      R                  R                  5       5      ;   a  UR                  USUUU
S	9u  pOOUR                    Vs/ s H  nX;  d  M
  US
;  d  M  UPM     nnUR#                  U5      nUR                  USXVS9u  p[        UR                  5       5      nU Vs/ s H  nUU
;   d  M  UU;  d  M  UPM     nnU Vs/ s H  nUU;   d  M  UPM     nn[%        U5      S:X  a  US   OUn[%        U5      S:X  a  US   OUnUc  UnUR'                  UUUUUUUUS9nU$ s  snf s  snf s  snf )a  
Wraps a HuggingFace [`~datasets.Dataset`] as a `tf.data.Dataset` with collation and batching. This method is
designed to create a "ready-to-use" dataset that can be passed directly to Keras methods like `fit()` without
further modification. The method will drop columns from the dataset if they don't match input names for the
model. If you want to specify the column names to return rather than using the names that match this model, we
recommend using `Dataset.to_tf_dataset()` instead.

Args:
    dataset (`Any`):
        A [~`datasets.Dataset`] to be wrapped as a `tf.data.Dataset`.
    batch_size (`int`, *optional*, defaults to 8):
        The size of batches to return.
    shuffle (`bool`, defaults to `True`):
        Whether to return samples from the dataset in random order. Usually `True` for training datasets and
        `False` for validation/test datasets.
    tokenizer ([`PreTrainedTokenizerBase`], *optional*):
        A `PreTrainedTokenizer` that will be used to pad samples to create batches. Has no effect if a specific
        `collate_fn` is passed instead.
    collate_fn (`Callable`, *optional*):
        A function that collates samples from the dataset into a single batch. Defaults to
        `DefaultDataCollator` if no `tokenizer` is supplied or `DataCollatorWithPadding` if a `tokenizer` is
        passed.
    collate_fn_args (`Dict[str, Any]`, *optional*):
        A dict of arguments to pass to the `collate_fn` alongside the list of samples.
    drop_remainder (`bool`, *optional*):
        Whether to drop the final batch, if the batch_size does not evenly divide the dataset length. Defaults
        to the same setting as `shuffle`.
    prefetch (`bool`, defaults to `True`):
        Whether to add prefetching to the end of the `tf.data` pipeline. This is almost always beneficial for
        performance, but can be disabled in edge cases.


Returns:
    `Dataset`: A `tf.data.Dataset` which is ready to pass to the Keras API.
datasetsr   NrM   )return_tensors)	tokenizerr@  z.Dataset argument should be a datasets.Dataset!cols_to_retain)
batch_size
collate_fncollate_fn_argsrB  )	label_idslabel)rC  rD  rE  r   )columns
label_colsrC  shuffledrop_remainderrD  rE  prefetch)r-   r?  r   r   ri   Datasetr   r=   r   r   r  r   r&   r   _get_output_signaturer  featuresremove_columnsrm   to_tf_dataset)rX   datasetrC  rJ  rA  rD  rE  rK  rL  r?  r  model_labelsoutput_signature_featureunwanted_columnsoutput_columnscolfeature_colsrI  
tf_datasets                        rD   prepare_tf_dataset$TFPreTrainedModel.prepare_tf_dataset~  s(   \ 	$- 0E
4yY]^
" O'#3#344LMMG--dii8CCD"4>>2tG$5$5g6S6S$T$_$_$d$d$fgg")"?"?% /+ #@ #a  '// /G. 3:BX3X /   
 ,,-=>G")"?"?DZ #@ # .3356'5i~9LQT\hQh~i%3K^csl7Jc^
K
 +.l*;q*@|Al&)*o&:Z]

!$N** !!)!+ + 	

 ?  jKs0   	G&G&G&%
G+3G+;G+
G0G0c                  > US;   a  [         R                  S5        SnUS:X  a  [        nSU l        OSU l        [	        [
        R                  " [        R                  R                  5      R                  R                  5       5      n	SU	;   a  [        T
U ]$  " S
UUUUUUUS.UD6  g	[        T
U ]$  " S
UUUUUUUS.UD6  g	)z
This is a thin wrapper that sets the model's loss output head as the loss if the user does not specify a loss
function themselves.
)auto_with_warningpassthrougha  No loss specified in compile() - the model's internal loss computation will be used as the loss. Don't panic - this is a common way to train TensorFlow models in Transformers! To disable this behaviour please pass a loss argument, or explicitly pass `loss=None` if you do not want your model to compute a loss. You can also specify `loss='auto'` to get the internal loss without printing this info string.autoTFsteps_per_execution)r4  lossmetricsloss_weightsweighted_metricsrun_eagerlyrb  )r4  rc  rd  re  rf  rg   experimental_steps_per_executionNr\   )r  inforE   _using_dummy_lossr=   r   r   r   Modelcompiler   r  ry   )rX   r4  rc  rd  re  rf  rg  rb  rr   parent_argsr   s             rD   rl  TFPreTrainedModel.compile  s     77KKK D6>D%)D"%*D"7,,U[[-@-@ALLQQST K/GO 	#)!1'$7	 	 GO 	#)!1'1D	 	rF   c                   > [        [        R                  S5      (       a  [        TU ]  " U0 UD6$ [
        R                  " S[        5        U R                  " U0 UD6$ )Ncompute_lossa0  The old compute_loss method is deprecated as it conflicts with the Keras compute_loss method added in TF 2.8. If you want the original HF compute_loss, please call hf_compute_loss() instead. From TF versions >= 2.8, or Transformers versions >= 5, calling compute_loss() will get the Keras method instead.)	r   r   rk  ry   rp  r  r  r  r   r  s      rD   rp  TFPreTrainedModel.compute_loss  sX    5;;//7'888MML  ''888rF   c                    [        [        R                  " U R                  5      R                  5      nU R
                  b  U R
                  $ SU;   a  SSS.$ SU;   a  SSS.$ S	U;   a  SS
S.$ SU;   a  SSS.$ 0 $ )Nstart_positionsstart_logits
end_logits)rs  end_positionssentence_order_labelprediction_logits
sop_logits)r   rw  r   seq_relationship_logits)r   r   	mc_labelsr   	mc_logits)r   r{  )r=   r   r   r  r   _label_to_output_map)rX   	arg_namess     rD    get_label_to_output_name_mapping2TFPreTrainedModel.get_label_to_output_name_mapping+  s    **4995@@A	$$0,,,)+'5UU#y01<XX"i/1JcddI%&[AAIrF   c                   [        [        R                  " U R                  5      R                  5      n[        U R                  5      nU R                  5       nUR                  5        VVs0 s H  u  pVXe_M	     nnnU R                  (       d1  [        [        R                  5      [        S5      :  a  [        U5      n[        R                  R!                  U5      u  pn
[#        U[$        5      (       a  UR'                  5       n[#        U	[$        5      (       a  U	R'                  5       n	U R                  (       a  U	b  [)        U5      S:X  ac  [#        U	[        R*                  5      (       aD  [#        U[        R*                  5      (       a  US   U0n[-        [/        U5      5      nX;  a  XU'   O[#        U	[$        5      (       at  [#        U[        R*                  5      (       a  US   U0nU	R                  5        H:  u  pVXR;   a  XX;  a  XhU'   M  UR1                  US5      U;   d  M-  XX;  d  M4  XhXu   '   M<     U	cN  UR                  5        VVs0 s H  u  pVXS;   d  M  XV_M     n	nnU	(       d  U R                  (       d  [3        S5      e[#        U	[$        5      (       a4  U	R                  5        VVs0 s H  u  pVUR1                  XU5      U_M     n	nn[        R4                  " 5        nU R                  (       a  SU;   a	  U " USSS9nOU " USS	9nU R                  (       a0  U R7                  UR8                  UR8                  XR:                  S
9nOSn[#        U	[$        5      (       a  [)        U	5      S:X  a  [        U	R=                  5       5      S   UR=                  5       ;   a   U[        U	R=                  5       5      S      nO+[        UR=                  5       5      S   S:X  a  US   nOUS   nU	R?                  5       u  pO[#        U	[$        5      (       a,  UR                  5        VVs0 s H  u  pVXY;   d  M  XV_M     nnnO[#        U	[@        5      (       d  [#        U	[         5      (       aS  [        UR=                  5       5      S   S:X  a  URC                  5       SS nOURC                  5       nUS[)        U	5       nO+[        UR=                  5       5      S   S:X  a  US   nOUS   nUc  U R7                  XXR:                  S
9nSSS5        U RD                  RG                  WU RH                  WS9  U RJ                  RM                  U	WU
5        0 nU RN                   HJ  nURQ                  5       n[#        U[$        5      (       a  URS                  U5        M;  UUURT                  '   ML     U$ s  snnf s  snnf s  snnf s  snnf ! , (       d  f       N= f)  
A modification of Keras's default `train_step` that correctly handles matching outputs to labels for our models
and supports directly training on the loss output head. In addition, it ensures input keys are copied to the
labels where appropriate. It will also copy label keys into the input dict when using the dummy loss, to ensure
that they are available to the model during the forward pass.
2.11.0Nr   r   RCould not find label column(s) in input dict and no separate labels were provided!return_lossT)trainingr  r  regularization_lossesrc  )tape)+r=   r   r   r  r   r&   r   r  r   rj  r   r?   __version__r   r   r   unpack_x_y_sample_weightri   rk   copyrm   r  nextiterr  rn   GradientTapecompiled_lossrc  r   r  popitemr  to_tupler4  minimizerV   compiled_metricsupdate_staterd  resultr{   r  )rX   datar~  label_kwargslabel_to_outputr   r   output_to_labelxysample_weightlabel_kwargr  rB   rc  rU  return_metricsmetricr  s                      rD   
train_stepTFPreTrainedModel.train_step:  s    **4995@@A	"4>>2??A4C4I4I4KL4K384KL%%%*?%/*QT?D#kkBB4Hm aAaA !!am< A%*Q		*B*Ba++"1q)A"4#56'%&kNAt$$a++"1q)A !	HC'CL!$#(,,S$79D25/./	 !*
 9*+'')K)hcs7J)AKT33 !uvvaDEGGINI$$S.3IAN __$%%-9*Da$DAa$/%%))&++v{{Mitit)u !T""s1v{>!$5#DN1$56F&++-(+v5#AYF#AYFyy{1At$$39<<>N>xsSX(#(>NAu%%At)<)<&q)V3#__.qr2F#__.F#a&) &q)V3#AYF#AYF|))!]ZeZe)fM R 	d&>&>TJ**1fmDllF]]_F&$''%%f-.4v{{+ # } MB L O2 O- s>   ,V2V8V83V>)EW
,W;WC	W
W


Wc                v   [        [        R                  " U R                  5      R                  5      n[        U R                  5      nU R                  5       nUR                  5        VVs0 s H  u  pVXe_M	     nnnU R                  (       d1  [        [        R                  5      [        S5      :  a  [        U5      n[        R                  R!                  U5      u  pn
[#        U[$        5      (       a  UR'                  5       n[#        U	[$        5      (       a  U	R'                  5       n	U R                  (       Ga2  U	Gb.  [        [        R                  " U R                  5      R                  5      n[)        U5      S:X  ac  [#        U	[        R*                  5      (       aD  [#        U[        R*                  5      (       a  US   U0n[-        [/        U5      5      nX;  a  XU'   O[#        U	[$        5      (       at  [#        U[        R*                  5      (       a  US   U0nU	R                  5        H:  u  pVXR;   a  XX;  a  XhU'   M  UR1                  US5      U;   d  M-  XX;  d  M4  XhXu   '   M<     U	cN  UR                  5        VVs0 s H  u  pVXS;   d  M  XV_M     n	nnU	(       d  U R                  (       d  [3        S5      e[#        U	[$        5      (       a4  U	R                  5        VVs0 s H  u  pVUR1                  XU5      U_M     n	nnU R                  (       a  SU;   a	  U " USSS	9nOU " USS
9nU R                  (       a0  U R5                  UR6                  UR6                  XR8                  S9nOSn[#        U	[$        5      (       a  [)        U	5      S:X  a  [        U	R;                  5       5      S   UR;                  5       ;   a   U[        U	R;                  5       5      S      nO+[        UR;                  5       5      S   S:X  a  US   nOUS   nU	R=                  5       u  pO[#        U	[$        5      (       a,  UR                  5        VVs0 s H  u  pVXY;   d  M  XV_M     nnnO[#        U	[>        5      (       d  [#        U	[         5      (       aS  [        UR;                  5       5      S   S:X  a  URA                  5       SS nOURA                  5       nUS[)        U	5       nO+[        UR;                  5       5      S   S:X  a  US   nOUS   nUc  U R5                  XXR8                  S9nU RB                  RE                  XU
5        0 nU RF                   HJ  nURI                  5       n[#        U[$        5      (       a  URK                  U5        M;  UUURL                  '   ML     U$ s  snnf s  snnf s  snnf s  snnf )r  r  Nr   r   r  r  TF)r  r  r  r  rc  )'r=   r   r   r  r   r&   r   r  r   rj  r   r?   r  r   r   r   r  ri   rk   r  rm   r  r  r  r  rn   r  rc  r   r  r  r  r  r  r  rd  r  r{   r  )rX   r  r~  r  r  r   r   r  r  r  r  r  rB   rc  rU  r  r  r  s                     rD   	test_stepTFPreTrainedModel.test_step  s    **4995@@A	"4>>2??A4C4I4I4KL4K384KL%%%*?%/*QT?D#kkBB4Hm aAaA !!!amW..tyy9DDEI< A%*Q		*B*Ba++"1q)A"4#56'%&kNAt$$a++"1q)A !	HC'CL!$#(,,S$79D25/./	 !*
 9*+'')K)hcs7J)AKT33 !uvvaDEGGINI$$S.3IAN !!my&@!>F!e,F!!%%fkk6;;epep%qDD a3q6Q;AFFH~a FKKM1QVVXq 12fkkm$Q'6199;DAq4  /5||~J~83hch~FJF5!!Z4%8%8FKKM"1%/*12.*Hc!f%F FKKM"1%/<%%aVaVa%bD**1mDllF]]_F&$''%%f-.4v{{+ # w MD L O0 Ks$   ,V#V)V)(V/V5V5c                    SSK Jn  UR                  U U R                  UUUUUUUU	U
S9nUR	                  5       n[        [        R                  R                  US5      S5       nUR                  U5        SSS5        g! , (       d  f       g= f)a  
Creates a draft of a model card using the information available to the `Trainer`.

Args:
    output_dir (`str` or `os.PathLike`):
        The folder in which to create the model card.
    model_name (`str`, *optional*):
        The name of the model.
    language (`str`, *optional*):
        The language of the model (if applicable)
    license (`str`, *optional*):
        The license of the model. Will default to the license of the pretrained model used, if the original
        model given to the `Trainer` comes from a repo on the Hub.
    tags (`str` or `List[str]`, *optional*):
        Some tags to be included in the metadata of the model card.
    finetuned_from (`str`, *optional*):
        The name of the model used to fine-tune this one (if applicable). Will default to the name of the repo
        of the original model given to the `Trainer` (if it comes from the Hub).
    tasks (`str` or `List[str]`, *optional*):
        One or several task identifiers, to be included in the metadata of the model card.
    dataset_tags (`str` or `List[str]`, *optional*):
        One or several dataset tags, to be included in the metadata of the model card.
    dataset (`str` or `List[str]`, *optional*):
        One or several dataset identifiers, to be included in the metadata of the model card.
    dataset_args (`str` or `List[str]`, *optional*):
       One or several dataset arguments, to be included in the metadata of the model card.
r   )TrainingSummary)
keras_historylanguagelicensetags
model_namefinetuned_fromtasksdataset_tagsrR  dataset_argsz	README.mdrQ   N)
	modelcardr  
from_kerashistoryto_model_cardrn  r/  r0  r$  write)rX   
output_dirr  r  r  r  r  r  r  rR  r  r  training_summary
model_cardr|  s                  rD   create_model_card#TFPreTrainedModel.create_model_card  s    R 	/*55,,!)%% 6 
 &335
"'',,z;7=GGJ >==s   $A??
Bc                    [        X R                  5      nUc  [        S5      e UR                  U5        g! [         a9    [
        R                  S5        U R                  5         UR                  U5         gf = f)z}
Set model's input embeddings

Args:
    value (`tf.Variable`):
        The new weights mapping hidden states to vocabulary.
Nz>The model does not implements the base_model_prefix attribute.Building the model)r   r'  r  set_input_embeddingsr   r  ri  r  )rX   r  r)  s      rD   r  &TFPreTrainedModel.set_input_embeddingsJ  so     T#9#9:
%&fgg	3++E2 	3KK,-$$&++E2	3s   7 A A:9A:c                    U R                  5       b!  U R                  5       n UR                  5       $ g! [         a=    [        R	                  S5        U R                  5         U" 5       R                  5       s $ f = f)zy
Returns the model's output embeddings

Returns:
    `tf.Variable`: The new weights mapping vocabulary to hidden states.
Nr  )get_lm_headget_output_embeddingsr   r  ri  r  rX   lm_heads     rD   r  'TFPreTrainedModel.get_output_embeddings^  su     )&&(G94466  " 901((*y6688	9s   4 AA;:A;c                    U R                  5       b#  U R                  5       n UR                  U5        gg! [         a9    [        R	                  S5        U R                  5         UR                  U5         gf = f)z~
Set model's output embeddings

Args:
    value (`tf.Variable`):
        The new weights mapping hidden states to vocabulary.
Nr  )r  set_output_embeddingsr   r  ri  r  rX   r  r  s      rD   r  'TFPreTrainedModel.set_output_embeddingsr  sq     )&&(G5--e4 * " 501((*--e45s   6 A A98A9c                X    [         R                  " S[        5        U R                  5       $ )z
Get the layer that handles a bias attribute in case the model has an LM head with weights tied to the
embeddings

Return:
    `keras.layers.Layer`: The layer that handles the bias, None if not an LM model.
zVThe method get_output_layer_with_bias is deprecated. Please use `get_lm_head` instead.)r  r  r  r  r  s    rD   get_output_layer_with_bias,TFPreTrainedModel.get_output_layer_with_bias  s'     	dfs	
 !!rF   c                :    [         R                  " S[        5        g)z
Get the concatenated _prefix name of the bias from the model name to the parent layer

Return:
    `str`: The _prefix name of the bias.
zMThe method get_prefix_bias_name is deprecated. Please use `get_bias` instead.N)r  r  r  r  s    rD   get_prefix_bias_name&TFPreTrainedModel.get_prefix_bias_name  s     	egturF   c                    U R                  5       b!  U R                  5       n UR                  5       $ g! [         a#    U R                  5         UR                  5       s $ f = f)z
Dict of bias attached to an LM head. The key represents the name of the bias attribute.

Return:
    `tf.Variable`: The weights representing the bias, None if not an LM model.
N)r  get_biasr   r  r  s     rD   r  TFPreTrainedModel.get_bias  sf     )&&(G*''))
 	 " *((*''))*s   4 *A! A!c                    U R                  5       b#  U R                  5       n UR                  U5        gg! [         a$    U R                  5         UR                  U5         gf = f)z{
Set all the bias in the LM head.

Args:
    value (`Dict[tf.Variable]`):
        All the new bias attached to an LM head.
N)r  set_biasr   r  r  s      rD   r  TFPreTrainedModel.set_bias  sd     )&&(G(  ' * " (((*  '(s   6 +A$#A$c                    g)z
The LM Head layer. This method must be overwritten by all the models that have a lm head.

Return:
    `keras.layers.Layer`: The LM head layer if the model has one, None if not.
Nr\   r  s    rD   r  TFPreTrainedModel.get_lm_head  s     rF   c                N   [        U R                  5       [        R                  R                  5      (       a  U R                  U5      $ Ub  XR                  R                  :X  a  U R                  U R                  5       5      $ U R                  U5      nXR                  l        U$ )aw  
Resizes input token embeddings matrix of the model if `new_num_tokens != config.vocab_size`.

Takes care of tying weights embeddings afterwards if the model class has a `tie_weights()` method.

Arguments:
    new_num_tokens (`int`, *optional*):
        The number of new tokens in the embedding matrix. Increasing the size will add newly initialized
        vectors at the end. Reducing the size will remove vectors from the end. If not provided or `None`, just
        returns a pointer to the input tokens without doing anything.

Return:
    `tf.Variable` or `keras.layers.Embedding`: Pointer to the input tokens of the model.
)
ri   r(  r   r  	Embedding_v2_resized_token_embeddingsrh   
vocab_size_get_word_embedding_weight_resize_token_embeddingsrX   r  model_embedss      rD   resize_token_embeddings)TFPreTrainedModel.resize_token_embeddings  s    ( d//15<<3I3IJJ44^DD!^{{7M7M%M2243L3L3NOO44^D "0rF   c                    Ub  XR                   R                  :X  a  U R                  5       $ U R                  U5      nXR                   l        U$ )a  
Resizes input token embeddings matrix of the model if `new_num_tokens != config.vocab_size`.

Arguments:
    new_num_tokens (`int`, *optional*):
        The number of new tokens in the embedding matrix. Increasing the size will add newly initialized
        vectors at the end. Reducing the size will remove vectors from the end. If not provided or `None`, just
        returns a pointer to the input tokens without doing anything.

Return:
    `keras.layers.Embedding`: Pointer to the input tokens of the model.
)rh   r  r(  _v2_resize_token_embeddingsr  s      rD   r  .TFPreTrainedModel._v2_resized_token_embeddings  sJ     !^{{7M7M%M,,..77G "0rF   c                    [        U[        R                  5      (       a  U$ [        USS 5      nUb  U$ [        USS 5      nUb  U$ U R	                  5         [        USS 5      nUb  U$ [        USS 5      nUb  U$ g )Nr?  decoder)ri   r?   r  r   r  )rQ  embedding_layerembedss      rD   r  ,TFPreTrainedModel._get_word_embedding_weight  s     oryy11"" (D9M)T:M
 	!!#(D9M)T:MrF   c                   U R                  U R                  5       5      nU R                  X!5      nU R                  5       b2  U R                  5       nU R	                  XA5      nU R                  U5        U R                  5       bA  U R                  U R                  5       5      nU R                  Xa5      nU R                  U5        U R                  U5        U R                  5       $ rL   )
r  r(  _get_resized_embeddingsr  _get_resized_lm_head_biasr  r  _get_resized_lm_head_decoderr  r  )rX   r  r  new_embeddingsold_lm_head_biasnew_lm_head_biasold_lm_head_decodernew_lm_head_decoders           rD   r  *TFPreTrainedModel._resize_token_embeddings  s    889R9R9TU55nU ==?&#}}#==>N_MM*+ %%'3"&"A"A$B\B\B^"_"&"C"CDW"h&&':;!!.1((**rF   c                    U R                  5       nU R                  X!5      nU R                  U5        U R                  5       b2  U R                  5       nU R	                  XA5      nU R                  U5        U R                  5       U R                  5       :H  nU R                  5       bH  U(       dA  U R                  U R                  5       5      nU R                  Xq5      nU R                  U5        U R                  5       $ rL   )
r(  _v2_get_resized_embeddingsr  r  _v2_get_resized_lm_head_biasr  r  r  r  r  )	rX   r  r  r  r  r  tied_weightsr  r  s	            rD   r  -TFPreTrainedModel._v2_resize_token_embeddings2  s    22488X!!.1 ==?&#}}#@@AQbMM*+ 002d6P6P6RR%%'3L"&"A"A$B\B\B^"_"&"C"CDW"h&&':;((**rF   c           	     N   0 nUR                  5        GH  u  pE[        R                  " U5      S:X  a  S[        U5      S   4O
[        U5      u  pgX'-
  nUc  U/OXb/n	[        R                  R                  US5      (       a  Uc  SU//OSS/SU//n
[        R                  " UR                  5       [        R                  " U
5      SS9n[        Xr5      nUc  U/OSU/n[        R                  " [        R                  " U5      S5      n[        R                  " U[        R                  " U
5      SS9nOUc  S/OSS/n[        R                  " UR                  5       [        R                  " U5      [        R                  " U	5      5      n[        R                  " [        R                  " U	5      S5      nU R                  U	SSUR                  R                  S	5      S   S
9n[        R                  " XUR                  5       5      nUR!                  U5        UX4'   GM     U$ )a=  
Build a resized bias from the old ones. Increasing the size will add newly initialized vectors at the end.
Reducing the size will remove vectors from the end

Args:
    old_lm_head_bias (`tf.Variable`):
        Old lm head bias to be resized.
    new_num_tokens (`int`, *optional*):
        New number of tokens in the linear matrix.

        Increasing the size will add newly initialized vectors at the end. Reducing the size will remove
        vectors from the end. If not provided or `None`, just returns None

Return:
    `tf.Variable`: Pointer to the resized bias.
r   Nr   r   r  TFzerosr   r;   rs   	trainabler  )r   r?   r<   r   r   r  r  r  r  r  r  r  
add_weightr  r  whereassign)rX   r  r  r  attrr?  	first_dimr  r  final_shapepadding_shapecurrent_biasr  
mask_shape	bias_mask
slice_fromnew_bias	init_biass                     rD   r  +TFPreTrainedModel._get_resized_lm_head_biasG  s   $ ,224LDIKQW\]I]z&/A!/D(Ecmntcu%I&7I.7.?>*iE`K wwy!,,4=4E!Y 0QPQFUVXaTbKc!vvfllnb6J6J=6Ykmn%(%H"5>5F01QPbLc
GGB$8$8$DdK	FF9b.B.B=.Qchi	$-$5aSAq6
!xxLLNB$8$8$DbFZFZ[fFg  GGB$8$8$EtL	!#[[&&s+A.	 ' H (..:JKIOOI&%-"; 5>  rF   c                   0 nUR                  5        H  u  pE[        R                  " U5      S:X  a  S[        U5      S   4O
[        U5      u  pgX'-
  nXr:  a  UR	                  5       SSU24   n	OIUc  SU//OSS/SU//n
[        R
                  " UR	                  5       [        R                  " U
5      5      n	XU'   M     U$ )a  
Build a resized bias from the old ones. Increasing the size will add newly initialized vectors at the end.
Reducing the size will remove vectors from the end

Args:
    old_lm_head_bias (`Dict[str, tf.Variable]`):
        Old lm head bias to be resized.
    new_num_tokens (`int`):
        New number of tokens in the linear matrix. Increasing the size will add newly initialized vectors at
        the end. Reducing the size will remove vectors from the end.

Return:
    `tf.Tensor`: Values for the resized bias.
r   Nr   .)r   r?   r<   r   r  r  r  )rX   r  r  r  r   r?  r  r  r  r  r  s              rD   r  .TFPreTrainedModel._v2_get_resized_lm_head_bias|  s    " ,224LDIKQW\]I]z&/A!/D(Ecmntcu%I&7I .!<<>#*>?4=4E!Y 0QPQFUVXaTbKc66&,,."2F2F}2UV%-T" 5  rF   c                   Un[         R                  " U R                  U R                  5       5      U:H  5      nUb  U(       d  [	        U5      S   n[        X5      u  pgU R                  X%4SSUR                  R                  S5      S   S9n[         R                  " XgUR                  5       5      nUR                  U5        U$ )a  
Build a resized decoder from the old ones. Increasing the size will add newly initialized vectors at the end.
Reducing the size will remove vectors from the end

Args:
    old_lm_head_decoder (`tf.Variable`):
        Old lm head decoder to be resized.
    new_num_tokens (`int`, *optional*):
        New number of tokens in the linear matrix.

        Increasing the size will add newly initialized vectors at the end. Reducing the size will remove
        vectors from the end. If not provided or `None`, just returns None

Return:
    `tf.Variable`: Pointer to the resized decoder or None if the output embeddings are different from the input
    ones.
r   r  Tr   r   r  )r?   r   r  r(  r   r  r  r  r  r  r  r  )	rX   r  r  r  is_input_output_equalsr  decoder_maskcurrent_decoderinit_decoders	            rD   r  .TFPreTrainedModel._get_resized_lm_head_decoder  s    $ 2!#++D,E,E,GHL__"
 *3I *+> ? B,@AT,e)L"&//%9#(--33C8;	 #2 # 88LCVC\C\C^_L&&|4""rF   c                b   [        U5      S   n[        U R                  SS5      n[        X5      u  pVU R	                  UR
                  R                  S5      S   X#/[        U5      [        R                  S9n[        R                  " XVUR                  5       5      nUR                  U5        U$ )a  
Build a resized Embedding weights from a provided token Embedding weights. Increasing the size will add newly
initialized vectors at the end. Reducing the size will remove vectors from the end

Args:
    old_embeddings (`tf.Variable`):
        Old embeddings to be resized.
    new_num_tokens (`int`, *optional*):
        New number of tokens in the embedding matrix.

        Increasing the size will add newly initialized vectors at the end. Reducing the size will remove
        vectors from the end. If not provided or `None`, just returns a pointer to the input tokens
        `tf.Variable` module of the model without doing anything.

Return:
    `tf.Variable`: Pointer to the resized Embedding Module or the old Embedding Module if `new_num_tokens` is
    `None`
r   initializer_range{Gz?r   r   )r  r;   rs   r   )r   r   rh   r  r  r  r  get_initializerr?   r  r  r  r  )	rX   r  r  r  
init_rangeembeddings_maskcurrent_embeddingsr  init_embeddingss	            rD   r  )TFPreTrainedModel._get_resized_embeddings  s    ( '~6q9T[[*=tD
.B>.b+$$**3/2!5'
3**	 ) 
 ((?H\H\H^_o.rF   c                d   Sn/ SQnU H6  n[        U R                  U5      (       d  M   [        U R                  U5      nM8     [        R                  R                  UUR                  [        R                  R                  US9UR                  R                  SS S9nU" [        R                  " S//5      5        UR                  U:  a  UR                  SU nO8[        R                  " UR                  UR                  UR                  S /SS9nUR                  R                  U5        U$ )	a  
Build a resized Embedding layer from a provided Embedding layer. Increasing the size will add newly initialized
vectors at the end. Reducing the size will remove vectors from the end.

Args:
    old_embeddings (`keras.layers.Embedding`):
        Old embeddings to be resized.
    new_num_tokens (`int`, *optional*):
        New number of tokens in the embedding matrix.

Return:
    `keras.layers.Embedding`: Resized Embedding layer.
r  )r  initializer_factorinit_stdstddevNi)	input_dim
output_dimembeddings_initializerr  r   r9   )r   rh   r   r   r  r  r"  initializersTruncatedNormal
embeddingsr  r?   constantr!  concatr  )rX   r  r  r  'potential_initialization_variable_namesvar_namer  r  s           rD   r  ,TFPreTrainedModel._v2_get_resized_embeddings  s   $ 
3
/
 @Ht{{H--$T[[(;
 @
 //$%00#(#5#5#E#EZ#E#X**//5	 0 
 	r{{QC5)* ##~5,77HO ii**N,E,EnF^F^F`,abijO 	!!((9rF   c                    [         e)ah  
Prunes heads of the base model.

Arguments:
    heads_to_prune (`Dict[int, List[int]]`):
        Dictionary with keys being selected layer indices (`int`) and associated values being the list of heads
        to prune in said layer (list of `int`). For instance {1: [0, 2], 2: [2, 3]} will prune heads 0 and 2 on
        layer 1 and heads 2 and 3 on layer 2.
)r  )rX   heads_to_prunes     rD   prune_headsTFPreTrainedModel.prune_heads	  s
     "!rF   c
                   U
R                  SS5      nUb+  [        R                  " S[        5        U	b  [	        S5      eUn	U	b  XS'   [
        R                  R                  U5      (       a  [        R                  SU S35        g[
        R                  " USS	9  U(       ar  U
R                  S
S5      nU
R                  SUR                  [
        R                  R                  5      S   5      nU R                  " U40 U
D6nU R                  U5      nU(       Ga  [        U R                   SS5      bi  [#        U R                   R$                  [&        5      (       d@  ['        U R                   R$                  5      R                  S5      S   U R                   l        UGc  U R(                  R+                  U R,                  5      n[/        S U R,                  R1                  5        5       5      (       a  U R,                  R3                  5        VVs0 s Hi  u  nnU[4        R6                  " UR8                  UR:                  [4        R<                  :X  a  [4        R>                  OUR:                  UR@                  S9_Mk     nnnU R(                  R+                  U5      nUUS.nOUn[
        R                  RC                  US['        U5      5      nU RE                  USUS9  [        RG                  SU 35        U RH                  RJ                  SS /U R                   l&        U RN                  b  [Q        XU R                   S9  U R                   RS                  U5        U RU                  5       (       a  U RV                  RS                  U5        U(       a  [X        O[Z        n[
        R                  RC                  UU5      n[]        U R^                  UUS9u  nn[
        R`                  " U5       H  n[
        R                  RC                  UU5      nURc                  SS5      Rc                  SS5      nURe                  U5      (       d  M]  [
        R                  R                  U5      (       d  M  UURg                  5       ;  d  M  [
        Rh                  " U5        M     Uc~  U(       aL  U R^                   Vs0 s H'  n[k        UR@                  5      URm                  5       _M)     nn[o        UUSS0S9  OU Rq                  U5        [        RG                  S U 35        GO`U(       a  [r        O[t        n[
        R                  RC                  UU5      n[w        US!S"S#9 n[x        Rz                  " USSS$9S%-   n UR}                  U 5        SSS5        [        RG                  S&U S'[        U5       S(U S35        UR3                  5        GH  u  n!n"U(       aa  U" Vs0 s H'  n[k        UR@                  5      URm                  5       _M)     n#n[o        U#[
        R                  RC                  UU!5      SS0S9  Mo  [        R                  " [
        R                  RC                  UU!5      S!S)9 n!/ n$[        U"S* S+9 H  n%S,U%R@                  ;   d(  [        U%R@                  R                  S-5      5      S:X  a  U%R@                  n&O-S-RC                  U%R@                  R                  S-5      SS 5      n&U!R                  U&U%R                  5       R8                  U%R                  5       R:                  S.9n'U%R                  5       U'SS& U$R                  U&R                  S/5      5        M     [        U!S0U$5        SSS5        GM     U(       a  U R                  UWWWU	S19  ggs  snnf s  snf ! , (       d  f       GN%= fs  snf ! , (       d  f       GM  = f)2a,	  
Save a model and its configuration file to a directory, so that it can be re-loaded using the
[`~TFPreTrainedModel.from_pretrained`] class method.

Arguments:
    save_directory (`str`):
        Directory to which to save. Will be created if it doesn't exist.
    saved_model (`bool`, *optional*, defaults to `False`):
        If the model has to be saved in saved model format as well or not.
    version (`int`, *optional*, defaults to 1):
        The version of the saved model. A saved model needs to be versioned in order to be properly loaded by
        TensorFlow Serving as detailed in the official documentation
        https://www.tensorflow.org/tfx/serving/serving_basic
    push_to_hub (`bool`, *optional*, defaults to `False`):
        Whether or not to push your model to the Hugging Face model hub after saving it. You can specify the
        repository you want to push to with `repo_id` (will default to the name of `save_directory` in your
        namespace).
    signatures (`dict` or `tf.function`, *optional*):
        Model's signature used for serving. This will be passed to the `signatures` argument of model.save().
    max_shard_size (`int` or `str`, *optional*, defaults to `"10GB"`):
        The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size
        lower than this size. If expressed as a string, needs to be digits followed by a unit (like `"5MB"`).

        <Tip warning={true}>

        If a single weight of the model is bigger than `max_shard_size`, it will be in its own checkpoint shard
        which will be bigger than `max_shard_size`.

        </Tip>

    create_pr (`bool`, *optional*, defaults to `False`):
        Whether or not to create a PR with the uploaded files or directly commit.
    safe_serialization (`bool`, *optional*, defaults to `False`):
        Whether to save the model using `safetensors` or the traditional TensorFlow way (that uses `h5`).
    token (`str` or `bool`, *optional*):
        The token to use as HTTP bearer authorization for remote files. If `True`, or not specified, will use
        the token generated when running `huggingface-cli login` (stored in `~/.huggingface`).
    kwargs (`Dict[str, Any]`, *optional*):
        Additional key word arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
use_auth_tokenNrThe `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.V`token` and `use_auth_token` are both specified. Please set only the argument `token`.tokenzProvided path (z#) should be a directory, not a fileT)exist_okcommit_messagerepo_idr   torch_dtyper   r   c              3  Z   #    U  H!  oR                   [        R                  :H  v   M#     g 7frL   )r   r?   r  )rP   r  s     rD   rR   4TFPreTrainedModel.save_pretrained.<locals>.<genexpr>z	  s     X:W$zzRXX-:Ws   )+r  )serving_defaultint64_servingsaved_modelF)include_optimizer
signatureszSaved model created in r7   )rh   )r5  z.binr  r+  formatr?   )r-  zModel weights saved in rQ   zutf-8)encoding)indent	sort_keys
z:The model is bigger than the maximum size per checkpoint (z) and is going to be split in z^ checkpoint shards. You can find where each parameters has been saved in the index located at )modec                    U R                   $ rL   r  )r  s    rD   <lambda>3TFPreTrainedModel.save_pretrained.<locals>.<lambda>	  s    rF   )r   r"  r!  r   utf8rd  )r6  r4  )Irj   r  r  r  rn   r/  r0  isfiler  errormakedirsr  sep_create_repo_get_files_timestampsr   rh   ri   r8  r"  r  get_concrete_functionr  anyvaluesr   r?   r  r;   r   r  r  r  r$  saveri  r   r_   architectures_auto_classr   save_pretrainedr  r  r   r   rB  r3  listdirr2  r#  r  remover&  r  safe_save_filer3  r   r   rn  jsondumpsr  rm   rg  rh  sortedcreate_datasetr/  r1  encoder   _upload_modified_files)(rX   save_directoryr=  re  push_to_hubr?  r4  	create_prsafe_serializationr4  rr   r1  r6  r7  files_timestampsr;  r   r  
int64_specr<  saved_model_dirr5  output_model_filer;  rA  filenamefull_filenameweights_no_suffixrQ   
state_dictsave_index_file
index_filecontentr>  r=  shard_state_dictr  r  rY  
param_dsets(                                           rD   rV  !TFPreTrainedModel.save_pretrained"	  ss   j  $4d;%MM E   l  #E#7O77>>.))LL?>*::]^_
NT2#ZZ(8$?NjjN,@,@,Mb,QRG'':6:G#99.I t{{M48DZX\XcXcXoXoqtMuMu*-dkk.E.E*F*L*LS*QRS*T'!"&,,"D"DTEYEY"ZX$:N:N:U:U:WXXX
 *.)=)=)C)C)E	" *FIC R]]"&**

bhh@VBHH\`\f\fmqmvmv  *F	  " %)LL$F$Fz$RM5DWd!eJ!0J ggll>=#g,WOIIo:IVKK1/1BCD &*^^%<%<QR%@$A! 'tDKKH##N3""22>B -?(DTGGLLF+DLL.Wcd 

>2HGGLLBM !- 4 4VR @ H HY[ \##$566GGNN=11FKKM1		-( 3 =!VZVbVbcVbQR9!&&A1779LVb
cz+<RVGWX!!"34KK12C1DEF9K5QgO ggll>?KOosW=**U1EL  ) > KKL^L\ ]K= )$$3#4A7
 &,\\^!
E%`e'f`e[\(CAFF(KQWWY(V`e$'f"("'',,~z*R^fhl]m 277<<
#KRUVZd!#%+E7G%HE'5::5UZZ=M=Mc=R9SWX9X-2ZZ
-0XXejj6F6Fs6KAB6O-P
)3)B)B *EKKM,?,?u{{}GZGZ *C *J -2KKMJqM"MM**;*;F*CD &I 6j-QWX WV &4* '' - (  k"^ d >= (g
 WVs,   A0^!<.^'#+^, .^>0D_,
^;
_	main)rh   	cache_dirrE  force_downloadlocal_files_onlyr4  revisionuse_safetensorsc               R   UR                  SS5      nUR                  SS5      nUR                  SS5      nUR                  SS5      nUR                  SS5      nUR                  SS5      nUR                  S	S5      nUR                  S
S5      nUR                  SS5      nUR                  SS5      nUR                  SS5      nUR                  SS5      nUR                  SS5      nUR                  SS5      nUb+  [        R                  " S[        5        Ub  [	        S5      eUnUSL a  [
        R                  S5        SSUS.nUb  UUS'   [        5       (       a  U(       d  [
        R                  S5        SnU	c  [        5       (       d  Sn	[        U[        5      (       d4  Ub  UOUnU R                  R                  " U4USUUUUUUUUUS.UD6u  nnOUnUc  [        USS5      nSnUGb  [        U5      n[         R"                  R%                  U5      nU(       Ga  U(       al  [         R"                  R'                  [         R"                  R)                  U[*        5      5      (       a&  [         R"                  R)                  U[*        5      nGOU(       an  [         R"                  R'                  [         R"                  R)                  U[,        5      5      (       a(  [         R"                  R)                  U[,        5      nSnGOAU	SLal  [         R"                  R'                  [         R"                  R)                  U[.        5      5      (       a&  [         R"                  R)                  U[.        5      nGOU	SLan  [         R"                  R'                  [         R"                  R)                  U[0        5      5      (       a(  [         R"                  R)                  U[0        5      nSnGO][         R"                  R'                  [         R"                  R)                  U[2        5      5      (       a&  [         R"                  R)                  U[2        5      nGO[         R"                  R'                  [         R"                  R)                  U[4        5      5      (       a(  [         R"                  R)                  U[4        5      nSnGOU	(       a  [7        S[.         S[0         SU S35      e[         R"                  R'                  [         R"                  R)                  U[*        5      5      (       dF  [         R"                  R'                  [         R"                  R)                  U[,        5      5      (       a  [7        S[2         S[.         SU S 35      e[7        S[2         S![.         S[*         SU S"3	5      e[         R"                  R'                  U5      (       a  UnSnGOh[         R"                  R'                  US#-   5      (       a	  US#-   nSnGO8[9        U5      (       a  Un[;        U5      n GOU(       a  [*        nOU	SLa  [.        nO[2        n UUUUUUUUUSSUS$.n![=        UU40 U!D6n U c!  U[.        :X  a  [2        n[=        U[2        40 U!D6n U c   U[2        :X  a  [=        U[4        40 U!D6n U b  SnU c   U[*        :X  a  [=        U[,        40 U!D6n U b  SnU cn  UUUUUS%.n"[?        U[0        40 U"D6(       a  SnON[?        U[*        40 U"D6(       a  [7        U S&[2         S 35      e[7        U S&[*         S![2         S[@         35      e U(       aG  [
        R                  S*W 35        Un U RE                  [         R"                  RF                  5      S+   nO[
        R                  S*W S,W  35        OSn U(       a  [I        UU UUUUUUUUUS-9u  n n#Sn$W[.        :X  a_  [K        U S.S/9 n%U%RM                  5       n&SSS5        W&b  U&RO                  S05      S1;  a  [Q        S2U  S335      eU&RO                  S05      S4:H  n$OkU[0        :X  aa  [K        U S5   S.S/9 n%U%RM                  5       n&SSS5        W&b  U&RO                  S05      S1;  a  [Q        S2U  S335      eU&RO                  S05      S4:H  n$Xl)        U RT                  (       a,  URO                  S65      b  US7-   URO                  S65      -   US
'   U " U/U
Q70 UD6n'Uc  [W        U'S5      (       a  U'RX                  nU(       a  S8S9K-J.n(  U(" U'U SUUUS:9$ UbC  [^        R`                  Rb                  Re                  U5         U'Rg                  5         SSS5        OU'Rg                  5         U$(       a.  U(       d'  S8S;K-J4n)  [K        U S.S/9 n*U)" U'U*SSUUUUS<9sSSS5        $ U$(       a  S8S=K-J5n+  U+" U'U SSUUUUS<9$  U(       aY  U  H'  n,[         R"                  R'                  U,5      S>U, 34  M)     U[0        :X  a  [m        U'U UUS?9u  n-n.n/O"[o        U'U UUS?9u  n-n.n/O[q        U'U UUS?9u  n-n.n/ U Rz                  b?  U Rz                   H/  n1W- V2s/ s H  n2[|        R~                  " U1U25      b  M  U2PM!     n-n2M1     U R                  b?  U R                   H/  n1W. V2s/ s H  n2[|        R~                  " U1U25      b  M  U2PM!     n.n2M1     [        W.5      S5:  ab  [
        R                  SCU SDU'R                  R                   SEU. SFU'R                  R                   SGU'R                  R                   SH35        O-[
        R                  SIU'R                  R                   SJ35        [        W-5      S5:  a4  [
        R                  SKU'R                  R                   SLU SMU- SN35        OV[        W/5      S5:X  aG  [
        R                  SOU'R                  R                   SPU SQU'R                  R                   SR35        [        W/5      S5:  ag  SSR)                  U/ V3V4V5s/ s H  u  n3n4n5STU3 SUU4 SVU5 SW3PM     sn5n4n35      n6[
        R                  SXU'R                  R                   SLU SYU6 SN35        U'R                  5       (       a(   [        R                  " U4UUUUUUUUUUSZ.
UD6U'lF        U(       a
  U-U.U/S\.n7U'U74$ U'$ ! [6         a    e [B         a'    [7        S'U S(U S)[*         S![2         S[@         3
5      ef = f! , (       d  f       GN'= f! , (       d  f       GN= f! , (       d  f       GN= f! , (       d  f       GN= f! [P         a}  n0 [s        U 5       n%U%Ru                  5       Rw                  S@5      (       a  [Q        SA5      e[        U0e! , (       d  f        Sn0A0GN{= f! [x        [        4 a    [Q        SB5      ef = fSn0A0ff = fs  sn2f s  sn2f s  sn5n4n3f ! [P         a    [
        R                  S[5         GNVf = f)]a  
Instantiate a pretrained TF 2.0 model from a pre-trained model configuration.

The warning *Weights from XXX not initialized from pretrained model* means that the weights of XXX do not come
pretrained with the rest of the model. It is up to you to train those weights with a downstream fine-tuning
task.

The warning *Weights from XXX not used in YYY* means that the layer XXX is not used by YYY, therefore those
weights are discarded.

Parameters:
    pretrained_model_name_or_path (`str`, *optional*):
        Can be either:

            - A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.
            - A path to a *directory* containing model weights saved using
              [`~TFPreTrainedModel.save_pretrained`], e.g., `./my_model_directory/`.
            - A path or url to a *PyTorch state_dict save file* (e.g, `./pt_model/pytorch_model.bin`). In this
              case, `from_pt` should be set to `True` and a configuration object should be provided as `config`
              argument. This loading path is slower than converting the PyTorch model in a TensorFlow model
              using the provided conversion scripts and loading the TensorFlow model afterwards.
            - `None` if you are both providing the configuration and state dictionary (resp. with keyword
              arguments `config` and `state_dict`).
    model_args (sequence of positional arguments, *optional*):
        All remaining positional arguments will be passed to the underlying model's `__init__` method.
    config (`Union[PretrainedConfig, str]`, *optional*):
        Can be either:

            - an instance of a class derived from [`PretrainedConfig`],
            - a string valid as input to [`~PretrainedConfig.from_pretrained`].

        Configuration for the model to use instead of an automatically loaded configuration. Configuration can
        be automatically loaded when:

            - The model is a model provided by the library (loaded with the *model id* string of a pretrained
              model).
            - The model was saved using [`~TFPreTrainedModel.save_pretrained`] and is reloaded by supplying the
              save directory.
            - The model is loaded by supplying a local directory as `pretrained_model_name_or_path` and a
              configuration JSON file named *config.json* is found in the directory.
    from_pt (`bool`, *optional*, defaults to `False`):
        Load the model weights from a PyTorch state_dict save file (see docstring of
        `pretrained_model_name_or_path` argument).
    ignore_mismatched_sizes (`bool`, *optional*, defaults to `False`):
        Whether or not to raise an error if some of the weights from the checkpoint do not have the same size
        as the weights of the model (if for instance, you are instantiating a model with 10 labels from a
        checkpoint with 3 labels).
    cache_dir (`str`, *optional*):
        Path to a directory in which a downloaded pretrained model configuration should be cached if the
        standard cache should not be used.
    force_download (`bool`, *optional*, defaults to `False`):
        Whether or not to force the (re-)download of the model weights and configuration files, overriding the
        cached versions if they exist.
    resume_download:
        Deprecated and ignored. All downloads are now resumed by default when possible.
        Will be removed in v5 of Transformers.
    proxies:
        (`Dict[str, str], `optional`): A dictionary of proxy servers to use by protocol or endpoint, e.g.,
        `{'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
        output_loading_info(`bool`, *optional*, defaults to `False`): Whether ot not to also return a
        dictionary containing missing keys, unexpected keys and error messages.
    local_files_only(`bool`, *optional*, defaults to `False`):
        Whether or not to only look at local files (e.g., not try downloading the model).
    token (`str` or `bool`, *optional*):
        The token to use as HTTP bearer authorization for remote files. If `True`, or not specified, will use
        the token generated when running `huggingface-cli login` (stored in `~/.huggingface`).
    revision (`str`, *optional*, defaults to `"main"`):
        The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
        git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
        identifier allowed by git.


        <Tip>

        To test a pull request you made on the Hub, you can pass `revision="refs/pr/<pr_number>"`.

        </Tip>

    mirror (`str`, *optional*):
        Mirror source to accelerate downloads in China. If you are from China and have an accessibility
        problem, you can set this option to resolve it. Note that we do not guarantee the timeliness or safety.
        Please refer to the mirror site for more information.
    subfolder (`str`, *optional*, defaults to `""`):
        In case the relevant files are located inside a subfolder of the model repo on huggingface.co, you can
        specify the folder name here.
    tf_to_pt_weight_rename (`Callable`, *optional*):
        A function that is called to transform the names of weights during the PyTorch to TensorFlow
        crossloading process. This is not necessary for most models, but is useful to allow composite models to
        be crossloaded correctly.
    use_safetensors (`bool`, *optional*, defaults to `None`):
        Whether or not to use `safetensors` checkpoints. Defaults to `None`. If not specified and `safetensors`
        is not installed, it will be set to `False`.
    kwargs (remaining dictionary of keyword arguments, *optional*):
        Can be used to update the configuration object (after it being loaded) and initiate the model (e.g.,
        `output_attentions=True`). Behaves differently depending on whether a `config` is provided or
        automatically loaded:

            - If a configuration is provided with `config`, `**kwargs` will be directly passed to the
              underlying model's `__init__` method (we assume all relevant updates to the configuration have
              already been done)
            - If a configuration is not provided, `kwargs` will be first passed to the configuration class
              initialization function ([`~PretrainedConfig.from_pretrained`]). Each key of `kwargs` that
              corresponds to a configuration attribute will be used to override said attribute with the
              supplied `kwargs` value. Remaining keys that do not correspond to any configuration attribute
              will be passed to the underlying model's `__init__` function.

Examples:

```python
>>> from transformers import BertConfig, TFBertModel

>>> # Download model and configuration from huggingface.co and cache.
>>> model = TFBertModel.from_pretrained("google-bert/bert-base-uncased")
>>> # Model was saved using *save_pretrained('./test/saved_model/')* (for example purposes, not runnable).
>>> model = TFBertModel.from_pretrained("./test/saved_model/")
>>> # Update configuration during loading.
>>> model = TFBertModel.from_pretrained("google-bert/bert-base-uncased", output_attentions=True)
>>> assert model.config.output_attentions == True
>>> # Loading from a Pytorch model file instead of a TensorFlow checkpoint (slower, for example purposes, not runnable).
>>> config = BertConfig.from_json_file("./pt_model/my_pt_model_config.json")
>>> model = TFBertModel.from_pretrained("./pt_model/my_pytorch_model.bin", from_pt=True, config=config)
```from_ptFresume_downloadNproxiesoutput_loading_infor1  trust_remote_codemirrorload_weight_prefix_from_pipeline
_from_auto	subfolderr  _commit_hashtf_to_pt_weight_renameadapter_kwargsr2  r3  TzgThe argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.rQ  
tensorflow)	file_typer  from_auto_classusing_pipelinez+Offline mode: forcing local_files_only=True)rs  return_unused_kwargsrt  rz  r{  ru  r4  rv  r  r  r  zError no file named z or z found in directory zu. Please make sure that the model has been saved with `safe_serialization=True` or do not set `use_safetensors=True`.zc but there is a file for PyTorch weights. Use `from_pt=True` to load this model from those weights.z, r   z.index)rs  rt  r{  rz  ru  r4  
user_agentrv  r   _raise_exceptions_for_gated_repo%_raise_exceptions_for_missing_entriesr  )rv  r{  r4  rs  ru  z& does not appear to have a file named zCan't load the model for 'z'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'z=' is the correct path to a directory containing a file named zloading weights file r   z from cache at )	rs  rt  r{  rz  ru  r4  r  rv  r  r?   r  r@  )ptr?   flaxmlxz"The safetensors archive passed at zf does not contain the valid metadata. Make sure you save your model with the `save_pretrained` method.r  r   r  r!  r   )$load_pytorch_checkpoint_in_tf2_model)allow_missing_keysr|  r%  r  )$load_pytorch_state_dict_in_tf2_model)	tf_inputsr  r|  r%  rE  r  )-load_sharded_pytorch_safetensors_in_tf2_modelzError retrieving files rD  re  rf  z}Unable to load weights from h5 file. If you tried to load a TF 2.0 model from a PyTorch checkpoint, please set from_pt=True. z)Some layers from the model checkpoint at z! were not used when initializing z: z,
- This IS expected if you are initializing z from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing z from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).z8All model checkpoint layers were used when initializing z.
zSome layers of z3 were not initialized from the model checkpoint at z and are newly initialized: zo
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.zAll the layers of z/ were initialized from the model checkpoint at zf.
If your task is similar to the task the model of the checkpoint was trained on, you can already use z* for predictions without further training.rD  z- z: found shape z in the checkpoint and z in the model instantiatedzSome weights of z= and are newly initialized because the shapes did not match:
)
rs  rt  rz  r{  ru  r4  rv  r  r  r  zZGeneration config file not found, using a generation config created from the model config.)r]  rT  rV  )Grj   r  r  r  rn   r  r  r(   ri  r*   ri   r   rf   from_pretrainedr   r"  r/  r0  r1  rJ  r$  r!   r    r   r   r   r   EnvironmentErrorr)   r%   r$   r'   r   rm  r  rM  r0   r1   r-  r  rp  r  _requires_load_weight_prefixr   r  modeling_tf_pytorch_utilsr  r?   compatv1variable_scoper  r  r  r  ra  r  rn  ro  r#  rq  _keys_to_ignore_on_load_missingresearch"_keys_to_ignore_on_load_unexpectedrm   r   r_   r  r   r  )8r}   pretrained_model_name_or_pathrh   rs  rE  rt  ru  r4  rv  rw  
model_argsrr   ry  rz  r{  r|  r1  r}  rU  r  from_pipeliner  r  commit_hashr  r  config_pathmodel_kwargs
is_shardedis_localarchive_filerh  rr  cached_file_kwargshas_file_kwargssharded_metadatasafetensors_from_ptr|  safetensors_metadatarQ  r  r  r  r  filer]  rT  rV  r{  patr  r   shape1shape2mismatched_warningloading_infos8                                                           rD   r  !TFPreTrainedModel.from_pretrained	  s   R **Y. **%6=**Y-$jj)>F$4d;"JJ':DAJJx&#ZZ(<dC

#3T: **\59JJ{B/	jj6!',Dd!K JJ'.%MM E   l  #E$NN
 $+Zij
$+8J'(%5KKEF#"+C+E+E#O &"233$*$6&<YK#&#3#3#C#C$#%)- /!1!*,($ $ FL  "L!&.$?K 
(4,/0M,N)ww}}%BCHrww~~bggll;XZf.ghh#%77<<0M|#\L=Z\n0o!p!p#%77<<0MOa#bL!%J$E1bggnnGGLL!>@QR7 7 $&77<<0MO`#aL$E1bggnnGGLL!>@WX7 7 $&77<<0MOf#gL!%JWW^^BGGLL1NP`$abb#%77<<0MO_#`LWW^^BGGLL1NPf$ghh#%77<<0MOe#fL!%J %*./@.AF]E^^r  tQ  sR R6 7 
 WW^^BGGLL1NP\$]^^bdbibibpbpGGLL!>@RSc c +./?.@EVDWWk  mJ  lK K# #  +./?.@CTBUUYZfYgg{89<   =>>< = HII<xG<==8(45R(S% +H$E10H/HM &/*8#*+:,<!&&0$,%.<AAF(3*& -88UW_,vcu,v) -4EV9V $40;9;K1Oa1- -4EU9U0;9;Q1Ug1- 1<)-J,4\9Q0;9;M1Qc1- 1<)-J,4 )1'.%*)20@+ $$ACZn^mn)-J%&C\eUde"2#@"A B$$4#5 6G!G#  #3#@"AAghtgu v$$4#5T/9J!L# ' 5J 3L>BC(4%066rww{{CBG3H:_MbLcde$(! 6P-%#- /!1%!(73!#3 $((0DAQ'(zz|$ B#+/C/G/G/QYt/t89N8O PX X  #7":":8"D"L0003tD'(zz|$ E#+/C/G/G/QYt/t89N8O PX X  #7":":8"D"L; ++0@0@0H0T1Cc1ILL\L\]cLd1dL-. F8Z8<8!)ge=U.V.V &+%A%A"W 8%#'$7*'=  ),,-?@))+ A@ %%'zW0DAEX <'#'+(;.,C+A		 BA !`@%#'$7*(?'=	 	)	1DGGNN4(,CD6*JJ 266Em-0G 2	FBL/? F]-0G 2	FBL/? BQ),C.	B>o. ..:::+7U<a299S!;L<U ; 11===.="[o3PQAR1o"[ > !#NN;<Y;Z [!!&!9!9 :"_<M N!!&!9!9 : ; OO,,- .tt NNUV[VeVeVnVnUoorst|q NN!%//":":!; <122N|n ]nn
 !Q&NN$U__%=%=$> ?12 388=8P8P7Q R !#!% 0?/>+VV ^F83J6(Rlm/>" NN"5??#;#;"< =12 3./ 0<< *:*J*J1+'#1$3#%5%'.#0+ +'(  ,#2#2L ,&&w (     +45R4S T==Z<[ \>>J^2N^M__cdsctv J BA EDP A@ BAp  	/0Avvx**955%*  )a/ 100 '
3 o 	&  V #\<:  p 	s   B)s" As" /t t2"u*uAu( +u( <u( -w2w29w7w7<w<('x ";t 
t/2
u
u
u%(
w/3w>7v55
w	?www''w**w/x&%x&c	           
        Ub+  [         R                  " S[        5        Ub  [        S5      eUnSU	;   a'  [         R                  " S5        U	R	                  S5      nU	R	                  SS5      n
U	R	                  SS5      n[
        R                  R                  U5      (       a/  UnUR                  [
        R                  R                  5      S   nOUR                  S	5      S   nU R                  XXjUS
9nUc$  [
        R                  R                  U5      (       + n[        XS9 nU R                  U5      nU R                  XS9  [        U S5      (       aL  [        U S5      (       a;  U[        U5      R                   S.n	U	R#                  U	5        U R$                  " S0 U	D6  U R'                  UUUUUUS9  SSS5        g! , (       d  f       g= f)u  
Upload the model files to the 🤗 Model Hub while synchronizing a local clone of the repo in `repo_path_or_name`.

Parameters:
    repo_id (`str`):
        The name of the repository you want to push your model to. It should contain your organization name
        when pushing to a given organization.
    use_temp_dir (`bool`, *optional*):
        Whether or not to use a temporary directory to store the files saved before they are pushed to the Hub.
        Will default to `True` if there is no directory named like `repo_id`, `False` otherwise.
    commit_message (`str`, *optional*):
        Message to commit while pushing. Will default to `"Upload model"`.
    private (`bool`, *optional*):
        Whether to make the repo private. If `None` (default), the repo will be public unless the organization's default is private. This value is ignored if the repo already exists.
    token (`bool` or `str`, *optional*):
        The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
        when running `huggingface-cli login` (stored in `~/.huggingface`). Will default to `True` if `repo_url`
        is not specified.
    max_shard_size (`int` or `str`, *optional*, defaults to `"10GB"`):
        Only applicable for models. The maximum size for a checkpoint before being sharded. Checkpoints shard
        will then be each of size lower than this size. If expressed as a string, needs to be digits followed
        by a unit (like `"5MB"`).
    create_pr (`bool`, *optional*, defaults to `False`):
        Whether or not to create a PR with the uploaded files or directly commit.

Examples:

```python
from transformers import TFAutoModel

model = TFAutoModel.from_pretrained("google-bert/bert-base-cased")

# Push the model to your namespace with the name "my-finetuned-bert".
model.push_to_hub("my-finetuned-bert")

# Push the model to an organization with the name "my-finetuned-bert".
model.push_to_hub("huggingface/my-finetuned-bert")
```
Nr2  r3  repo_path_or_namezpThe `repo_path_or_name` argument is deprecated and will be removed in v5 of Transformers. Use `repo_id` instead.repo_urlorganizationr   r!  )privater4  r  r  )working_diruse_temp_dir)r4  r  r  )r  r  )r6  r4  rb  r\   )r  r  r  rn   rj   r/  r0  r1  r  rM  rN  r.   rO  rV  r   r   r  r{   r  r_  )rX   r7  r  r6  r  r4  r4  r1  rb  base_model_card_argsr  r  r  work_dirrd  s                  rD   ra  TFPreTrainedModel.push_to_hub-  s   h %MM E   l  #E"66MM% +../BCG'++J=+//E77==!!!KmmBGGKK04G!--,R0K##ES_ $ 
 !ww}}[99L [TX`#99(C    ItY''GD:M,N,N #+"&w-"4"4($ %++,@A&&>)=>'' -# (  UTTs   (BG
Gc                    [        U[        5      (       d  UR                  nSSKJs  Jn  [        X!5      (       d  [        U S35      eXl        g)a  
Register this class with a given auto class. This should only be used for custom models as the ones in the
library are already mapped with an auto class.

<Tip warning={true}>

This API is experimental and may have some slight breaking changes in the next releases.

</Tip>

Args:
    auto_class (`str` or `type`, *optional*, defaults to `"TFAutoModel"`):
        The auto class to register this new model with.
r   Nz is not a valid auto class.)	ri   r"  r_   transformers.models.automodelsra  r   rn   rU  )r}   
auto_classauto_modules      rD   register_for_auto_class)TFPreTrainedModel.register_for_auto_class  sE      *c**#,,J66{//
|+FGHH$rF   )rj  rh   r  r  )r^   Dict[str, tf.Tensor])r^   r"  rL   )r  ztf.Tensor | Noner  rT   r^   	tf.Tensor)r^   zDict[str, tf.TensorSpec])r^   r]   )r^   zkeras.layers.Layer)   TNNNNT)rR  z'datasets.Dataset'rC  rT   rJ  r]   rA  z#Optional['PreTrainedTokenizerBase']rD  zOptional[Callable]rE  zOptional[Dict[str, Any]]rK  Optional[bool]rL  r]   )rmspropr_  NNNNN)NNNNNNNN)r  r"  r  Optional[str]r  r  r  r  r  r  r  r  r  Optional[Union[str, List[str]]]rR  r  r  r  )r^   zUnion[None, keras.layers.Layer])r^   zUnion[None, str])r^   z#Union[None, Dict[str, tf.Variable]])r  Optional[int]r^   z*Union[keras.layers.Embedding, tf.Variable])r  r  r^   keras.layers.Embedding)r  zDict[str, tf.Variable]r  rT   r^   r  )r^   ztf.Variable)r  r  r  rT   r^   r  )Fr   FN5GBFFN)r4  zUnion[int, str]rb  r]   rc  r]   r4  Optional[Union[str, bool]])r  !Optional[Union[str, os.PathLike]]rh   z3Optional[Union[PretrainedConfig, str, os.PathLike]]rs  r  rE  r]   rt  r]   ru  r]   r4  r  rv  r"  rw  r  )NNNr'  NNF)r7  r"  r  r  r6  r  r  r  r4  zOptional[Union[int, str]]r4  Optional[Union[bool, str]]r1  r  rb  r]   r^   r"  )TFAutoModel)Lr_   r`   ra   rb   rc   rf   r'  r  rU  rj  r}  r  r  r  propertyr  r  r  r  r   rv   r   r   r   rk  r  r  r  r  r  r  classmethodr  r  r  r  r?   functionr  r  r  r  r(  r<  r\  rl  rp  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r.  rV  r  ra  r  rd   __classcell__r   s   @rD   r  r  U  s)   & L!OK '+# *.&#(  <)  2% __U[[__%, &, __U[[//07 17 __U[[../6 06 __U[[1129 39 __U[[(()0 *0 __U[[))*1 +1 N N
 % %*	 [[
+ 
+ , ,\0  &'  9=)-48)-g#g g 	g
 7g 'g 2g 'g gV   4l9jXfX #'!%"(,#8<378<:  :   	: 
 :  :  &:  :  6:  1:  6: x3((5"""(  /3+	3B.>+.+*3 j   6  HK  	  D$#L!F-4-FI-	-^
" */#(,0u (u u !u *un 
 GK7;(-$!&,0*.Q	'HQ	 D	Q	
 5Q	 "&Q	 Q	 Q	 *Q	 Q	 (Q	 Q	l (,(,"&4:,059kk %k &	k
  k 2k *k 3k k 
kZ % %rF   r  c                  <   ^  \ rS rSrSrSU 4S jjrS rS rSrU =r	$ )TFConv1Di  a  
1D-convolutional layer as defined by Radford et al. for OpenAI GPT (and also used in GPT-2).

Basically works like a linear layer but the weights are transposed.

Args:
    nf (`int`):
        The number of output features.
    nx (`int`):
        The number of input features.
    initializer_range (`float`, *optional*, defaults to 0.02):
        The standard deviation to use to initialize the weights.
    kwargs (`Dict[str, Any]`, *optional*):
        Additional keyword arguments passed along to the `__init__` of `keras.layers.Layer`.
c                J   > [         TU ]  " S0 UD6  Xl        X l        X0l        g )Nr\   )ry   r   nfnxr  )rX   r  r  r  rr   r   s        rD   r   TFConv1D.__init__  s#    "6"!2rF   c                   U R                   (       a  g SU l         U R                  SU R                  U R                  /[	        U R
                  5      S9U l        U R                  SSU R                  /[        R                  " 5       S9U l	        g )NTr?  r;   rs   biasr   )
builtr  r  r  r  r  r?  r?   zeros_initializerr  r  s     rD   r  TFConv1D.build  sp    ::
ooTWWdgg.ODLbLb<c & 
 OOF1dgg,BL`L`LbOc	rF   c                
   [        U5      S S u  p#[        R                  " USU R                  /5      n[        R                  " XR
                  5      U R                  -   n[        R                  " XX0R                  /5      nU$ )Nr7   r   )r   r?   r   r  matmulr?  r  r  )rX   r  bzsls       rD   r  TFConv1D.call  sc    Ar"JJq2tww-(IIa%		1JJqr77+,rF   )r  r  r  r  r  r?  r  )
r_   r`   ra   rb   rc   r   r  r  rd   r  r  s   @rD   r  r    s     3d rF   r  c                  f   ^  \ rS rSrSrS
SU 4S jjjrU 4S jrU 4S jrSSS jjrS r	S r
S	rU =r$ )TFSharedEmbeddingsi  a  
Construct shared token embeddings.

The weights of the embedding layer is usually shared with the weights of the linear decoder when doing language
modeling.

Args:
    vocab_size (`int`):
        The size of the vocabulary, e.g., the number of unique tokens.
    hidden_size (`int`):
        The size of the embedding vectors.
    initializer_range (`float`, *optional*):
        The standard deviation to use when initializing the weights. If no value is provided, it will default to
        \\(1/\sqrt{hidden\_size}\\).
    kwargs (`Dict[str, Any]`, *optional*):
        Additional keyword arguments passed along to the `__init__` of `keras.layers.Layer`.
c                   > [         TU ]  " S0 UD6  Xl        X l        Uc  US-  OUU l        [
        R                  " S[        5        g )Ng      z^`TFSharedEmbeddings` is scheduled for deletion in v4.32, use `keras.layers.Embedding` instead.r\   )ry   r   r  r  r  r  r  DeprecationWarning)rX   r  r  r  rr   r   s        rD   r   TFSharedEmbeddings.__init__  sE    "6"$&6G6Od!2Ufl	
rF   c                   > U R                  SU R                  U R                  /[        U R                  5      S9U l        [        TU ]  U5        g)z
Build shared token embedding layer Shared weights logic adapted from
https://github.com/tensorflow/models/blob/a009f4fb9d2fc4949e32192a944688925ef78659/official/transformer/v2/embedding_layer.py#L24
r?  r  N)r  r  r  r  r  r?  ry   r  )rX   r  r   s     rD   r  TFSharedEmbeddings.build  sJ    
 ooT__d.>.>?_]a]s]sMt & 
 	k"rF   c                   > U R                   U R                  U R                  S.n[        TU ]  5       n[        [        UR                  5       5      [        UR                  5       5      -   5      $ )N)r  r  r  )r  r  r  ry   rv   rk   r=   r   )rX   rh   base_configr   s      rD   rv   TFSharedEmbeddings.get_config	  s[    //++!%!7!7

 g(*D**,-V\\^0DDEErF   c                |    US:X  a  U R                  U5      $ US:X  a  U R                  U5      $ [        SU S35      e)a  
Get token embeddings of inputs or decode final hidden state.

Args:
    inputs (`tf.Tensor`):
        In embedding mode, should be an int64 tensor with shape `[batch_size, length]`.

        In linear mode, should be a float tensor with shape `[batch_size, length, hidden_size]`.
    mode (`str`, defaults to `"embedding"`):
       A valid value is either `"embedding"` or `"linear"`, the first one indicates that the layer should be
       used as an embedding layer, the second one that the layer should be used as a linear decoder.

Returns:
    `tf.Tensor`: In embedding mode, the output is a float32 embedding tensor, with shape `[batch_size, length,
    embedding_size]`.

    In linear mode, the output is a float32 with shape `[batch_size, length, vocab_size]`.

Raises:
    ValueError: if `mode` is not valid.

Shared weights logic is adapted from
[here](https://github.com/tensorflow/models/blob/a009f4fb9d2fc4949e32192a944688925ef78659/official/transformer/v2/embedding_layer.py#L24).
	embeddinglinearzmode z is not valid.)
_embedding_linearrn   )rX   r   rE  s      rD   r  TFSharedEmbeddings.call  sF    2 ;??6**X<<''uTF.9::rF   c                D    [         R                  " U R                  U5      $ )z)Applies embedding based on inputs tensor.)r?   gatherr?  )rX   r   s     rD   r  TFSharedEmbeddings._embedding3  s    yyi00rF   c                    [        U5      SS n[        R                  " USU R                  /5      n[        R                  " X0R
                  SS9n[        R                  " XBU R                  /-   5      $ )z
Computes logits by running inputs through a linear layer.

Args:
    inputs: A float32 tensor with shape [..., hidden_size]

Returns:
    float32 tensor with shape [..., vocab_size].
Nr   T)transpose_b)r   r?   r   r  r  r?  r  )rX   r   
first_dimsr  r   s        rD   r  TFSharedEmbeddings._linear7  s_      ',
JJvD$4$4561kkt<zz&/@"@AArF   )r  r  r  r?  rL   )r  rT   r  rT   r  zOptional[float])r  )r   r  rE  r"  r^   r  )r_   r`   ra   rb   rc   r   r  rv   r  r  r  rd   r  r  s   @rD   r  r    s4    (
 
#F;@1B BrF   r  c                  D   ^  \ rS rSrSrSSU 4S jjjrS	S jrS rSrU =r	$ )
TFSequenceSummaryiH  a  
Compute a single vector summary of a sequence hidden states.

Args:
    config ([`PretrainedConfig`]):
        The config used by the model. Relevant arguments in the config class of the model are (refer to the actual
        config class of your model for the default values it uses):

        - **summary_type** (`str`) -- The method to use to make this summary. Accepted values are:

            - `"last"` -- Take the last token hidden state (like XLNet)
            - `"first"` -- Take the first token hidden state (like Bert)
            - `"mean"` -- Take the mean of all tokens hidden states
            - `"cls_index"` -- Supply a Tensor of classification token position (GPT/GPT-2)
            - `"attn"` -- Not implemented now, use multi-head attention

        - **summary_use_proj** (`bool`) -- Add a projection after the vector extraction.
        - **summary_proj_to_labels** (`bool`) -- If `True`, the projection outputs to `config.num_labels` classes
          (otherwise to `config.hidden_size`).
        - **summary_activation** (`Optional[str]`) -- Set to `"tanh"` to add a tanh activation to the output,
          another string or `None` will add no activation.
        - **summary_first_dropout** (`float`) -- Optional dropout probability before the projection and activation.
        - **summary_last_dropout** (`float`)-- Optional dropout probability after the projection and activation.

    initializer_range (`float`, *optional*, defaults to 0.02): The standard deviation to use to initialize the weights.
    kwargs (`Dict[str, Any]`, *optional*):
        Additional keyword arguments passed along to the `__init__` of `keras.layers.Layer`.
c                
  > [         TU ]  " S0 UD6  [        US5      (       a  UR                  OSU l        U R                  S:X  a  [        e[        US5      =(       a    UR
                  U l        U R                  (       ax  [        US5      (       a.  UR                  (       a  UR                  S:  a  UR                  nOUR                  n[        R                  R                  U[        U5      SS9U l        SU l        [!        US	S 5      nUb  S
U l        [#        U5      U l        [        US5      =(       a    UR&                  S:  U l        U R(                  (       a.  [        R                  R+                  UR&                  5      U l        [        US5      =(       a    UR.                  S:  U l        U R0                  (       a.  [        R                  R+                  UR.                  5      U l        UR                  U l	        g )Nsummary_use_projlastattnsummary_proj_to_labelsr   summary)kernel_initializerr  Fsummary_activationTsummary_first_dropoutsummary_last_dropoutr\   )ry   r   r   summary_typer  r  has_summaryr	  
num_labelsr  r   r  Denser  r
  has_activationr   r   
activationr  has_first_dropoutDropoutfirst_dropoutr  has_last_dropoutlast_dropout)rX   rh   r  rr   num_classesactivation_stringr   s         rD   r   TFSequenceSummary.__init__f  s   "6"3:6CU3V3VF//\b& &%"6+=>Z6CZCZv788V=Z=Z_e_p_pst_t$//$00 <<--@Q0RYb . DL $#F,@$G("&D/0ABDO!(1H!I!nfNjNjmnNn!!!&!5!5f6R6R!SD '0F G kFLgLgjkLk   % 4 4V5P5P QD!--rF   c                   [        U[        [        [        45      (       d  UnOp[        U[        [        45      (       a2  US   n[	        U5      S:  a  US   OS n[	        U5      S::  d   S5       eO#UR                  S5      nUR                  SS 5      nU R                  S:X  a  US S 2S4   nGOU R                  S	:X  a
  US S 2S4   nOU R                  S
:X  a  [        R                  " USS9nOU R                  S:X  a  [        U5      nUc   [        R                  " US S US   S-
  5      n[        U5      n[	        U5      [	        U5      S-
  ::  a  [        R                  " USS9n[        R                  " XB[	        U5      S-
  S9n[        R                  " U[	        U5      S-
  S9nOU R                  S:X  a  [        eU R                  (       a  U R!                  WUS9nU R"                  (       a  U R%                  W5      nU R&                  (       a  U R)                  W5      nU R*                  (       a  U R-                  WUS9nW$ )Nr   r   r7   zToo many inputs.r  	cls_indexr  r   firstmeanr9   )
batch_dimsr  r  )ri   rk   r  r=   rm   r  r  r?   r@   r   r  r   r  squeezer  r  r  r  r
  r  r  r  r  )rX   r   r  r  r  r  hidden_shape	cls_shapes           rD   r  TFSequenceSummary.call  s	   &4"566"M.."1IM%([1_q	$Iv;!#7%77#"JJ7M

;5I&"1b5)F')"1a4(F&(^^M:F+-%m4L GG "%|B'7!';	 #9-I9~\!2Q!66NN92>	
 YY}CDUXYDYZFZZS.2F &(%%!!'''BF\\&)F__V,F  &&v&AFrF   c                    U R                   (       a  g SU l         [        U SS 5      bE  [        R                  " S5         U R                  R                  U R                  5        S S S 5        g g ! , (       d  f       g = f)NTr
  )r  r   r?   r  r
  r  r  r  s     rD   r  TFSequenceSummary.build  sZ    ::
4D)5y)""4#3#34 *) 6))s   &A..
A<)r  r  r  r  r  r  r  r  r  r
  r  r  )rh   r   r  float)NF)
r_   r`   ra   rb   rc   r   r  r  rd   r  r  s   @rD   r  r  H  s#    :!. !.F1f5 5rF   r  c                <    [         R                  R                  U S9$ )a  
Creates a `keras.initializers.TruncatedNormal` with the given range.

Args:
    initializer_range (*float*, defaults to 0.02): Standard deviation of the initializer range.

Returns:
    `keras.initializers.TruncatedNormal`: The truncated normal initializer.
r  )r   r$  r%  )r  s    rD   r  r    s     --5F-GGrF   rL   )r5  r"  )FFN)FNr  )r  r)  r^   z"keras.initializers.TruncatedNormal)rc   
__future__r   r   rN  r   rZ  r/  r6  r  r  collections.abcr   pathlibr   typingr   r   r   r	   r
   r   r   rg  r/  rM   r  r?   packaging.versionr   r  r   r   activations_tfr   configuration_utilsr   dynamic_module_utilsr   
generationr   r   tf_utilsr   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   	utils.hubr/   r0   safetensorsr1   safetensors.tensorflowr2   rY  r3   
get_loggerr_   r  environr  tf_kerasr   r6   rj  ModuleNotFoundErrorImportErrorr  majorrn   	tf_loggerr  r  r"  TFModelInputTyperE   rH   r   r   r   r   r   r   r   r   r   r   r   r&  rB  ra  rM  r  r  r  r~  r  rk  r  r  Layerr  r  r  r  r\   rF   rD   <module>rA     s     "  	   	  	  #  L L L    # : - 1 4 ;      , L %B)			H	%

*(+BJJ$%ZZ%&#-
NN	r

% MMO	ObiibjjIIJJ ;' ',>B5 5:
- 
-'5 '5T' '&' '	#? 	 DB))XjZ 17L\ BJ@:FVt NR4:n6a@H@@%!P]!%%68I> ]!%@C(u||!! (VeB++ eBPz5** z5z
HOk 	[) 	
"U%%)&
 	
 *		
s   9
J8 8:K65K6