
    fTh                    F   S r SSKrSSKrSSKrSSKJr  SSKJrJrJ	r	  SSK
r
SSKr
SSK
Jr  SSKJrJrJr  SSKJr  SS	KJrJrJrJrJrJr  SS
KJr  SSKJrJrJr  SSK J!r!J"r"  SSK#J$r$  \"RJ                  " \&5      r'/ SQr(\ " S S\5      5       r)S r* " S S\RV                  5      r, " S S\RV                  5      r- " S S\RV                  5      r. " S S\RV                  5      r/ " S S\RV                  5      r0 " S S\RV                  5      r1 " S S \RV                  5      r2 " S! S"\RV                  5      r3 " S# S$\RV                  5      r4 " S% S&\RV                  5      r5 " S' S(\RV                  5      r6 " S) S*\RV                  5      r7 " S+ S,\RV                  5      r8 " S- S.\RV                  5      r9\! " S/ S0\5      5       r:\! " S1 S2\:5      5       r;\!" S3S49 " S5 S6\:5      5       r<\! " S7 S8\:5      5       r=\! " S9 S:\:5      5       r>\! " S; S<\:5      5       r?/ S=Qr@g)>zPyTorch CANINE model.    N)	dataclass)OptionalTupleUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)BaseModelOutputModelOutputMultipleChoiceModelOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)PreTrainedModel)apply_chunking_to_forward find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringlogging   )CanineConfig)   +   ;   =   I   a   g   q                           c                       \ rS rSr% SrSr\\R                     \	S'   Sr
\\R                     \	S'   Sr\\\R                        \	S'   Sr\\\R                        \	S'   Srg)	CanineModelOutputWithPooling2   a  
Output type of [`CanineModel`]. Based on [`~modeling_outputs.BaseModelOutputWithPooling`], but with slightly
different `hidden_states` and `attentions`, as these also include the hidden states and attentions of the shallow
Transformer encoders.

Args:
    last_hidden_state (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`):
        Sequence of hidden-states at the output of the last layer of the model (i.e. the output of the final
        shallow Transformer encoder).
    pooler_output (`torch.FloatTensor` of shape `(batch_size, hidden_size)`):
        Hidden-state of the first token of the sequence (classification token) at the last layer of the deep
        Transformer encoder, further processed by a Linear layer and a Tanh activation function. The Linear layer
        weights are trained from the next sentence prediction (classification) objective during pretraining.
    hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `torch.FloatTensor` (one for the input to each encoder + one for the output of each layer of each
        encoder) of shape `(batch_size, sequence_length, hidden_size)` and `(batch_size, sequence_length //
        config.downsampling_rate, hidden_size)`. Hidden-states of the model at the output of each layer plus the
        initial input to each Transformer encoder. The hidden states of the shallow encoders have length
        `sequence_length`, but the hidden states of the deep encoder have length `sequence_length` //
        `config.downsampling_rate`.
    attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `torch.FloatTensor` (one for each layer) of the 3 Transformer encoders of shape `(batch_size,
        num_heads, sequence_length, sequence_length)` and `(batch_size, num_heads, sequence_length //
        config.downsampling_rate, sequence_length // config.downsampling_rate)`. Attentions weights after the
        attention softmax, used to compute the weighted average in the self-attention heads.
Nlast_hidden_statepooler_outputhidden_states
attentions )__name__
__module____qualname____firstlineno____doc__r.   r   torchFloatTensor__annotations__r/   r0   r   r1   __static_attributes__r2       b/var/www/auris/envauris/lib/python3.13/site-packages/transformers/models/canine/modeling_canine.pyr,   r,   2   sh    6 6:x 1 12915M8E--.58<M8E%"3"345<59Ju00129r<   r,   c           	      (    SSK nSSKnSSKn[        R                  R                  U5      n[        R                  SU 35        UR                  R                  U5      n/ n/ n	U H]  u  p[        R                  SU
 SU 35        UR                  R                  Xj5      nUR                  U
5        U	R                  U5        M_     [        X5       GHY  u  pU
R                  S5      n
[!        S U
 5       5      (       a)  [        R                  S	SR#                  U
5       35        MW  U
S   S
:X  a  SU
S'   O[U
S   S:X  a  U
R%                  U
S   5        O=U
S   S:X  a  SU
S'   O.U
S   S:X  a
  S/U
SS -   n
OU
S   S:X  a  U
S   S;   a	  S/U
SS -   n
U nU
 H  nUR'                  SU5      (       a  SU;  a  UR                  SU5      nOU/nUS   S:X  d	  US   S:X  a  [)        US5      nODUS   S:X  d	  US   S:X  a  [)        US5      nO%US   S:X  a  [)        US5      nO [)        XS   5      n[-        U5      S :  d  M  [/        US   5      nUU   nM     WS!S S":X  a  [)        US5      nOJUS#S [1        S$5       Vs/ s H  nS%U 3PM
     sn;   a  [)        US5      nOUS:X  a  UR3                  U5      nUR4                  UR4                  :w  a&  [7        S&UR4                   S'UR4                   S(35      e[        R                  S)U
 35        [8        R:                  " U5      Ul        GM\     U $ ! [         a    [        R                  S5        e f = f! [*         a,    [        R                  S	SR#                  U
5       35         GM  f = fs  snf )*z'Load tf checkpoints in a pytorch model.r   NzLoading a TensorFlow model in PyTorch, requires TensorFlow to be installed. Please see https://www.tensorflow.org/install/ for installation instructions.z&Converting TensorFlow checkpoint from zLoading TF weight z with shape /c              3   ,   #    U  H
  nUS ;   v   M     g7f))adam_vadam_mAdamWeightDecayOptimizerAdamWeightDecayOptimizer_1global_stepclsautoregressive_decoderchar_output_weightsNr2   ).0ns     r=   	<genexpr>,load_tf_weights_in_canine.<locals>.<genexpr>s   s%      
  	
 s   z	Skipping bertencoderr   
embeddingssegment_embeddingstoken_type_embeddingsinitial_char_encoderchars_to_moleculesfinal_char_encoder)	LayerNormconv
projectionz[A-Za-z]+_\d+Embedderz_(\d+)kernelgammaweightoutput_biasbetabiasoutput_weights   i_embeddingsi   	Embedder_zPointer shape z and array shape z mismatchedzInitialize PyTorch weight )renumpy
tensorflowImportErrorloggererrorospathabspathinfotrainlist_variablesload_variableappendzipsplitanyjoinremove	fullmatchgetattrAttributeErrorlenintrange	transposeshape
ValueErrorr8   
from_numpydata)modelconfigtf_checkpoint_pathre   nptftf_path	init_varsnamesarraysnamer   arraypointerm_namescope_namesnumis                     r=   load_tf_weights_in_caniner   U   s   
 ggoo01G
KK8	BC''0IEF (l5'BC&&w5Te	 ! 5)zz#  
 
 
 
 KK)CHHTN#3457fDG!W$KKQ !W,,-DG!W..()DI5D!W,,a<Q1Q >DH,DF-v66Jf<T hhy&9%h1~)[^w-F!'84Q=0KNf4L!'62Q#33!'84%g1~>G ;1$+a.)!#,' ( #$<=(gx0GCD\uQx@x!	!ox@@gx0GxLL'E==EKK'~gmm_<Mekk]Zefgg078''.I *J Lo  Q	
 	J & KK)CHHTN+; <= As#   L2 "M?N2!M1NNc                      ^  \ rS rSrSrU 4S jrS\S\4S jrS\S\S\4S jr    SS	\	\
R                     S
\	\
R                     S\	\
R                     S\	\
R                     S\
R                  4
S jjrSrU =r$ )CanineEmbeddings   z<Construct the character, position and token_type embeddings.c           	        > [         TU ]  5         Xl        UR                  UR                  -  n[        UR                  5       H3  nSU 3n[        X[        R                  " UR                  U5      5        M5     [        R                  " UR                  UR                  5      U l
        [        R                  " UR                  UR                  5      U l        [        R                  " UR                  UR                  S9U l        [        R                  " UR                   5      U l        U R%                  S[&        R(                  " UR*                  5      R-                  S5      SS9  [/        USS5      U l        g )	NHashBucketCodepointEmbedder_epsposition_ids)r   F)
persistentposition_embedding_typeabsolute)super__init__r   hidden_sizenum_hash_functionsr}   setattrr   	Embeddingnum_hash_bucketschar_position_embeddingstype_vocab_sizerQ   rV   layer_norm_epsDropouthidden_dropout_probdropoutregister_bufferr8   arangemax_position_embeddingsexpandry   r   )selfr   shard_embedding_sizer   r   	__class__s        r=   r   CanineEmbeddings.__init__   s,     &11V5N5NNv001A1!5DDV-D-DFZ [\ 2 )+V5L5LfN`N`(a%%'\\&2H2H&J\J\%]" f&8&8f>S>STzz&"<"<= 	ELL)G)GHOOPWXej 	 	
 (/v7PR\']$r<   
num_hashesnum_bucketsc                     U[        [        5      :  a  [        S[        [        5       35      e[        SU n/ nU H  nUS-   U-  U-  nUR                  U5        M!     U$ )aW  
Converts ids to hash bucket ids via multiple hashing.

Args:
    input_ids: The codepoints or other IDs to be hashed.
    num_hashes: The number of hash functions to use.
    num_buckets: The number of hash buckets (i.e. embeddings in each table).

Returns:
    A list of tensors, each of which is the hash bucket IDs from one hash function.
z`num_hashes` must be <= Nr   )r{   _PRIMESr   rr   )r   	input_idsr   r   primesresult_tensorsprimehasheds           r=   _hash_bucket_tensors%CanineEmbeddings._hash_bucket_tensors   sk     G$7G~FGG*%E 1}-<F!!&)  r<   embedding_sizec                     X#-  S:w  a  [        SU SU S35      eU R                  XUS9n/ n[        U5       H,  u  pxSU 3n	[        X	5      " U5      n
UR	                  U
5        M.     [
        R                  " USS9$ )	zDConverts IDs (e.g. codepoints) into embeddings via multiple hashing.r   zExpected `embedding_size` (z) % `num_hashes` (z) == 0)r   r   r   r   dim)r   r   	enumeratery   rr   r8   cat)r   r   r   r   r   hash_bucket_tensorsembedding_shardsr   hash_bucket_idsr   shard_embeddingss              r=   _embed_hash_buckets$CanineEmbeddings._embed_hash_buckets   s    &!+:>:JJ\]g\hhnopp"77	fq7r"+,?"@A1!5D&t2?C##$45 #A
 yy)r22r<   r   token_type_idsr   inputs_embedsreturnc                 `   Ub  UR                  5       nOUR                  5       S S nUS   nUc  U R                  S S 2S U24   nUc8  [        R                  " U[        R                  U R                  R
                  S9nUcO  U R                  XR                  R                  U R                  R                  U R                  R                  5      nU R                  U5      nXG-   nU R                  S:X  a  U R                  U5      n	X-  nU R                  U5      nU R                  U5      nU$ )Nr   r   dtypedevicer   )sizer   r8   zeroslongr   r   r   r   r   r   rQ   r   r   rV   r   )
r   r   r   r   r   input_shape
seq_lengthrQ   rO   position_embeddingss
             r=   forwardCanineEmbeddings.forward   s     #..*K',,.s3K ^
,,Q^<L!"[[EJJtO`O`OgOghN  44;;22DKK4R4RTXT_T_TpTpM !% : :> J":
'':5"&"?"?"M-J^^J/
\\*-
r<   )rV   r   r   r   r   rQ   )NNNN)r3   r4   r5   r6   r7   r   r|   r   r   r   r8   
LongTensorr9   r   r;   __classcell__r   s   @r=   r   r      s    F^0# C .3S 3c 3`c 3  15593759"E,,-" !!1!12" u//0	"
   1 12" 
		" "r<   r   c                   f   ^  \ rS rSrSrU 4S jrS\R                  S\R                  4S jrSr	U =r
$ )CharactersToMoleculesi  zeConvert character sequence to initial molecule sequence (i.e. downsample) using strided convolutions.c                 6  > [         TU ]  5         [        R                  " UR                  UR                  UR
                  UR
                  S9U l        [        UR                     U l	        [        R                  " UR                  UR                  S9U l
        g )Nin_channelsout_channelskernel_sizestrider   )r   r   r   Conv1dr   downsampling_raterW   r   
hidden_act
activationrV   r   r   r   r   s     r=   r   CharactersToMolecules.__init__  sv    II**++00++	
	 !!2!23 f&8&8f>S>STr<   char_encodingr   c                 0   US S 2SS2S S 24   n[         R                  " USS5      nU R                  U5      n[         R                  " USS5      nU R                  U5      nUS S 2SS2S S 24   n[         R                  " X$/SS9nU R                  U5      nU$ )Nr   r   ra   r   r   )r8   r~   rW   r   r   rV   )r   r   cls_encodingdownsampleddownsampled_truncatedresults         r=   r   CharactersToMolecules.forward-  s    $Q!QY/ q!<ii.ook1a8ook2 !,AqtQJ 7 L@aH'r<   )rV   r   rW   )r3   r4   r5   r6   r7   r   r8   Tensorr   r;   r   r   s   @r=   r   r     s,    oUU\\ ell  r<   r   c                      ^  \ rS rSrSrU 4S jr S	S\R                  S\\R                     S\R                  4S jjr	Sr
U =r$ )
ConvProjectioniI  z
Project representations from hidden_size*2 back to hidden_size across a window of w = config.upsampling_kernel_size
characters.
c                 ~  > [         TU ]  5         Xl        [        R                  " UR
                  S-  UR
                  UR                  SS9U l        [        UR                     U l
        [        R                  " UR
                  UR                  S9U l        [        R                  " UR                  5      U l        g )Nra   r   r   r   )r   r   r   r   r   r   upsampling_kernel_sizerW   r   r   r   rV   r   r   r   r   r   s     r=   r   ConvProjection.__init__O  s    II**Q.++55	
	 !!2!23 f&8&8f>S>STzz&"<"<=r<   inputsfinal_seq_char_positionsr   c                    [         R                  " USS5      nU R                  R                  S-
  nUS-  nX4-
  n[        R
                  " XE4S5      nU R                  U" U5      5      n[         R                  " USS5      nU R                  U5      nU R                  U5      nU R                  U5      nUnUb  [        S5      eUn	U	$ )Nr   ra   r   z,CanineForMaskedLM is currently not supported)r8   r~   r   r   r   ConstantPad1drW   r   rV   r   NotImplementedError)
r   r   r   	pad_totalpad_begpad_endpadr   final_char_seq	query_seqs
             r=   r   ConvProjection.forward^  s     A.
 KK66:	q.%1153v;'A.('f%#/
 &&TUU&Ir<   )rV   r   r   rW   r   N)r3   r4   r5   r6   r7   r   r8   r   r   r   r;   r   r   s   @r=   r   r   I  sJ    
>$ <@"" #+5<<"8" 
	" "r<   r   c                      ^  \ rS rSrU 4S jrS r   SS\R                  S\R                  S\\R                     S\\R                     S\\
   S	\\R                  \\R                     4   4S
 jjrSrU =r$ )CanineSelfAttentioni  c                   > [         TU ]  5         UR                  UR                  -  S:w  a7  [	        US5      (       d&  [        SUR                   SUR                   S35      eUR                  U l        [        UR                  UR                  -  5      U l        U R                  U R                  -  U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  5      U l        [#        USS5      U l        U R$                  S:X  d  U R$                  S	:X  aH  UR&                  U l        [        R(                  " S
UR&                  -  S-
  U R                  5      U l        g g )Nr   r   zThe hidden size (z6) is not a multiple of the number of attention heads ()r   r   relative_keyrelative_key_queryra   r   )r   r   r   num_attention_headshasattrr   r|   attention_head_sizeall_head_sizer   Linearquerykeyvaluer   attention_probs_dropout_probr   ry   r   r   r   distance_embeddingr   s     r=   r   CanineSelfAttention.__init__  s    : ::a?PVXhHiHi#F$6$6#7 8 445Q8 
 $*#=#= #&v'9'9F<V<V'V#W !558P8PPYYv1143E3EF
99V//1C1CDYYv1143E3EF
zz&"E"EF'.v7PR\']$''>9T=Y=Y]q=q+1+I+ID(&(ll1v7U7U3UXY3Y[_[s[s&tD# >rr<   c                     UR                  5       S S U R                  U R                  4-   nUR                  " U6 nUR	                  SSSS5      $ )Nr   r   ra   r   r   )r   r  r  viewpermute)r   xnew_x_shapes      r=   transpose_for_scores(CanineSelfAttention.transpose_for_scores  sL    ffhsmt'?'?AYAY&ZZFFK yyAq!$$r<   from_tensor	to_tensorattention_mask	head_maskoutput_attentionsr   c                 P   U R                  U5      nU R                  U R                  U5      5      nU R                  U R                  U5      5      nU R                  U5      n	[        R
                  " XR                  SS5      5      n
U R                  S:X  d  U R                  S:X  Ga?  UR                  5       S   n[        R                  " U[        R                  UR                  S9R                  SS5      n[        R                  " U[        R                  UR                  S9R                  SS5      nX-
  nU R                  XR                  -   S-
  5      nUR                  U	R                   S9nU R                  S:X  a  [        R"                  " SX5      nU
U-   n
OFU R                  S:X  a6  [        R"                  " SX5      n[        R"                  " S	X5      nU
U-   U-   n
U
[$        R&                  " U R(                  5      -  n
Ubg  UR*                  S
:X  aS  [        R,                  " USS9nSUR/                  5       -
  [        R0                  " U
R                   5      R2                  -  nX-   n
[4        R6                  R9                  U
SS9nU R;                  U5      nUb  UU-  n[        R
                  " UU5      nUR=                  SSSS
5      R?                  5       nUR                  5       S S U R@                  4-   nUR                  " U6 nU(       a  UU4nU$ U4nU$ )Nr   rT   r
  r  r   r   )r   zbhld,lrd->bhlrzbhrd,lrd->bhlrr   r         ?r   ra   )!r  r  r  r  r8   matmulr~   r   r   r   r   r   r  r  r   tor   einsummathsqrtr  ndim	unsqueezefloatfinfominr   
functionalsoftmaxr   r  
contiguousr  )r   r  r  r   r!  r"  mixed_query_layer	key_layervalue_layerquery_layerattention_scoresr   position_ids_lposition_ids_rdistancepositional_embeddingrelative_position_scoresrelative_position_scores_queryrelative_position_scores_keyattention_probscontext_layernew_context_layer_shapeoutputss                          r=   r   CanineSelfAttention.forward  s    !JJ{3 --dhhy.AB	//

90EF//0AB !<<5H5HR5PQ''>9T=Y=Y]q=q$))+A.J"\\*EJJ{OaOabgghjlmnN"\\*EJJ{OaOabgghikmnN%6H#'#:#:8FbFb;bef;f#g #7#:#:ARAR#:#S ++~=+0<<8H++l(#36N#N --1EE16>NP[1r./4||<Li/n,#36T#TWs#s +dii8P8P.QQ%""a'!&Q!G #&(<(<(>">%++N^NdNdBeBiBi!i/@ --//0@b/I ,,7  -	9O_kB%--aAq9DDF"/"4"4"6s";t?Q?Q>S"S%**,CD6G=/2 O\M]r<   )
r  r  r  r   r  r   r  r   r  r  NNF)r3   r4   r5   r6   r   r  r8   r   r   r9   boolr   r   r;   r   r   s   @r=   r  r    s    u,% 7;15,1E\\E <<E !!2!23	E
 E--.E $D>E 
u||Xell33	4E Er<   r  c                      ^  \ rS rSrU 4S jrS\\R                     S\R                  S\\R                  \R                  4   4S jrSr	U =r
$ )CanineSelfOutputi  c                 (  > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " UR                  UR                  S9U l        [        R                  " UR                  5      U l
        g Nr   )r   r   r   r  r   denserV   r   r   r   r   r   s     r=   r   CanineSelfOutput.__init__  s`    YYv1163E3EF
f&8&8f>S>STzz&"<"<=r<   r0   input_tensorr   c                 p    U R                  U5      nU R                  U5      nU R                  X-   5      nU$ r  rI  r   rV   r   r0   rK  s      r=   r   CanineSelfOutput.forward  s7     

=1]3}'CDr<   rV   rI  r   r3   r4   r5   r6   r   r   r8   r9   r   r;   r   r   s   @r=   rF  rF    sQ    >"5#4#45EJEVEV	u  %"3"33	4 r<   rF  c                     ^  \ rS rSrSr       SS\S\S\S\S\S\4U 4S	 jjjrS
 r   SS\	\
R                     S\\
R                     S\\
R                     S\\   S\	\
R                  \\
R                     4   4
S jjrSrU =r$ )CanineAttentioni  aB  
Additional arguments related to local attention:

    - **local** (`bool`, *optional*, defaults to `False`) -- Whether to apply local attention.
    - **always_attend_to_first_position** (`bool`, *optional*, defaults to `False`) -- Should all blocks be able to
      attend
    to the `to_tensor`'s first position (e.g. a [CLS] position)? - **first_position_attends_to_all** (`bool`,
    *optional*, defaults to `False`) -- Should the *from_tensor*'s first position be able to attend to all
    positions within the *from_tensor*? - **attend_from_chunk_width** (`int`, *optional*, defaults to 128) -- The
    width of each block-wise chunk in `from_tensor`. - **attend_from_chunk_stride** (`int`, *optional*, defaults to
    128) -- The number of elements to skip when moving to the next block in `from_tensor`. -
    **attend_to_chunk_width** (`int`, *optional*, defaults to 128) -- The width of each block-wise chunk in
    *to_tensor*. - **attend_to_chunk_stride** (`int`, *optional*, defaults to 128) -- The number of elements to
    skip when moving to the next block in `to_tensor`.
always_attend_to_first_positionfirst_position_attends_to_allattend_from_chunk_widthattend_from_chunk_strideattend_to_chunk_widthattend_to_chunk_stridec	                   > [         T	U ]  5         [        U5      U l        [	        U5      U l        [        5       U l        X l        XV:  a  [        S5      eXx:  a  [        S5      eX0l
        X@l        XPl        X`l        Xpl        Xl        g )Nze`attend_from_chunk_width` < `attend_from_chunk_stride` would cause sequence positions to get skipped.z``attend_to_chunk_width` < `attend_to_chunk_stride`would cause sequence positions to get skipped.)r   r   r  r   rF  outputsetpruned_headslocalr   rT  rU  rV  rW  rX  rY  
r   r   r^  rT  rU  rV  rW  rX  rY  r   s
            r=   r   CanineAttention.__init__  s     	'/	&v.E 
"=w  !9r  0O,-J*'>$(@%%:"&<#r<   c                 6   [        U5      S:X  a  g [        XR                  R                  U R                  R                  U R
                  5      u  p[        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l	        [        U R                  R                  USS9U R                  l        U R                  R                  [        U5      -
  U R                  l        U R                  R                  U R                  R                  -  U R                  l        U R
                  R                  U5      U l        g )Nr   r   r   )r{   r   r   r  r  r]  r   r  r  r  r[  rI  r  union)r   headsindexs      r=   prune_headsCanineAttention.prune_heads)  s   u:?79900$))2O2OQUQbQb

 -TYY__eD		*499==%@		,TYY__eD		.t{{/@/@%QO )-		(E(EE
(R		%"&))"?"?$))B_B_"_		 --33E:r<   r0   r   r!  r"  r   c                    U R                   (       d  U R                  XX#U5      nUS   nGOUR                  S   =pxU=p/ nU R                  (       a  UR	                  S5        SnOSn[        XU R                  5       H-  n[        X}U R                  -   5      nUR	                  X45        M/     / nU R                  (       a  UR	                  SU45        [        SXR                  5       H-  n[        XU R                  -   5      nUR	                  X45        M/     [        U5      [        U5      :w  a  [        SU SU S35      e/ n/ n[        X5       H  u  u  nnu  nnU	S S 2UU2S S 24   nU
S S 2UU2S S 24   nUS S 2UU2UU24   nU R                  (       aJ  US S 2UU2SS24   n[        R                   " UU/SS9nU
S S 2SS2S S 24   n[        R                   " UU/SS9nU R                  UUUX45      nUR	                  US   5        U(       d  M  UR	                  US   5        M     [        R                   " USS9nU R#                  Xa5      nU4nU R                   (       d
  UWSS  -   nU$ U[%        W5      -   nU$ )	Nr   r   )r   r   z/Expected to have same number of `from_chunks` (z) and `to_chunks` (z). Check strides.ra   r   )r^  r   r   rU  rr   r}   rW  r.  rV  rY  rX  r{   r   rs   rT  r8   r   r[  tuple)r   r0   r   r!  r"  self_outputsattention_outputfrom_seq_lengthto_seq_lengthr  r  from_chunks
from_startchunk_start	chunk_end	to_chunksattention_output_chunksattention_probs_chunksfrom_endto_startto_endfrom_tensor_chunkto_tensor_chunkattention_mask_chunkcls_attention_maskcls_positionattention_outputs_chunkrA  s                               r=   r   CanineAttention.forward;  s    zz99]>^opL+A.;.A.A!.DDO&33K K11""6* 

$Z$B_B_`t?[?[1[\	""K#;<  a
 I11  !]!34$Q7R7RST=W=W/WX	  +!9:  T ;3y>1 Ek] S$$/=0AC  ')#%'">A+>Y:&X(:6$/:h3F0I$J!"+Ax,A"B (6aH9LhW]o6]'^$77)7:h;NPQRSPS8S)T&+0996HJ^5_ef+g(#,Q!QY#7L&+ii0OUV&WO*.))%8Li+' (../Fq/IJ$$*112I!2LM% ?Z(  %yy)@aH;;'7G#%zzQR 00G  &< ==Gr<   )
rT  rW  rV  rY  rX  rU  r^  r[  r]  r   FFF   r  r  r  rC  )r3   r4   r5   r6   r7   rD  r|   r   re  r   r8   r9   r   r   r;   r   r   s   @r=   rS  rS    s    & 05.3'*(+%(&)= *.	=
 (,= "%= #&=  #= !$= =B;* 7;15,1HU../H !!2!23H E--.	H
 $D>H 
u  (5+<+<"==	>H Hr<   rS  c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )CanineIntermediatei  c                   > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                  [        5      (       a  [        UR                     U l        g UR                  U l        g r  )r   r   r   r  r   intermediate_sizerI  
isinstancer   strr   intermediate_act_fnr   s     r=   r   CanineIntermediate.__init__  s`    YYv1163K3KL
f''--'-f.?.?'@D$'-'8'8D$r<   r0   r   c                 J    U R                  U5      nU R                  U5      nU$ r  rI  r  r   r0   s     r=   r   CanineIntermediate.forward  s&    

=100?r<   r  )
r3   r4   r5   r6   r   r8   r9   r   r;   r   r   s   @r=   r  r    s,    9U%6%6 5;L;L  r<   r  c                      ^  \ rS rSrU 4S jrS\\R                     S\R                  S\R                  4S jrSr	U =r
$ )CanineOutputi  c                 (  > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        R                  " UR
                  UR                  S9U l        [        R                  " UR                  5      U l        g rH  )r   r   r   r  r  r   rI  rV   r   r   r   r   r   s     r=   r   CanineOutput.__init__  s`    YYv779K9KL
f&8&8f>S>STzz&"<"<=r<   r0   rK  r   c                 p    U R                  U5      nU R                  U5      nU R                  X-   5      nU$ r  rM  rN  s      r=   r   CanineOutput.forward  s5    

=1]3}'CDr<   rP  rQ  r   s   @r=   r  r    s?    >U5+<+<%= UM^M^ chctct  r<   r  c                      ^  \ rS rSrU 4S jr   SS\\R                     S\\R                     S\\R                     S\\	   S\\R                  \\R                     4   4
S jjr
S	 rS
rU =r$ )CanineLayeri  c	           
         > [         T	U ]  5         UR                  U l        SU l        [	        UUUUUUUU5      U l        [        U5      U l        [        U5      U l	        g Nr   )
r   r   chunk_size_feed_forwardseq_len_dimrS  	attentionr  intermediater  r[  r_  s
            r=   r   CanineLayer.__init__  se     	'-'E'E$(+)#$!"	
 /v6"6*r<   r0   r   r!  r"  r   c                     U R                  UUUUS9nUS   nUSS  n[        U R                  U R                  U R                  U5      nU4U-   nU$ )N)r"  r   r   )r  r   feed_forward_chunkr  r  )	r   r0   r   r!  r"  self_attention_outputsrj  rA  layer_outputs	            r=   r   CanineLayer.forward  sv     "&/	 "0 "
 2!4(,0##T%A%A4CSCSUe
  /G+r<   c                 J    U R                  U5      nU R                  X!5      nU$ r  )r  r[  )r   rj  intermediate_outputr  s       r=   r  CanineLayer.feed_forward_chunk  s)    "//0@A{{#6Ir<   )r  r  r  r[  r  rC  )r3   r4   r5   r6   r   r   r8   r9   r   rD  r   r  r;   r   r   s   @r=   r  r    s    +< 7;15,1U../ !!2!23 E--.	
 $D> 
u  (5+<+<"==	>0 r<   r  c                      ^  \ rS rSr       SU 4S jjr     SS\\R                     S\\R                     S\\R                     S\\	   S\\	   S\\	   S	\
\\4   4S
 jjrSrU =r$ )CanineEncoderi  c	                    > [         T
U ]  5         Xl        [        R                  " [        UR                  5       V	s/ s H  n	[        UUUUUUUU5      PM     sn	5      U l        SU l	        g s  sn	f )NF)
r   r   r   r   
ModuleListr}   num_hidden_layersr  layergradient_checkpointing)r   r   r^  rT  rU  rV  rW  rX  rY  _r   s             r=   r   CanineEncoder.__init__  s}     	]] v778 9A 31+,)*	 9

 ',#s   A-r0   r   r!  r"  output_hidden_statesreturn_dictr   c                    U(       a  SOS nU(       a  SOS n[        U R                  5       H{  u  pU(       a  Xq4-   nUb  X9   OS nU R                  (       a1  U R                  (       a   U R	                  U
R
                  UUUU5      nO	U
" XX5      nUS   nU(       d  Ms  XS   4-   nM}     U(       a  Xq4-   nU(       d  [        S XU4 5       5      $ [        UUUS9$ )Nr2   r   r   c              3   .   #    U  H  oc  M  Uv   M     g 7fr  r2   rI   vs     r=   rK   (CanineEncoder.forward.<locals>.<genexpr>!  s     m$[q$[   	)r.   r0   r1   )r   r  r  training_gradient_checkpointing_func__call__rh  r   )r   r0   r   r!  r"  r  r  all_hidden_statesall_self_attentionsr   layer_modulelayer_head_masklayer_outputss                r=   r   CanineEncoder.forward  s     #7BD$5b4(4OA#$58H$H!.7.CilO**t}} $ A A ))!"#%! !-]O o)!,M  &91=M<O&O#'  5*   14D Dm]GZ$[mmm++*
 	
r<   )r   r  r  r~  )NNFFT)r3   r4   r5   r6   r   r   r8   r9   r   rD  r   r   r   r;   r   r   s   @r=   r  r    s     (-&+ #!$!",B 7;15,1/4&**
U../*
 !!2!23*
 E--.	*

 $D>*
 'tn*
 d^*
 
uo%	&*
 *
r<   r  c                   h   ^  \ rS rSrU 4S jrS\\R                     S\R                  4S jrSr	U =r
$ )CaninePooleri)  c                    > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " 5       U l        g r  )r   r   r   r  r   rI  Tanhr   r   s     r=   r   CaninePooler.__init__*  s9    YYv1163E3EF
'')r<   r0   r   c                 \    US S 2S4   nU R                  U5      nU R                  U5      nU$ )Nr   )rI  r   )r   r0   first_token_tensorpooled_outputs       r=   r   CaninePooler.forward/  s6     +1a40

#566r<   )r   rI  rQ  r   s   @r=   r  r  )  s1    $
U5+<+<%= %BSBS  r<   r  c                   h   ^  \ rS rSrU 4S jrS\\R                     S\R                  4S jrSr	U =r
$ )CaninePredictionHeadTransformi8  c                 p  > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        UR                  [        5      (       a  [        UR                     U l
        OUR                  U l
        [        R                  " UR                  UR                  S9U l        g rH  )r   r   r   r  r   rI  r  r   r  r   transform_act_fnrV   r   r   s     r=   r   &CaninePredictionHeadTransform.__init__9  s~    YYv1163E3EF
f''--$*6+<+<$=D!$*$5$5D!f&8&8f>S>STr<   r0   r   c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ r  )rI  r  rV   r  s     r=   r   %CaninePredictionHeadTransform.forwardB  s4    

=1--m<}5r<   )rV   rI  r  rQ  r   s   @r=   r  r  8  s2    UU5+<+<%= %BSBS  r<   r  c                   h   ^  \ rS rSrU 4S jrS\\R                     S\R                  4S jrSr	U =r
$ )CanineLMPredictionHeadiI  c                 H  > [         TU ]  5         [        U5      U l        [        R
                  " UR                  UR                  SS9U l        [        R                  " [        R                  " UR                  5      5      U l        U R                  U R                  l        g )NF)r_   )r   r   r  	transformr   r  r   
vocab_sizedecoder	Parameterr8   r   r_   r   s     r=   r   CanineLMPredictionHead.__init__J  sm    6v> yy!3!3V5F5FUSLLV->->!?@	 !IIr<   r0   r   c                 J    U R                  U5      nU R                  U5      nU$ r  )r  r  r  s     r=   r   CanineLMPredictionHead.forwardW  s$    }5]3r<   )r_   r  r  rQ  r   s   @r=   r  r  I  s1    &U5+<+<%= %BSBS  r<   r  c                   n   ^  \ rS rSrU 4S jrS\\R                     S\\R                     4S jrSr	U =r
$ )CanineOnlyMLMHeadi]  c                 B   > [         TU ]  5         [        U5      U l        g r  )r   r   r  predictionsr   s     r=   r   CanineOnlyMLMHead.__init__^  s    1&9r<   sequence_outputr   c                 (    U R                  U5      nU$ r  r  )r   r  prediction_scoress      r=   r   CanineOnlyMLMHead.forwardb  s     !,,_=  r<   r  )r3   r4   r5   r6   r   r   r8   r   r   r;   r   r   s   @r=   r  r  ]  s6    :!u||,! 
u||	! !r<   r  c                   *    \ rS rSr\r\rSrSr	S r
Srg)CaninePreTrainedModelij  canineTc                 8   [        U[        R                  [        R                  45      (       ak  UR                  R
                  R                  SU R                  R                  S9  UR                  b%  UR                  R
                  R                  5         gg[        U[        R                  5      (       ax  UR                  R
                  R                  SU R                  R                  S9  UR                  b2  UR                  R
                  UR                     R                  5         gg[        U[        R                  5      (       aJ  UR                  R
                  R                  5         UR                  R
                  R                  S5        gg)zInitialize the weightsg        )meanstdNr$  )r  r   r  r   r\   r   normal_r   initializer_ranger_   zero_r   padding_idxrV   fill_)r   modules     r=   _init_weights#CaninePreTrainedModel._init_weightsq  s   fryy"))455 MM&&CT[[5R5R&S{{&  &&( '--MM&&CT[[5R5R&S!!-""6#5#56<<> .--KK""$MM$$S) .r<   r2   N)r3   r4   r5   r6   r   config_classr   load_tf_weightsbase_model_prefixsupports_gradient_checkpointingr  r;   r2   r<   r=   r  r  j  s    L/O &*#*r<   r  c                     ^  \ rS rSrSU 4S jjrS rS rS\R                  S\	4S jr
S\R                  S	\	S
\R                  4S jr\         SS\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\   S\\   S\\   S
\\\4   4S jj5       rSrU =r$ )CanineModeli  c                   > [         TU ]  U5        Xl        [        R                  " U5      nSUl        [        U5      U l        [        USSSUR                  UR                  UR                  UR                  S9U l
        [        U5      U l        [        U5      U l        [        U5      U l        [        U5      U l        U(       a  [#        U5      OSU l        U R'                  5         g)z^
add_pooling_layer (bool, *optional*, defaults to `True`):
    Whether to add a pooling layer
r   TF)r^  rT  rU  rV  rW  rX  rY  N)r   r   r   copydeepcopyr  r   char_embeddingsr  local_transformer_striderR   r   rS   rN   r   rX   rU   r  pooler	post_init)r   r   add_pooling_layershallow_configr   s       r=   r   CanineModel.__init__  s    
 	 v.+,(/7$1,1*/$*$C$C%+%D%D"("A"A#)#B#B	%
! #8"?$V,(0"/"?.?l6*T 	r<   c                     UR                  5        H7  u  p#U R                  R                  U   R                  R	                  U5        M9     g)z
Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
class PreTrainedModel
N)itemsrN   r  r  re  )r   heads_to_pruner  rc  s       r=   _prune_headsCanineModel._prune_heads  s<    
 +002LELLu%//;;EB 3r<   c                    UR                   S   UR                   S   pCUR                   S   n[        R                  " X#SU45      R                  5       n[        R                  " X4S4[        R
                  UR                  S9nXb-  nU$ )a  
Create 3D attention mask from a 2D tensor mask.

Args:
    from_tensor: 2D or 3D Tensor of shape [batch_size, from_seq_length, ...].
    to_mask: int32 Tensor of shape [batch_size, to_seq_length].

Returns:
    float Tensor of shape [batch_size, from_seq_length, to_seq_length].
r   r   )r   r   r   )r   r8   reshaper,  onesfloat32r   )r   r  to_mask
batch_sizerk  rl  broadcast_onesmasks           r=   )_create_3d_attention_mask_from_input_mask5CanineModel._create_3d_attention_mask_from_input_mask  s     '2&7&7&:K<M<Ma<POa(--a)GHNNP
 *q)IQVQ^Q^gnguguv 'r<   char_attention_maskr   c                     UR                   u  p4[        R                  " XSU45      n[        R                  R	                  X"S9" UR                  5       5      n[        R                  " USS9nU$ )z[Downsample 2D character attention mask to 2D molecule attention mask using MaxPool1d layer.r   )r   r   r   r   )r   r8   r  r   	MaxPool1dr,  squeeze)r   r  r   r	  char_seq_lenpoolable_char_maskpooled_molecule_maskmolecule_attention_masks           r=   _downsample_attention_mask&CanineModel._downsample_attention_mask  sp     $7#<#< 
"]]+>QP\@]^  %xx11>O1j$$& 

 #(--0D""M&&r<   	moleculeschar_seq_lengthr   c                     U R                   R                  nUSS2SS2SS24   n[        R                  " XCSS9nUSS2SS2SS24   nX#-  n[        R                  " UXs-   SS9n[        R                  " XX/SS9$ )zDRepeats molecules to make them the same length as the char sequence.Nr   rT   )repeatsr   r   r   )r   r   r8   repeat_interleaver   )	r   r  r  ratemolecules_without_extra_clsrepeatedlast_moleculeremainder_lengthremainder_repeateds	            r=   _repeat_moleculesCanineModel._repeat_molecules  s     {{,,&/12q&9#**+FZ\] "!RS!),*1"44$+	
 yy(7R@@r<   r   r   r   r   r!  r   r"  r  r  c
           	      \   Ub  UOU R                   R                  nUb  UOU R                   R                  nU(       a  SOS n
U(       a  SOS nU	b  U	OU R                   R                  n	Ub  Ub  [	        S5      eUb"  U R                  X5        UR                  5       nO"Ub  UR                  5       S S nO[	        S5      eUu  pUb  UR                  OUR                  nUc  [        R                  " X4US9nUc$  [        R                  " U[        R                  US9nU R                  X,5      nU R                  X R                   R                  S9nU R                  UUUR                  S   45      nU R!                  XPR                   R"                  5      nU R%                  UUUUS9nU R'                  Ub  UOUU5      nU R)                  UUUUS	9nUR*                  nU R-                  U5      nU R/                  UUUUUU	S
9nUS   nU R0                  b  U R1                  U5      OS nU R3                  UUS   S9n[        R4                  " UU/SS9nU R7                  U5      nU R9                  UUUUS	9nUR*                  nU(       a7  U	(       a  UR:                  OUS   nU
UR:                  -   U-   UR:                  -   n
U(       a7  U	(       a  UR<                  OUS   n UUR<                  -   U -   UR<                  -   nU	(       d  UU4n!U![?        S X4 5       5      -  n!U!$ [A        UUU
US9$ )Nr2   zDYou cannot specify both input_ids and inputs_embeds at the same timer   z5You have to specify either input_ids or inputs_embeds)r   r   )r   )r   r   r   r   )r   r"  r  )r   r!  r"  r  r  r   )r  r   r   c              3   .   #    U  H  oc  M  Uv   M     g 7fr  r2   r  s     r=   rK   &CanineModel.forward.<locals>.<genexpr>  s     a'O!AA'Or  )r.   r/   r0   r1   )!r   r"  r  use_return_dictr   %warn_if_padding_and_no_attention_maskr   r   r8   r  r   r   get_extended_attention_maskr  r   r   get_head_maskr  r  r  rR   r.   rS   rN   r  r#  r   rX   rU   r0   r1   rh  r,   )"r   r   r   r   r   r!  r   r"  r  r  r  r  r   r	  r   r   extended_attention_maskr   extended_molecule_attention_maskinput_char_embeddingsr  init_chars_encoder_outputsinput_char_encodinginit_molecule_encodingencoder_outputsmolecule_sequence_outputr  repeated_moleculesconcatr  final_chars_encoder_outputsdeep_encoder_hidden_statesdeep_encoder_self_attentionsr[  s"                                     r=   r   CanineModel.forward  s    2C1N-TXT_T_TqTq$8$D $++JjJj 	 #7BD$5b4%0%<k$++B]B] ]%>cdd"66yQ#..*K&',,.s3KTUU!,
%.%:!!@T@T!"ZZ*)A6RN!"[[EJJvVN 150P0PQ_0m"&"A"Akk.K.K #B #
 :>9Y9Y#j2I2O2OPR2S%T:
( &&y++2O2OP	 !% 4 4%)'	 !5 !
 #LL".IM>
 &*%>%>!./!5	 &? &
" 9JJ  "&!8!89L!M ,,";/!5# ' 
 $31#5 AEAX$<=^b "334L^ijl^m3n /1CD"M //&1 '+&=&=2/!5	 '> '
# 6GGJU)F)F[jkl[m&!,::;,- .;;<  IT?+E+EZijlZm(#,778./ .889   %}5Fea(9'OaaaFM+-'+*	
 	
r<   )r  rS   r   rN   rU   rR   r  rX   )T)	NNNNNNNNN)r3   r4   r5   r6   r   r  r  r8   r   r|   r  r#  r   r   r   r9   rD  r   r   r,   r   r;   r   r   s   @r=   r  r    sM    DC6'ell '_b '"A5<< A# ARWR^R^ A2  156:59371559,0/3&*\
E,,-\
 !!2!23\
 !!1!12	\

 u//0\
 E--.\
   1 12\
 $D>\
 'tn\
 d^\
 
u22	3\
 \
r<   r  z
    CANINE Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled
    output) e.g. for GLUE tasks.
    )custom_introc                   R  ^  \ rS rSrU 4S jr\          SS\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S	\\R                     S
\\
   S\\
   S\\
   S\\\4   4S jj5       rSrU =r$ )CanineForSequenceClassificationi  c                 0  > [         TU ]  U5        UR                  U l        [        U5      U l        [
        R                  " UR                  5      U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g r  r   r   
num_labelsr  r  r   r   r   r   r  r   
classifierr  r   s     r=   r   (CanineForSequenceClassification.__init__  i      ++!&)zz&"<"<=))F$6$68I8IJ 	r<   r   r   r   r   r!  r   labelsr"  r  r  r   c                 R   U
b  U
OU R                   R                  n
U R                  UUUUUUUU	U
S9	nUS   nU R                  U5      nU R	                  U5      nSnUGb  U R                   R
                  c  U R                  S:X  a  SU R                   l        OoU R                  S:  aN  UR                  [        R                  :X  d  UR                  [        R                  :X  a  SU R                   l        OSU R                   l        U R                   R
                  S:X  aI  [        5       nU R                  S:X  a&  U" UR                  5       UR                  5       5      nOU" X5      nOU R                   R
                  S:X  a=  [        5       nU" UR                  SU R                  5      UR                  S5      5      nO,U R                   R
                  S:X  a  [        5       nU" X5      nU
(       d  U4USS -   nUb  U4U-   $ U$ [!        UUUR"                  UR$                  S	9$ )
ae  
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
Nr   r   r   r!  r   r"  r  r  r   
regressionsingle_label_classificationmulti_label_classificationr   ra   losslogitsr0   r1   )r   r(  r  r   r@  problem_typer?  r   r8   r   r|   r
   r  r	   r  r   r   r0   r1   )r   r   r   r   r   r!  r   rC  r"  r  r  rA  r  rK  rJ  loss_fctr[  s                    r=   r   'CanineForSequenceClassification.forward  s   ( &1%<k$++B]B]++))%'/!5#  

  
]3/{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#F3D))-JJ+-B @&++b/R))-II,./Y,F)-)9TGf$EvE'!//))	
 	
r<   r  r@  r   r?  
NNNNNNNNNN)r3   r4   r5   r6   r   r   r   r8   r   r9   rD  r   r   r   r   r;   r   r   s   @r=   r<  r<    s   	  156:59371559-1,0/3&*E
E,,-E
 !!2!23E
 !!1!12	E

 u//0E
 E--.E
   1 12E
 ))*E
 $D>E
 'tnE
 d^E
 
u..	/E
 E
r<   r<  c                   R  ^  \ rS rSrU 4S jr\          SS\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S	\\R                     S
\\
   S\\
   S\\
   S\\\4   4S jj5       rSrU =r$ )CanineForMultipleChoicei  c                    > [         TU ]  U5        [        U5      U l        [        R
                  " UR                  5      U l        [        R                  " UR                  S5      U l
        U R                  5         g r  )r   r   r  r  r   r   r   r   r  r   r@  r  r   s     r=   r    CanineForMultipleChoice.__init__  sV     !&)zz&"<"<=))F$6$6: 	r<   r   r   r   r   r!  r   rC  r"  r  r  r   c                 Z   U
b  U
OU R                   R                  n
Ub  UR                  S   OUR                  S   nUb!  UR                  SUR	                  S5      5      OSnUb!  UR                  SUR	                  S5      5      OSnUb!  UR                  SUR	                  S5      5      OSnUb!  UR                  SUR	                  S5      5      OSnUb1  UR                  SUR	                  S5      UR	                  S5      5      OSnU R                  UUUUUUUU	U
S9	nUS   nU R                  U5      nU R                  U5      nUR                  SU5      nSnUb  [        5       nU" X5      nU
(       d  U4USS -   nUb  U4U-   $ U$ [        UUUR                  UR                  S9$ )a  
input_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`):
    Indices of input sequence tokens in the vocabulary.

    Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
    [`PreTrainedTokenizer.__call__`] for details.

    [What are input IDs?](../glossary#input-ids)
token_type_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
    Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
    1]`:

    - 0 corresponds to a *sentence A* token,
    - 1 corresponds to a *sentence B* token.

    [What are token type IDs?](../glossary#token-type-ids)
position_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
    Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
    config.max_position_embeddings - 1]`.

    [What are position IDs?](../glossary#position-ids)
inputs_embeds (`torch.FloatTensor` of shape `(batch_size, num_choices, sequence_length, hidden_size)`, *optional*):
    Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
    is useful if you want more control over how to convert *input_ids* indices into associated vectors than the
    model's internal embedding lookup matrix.
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
    num_choices-1]` where `num_choices` is the size of the second dimension of the input tensors. (See
    `input_ids` above)
Nr   r   rT   rE  ra   rI  )r   r(  r   r  r   r  r   r@  r	   r   r0   r1   )r   r   r   r   r   r!  r   rC  r"  r  r  num_choicesrA  r  rK  reshaped_logitsrJ  rM  r[  s                      r=   r   CanineForMultipleChoice.forward  s   X &1%<k$++B]B],5,Aiooa(}GZGZ[\G]>G>SINN2y~~b'9:Y]	M[Mg,,R1D1DR1HImqM[Mg,,R1D1DR1HImqGSG_|((\->->r-BCei ( r=#5#5b#9=;M;Mb;QR 	 ++))%'/!5#  

  
]3/ ++b+6')HO4D%''!"+5F)-)9TGf$EvE("!//))	
 	
r<   )r  r@  r   rP  )r3   r4   r5   r6   r   r   r   r8   r   r9   rD  r   r   r   r   r;   r   r   s   @r=   rR  rR    s     156:59371559-1,0/3&*X
E,,-X
 !!2!23X
 !!1!12	X

 u//0X
 E--.X
   1 12X
 ))*X
 $D>X
 'tnX
 d^X
 
u//	0X
 X
r<   rR  c                   R  ^  \ rS rSrU 4S jr\          SS\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S	\\R                     S
\\
   S\\
   S\\
   S\\\4   4S jj5       rSrU =r$ )CanineForTokenClassificationiV  c                 0  > [         TU ]  U5        UR                  U l        [        U5      U l        [
        R                  " UR                  5      U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g r  r>  r   s     r=   r   %CanineForTokenClassification.__init__X  rB  r<   r   r   r   r   r!  r   rC  r"  r  r  r   c                    U
b  U
OU R                   R                  n
U R                  UUUUUUUU	U
S9	nUS   nU R                  U5      nU R	                  U5      nSnUb<  [        5       nU" UR                  SU R                  5      UR                  S5      5      nU
(       d  U4USS -   nUb  U4U-   $ U$ [        UUUR                  UR                  S9$ )a  
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.

Example:

```python
>>> from transformers import AutoTokenizer, CanineForTokenClassification
>>> import torch

>>> tokenizer = AutoTokenizer.from_pretrained("google/canine-s")
>>> model = CanineForTokenClassification.from_pretrained("google/canine-s")

>>> inputs = tokenizer(
...     "HuggingFace is a company based in Paris and New York", add_special_tokens=False, return_tensors="pt"
... )

>>> with torch.no_grad():
...     logits = model(**inputs).logits

>>> predicted_token_class_ids = logits.argmax(-1)

>>> # Note that tokens are classified rather then input words which means that
>>> # there might be more predicted token classes than words.
>>> # Multiple token classes might account for the same word
>>> predicted_tokens_classes = [model.config.id2label[t.item()] for t in predicted_token_class_ids[0]]
>>> predicted_tokens_classes  # doctest: +SKIP
```

```python
>>> labels = predicted_token_class_ids
>>> loss = model(**inputs, labels=labels).loss
>>> round(loss.item(), 2)  # doctest: +SKIP
```NrE  r   r   ra   rI  )r   r(  r  r   r@  r	   r  r?  r   r0   r1   )r   r   r   r   r   r!  r   rC  r"  r  r  rA  r  rK  rJ  rM  r[  s                    r=   r   $CanineForTokenClassification.forwardc  s    ` &1%<k$++B]B]++))%'/!5#  

 "!*,,71')HFKKDOO<fkk"oNDY,F)-)9TGf$EvE$!//))	
 	
r<   rO  rP  )r3   r4   r5   r6   r   r   r   r8   r   r9   rD  r   r   r   r   r;   r   r   s   @r=   rZ  rZ  V  s   	  156:59371559-1,0/3&*P
E,,-P
 !!2!23P
 !!1!12	P

 u//0P
 E--.P
   1 12P
 ))*P
 $D>P
 'tnP
 d^P
 
u++	,P
 P
r<   rZ  c                   r  ^  \ rS rSrU 4S jr\           SS\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S\\R                     S	\\R                     S
\\R                     S\\
   S\\
   S\\
   S\\\4   4S jj5       rSrU =r$ )CanineForQuestionAnsweringi  c                    > [         TU ]  U5        UR                  U l        [        U5      U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g r  )
r   r   r?  r  r  r   r  r   
qa_outputsr  r   s     r=   r   #CanineForQuestionAnswering.__init__  sS      ++!&)))F$6$68I8IJ 	r<   r   r   r   r   r!  r   start_positionsend_positionsr"  r  r  r   c                    Ub  UOU R                   R                  nU R                  UUUUUUU	U
US9	nUS   nU R                  U5      nUR	                  SSS9u  nnUR                  S5      nUR                  S5      nS nUb  Ub  [        UR                  5       5      S:  a  UR                  S5      n[        UR                  5       5      S:  a  UR                  S5      nUR                  S5      nUR                  SU5        UR                  SU5        [        US9nU" X5      nU" UU5      nUU-   S-  nU(       d  UU4USS  -   nUb  U4U-   $ U$ [        UUUUR                  UR                  S9$ )	NrE  r   r   r   r   )ignore_indexra   )rJ  start_logits
end_logitsr0   r1   )r   r(  r  rb  rt   r  r{   r   clamp_r	   r   r0   r1   )r   r   r   r   r   r!  r   rd  re  r"  r  r  rA  r  rK  rh  ri  
total_lossignored_indexrM  
start_lossend_lossr[  s                          r=   r   "CanineForQuestionAnswering.forward  s    &1%<k$++B]B]++))%'/!5#  

 "!*1#)<<r<#: j#++B/''+

&=+D?'')*Q."1"9"9""==%%'(1, - 5 5b 9(--a0M""1m4  M2']CH!,@J
M:H$x/14J"J/'!"+=F/9/EZMF*Q6Q+%!!//))
 	
r<   )r  r?  rb  )NNNNNNNNNNN)r3   r4   r5   r6   r   r   r   r8   r   r9   rD  r   r   r   r   r;   r   r   s   @r=   r`  r`    s$     156:593715596:48,0/3&*>
E,,->
 !!2!23>
 !!1!12	>

 u//0>
 E--.>
   1 12>
 "%"2"23>
   0 01>
 $D>>
 'tn>
 d^>
 
u22	3>
 >
r<   r`  )rR  r`  r<  rZ  r  r  r  r   )Ar7   r  r(  rk   dataclassesr   typingr   r   r   r8   torch.utils.checkpointr   torch.nnr   r	   r
   activationsr   modeling_outputsr   r   r   r   r   r   modeling_utilsr   pytorch_utilsr   r   r   utilsr   r   configuration_caniner   
get_loggerr3   ri   r   r,   r   Moduler   r   r   r  rF  rS  r  r  r  r  r  r  r  r  r  r  r<  rR  rZ  r`  __all__r2   r<   r=   <module>r}     sA      	 ! ) )    A A !  . l l , . 
		H	% U :; : :D^Bbryy bJ+BII +\7RYY 7ta")) aHryy  Lbii L^ 299 7")) 7tI
BII I
X299 BII "RYY (
!		 
! *O * *. M
' M
 M
` R
&; R
R
j d
3 d
 d
N ]
#8 ]
 ]
@ J
!6 J
 J
Z	r<   