ó
    fT–h¡ù  ã                   ó²  • S r SSKrSSKrSSKrSSKJr  SSKJrJrJ	r	J
r
  SSKrSSKJr  SSKJrJr  SSKJrJr  SS	KJr  SS
KJrJrJr  SSKJr  \R6                  " \5      r " S S\R<                  5      r\ " S S\5      5       r \ " S S\5      5       r!\ " S S\5      5       r"S r# " S S\R<                  5      r$ " S S\R<                  5      r% " S S\R<                  5      r& " S S\R<                  5      r' " S S\R<                  5      r( " S  S!\R<                  5      r) " S" S#\R<                  5      r* " S$ S%\R<                  5      r+ " S& S'\R<                  5      r, " S( S)\R<                  5      r- " S* S+\R<                  5      r. " S, S-\R<                  5      r/ " S. S/\R<                  5      r0 " S0 S1\R<                  5      r1 " S2 S3\R<                  5      r2 " S4 S5\R<                  5      r3 " S6 S7\R<                  5      r4\ " S8 S9\5      5       r5\ " S: S;\55      5       r6\ " S< S=\55      5       r7\" S>S?9 " S@ SA\55      5       r8/ SBQr9g)CzPyTorch LXMERT model.é    N)Ú	dataclass)ÚDictÚOptionalÚTupleÚUnion)Únn)ÚCrossEntropyLossÚSmoothL1Lossé   )ÚACT2FNÚgelu)ÚPreTrainedModel)ÚModelOutputÚauto_docstringÚloggingé   )ÚLxmertConfigc                   ó.   ^ • \ rS rSrU 4S jrS rSrU =r$ )ÚGeLUé$   c                 ó"   >• [         TU ]  5         g ©N)ÚsuperÚ__init__)ÚselfÚ	__class__s    €Úb/var/www/auris/envauris/lib/python3.13/site-packages/transformers/models/lxmert/modeling_lxmert.pyr   ÚGeLU.__init__%   s   ø€ Ü‰ÑÕó    c                 ó   • [        U5      $ r   )r   )r   Úxs     r   ÚforwardÚGeLU.forward(   s   € ÜA‹wˆr   © ©Ú__name__Ú
__module__Ú__qualname__Ú__firstlineno__r   r"   Ú__static_attributes__Ú__classcell__©r   s   @r   r   r   $   s   ø† õ÷ð r   r   c                   óx  • \ rS rSr% SrSr\\R                     \	S'   Sr
\\R                     \	S'   Sr\\R                     \	S'   Sr\\\R                        \	S'   Sr\\\R                        \	S'   Sr\\\R                        \	S	'   Sr\\\R                        \	S
'   Sr\\\R                        \	S'   Srg)ÚLxmertModelOutputé,   aó
  
Lxmert's outputs that contain the last hidden states, pooled outputs, and attention probabilities for the language,
visual, and, cross-modality encoders. (note: the visual encoder in Lxmert is referred to as the "relation-ship"
encoder")


Args:
    language_output (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`):
        Sequence of hidden-states at the output of the last layer of the language encoder.
    vision_output (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`):
        Sequence of hidden-states at the output of the last layer of the visual encoder.
    pooled_output (`torch.FloatTensor` of shape `(batch_size, hidden_size)`):
        Last layer hidden-state of the first token of the sequence (classification, CLS, token) further processed
        by a Linear layer and a Tanh activation function. The Linear
    language_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `torch.FloatTensor` (one for input features + one for the output of each cross-modality layer) of
        shape `(batch_size, sequence_length, hidden_size)`.
    vision_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `torch.FloatTensor` (one for input features + one for the output of each cross-modality layer) of
        shape `(batch_size, sequence_length, hidden_size)`.
    language_attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`. Attentions weights after the attention softmax, used to compute the weighted average in
        the self-attention heads.
    vision_attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`. Attentions weights after the attention softmax, used to compute the weighted average in
        the self-attention heads.
    cross_encoder_attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`. Attentions weights after the attention softmax, used to compute the weighted average in
        the self-attention heads.
NÚlanguage_outputÚvision_outputÚpooled_outputÚlanguage_hidden_statesÚvision_hidden_statesÚlanguage_attentionsÚvision_attentionsÚcross_encoder_attentionsr$   )r&   r'   r(   r)   Ú__doc__r0   r   ÚtorchÚFloatTensorÚ__annotations__r1   r2   r3   r   r4   r5   r6   r7   r*   r$   r   r   r.   r.   ,   sÐ   ‡ ñ ðD 48€OX˜e×/Ñ/Ñ0Ó7Ø15€M8˜E×-Ñ-Ñ.Ó5Ø15€M8˜E×-Ñ-Ñ.Ó5ØAEÐ˜H U¨5×+<Ñ+<Ñ%=Ñ>ÓEØ?CÐ˜( 5¨×):Ñ):Ñ#;Ñ<ÓCØ>BÐ˜ %¨×(9Ñ(9Ñ":Ñ;ÓBØ<@Ðx  e×&7Ñ&7Ñ 8Ñ9Ó@ØCGÐ˜h u¨U×->Ñ->Ñ'?Ñ@ÖGr   r.   c                   óP  • \ rS rSr% SrSr\\R                     \	S'   Sr
\\R                     \	S'   Sr\\\R                        \	S'   Sr\\\R                        \	S'   Sr\\\R                        \	S'   Sr\\\R                        \	S	'   Sr\\\R                        \	S
'   Srg)Ú LxmertForQuestionAnsweringOutputéZ   aQ	  
Output type of [`LxmertForQuestionAnswering`].

Args:
    loss (*optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`):
        Total loss as the sum of the masked language modeling loss and the next sequence prediction
        (classification) loss.k.
    question_answering_score (`torch.FloatTensor` of shape `(batch_size, n_qa_answers)`, *optional*):
        Prediction scores of question answering objective (classification).
    language_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `torch.FloatTensor` (one for input features + one for the output of each cross-modality layer) of
        shape `(batch_size, sequence_length, hidden_size)`.
    vision_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `torch.FloatTensor` (one for input features + one for the output of each cross-modality layer) of
        shape `(batch_size, sequence_length, hidden_size)`.
    language_attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`. Attentions weights after the attention softmax, used to compute the weighted average in
        the self-attention heads.
    vision_attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`. Attentions weights after the attention softmax, used to compute the weighted average in
        the self-attention heads.
    cross_encoder_attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`. Attentions weights after the attention softmax, used to compute the weighted average in
        the self-attention heads.
NÚlossÚquestion_answering_scorer3   r4   r5   r6   r7   r$   )r&   r'   r(   r)   r8   r?   r   r9   r:   r;   r@   r3   r   r4   r5   r6   r7   r*   r$   r   r   r=   r=   Z   s»   ‡ ñð: )-€Dˆ(5×$Ñ$Ñ
%Ó,Ø<@Ð˜h u×'8Ñ'8Ñ9Ó@ØAEÐ˜H U¨5×+<Ñ+<Ñ%=Ñ>ÓEØ?CÐ˜( 5¨×):Ñ):Ñ#;Ñ<ÓCØ>BÐ˜ %¨×(9Ñ(9Ñ":Ñ;ÓBØ<@Ðx  e×&7Ñ&7Ñ 8Ñ9Ó@ØCGÐ˜h u¨U×->Ñ->Ñ'?Ñ@ÖGr   r=   c                   ó   • \ rS rSr% SrSr\\R                     \	S'   Sr
\\R                     \	S'   Sr\\R                     \	S'   Sr\\R                     \	S'   Sr\\\R                        \	S'   Sr\\\R                        \	S	'   Sr\\\R                        \	S
'   Sr\\\R                        \	S'   Sr\\\R                        \	S'   Srg)ÚLxmertForPreTrainingOutputé‚   aï
  
Output type of [`LxmertForPreTraining`].

Args:
    loss (*optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`):
        Total loss as the sum of the masked language modeling loss and the next sequence prediction
        (classification) loss.
    prediction_logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
        Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
    cross_relationship_score (`torch.FloatTensor` of shape `(batch_size, 2)`):
        Prediction scores of the textual matching objective (classification) head (scores of True/False
        continuation before SoftMax).
    question_answering_score (`torch.FloatTensor` of shape `(batch_size, n_qa_answers)`):
        Prediction scores of question answering objective (classification).
    language_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `torch.FloatTensor` (one for input features + one for the output of each cross-modality layer) of
        shape `(batch_size, sequence_length, hidden_size)`.
    vision_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `torch.FloatTensor` (one for input features + one for the output of each cross-modality layer) of
        shape `(batch_size, sequence_length, hidden_size)`.
    language_attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`. Attentions weights after the attention softmax, used to compute the weighted average in
        the self-attention heads.
    vision_attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`. Attentions weights after the attention softmax, used to compute the weighted average in
        the self-attention heads.
    cross_encoder_attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`. Attentions weights after the attention softmax, used to compute the weighted average in
        the self-attention heads.

Nr?   Úprediction_logitsÚcross_relationship_scorer@   r3   r4   r5   r6   r7   r$   )r&   r'   r(   r)   r8   r?   r   r9   r:   r;   rD   rE   r@   r3   r   r4   r5   r6   r7   r*   r$   r   r   rB   rB   ‚   sè   ‡ ñ!ðF )-€Dˆ(5×$Ñ$Ñ
%Ó,Ø59Ðx × 1Ñ 1Ñ2Ó9Ø<@Ð˜h u×'8Ñ'8Ñ9Ó@Ø<@Ð˜h u×'8Ñ'8Ñ9Ó@ØAEÐ˜H U¨5×+<Ñ+<Ñ%=Ñ>ÓEØ?CÐ˜( 5¨×):Ñ):Ñ#;Ñ<ÓCØ>BÐ˜ %¨×(9Ñ(9Ñ":Ñ;ÓBØ<@Ðx  e×&7Ñ&7Ñ 8Ñ9Ó@ØCGÐ˜h u¨U×->Ñ->Ñ'?Ñ@ÖGr   rB   c           	      ó<  •  SSK nSSKnSSKn[        R                  R                  U5      n[        R                  SU 35        UR                  R                  U5      n/ n/ n	U H]  u  p«[        R                  SU
 SU 35        UR                  R                  Xj5      nUR                  U
5        U	R                  U5        M_     [        X‰5       GHª  u  p¬U
R                  S5      n
[!        S U
 5       5      (       a)  [        R                  S	SR#                  U
5       35        MW  U nU
 HÍ  nUR%                  S
U5      (       a  UR                  SU5      nOU/nUS   S:X  d	  US   S:X  a  ['        US5      nOZUS   S:X  d	  US   S:X  a  ['        US5      nO;US   S:X  a  ['        US5      nO%US   S:X  a  ['        US5      nO ['        XßS   5      n[+        U5      S:¼  d  Mº  [-        US   5      nUU   nMÏ     WSS S:X  a  ['        US5      nOUS:X  a  UR/                  U5      n UR0                  UR0                  :X  d   e [        R                  SU
 35        [6        R8                  " U5      Ul        GM­     U $ ! [         a    [        R                  S5        e f = f! [(         a,    [        R                  S	SR#                  U
5       35         GM¯  f = f! [2         a1  nU=R4                  UR0                  UR0                  4-  sl        e SnAff = f)z'Load tf checkpoints in a pytorch model.r   Nz™Loading a TensorFlow model in PyTorch, requires TensorFlow to be installed. Please see https://www.tensorflow.org/install/ for installation instructions.z&Converting TensorFlow checkpoint from zLoading TF weight z with shape Ú/c              3   ó,   #   • U  H
  nUS ;   v •  M     g7f))Úadam_vÚadam_mÚAdamWeightDecayOptimizerÚAdamWeightDecayOptimizer_1Úglobal_stepNr$   )Ú.0Úns     r   Ú	<genexpr>Ú,load_tf_weights_in_lxmert.<locals>.<genexpr>Ï   s%   é € ð 

ò ð ðöò ùs   ‚z	Skipping z[A-Za-z]+_\d+z_(\d+)ÚkernelÚgammaÚweightÚoutput_biasÚbetaÚbiasÚoutput_weightsÚsquadÚ
classifieré   r   iõÿÿÿÚ_embeddingszInitialize PyTorch weight )ÚreÚnumpyÚ
tensorflowÚImportErrorÚloggerÚerrorÚosÚpathÚabspathÚinfoÚtrainÚlist_variablesÚload_variableÚappendÚzipÚsplitÚanyÚjoinÚ	fullmatchÚgetattrÚAttributeErrorÚlenÚintÚ	transposeÚshapeÚAssertionErrorÚargsr9   Ú
from_numpyÚdata)ÚmodelÚconfigÚtf_checkpoint_pathr]   ÚnpÚtfÚtf_pathÚ	init_varsÚnamesÚarraysÚnameru   ÚarrayÚpointerÚm_nameÚscope_namesÚnumÚes                     r   Úload_tf_weights_in_lxmertrŠ   ²   sö  € ð
ÛãÛô g‰go‰oÐ0Ó1€GÜ
‡KKÐ8¸¸	ÐBÔCà—‘×'Ñ'¨Ó0€IØ€EØ€FÛ ‰ˆÜ‰Ð(¨¨¨l¸5¸'ÐBÔCØ—‘×&Ñ& wÓ5ˆØ‰TÔØ‰eÖñ	 !ô ˜5×)‰ˆØz‰z˜#‹ˆô ñ 

ñ ó

÷ 

ñ 

ô K‰K˜) C§H¡H¨T£NÐ#3Ð4Ô5ÙØˆÛˆFØ|‰|Ð,¨f×5Ñ5Ø Ÿh™h y°&Ó9‘à%˜hØ˜1‰~ Ó)¨[¸©^¸wÓ-FÜ! '¨8Ó4‘Ø˜Q‘ =Ó0°KÀ±NÀfÓ4LÜ! '¨6Ó2‘Ø˜Q‘Ð#3Ó3Ü! '¨8Ó4‘Ø˜Q‘ 7Ó*Ü! '¨<Ó8‘ðÜ% g¸1©~Ó>Gô ;Ó 1Õ$Ü˜+ a™.Ó)Ø! #™,’ñ+ ð, #$ˆ<˜=Ó(Ü˜g xÓ0‰GØxÓØ—L‘L Ó'ˆEð	Ø—=‘= E§K¡KÓ/Ð/Ñ/ô 	‰Ð0°°Ð7Ô8Ü×'Ò'¨Ó.ˆñe *ðf €LøôK ó Ü‰ðQô	
ð 	ðûôh &ó Ü—K‘K )¨C¯H©H°T«NÐ+;Ð <Ô=Ûðûô ó 	ØFŠFw—}‘} e§k¡kÐ2Ñ2FØûð	ús5   ‚J ÇJ'È,K Ê!J$Ê'1KËKË 
LË*,LÌLc                   ó6   ^ • \ rS rSrSrU 4S jrSS jrSrU =r$ )ÚLxmertEmbeddingsi  zGConstruct the embeddings from word, position and token_type embeddings.c                 óÎ  >• [         TU ]  5         [        R                  " UR                  UR
                  SS9U l        [        R                  " UR                  UR
                  SS9U l        [        R                  " UR                  UR
                  SS9U l
        [        R                  " UR
                  SS9U l        [        R                  " UR                  5      U l        g )Nr   )Úpadding_idxçê-™—q=©Úeps)r   r   r   Ú	EmbeddingÚ
vocab_sizeÚhidden_sizeÚword_embeddingsÚmax_position_embeddingsÚposition_embeddingsÚtype_vocab_sizeÚtoken_type_embeddingsÚ	LayerNormÚDropoutÚhidden_dropout_probÚdropout©r   r{   r   s     €r   r   ÚLxmertEmbeddings.__init__  s¥   ø€ Ü‰ÑÔÜ!Ÿ|š|¨F×,=Ñ,=¸v×?QÑ?QÐ_`ÑaˆÔÜ#%§<¢<°×0NÑ0NÐPV×PbÑPbÐpqÑ#rˆÔ Ü%'§\¢\°&×2HÑ2HÈ&×J\ÑJ\ÐjkÑ%lˆÔ"ô Ÿš f×&8Ñ&8¸eÑDˆŒÜ—z’z &×"<Ñ"<Ó=ˆr   c                 óJ  • Ub  UR                  5       nUR                  nOUR                  5       S S nUR                  nUS   n[        R                  " U[        R                  US9nUR                  S5      R                  U5      nUc8  [        R                  " U[        R                  U R                  R                  S9nUc  U R                  U5      nU R                  U5      nU R                  U5      n	X8-   U	-   n
U R                  U
5      n
U R                  U
5      n
U
$ )Néÿÿÿÿr   ©ÚdtypeÚdevicer   )Úsizer¤   r9   ÚarangeÚlongÚ	unsqueezeÚexpandÚzerosÚposition_idsr•   r—   r™   rš   r   )r   Ú	input_idsÚtoken_type_idsÚinputs_embedsÚinput_shaper¤   Ú
seq_lengthr«   r—   r™   Ú
embeddingss              r   r"   ÚLxmertEmbeddings.forward  s	  € ØÑ Ø#Ÿ.™.Ó*ˆKØ×%Ñ%‰Fà'×,Ñ,Ó.¨s°Ð3ˆKØ"×)Ñ)ˆFØ  ‘^ˆ
ä—|’| J´e·j±jÈÑPˆØ#×-Ñ-¨aÓ0×7Ñ7¸ÓDˆàÑ!Ü"Ÿ[š[¨¼E¿J¹JÈt×O`ÑO`×OgÑOgÑhˆNàÑ Ø ×0Ñ0°Ó;ˆMØ"×6Ñ6°|ÓDÐØ $× :Ñ :¸>Ó JÐà"Ñ8Ð;PÑPˆ
Ø—^‘^ JÓ/ˆ
Ø—\‘\ *Ó-ˆ
ØÐr   )rš   r   r—   r™   r•   ©NN)	r&   r'   r(   r)   r8   r   r"   r*   r+   r,   s   @r   rŒ   rŒ     s   ø† ÙQõ	>÷ò r   rŒ   c                   ó<   ^ • \ rS rSrSU 4S jjrS rSS jrSrU =r$ )ÚLxmertAttentioni)  c                 ó°  >• [         TU ]  5         UR                  UR                  -  S:w  a&  [	        SUR                   SUR                   S35      eUR                  U l        [        UR                  UR                  -  5      U l        U R                  U R                  -  U l        Uc  UR                  n[        R                  " UR                  U R                  5      U l
        [        R                  " X R                  5      U l        [        R                  " X R                  5      U l        [        R                  " UR                  5      U l        g )Nr   zThe hidden size (z6) is not a multiple of the number of attention heads (Ú))r   r   r”   Únum_attention_headsÚ
ValueErrorrs   Úattention_head_sizeÚ	head_sizer   ÚLinearÚqueryÚkeyÚvaluer›   Úattention_probs_dropout_probr   )r   r{   Úctx_dimr   s      €r   r   ÚLxmertAttention.__init__*  s  ø€ Ü‰ÑÔØ×Ñ × :Ñ :Ñ:¸aÓ?ÜØ# F×$6Ñ$6Ð#7ð 8Ø ×4Ñ4Ð5°Qð8óð ð $*×#=Ñ#=ˆÔ Ü#& v×'9Ñ'9¸F×<VÑ<VÑ'VÓ#WˆÔ Ø×1Ñ1°D×4LÑ4LÑLˆŒð ‰?Ø×(Ñ(ˆGÜ—Y’Y˜v×1Ñ1°4·>±>ÓBˆŒ
Ü—9’9˜W§n¡nÓ5ˆŒÜ—Y’Y˜w¯©Ó7ˆŒ
ä—z’z &×"EÑ"EÓFˆr   c                 ó¤   • UR                  5       S S U R                  U R                  4-   nUR                  U5      nUR	                  SSSS5      $ )Nr¡   r   r[   r   r   )r¥   r¸   rº   ÚviewÚpermute)r   r!   Únew_x_shapes      r   Útranspose_for_scoresÚ$LxmertAttention.transpose_for_scores>  sT   € Ø—f‘f“h˜s mØ×$Ñ$Ø×$Ñ$ð'
ñ 
ˆð F‰F;ÓˆØy‰y˜˜A˜q !Ó$Ð$r   c                 óÂ  • U R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      n	U R                  U5      n
[        R
                  " X‰R                  SS5      5      nU[        R                  " U R                  5      -  nUb  X³-   n[        R                  R                  USS9nU R                  U5      n[        R
                  " XÊ5      nUR                  SSSS5      R                  5       nUR!                  5       S S U R"                  4-   nUR%                  U5      nU(       a  XÜ4nU$ U4nU$ )Nr¡   éþÿÿÿ)Údimr   r[   r   r   )r½   r¾   r¿   rÇ   r9   Úmatmulrt   ÚmathÚsqrtrº   r   Ú
functionalÚsoftmaxr   rÅ   Ú
contiguousr¥   r»   rÄ   )r   Úhidden_statesÚcontextÚattention_maskÚoutput_attentionsÚmixed_query_layerÚmixed_key_layerÚmixed_value_layerÚquery_layerÚ	key_layerÚvalue_layerÚattention_scoresÚattention_probsÚcontext_layerÚnew_context_layer_shapeÚoutputss                   r   r"   ÚLxmertAttention.forwardF  sS  € Ø ŸJ™J }Ó5ÐØŸ(™( 7Ó+ˆØ ŸJ™J wÓ/Ðà×/Ñ/Ð0AÓBˆØ×-Ñ-¨oÓ>ˆ	Ø×/Ñ/Ð0AÓBˆô !Ÿ<š<¨×5HÑ5HÈÈRÓ5PÓQÐØ+¬d¯iªi¸×8PÑ8PÓ.QÑQÐàÑ%Ø/Ñ@Ðô Ÿ-™-×/Ñ/Ð0@ÀbÐ/ÐIˆð Ÿ,™, Ó7ˆäŸš _ÓBˆØ%×-Ñ-¨a°°A°qÓ9×DÑDÓFˆØ"/×"4Ñ"4Ó"6°s¸Ð";¸t¿~¹~Ð>OÑ"OÐØ%×*Ñ*Ð+BÓCˆæ6G=Ð2ˆØˆð O\ÐM]ˆØˆr   )rº   r   r»   r¾   r¸   r½   r¿   r   ©NF)	r&   r'   r(   r)   r   rÇ   r"   r*   r+   r,   s   @r   rµ   rµ   )  s   ø† ÷Gò(%÷ò r   rµ   c                   ó.   ^ • \ rS rSrU 4S jrS rSrU =r$ )ÚLxmertAttentionOutputif  c                 ó  >• [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " UR                  SS9U l        [        R                  " UR                  5      U l	        g ©Nr   r   )
r   r   r   r¼   r”   Údenserš   r›   rœ   r   rž   s     €r   r   ÚLxmertAttentionOutput.__init__g  sZ   ø€ Ü‰ÑÔÜ—Y’Y˜v×1Ñ1°6×3EÑ3EÓFˆŒ
ÜŸš f×&8Ñ&8¸eÑDˆŒÜ—z’z &×"<Ñ"<Ó=ˆr   c                 óp   • U R                  U5      nU R                  U5      nU R                  X-   5      nU$ r   ©rç   r   rš   ©r   rÒ   Úinput_tensors      r   r"   ÚLxmertAttentionOutput.forwardm  ó5   € ØŸ
™
 =Ó1ˆØŸ™ ]Ó3ˆØŸ™ }Ñ'CÓDˆØÐr   ©rš   rç   r   r%   r,   s   @r   rä   rä   f  ó   ø† õ>÷ð r   rä   c                   ó2   ^ • \ rS rSrU 4S jrSS jrSrU =r$ )ÚLxmertCrossAttentionLayerit  c                 ób   >• [         TU ]  5         [        U5      U l        [	        U5      U l        g r   )r   r   rµ   Úatträ   Úoutputrž   s     €r   r   Ú"LxmertCrossAttentionLayer.__init__u  s&   ø€ Ü‰ÑÔÜ" 6Ó*ˆŒÜ+¨FÓ3ˆr   c                 óˆ   • U R                  XX4S9nU(       a  US   nU R                  US   U5      nU(       a  UW4nU$ U4nU$ ©N©rÕ   r   r   ©rô   rõ   )	r   rì   Ú
ctx_tensorÚctx_att_maskrÕ   rõ   rÝ   Úattention_outputrà   s	            r   r"   Ú!LxmertCrossAttentionLayer.forwardz  sX   € Ø—‘˜,°LÐfˆÞØ$ Q™iˆOØŸ;™; v¨a¡y°,Ó?ÐÞ9JÐ# _Ð5ˆØˆð RbÐPcˆØˆr   rú   râ   r%   r,   s   @r   rò   rò   t  s   ø† õ4÷
ò r   rò   c                   ó2   ^ • \ rS rSrU 4S jrSS jrSrU =r$ )ÚLxmertSelfAttentionLayeriƒ  c                 ób   >• [         TU ]  5         [        U5      U l        [	        U5      U l        g r   )r   r   rµ   r   rä   rõ   rž   s     €r   r   Ú!LxmertSelfAttentionLayer.__init__„  s&   ø€ Ü‰ÑÔÜ# FÓ+ˆŒ	Ü+¨FÓ3ˆr   c                 óŒ   • U R                  UUUUS9nU(       a  US   nU R                  US   U5      nU(       a  UW4nU$ U4nU$ rø   )r   rõ   )r   rì   rÔ   rÕ   rõ   rÝ   rý   rà   s           r   r"   Ú LxmertSelfAttentionLayer.forward‰  sg   € à—‘ØØØØ/ð	 ð 
ˆö Ø$ Q™iˆOØŸ;™; v¨a¡y°,Ó?ÐÞ9JÐ# _Ð5ˆØˆð RbÐPcˆØˆr   )rõ   r   ©Fr%   r,   s   @r   r   r   ƒ  s   ø† õ4÷
ò r   r   c                   ó.   ^ • \ rS rSrU 4S jrS rSrU =r$ )ÚLxmertIntermediatei˜  c                 ó²   >• [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                     U l	        g r   )
r   r   r   r¼   r”   Úintermediate_sizerç   r   Ú
hidden_actÚintermediate_act_fnrž   s     €r   r   ÚLxmertIntermediate.__init__™  s?   ø€ Ü‰ÑÔÜ—Y’Y˜v×1Ñ1°6×3KÑ3KÓLˆŒ
Ü#)¨&×*;Ñ*;Ñ#<ˆÕ r   c                 óJ   • U R                  U5      nU R                  U5      nU$ r   ©rç   r  ©r   rÒ   s     r   r"   ÚLxmertIntermediate.forwardž  s&   € ØŸ
™
 =Ó1ˆØ×0Ñ0°Ó?ˆØÐr   r  r%   r,   s   @r   r  r  ˜  s   ø† õ=÷
ð r   r  c                   ó.   ^ • \ rS rSrU 4S jrS rSrU =r$ )ÚLxmertOutputi¤  c                 ó  >• [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        R                  " UR
                  SS9U l        [        R                  " UR                  5      U l
        g ræ   )r   r   r   r¼   r	  r”   rç   rš   r›   rœ   r   rž   s     €r   r   ÚLxmertOutput.__init__¥  sZ   ø€ Ü‰ÑÔÜ—Y’Y˜v×7Ñ7¸×9KÑ9KÓLˆŒ
ÜŸš f×&8Ñ&8¸eÑDˆŒÜ—z’z &×"<Ñ"<Ó=ˆr   c                 óp   • U R                  U5      nU R                  U5      nU R                  X-   5      nU$ r   rê   rë   s      r   r"   ÚLxmertOutput.forward«  rî   r   rï   r%   r,   s   @r   r  r  ¤  rð   r   r  c                   ó2   ^ • \ rS rSrU 4S jrSS jrSrU =r$ )ÚLxmertLayeri²  c                 ó‚   >• [         TU ]  5         [        U5      U l        [	        U5      U l        [        U5      U l        g r   )r   r   r   Ú	attentionr  Úintermediater  rõ   rž   s     €r   r   ÚLxmertLayer.__init__³  s3   ø€ Ü‰ÑÔÜ1°&Ó9ˆŒÜ.¨vÓ6ˆÔÜ" 6Ó*ˆr   c                 ó†   • U R                  XUS9nUS   nU R                  U5      nU R                  Xe5      nU4USS  -   nU$ )Nrù   r   r   ©r  r  rõ   )r   rÒ   rÔ   rÕ   rà   rý   Úintermediate_outputÚlayer_outputs           r   r"   ÚLxmertLayer.forward¹  sW   € Ø—.‘. ÐRc.ÐdˆØ" 1™:ÐØ"×/Ñ/Ð0@ÓAÐØ—{‘{Ð#6ÓIˆØ/ G¨A¨B KÑ/ˆØˆr   r  râ   r%   r,   s   @r   r  r  ²  s   ø† õ+÷ò r   r  c                   óL   ^ • \ rS rSrU 4S jr SS jrS rS r SS jrSr	U =r
$ )	ÚLxmertXLayeriÂ  c                 ó  >• [         TU ]  5         [        U5      U l        [	        U5      U l        [	        U5      U l        [        U5      U l        [        U5      U l
        [        U5      U l        [        U5      U l        g r   )r   r   rò   Úvisual_attentionr   Úlang_self_attÚvisn_self_attr  Ú
lang_interr  Úlang_outputÚ
visn_interÚvisn_outputrž   s     €r   r   ÚLxmertXLayer.__init__Ã  sk   ø€ Ü‰ÑÔä 9¸&Ó AˆÔô 6°fÓ=ˆÔÜ5°fÓ=ˆÔô -¨VÓ4ˆŒÜ'¨Ó/ˆÔÜ,¨VÓ4ˆŒÜ'¨Ó/ˆÕr   c                 óP   • U R                  UUUUS9nU R                  UUUSS9nXg4$ )N)rü   rÕ   F)r%  )r   Ú
lang_inputÚlang_attention_maskÚvisual_inputÚvisual_attention_maskÚoutput_x_attentionsÚlang_att_outputÚvisual_att_outputs           r   Ú	cross_attÚLxmertXLayer.cross_attÒ  sT   € ð ×/Ñ/ØØØ.Ø1ð	 0ð 
ˆð !×1Ñ1ØØØ,Ø#ð	 2ð 
Ðð Ð1Ð1r   c                 óV   • U R                  XSS9nU R                  X4SS9nUS   US   4$ )NFrù   r   )r&  r'  )r   r.  r/  r0  r1  r3  r4  s          r   Úself_attÚLxmertXLayer.self_atté  sE   € à×,Ñ,¨ZÐ`eÐ,ÐfˆØ ×.Ñ.¨|ÐfkÐ.ÐlÐØ˜qÑ!Ð#4°QÑ#7Ð7Ð7r   c                 ó   • U R                  U5      nU R                  U5      nU R                  X15      nU R                  XB5      nXV4$ r   )r(  r*  r)  r+  )r   r.  r0  Úlang_inter_outputÚvisual_inter_outputr)  Úvisual_outputs          r   Ú	output_fcÚLxmertXLayer.output_fcï  sM   € à ŸO™O¨JÓ7ÐØ"Ÿo™o¨lÓ;Ðð ×&Ñ&Ð'8ÓEˆØ×(Ñ(Ð)<ÓKˆàÐ)Ð)r   c                 ó¸   • U R                  UUUUUS9u  pgUSS  nU R                  US   UUS   U5      u  pgU R                  Xg5      u  pšU(       a  U	U
US   4$ Xš4$ )N)r.  r/  r0  r1  r2  r   r   )r5  r8  r>  )r   Ú
lang_featsr/  Úvisual_featsr1  rÕ   r3  r4  rÝ   r)  r=  s              r   r"   ÚLxmertXLayer.forwardú  s   € ð .2¯^©^Ø!Ø 3Ø%Ø"7Ø 1ð .<ð .
Ñ*ˆð *¨!¨"Ð-ˆØ-1¯]©]Ø˜AÑØØ˜aÑ Ø!ó	.
Ñ*ˆð &*§^¡^°OÓ%WÑ"ˆö !ð	 ØØ Ñ"ðð	
ð Ð-ð	
r   )r(  r)  r&  r*  r+  r'  r%  r  )r&   r'   r(   r)   r   r5  r8  r>  r"   r*   r+   r,   s   @r   r#  r#  Â  s+   ø† õ0ð* "ô2ò.8ò	*ð"  ÷ 
ò  
r   r#  c                   ó.   ^ • \ rS rSrU 4S jrS rSrU =r$ )ÚLxmertVisualFeatureEncoderi  c                 óÀ  >• [         TU ]  5         UR                  nUR                  n[        R
                  " X!R                  5      U l        [        R                  " UR                  SS9U l	        [        R
                  " X1R                  5      U l
        [        R                  " UR                  SS9U l        [        R                  " UR                  5      U l        g ræ   )r   r   Úvisual_feat_dimÚvisual_pos_dimr   r¼   r”   Úvisn_fcrš   Úvisn_layer_normÚbox_fcÚbox_layer_normr›   rœ   r   )r   r{   Úfeat_dimÚpos_dimr   s       €r   r   Ú#LxmertVisualFeatureEncoder.__init__  sœ   ø€ Ü‰ÑÔØ×)Ñ)ˆØ×'Ñ'ˆô —y’y ×+=Ñ+=Ó>ˆŒÜ!Ÿ|š|¨F×,>Ñ,>ÀEÑJˆÔô —i’i ×);Ñ);Ó<ˆŒÜ Ÿlšl¨6×+=Ñ+=À5ÑIˆÔä—z’z &×"<Ñ"<Ó=ˆr   c                 ó¾   • U R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nX4-   S-  nU R	                  U5      nU$ ©Nr[   )rI  rJ  rK  rL  r   )r   rB  Ú
visual_posr!   Úyrõ   s         r   r"   Ú"LxmertVisualFeatureEncoder.forward-  s\   € ØL‰L˜Ó&ˆØ× Ñ  Ó#ˆØK‰K˜
Ó#ˆØ×Ñ Ó"ˆØ‘%˜1‘ˆà—‘˜fÓ%ˆØˆr   )rK  rL  r   rI  rJ  r%   r,   s   @r   rE  rE    s   ø† õ>÷ð r   rE  c                   ó6   ^ • \ rS rSrU 4S jr  SS jrSrU =r$ )ÚLxmertEncoderi8  c                 ó‚  >• [         TU ]  5         [        U5      U l        Xl        UR
                  U l        UR                  U l        UR                  U l
        [        R                  " [        U R                  5       Vs/ s H  n[        U5      PM     sn5      U l        [        R                  " [        U R                  5       Vs/ s H  n[!        U5      PM     sn5      U l        [        R                  " [        U R                  5       Vs/ s H  n[        U5      PM     sn5      U l	        g s  snf s  snf s  snf r   )r   r   rE  rI  r{   Úl_layersÚnum_l_layersÚx_layersÚnum_x_layersÚr_layersÚnum_r_layersr   Ú
ModuleListÚranger  Úlayerr#  )r   r{   Ú_r   s      €r   r   ÚLxmertEncoder.__init__9  sè   ø€ Ü‰ÑÔô 2°&Ó9ˆŒØŒð #ŸO™OˆÔØ"ŸO™OˆÔØ"ŸO™OˆÔô —]’]ÄÀt×GXÑGXÔAYÓ#ZÒAY¸A¤K°Ö$7ÑAYÑ#ZÓ[ˆŒ
ÜŸšÄUÈ4×K\ÑK\ÔE]Ó&^ÒE]À¤|°FÖ';ÑE]Ñ&^Ó_ˆŒÜŸšÄEÈ$×J[ÑJ[ÔD\Ó&]ÒD\¸q¤{°6Ö':ÑD\Ñ&]Ó^ˆùò $[ùÚ&^ùÚ&]s   Â D2ÃD7ÄD<c           	      óœ  • SnSnU(       d  U R                   R                  (       a  SOS n	U(       d  U R                   R                  (       a  SOS n
U(       d  U R                   R                  (       a  SOS nU R                  X45      nU R                   H!  nU" XUS9nUS   nX4-   nU
c  M  X­S   4-   n
M#     U R                   H!  nU" X5US9nUS   nXs4-   nU	c  M  XžS   4-   n	M#     U R
                   H+  nU" UUUUUS9nUS S u  pXs4-   nX4-   nUc  M#  X¿S   4-   nM-     UU(       a  U	OS 4nUU(       a  U
OS 4nUUU(       a  U4$ S 4$ )Nr$   rù   r   r   r[   )r{   rÕ   rI  r`  r\  rZ  )r   rA  r/  rB  rR  r1  rÕ   r4   r3   r6   r5   r7   Úlayer_moduleÚ	l_outputsÚ	v_outputsÚ	x_outputsÚvisual_encoder_outputsÚlang_encoder_outputss                     r   r"   ÚLxmertEncoder.forwardK  s§  € ð  "ÐØ!#ÐÞ"3°t·{±{×7T×7T™BÐZ^ÐÞ$5¸¿¹×9V×9V™bÐ\`ÐÞ):¸d¿k¹k×>[×>[¡2ÐaeÐ à—|‘| LÓ=ˆð !ŸJœJˆLÙ$ ZÐXiÑjˆIØ" 1™ˆJØ%;¸mÑ%KÐ"Ø"Ó.Ø&9Àq¹\¸OÑ&KÒ#ñ 'ð !ŸMœMˆLÙ$ \Ð\mÑnˆIØ$ Q™<ˆLØ#7¸/Ñ#IÐ Ø Ó,Ø$5À1¹¸Ñ$GÒ!ñ *ð !ŸMœMˆLÙ$ØØ#ØØ%Ø"3ñˆIð (1°°! }Ñ$ˆJØ#7¸/Ñ#IÐ Ø%;¸mÑ%KÐ"Ø'Ó3Ø+CÐQRÁ|ÀoÑ+UÒ(ñ *ð !Þ!2Ñ¸ð"
Ðð
 #Þ#4Ñ¸$ð 
Ðð
 #Ø Þ(9Ð$ð
ð 	
ð @Dð
ð 	
r   )r{   r`  rY  r]  r[  r\  rI  rZ  r³   r%   r,   s   @r   rV  rV  8  s   ø† õ_ð0 #Ø÷;
ò ;
r   rV  c                   ó.   ^ • \ rS rSrU 4S jrS rSrU =r$ )ÚLxmertPooleri‰  c                 ó¾   >• [         [        U ]  5         [        R                  " UR
                  UR
                  5      U l        [        R                  " 5       U l        g r   )	r   rl  r   r   r¼   r”   rç   ÚTanhÚ
activationrž   s     €r   r   ÚLxmertPooler.__init__Š  s;   ø€ ÜŒl˜DÑ*Ô,Ü—Y’Y˜v×1Ñ1°6×3EÑ3EÓFˆŒ
ÜŸ'š'›)ˆr   c                 ó\   • US S 2S4   nU R                  U5      nU R                  U5      nU$ )Nr   )rç   ro  )r   rÒ   Úfirst_token_tensorr2   s       r   r"   ÚLxmertPooler.forward  s6   € ð +ª1¨a¨4Ñ0ÐØŸ
™
Ð#5Ó6ˆØŸ™¨Ó6ˆØÐr   )ro  rç   r%   r,   s   @r   rl  rl  ‰  s   ø† õ$÷
ð r   rl  c                   ó.   ^ • \ rS rSrU 4S jrS rSrU =r$ )ÚLxmertPredictionHeadTransformi˜  c                 ó  >• [         [        U ]  5         [        R                  " UR
                  UR
                  5      U l        [        UR                     U l	        [        R                  " UR
                  SS9U l
        g ræ   )r   ru  r   r   r¼   r”   rç   r   r
  Útransform_act_fnrš   rž   s     €r   r   Ú&LxmertPredictionHeadTransform.__init__™  sZ   ø€ ÜÔ+¨TÑ;Ô=Ü—Y’Y˜v×1Ñ1°6×3EÑ3EÓFˆŒ
Ü & v×'8Ñ'8Ñ 9ˆÔÜŸš f×&8Ñ&8¸eÑDˆr   c                 ól   • U R                  U5      nU R                  U5      nU R                  U5      nU$ r   )rç   rw  rš   r  s     r   r"   Ú%LxmertPredictionHeadTransform.forwardŸ  s4   € ØŸ
™
 =Ó1ˆØ×-Ñ-¨mÓ<ˆØŸ™ }Ó5ˆØÐr   )rš   rç   rw  r%   r,   s   @r   ru  ru  ˜  s   ø† õE÷ð r   ru  c                   ó.   ^ • \ rS rSrU 4S jrS rSrU =r$ )ÚLxmertLMPredictionHeadi¦  c                 óX  >• [         [        U ]  5         [        U5      U l        [
        R                  " UR                  S5      UR                  S5      SS9U l        X R                  l	        [
        R                  " [        R                  " UR                  S5      5      5      U l        g )Nr   r   F©rW   )r   r|  r   ru  Ú	transformr   r¼   r¥   ÚdecoderrT   Ú	Parameterr9   rª   rW   ©r   r{   Úlxmert_model_embedding_weightsr   s      €r   r   ÚLxmertLMPredictionHead.__init__§  s€   ø€ ÜÔ$ dÑ4Ô6Ü6°vÓ>ˆŒô —y’yØ*×/Ñ/°Ó2Ø*×/Ñ/°Ó2Øñ
ˆŒð
 =‰ÔÜ—L’L¤§¢Ð-K×-PÑ-PÐQRÓ-SÓ!TÓUˆ	r   c                 ód   • U R                  U5      nU R                  U5      U R                  -   nU$ r   )r  r€  rW   r  s     r   r"   ÚLxmertLMPredictionHead.forwardµ  s-   € ØŸ™ }Ó5ˆØŸ™ ]Ó3°d·i±iÑ?ˆØÐr   )rW   r€  r  r%   r,   s   @r   r|  r|  ¦  s   ø† õV÷ð r   r|  c                   ó.   ^ • \ rS rSrU 4S jrS rSrU =r$ )ÚLxmertVisualAnswerHeadi»  c           	      ó  >• [         TU ]  5         UR                  n[        R                  " [        R
                  " X3S-  5      [        5       [        R                  " US-  SS9[        R
                  " US-  U5      5      U l        g )Nr[   r   r   )	r   r   r”   r   Ú
Sequentialr¼   r   rš   Úlogit_fc)r   r{   Ú
num_labelsÚhid_dimr   s       €r   r   ÚLxmertVisualAnswerHead.__init__¼  sb   ø€ Ü‰ÑÔØ×$Ñ$ˆÜŸšÜIŠIg¨™{Ó+Ü‹FÜLŠL˜ 1™¨%Ñ0ÜIŠIg ‘k :Ó.ó	
ˆr   c                 ó$   • U R                  U5      $ r   ©r‹  r  s     r   r"   ÚLxmertVisualAnswerHead.forwardÆ  s   € Ø}‰}˜]Ó+Ð+r   r  r%   r,   s   @r   rˆ  rˆ  »  s   ø† õ
÷,ð ,r   rˆ  c                   ó.   ^ • \ rS rSrU 4S jrS rSrU =r$ )ÚLxmertVisualObjHeadiÊ  c                 ó  >• [         TU ]  5         [        U5      U l        0 nUR                  (       a  SUR
                  S.US'   UR                  (       a  SUR                  S.US'   UR                  (       a  SUR                  4UR                  S.US'   X l
        [        R                  " U R                   Vs0 s H4  o3[        R                  " UR                  U R                  U   S   5      _M6     sn5      U l        g s  snf )N©r¡   )ru   rˆ   ÚobjÚattrr¡   Úfeatrˆ   )r   r   ru  r  Úvisual_obj_lossÚnum_object_labelsÚvisual_attr_lossÚnum_attr_labelsÚvisual_feat_lossrG  Úvisual_lossesr   Ú
ModuleDictr¼   r”   Údecoder_dict)r   r{   rž  r¾   r   s       €r   r   ÚLxmertVisualObjHead.__init__Ë  sä   ø€ Ü‰ÑÔÜ6°vÓ>ˆŒàˆØ×!×!Ø-2¸6×;SÑ;SÑ#TˆM˜%Ñ Ø×"×"Ø.3¸F×<RÑ<RÑ$SˆM˜&Ñ!Ø×"×"à˜f×4Ñ4Ð5Ø×-Ñ-ñ%ˆM˜&Ñ!ð +Ôô ŸMšMØ[_×[mÒ[mÓnÒ[mÐTW”"—)’)˜F×.Ñ.°×0BÑ0BÀ3Ñ0GÈÑ0NÓOÒOÑ[mÑnó
ˆÕùÚns   Â;;Dc                 ó€   • U R                  U5      n0 nU R                   H  nU R                  U   " U5      X#'   M     U$ r   )r  rž  r   )r   rÒ   rõ   r¾   s       r   r"   ÚLxmertVisualObjHead.forwardá  sA   € ØŸ™ }Ó5ˆØˆØ×%Ô%ˆCØ×+Ñ+¨CÒ0°Ó?ˆF‹Kñ &àˆr   )r   r  rž  r%   r,   s   @r   r“  r“  Ê  s   ø† õ
÷,ð r   r“  c                   ó.   ^ • \ rS rSrU 4S jrS rSrU =r$ )ÚLxmertPreTrainingHeadsié  c                 ó–   >• [         [        U ]  5         [        X5      U l        [
        R                  " UR                  S5      U l        g rQ  )	r   r¥  r   r|  Úpredictionsr   r¼   r”   Úseq_relationshipr‚  s      €r   r   ÚLxmertPreTrainingHeads.__init__ê  s7   ø€ ÜÔ$ dÑ4Ô6Ü1°&ÓYˆÔÜ "§	¢	¨&×*<Ñ*<¸aÓ @ˆÕr   c                 óL   • U R                  U5      nU R                  U5      nX44$ r   ©r§  r¨  )r   Úsequence_outputr2   Úprediction_scoresÚseq_relationship_scores        r   r"   ÚLxmertPreTrainingHeads.forwardï  s-   € Ø ×,Ñ,¨_Ó=ÐØ!%×!6Ñ!6°}Ó!EÐØ Ð8Ð8r   r«  r%   r,   s   @r   r¥  r¥  é  s   ø† õA÷
9ð 9r   r¥  c                   ó*   • \ rS rSr\r\rSrSr	S r
Srg)ÚLxmertPreTrainedModeliõ  ÚlxmertFc                 óŒ  • [        U[        R                  5      (       ak  UR                  R                  R                  SU R                  R                  S9  UR                  b%  UR                  R                  R                  5         gg[        U[        R                  5      (       ax  UR                  R                  R                  SU R                  R                  S9  UR                  b2  UR                  R                  UR                     R                  5         gg[        U[        R                  5      (       aJ  UR                  R                  R                  5         UR                  R                  R                  S5        g[        U[        5      (       a%  UR                  R                  R                  5         gg)zInitialize the weightsç        )ÚmeanÚstdNç      ð?)Ú
isinstancer   r¼   rT   ry   Únormal_r{   Úinitializer_rangerW   Úzero_r’   rŽ   rš   Úfill_r|  )r   Úmodules     r   Ú_init_weightsÚ#LxmertPreTrainedModel._init_weightsü  s3  € äfœbŸi™i×(Ñ(ð M‰M×Ñ×&Ñ&¨C°T·[±[×5RÑ5RÐ&ÑSØ{‰{Ñ&Ø—‘× Ñ ×&Ñ&Õ(ð 'ä˜¤§¡×-Ñ-ØM‰M×Ñ×&Ñ&¨C°T·[±[×5RÑ5RÐ&ÑSØ×!Ñ!Ñ-Ø—‘×"Ñ" 6×#5Ñ#5Ñ6×<Ñ<Õ>ð .ä˜¤§¡×-Ñ-ØK‰K×Ñ×"Ñ"Ô$ØM‰M×Ñ×$Ñ$ SÕ)Ü˜Ô 6×7Ñ7ØK‰K×Ñ×"Ñ"Õ$ð 8r   r$   N)r&   r'   r(   r)   r   Úconfig_classrŠ   Úload_tf_weightsÚbase_model_prefixÚ!_supports_param_buffer_assignmentr¾  r*   r$   r   r   r±  r±  õ  s   † à€LØ/€OØ ÐØ(-Ð%õ%r   r±  c                   óx  ^ • \ rS rSrU 4S jrS rS r\          SS\\	R                     S\\	R                     S\\	R                     S\\	R                     S	\\	R                     S
\\	R                     S\\	R                     S\\   S\\   S\\   S\\\\	R                     4   4S jj5       rSrU =r$ )ÚLxmertModeli  c                 ó¤   >• [         TU ]  U5        [        U5      U l        [	        U5      U l        [        U5      U l        U R                  5         g r   )	r   r   rŒ   r±   rV  Úencoderrl  ÚpoolerÚ	post_initrž   s     €r   r   ÚLxmertModel.__init__  s>   ø€ Ü‰Ñ˜Ô Ü*¨6Ó2ˆŒÜ$ VÓ,ˆŒÜ" 6Ó*ˆŒà‰Õr   c                 ó.   • U R                   R                  $ r   ©r±   r•   ©r   s    r   Úget_input_embeddingsÚ LxmertModel.get_input_embeddings  s   € Ø‰×.Ñ.Ð.r   c                 ó$   • XR                   l        g r   rÌ  )r   Únew_embeddingss     r   Úset_input_embeddingsÚ LxmertModel.set_input_embeddings  s   € Ø*8‰Õ'r   r¬   rB  rR  rÔ   r1  r­   r®   rÕ   Úoutput_hidden_statesÚreturn_dictÚreturnc                 óŠ  • Ub  UOU R                   R                  nU	b  U	OU R                   R                  n	U
b  U
OU R                   R                  n
Ub  Ub  [	        S5      eUb"  U R                  X5        UR                  5       nO"Ub  UR                  5       SS nO[	        S5      eUc  [	        S5      eUc  [	        S5      eUb  UR                  OUR                  nUc  [        R                  " X¼S9nUc$  [        R                  " U[        R                  US9nUR                  S	5      R                  S
5      nUR                  U R                  S9nSU-
  [        R                  " U R                  5      R                   -  nUbj  UR                  S	5      R                  S
5      nUR                  U R                  S9nSU-
  [        R                  " U R                  5      R                   -  nOSnU R#                  XU5      nU R%                  UUUUUUS9nUSS
 u  nnUS   nUS   nSnU(       a  US	   nUS	   nUS
   nUUU4nU	(       a  UU4OSnUS   nUS   nU R'                  U5      nU
(       d  UUU4U-   U-   $ [)        UUUU	(       a  UOSU	(       a  UOSU(       a  WOSU(       a  WOSU(       a  WS9$ SS9$ )aÿ  
visual_feats (`torch.FloatTensor` of shape `(batch_size, num_visual_features, visual_feat_dim)`):
    This input represents visual features. They ROI pooled object features from bounding boxes using a
    faster-RCNN model)

    These are currently not provided by the transformers library.
visual_pos (`torch.FloatTensor` of shape `(batch_size, num_visual_features, visual_pos_dim)`):
    This input represents spatial features corresponding to their relative (via index) visual features. The
    pre-trained LXMERT model expects these spatial features to be normalized bounding boxes on a scale of 0 to
    1.

    These are currently not provided by the transformers library.
visual_attention_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

    - 1 for tokens that are **not masked**,
    - 0 for tokens that are **masked**.

    [What are attention masks?](../glossary#attention-mask)
NzDYou cannot specify both input_ids and inputs_embeds at the same timer¡   z5You have to specify either input_ids or inputs_embedsz`visual_feats` cannot be `None`z`visual_pos` cannot be `None`©r¤   r¢   r   r[   )r£   r·  )rB  rR  r1  rÕ   r   r$   )r2   r0   r1   r3   r4   r5   r6   r7   )r{   rÕ   rÔ  Úuse_return_dictr¹   Ú%warn_if_padding_and_no_attention_maskr¥   r¤   r9   Úonesrª   r§   r¨   Útor£   ÚfinfoÚminr±   rÇ  rÈ  r.   )r   r¬   rB  rR  rÔ   r1  r­   r®   rÕ   rÔ  rÕ  r¯   r¤   Úextended_attention_maskÚextended_visual_attention_maskÚembedding_outputÚencoder_outputsrh  ri  r4   r3   Úall_attentionsr5   r6   r7   rÒ   r=  r)  r2   s                                r   r"   ÚLxmertModel.forward  s,  € ðD 2CÑ1NÑ-ÐTX×T_ÑT_×TqÑTqÐà$8Ñ$DÑ È$Ï+É+×JjÑJjð 	ð &1Ñ%<‘kÀ$Ç+Á+×B]ÑB]ˆàÑ  ]Ñ%>ÜÐcÓdÐdØÑ"Ø×6Ñ6°yÔQØ#Ÿ.™.Ó*‰KØÑ&Ø'×,Ñ,Ó.¨s°Ð3‰KäÐTÓUÐUàÑÜÐ>Ó?Ð?ØÑÜÐ<Ó=Ð=à%.Ñ%:×!Ò!À×@TÑ@TˆàÑ!Ü"ŸZšZ¨ÑCˆNØÑ!Ü"Ÿ[š[¨¼E¿J¹JÈvÑVˆNð #1×":Ñ":¸1Ó"=×"GÑ"GÈÓ"JÐð #:×"<Ñ"<À4Ç:Á:Ð"<Ð"NÐØ#&Ð)@Ñ#@ÄEÇKÂKÐPT×PZÑPZÓD[×D_ÑD_Ñ"_Ðð !Ñ,Ø-B×-LÑ-LÈQÓ-O×-YÑ-YÐZ[Ó-\Ð*Ø-K×-NÑ-NÐUY×U_ÑU_Ð-NÐ-`Ð*Ø.1Ð4RÑ.RÔV[×VaÒVaÐbf×blÑblÓVm×VqÑVqÑ-qÑ*à-1Ð*ð  Ÿ?™?¨9ÀmÓTÐð Ÿ,™,ØØ#Ø%Ø!Ø"@Ø/ð 'ð 
ˆð 8GÀrÈÐ7JÑ4ÐÐ 4Ø5°aÑ8ÐØ!5°aÑ!8ÐàˆÞØ"6°qÑ"9ÐØ 6°qÑ 9ÐØ'6°qÑ'9Ð$à#Ø!Ø(ðˆNö K_Ð/Ð1EÑFÐdfˆà,¨RÑ0ˆØ,¨RÑ0ˆØŸ™ KÓ0ˆæØ °Ð>ÀÑNÐQ_Ñ_Ð_ä Ø'Ø'Ø'Þ=QÑ#9ÐW[Þ9MÑ!5ÐSWÞ7HÑ 3ÈdÞ3DÑ/È$ÞARÐ%=ñ	
ð 		
ð Y]ñ	
ð 		
r   )r±   rÇ  rÈ  )
NNNNNNNNNN)r&   r'   r(   r)   r   rÎ  rÒ  r   r   r9   Ú
LongTensorr:   Úboolr   r.   r   r"   r*   r+   r,   s   @r   rÅ  rÅ    s.  ø† õò/ò9ð ð 15Ø48Ø26Ø6:Ø=AØ59Ø59Ø,0Ø/3Ø&*ñB
à˜E×,Ñ,Ñ-ðB
ð ˜u×0Ñ0Ñ1ðB
ð ˜U×.Ñ.Ñ/ð	B
ð
 ! ×!2Ñ!2Ñ3ðB
ð  (¨×(9Ñ(9Ñ:ðB
ð ! ×!1Ñ!1Ñ2ðB
ð   × 1Ñ 1Ñ2ðB
ð $ D™>ðB
ð ' t™nðB
ð ˜d‘^ðB
ð 
Ð  %¨×(9Ñ(9Ñ":Ð:Ñ	;ôB
ó öB
r   rÅ  c            #       ó¨  ^ • \ rS rSrS/rU 4S jrS r S S\S\\   S\	S\
R                  4U 4S	 jjjrS\4S
 jrS rS rS\
R                   4S jrS rS r\              S!S\\R,                     S\\R.                     S\\R.                     S\\R.                     S\\R.                     S\\R,                     S\\R.                     S\\R,                     S\\\\\R.                  \R.                  4   4      S\\R,                     S\\R6                     S\\	   S\\	   S\\	   S\\\\R.                     4   4S jj5       rSrU =r $ )"ÚLxmertForPreTrainingi¥  zcls.predictions.decoder.weightc                 ó†  >• [         TU ]  U5        Xl        UR                  U l        UR                  U l        UR
                  U l        UR                  U l        UR                  U l        UR                  U l        [        U5      U l
        [        XR                  R                  R                  R                  5      U l        U R                  (       a  [!        U5      U l        U R                  (       a  [%        XR                  5      U l        U R)                  5         [+        SS9[-        SS9[-        5       S.U l        0 nUR0                  (       a  SUR2                  SS.US'   UR4                  (       a  SUR6                  SS.US'   UR8                  (       a  S	UR:                  4UR:                  S
S.US'   X l        g )NÚnone)Ú	reduction)Úl2Ú	visual_ceÚcer•  rí  )ru   rˆ   r?   r–  r—  r¡   rì  r˜  )r   r   r{   Únum_qa_labelsÚvisual_loss_normalizerÚtask_mask_lmÚtask_obj_predictÚtask_matchedÚtask_qarÅ  r²  r¥  r±   r•   rT   Úclsr“  Úobj_predict_headrˆ  Úanswer_headrÉ  r
   r	   Ú	loss_fctsr™  rš  r›  rœ  r  rG  rž  )r   r{   rž  r   s      €r   r   ÚLxmertForPreTraining.__init__©  s  ø€ Ü‰Ñ˜Ô àŒØ#×1Ñ1ˆÔØ&,×&CÑ&CˆÔ#ð #×/Ñ/ˆÔØ &× 7Ñ 7ˆÔØ"×/Ñ/ˆÔØ—~‘~ˆŒô " &Ó)ˆŒô *¨&·+±+×2HÑ2H×2XÑ2X×2_Ñ2_Ó`ˆŒØ× × Ü$7¸Ó$?ˆDÔ!Ø<<Ü5°f×>PÑ>PÓQˆDÔð 	‰Ôô ¨Ñ0Ü)°FÑ;Ü"Ó$ñ
ˆŒð ˆØ×!×!àØ×/Ñ/Ø#ñ$ˆM˜%Ñ ð
 ×"×"àØ×-Ñ-Ø#ñ%ˆM˜&Ñ!ð
 ×"×"à˜f×4Ñ4Ð5Ø×-Ñ-Øñ%ˆM˜&Ñ!ð
 +Õr   c                 óž   • U R                   R                  R                  R                  U R                  R
                  R                  l        g r   )r²  r±   r•   rT   rõ  r§  r€  rÍ  s    r   Ú_tie_weightsÚ!LxmertForPreTraining._tie_weightsà  s0   € Ø.2¯k©k×.DÑ.D×.TÑ.T×.[Ñ.[ˆ‰×Ñ×$Ñ$Õ+r   Únew_num_tokensÚpad_to_multiple_ofÚmean_resizingrÖ  c                 óº   >• [         TU ]  XU5      nU R                  U R                  R                  R
                  U5      U R                  R                  l        U$ r   )r   Úresize_token_embeddingsÚ_resize_biasrõ  r§  rW   )r   rý  rþ  rÿ  rÑ  r   s        €r   r  Ú,LxmertForPreTraining.resize_token_embeddingsã  sM   ø€ ô ™Ñ8¸Ð]jÓkˆØ$(×$5Ñ$5°d·h±h×6JÑ6J×6OÑ6OÐQ_Ó$`ˆ‰×ÑÔ!ØÐr   c                 óÖ   • UR                   S   nX#::  a  US U nO8[        R                  " X#-
  UR                  S9n[        R                  " X/5      n[
        R                  " U5      nU$ )Nr   rØ  )ru   r9   rª   r¤   Úcatr   r  )r   rW   rý  Úold_num_tokensÚnew_biasÚ
extra_biass         r   r  Ú!LxmertForPreTraining._resize_biasë  s\   € ØŸ™ A™ˆØÓ+Ø˜O˜^Ð,‰HäŸš ^Ñ%DÈTÏ[É[ÑYˆJÜ—y’y $Ð!3Ó4ˆHÜ—<’< Ó)ˆØˆr   c                 ó‚   • U R                  5       nUb  Uc  gU R                  U5      nXR                  l        Xl        U$ ©a—  
Build a resized question answering linear layer Module from a provided new linear layer. Increasing the size
will add newly initialized weights. Reducing the size will remove weights from the end

Args:
    num_labels (`int`, *optional*):
        New number of labels in the linear layer weight matrix. Increasing the size will add newly initialized
        weights at the end. Reducing the size will remove weights from the end. If not provided or `None`, just
        returns a pointer to the qa labels ``torch.nn.Linear``` module of the model without doing anything.

Return:
    `torch.nn.Linear`: Pointer to the resized Linear layer or the old Linear layer
N©Úget_qa_logit_layerÚ_resize_qa_labelsr{   rï  ©r   rŒ  Úcur_qa_logit_layerÚnew_qa_logit_layers       r   Úresize_num_qa_labelsÚ)LxmertForPreTraining.resize_num_qa_labelsõ  óI   € ð "×4Ñ4Ó6ÐØÑÐ!3Ñ!;ØØ!×3Ñ3°JÓ?ÐØ$.‰Ô!Ø'Ôà!Ð!r   c                 ó†   • U R                  5       nU R                  X!5      nU R                  U5        U R                  5       $ r   ©r  Ú_get_resized_qa_labelsÚ_set_qa_logit_layerr  s       r   r  Ú&LxmertForPreTraining._resize_qa_labels  óA   € Ø!×4Ñ4Ó6ÐØ!×8Ñ8Ð9KÓXÐØ× Ñ Ð!3Ô4Ø×&Ñ&Ó(Ð(r   c                 óX   • [        U S5      (       a  U R                  R                  S   $ g)zä
Returns the linear layer that produces question answering logits.

Returns:
    `nn.Module`: A torch module mapping the question answering prediction hidden states or `None` if LXMERT
    does not have a visual answering head.
r÷  r¡   N©Úhasattrr÷  r‹  rÍ  s    r   r  Ú'LxmertForPreTraining.get_qa_logit_layer  s-   € ô 4˜×'Ñ'Ø×#Ñ#×,Ñ,¨RÑ0Ð0ð (r   c                 ó4   • XR                   R                  S'   g ©Nr¡   ©r÷  r‹  ©r   Úqa_logit_layers     r   r  Ú(LxmertForPreTraining._set_qa_logit_layer  ó   € Ø(6×Ñ×!Ñ! "Ò%r   c                 ó@  • Uc  U$ UR                   R                  5       u  p4X2:X  a  U$ [        USS 5      b  [        R                  " XB5      nO[        R                  " XBSS9nUR                  UR                   R                  5        U R                  U5        [        X25      nUR                   R                  S U2S S 24   UR                   R                  S U2S S 24'   [        USS 5      b0  UR                  R                  S U UR                  R                  S U& U$ ©NrW   Fr~  ©rT   r¥   rp   r   r¼   rÜ  r¤   r¾  rÞ  ry   rW   ©r   r  rŒ  Úcur_qa_labelsÚ
hidden_dimr  Únum_labels_to_copys          r   r  Ú+LxmertForPreTraining._get_resized_qa_labels!  ó#  € ØÑØ%Ð%à$6×$=Ñ$=×$BÑ$BÓ$DÑ!ˆØÓ&Ø%Ð%ô Ð% v¨tÓ4Ñ@Ü!#§¢¨:Ó!BÑä!#§¢¨:ÈÑ!NÐà×ÑÐ0×7Ñ7×>Ñ>Ô?ð 	×ÑÐ-Ô.ô ! Ó;ÐØAS×AZÑAZ×A_ÑA_Ð`sÐasÐ`sÒuvÐ`vÑAwÐ×!Ñ!×&Ñ&Ð':Ð(:Ð':ºAÐ'=Ñ>ÜÐ% v¨tÓ4Ñ@Ø@R×@WÑ@W×@\Ñ@\Ð]pÐ^pÐ@qÐ×#Ñ#×(Ñ(Ð)<Ð*<Ð=à!Ð!r   r¬   rB  rR  rÔ   r1  r­   r®   ÚlabelsÚ
obj_labelsÚmatched_labelÚansrÕ   rÔ  rÕ  c                 óð  • SU;   a,  [         R                  " S[        5        UR                  S5      nUb  UOU R                  R
                  nUb  UR                  OUR                  nU R                  UUUUUUUUUUS9
nUS   US   US   nnnU R                  UU5      u  nnU R                  (       a  U R                  U5      nOUS   S   nUc  U
c  U	c  Uc  SO[        R                  " SUS	9nUb_  U R                  (       aN  U R                  S
   " UR                  SU R                  R                   5      UR                  S5      5      nUU-  nU
bK  U R"                  (       a:  U R                  S
   " UR                  SS5      U
R                  S5      5      nUU-  nU	Gb  U R$                  (       Ga  [        R                  " SUR                  S	9nU R'                  U5      nU R(                  R+                  5        H´  u  nnU	U   u  nn US   n!US   n"US   n#U R,                  n$U R                  U"   n%UU   n&U%" U&R                  SU!5      UR                  U#5      5      n'U'R/                  5       S:”  a  U'R1                  S5      n'U'U R                  S5      -  R1                  5       U$-  n'UU'-  nM¶     UU-  nUbU  U R                  (       aD  U R                  S
   " UR                  SU R2                  5      UR                  S5      5      n(UU(-  nU(       d  UUU4USS -   n)Ub  U4U)-   $ U)$ [5        UUUUUR6                  UR8                  UR:                  UR<                  UR>                  S9	$ )aÂ  
visual_feats (`torch.FloatTensor` of shape `(batch_size, num_visual_features, visual_feat_dim)`):
    This input represents visual features. They ROI pooled object features from bounding boxes using a
    faster-RCNN model)

    These are currently not provided by the transformers library.
visual_pos (`torch.FloatTensor` of shape `(batch_size, num_visual_features, visual_pos_dim)`):
    This input represents spatial features corresponding to their relative (via index) visual features. The
    pre-trained LXMERT model expects these spatial features to be normalized bounding boxes on a scale of 0 to
    1.

    These are currently not provided by the transformers library.
visual_attention_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

    - 1 for tokens that are **not masked**,
    - 0 for tokens that are **masked**.

    [What are attention masks?](../glossary#attention-mask)
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
    config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
    loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
obj_labels (`Dict[Str: Tuple[Torch.FloatTensor, Torch.FloatTensor]]`, *optional*):
    each key is named after each one of the visual losses and each element of the tuple is of the shape
    `(batch_size, num_features)` and `(batch_size, num_features, visual_feature_dim)` for each the label id and
    the label score respectively
matched_label (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the whether or not the text input matches the image (classification) loss. Input
    should be a sequence pair (see `input_ids` docstring) Indices should be in `[0, 1]`:

    - 0 indicates that the sentence does not match the image,
    - 1 indicates that the sentence does match the image.
ans (`Torch.Tensor` of shape `(batch_size)`, *optional*):
    a one hot representation hof the correct answer *optional*
Úmasked_lm_labelszlThe `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.N©
r¬   rB  rR  r­   rÔ   r1  r®   rÔ  rÕ   rÕ  r   r   r[   r´  rØ  rî  r¡   rˆ   r?   ru   r   )	r?   rD   rE   r@   r3   r4   r5   r6   r7   ) ÚwarningsÚwarnÚFutureWarningÚpopr{   rÙ  r¤   r²  rõ  rô  r÷  r9   Útensorrñ  rø  rÄ   r“   ró  rò  rö  rž  Úitemsrð  rË   rµ  rï  rB   r3   r4   r5   r6   r7   )*r   r¬   rB  rR  rÔ   r1  r­   r®   r/  r0  r1  r2  rÕ   rÔ  rÕ  Úkwargsr¤   Úlxmert_outputr)  r=  r2   Úlang_prediction_scoresrE   Úanswer_scoreÚ
total_lossÚmasked_lm_lossÚmatched_lossÚtotal_visual_lossÚvisual_prediction_scores_dictr¾   Úkey_infoÚlabelÚ	mask_confÚ
output_dimÚloss_fct_nameÚlabel_shaperT   Úvisual_loss_fctÚvisual_prediction_scoresÚvisual_lossÚanswer_lossrõ   s*                                             r   r"   ÚLxmertForPreTraining.forward<  s³  € ðp  Ó'ÜMŠMðäôð
 —Z‘ZÐ 2Ó3ˆFà%0Ñ%<‘kÀ$Ç+Á+×B]ÑB]ˆà%.Ñ%:×!Ò!À×@TÑ@TˆØŸ™ØØ%Ø!Ø)Ø)Ø"7Ø'Ø!5Ø/Ø#ð $ð 
ˆð ˜!ÑØ˜!ÑØ˜!Ñð %2]ˆð
 <@¿8¹8ÀKÐQ^Ó;_Ñ8ÐÐ 8Ø<<Ø×+Ñ+¨MÓ:‰Là(¨Ñ+¨AÑ.ˆLð ‘ =Ñ#8¸ZÑ=OÐTWÑT_ñ ä—’˜c¨&Ñ1ð 	ð
 Ñ $×"3×"3Ø!Ÿ^™^¨DÒ1Ø&×+Ñ+¨B°·±×0FÑ0FÓGØ—‘˜B“óˆNð ˜.Ñ(ˆJØÑ$¨×):×):ØŸ>™>¨$Ò/Ð0H×0MÑ0MÈbÐRSÓ0TÐVc×VhÑVhÐikÓVlÓmˆLØ˜,Ñ&ˆJØÒ! d×&;×&;Ð&;Ü %§¢¨S¸×9IÑ9IÑ JÐØ,0×,AÑ,AÀ-Ó,PÐ)Ø!%×!3Ñ!3×!9Ñ!9Ö!;‘XØ#-¨c¡?Ñ yØ% e™_
Ø (¨Ñ 0Ø& wÑ/Ø×4Ñ4Ø"&§.¡.°Ñ"?Ø+HÈÑ+MÐ(Ù-Ø,×1Ñ1°"°jÓAØ—J‘J˜{Ó+óð —?‘?Ó$ qÓ(Ø"-×"2Ñ"2°1Ó"5KØ*¨Y¯^©^¸BÓ-?Ñ?×EÑEÓGÈ&ÑPØ! [Ñ0Ò!ñ "<ð  Ð+Ñ+ˆJØ‰?˜tŸ|Ÿ|ØŸ.™.¨Ò.¨|×/@Ñ/@ÀÀT×EWÑEWÓ/XÐZ]×ZbÑZbÐceÓZfÓgˆKØ˜+Ñ%ˆJæà&Ø(Øðð ˜a˜bÐ!ñ	"ˆFð
 0:Ñ/EZM FÑ*ÐQÈ6ÐQä)ØØ4Ø%=Ø%1Ø#0×#GÑ#GØ!.×!CÑ!CØ -× AÑ AØ+×=Ñ=Ø%2×%KÑ%Kñ

ð 
	
r   )r÷  rõ  r{   rø  r²  rï  rö  rñ  ró  rò  rô  rð  rž  )NT)NNNNNNNNNNNNNN)!r&   r'   r(   r)   Ú_tied_weights_keysr   rû  rs   r   ræ  r   r’   r  r  r  r  ÚModuler  r  r  r   r9   rå  r:   r   Ústrr   ÚTensorr   rB   r"   r*   r+   r,   s   @r   rè  rè  ¥  s  ø† à:Ð;Ðõ5+òn\ð dhñØ!ðØ7?À±}ðØ\`ðà	‰÷ð ð°ô ò"ò0)ð	1 B§I¡Iô 	1ò7ò"ð6 ð 15Ø48Ø26Ø6:Ø=AØ59Ø59Ø-1ØW[Ø48Ø&*Ø,0Ø/3Ø&*ñR
à˜E×,Ñ,Ñ-ðR
ð ˜u×0Ñ0Ñ1ðR
ð ˜U×.Ñ.Ñ/ð	R
ð
 ! ×!2Ñ!2Ñ3ðR
ð  (¨×(9Ñ(9Ñ:ðR
ð ! ×!1Ñ!1Ñ2ðR
ð   × 1Ñ 1Ñ2ðR
ð ˜×)Ñ)Ñ*ðR
ð ˜T # u¨U×->Ñ->À×@QÑ@QÐ-QÑ'RÐ"RÑSÑTðR
ð   × 0Ñ 0Ñ1ðR
ð e—l‘lÑ#ðR
ð $ D™>ðR
ð ' t™nðR
ð ˜d‘^ðR
ð" 
Ð)¨5°×1BÑ1BÑ+CÐCÑ	Dô#R
ó öR
r   rè  zR
    Lxmert Model with a visual-answering head on top for downstream QA tasks
    )Úcustom_introc                   óÆ  ^ • \ rS rSrU 4S jrS rS rS\R                  4S jr	S r
S r\           SS	\\R                     S
\\R                      S\\R                      S\\R                      S\\R                      S\\R                     S\\R                      S\\R"                     S\\   S\\   S\\   S\\\\R                      4   4S jj5       rSrU =r$ )ÚLxmertForQuestionAnsweringiÒ  c                 ó  >• [         TU ]  U5        Xl        UR                  U l        UR                  U l        [        U5      U l        [        XR                  5      U l        U R                  5         [        5       U l        g r   )r   r   r{   rï  rð  rÅ  r²  rˆ  r÷  rÉ  r	   r?   rž   s     €r   r   Ú#LxmertForQuestionAnswering.__init__Ø  sj   ø€ Ü‰Ñ˜Ô àŒØ#×1Ñ1ˆÔØ&,×&CÑ&CˆÔ#ô " &Ó)ˆŒä1°&×:LÑ:LÓMˆÔð 	‰Ôô %Ó&ˆ	r   c                 ó‚   • U R                  5       nUb  Uc  gU R                  U5      nXR                  l        Xl        U$ r  r  r  s       r   r  Ú/LxmertForQuestionAnswering.resize_num_qa_labelsë  r  r   c                 ó†   • U R                  5       nU R                  X!5      nU R                  U5        U R                  5       $ r   r  r  s       r   r  Ú,LxmertForQuestionAnswering._resize_qa_labels  r  r   rÖ  c                 óX   • [        U S5      (       a  U R                  R                  S   $ g)zö
Returns the linear layer that produces question answering logits

Returns:
    `nn.Module`: A torch module mapping the question answering prediction hidden states. `None`: A NoneType
    object if Lxmert does not have the visual answering head.
r÷  r¡   Nr  rÍ  s    r   r  Ú-LxmertForQuestionAnswering.get_qa_logit_layer	  s-   € ô 4˜×'Ñ'Ø×#Ñ#×,Ñ,¨RÑ0Ð0ð (r   c                 ó4   • XR                   R                  S'   g r   r!  r"  s     r   r  Ú.LxmertForQuestionAnswering._set_qa_logit_layer  r%  r   c                 ó@  • Uc  U$ UR                   R                  5       u  p4X2:X  a  U$ [        USS 5      b  [        R                  " XB5      nO[        R                  " XBSS9nUR                  UR                   R                  5        U R                  U5        [        X25      nUR                   R                  S U2S S 24   UR                   R                  S U2S S 24'   [        USS 5      b0  UR                  R                  S U UR                  R                  S U& U$ r'  r(  r)  s          r   r  Ú1LxmertForQuestionAnswering._get_resized_qa_labels  r.  r   r¬   rB  rR  rÔ   r1  r­   r®   r/  rÕ   rÔ  rÕ  c                 óÌ  • Ub  UOU R                   R                  nU R                  UUUUUUUU
U	US9
nUS   nU R                  U5      nSnUb;  U R	                  UR                  SU R                  5      UR                  S5      5      nU(       d  U4USS -   nUb  U4U-   $ U$ [        UUUR                  UR                  UR                  UR                  UR                  S9$ )ao  
visual_feats (`torch.FloatTensor` of shape `(batch_size, num_visual_features, visual_feat_dim)`):
    This input represents visual features. They ROI pooled object features from bounding boxes using a
    faster-RCNN model)

    These are currently not provided by the transformers library.
visual_pos (`torch.FloatTensor` of shape `(batch_size, num_visual_features, visual_pos_dim)`):
    This input represents spatial features corresponding to their relative (via index) visual features. The
    pre-trained LXMERT model expects these spatial features to be normalized bounding boxes on a scale of 0 to
    1.

    These are currently not provided by the transformers library.
visual_attention_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

    - 1 for tokens that are **not masked**,
    - 0 for tokens that are **masked**.

    [What are attention masks?](../glossary#attention-mask)
labels (`Torch.Tensor` of shape `(batch_size)`, *optional*):
    A one-hot representation of the correct answer
Nr5  r[   r¡   r   )r?   r@   r3   r4   r5   r6   r7   )r{   rÙ  r²  r÷  r?   rÄ   rï  r=   r3   r4   r5   r6   r7   )r   r¬   rB  rR  rÔ   r1  r­   r®   r/  rÕ   rÔ  rÕ  r=  r2   r?  r?   rõ   s                    r   r"   Ú"LxmertForQuestionAnswering.forward3  s  € ðJ &1Ñ%<‘kÀ$Ç+Á+×B]ÑB]ˆàŸ™ØØ%Ø!Ø)Ø)Ø"7Ø'Ø!5Ø/Ø#ð $ð 
ˆð & aÑ(ˆØ×'Ñ'¨Ó6ˆØˆØÑØ—9‘9˜\×.Ñ.¨r°4×3EÑ3EÓFÈÏÉÐTVËÓXˆDæØ"_ }°Q°RÐ'8Ñ8ˆFØ'+Ñ'7D7˜VÑ#ÐC¸VÐCä/ØØ%1Ø#0×#GÑ#GØ!.×!CÑ!CØ -× AÑ AØ+×=Ñ=Ø%2×%KÑ%Kñ
ð 	
r   )r÷  r{   r?   r²  rï  rð  )NNNNNNNNNNN)r&   r'   r(   r)   r   r  r  r   rQ  r  r  r  r   r   r9   rå  r:   rS  ræ  r   r=   r   r"   r*   r+   r,   s   @r   rV  rV  Ò  s_  ø† õ'ò&"ò0)ð
1 B§I¡Iô 
1ò7ò"ð6 ð 15Ø48Ø26Ø6:Ø=AØ59Ø59Ø)-Ø,0Ø/3Ø&*ñE
à˜E×,Ñ,Ñ-ðE
ð ˜u×0Ñ0Ñ1ðE
ð ˜U×.Ñ.Ñ/ð	E
ð
 ! ×!2Ñ!2Ñ3ðE
ð  (¨×(9Ñ(9Ñ:ðE
ð ! ×!1Ñ!1Ñ2ðE
ð   × 1Ñ 1Ñ2ðE
ð ˜Ÿ™Ñ&ðE
ð $ D™>ðE
ð ' t™nðE
ð ˜d‘^ðE
ð 
Ð/°°u×7HÑ7HÑ1IÐIÑ	JôE
ó öE
r   rV  )rV  rè  rV  rÅ  r±  rE  r#  ):r8   rÍ   rc   r6  Údataclassesr   Útypingr   r   r   r   r9   r   Útorch.nnr	   r
   Úactivationsr   r   Úmodeling_utilsr   Úutilsr   r   r   Úconfiguration_lxmertr   Ú
get_loggerr&   ra   rQ  r   r.   r=   rB   rŠ   rŒ   rµ   rä   rò   r   r  r  r  r#  rE  rV  rl  ru  r|  rˆ  r“  r¥  r±  rÅ  rè  rV  Ú__all__r$   r   r   Ú<module>rn     s^  ðñ ã Û 	Û Ý !ß /Ó /ã Ý ß 3ç 'Ý -ß 9Ñ 9Ý .ð 
×	Ò	˜HÓ	%€ôˆ29‰9ô ð ô*H˜ó *Hó ð*HðZ ô$H {ó $Hó ð$HðN ô,H ó ,Hó ð,Hò^Lô^%r—y‘yô %ôP:b—i‘iô :ôz˜BŸI™Iô ô §	¡	ô ô˜rŸy™yô ô*	˜Ÿ™ô 	ô2—9‘9ô ô"—)‘)ô ô X
2—9‘9ô X
ôv §¡ô ô6N
B—I‘Iô N
ôb2—9‘9ô ô B§I¡Iô ô˜RŸY™Yô ô*,˜RŸY™Yô ,ô˜"Ÿ)™)ô ô>	9˜RŸY™Yô 	9ð ô%˜Oó %ó ð%ð2 ôR
Ð'ó R
ó ðR
ðj ôi
Ð0ó i
ó ði
ñX	 ðñô
b
Ð!6ó b
óð
b
òJr   