"""PyTorch SuperGlue model."""

import math
from dataclasses import dataclass
from typing import Optional, Tuple, Union

import torch
from torch import nn

from transformers import PreTrainedModel
from transformers.models.superglue.configuration_superglue import SuperGlueConfig

from ...pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer
from ...utils import ModelOutput, auto_docstring, logging
from ..auto import AutoModelForKeypointDetection


logger = logging.get_logger(__name__)


def concat_pairs(tensor_tuple0: Tuple[torch.Tensor], tensor_tuple1: Tuple[torch.Tensor]) -> Tuple[torch.Tensor]:
    """
    Concatenate two tuples of tensors pairwise

    Args:
        tensor_tuple0 (`Tuple[torch.Tensor]`):
            Tuple of tensors.
        tensor_tuple1 (`Tuple[torch.Tensor]`):
            Tuple of tensors.

    Returns:
        (`Tuple[torch.Tensor]`): Tuple of concatenated tensors.
    """
    return tuple([torch.cat([tensor0, tensor1]) for tensor0, tensor1 in zip(tensor_tuple0, tensor_tuple1)])
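

# Illustrative sketch (not part of the original file): `concat_pairs` concatenates the
# two tuples element-wise along the first dimension, e.g.
#
#   concat_pairs((torch.ones(1, 2),), (torch.zeros(1, 2),))[0].shape  # torch.Size([2, 2])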


def normalize_keypoints(keypoints: torch.Tensor, height: int, width: int) -> torch.Tensor:
    """
    Normalize keypoints locations based on image image_shape

    Args:
        keypoints (`torch.Tensor` of shape `(batch_size, num_keypoints, 2)`):
            Keypoints locations in (x, y) format.
        height (`int`):
            Image height.
        width (`int`):
            Image width.

    Returns:
        Normalized keypoints locations of shape (`torch.Tensor` of shape `(batch_size, num_keypoints, 2)`).
    """
    size = torch.tensor([width, height], device=keypoints.device, dtype=keypoints.dtype)[None]
    center = size / 2
    scaling = size.max(1, keepdim=True).values * 0.7
    return (keypoints - center[:, None, :]) / scaling[:, None, :]


def log_sinkhorn_iterations(
    log_cost_matrix: torch.Tensor,
    log_source_distribution: torch.Tensor,
    log_target_distribution: torch.Tensor,
    num_iterations: int,
) -> torch.Tensor:
    """
    Perform Sinkhorn Normalization in Log-space for stability

    Args:
        log_cost_matrix (`torch.Tensor` of shape `(batch_size, num_rows, num_columns)`):
            Logarithm of the cost matrix.
        log_source_distribution (`torch.Tensor` of shape `(batch_size, num_rows)`):
            Logarithm of the source distribution.
        log_target_distribution (`torch.Tensor` of shape `(batch_size, num_columns)`):
            Logarithm of the target distribution.
        num_iterations (`int`):
            Number of Sinkhorn iterations to perform.

    Returns:
        log_cost_matrix (`torch.Tensor` of shape `(batch_size, num_rows, num_columns)`): Logarithm of the optimal
        transport matrix.
    """
    log_u_scaling = torch.zeros_like(log_source_distribution)
    log_v_scaling = torch.zeros_like(log_target_distribution)
    for _ in range(num_iterations):
        log_u_scaling = log_source_distribution - torch.logsumexp(log_cost_matrix + log_v_scaling.unsqueeze(1), dim=2)
        log_v_scaling = log_target_distribution - torch.logsumexp(log_cost_matrix + log_u_scaling.unsqueeze(2), dim=1)
    return log_cost_matrix + log_u_scaling.unsqueeze(2) + log_v_scaling.unsqueeze(1)
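

# Illustrative sketch (not part of the original file): each Sinkhorn iteration above
# alternately rescales rows and columns in log-space so the marginals of the transport
# plan converge to the given source/target distributions. With a zero cost matrix and
# uniform log-marginals of log(0.5):
#
#   cost = torch.zeros(1, 2, 2)
#   marginal = torch.full((1, 2), math.log(0.5))
#   plan = log_sinkhorn_iterations(cost, marginal, marginal, num_iterations=50).exp()
#   plan.sum(dim=2)  # ~= 0.5 everywhere: row sums match the source distribution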


def log_optimal_transport(scores: torch.Tensor, reg_param: torch.Tensor, iterations: int) -> torch.Tensor:
    """
    Perform Differentiable Optimal Transport in Log-space for stability

    Args:
        scores: (`torch.Tensor` of shape `(batch_size, num_rows, num_columns)`):
            Cost matrix.
        reg_param: (`torch.Tensor` of shape `(batch_size, 1, 1)`):
            Regularization parameter.
        iterations: (`int`):
            Number of Sinkhorn iterations.

    Returns:
        log_optimal_transport_matrix: (`torch.Tensor` of shape `(batch_size, num_rows, num_columns)`): Logarithm of the
        optimal transport matrix.
    """
    batch_size, num_rows, num_columns = scores.shape
    one_tensor = scores.new_tensor(1)
    num_rows_tensor, num_columns_tensor = (num_rows * one_tensor).to(scores), (num_columns * one_tensor).to(scores)

    source_reg_param = reg_param.expand(batch_size, num_rows, 1)
    target_reg_param = reg_param.expand(batch_size, 1, num_columns)
    reg_param = reg_param.expand(batch_size, 1, 1)

    couplings = torch.cat([torch.cat([scores, source_reg_param], -1), torch.cat([target_reg_param, reg_param], -1)], 1)

    log_normalization = -(num_rows_tensor + num_columns_tensor).log()
    log_source_distribution = torch.cat(
        [log_normalization.expand(num_rows), num_columns_tensor.log()[None] + log_normalization]
    )
    log_target_distribution = torch.cat(
        [log_normalization.expand(num_columns), num_rows_tensor.log()[None] + log_normalization]
    )
    log_source_distribution, log_target_distribution = (
        log_source_distribution[None].expand(batch_size, -1),
        log_target_distribution[None].expand(batch_size, -1),
    )

    log_optimal_transport_matrix = log_sinkhorn_iterations(
        couplings, log_source_distribution, log_target_distribution, num_iterations=iterations
    )
    log_optimal_transport_matrix = log_optimal_transport_matrix - log_normalization  # multiply probabilities by M + N
    return log_optimal_transport_matrix
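

# Illustrative sketch (not part of the original file): `log_optimal_transport` pads the
# `(num_rows, num_columns)` score matrix with one extra "dustbin" row and column that
# share the learned regularization score (the model's `bin_score`), so unmatched
# keypoints have somewhere to be assigned. For a `(1, 2, 2)` input, the coupling solved
# by the Sinkhorn loop is therefore `(1, 3, 3)`:
#
#   log_p = log_optimal_transport(torch.zeros(1, 2, 2), torch.tensor(1.0), iterations=20)
#   log_p.shape  # torch.Size([1, 3, 3])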


def arange_like(x, dim: int) -> torch.Tensor:
    return x.new_ones(x.shape[dim]).cumsum(0) - 1


@dataclass
class KeypointMatchingOutput(ModelOutput):
    """
    Base class for outputs of keypoint matching models. Due to the nature of keypoint detection and matching, the number
    of keypoints is not fixed and can vary from image to image, which makes batching non-trivial. In the batch of
    images, the maximum number of matches is set as the dimension of the matches and matching scores. The mask tensor is
    used to indicate which values in the keypoints, matches and matching_scores tensors are keypoint matching
    information.

    Args:
        loss (`torch.FloatTensor` of shape `(1,)`, *optional*):
            Loss computed during training.
        mask (`torch.IntTensor` of shape `(batch_size, num_keypoints)`):
            Mask indicating which values in matches and matching_scores are keypoint matching information.
        matches (`torch.FloatTensor` of shape `(batch_size, 2, num_matches)`):
            Index of keypoint matched in the other image.
        matching_scores (`torch.FloatTensor` of shape `(batch_size, 2, num_matches)`):
            Scores of predicted matches.
        keypoints (`torch.FloatTensor` of shape `(batch_size, num_keypoints, 2)`):
            Absolute (x, y) coordinates of predicted keypoints in a given image.
        hidden_states (`Tuple[torch.FloatTensor, ...]`, *optional*):
            Tuple of `torch.FloatTensor` (one for the output of each stage) of shape `(batch_size, 2, num_channels,
            num_keypoints)`, returned when `output_hidden_states=True` is passed or when
            `config.output_hidden_states=True`)
        attentions (`Tuple[torch.FloatTensor, ...]`, *optional*):
            Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, 2, num_heads, num_keypoints,
            num_keypoints)`, returned when `output_attentions=True` is passed or when `config.output_attentions=True`)
    """

    loss: Optional[torch.FloatTensor] = None
    matches: Optional[torch.FloatTensor] = None
    matching_scores: Optional[torch.FloatTensor] = None
    keypoints: Optional[torch.FloatTensor] = None
    mask: Optional[torch.IntTensor] = None
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    attentions: Optional[Tuple[torch.FloatTensor]] = None


class SuperGlueMultiLayerPerceptron(nn.Module):
    def __init__(self, config: SuperGlueConfig, in_channels: int, out_channels: int) -> None:
        super().__init__()
        self.linear = nn.Linear(in_channels, out_channels)
        self.batch_norm = nn.BatchNorm1d(out_channels)
        self.activation = nn.ReLU()

    def forward(self, hidden_state: torch.Tensor) -> torch.Tensor:
        hidden_state = self.linear(hidden_state)
        hidden_state = hidden_state.transpose(-1, -2)
        hidden_state = self.batch_norm(hidden_state)
        hidden_state = hidden_state.transpose(-1, -2)
        hidden_state = self.activation(hidden_state)
        return hidden_state
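

# Illustrative sketch (not part of the original file): the perceptron applies
# Linear -> BatchNorm1d -> ReLU. BatchNorm1d normalizes over the channel dimension,
# hence the transposes to (batch, channels, num_keypoints) and back around the norm.
# Assuming `config` is a `SuperGlueConfig`:
#
#   mlp = SuperGlueMultiLayerPerceptron(config, in_channels=3, out_channels=256)
#   mlp(torch.randn(2, 512, 3)).shape  # torch.Size([2, 512, 256])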
dZ
  ZS )SuperGlueKeypointEncoderrO   r   Nc                    sr   t     j} j}dg| |g  fddtdtd D }|td d  t	|| _
d S )Nr	   c                    $   g | ]}t  |d   | qS r"   rN   r   irO   Zencoder_channelsr   r   r          z5SuperGlueKeypointEncoder.__init__.<locals>.<listcomp>r"   r]   r6   )rS   rT   Zkeypoint_encoder_sizeshidden_sizer/   lenappendr   rU   
ModuleListencoder)rY   rO   Zlayer_sizesrk   layersrZ   ri   r   rT      s   
z!SuperGlueKeypointEncoder.__init__Fr   r3   output_hidden_statesc                 C   sR   | d}tj||gdd}|rdnd }| jD ]}||}|r$||f }q||fS )Nr   r-   r   )r0   r   r   ro   )rY   r   r3   rq   r\   all_hidden_stateslayerr   r   r   r_      s   


z SuperGlueKeypointEncoder.forward)FrH   rI   rJ   r   rT   r   ra   r   boolr   r_   rb   r   r   rZ   r   rc      s    rc   c                       s   e Zd Zd fdd	ZdejdejfddZ						dd	ejd
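

# Illustrative sketch (not part of the original file): the encoder fuses location and
# confidence into the descriptor space, concatenating the normalized (x, y) coordinates
# with the detection score into 3 input channels per keypoint:
#
#   keypoints = torch.rand(2, 512, 2)
#   scores = torch.rand(2, 512)
#   embedding, _ = SuperGlueKeypointEncoder(config)(keypoints, scores)
#   embedding.shape  # torch.Size([2, 512, config.hidden_size])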
eej deej deej deej dee	e	ej   dee
 de	ej fddZ  ZS )SuperGlueSelfAttentionNc                    s   t    |j|j dkrt|dstd|j d|j d|j| _t|j|j | _| j| j | _t	
|j| j| _t	
|j| j| _t	
|j| j| _t	|j| _|p\t|dd| _| jdksh| jd	kry|j| _t	d
|j d | j| _|j| _d S )Nr   Zembedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()position_embedding_typeabsoluterelative_keyrelative_key_queryr   r"   )rS   rT   rk   num_attention_headshasattr
ValueErrorr`   attention_head_sizeall_head_sizer   rU   querykeyvalueZDropoutZattention_probs_dropout_probdropoutgetattrrx   max_position_embeddingsZ	Embeddingdistance_embedding
is_decoderrY   rO   rx   rZ   r   r   rT      s*   

zSuperGlueSelfAttention.__init__r?   r   c                 C   s6   |  d d | j| jf }||}|ddddS )Nr6   r   r   r"   r	   )r&   r|   r   viewpermute)rY   r?   Znew_x_shaper   r   r   transpose_for_scores  s   
z+SuperGlueSelfAttention.transpose_for_scoresFrF   attention_mask	head_maskencoder_hidden_statesencoder_attention_maskpast_key_valueoutput_attentionsc                 C   s  |  |}|d u}	|	r|d ur|d }
|d }|}nP|	r/| | |}
| | |}|}n;|d urZ| | |}
| | |}tj|d |
gdd}
tj|d |gdd}n| | |}
| | |}| |}|d u}| jrz|
|f}t||
dd}| j	dks| j	dkr	|j
d |
j
d }}|rtj|d tj|jd	dd}ntj|tj|jd	dd}tj|tj|jd	dd}|| }| || j d }|j|jd
}| j	dkrtd||}|| }n| j	dkr	td||}td|
|}|| | }|t| j }|d ur|| }tjj|dd}| |}|d ur0|| }t||}|dddd }| d d | jf }||}|rX||fn|f}| jrd||f }|S )Nr   r"   r   r-   r6   r]   rz   r{   )r!   r    r!   zbhld,lrd->bhlrzbhrd,lrd->bhlrr	   ) r   r   r   r   r   r   r   matmulr^   rx   r7   r#   longr    r   Zaranger   r   r9   r!   Zeinsummathsqrtr   r   Z
functionalZsoftmaxr   r   
contiguousr&   r   )rY   rF   r   r   r   r   r   r   Zmixed_query_layerZis_cross_attentionZ	key_layerZvalue_layerZquery_layerZ	use_cacheZattention_scoresZquery_lengthZ
key_lengthZposition_ids_lZposition_ids_rZdistanceZpositional_embeddingZrelative_position_scoresZrelative_position_scores_queryZrelative_position_scores_keyZattention_probsZcontext_layerZnew_context_layer_shapeoutputsr   r   r   r_     sn   









zSuperGlueSelfAttention.forwardrR   NNNNNF)rH   rI   rJ   rT   r   ra   r   r   rL   r   ru   r_   rb   r   r   rZ   r   rv      s4    	rv   c                       s8   e Zd Zdef fddZdejdejfddZ  ZS )SuperGlueSelfOutputrO   c                    s    t    t|j|j| _d S rR   )rS   rT   r   rU   rk   denserY   rO   rZ   r   r   rT   r  s   
zSuperGlueSelfOutput.__init__rF   r   c                 G   s   |  |}|S rR   )r   )rY   rF   argsr   r   r   r_   v  s   
zSuperGlueSelfOutput.forward	rH   rI   rJ   r   rT   r   ra   r_   rb   r   r   rZ   r   r   q  s    r   eagerc                       s   e Zd Zd fdd	Zdd Z						ddejdeej d	eej d
eej deej dee	e	ej   dee
 de	ej fddZ  ZS )SuperGlueAttentionNc                    s4   t    t|j ||d| _t|| _t | _d S )N)rx   )	rS   rT    SUPERGLUE_SELF_ATTENTION_CLASSESZ_attn_implementationrY   r   outputsetpruned_headsr   rZ   r   r   rT     s   

zSuperGlueAttention.__init__c                 C   s   t |dkrd S t|| jj| jj| j\}}t| jj|| j_t| jj|| j_t| jj	|| j_	t| j
j|dd| j
_| jjt | | j_| jj| jj | j_| j|| _d S )Nr   r"   r-   )rl   r
   rY   r|   r   r   r   r   r   r   r   r   r   union)rY   Zheadsindexr   r   r   prune_heads  s   zSuperGlueAttention.prune_headsFrF   r   r   r   r   r   r   r   c              	   C   s<   |  |||||||}| |d |}	|	f|dd   }
|
S r>   )rY   r   )rY   rF   r   r   r   r   r   r   Zself_outputsZattention_outputr   r   r   r   r_     s   
	zSuperGlueAttention.forwardrR   r   )rH   rI   rJ   rT   r   r   ra   r   rL   r   ru   r_   rb   r   r   rZ   r   r     s4    	r   c                       s   e Zd Zdeddf fddZ					ddejdeej d	eej d
eej dedede	ejee	ej  ee	ej  f fddZ
  ZS )SuperGlueAttentionalPropagationrO   r   Nc                    sx   t     j}t | _|d |d |g fddtdtd D }|t	d d  t
|| _d S )Nr   c                    rd   re   rf   rg   rO   Zmlp_channelsr   r   r     rj   z<SuperGlueAttentionalPropagation.__init__.<locals>.<listcomp>r"   r]   r6   )rS   rT   rk   r   	attentionr/   rl   rm   r   rU   rn   mlp)rY   rO   rk   rp   rZ   r   r   rT     s   

z(SuperGlueAttentionalPropagation.__init__Fdescriptorsr   r   r   r   rq   c                 C   sr   | j |||||d}|d }|dd  }	tj||gdd}
|r!dnd }| jD ]}||
}
|r3||
f }q&|
||	fS )N)r   r   r   r   r   r"   r   r-   r   )r   r   r   r   )rY   r   r   r   r   r   rq   Zattention_outputsr   r   r\   rr   rs   r   r   r   r_     s"   	


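

# Illustrative note (not part of the original file): one propagation step computes an
# attention message for every keypoint, concatenates it with the current descriptor
# (2 * hidden_size channels) and refines the result through the MLP; the caller applies
# the returned tensor as a residual update, `descriptors = descriptors + delta`.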


class SuperGlueAttentionalGNN(nn.Module):
    def __init__(self, config: SuperGlueConfig) -> None:
        super().__init__()
        self.hidden_size = config.hidden_size
        self.layers_types = config.gnn_layers_types
        self.layers = nn.ModuleList([SuperGlueAttentionalPropagation(config) for _ in range(len(self.layers_types))])

    def forward(
        self,
        descriptors: torch.Tensor,
        mask: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = False,
        output_hidden_states: Optional[bool] = False,
    ) -> Tuple[torch.Tensor, Optional[Tuple], Optional[Tuple]]:
        all_hidden_states = () if output_hidden_states else None
        all_attentions = () if output_attentions else None

        batch_size, num_keypoints, _ = descriptors.shape
        if output_hidden_states:
            all_hidden_states = all_hidden_states + (descriptors,)

        for gnn_layer, layer_type in zip(self.layers, self.layers_types):
            encoder_hidden_states = None
            encoder_attention_mask = None
            if layer_type == "cross":
                # For cross-attention, the key/value source is the other image of each pair:
                # flip the image axis of the (batch_size * 2, num_keypoints, hidden_size) descriptors.
                encoder_hidden_states = (
                    descriptors.reshape(-1, 2, num_keypoints, self.hidden_size)
                    .flip(1)
                    .reshape(batch_size, num_keypoints, self.hidden_size)
                )
                encoder_attention_mask = (
                    mask.reshape(-1, 2, 1, 1, num_keypoints).flip(1).reshape(batch_size, 1, 1, num_keypoints)
                    if mask is not None
                    else None
                )

            gnn_outputs = gnn_layer(
                descriptors,
                attention_mask=mask,
                encoder_hidden_states=encoder_hidden_states,
                encoder_attention_mask=encoder_attention_mask,
                output_hidden_states=output_hidden_states,
                output_attentions=output_attentions,
            )
            delta = gnn_outputs[0]

            if output_hidden_states:
                all_hidden_states = all_hidden_states + gnn_outputs[1]
            if output_attentions:
                all_attentions = all_attentions + gnn_outputs[2]

            descriptors = descriptors + delta
        return descriptors, all_hidden_states, all_attentions


class SuperGlueFinalProjection(nn.Module):
    def __init__(self, config: SuperGlueConfig) -> None:
        super().__init__()
        hidden_size = config.hidden_size
        self.final_proj = nn.Linear(hidden_size, hidden_size, bias=True)

    def forward(self, descriptors: torch.Tensor) -> torch.Tensor:
        return self.final_proj(descriptors)


@auto_docstring
class SuperGluePreTrainedModel(PreTrainedModel):
    config_class = SuperGlueConfig
    base_model_prefix = "superglue"
    main_input_name = "pixel_values"

    def _init_weights(self, module: nn.Module) -> None:
        """Initialize the weights"""
        if isinstance(module, (nn.Linear, nn.Conv2d, nn.Conv1d)):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)
        elif isinstance(module, SuperGlueMultiLayerPerceptron):
            nn.init.constant_(module.linear.bias, 0.0)


@auto_docstring(
    custom_intro="""
    SuperGlue model taking images as inputs and outputting the matching of them.
    )Zcustom_introc                       s   e Zd ZdZdeddf fddZ			ddejdejd	ejd
edede	ej de	e
 de	e
 deejejeef fddZe				ddejde	ej de	e
 de	e
 de	e
 deeef fddZ  ZS )SuperGlueForKeypointMatchinga  SuperGlue feature matching middle-end

    Given two sets of keypoints and locations, we determine the
    correspondences by:
      1. Keypoint Encoding (normalization + visual feature and location fusion)
      2. Graph Neural Network with multiple self and cross-attention layers
      3. Final projection layer
      4. Optimal Transport Layer (a differentiable Hungarian matching algorithm)
      5. Thresholding matrix based on mutual exclusivity and a match_threshold

    The correspondence ids use -1 to indicate non-matching points.

    Paul-Edouard Sarlin, Daniel DeTone, Tomasz Malisiewicz, and Andrew
    Rabinovich. SuperGlue: Learning Feature Matching with Graph Neural
    Networks. In CVPR, 2020. https://arxiv.org/abs/1911.11763
    rO   r   Nc                    sb   t  | t|j| _t|| _t|| _	t
|| _tjtd}| d| |   d S )Nr   	bin_score)rS   rT   r   Zfrom_configZkeypoint_detector_configkeypoint_detectorrc   keypoint_encoderr   gnnr   final_projectionr   r   	Parameterr#   Zregister_parameterZ	post_init)rY   rO   r   rZ   r   r   rT   L  s   


z%SuperGlueForKeypointMatching.__init__r   r   r3   r   r   rE   r   rq   c	           &         s  |rdnd}	|r
dnd}
|j d dkr*|j dd }|j|dtjd|||	|
fS |j \ }}| d d}| d | jj}| d }|durY| d nd}t|||}| j	|||d}|d }|| }|dur|
 }| ||}n
tj f|jd}| j||||d	}|d }| |}| d| jj}|dddf }|ddd
f }||d
d }|| jjd  }|dur| d}|dddf ddd}||dkd}t|| j| jjd}|ddddddf d}|ddddddf d
}|j}|j}t|d
d |d
|k}t|d
d |d
|k}|d}t||j |}t|| jjk||}t||d
||}|||k@ } || d
|@ }!t| ||d}"t|!||d}#t |"|#g dd}$t ||g dd}%|r|	|d
  }	|	|d
  }	|	|f }	t! fdd|	D }	|r|
|d  }
t! fdd|
D }
|$|%|	|
fS )a=  
        Perform keypoint matching between two images.

        Args:
            keypoints (`torch.Tensor` of shape `(batch_size, 2, num_keypoints, 2)`):
                Keypoints detected in the pair of image.
            descriptors (`torch.Tensor` of shape `(batch_size, 2, descriptor_dim, num_keypoints)`):
                Descriptors of the keypoints detected in the image pair.
            scores (`torch.Tensor` of shape `(batch_size, 2, num_keypoints)`):
                Confidence scores of the keypoints detected in the image pair.
            height (`int`): Image height.
            width (`int`): Image width.
            mask (`torch.Tensor` of shape `(batch_size, 2, num_keypoints)`, *optional*):
                Mask indicating which values in the keypoints, matches and matching_scores tensors are keypoint matching
                information.
            output_attentions (`bool`, *optional*):
                Whether or not to return the attentions tensors. Default to `config.output_attentions`.
            output_hidden_states (`bool`, *optional*):
                Whether or not to return the hidden states of all layers. Default to `config.output_hidden_states`.

        Returns:
            matches (`torch.Tensor` of shape `(batch_size, 2, num_keypoints)`):
                For each image pair, for each keypoint in image0, the index of the keypoint in image1 that was matched
                with. And for each keypoint in image1, the index of the keypoint in image0 that was matched with.
            matching_scores (`torch.Tensor` of shape `(batch_size, 2, num_keypoints)`):
                Scores of predicted matches for each image pair
            all_hidden_states (`tuple(torch.FloatTensor)`, *optional*):
                Tuple of `torch.FloatTensor` (one for the output of each stage) of shape `(1, 2, num_keypoints,
                num_channels)`.
            all_attentions (`tuple(torch.FloatTensor)`, *optional*):
                Tuple of `torch.FloatTensor` (one for each layer) of shape `(1, 2, num_heads, num_keypoints,
                num_keypoints)`.
        """
        all_hidden_states = () if output_hidden_states else None
        all_attentions = () if output_attentions else None

        if keypoints.shape[2] == 0:  # no keypoints
            shape = keypoints.shape[:-1]
            return (
                keypoints.new_full(shape, -1, dtype=torch.int),
                keypoints.new_zeros(shape),
                all_hidden_states,
                all_attentions,
            )

        batch_size, _, num_keypoints, _ = keypoints.shape
        # (batch_size, 2, num_keypoints, 2) -> (batch_size * 2, num_keypoints, 2)
        keypoints = keypoints.reshape(batch_size * 2, num_keypoints, 2)
        descriptors = descriptors.reshape(batch_size * 2, num_keypoints, self.config.hidden_size)
        scores = scores.reshape(batch_size * 2, num_keypoints)
        mask = mask.reshape(batch_size * 2, num_keypoints) if mask is not None else None

        # Keypoint normalization
        keypoints = normalize_keypoints(keypoints, height, width)

        encoded_keypoints = self.keypoint_encoder(keypoints, scores, output_hidden_states=output_hidden_states)
        last_hidden_state = encoded_keypoints[0]

        # Keypoint MLP encoder.
        descriptors = descriptors + last_hidden_state

        if mask is not None:
            input_shape = descriptors.size()
            extended_attention_mask = self.get_extended_attention_mask(mask, input_shape)
        else:
            extended_attention_mask = torch.ones((batch_size, num_keypoints), device=keypoints.device)

        # Multi-layer Transformer network.
        gnn_outputs = self.gnn(
            descriptors,
            mask=extended_attention_mask,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
        )
        descriptors = gnn_outputs[0]

        # Final MLP projection.
        projected_descriptors = self.final_projection(descriptors)

        # (batch_size * 2, num_keypoints, hidden_size) -> (batch_size, 2, num_keypoints, hidden_size)
        final_descriptors = projected_descriptors.reshape(batch_size, 2, num_keypoints, self.config.hidden_size)
        final_descriptors0 = final_descriptors[:, 0]
        final_descriptors1 = final_descriptors[:, 1]

        # Compute matching descriptor distance.
        scores = final_descriptors0 @ final_descriptors1.transpose(1, 2)
        scores = scores / self.config.hidden_size**0.5

        if mask is not None:
            mask = mask.reshape(batch_size, 2, num_keypoints)
            mask0 = mask[:, 0].unsqueeze(-1).expand(-1, -1, num_keypoints)
            scores = scores.masked_fill(mask0 == 0, -1e9)

        # Run the optimal transport.
        scores = log_optimal_transport(scores, self.bin_score, iterations=self.config.sinkhorn_iterations)

        # Get the matches with score above "match_threshold".
        max0 = scores[:, :-1, :-1].max(2)
        max1 = scores[:, :-1, :-1].max(1)
        indices0 = max0.indices
        indices1 = max1.indices
        mutual0 = arange_like(indices0, 1)[None] == indices1.gather(1, indices0)
        mutual1 = arange_like(indices1, 1)[None] == indices0.gather(1, indices1)
        zero = scores.new_tensor(0)
        matching_scores0 = torch.where(mutual0, max0.values.exp(), zero)
        matching_scores0 = torch.where(matching_scores0 > self.config.matching_threshold, matching_scores0, zero)
        matching_scores1 = torch.where(mutual1, matching_scores0.gather(1, indices1), zero)
        valid0 = mutual0 & (matching_scores0 > zero)
        valid1 = mutual1 & valid0.gather(1, indices1)
        matches0 = torch.where(valid0, indices0, indices0.new_tensor(-1))
        matches1 = torch.where(valid1, indices1, indices1.new_tensor(-1))

        matches = torch.cat([matches0, matches1]).reshape(batch_size, 2, -1)
        matching_scores = torch.cat([matching_scores0, matching_scores1]).reshape(batch_size, 2, -1)

        if output_hidden_states:
            all_hidden_states = all_hidden_states + encoded_keypoints[1]
            all_hidden_states = all_hidden_states + gnn_outputs[1]
            all_hidden_states = all_hidden_states + (projected_descriptors,)
            all_hidden_states = tuple(
                x.reshape(batch_size, 2, num_keypoints, -1).transpose(-1, -2) for x in all_hidden_states
            )
        if output_attentions:
            all_attentions = all_attentions + gnn_outputs[2]
            all_attentions = tuple(x.reshape(batch_size, 2, -1, num_keypoints, num_keypoints) for x in all_attentions)

        return matches, matching_scores, all_hidden_states, all_attentions
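
    # Illustrative note (not part of the original file): a pair (i, j) survives the
    # matching step above only if it is a mutual argmax of the transport plan (i is the
    # best row for column j and vice versa) and its probability exceeds
    # `config.matching_threshold`; every other keypoint is assigned the index -1.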

    @auto_docstring
    def forward(
        self,
        pixel_values: torch.FloatTensor,
        labels: Optional[torch.LongTensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, KeypointMatchingOutput]:
        r"""
        Examples:

        ```python
        >>> from transformers import AutoImageProcessor, AutoModel
        >>> import torch
        >>> from PIL import Image
        >>> import requests

        >>> url = "https://github.com/magicleap/SuperGluePretrainedNetwork/blob/master/assets/phototourism_sample_images/london_bridge_78916675_4568141288.jpg?raw=true"
        >>> image1 = Image.open(requests.get(url, stream=True).raw)
        >>> url = "https://github.com/magicleap/SuperGluePretrainedNetwork/blob/master/assets/phototourism_sample_images/london_bridge_19481797_2295892421.jpg?raw=true"
        >>> image2 = Image.open(requests.get(url, stream=True).raw)
        >>> images = [image1, image2]

        >>> processor = AutoImageProcessor.from_pretrained("magic-leap-community/superglue_outdoor")
        >>> model = AutoModel.from_pretrained("magic-leap-community/superglue_outdoor")

        >>> with torch.no_grad():
        ...     inputs = processor(images, return_tensors="pt")
        ...     outputs = model(**inputs)
        ```"""
        loss = None
        if labels is not None:
            raise ValueError("SuperGlue is not trainable, no labels should be provided.")

        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        if pixel_values.ndim != 5 or pixel_values.size(1) != 2:
            raise ValueError("Input must be a 5D tensor of shape (batch_size, 2, num_channels, height, width)")

        batch_size, _, channels, height, width = pixel_values.shape
        pixel_values = pixel_values.reshape(batch_size * 2, channels, height, width)
        keypoint_detections = self.keypoint_detector(pixel_values)

        keypoints, scores, descriptors, mask = keypoint_detections[:4]
        keypoints = keypoints.reshape(batch_size, 2, -1, 2).to(pixel_values)
        scores = scores.reshape(batch_size, 2, -1).to(pixel_values)
        descriptors = descriptors.reshape(batch_size, 2, -1, self.config.hidden_size).to(pixel_values)
        mask = mask.reshape(batch_size, 2, -1)

        absolute_keypoints = keypoints.clone()
        absolute_keypoints[:, :, :, 0] = absolute_keypoints[:, :, :, 0] * width
        absolute_keypoints[:, :, :, 1] = absolute_keypoints[:, :, :, 1] * height

        matches, matching_scores, hidden_states, attentions = self._match_image_pair(
            absolute_keypoints,
            descriptors,
            scores,
            height,
            width,
            mask=mask,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
        )

        if not return_dict:
            return tuple(
                v
                for v in [loss, matches, matching_scores, keypoints, mask, hidden_states, attentions]
                if v is not None
            )

        return KeypointMatchingOutput(
            loss=loss,
            matches=matches,
            matching_scores=matching_scores,
            keypoints=keypoints,
            mask=mask,
            hidden_states=hidden_states,
            attentions=attentions,
        )


__all__ = ["SuperGluePreTrainedModel", "SuperGlueForKeypointMatching"]