"""PyTorch DINOv2 model."""

import collections.abc
from typing import Callable, Dict, List, Optional, Set, Tuple, Union

import torch
import torch.utils.checkpoint
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

from ...activations import ACT2FN
from ...modeling_outputs import (
    BackboneOutput,
    BaseModelOutput,
    BaseModelOutputWithPooling,
    ImageClassifierOutput,
)
from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
from ...pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer
from ...utils import auto_docstring, logging, torch_int
from ...utils.backbone_utils import BackboneMixin
from .configuration_dinov2 import Dinov2Config


logger = logging.get_logger(__name__)


class Dinov2Embeddings(nn.Module):
    """
    Construct the CLS token, mask token, position and patch embeddings.
    """

    def __init__(self, config: Dinov2Config) -> None:
        super().__init__()

        self.cls_token = nn.Parameter(torch.randn(1, 1, config.hidden_size))
        if config.use_mask_token:
            self.mask_token = nn.Parameter(torch.zeros(1, config.hidden_size))
        self.patch_embeddings = Dinov2PatchEmbeddings(config)
        num_patches = self.patch_embeddings.num_patches
        self.position_embeddings = nn.Parameter(torch.randn(1, num_patches + 1, config.hidden_size))
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.patch_size = config.patch_size
        self.config = config

    def interpolate_pos_encoding(self, embeddings: torch.Tensor, height: int, width: int) -> torch.Tensor:
        """
        This method allows to interpolate the pre-trained position encodings, to be able to use the model on higher resolution
        images. This method is also adapted to support torch.jit tracing and interpolation at torch.float32 precision.

        Adapted from:
        - https://github.com/facebookresearch/dino/blob/de9ee3df6cf39fac952ab558447af1fa1365362a/vision_transformer.py#L174-L194, and
        - https://github.com/facebookresearch/dinov2/blob/e1277af2ba9496fbadf7aec6eba56e8d882d1e35/dinov2/models/vision_transformer.py#L179-L211
        """

        num_patches = embeddings.shape[1] - 1
        num_positions = self.position_embeddings.shape[1] - 1

        # Skip interpolation when possible, but always interpolate while tracing so that
        # the exported model works for dynamic input shapes.
        if not torch.jit.is_tracing() and num_patches == num_positions and height == width:
            return self.position_embeddings

        class_pos_embed = self.position_embeddings[:, :1]
        patch_pos_embed = self.position_embeddings[:, 1:]

        dim = embeddings.shape[-1]

        new_height = height // self.patch_size
        new_width = width // self.patch_size

        sqrt_num_positions = torch_int(num_positions**0.5)
        patch_pos_embed = patch_pos_embed.reshape(1, sqrt_num_positions, sqrt_num_positions, dim)
        patch_pos_embed = patch_pos_embed.permute(0, 3, 1, 2)
        target_dtype = patch_pos_embed.dtype
        patch_pos_embed = nn.functional.interpolate(
            patch_pos_embed.to(dtype=torch.float32),
            size=(new_height, new_width),
            mode="bicubic",
            align_corners=False,
        ).to(dtype=target_dtype)

        patch_pos_embed = patch_pos_embed.permute(0, 2, 3, 1).view(1, -1, dim)

        return torch.cat((class_pos_embed, patch_pos_embed), dim=1)

    def forward(self, pixel_values: torch.Tensor, bool_masked_pos: Optional[torch.Tensor] = None) -> torch.Tensor:
        batch_size, _, height, width = pixel_values.shape
        target_dtype = self.patch_embeddings.projection.weight.dtype
        embeddings = self.patch_embeddings(pixel_values.to(dtype=target_dtype))

        if bool_masked_pos is not None and self.config.use_mask_token:
            embeddings = torch.where(
                bool_masked_pos.unsqueeze(-1), self.mask_token.to(embeddings.dtype).unsqueeze(0), embeddings
            )

        # add the [CLS] token to the embedded patch tokens
        cls_tokens = self.cls_token.expand(batch_size, -1, -1)
        embeddings = torch.cat((cls_tokens, embeddings), dim=1)

        # add positional encoding to each token
        embeddings = embeddings + self.interpolate_pos_encoding(embeddings, height, width)

        embeddings = self.dropout(embeddings)

        return embeddings


class Dinov2PatchEmbeddings(nn.Module):
    """
    This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
    `hidden_states` (patch embeddings) of shape `(batch_size, seq_length, hidden_size)` to be consumed by a
    Transformer.
    """

    def __init__(self, config):
        super().__init__()
        image_size, patch_size = config.image_size, config.patch_size
        num_channels, hidden_size = config.num_channels, config.hidden_size

        image_size = image_size if isinstance(image_size, collections.abc.Iterable) else (image_size, image_size)
        patch_size = patch_size if isinstance(patch_size, collections.abc.Iterable) else (patch_size, patch_size)
        num_patches = (image_size[1] // patch_size[1]) * (image_size[0] // patch_size[0])
        self.image_size = image_size
        self.patch_size = patch_size
        self.num_channels = num_channels
        self.num_patches = num_patches

        self.projection = nn.Conv2d(num_channels, hidden_size, kernel_size=patch_size, stride=patch_size)

    def forward(self, pixel_values: torch.Tensor) -> torch.Tensor:
        num_channels = pixel_values.shape[1]
        if num_channels != self.num_channels:
            raise ValueError(
                "Make sure that the channel dimension of the pixel values matches with the one set in the"
                f" configuration. Expected {self.num_channels} but got {num_channels}."
            )
        embeddings = self.projection(pixel_values).flatten(2).transpose(1, 2)
        return embeddings


def eager_attention_forward(
    module: nn.Module,
    query: torch.Tensor,
    key: torch.Tensor,
    value: torch.Tensor,
    attention_mask: Optional[torch.Tensor],
    scaling: float,
    dropout: float = 0.0,
    **kwargs,
):
    # Take the dot product between "query" and "key" to get the raw attention scores.
    attn_weights = torch.matmul(query, key.transpose(-1, -2)) * scaling

    # Normalize the attention scores to probabilities.
    attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query.dtype)

    # This is actually dropping out entire tokens to attend to, which might
    # seem a bit unusual, but is taken from the original Transformer paper.
    attn_weights = nn.functional.dropout(attn_weights, p=dropout, training=module.training)

    # Mask heads if we want to
    if attention_mask is not None:
        attn_weights = attn_weights * attention_mask

    attn_output = torch.matmul(attn_weights, value)
    attn_output = attn_output.transpose(1, 2).contiguous()

    return attn_output, attn_weights


class Dinov2SelfAttention(nn.Module):
    def __init__(self, config: Dinov2Config) -> None:
        super().__init__()
        if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
            raise ValueError(
                f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
                f"heads {config.num_attention_heads}."
            )

        self.config = config
        self.num_attention_heads = config.num_attention_heads
        self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
        self.all_head_size = self.num_attention_heads * self.attention_head_size
        self.dropout_prob = config.attention_probs_dropout_prob
        self.scaling = self.attention_head_size**-0.5
        self.is_causal = False

        self.query = nn.Linear(config.hidden_size, self.all_head_size, bias=config.qkv_bias)
        self.key = nn.Linear(config.hidden_size, self.all_head_size, bias=config.qkv_bias)
        self.value = nn.Linear(config.hidden_size, self.all_head_size, bias=config.qkv_bias)

    def transpose_for_scores(self, x: torch.Tensor) -> torch.Tensor:
        new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
        x = x.view(new_x_shape)
        return x.permute(0, 2, 1, 3)

    def forward(
        self,
        hidden_states,
        head_mask: Optional[torch.Tensor] = None,
        output_attentions: bool = False,
    ) -> Union[Tuple[torch.Tensor, torch.Tensor], Tuple[torch.Tensor]]:
        key_layer = self.transpose_for_scores(self.key(hidden_states))
        value_layer = self.transpose_for_scores(self.value(hidden_states))
        query_layer = self.transpose_for_scores(self.query(hidden_states))

        attention_interface: Callable = eager_attention_forward
        if self.config._attn_implementation != "eager":
            if self.config._attn_implementation == "sdpa" and output_attentions:
                logger.warning_once(
                    "`torch.nn.functional.scaled_dot_product_attention` does not support `output_attentions=True`."
                    " Falling back to eager attention. This warning can be removed using the argument"
                    ' `attn_implementation="eager"` when loading the model.'
                )
            else:
                attention_interface = ALL_ATTENTION_FUNCTIONS[self.config._attn_implementation]

        context_layer, attention_probs = attention_interface(
            self,
            query_layer,
            key_layer,
            value_layer,
            head_mask,
            is_causal=self.is_causal,
            scaling=self.scaling,
            dropout=0.0 if not self.training else self.dropout_prob,
        )

        new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
        context_layer = context_layer.reshape(new_context_layer_shape)

        outputs = (context_layer, attention_probs) if output_attentions else (context_layer,)

        return outputs


class Dinov2SelfOutput(nn.Module):
    """
    The residual connection is defined in Dinov2Layer instead of here (as is the case with other models), due to the
    layernorm applied before each block.
    """

    def __init__(self, config: Dinov2Config) -> None:
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states: torch.Tensor, input_tensor: torch.Tensor) -> torch.Tensor:
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states)

        return hidden_states


class Dinov2Attention(nn.Module):
    def __init__(self, config: Dinov2Config) -> None:
        super().__init__()
        self.attention = Dinov2SelfAttention(config)
        self.output = Dinov2SelfOutput(config)
        self.pruned_heads = set()

    def prune_heads(self, heads: Set[int]) -> None:
        if len(heads) == 0:
            return
        heads, index = find_pruneable_heads_and_indices(
            heads, self.attention.num_attention_heads, self.attention.attention_head_size, self.pruned_heads
        )

        # Prune linear layers
        self.attention.query = prune_linear_layer(self.attention.query, index)
        self.attention.key = prune_linear_layer(self.attention.key, index)
        self.attention.value = prune_linear_layer(self.attention.value, index)
        self.output.dense = prune_linear_layer(self.output.dense, index, dim=1)

        # Update hyper params and store pruned heads
        self.attention.num_attention_heads = self.attention.num_attention_heads - len(heads)
        self.attention.all_head_size = self.attention.attention_head_size * self.attention.num_attention_heads
        self.pruned_heads = self.pruned_heads.union(heads)

    def forward(
        self,
        hidden_states: torch.Tensor,
        head_mask: Optional[torch.Tensor] = None,
        output_attentions: bool = False,
    ) -> Union[Tuple[torch.Tensor, torch.Tensor], Tuple[torch.Tensor]]:
        self_outputs = self.attention(hidden_states, head_mask, output_attentions)

        attention_output = self.output(self_outputs[0], hidden_states)

        outputs = (attention_output,) + self_outputs[1:]  # add attentions if we output them
        return outputs


class Dinov2LayerScale(nn.Module):
    def __init__(self, config) -> None:
        super().__init__()
        self.lambda1 = nn.Parameter(config.layerscale_value * torch.ones(config.hidden_size))

    def forward(self, hidden_state: torch.Tensor) -> torch.Tensor:
        return hidden_state * self.lambda1


def drop_path(input: torch.Tensor, drop_prob: float = 0.0, training: bool = False) -> torch.Tensor:
    """
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
    however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
    layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
    argument.
    """
    if drop_prob == 0.0 or not training:
        return input
    keep_prob = 1 - drop_prob
    shape = (input.shape[0],) + (1,) * (input.ndim - 1)  # work with diff dim tensors, not just 2D ConvNets
    random_tensor = keep_prob + torch.rand(shape, dtype=input.dtype, device=input.device)
    random_tensor.floor_()  # binarize
    output = input.div(keep_prob) * random_tensor
    return output


class Dinov2DropPath(nn.Module):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks)."""

    def __init__(self, drop_prob: Optional[float] = None) -> None:
        super().__init__()
        self.drop_prob = drop_prob

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        return drop_path(hidden_states, self.drop_prob, self.training)

    def extra_repr(self) -> str:
        return "p={}".format(self.drop_prob)


class Dinov2MLP(nn.Module):
    def __init__(self, config) -> None:
        super().__init__()
        in_features = out_features = config.hidden_size
        hidden_features = int(config.hidden_size * config.mlp_ratio)
        self.fc1 = nn.Linear(in_features, hidden_features, bias=True)
        if isinstance(config.hidden_act, str):
            self.activation = ACT2FN[config.hidden_act]
        else:
            self.activation = config.hidden_act
        self.fc2 = nn.Linear(hidden_features, out_features, bias=True)

    def forward(self, hidden_state: torch.Tensor) -> torch.Tensor:
        hidden_state = self.fc1(hidden_state)
        hidden_state = self.activation(hidden_state)
        hidden_state = self.fc2(hidden_state)
        return hidden_state


class Dinov2SwiGLUFFN(nn.Module):
    def __init__(self, config) -> None:
        super().__init__()
        in_features = out_features = config.hidden_size
        hidden_features = int(config.hidden_size * config.mlp_ratio)
        # round 2/3 of the hidden dimension up to the nearest multiple of 8
        hidden_features = (int(hidden_features * 2 / 3) + 7) // 8 * 8

        self.weights_in = nn.Linear(in_features, 2 * hidden_features, bias=True)
        self.weights_out = nn.Linear(hidden_features, out_features, bias=True)

    def forward(self, hidden_state: torch.Tensor) -> torch.Tensor:
        hidden_state = self.weights_in(hidden_state)
        x1, x2 = hidden_state.chunk(2, dim=-1)
        hidden = nn.functional.silu(x1) * x2
        return self.weights_out(hidden)


class Dinov2Layer(nn.Module):
    """This corresponds to the Block class in the original implementation."""

    def __init__(self, config: Dinov2Config) -> None:
        super().__init__()

        self.norm1 = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.attention = Dinov2Attention(config)
        self.layer_scale1 = Dinov2LayerScale(config)
        self.drop_path = Dinov2DropPath(config.drop_path_rate) if config.drop_path_rate > 0.0 else nn.Identity()

        self.norm2 = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)

        if config.use_swiglu_ffn:
            self.mlp = Dinov2SwiGLUFFN(config)
        else:
            self.mlp = Dinov2MLP(config)
        self.layer_scale2 = Dinov2LayerScale(config)

    def forward(
        self,
        hidden_states: torch.Tensor,
        head_mask: Optional[torch.Tensor] = None,
        output_attentions: bool = False,
    ) -> Union[Tuple[torch.Tensor, torch.Tensor], Tuple[torch.Tensor]]:
        self_attention_outputs = self.attention(
            self.norm1(hidden_states),  # in Dinov2, layernorm is applied before self-attention
            head_mask,
            output_attentions=output_attentions,
        )
        attention_output = self_attention_outputs[0]

        attention_output = self.layer_scale1(attention_output)
        outputs = self_attention_outputs[1:]  # add self attentions if we output attention weights

        # first residual connection
        hidden_states = self.drop_path(attention_output) + hidden_states

        # in Dinov2, layernorm is also applied after self-attention
        layer_output = self.norm2(hidden_states)
        layer_output = self.mlp(layer_output)
        layer_output = self.layer_scale2(layer_output)

        # second residual connection
        layer_output = self.drop_path(layer_output) + hidden_states

        outputs = (layer_output,) + outputs

        return outputs


class Dinov2Encoder(nn.Module):
    def __init__(self, config: Dinov2Config) -> None:
        super().__init__()
        self.config = config
        self.layer = nn.ModuleList([Dinov2Layer(config) for _ in range(config.num_hidden_layers)])
        self.gradient_checkpointing = False

    def forward(
        self,
        hidden_states: torch.Tensor,
        head_mask: Optional[torch.Tensor] = None,
        output_attentions: bool = False,
        output_hidden_states: bool = False,
        return_dict: bool = True,
    ) -> Union[tuple, BaseModelOutput]:
        all_hidden_states = () if output_hidden_states else None
        all_self_attentions = () if output_attentions else None

        for i, layer_module in enumerate(self.layer):
            if output_hidden_states:
                all_hidden_states = all_hidden_states + (hidden_states,)

            layer_head_mask = head_mask[i] if head_mask is not None else None

            if self.gradient_checkpointing and self.training:
                layer_outputs = self._gradient_checkpointing_func(
                    layer_module.__call__,
                    hidden_states,
                    layer_head_mask,
                    output_attentions,
                )
            else:
                layer_outputs = layer_module(hidden_states, layer_head_mask, output_attentions)

            hidden_states = layer_outputs[0]

            if output_attentions:
                all_self_attentions = all_self_attentions + (layer_outputs[1],)

        if output_hidden_states:
            all_hidden_states = all_hidden_states + (hidden_states,)

        if not return_dict:
            return tuple(v for v in [hidden_states, all_hidden_states, all_self_attentions] if v is not None)
        return BaseModelOutput(
            last_hidden_state=hidden_states,
            hidden_states=all_hidden_states,
            attentions=all_self_attentions,
        )


@auto_docstring
class Dinov2PreTrainedModel(PreTrainedModel):
    config_class = Dinov2Config
    base_model_prefix = "dinov2"
    main_input_name = "pixel_values"
    supports_gradient_checkpointing = True
    _no_split_modules = ["Dinov2SwiGLUFFN"]
    _supports_sdpa = True
    _supports_flash_attn_2 = True

    def _init_weights(self, module: Union[nn.Linear, nn.Conv2d, nn.LayerNorm]) -> None:
        """Initialize the weights"""
        if isinstance(module, (nn.Linear, nn.Conv2d)):
            # Upcast the input in `fp32` and cast it back to the desired `dtype` to avoid
            # `trunc_normal_cpu` not implemented in `half` issues
            module.weight.data = nn.init.trunc_normal_(
                module.weight.data.to(torch.float32), mean=0.0, std=self.config.initializer_range
            ).to(module.weight.dtype)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)
        elif isinstance(module, Dinov2Embeddings):
            module.position_embeddings.data = nn.init.trunc_normal_(
                module.position_embeddings.data.to(torch.float32),
                mean=0.0,
                std=self.config.initializer_range,
            ).to(module.position_embeddings.dtype)
            module.cls_token.data = nn.init.trunc_normal_(
                module.cls_token.data.to(torch.float32),
                mean=0.0,
                std=self.config.initializer_range,
            ).to(module.cls_token.dtype)
            if self.config.use_mask_token:
                module.mask_token.data.zero_()
        elif isinstance(module, Dinov2LayerScale):
            module.lambda1.data.fill_(self.config.layerscale_value)


@auto_docstring
class Dinov2Model(Dinov2PreTrainedModel):
    def __init__(self, config: Dinov2Config):
        super().__init__(config)
        self.config = config

        self.embeddings = Dinov2Embeddings(config)
        self.encoder = Dinov2Encoder(config)

        self.layernorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self) -> Dinov2PatchEmbeddings:
        return self.embeddings.patch_embeddings

    def _prune_heads(self, heads_to_prune: Dict[int, List[int]]) -> None:
        """
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        """
        for layer, heads in heads_to_prune.items():
            self.encoder.layer[layer].attention.prune_heads(heads)

    @auto_docstring
    def forward(
        self,
        pixel_values: Optional[torch.Tensor] = None,
        bool_masked_pos: Optional[torch.Tensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, BaseModelOutputWithPooling]:
        r"""
        bool_masked_pos (`torch.BoolTensor` of shape `(batch_size, sequence_length)`):
            Boolean masked positions. Indicates which patches are masked (1) and which aren't (0). Only relevant for
            pre-training.
        """
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        if pixel_values is None:
            raise ValueError("You have to specify pixel_values")

        # Prepare head mask if needed: 1.0 in head_mask indicates we keep the head.
        # input head_mask has shape [num_heads] or [num_hidden_layers x num_heads] and is
        # converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length]
        head_mask = self.get_head_mask(head_mask, self.config.num_hidden_layers)

        embedding_output = self.embeddings(pixel_values, bool_masked_pos=bool_masked_pos)

        encoder_outputs = self.encoder(
            embedding_output,
            head_mask=head_mask,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        sequence_output = encoder_outputs[0]
        sequence_output = self.layernorm(sequence_output)
        pooled_output = sequence_output[:, 0, :]

        if not return_dict:
            head_outputs = (sequence_output, pooled_output)
            return head_outputs + encoder_outputs[1:]

        return BaseModelOutputWithPooling(
            last_hidden_state=sequence_output,
            pooler_output=pooled_output,
            hidden_states=encoder_outputs.hidden_states,
            attentions=encoder_outputs.attentions,
        )


@auto_docstring(
    custom_intro="""
    Dinov2 Model transformer with an image classification head on top (a linear layer on top of the final hidden state
    of the [CLS] token) e.g. for ImageNet.
    """
)
class Dinov2ForImageClassification(Dinov2PreTrainedModel):
    def __init__(self, config: Dinov2Config) -> None:
        super().__init__(config)

        self.num_labels = config.num_labels
        self.dinov2 = Dinov2Model(config)

        # Classifier head
        self.classifier = (
            nn.Linear(config.hidden_size * 2, config.num_labels) if config.num_labels > 0 else nn.Identity()
        )

        # Initialize weights and apply final processing
        self.post_init()

    @auto_docstring
    def forward(
        self,
        pixel_values: Optional[torch.Tensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        labels: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[tuple, ImageClassifierOutput]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.dinov2(
            pixel_values,
            head_mask=head_mask,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        sequence_output = outputs[0]  # batch_size, sequence_length, hidden_size

        cls_token = sequence_output[:, 0]
        patch_tokens = sequence_output[:, 1:]

        linear_input = torch.cat([cls_token, patch_tokens.mean(dim=1)], dim=1)

        logits = self.classifier(linear_input)

        loss = None
        if labels is not None:
            # move labels to the correct device to enable model parallelism
            labels = labels.to(logits.device)
            if self.config.problem_type is None:
                if self.num_labels == 1:
                    self.config.problem_type = "regression"
                elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"

            if self.config.problem_type == "regression":
                loss_fct = MSELoss()
                if self.num_labels == 1:
                    loss = loss_fct(logits.squeeze(), labels.squeeze())
                else:
                    loss = loss_fct(logits, labels)
            elif self.config.problem_type == "single_label_classification":
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            elif self.config.problem_type == "multi_label_classification":
                loss_fct = BCEWithLogitsLoss()
                loss = loss_fct(logits, labels)

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return ImageClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )


@auto_docstring(
    custom_intro="""
    Dinov2 backbone, to be used with frameworks like DETR and MaskFormer.
    """
)
class Dinov2Backbone(Dinov2PreTrainedModel, BackboneMixin):
    def __init__(self, config):
        super().__init__(config)
        super()._init_backbone(config)

        self.num_features = [config.hidden_size for _ in range(config.num_hidden_layers + 1)]
        self.embeddings = Dinov2Embeddings(config)
        self.encoder = Dinov2Encoder(config)

        self.layernorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self) -> Dinov2PatchEmbeddings:
        return self.embeddings.patch_embeddings

    @auto_docstring
    def forward(
        self,
        pixel_values: torch.Tensor,
        output_hidden_states: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> BackboneOutput:
        """
        Examples:

        ```python
        >>> from transformers import AutoImageProcessor, AutoBackbone
        >>> import torch
        >>> from PIL import Image
        >>> import requests

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> processor = AutoImageProcessor.from_pretrained("facebook/dinov2-base")
        >>> model = AutoBackbone.from_pretrained(
        ...     "facebook/dinov2-base", out_features=["stage2", "stage5", "stage8", "stage11"]
        ... )

        >>> inputs = processor(image, return_tensors="pt")

        >>> outputs = model(**inputs)
        >>> feature_maps = outputs.feature_maps
        >>> list(feature_maps[-1].shape)
        [1, 768, 16, 16]
        ```"""
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions

        embedding_output = self.embeddings(pixel_values)

        outputs = self.encoder(
            embedding_output, output_hidden_states=True, output_attentions=output_attentions, return_dict=return_dict
        )

        hidden_states = outputs.hidden_states if return_dict else outputs[1]

        feature_maps = ()
        for stage, hidden_state in zip(self.stage_names, hidden_states):
            if stage in self.out_features:
                if self.config.apply_layernorm:
                    hidden_state = self.layernorm(hidden_state)
                if self.config.reshape_hidden_states:
                    hidden_state = hidden_state[:, 1:]
                    batch_size, _, height, width = pixel_values.shape
                    patch_size = self.config.patch_size
                    hidden_state = hidden_state.reshape(batch_size, height // patch_size, width // patch_size, -1)
                    hidden_state = hidden_state.permute(0, 3, 1, 2).contiguous()
                feature_maps += (hidden_state,)

        if not return_dict:
            if output_hidden_states:
                output = (feature_maps,) + outputs[1:]
            else:
                output = (feature_maps,) + outputs[2:]
            return output

        return BackboneOutput(
            feature_maps=feature_maps,
            hidden_states=outputs.hidden_states if output_hidden_states else None,
            attentions=outputs.attentions if output_attentions else None,
        )


__all__ = ["Dinov2ForImageClassification", "Dinov2Model", "Dinov2PreTrainedModel", "Dinov2Backbone"]