from typing import Optional, Union

import torch
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

from transformers.models.ijepa.configuration_ijepa import IJepaConfig

from ...modeling_outputs import ImageClassifierOutput
from ...modeling_utils import PreTrainedModel
from ...utils import auto_docstring, torch_int
from ..vit.modeling_vit import ViTEmbeddings, ViTForImageClassification, ViTModel


class IJepaEmbeddings(ViTEmbeddings):
    def __init__(self, config: IJepaConfig, use_mask_token: bool = False) -> None:
        super().__init__(config, use_mask_token)
        # I-JEPA has no [CLS] token, so drop it and size the learned position table to the patch count only.
        del self.cls_token
        num_patches = self.patch_embeddings.num_patches
        self.position_embeddings = nn.Parameter(torch.randn(1, num_patches, config.hidden_size))

    def interpolate_pos_encoding(self, embeddings: torch.Tensor, height: int, width: int) -> torch.Tensor:
        """
        Interpolates the pre-trained position encodings so the model can be used on higher-resolution images. This
        method is also adapted to support torch.jit tracing.

        Adapted from:
        - https://github.com/facebookresearch/dino/blob/de9ee3df6cf39fac952ab558447af1fa1365362a/vision_transformer.py#L174-L194, and
        - https://github.com/facebookresearch/dinov2/blob/e1277af2ba9496fbadf7aec6eba56e8d882d1e35/dinov2/models/vision_transformer.py#L179-L211
        """
        num_patches = embeddings.shape[1]
        num_positions = self.position_embeddings.shape[1]

        # Always interpolate when tracing so the exported model works for dynamic input shapes.
        if not torch.jit.is_tracing() and num_patches == num_positions and height == width:
            return self.position_embeddings

        patch_pos_embed = self.position_embeddings
        dim = embeddings.shape[-1]

        new_height = height // self.patch_size
        new_width = width // self.patch_size

        # The pre-trained table covers a square grid; recover its side length, then resize bicubically.
        sqrt_num_positions = torch_int(num_positions**0.5)
        patch_pos_embed = patch_pos_embed.reshape(1, sqrt_num_positions, sqrt_num_positions, dim)
        patch_pos_embed = patch_pos_embed.permute(0, 3, 1, 2)
        patch_pos_embed = nn.functional.interpolate(
            patch_pos_embed,
            size=(new_height, new_width),
            mode="bicubic",
            align_corners=False,
        )
        patch_pos_embed = patch_pos_embed.permute(0, 2, 3, 1).view(1, -1, dim)

        return patch_pos_embed

    def forward(
        self,
        pixel_values: torch.Tensor,
        bool_masked_pos: Optional[torch.BoolTensor] = None,
        interpolate_pos_encoding: bool = False,
    ) -> torch.Tensor:
        batch_size, _, height, width = pixel_values.shape
        embeddings = self.patch_embeddings(pixel_values, interpolate_pos_encoding=interpolate_pos_encoding)

        if bool_masked_pos is not None:
            seq_length = embeddings.shape[1]
            mask_tokens = self.mask_token.expand(batch_size, seq_length, -1)
            # Replace the masked visual tokens by mask_tokens.
            mask = bool_masked_pos.unsqueeze(-1).type_as(mask_tokens)
            embeddings = embeddings * (1.0 - mask) + mask_tokens * mask

        # Add positional encoding to each token.
        if interpolate_pos_encoding:
            embeddings = embeddings + self.interpolate_pos_encoding(embeddings, height, width)
        else:
            embeddings = embeddings + self.position_embeddings

        embeddings = self.dropout(embeddings)

        return embeddings


@auto_docstring
class IJepaPreTrainedModel(PreTrainedModel):
    config_class = IJepaConfig
    base_model_prefix = "ijepa"
    main_input_name = "pixel_values"
    supports_gradient_checkpointing = True
    _no_split_modules = ["IJepaEmbeddings", "IJepaLayer"]
    _supports_sdpa = True
    _supports_flash_attn_2 = True

    def _init_weights(self, module: Union[nn.Linear, nn.Conv2d, nn.LayerNorm]) -> None:
        """Initialize the weights"""
        if isinstance(module, (nn.Linear, nn.Conv2d)):
            # Upcast the input to `fp32` and cast back to the desired `dtype` to avoid
            # `trunc_normal_cpu` not being implemented for `half`.
            module.weight.data = nn.init.trunc_normal_(
                module.weight.data.to(torch.float32), mean=0.0, std=self.config.initializer_range
            ).to(module.weight.dtype)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)
        elif isinstance(module, IJepaEmbeddings):
            module.position_embeddings.data = nn.init.trunc_normal_(
                module.position_embeddings.data.to(torch.float32),
                mean=0.0,
                std=self.config.initializer_range,
            ).to(module.position_embeddings.dtype)
            if module.mask_token is not None:
                module.mask_token.data.zero_()


class IJepaModel(IJepaPreTrainedModel, ViTModel):
    def __init__(self, config: IJepaConfig, add_pooling_layer: bool = False, use_mask_token: bool = False):
        r"""
        add_pooling_layer (`bool`, *optional*, defaults to `False`):
            Whether to add a pooling layer.
        use_mask_token (`bool`, *optional*, defaults to `False`):
            Whether to use a mask token for masked image modeling.
        """
        super().__init__(config)
        self.config = config
        self.embeddings = IJepaEmbeddings(config, use_mask_token=use_mask_token)
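
# A minimal feature-extraction sketch (illustrative, not part of the module; assumes
# network access and the public `facebook/ijepa_vith14_1k` checkpoint):
#
#     import torch
#     from PIL import Image
#     from transformers import AutoModel, AutoProcessor
#
#     processor = AutoProcessor.from_pretrained("facebook/ijepa_vith14_1k")
#     model = AutoModel.from_pretrained("facebook/ijepa_vith14_1k")
#     inputs = processor(images=Image.open("cat.png"), return_tensors="pt")
#     with torch.no_grad():
#         hidden = model(**inputs).last_hidden_state  # (1, num_patches, hidden_size)
#     features = hidden.mean(dim=1)  # patch-mean pooling; there is no [CLS] token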


@auto_docstring(
    custom_intro="""
    IJepa Model transformer with an image classification head on top (a linear layer on top of the final hidden
    states), e.g. for ImageNet.

    <Tip>

        Note that it's possible to fine-tune IJepa on higher resolution images than the ones it has been trained on, by
        setting `interpolate_pos_encoding` to `True` in the forward of the model. This will interpolate the pre-trained
        position embeddings to the higher resolution.

    </Tip>
    """
)
class IJepaForImageClassification(IJepaPreTrainedModel, ViTForImageClassification):
    def __init__(self, config: IJepaConfig):
        super().__init__(config)
        self.ijepa = IJepaModel(config, add_pooling_layer=False)

        # Initialize weights and apply final processing.
        self.post_init()

    @auto_docstring
    def forward(
        self,
        pixel_values: Optional[torch.Tensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        labels: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        interpolate_pos_encoding: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[tuple, ImageClassifierOutput]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss). If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.ijepa(
            pixel_values,
            head_mask=head_mask,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            interpolate_pos_encoding=interpolate_pos_encoding,
            return_dict=return_dict,
        )

        sequence_output = outputs[0]

        # There is no [CLS] token, so pool by averaging over the patch dimension before classifying.
        logits = self.classifier(sequence_output.mean(dim=1))

        loss = None
        if labels is not None:
            # Move labels to the logits device to enable model parallelism.
            labels = labels.to(logits.device)
            if self.config.problem_type is None:
                if self.num_labels == 1:
                    self.config.problem_type = "regression"
                elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"

            if self.config.problem_type == "regression":
                loss_fct = MSELoss()
                if self.num_labels == 1:
                    loss = loss_fct(logits.squeeze(), labels.squeeze())
                else:
                    loss = loss_fct(logits, labels)
            elif self.config.problem_type == "single_label_classification":
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            elif self.config.problem_type == "multi_label_classification":
                loss_fct = BCEWithLogitsLoss()
                loss = loss_fct(logits, labels)

        if not return_dict:
            output = (logits,) + outputs[1:]
            return ((loss,) + output) if loss is not None else output

        return ImageClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )


__all__ = [
    "IJepaPreTrainedModel",
    "IJepaModel",
    "IJepaForImageClassification",
]