"""PyTorch TextNet model."""

from typing import Any, List, Optional, Tuple, Union

import torch
import torch.nn as nn
from torch import Tensor
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

from transformers import PreTrainedModel
from transformers.activations import ACT2CLS
from transformers.modeling_outputs import (
    BackboneOutput,
    BaseModelOutputWithNoAttention,
    BaseModelOutputWithPoolingAndNoAttention,
    ImageClassifierOutputWithNoAttention,
)
from transformers.models.textnet.configuration_textnet import TextNetConfig
from transformers.utils import logging
from transformers.utils.backbone_utils import BackboneMixin

from ...utils import auto_docstring


logger = logging.get_logger(__name__)


class TextNetConvLayer(nn.Module):
    def __init__(self, config: TextNetConfig):
        super().__init__()
        self.kernel_size = config.stem_kernel_size
        self.stride = config.stem_stride
        self.activation_function = config.stem_act_func

        padding = (
            (config.stem_kernel_size[0] // 2, config.stem_kernel_size[1] // 2)
            if isinstance(config.stem_kernel_size, tuple)
            else config.stem_kernel_size // 2
        )

        self.conv = nn.Conv2d(
            config.stem_num_channels,
            config.stem_out_channels,
            kernel_size=config.stem_kernel_size,
            stride=config.stem_stride,
            padding=padding,
            bias=False,
        )
        self.batch_norm = nn.BatchNorm2d(config.stem_out_channels, config.batch_norm_eps)

        self.activation = nn.Identity()
        if self.activation_function is not None:
            self.activation = ACT2CLS[self.activation_function]()

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        hidden_states = self.conv(hidden_states)
        hidden_states = self.batch_norm(hidden_states)
        return self.activation(hidden_states)


class TextNetRepConvLayer(nn.Module):
    r"""
    This layer supports re-parameterization by combining multiple convolutional branches
    (e.g., main convolution, vertical, horizontal, and identity branches) during training.
    At inference time, these branches can be collapsed into a single convolution for
    efficiency, as per the re-parameterization paradigm.

    The "Rep" in the name stands for "re-parameterization" (introduced by RepVGG).
    """

    def __init__(self, config: TextNetConfig, in_channels: int, out_channels: int, kernel_size: int, stride: int):
        super().__init__()

        self.num_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride

        padding = ((kernel_size[0] - 1) // 2, (kernel_size[1] - 1) // 2)

        self.activation_function = nn.ReLU()

        self.main_conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=False,
        )
        self.main_batch_norm = nn.BatchNorm2d(num_features=out_channels, eps=config.batch_norm_eps)

        vertical_padding = ((kernel_size[0] - 1) // 2, 0)
        horizontal_padding = (0, (kernel_size[1] - 1) // 2)

        if kernel_size[1] != 1:
            self.vertical_conv = nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=(kernel_size[0], 1),
                stride=stride,
                padding=vertical_padding,
                bias=False,
            )
            self.vertical_batch_norm = nn.BatchNorm2d(num_features=out_channels, eps=config.batch_norm_eps)
        else:
            self.vertical_conv, self.vertical_batch_norm = None, None

        if kernel_size[0] != 1:
            self.horizontal_conv = nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=(1, kernel_size[1]),
                stride=stride,
                padding=horizontal_padding,
                bias=False,
            )
            self.horizontal_batch_norm = nn.BatchNorm2d(num_features=out_channels, eps=config.batch_norm_eps)
        else:
            self.horizontal_conv, self.horizontal_batch_norm = None, None

        self.rbr_identity = (
            nn.BatchNorm2d(num_features=in_channels, eps=config.batch_norm_eps)
            if out_channels == in_channels and stride == 1
            else None
        )

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        main_outputs = self.main_conv(hidden_states)
        main_outputs = self.main_batch_norm(main_outputs)

        # applies a convolution with a vertical kernel
        if self.vertical_conv is not None:
            vertical_outputs = self.vertical_conv(hidden_states)
            vertical_outputs = self.vertical_batch_norm(vertical_outputs)
            main_outputs = main_outputs + vertical_outputs

        # applies a convolution with a horizontal kernel
        if self.horizontal_conv is not None:
            horizontal_outputs = self.horizontal_conv(hidden_states)
            horizontal_outputs = self.horizontal_batch_norm(horizontal_outputs)
            main_outputs = main_outputs + horizontal_outputs

        if self.rbr_identity is not None:
            id_out = self.rbr_identity(hidden_states)
            main_outputs = main_outputs + id_out

        return self.activation_function(main_outputs)
r6U\\ 6ell 6 6r8   rH   c                   :   ^  \ rS rSrS\S\4U 4S jjrS rSrU =r	$ )TextNetStage   r   depthc                 r  > [         TU ]  5         UR                  U   nUR                  U   n[	        U5      nUR
                  U   nUR
                  US-      nU/U/US-
  -  -   nU/U-  n	/ n
[        XX45       H  nU
R                  [        U/UQ76 5        M      [        R                  " U
5      U l        g )Nr   )r"   r#   conv_layer_kernel_sizesconv_layer_strideslenhidden_sizeszipappendrH   r*   
ModuleListstage)r4   r   rg   r   r   
num_layersstage_in_channel_sizestage_out_channel_sizerJ   rK   rp   stage_configr5   s               r6   r#   TextNetStage.__init__   s    44U;**51%
 & 3 3E :!'!4!4UQY!?,-1G0HJYZN0[[./*<;OLLL,VClCD P]]5)
r8   c                 <    U R                    H  nU" U5      nM     U$ r<   rp   )r4   hidden_stateblocks      r6   r=   TextNetStage.forward   s     ZZE .L  r8   rw   )
r?   r@   rA   rB   r   rc   r#   r=   rD   rE   rF   s   @r6   re   re      s     *} *S *" r8   re   c            	       r   ^  \ rS rSrS\4U 4S jjr  S
S\R                  S\\	   S\\	   S\
4S jjrS	rU =r$ )TextNetEncoder   r   c                    > [         TU ]  5         / n[        UR                  5      n[	        U5       H  nUR                  [        X5      5        M     [        R                  " U5      U l	        g r<   )
r"   r#   rk   ri   rangern   re   r*   ro   stages)r4   r   r   
num_stagesstage_ixr5   s        r6   r#   TextNetEncoder.__init__   sU    778
j)HMM,v89 * mmF+r8   rx   output_hidden_statesreturn_dictr:   c                     U/nU R                    H  nU" U5      nUR                  U5        M     U(       d  U4nU(       a  Xd4-   $ U$ [        XS9$ )N)last_hidden_stater9   )r   rn   r   )r4   rx   r   r   r9   rp   outputs          r6   r=   TextNetEncoder.forward   s[     &[[E .L  . ! "_F0D6,,P&P-jjr8   )r   rO   )r?   r@   rA   rB   r   r#   rC   r   r   boolr   r=   rD   rE   rF   s   @r6   r|   r|      sY    ,} , 04&*	kllk 'tnk d^	k
 
(k kr8   r|   c                   &    \ rS rSr\rSrSrS rSr	g)TextNetPreTrainedModel   textnetpixel_valuesc                 &   [        U[        R                  [        R                  45      (       ak  UR                  R
                  R                  SU R                  R                  S9  UR                  b%  UR                  R
                  R                  5         g g [        U[        R                  5      (       aX  UR                  R
                  R                  S5        UR                  b%  UR                  R
                  R                  5         g g g )Ng        )meanstdg      ?)r(   r*   Linearr+   weightdatanormal_r   initializer_ranger!   zero_r/   fill_)r4   modules     r6   _init_weights$TextNetPreTrainedModel._init_weights   s    fryy"))455MM&&CT[[5R5R&S{{&  &&( '//MM$$S){{&  &&( ' 0r8    N)
r?   r@   rA   rB   r   config_classbase_model_prefixmain_input_namer   rD   r   r8   r6   r   r      s     L!$O)r8   r   c                      ^  \ rS rSrU 4S jr\ S	S\S\\   S\\   S\	\
\\\   4   \
\   \4   4S jj5       rSrU =r$ )
TextNetModel   c                    > [         TU ]  U5        [        U5      U l        [	        U5      U l        [        R                  " S5      U l        U R                  5         g )N)r   r   )
r"   r#   r   stemr|   encoderr*   AdaptiveAvgPool2dpooler	post_initr4   r   r5   s     r6   r#   TextNetModel.__init__   sD     $V,	%f-**62r8   r   r   r   r:   c                 D   Ub  UOU R                   R                  nUb  UOU R                   R                  nU R                  U5      nU R	                  XBUS9nUS   nU R                  U5      nU(       d  Xg4nU(       a  XS   4-   $ U$ [        UUU(       a  US   S9$ S S9$ )Nr   r   r   r   )r   pooler_outputr9   )r   use_return_dictr   r   r   r   r   )	r4   r   r   r   rx   encoder_outputsr   pooled_outputr   s	            r6   r=   TextNetModel.forward   s     &1%<k$++B]B]$8$D $++JjJj 	 yy.,,Q\ ' 
 ,A.$56'7F5I6Q/11UvU7/'0D/!,
 	
 KO
 	
r8   )r   r   r   rO   )r?   r@   rA   rB   r#   r   r   r   r   r   r   r   r   r   r=   rD   rE   rF   s   @r6   r   r      sg     os
"
:B4.
^fgk^l
	uS$s)^$eCj2ZZ	[
 
r8   r   z


@auto_docstring(
    custom_intro="""
    TextNet Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    """
)
class TextNetForImageClassification(TextNetPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.textnet = TextNetModel(config)
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.flatten = nn.Flatten()
        self.fc = nn.Linear(config.hidden_sizes[-1], config.num_labels) if config.num_labels > 0 else nn.Identity()

        self.classifier = nn.ModuleList([self.avg_pool, self.flatten])

        # initialize weights and apply final processing
        self.post_init()

    @auto_docstring
    def forward(
        self,
        pixel_values: Optional[torch.FloatTensor] = None,
        labels: Optional[torch.LongTensor] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> ImageClassifierOutputWithNoAttention:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).

        Examples:
        ```python
        >>> import torch
        >>> import requests
        >>> from transformers import TextNetForImageClassification, TextNetImageProcessor
        >>> from PIL import Image

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> processor = TextNetImageProcessor.from_pretrained("czczup/textnet-base")
        >>> model = TextNetForImageClassification.from_pretrained("czczup/textnet-base")

        >>> inputs = processor(images=image, return_tensors="pt")
        >>> with torch.no_grad():
        ...     outputs = model(**inputs)
        >>> outputs.logits.shape
        torch.Size([1, 2])
        ```"""
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
        outputs = self.textnet(pixel_values, output_hidden_states=output_hidden_states, return_dict=return_dict)
        last_hidden_state = outputs[0]
        for layer in self.classifier:
            last_hidden_state = layer(last_hidden_state)

        logits = self.fc(last_hidden_state)
        loss = None

        if labels is not None:
            if self.config.problem_type is None:
                if self.num_labels == 1:
                    self.config.problem_type = "regression"
                elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"

            if self.config.problem_type == "regression":
                loss_fct = MSELoss()
                if self.num_labels == 1:
                    loss = loss_fct(logits.squeeze(), labels.squeeze())
                else:
                    loss = loss_fct(logits, labels)
            elif self.config.problem_type == "single_label_classification":
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            elif self.config.problem_type == "multi_label_classification":
                loss_fct = BCEWithLogitsLoss()
                loss = loss_fct(logits, labels)

        if not return_dict:
            output = (logits,) + outputs[2:]
            return (loss,) + output if loss is not None else output

        return ImageClassifierOutputWithNoAttention(loss=loss, logits=logits, hidden_states=outputs.hidden_states)


@auto_docstring(
    custom_intro="""
    TextNet backbone, to be used with frameworks like DETR and MaskFormer.
    """
)
class TextNetBackbone(TextNetPreTrainedModel, BackboneMixin):
    def __init__(self, config):
        super().__init__(config)
        super()._init_backbone(config)

        self.textnet = TextNetModel(config)
        self.num_features = config.hidden_sizes

        # initialize weights and apply final processing
        self.post_init()

    @auto_docstring
    def forward(
        self, pixel_values: Tensor, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None
    ) -> Union[Tuple[Tuple], BackboneOutput]:
        r"""
        Examples:

        ```python
        >>> import torch
        >>> import requests
        >>> from PIL import Image
        >>> from transformers import AutoImageProcessor, AutoBackbone

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> processor = AutoImageProcessor.from_pretrained("czczup/textnet-base")
        >>> model = AutoBackbone.from_pretrained("czczup/textnet-base")

        >>> inputs = processor(image, return_tensors="pt")
        >>> with torch.no_grad():
        >>>     outputs = model(**inputs)
        ```"""
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )

        outputs = self.textnet(pixel_values, output_hidden_states=True, return_dict=return_dict)

        hidden_states = outputs.hidden_states if return_dict else outputs[1]

        feature_maps = ()
        for idx, stage in enumerate(self.stage_names):
            if stage in self.out_features:
                feature_maps += (hidden_states[idx],)

        if not return_dict:
            output = (feature_maps,)
            if output_hidden_states:
                hidden_states = outputs.hidden_states if return_dict else outputs[1]
                output += (hidden_states,)
            return output

        return BackboneOutput(
            feature_maps=feature_maps,
            hidden_states=outputs.hidden_states if output_hidden_states else None,
            attentions=None,
        )


__all__ = ["TextNetBackbone", "TextNetModel", "TextNetPreTrainedModel", "TextNetForImageClassification"]
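

# ---------------------------------------------------------------------------------
# Illustrative smoke test (not part of the original module): runs the base model on
# a random image-sized tensor, assuming the default TextNetConfig values.
if __name__ == "__main__":
    config = TextNetConfig()
    model = TextNetModel(config).eval()
    pixel_values = torch.randn(1, config.stem_num_channels, 224, 224)
    with torch.no_grad():
        outputs = model(pixel_values, output_hidden_states=True)
    print(outputs.last_hidden_state.shape)  # deepest feature map
    print(outputs.pooler_output.shape)  # adaptive-average-pooled to a 2x2 grid
    print(len(outputs.hidden_states))  # stem output plus one entry per stage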