
    fThQ8                        S r SSKJrJrJrJr  SSKrSSKJr  SSKJ	r	  SSK
Jr  SSKJr  SS	KJr  SS
KJr  SSKJr   " S S\R(                  5      r " S S\R(                  5      r " S S\R(                  5      r " S S\R(                  5      r " S S\R(                  5      r\ " S S\5      5       r\" SS9 " S S\5      5       rSS/rg)zrPyTorch UperNet model. Based on OpenMMLab's implementation, found in https://github.com/open-mmlab/mmsegmentation.    )ListOptionalTupleUnionN)nn)CrossEntropyLoss   )SemanticSegmenterOutput)PreTrainedModel)auto_docstring)load_backbone   )UperNetConfigc                      ^  \ rS rSrSr   SS\S\S\\\\\4   4   S\\\\\4   \4   S\	S\\\\\4   4   S	S
4U 4S jjjr
S\R                  S	\R                  4S jrSrU =r$ )UperNetConvModule   z
A convolutional block that bundles conv/norm/activation layers. This block simplifies the usage of convolution
layers, which are commonly used with a norm layer (e.g., BatchNorm) and activation layer (e.g., ReLU).
in_channelsout_channelskernel_sizepaddingbiasdilationreturnNc           	         > [         TU ]  5         [        R                  " UUUUUUS9U l        [        R
                  " U5      U l        [        R                  " 5       U l        g )N)r   r   r   r   r   r   )	super__init__r   Conv2dconvBatchNorm2d
batch_normReLU
activation)selfr   r   r   r   r   r   	__class__s          d/var/www/auris/envauris/lib/python3.13/site-packages/transformers/models/upernet/modeling_upernet.pyr   UperNetConvModule.__init__$   sQ     	II#%#
	 ..6'')    inputc                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ N)r   r    r"   )r#   r(   outputs      r%   forwardUperNetConvModule.forward9   s1    5!((r'   )r"   r    r   )r   Fr   )__name__
__module____qualname____firstlineno____doc__intr   r   strboolr   torchTensorr,   __static_attributes____classcell__r$   s   @r%   r   r      s     5601$$ $ 3c3h/0	$
 sE#s(OS01$ $ U38_,-$ 
$ $*U\\ ell  r'   r   c                   v   ^  \ rS rSrS\S\S\SS4U 4S jjrS\R                  S\R                  4S	 jrS
r	U =r
$ )UperNetPyramidPoolingBlockA   
pool_scaler   channelsr   Nc                    > [         TU ]  5         [        R                  " U5      [	        X#SS9/U l        [        U R
                  5       H   u  pEU R                  [        U5      U5        M"     g )Nr   r   )	r   r   r   AdaptiveAvgPool2dr   layers	enumerate
add_moduler4   )r#   r>   r   r?   ilayerr$   s         r%   r   #UperNetPyramidPoolingBlock.__init__B   sX      ,kC
 "$++.HAOOCFE* /r'   r(   c                 @    UnU R                    H  nU" U5      nM     U$ r*   rC   )r#   r(   hidden_staterG   s       r%   r,   "UperNetPyramidPoolingBlock.forwardK   s%    [[E .L !r'   rJ   )r.   r/   r0   r1   r3   r   r6   r7   r,   r8   r9   r:   s   @r%   r<   r<   A   sD    +3 +S +C +D +U\\ ell  r'   r<   c            
          ^  \ rS rSrSrS\\S4   S\S\S\SS	4
U 4S
 jjrS\	R                  S\\	R                     4S jrSrU =r$ )UperNetPyramidPoolingModuleR   aQ  
Pyramid Pooling Module (PPM) used in PSPNet.

Args:
    pool_scales (`Tuple[int]`):
        Pooling scales used in Pooling Pyramid Module.
    in_channels (`int`):
        Input channels.
    channels (`int`):
        Channels after modules, before conv_seg.
    align_corners (`bool`):
        align_corners argument of F.interpolate.
pool_scales.r   r?   align_cornersr   Nc                   > [         TU ]  5         Xl        X@l        X l        X0l        / U l        [        U5       HE  u  pV[        XbUS9nU R                  R                  U5        U R                  [        U5      U5        MG     g )N)r>   r   r?   )r   r   rP   rQ   r   r?   blocksrD   r<   appendrE   r4   )	r#   rP   r   r?   rQ   rF   r>   blockr$   s	           r%   r   $UperNetPyramidPoolingModule.__init__a   sn    &*& &{3MA.*hpqEKKu%OOCFE* 4r'   xc                     / nU R                    HV  nU" U5      n[        R                  R                  XAR	                  5       SS  SU R
                  S9nUR                  U5        MX     U$ )N   bilinearsizemoderQ   )rS   r   
functionalinterpolater\   rQ   rT   )r#   rW   ppm_outsppmppm_outupsampled_ppm_outs         r%   r,   #UperNetPyramidPoolingModule.forwardm   sg    ;;C!fG " 9 9ffhqrl4K]K] !: ! OO-.  r'   )rQ   rS   r?   r   rP   )r.   r/   r0   r1   r2   r   r3   r5   r   r6   r7   r   r,   r8   r9   r:   s   @r%   rN   rN   R   s`    
+E#s(O 
+# 
+QT 
+ei 
+nr 
+ $u||*<  r'   rN   c                   l   ^  \ rS rSrSrU 4S jrS rS\R                  S\R                  4S jr	Sr
U =r$ )	UperNetHeadx   z
Unified Perceptual Parsing for Scene Understanding. This head is the implementation of
[UPerNet](https://arxiv.org/abs/1807.10221).
c                   > [         TU ]  5         Xl        UR                  U l        X l        UR
                  U l        SU l        [        R                  " U R                  UR                  SS9U l        [        U R                  U R                  S   U R                  U R                  S9U l        [        U R                  S   [        U R                  5      U R                  -  -   U R                  SSS9U l        [        R"                  " 5       U l        [        R"                  " 5       U l        U R                  S S  Hm  n[        X R                  SS9n[        U R                  U R                  SSS9nU R$                  R)                  U5        U R&                  R)                  U5        Mo     [        [        U R                  5      U R                  -  U R                  SSS9U l        g )NFr   rA   )rQ   r	   r   r   )r   r   configrP   r   hidden_sizer?   rQ   r   r   
num_labels
classifierrN   psp_modulesr   len
bottleneck
ModuleListlateral_convs	fpn_convsrT   fpn_bottleneck)r#   rk   r   l_convfpn_convr$   s        r%   r   UperNetHead.__init__~   s   !--&**"))DMM63D3DRST 7R MM,,	
 ,R 3t'7'7#84==#HHMM	
  ]]_++CR0K&{MMqQF(ST^_`H%%f-NN!!(+	 1 0  !DMM1MM	
r'   c                     US   nU/nUR                  U R                  U5      5        [        R                  " USS9nU R	                  U5      nU$ )Nri   r   dim)extendro   r6   catrq   )r#   inputsrW   psp_outsr+   s        r%   psp_forwardUperNetHead.psp_forward   sL    2J3((+,99X1-*r'   encoder_hidden_statesr   c           	      @   [        U R                  5       VVs/ s H  u  p#U" X   5      PM     nnnUR                  U R                  U5      5        [	        U5      n[        US-
  SS5       HP  nXBS-
     R                  SS  nXBS-
     [        R                  R                  XB   USU R                  S9-   XBS-
  '   MR     [        US-
  5       Vs/ s H  o R                  U   " XB   5      PM     nnUR                  US   5        [        US-
  SS5       HA  n[        R                  R                  Xr   US   R                  SS  SU R                  S9Xr'   MC     [        R                  " USS9nU R                  U5      nU R                  U5      nU$ s  snnf s  snf )Nr   r   ri   rY   rZ   r[   rz   )rD   rs   rT   r   rp   rangeshaper   r^   r_   rQ   rt   r6   r}   ru   rn   )	r#   r   rF   lateral_convlateralsused_backbone_levels
prev_shapefpn_outsr+   s	            r%   r,   UperNetHead.forward   s   R[\`\n\nRopRoqL!6!9:Rop(()>?@  #8}+a/B7A!a%..qr2J&1uo0I0I*:TM_M_ 1J 1 HUO 8 =BBVYZBZ<[\<[qNN1%hk2<[\%+a/B7A--33(1+"3"3AB"7jX\XjXj 4 HK 8 99X1-$$X.(3 q ]s   F F)rQ   rq   r?   rn   rk   ru   rt   r   rs   rP   ro   )r.   r/   r0   r1   r2   r   r   r6   r7   r,   r8   r9   r:   s   @r%   rf   rf   x   s3    
%
NU\\ ell  r'   rf   c                      ^  \ rS rSrSr SS\S\S\\\\\4   4   SS4U 4S jjjrS	\	R                  S\	R                  4S
 jrSrU =r$ )UperNetFCNHead   a  
Fully Convolution Networks for Semantic Segmentation. This head is the implementation of
[FCNNet](https://arxiv.org/abs/1411.4038>).

Args:
    config:
        Configuration.
    in_channels (int):
        Number of input channels.
    kernel_size (int):
        The kernel size for convs in the head. Default: 3.
    dilation (int):
        The dilation rate for convs in the head. Default: 1.
in_indexr   r   r   Nc                 >  > [         TU ]  5         Xl        UR                  U l        UR
                  U l        UR                  U l        UR                  U l
        X l        US-  U-  n/ nUR                  [        U R                  U R                  X5US95        [        U R                  S-
  5       H2  nUR                  [        U R                  U R                  X5US95        M4     U R                  S:X  a  [        R                   " 5       U l        O[        R$                  " U6 U l        U R                  (       a4  [        U R                  U R                  -   U R                  X3S-  S9U l        [        R(                  " U R                  UR*                  SS9U l        g )NrY   )r   r   r   r   r   rj   rA   )r   r   rk   auxiliary_in_channelsr   auxiliary_channelsr?   auxiliary_num_convs	num_convsauxiliary_concat_inputconcat_inputr   rT   r   r   r   Identityconvs
Sequentialconv_catr   rm   rn   )	r#   rk   r   r   r   conv_paddingr   rF   r$   s	           r%   r   UperNetFCNHead.__init__   sM    	!771133"99 #q(H4  $--[iq	

 t~~)*ALL!MM4==kjr + >>QDJ.DJ-  4==0$--[qrbrDM ))DMM63D3DRSTr'   r   c                     XR                      nU R                  U5      nU R                  (       a%  U R                  [        R
                  " X#/SS95      nU R                  U5      nU$ )Nr   rz   )r   r   r   r   r6   r}   rn   )r#   r   hidden_statesr+   s       r%   r,   UperNetFCNHead.forward   sT    -mm<M*]]599m-D!#LMF(r'   )	r?   rn   r   rk   r   r   r   r   r   )rY   r	   r   )r.   r/   r0   r1   r2   r3   r   r   r   r6   r7   r,   r8   r9   r:   s   @r%   r   r      sw      hi"U #"U69"UINsTYZ]_bZbTcOcId"U	"U "UHU\\ ell  r'   r   c                   &    \ rS rSr\rSr/ rS rSr	g)UperNetPreTrainedModeli
  pixel_valuesc                    [        U[        R                  5      (       ak  UR                  R                  R                  SU R                  R                  S9  UR                  b%  UR                  R                  R                  5         g g [        U[        R                  5      (       aJ  UR                  R                  R                  S5        UR                  R                  R                  5         g g )Ng        )meanstdg      ?)
isinstancer   r   weightdatanormal_rk   initializer_ranger   zero_r   fill_)r#   modules     r%   _init_weights$UperNetPreTrainedModel._init_weights  s    fbii((MM&&CT[[5R5R&S{{&  &&( '//MM$$S)KK""$ 0r'    N)
r.   r/   r0   r1   r   config_classmain_input_name_no_split_modulesr   r8   r   r'   r%   r   r   
  s     L$O%r'   r   zW
    UperNet framework leveraging any vision backbone e.g. for ADE20k, CityScapes.
    )custom_introc                      ^  \ rS rSrU 4S jr\     SS\\R                     S\\	   S\\	   S\\R                     S\\	   S\
\\4   4S	 jj5       rS
rU =r$ )UperNetForSemanticSegmentationi  c                    > [         TU ]  U5        [        U5      U l        [	        XR                  R
                  S9U l        UR                  (       a  [        U5      OS U l	        U R                  5         g )N)r   )r   r   r   backbonerf   r?   decode_headuse_auxiliary_headr   auxiliary_head	post_init)r#   rk   r$   s     r%   r   'UperNetForSemanticSegmentation.__init__   sY     %f- 'v==;Q;QR8>8Q8QnV4W[ 	r'   r   output_attentionsoutput_hidden_stateslabelsreturn_dictr   c                    Ub%  U R                   R                  S:X  a  [        S5      eUb  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R
                  nU R                  R                  XUS9nUR                  nU R                  U5      n[        R                  R                  XR                  SS SSS9nSn	U R                  b=  U R                  U5      n	[        R                  R                  XR                  SS SSS9n	Sn
UbK  [        U R                   R                   S	9nU" X5      n
U	b#  U" X5      nXR                   R"                  U-  -  n
U(       d%  U(       a
  U4USS -   nO	U4USS -   nU
b  U
4U-   $ U$ [%        U
UUR&                  UR(                  S
9$ )a(  
labels (`torch.LongTensor` of shape `(batch_size, height, width)`, *optional*):
    Ground truth semantic segmentation maps for computing the loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels > 1`, a classification loss is computed (Cross-Entropy).

Examples:
```python
>>> from transformers import AutoImageProcessor, UperNetForSemanticSegmentation
>>> from PIL import Image
>>> from huggingface_hub import hf_hub_download

>>> image_processor = AutoImageProcessor.from_pretrained("openmmlab/upernet-convnext-tiny")
>>> model = UperNetForSemanticSegmentation.from_pretrained("openmmlab/upernet-convnext-tiny")

>>> filepath = hf_hub_download(
...     repo_id="hf-internal-testing/fixtures_ade20k", filename="ADE_val_00000001.jpg", repo_type="dataset"
... )
>>> image = Image.open(filepath).convert("RGB")

>>> inputs = image_processor(images=image, return_tensors="pt")

>>> outputs = model(**inputs)

>>> logits = outputs.logits  # shape (batch_size, num_labels, height, width)
>>> list(logits.shape)
[1, 150, 512, 512]
```Nr   z/The number of labels should be greater than one)r   r   rY   rZ   Fr[   )ignore_index)losslogitsr   
attentions)rk   rm   
ValueErroruse_return_dictr   r   r   forward_with_filtered_kwargsfeature_mapsr   r   r^   r_   r   r   r   loss_ignore_indexauxiliary_loss_weightr
   r   r   )r#   r   r   r   r   r   outputsfeaturesr   auxiliary_logitsr   loss_fctauxiliary_lossr+   s                 r%   r,   &UperNetForSemanticSegmentation.forward,  s   H $++"8"8A"=NOO%0%<k$++B]B]$8$D $++JjJj 	 2C1N-TXT_T_TqTq--<<Wh = 
 ''!!(+**68J8J128NU_ot*u*#228<!}}88 '9'9!"'=J^c  9   'T[[5R5RSHF+D+!)*:!C99NJJ# WQR[0 WQR[0)-)9TGf$EvE&!//))	
 	
r'   )r   r   r   )NNNNN)r.   r/   r0   r1   r   r   r   r6   r7   r5   r   tupler
   r,   r8   r9   r:   s   @r%   r   r     s    
  04,0/3)-&*P
u||,P
 $D>P
 'tn	P

 &P
 d^P
 
u--	.P
 P
r'   r   )r2   typingr   r   r   r   r6   r   torch.nnr   modeling_outputsr
   modeling_utilsr   utilsr   utils.backbone_utilsr   configuration_upernetr   Moduler   r<   rN   rf   r   r   r   __all__r   r'   r%   <module>r      s    y / /   % 7 - # 1 0 		  F "#")) #LQ")) Qh;RYY ;| %_ % % 
^
%; ^

^
B ,-E
Fr'   