o
    Zh                  	   @   s  d Z ddlZddlZddlmZ ddlmZmZm	Z	 ddl
Z
ddlZ
ddl
mZ ddlmZ ddlmZmZ dd	lmZ dd
lmZmZmZ ddlmZmZmZ ddlmZ eeZ eG dd deZ!dd Z"dd Z#dIde
j$de%de&de
j$fddZ'G dd dej(Z)G dd dej(Z*G d d! d!ej(Z+G d"d# d#ej(Z,G d$d% d%ej(Z-G d&d' d'ej(Z.G d(d) d)ej(Z/G d*d+ d+ej(Z0G d,d- d-ej(Z1G d.d/ d/ej(Z2G d0d1 d1ej(Z3G d2d3 d3ej(Z4G d4d5 d5ej(Z5eG d6d7 d7eZ6eG d8d9 d9e6Z7G d:d; d;ej(Z8G d<d= d=ej(Z9G d>d? d?ej(Z:G d@dA dAej(Z;G dBdC dCej(Z<edDdEG dFdG dGe6Z=g dHZ>dS )Jz"PyTorch Swin2SR Transformer model.    N)	dataclass)OptionalTupleUnion)nn   )ACT2FN)BaseModelOutputImageSuperResolutionOutput)PreTrainedModel) find_pruneable_heads_and_indicesmeshgridprune_linear_layer)ModelOutputauto_docstringlogging   )Swin2SRConfigc                   @   sP   e Zd ZU dZdZeej ed< dZ	ee
ej  ed< dZee
ej  ed< dS )Swin2SREncoderOutputa  
    Swin2SR encoder's outputs, with potential hidden states and attentions.

    Args:
        last_hidden_state (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`):
            Sequence of hidden-states at the output of the last layer of the model.
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
            shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `torch.FloatTensor` (one for each stage) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    Nlast_hidden_statehidden_states
attentions)__name__
__module____qualname____doc__r   r   torchFloatTensor__annotations__r   r   r    r   r   [/var/www/auris/lib/python3.10/site-packages/transformers/models/swin2sr/modeling_swin2sr.pyr   %   s
   
 r   c                 C   sR   | j \}}}}| ||| ||| ||} | dddddd d|||}|S )z2
    Partitions the given input into windows.
    r   r   r            shapeviewpermute
contiguous)input_featurewindow_size
batch_sizeheightwidthnum_channelswindowsr   r   r    window_partition@   s   $r1   c                 C   sN   | j d }| d|| || |||} | dddddd d|||} | S )z?
    Merges windows to produce higher resolution features.
    r$   r   r   r   r!   r"   r#   r%   )r0   r+   r-   r.   r/   r   r   r    window_reverseM   s   
$r2           Finput	drop_probtrainingreturnc                 C   sd   |dks|s| S d| }| j d fd| jd   }|tj|| j| jd }|  | || }|S )aF  
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
    however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
    layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
    argument.
    r3   r   r   )r   )dtypedevice)r&   ndimr   Zrandr8   r9   Zfloor_div)r4   r5   r6   Z	keep_probr&   Zrandom_tensoroutputr   r   r    	drop_pathX   s   
r=   c                       sT   e Zd ZdZddee ddf fddZdejdejfdd	Z	de
fd
dZ  ZS )Swin2SRDropPathzXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).Nr5   r7   c                    s   t    || _d S N)super__init__r5   )selfr5   	__class__r   r    rA   p   s   

zSwin2SRDropPath.__init__r   c                 C   s   t || j| jS r?   )r=   r5   r6   rB   r   r   r   r    forwardt   s   zSwin2SRDropPath.forwardc                 C   s   d | jS )Nzp={})formatr5   rB   r   r   r    
extra_reprw   s   zSwin2SRDropPath.extra_reprr?   )r   r   r   r   r   floatrA   r   TensorrF   strrI   __classcell__r   r   rC   r    r>   m   s
    r>   c                       s>   e Zd ZdZ fddZdeej deej	 fddZ
  ZS )Swin2SREmbeddingsz?
    Construct the patch and optional position embeddings.
    c                    s`   t    t|| _| jj}|jr tt	d|d |j
| _nd | _t|j| _|j| _d S )Nr   )r@   rA   Swin2SRPatchEmbeddingspatch_embeddingsnum_patchesZuse_absolute_embeddingsr   	Parameterr   zeros	embed_dimposition_embeddingsDropouthidden_dropout_probdropoutr+   )rB   configrQ   rC   r   r    rA      s   

zSwin2SREmbeddings.__init__pixel_valuesr7   c                 C   s4   |  |\}}| jd ur|| j }| |}||fS r?   )rP   rU   rX   )rB   rZ   
embeddingsoutput_dimensionsr   r   r    rF      s
   


zSwin2SREmbeddings.forward)r   r   r   r   rA   r   r   r   r   rK   rF   rM   r   r   rC   r    rN   {   s    &rN   c                       sD   e Zd Zd fdd	Zdeej deejee	 f fddZ
  ZS )	rO   Tc                    s   t    |j}|j|j}}t|tjjr|n||f}t|tjjr%|n||f}|d |d  |d |d  g}|| _	|d |d  | _
tj||j||d| _|r[t|j| _d S d | _d S )Nr   r   )Zkernel_sizeZstride)r@   rA   rT   
image_size
patch_size
isinstancecollectionsabcIterablepatches_resolutionrQ   r   Conv2d
projection	LayerNorm	layernorm)rB   rY   normalize_patchesr/   r]   r^   rc   rC   r   r    rA      s   
  zSwin2SRPatchEmbeddings.__init__r[   r7   c                 C   sN   |  |}|j\}}}}||f}|ddd}| jd ur#| |}||fS )Nr!   r   )re   r&   flatten	transposerg   )rB   r[   _r-   r.   r\   r   r   r    rF      s   


zSwin2SRPatchEmbeddings.forward)T)r   r   r   rA   r   r   r   r   rK   intrF   rM   r   r   rC   r    rO      s    .rO   c                       (   e Zd ZdZ fddZdd Z  ZS )Swin2SRPatchUnEmbeddingszImage to Patch Unembeddingc                    s   t    |j| _d S r?   )r@   rA   rT   )rB   rY   rC   r   r    rA      s   
z!Swin2SRPatchUnEmbeddings.__init__c                 C   s2   |j \}}}|dd|| j|d |d }|S )Nr   r!   r   )r&   rj   r'   rT   )rB   r[   Zx_sizer,   Zheight_widthr/   r   r   r    rF      s   "z Swin2SRPatchUnEmbeddings.forwardr   r   r   r   rA   rF   rM   r   r   rC   r    rn      s    rn   c                	       sh   e Zd ZdZejfdee dedejddf fddZ	d	d
 Z
dejdeeef dejfddZ  ZS )Swin2SRPatchMerginga'  
    Patch Merging Layer.

    Args:
        input_resolution (`Tuple[int]`):
            Resolution of input feature.
        dim (`int`):
            Number of input channels.
        norm_layer (`nn.Module`, *optional*, defaults to `nn.LayerNorm`):
            Normalization layer class.
    input_resolutiondim
norm_layerr7   Nc                    sB   t    || _|| _tjd| d| dd| _|d| | _d S )Nr"   r!   Fbias)r@   rA   rq   rr   r   Linear	reductionnorm)rB   rq   rr   rs   rC   r   r    rA      s
   
zSwin2SRPatchMerging.__init__c                 C   sF   |d dkp|d dk}|r!ddd|d d|d f}t j||}|S )Nr!   r   r   )r   
functionalpad)rB   r*   r-   r.   Z
should_pad
pad_valuesr   r   r    	maybe_pad   s
   zSwin2SRPatchMerging.maybe_padr*   input_dimensionsc                 C   s   |\}}|j \}}}|||||}| |||}|d d dd ddd dd d f }|d d dd ddd dd d f }	|d d dd ddd dd d f }
|d d dd ddd dd d f }t||	|
|gd}||dd| }| |}| |}|S )Nr   r!   r   r$   r"   )r&   r'   r|   r   catrw   rx   )rB   r*   r}   r-   r.   r,   rr   r/   Zinput_feature_0Zinput_feature_1Zinput_feature_2Zinput_feature_3r   r   r    rF      s   $$$$

zSwin2SRPatchMerging.forward)r   r   r   r   r   rf   r   rl   ModulerA   r|   r   rK   rF   rM   r   r   rC   r    rp      s
    **rp   c                       sj   e Zd Zddgf fdd	Zdd Z			ddejd	eej d
eej dee	 de
ej f
ddZ  ZS )Swin2SRSelfAttentionr   c              
      s  t    || dkrtd| d| d|| _t|| | _| j| j | _t|tj	j
r0|n||f| _|| _ttdt|ddf | _ttjddd	d
tjd	dtjd|dd
| _tj| jd d  | jd tjd }tj| jd d  | jd tjd }tt||gddddd d}|d dkr|d d d d d d df  |d d   < |d d d d d d df  |d d   < n3|dkr|d d d d d d df  | jd d   < |d d d d d d df  | jd d   < |d9 }t|t t!|d  t" d }|#t$| j% j&}| j'd|dd t| jd }	t| jd }
tt|	|
gdd}t(|d}|d d d d d f |d d d d d f  }|ddd }|d d d d df  | jd d 7  < |d d d d df  | jd d 7  < |d d d d df  d| jd  d 9  < |)d}| j'd|dd tj| j| j|j*d
| _+tj| j| jdd
| _,tj| j| j|j*d
| _-t.|j/| _0d S )Nr   zThe hidden size (z6) is not a multiple of the number of attention heads ()
   r   r!   i   Trt   inplaceFr8   Zij)Zindexing         ?relative_coords_table
persistentr$   relative_position_index)1r@   rA   
ValueErrornum_attention_headsrl   attention_head_sizeall_head_sizer_   r`   ra   rb   r+   pretrained_window_sizer   rR   r   logZoneslogit_scale
Sequentialrv   ZReLUcontinuous_position_bias_mlpZarangeZint64rJ   stackr   r(   r)   	unsqueezesignlog2absmathtonext
parametersr8   register_bufferri   sumZqkv_biasquerykeyvaluerV   attention_probs_dropout_probrX   )rB   rY   rr   	num_headsr+   r   Zrelative_coords_hZrelative_coords_wr   Zcoords_hZcoords_wZcoordsZcoords_flattenZrelative_coordsr   rC   r   r    rA      s`   
"&((
,.
..&,((,
zSwin2SRSelfAttention.__init__c                 C   s6   |  d d | j| jf }||}|ddddS )Nr$   r   r!   r   r   )sizer   r   r'   r(   )rB   xZnew_x_shaper   r   r    transpose_for_scores8  s   
z)Swin2SRSelfAttention.transpose_for_scoresNFr   attention_mask	head_maskoutput_attentionsr7   c                 C   s  |j \}}}| |}| | |}	| | |}
| |}tjj|ddtjj|	dddd }t	j
| jtdd }|| }| | jd| j}|| jd | jd | jd  | jd | jd  d}|ddd }d	t	| }||d }|d ur|j d }||| || j|||dd }||dd }|d| j||}tjj|dd}| |}|d ur|| }t	||
}|dddd
 }| d d | jf }||}|r||f}|S |f}|S )Nr$   rr   g      Y@)maxr   r   r!      r   )r&   r   r   r   r   r   ry   	normalizerj   r   clampr   r   r   expr   r   r'   r   r   r+   r(   r)   Zsigmoidr   ZsoftmaxrX   matmulr   r   )rB   r   r   r   r   r,   rr   r/   Zmixed_query_layerZ	key_layerZvalue_layerZquery_layerZattention_scoresr   Zrelative_position_bias_tableZrelative_position_biasZ
mask_shapeZattention_probsZcontext_layerZnew_context_layer_shapeoutputsr   r   r    rF   =  sT   

&


zSwin2SRSelfAttention.forwardNNF)r   r   r   rA   r   r   rK   r   r   boolr   rF   rM   r   r   rC   r    r      s"    =r   c                       s8   e Zd Z fddZdejdejdejfddZ  ZS )Swin2SRSelfOutputc                    s*   t    t||| _t|j| _d S r?   )r@   rA   r   rv   denserV   r   rX   rB   rY   rr   rC   r   r    rA   }  s   
zSwin2SRSelfOutput.__init__r   input_tensorr7   c                 C      |  |}| |}|S r?   r   rX   )rB   r   r   r   r   r    rF        

zSwin2SRSelfOutput.forwardr   r   r   rA   r   rK   rF   rM   r   r   rC   r    r   |  s    $r   c                       sd   e Zd Zd fdd	Zdd Z			ddejd	eej d
eej dee	 de
ej f
ddZ  ZS )Swin2SRAttentionr   c                    sL   t    t||||t|tjjr|n||fd| _t||| _	t
 | _d S )NrY   rr   r   r+   r   )r@   rA   r   r_   r`   ra   rb   rB   r   r<   setpruned_heads)rB   rY   rr   r   r+   r   rC   r   r    rA     s   
	zSwin2SRAttention.__init__c                 C   s   t |dkrd S t|| jj| jj| j\}}t| jj|| j_t| jj|| j_t| jj	|| j_	t| j
j|dd| j
_| jjt | | j_| jj| jj | j_| j|| _d S )Nr   r   r   )lenr   rB   r   r   r   r   r   r   r   r<   r   r   union)rB   headsindexr   r   r    prune_heads  s   zSwin2SRAttention.prune_headsNFr   r   r   r   r7   c                 C   s6   |  ||||}| |d |}|f|dd   }|S Nr   r   )rB   r<   )rB   r   r   r   r   Zself_outputsattention_outputr   r   r   r    rF     s   zSwin2SRAttention.forwardr   r   )r   r   r   rA   r   r   rK   r   r   r   r   rF   rM   r   r   rC   r    r     s"    r   c                       2   e Zd Z fddZdejdejfddZ  ZS )Swin2SRIntermediatec                    sJ   t    t|t|j| | _t|jt	rt
|j | _d S |j| _d S r?   )r@   rA   r   rv   rl   	mlp_ratior   r_   Z
hidden_actrL   r   intermediate_act_fnr   rC   r   r    rA     s
   
zSwin2SRIntermediate.__init__r   r7   c                 C   r   r?   )r   r   rE   r   r   r    rF        

zSwin2SRIntermediate.forwardr   r   r   rC   r    r     s    r   c                       r   )Swin2SROutputc                    s4   t    tt|j| || _t|j| _	d S r?   )
r@   rA   r   rv   rl   r   r   rV   rW   rX   r   rC   r   r    rA     s   
zSwin2SROutput.__init__r   r7   c                 C   r   r?   r   rE   r   r   r    rF     r   zSwin2SROutput.forwardr   r   r   rC   r    r     s    r   c                       s   e Zd Z	d fdd	Zdeeeef eeef f fddZdd	 Zd
d Z		dde	j
deeef dee	j dee dee	j
e	j
f f
ddZ  ZS )Swin2SRLayerr3   r   c           	         s   t    || _| |j|jf||f\}}|d | _|d | _t|||| jt|tj	j
r/|n||fd| _tj||jd| _|dkrGt|nt | _t||| _t||| _tj||jd| _d S )Nr   r   Zepsr3   )r@   rA   rq   _compute_window_shiftr+   
shift_sizer   r_   r`   ra   rb   	attentionr   rf   layer_norm_epslayernorm_beforer>   ZIdentityr=   r   intermediater   r<   layernorm_after)	rB   rY   rr   rq   r   drop_path_rater   r   r+   rC   r   r    rA     s*   


	zSwin2SRLayer.__init__r7   c                 C   s6   dd t | j|D }dd t | j||D }||fS )Nc                 S   s    g | ]\}}||kr|n|qS r   r   ).0rwr   r   r    
<listcomp>  s     z6Swin2SRLayer._compute_window_shift.<locals>.<listcomp>c                 S   s"   g | ]\}}}||krd n|qS r   r   )r   r   r   sr   r   r    r     s   " )ziprq   )rB   Ztarget_window_sizeZtarget_shift_sizer+   r   r   r   r    r     s   z"Swin2SRLayer._compute_window_shiftc              	   C   s  | j dkrtjd||df|d}td| j t| j | j  t| j  d f}td| j t| j | j  t| j  d f}d}|D ]}|D ]}	||d d ||	d d f< |d7 }qDq@t|| j}
|
d| j| j }
|
d|
d }||dkt	d|dkt	d}|S d }|S )Nr   r   r   r$   r!   g      Yr3   )
r   r   rS   slicer+   r1   r'   r   Zmasked_fillrJ   )rB   r-   r.   r8   Zimg_maskZheight_slicesZwidth_slicescountZheight_sliceZwidth_sliceZmask_windows	attn_maskr   r   r    get_attn_mask  s.   

$zSwin2SRLayer.get_attn_maskc                 C   sR   | j || j   | j  }| j || j   | j  }ddd|d|f}tj||}||fS )Nr   )r+   r   ry   rz   )rB   r   r-   r.   	pad_rightZ
pad_bottomr{   r   r   r    r|     s
   zSwin2SRLayer.maybe_padNFr   r}   r   r   c                 C   s  |\}}|  \}}}	|}
|||||	}| |||\}}|j\}}}}| jdkr9tj|| j | j fdd}n|}t|| j}|d| j| j |	}| j	|||j
d}|d ur_||j}| j||||d}|d }|d| j| j|	}t|| j||}| jdkrtj|| j| jfdd}n|}|d dkp|d dk}|r|d d d |d |d d f  }|||| |	}| |}|
| | }| |}| |}|| | | }|r||d	 f}|S |f}|S )
Nr   )r   r!   )Zshiftsdimsr$   r   )r   r   r#   r   )r   r'   r|   r&   r   r   Zrollr1   r+   r   r8   r   r9   r   r2   r)   r   r=   r   r<   r   )rB   r   r}   r   r   r-   r.   r,   rk   ZchannelsZshortcutr{   Z
height_padZ	width_padZshifted_hidden_statesZhidden_states_windowsr   Zattention_outputsr   Zattention_windowsZshifted_windowsZ
was_paddedZlayer_outputlayer_outputsr   r   r    rF     sH   

$


zSwin2SRLayer.forward)r3   r   r   NF)r   r   r   rA   r   rl   r   r   r|   r   rK   r   r   r   rF   rM   r   r   rC   r    r     s&    &
r   c                       s`   e Zd ZdZd fdd	Z		ddejdeeef d	e	ej
 d
e	e deej f
ddZ  ZS )Swin2SRStagezh
    This corresponds to the Residual Swin Transformer Block (RSTB) in the original implementation.
    r   c                    s   t     | _| _t fddt|D | _ jdkr.t	ddd| _
n6 jdkrdtt	d dddtjdd	d
t	d d dddtjdd	d
t	d ddd| _
t dd| _t | _d S )Nc              
      s6   g | ]}t  |d  dkrdn jd  dqS )r!   r   )rY   rr   rq   r   r   r   )r   r+   )r   irY   rr   rq   r   r   r   r    r   ]  s    	z)Swin2SRStage.__init__.<locals>.<listcomp>Z1convr   r   Z3convr"   皙?TZnegative_sloper   r   F)rh   )r@   rA   rY   rr   r   
ModuleListrangelayersZresi_connectionrd   convr   	LeakyReLUrO   patch_embedrn   patch_unembed)rB   rY   rr   rq   depthr   r=   r   rC   r   r    rA   X  s(   
	

zSwin2SRStage.__init__NFr   r}   r   r   r7   c                 C   s   |}|\}}t | jD ]\}}	|d ur|| nd }
|	|||
|}|d }q||||f}| ||}| |}| |\}}|| }||f}|rO||dd  7 }|S r   )	enumerater   r   r   r   )rB   r   r}   r   r   Zresidualr-   r.   r   Zlayer_modulelayer_head_maskr   r\   rk   Zstage_outputsr   r   r    rF   z  s   

zSwin2SRStage.forwardr   r   )r   r   r   r   rA   r   rK   r   rl   r   r   r   rF   rM   r   r   rC   r    r   S  s     &
r   c                       sp   e Zd Z fddZ				ddejdeeef deej	 d	ee
 d
ee
 dee
 deeef fddZ  ZS )Swin2SREncoderc                    sn   t    t j| _ | _dd tjd jt	 jddD t
 fddt| jD | _d| _d S )Nc                 S   s   g | ]}|  qS r   )item)r   r   r   r   r    r     s    z+Swin2SREncoder.__init__.<locals>.<listcomp>r   cpu)r9   c                    sd   g | ].}t   jd  d f j|  j| t jd| t jd|d   d dqS )r   r   N)rY   rr   rq   r   r   r=   r   )r   rT   depthsr   r   )r   Z	stage_idxrY   Zdpr	grid_sizer   r    r     s    
*F)r@   rA   r   r   Z
num_stagesrY   r   Zlinspacer   r   r   r   r   stagesgradient_checkpointing)rB   rY   r   rC   r   r    rA     s   
$

zSwin2SREncoder.__init__NFTr   r}   r   r   output_hidden_statesreturn_dictr7   c                 C   s   d}|rdnd }|rdnd }	|r||f7 }t | jD ]L\}
}|d ur&||
 nd }| jr9| jr9| |j||||}n|||||}|d }|d }|d |d f}||f7 }|r\||f7 }|rf|	|dd  7 }	q|sutdd |||	fD S t|||	d	S )
Nr   r   r   r   r$   r!   c                 s   s    | ]	}|d ur|V  qd S r?   r   )r   vr   r   r    	<genexpr>  s    z)Swin2SREncoder.forward.<locals>.<genexpr>r   r   r   )r   r   r   r6   Z_gradient_checkpointing_func__call__tupler   )rB   r   r}   r   r   r   r   Zall_input_dimensionsZall_hidden_statesZall_self_attentionsr   Zstage_moduler   r   r\   r   r   r    rF     s8   	


zSwin2SREncoder.forward)NFFT)r   r   r   rA   r   rK   r   rl   r   r   r   r   r   rF   rM   r   r   rC   r    r     s*    

r   c                   @   s$   e Zd ZeZdZdZdZdd ZdS )Swin2SRPreTrainedModelswin2srrZ   Tc                 C   sx   t |tjtjfr%tjjj|jj| j	j
d |jdur#|jj  dS dS t |tjr:|jj  |jjd dS dS )zInitialize the weights)ZstdNr   )r_   r   rv   rd   r   initZtrunc_normal_weightdatarY   Zinitializer_rangeru   Zzero_rf   Zfill_)rB   moduler   r   r    _init_weights  s   
z$Swin2SRPreTrainedModel._init_weightsN)	r   r   r   r   Zconfig_classZbase_model_prefixZmain_input_nameZsupports_gradient_checkpointingr  r   r   r   r    r    s    r  c                       s   e Zd Z fddZdd Zdd Zdd Ze								dd
ej	de
ej	 de
e de
e de
e deeef fddZ  ZS )Swin2SRModelc                    s   t  | || _|jdkr!|jdkr!tg ddddd}ntdddd}| j	d|dd |j
| _
t|j|jddd| _t|| _t|| jjjd| _tj|j|jd| _t|| _t|j|jddd| _|   d S )	Nr   )gw#?g8EGr?gB`"?r   meanFr   )r   r   )r@   rA   rY   r/   num_channels_outr   Ztensorr'   rS   r   	img_ranger   rd   rT   first_convolutionrN   r[   r   rP   rc   encoderrf   r   rg   rn   r   conv_after_body	post_init)rB   rY   r  rC   r   r    rA     s   

zSwin2SRModel.__init__c                 C   s   | j jS r?   )r[   rP   rH   r   r   r    get_input_embeddings  s   z!Swin2SRModel.get_input_embeddingsc                 C   s*   |  D ]\}}| jj| j| qdS )z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsr  layerr   r   )rB   Zheads_to_pruner  r   r   r   r    _prune_heads  s   zSwin2SRModel._prune_headsc           	      C   sn   |  \}}}}| jj}|||  | }|||  | }tj|d|d|fd}| j|}|| | j }|S )Nr   Zreflect)	r   rY   r+   r   ry   rz   r  Ztype_asr  )	rB   rZ   rk   r-   r.   r+   Zmodulo_pad_heightZmodulo_pad_widthr  r   r   r    pad_and_normalize  s   zSwin2SRModel.pad_and_normalizeNrZ   r   r   r   r   r7   c                 C   s   |d ur|n| j j}|d ur|n| j j}|d ur|n| j j}| |t| j j}|j\}}}}| |}| 	|}	| 
|	\}
}| j|
|||||d}|d }| |}| |||f}| ||	 }|sp|f|dd   }|S t||j|jdS )Nr   r   r   r   r   r   r  )rY   r   r   use_return_dictZget_head_maskr   r   r&   r  r  r[   r  rg   r   r  r	   r   r   )rB   rZ   r   r   r   r   rk   r-   r.   r[   Zembedding_outputr}   Zencoder_outputssequence_outputr<   r   r   r    rF   &  s:   	

	
zSwin2SRModel.forward)NNNN)r   r   r   rA   r  r  r  r   r   r   r   r   r   r   r	   rF   rM   r   r   rC   r    r    s.    
r  c                       rm   )UpsamplezUpsample module.

    Args:
        scale (`int`):
            Scale factor. Supported scales: 2^n and 3.
        num_features (`int`):
            Channel number of intermediate features.
    c                    s   t    || _||d @ dkr=ttt|dD ] }| d| t	|d| ddd | d| t
d qd S |dkrUt	|d| ddd| _t
d| _d S td	| d
)Nr   r   r!   convolution_r"   r   pixelshuffle_	   zScale z/ is not supported. Supported scales: 2^n and 3.)r@   rA   scaler   rl   r   r   Z
add_moduler   rd   PixelShuffleconvolutionpixelshuffler   )rB   r!  num_featuresr   rC   r   r    rA   i  s   
$zUpsample.__init__c                 C   s~   | j | j d @ dkr.ttt| j dD ]}| d| |}| d| |}q|S | j dkr=| |}| |}|S )Nr   r   r!   r  r  r   )r!  r   rl   r   r   __getattr__r#  r$  )rB   Zhidden_stater   r   r   r    rF   x  s   


zUpsample.forwardro   r   r   rC   r    r  _  s    	r  c                       rm   )UpsampleOneStepa  UpsampleOneStep module (the difference with Upsample is that it always only has 1conv + 1pixelshuffle)

    Used in lightweight SR to save parameters.

    Args:
        scale (int):
            Scale factor. Supported scales: 2^n and 3.
        in_channels (int):
            Channel number of intermediate features.
        out_channels (int):
            Channel number of output features.
    c                    s6   t    t||d | ddd| _t|| _d S )Nr!   r   r   )r@   rA   r   rd   r   r"  pixel_shuffle)rB   r!  Zin_channelsZout_channelsrC   r   r    rA     s   
zUpsampleOneStep.__init__c                 C   r   r?   )r   r(  )rB   r   r   r   r    rF     r   zUpsampleOneStep.forwardro   r   r   rC   r    r'    s    r'  c                       $   e Zd Z fddZdd Z  ZS )PixelShuffleUpsamplerc                    sV   t    t|j|ddd| _tjdd| _t|j	|| _
t||jddd| _d S Nr   r   Tr   )r@   rA   r   rd   rT   conv_before_upsampler   
activationr  upscaleupsampler  final_convolutionrB   rY   r%  rC   r   r    rA     s
   
zPixelShuffleUpsampler.__init__c                 C   s,   |  |}| |}| |}| |}|S r?   )r,  r-  r/  r0  )rB   r  r   r   r   r    rF     s
   



zPixelShuffleUpsampler.forwardr   r   r   rA   rF   rM   r   r   rC   r    r*    s    r*  c                       r)  )NearestConvUpsamplerc                    s   t    |jdkrtdt|j|ddd| _tjdd| _	t||ddd| _
t||ddd| _t||ddd| _t||jddd| _tjddd| _d S )	Nr"   zNThe nearest+conv upsampler only supports an upscale factor of 4 at the moment.r   r   Tr   r   r   )r@   rA   r.  r   r   rd   rT   r,  r   r-  conv_up1conv_up2conv_hrr  r0  lrelur1  rC   r   r    rA     s   

zNearestConvUpsampler.__init__c              	   C   sn   |  |}| |}| | tjjj|ddd}| | tjjj|ddd}| 	| | 
|}|S )Nr!   Znearest)Zscale_factormode)r,  r-  r7  r4  r   r   ry   interpolater5  r0  r6  )rB   r  reconstructionr   r   r    rF     s   

zNearestConvUpsampler.forwardr2  r   r   rC   r    r3    s    r3  c                       r)  )PixelShuffleAuxUpsamplerc              	      s   t    |j| _t|j|ddd| _t|j|ddd| _tj	dd| _
t||jddd| _ttd|dddtj	dd| _t|j|| _t||jddd| _d S r+  )r@   rA   r.  r   rd   r/   conv_bicubicrT   r,  r   r-  conv_auxr   conv_after_auxr  r/  r  r0  r1  rC   r   r    rA     s   
$z!PixelShuffleAuxUpsampler.__init__c                 C   s   |  |}| |}| |}| |}| |}| |d d d d d || j d || j f |d d d d d || j d || j f  }| |}||fS r?   )r<  r,  r-  r=  r>  r/  r.  r0  )rB   r  bicubicr-   r.   auxr:  r   r   r    rF     s   




0*
z PixelShuffleAuxUpsampler.forwardr2  r   r   rC   r    r;    s    r;  zm
    Swin2SR Model transformer with an upsampler head on top for image super resolution and restoration.
    )Zcustom_introc                       sz   e Zd Z fddZe						ddeej deej deej dee	 dee	 d	ee	 d
e
eef fddZ  ZS )Swin2SRForImageSuperResolutionc                    s   t  | t|| _|j| _|j| _d}| jdkr!t||| _n4| jdkr-t||| _n(| jdkr=t	|j|j
|j| _n| jdkrIt||| _nt|j
|jddd| _|   d S )N@   r$  pixelshuffle_auxpixelshuffledirectnearest+convr   r   )r@   rA   r  r  	upsamplerr.  r*  r/  r;  r'  rT   r  r3  r   rd   r0  r  r1  rC   r   r    rA     s   




z'Swin2SRForImageSuperResolution.__init__NrZ   r   labelsr   r   r   r7   c                 C   s^  |dur|n| j j}d}|durtd|jdd \}}	| j jdkr5tjj||| j |	| j fddd}
| j	|||||d}|d	 }| jd
v rN| 
|}n!| jdkrh| 
||
||	\}}|| j	j | j	j }n|| | }|| j	j | j	j }|ddddd|| j d|	| j f }|s|f|dd  }|dur|f| S |S t|||j|jdS )a  
        Example:
         ```python
         >>> import torch
         >>> import numpy as np
         >>> from PIL import Image
         >>> import requests

         >>> from transformers import AutoImageProcessor, Swin2SRForImageSuperResolution

         >>> processor = AutoImageProcessor.from_pretrained("caidas/swin2SR-classical-sr-x2-64")
         >>> model = Swin2SRForImageSuperResolution.from_pretrained("caidas/swin2SR-classical-sr-x2-64")

         >>> url = "https://huggingface.co/spaces/jjourney1125/swin2sr/resolve/main/samples/butterfly.jpg"
         >>> image = Image.open(requests.get(url, stream=True).raw)
         >>> # prepare image for the model
         >>> inputs = processor(image, return_tensors="pt")

         >>> # forward pass
         >>> with torch.no_grad():
         ...     outputs = model(**inputs)

         >>> output = outputs.reconstruction.data.squeeze().float().cpu().clamp_(0, 1).numpy()
         >>> output = np.moveaxis(output, source=0, destination=-1)
         >>> output = (output * 255.0).round().astype(np.uint8)  # float32 to uint8
         >>> # you can visualize `output` with `Image.fromarray`
         ```Nz'Training is not supported at the momentr!   rC  r?  F)r   r8  Zalign_cornersr  r   )r$  rD  rE  r   )lossr:  r   r   )rY   r  NotImplementedErrorr&   rF  r   ry   r9  r.  r  r/  r  r  r0  r
   r   r   )rB   rZ   r   rG  r   r   r   rH  r-   r.   r?  r   r  r:  r@  r<   r   r   r    rF     sJ   %

,z&Swin2SRForImageSuperResolution.forward)NNNNNN)r   r   r   rA   r   r   r   r   Z
LongTensorr   r   r   r
   rF   rM   r   r   rC   r    rA    s0    
rA  )rA  r  r  )r3   F)?r   collections.abcr`   r   dataclassesr   typingr   r   r   r   Ztorch.utils.checkpointr   Zactivationsr   Zmodeling_outputsr	   r
   Zmodeling_utilsr   Zpytorch_utilsr   r   r   utilsr   r   r   Zconfiguration_swin2srr   Z
get_loggerr   loggerr   r1   r2   rK   rJ   r   r=   r   r>   rN   rO   rn   rp   r   r   r   r   r   r   r   r   r  r  r  r'  r*  r3  r;  rA  __all__r   r   r   r    <module>   s`   
 7 /}GGk&q