o
    Zh;                     @   s|  d Z ddlZddlmZ ddlZddlZddlmZmZ ddlm	Z	m
Z
mZ ddlmZ ddlmZmZmZ dd	lmZ dd
lmZmZ ddlmZ eeZG dd dejZG dd dejZG dd dejZG dd dejZ G dd dejZ!G dd dejZ"G dd dejZ#G dd dejZ$eG dd deZ%eG dd  d e%Z&ed!d"G d#d$ d$e%Z'g d%Z(dS )&zPyTorch RegNet model.    N)Optional)Tensornn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)BaseModelOutputWithNoAttention(BaseModelOutputWithPoolingAndNoAttention$ImageClassifierOutputWithNoAttention)PreTrainedModel)auto_docstringlogging   )RegNetConfigc                       sL   e Zd Z				ddededededed	ee f fd
dZdd Z  ZS )RegNetConvLayerr   r   reluin_channelsout_channelskernel_sizestridegroups
activationc              	      sX   t    tj|||||d |dd| _t|| _|d ur%t| | _	d S t | _	d S )N   F)r   r   paddingr   bias)
super__init__r   Conv2dconvolutionBatchNorm2dnormalizationr	   Identityr   )selfr   r   r   r   r   r   	__class__ Y/var/www/auris/lib/python3.10/site-packages/transformers/models/regnet/modeling_regnet.pyr   (   s   
		$zRegNetConvLayer.__init__c                 C   s"   |  |}| |}| |}|S N)r    r"   r   r$   hidden_stater'   r'   r(   forward>   s   


zRegNetConvLayer.forward)r   r   r   r   )	__name__
__module____qualname__intr   strr   r,   __classcell__r'   r'   r%   r(   r   '   s&    r   c                       s.   e Zd ZdZdef fddZdd Z  ZS )RegNetEmbeddingszO
    RegNet Embeddings (stem) composed of a single aggressive convolution.
    configc                    s0   t    t|j|jdd|jd| _|j| _d S )Nr   r   )r   r   r   )r   r   r   num_channelsembedding_size
hidden_actembedderr$   r4   r%   r'   r(   r   J   s
   
zRegNetEmbeddings.__init__c                 C   s*   |j d }|| jkrtd| |}|S )Nr   zeMake sure that the channel dimension of the pixel values match with the one set in the configuration.)shaper5   
ValueErrorr8   )r$   pixel_valuesr5   r+   r'   r'   r(   r,   Q   s   


zRegNetEmbeddings.forward)r-   r.   r/   __doc__r   r   r,   r2   r'   r'   r%   r(   r3   E   s    r3   c                       sB   e Zd ZdZddededef fddZded	efd
dZ  ZS )RegNetShortCutz
    RegNet shortcut, used to project the residual features to the correct size. If needed, it is also used to
    downsample the input using `stride=2`.
    r   r   r   r   c                    s0   t    tj||d|dd| _t|| _d S )Nr   F)r   r   r   )r   r   r   r   r    r!   r"   )r$   r   r   r   r%   r'   r(   r   b   s   
zRegNetShortCut.__init__inputreturnc                 C   s   |  |}| |}|S r)   )r    r"   )r$   r?   r+   r'   r'   r(   r,   g   s   

zRegNetShortCut.forward)r   )	r-   r.   r/   r=   r0   r   r   r,   r2   r'   r'   r%   r(   r>   \   s    r>   c                       s2   e Zd ZdZdedef fddZdd Z  ZS )RegNetSELayerz|
    Squeeze and Excitation layer (SE) proposed in [Squeeze-and-Excitation Networks](https://arxiv.org/abs/1709.01507).
    r   reduced_channelsc              	      sL   t    td| _ttj||ddt tj||ddt | _	d S )Nr   r   r   )r   )
r   r   r   AdaptiveAvgPool2dpooler
Sequentialr   ZReLUZSigmoid	attention)r$   r   rB   r%   r'   r(   r   r   s   

zRegNetSELayer.__init__c                 C   s    |  |}| |}|| }|S r)   )rE   rG   )r$   r+   ZpooledrG   r'   r'   r(   r,   }   s   

zRegNetSELayer.forward)r-   r.   r/   r=   r0   r   r,   r2   r'   r'   r%   r(   rA   m   s    rA   c                	       <   e Zd ZdZddedededef fddZd	d
 Z  ZS )RegNetXLayerzt
    RegNet's layer composed by three `3x3` convolutions, same as a ResNet bottleneck layer with reduction = 1.
    r   r4   r   r   r   c              
      s   t    ||kp|dk}td||j }|rt|||dnt | _tt	||d|j
dt	|||||j
dt	||dd d| _t|j
 | _d S )Nr   r   r   r   r   r   r   )r   r   maxgroups_widthr>   r   r#   shortcutrF   r   r7   layerr	   r   r$   r4   r   r   r   Zshould_apply_shortcutr   r%   r'   r(   r      s   
zRegNetXLayer.__init__c                 C   .   |}|  |}| |}||7 }| |}|S r)   rP   rO   r   r$   r+   Zresidualr'   r'   r(   r,         


zRegNetXLayer.forwardr   	r-   r.   r/   r=   r   r0   r   r,   r2   r'   r'   r%   r(   rI      s     rI   c                	       rH   )RegNetYLayerzC
    RegNet's Y layer: an X layer with Squeeze and Excitation.
    r   r4   r   r   r   c                    s   t    ||kp|dk}td||j }|rt|||dnt | _tt	||d|j
dt	|||||j
dt|tt|d dt	||dd d| _t|j
 | _d S )Nr   rJ   rK   rL      )rB   )r   r   rM   rN   r>   r   r#   rO   rF   r   r7   rA   r0   roundrP   r	   r   rQ   r%   r'   r(   r      s   
zRegNetYLayer.__init__c                 C   rR   r)   rS   rT   r'   r'   r(   r,      rU   zRegNetYLayer.forwardrV   rW   r'   r'   r%   r(   rX      s     rX   c                       sD   e Zd ZdZ		ddededededef
 fdd	Zd
d Z  ZS )RegNetStagez4
    A RegNet stage composed by stacked layers.
    r   r4   r   r   r   depthc                    sZ   t     jdkrtnttj ||dg fddt|d D R  | _d S )NxrJ   c                    s   g | ]} qS r'   r'   ).0_r4   rP   r   r'   r(   
<listcomp>   s    z(RegNetStage.__init__.<locals>.<listcomp>r   )	r   r   Z
layer_typerI   rX   r   rF   rangelayers)r$   r4   r   r   r   r\   r%   r`   r(   r      s   
zRegNetStage.__init__c                 C   s   |  |}|S r)   )rc   r*   r'   r'   r(   r,      s   
zRegNetStage.forward)r   r   rW   r'   r'   r%   r(   r[      s     	r[   c                	       s@   e Zd Zdef fddZ	ddededed	efd
dZ  Z	S )RegNetEncoderr4   c              	      s   t    tg | _| jt||j|jd |j	rdnd|j
d d t|j|jdd  }t||j
dd  D ]\\}}}| jt||||d q9d S )Nr   r   r   )r   r\   )r\   )r   r   r   Z
ModuleListstagesappendr[   r6   hidden_sizesZdownsample_in_first_stageZdepthszip)r$   r4   Zin_out_channelsr   r   r\   r%   r'   r(   r      s   
	 zRegNetEncoder.__init__FTr+   output_hidden_statesreturn_dictr@   c                 C   sb   |rdnd }| j D ]}|r||f }||}q	|r||f }|s+tdd ||fD S t||dS )Nr'   c                 s   s    | ]	}|d ur|V  qd S r)   r'   )r^   vr'   r'   r(   	<genexpr>   s    z(RegNetEncoder.forward.<locals>.<genexpr>)last_hidden_statehidden_states)re   tupler
   )r$   r+   ri   rj   rn   Zstage_moduler'   r'   r(   r,      s   



zRegNetEncoder.forward)FT)
r-   r.   r/   r   r   r   boolr
   r,   r2   r'   r'   r%   r(   rd      s    rd   c                   @   s&   e Zd ZeZdZdZdgZdd ZdS )RegNetPreTrainedModelregnetr<   rX   c                 C   s   t |tjrtjj|jddd d S t |tjrMtjj|jt	dd |j
d urKtj|j\}}|dkr=dt	| nd}tj|j
| | d S d S t |tjtjfrhtj|jd tj|j
d d S d S )NZfan_outr   )modeZnonlinearity   )ar   r   )
isinstancer   r   initZkaiming_normal_weightLinearZkaiming_uniform_mathsqrtr   Z_calculate_fan_in_and_fan_outZuniform_r!   Z	GroupNormZ	constant_)r$   moduleZfan_inr_   boundr'   r'   r(   _init_weights  s   
z#RegNetPreTrainedModel._init_weightsN)	r-   r.   r/   r   Zconfig_classZbase_model_prefixZmain_input_nameZ_no_split_modulesr~   r'   r'   r'   r(   rq     s    rq   c                
       sF   e Zd Z fddZe	d
dedee dee defdd	Z	  Z
S )RegNetModelc                    s>   t  | || _t|| _t|| _td| _	| 
  d S )NrC   )r   r   r4   r3   r8   rd   encoderr   rD   rE   	post_initr9   r%   r'   r(   r     s   

zRegNetModel.__init__Nr<   ri   rj   r@   c                 C   s|   |d ur|n| j j}|d ur|n| j j}| |}| j|||d}|d }| |}|s6||f|dd   S t|||jdS )Nri   rj   r   r   )rm   pooler_outputrn   )r4   ri   use_return_dictr8   r   rE   r   rn   )r$   r<   ri   rj   Zembedding_outputZencoder_outputsrm   pooled_outputr'   r'   r(   r,   (  s    

zRegNetModel.forward)NN)r-   r.   r/   r   r   r   r   rp   r   r,   r2   r'   r'   r%   r(   r     s    	r   z
    RegNet Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    )Zcustom_introc                       s\   e Zd Z fddZe				ddeej deej dee	 dee	 de
f
d	d
Z  ZS )RegNetForImageClassificationc                    s^   t  | |j| _t|| _tt |jdkr#t|j	d |jnt
 | _|   d S )Nr   )r   r   
num_labelsr   rr   r   rF   ZFlattenry   rg   r#   
classifierr   r9   r%   r'   r(   r   M  s   
$z%RegNetForImageClassification.__init__Nr<   labelsri   rj   r@   c                 C   sb  |dur|n| j j}| j|||d}|r|jn|d }| |}d}|dur| j jdu rP| jdkr6d| j _n| jdkrL|jtj	ksG|jtj
krLd| j _nd| j _| j jdkrnt }	| jdkrh|	| | }n+|	||}n%| j jdkrt }	|	|d| j|d}n| j jdkrt }	|	||}|s|f|dd  }
|dur|f|
 S |
S t|||jd	S )
a0  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr   r   Z
regressionZsingle_label_classificationZmulti_label_classificationr   r   )losslogitsrn   )r4   r   rr   r   r   Zproblem_typer   Zdtypetorchlongr0   r   Zsqueezer   viewr   r   rn   )r$   r<   r   ri   rj   Zoutputsr   r   r   Zloss_fctoutputr'   r'   r(   r,   Y  s6   


"


z$RegNetForImageClassification.forward)NNNN)r-   r.   r/   r   r   r   r   ZFloatTensorZ
LongTensorrp   r   r,   r2   r'   r'   r%   r(   r   E  s$    r   )r   r   rq   ))r=   rz   typingr   r   Ztorch.utils.checkpointr   r   Ztorch.nnr   r   r   Zactivationsr	   Zmodeling_outputsr
   r   r   Zmodeling_utilsr   utilsr   r   Zconfiguration_regnetr   Z
get_loggerr-   loggerModuler   r3   r>   rA   rI   rX   r[   rd   rq   r   r   __all__r'   r'   r'   r(   <module>   s<   
!&'@