o
    ZhK_                     @   s  d Z ddlmZmZmZ ddlZddlmZ ddl	m
Z
mZmZ ddlmZmZmZ ddlmZmZmZmZmZ dd	lmZ dd
lmZ ddlmZ eeZdZ dZ!g dZ"dZ#dZ$G dd dej%j&Z'G dd dej%j&Z(G dd dej%j&Z)G dd dej%j&Z*G dd dej%j&Z+G dd dej%j&Z,G dd dej%j&Z-G dd  d ej%j&Z.eG d!d" d"ej%j&Z/G d#d$ d$eZ0d%Z1d&Z2ed'e1G d(d) d)e0Z3ed*e1G d+d, d,e0eZ4g d-Z5dS ).zTensorFlow RegNet model.    )OptionalTupleUnionN   )ACT2FN)add_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forward) TFBaseModelOutputWithNoAttention*TFBaseModelOutputWithPoolingAndNoAttentionTFSequenceClassifierOutput)TFPreTrainedModelTFSequenceClassificationLosskeraskeras_serializableunpack_inputs)
shape_list)logging   )RegNetConfigr   zfacebook/regnet-y-040)r   i@     r   ztabby, tabby catc                       sV   e Zd Z				ddededededed	ee f fd
dZdd ZdddZ  Z	S )TFRegNetConvLayerr   r   reluin_channelsout_channelskernel_sizestridegroups
activationc              	      s~   t  jdi | tjj|d d| _tjj|||d|ddd| _tjjddd	d
| _	|d ur3t
| ntj| _|| _|| _d S )N   )paddingZVALIDFconvolution)filtersr   stridesr    r   use_biasnameh㈵>?normalizationepsilonZmomentumr%    )super__init__r   layersZZeroPadding2Dr    Conv2Dr!   BatchNormalizationr(   r   tfidentityr   r   r   )selfr   r   r   r   r   r   kwargs	__class__r+   \/var/www/auris/lib/python3.10/site-packages/transformers/models/regnet/modeling_tf_regnet.pyr-   7   s   
	
zTFRegNetConvLayer.__init__c                 C   s(   |  | |}| |}| |}|S N)r!   r    r(   r   )r3   hidden_stater+   r+   r7   callS   s   

zTFRegNetConvLayer.callNc                 C      | j rd S d| _ t| dd d ur2t| jj | jd d d | jg W d    n1 s-w   Y  t| dd d ur_t| jj | jd d d | j	g W d    d S 1 sXw   Y  d S d S NTr!   r(   
builtgetattrr1   
name_scoper!   r%   buildr   r(   r   r3   input_shaper+   r+   r7   rA   Y      "zTFRegNetConvLayer.build)r   r   r   r   r8   )
__name__
__module____qualname__intr   strr-   r:   rA   __classcell__r+   r+   r5   r7   r   6   s(    r   c                       s8   e Zd ZdZdef fddZdd Zd
dd	Z  ZS )TFRegNetEmbeddingszO
    RegNet Embeddings (stem) composed of a single aggressive convolution.
    configc                    s:   t  jdi | |j| _t|j|jdd|jdd| _d S )Nr   r   embedder)r   r   r   r   r   r%   r+   )r,   r-   num_channelsr   embedding_size
hidden_actrM   r3   rL   r4   r5   r+   r7   r-   j   s   zTFRegNetEmbeddings.__init__c                 C   sB   t |d }t r|| jkrtdtj|dd}| |}|S )Nr   zeMake sure that the channel dimension of the pixel values match with the one set in the configuration.)r   r   r   r   perm)r   r1   Zexecuting_eagerlyrN   
ValueError	transposerM   )r3   pixel_valuesrN   r9   r+   r+   r7   r:   v   s   
zTFRegNetEmbeddings.callNc                 C   d   | j rd S d| _ t| dd d ur0t| jj | jd  W d    d S 1 s)w   Y  d S d S )NTrM   )r>   r?   r1   r@   rM   r%   rA   rB   r+   r+   r7   rA         "zTFRegNetEmbeddings.buildr8   )	rE   rF   rG   __doc__r   r-   r:   rA   rJ   r+   r+   r5   r7   rK   e   s
    rK   c                       sV   e Zd ZdZddededef fddZdd	ejd
edejfddZ	dddZ
  ZS )TFRegNetShortCutz
    RegNet shortcut, used to project the residual features to the correct size. If needed, it is also used to
    downsample the input using `stride=2`.
    r   r   r   r   c                    sN   t  jd	i | tjj|d|ddd| _tjjdddd| _|| _|| _	d S )
Nr   Fr!   )r"   r   r#   r$   r%   r&   r'   r(   r)   r+   )
r,   r-   r   r.   r/   r!   r0   r(   r   r   )r3   r   r   r   r4   r5   r+   r7   r-      s   

zTFRegNetShortCut.__init__Finputstrainingreturnc                 C   s   | j | ||dS )Nr\   )r(   r!   )r3   r[   r\   r+   r+   r7   r:      s   zTFRegNetShortCut.callNc                 C   r;   r<   r=   rB   r+   r+   r7   rA      rD   zTFRegNetShortCut.build)r   )Fr8   )rE   rF   rG   rY   rH   r-   r1   Tensorboolr:   rA   rJ   r+   r+   r5   r7   rZ      s
    	rZ   c                       s<   e Zd ZdZdedef fddZdd Zdd	d
Z  ZS )TFRegNetSELayerz|
    Squeeze and Excitation layer (SE) proposed in [Squeeze-and-Excitation Networks](https://arxiv.org/abs/1709.01507).
    r   reduced_channelsc                    s^   t  jd
i | tjjddd| _tjj|ddddtjj|ddd	dg| _|| _|| _	d S )NTpoolerZkeepdimsr%   r   r   zattention.0)r"   r   r   r%   Zsigmoidzattention.2r+   )
r,   r-   r   r.   GlobalAveragePooling2Drc   r/   	attentionr   rb   )r3   r   rb   r4   r5   r+   r7   r-      s   
zTFRegNetSELayer.__init__c                 C   s*   |  |}| jD ]}||}q|| }|S r8   )rc   rf   )r3   r9   Zpooledlayer_moduler+   r+   r7   r:      s
   


zTFRegNetSELayer.callNc                 C   s  | j rd S d| _ t| dd d ur-t| jj | jd W d    n1 s(w   Y  t| dd d urt| jd j | jd d d d | jg W d    n1 sVw   Y  t| jd j | jd d d d | j	g W d    d S 1 s}w   Y  d S d S )NTrc   NNNNrf   r   r   )
r>   r?   r1   r@   rc   r%   rA   rf   r   rb   rB   r+   r+   r7   rA      s   "zTFRegNetSELayer.buildr8   )	rE   rF   rG   rY   rH   r-   r:   rA   rJ   r+   r+   r5   r7   ra      s
    
ra   c                	       F   e Zd ZdZddedededef fddZd	d
 ZdddZ  Z	S )TFRegNetXLayerzt
    RegNet's layer composed by three `3x3` convolutions, same as a ResNet bottleneck layer with reduction = 1.
    r   rL   r   r   r   c              	      s   t  jdi | ||kp|dk}td||j }|r#t|||ddntjjddd| _t	||d|j
ddt	|||||j
dd	t	||dd d
dg| _t|j
 | _d S )Nr   shortcutr   r%   linearr%   layer.0r   r   r%   layer.1r   r   r   r%   layer.2r+   )r,   r-   maxgroups_widthrZ   r   r.   
Activationrk   r   rP   r   r   r3   rL   r   r   r   r4   Zshould_apply_shortcutr   r5   r+   r7   r-      s   zTFRegNetXLayer.__init__c                 C   8   |}| j D ]}||}q| |}||7 }| |}|S r8   r.   rk   r   r3   r9   Zresidualrg   r+   r+   r7   r:         



zTFRegNetXLayer.callNc              	   C      | j rd S d| _ t| dd d ur-t| jj | jd  W d    n1 s(w   Y  t| dd d urV| jD ]}t|j |d  W d    n1 sPw   Y  q8d S d S NTrk   r.   r>   r?   r1   r@   rk   r%   rA   r.   r3   rC   layerr+   r+   r7   rA         
zTFRegNetXLayer.buildr   r8   
rE   rF   rG   rY   r   rH   r-   r:   rA   rJ   r+   r+   r5   r7   rj      
     	rj   c                	       ri   )TFRegNetYLayerzC
    RegNet's Y layer: an X layer with Squeeze and Excitation.
    r   rL   r   r   r   c              
      s   t  jdi | ||kp|dk}td||j }|r#t|||ddntjjddd| _t	||d|j
ddt	|||||j
dd	t|tt|d
 ddt	||dd ddg| _t|j
 | _d S )Nr   rk   rl   rm   rn   ro   rp   rq   rr      rs   )rb   r%   zlayer.3r+   )r,   r-   rt   ru   rZ   r   r.   rv   rk   r   rP   ra   rH   roundr   r   rw   r5   r+   r7   r-     s   zTFRegNetYLayer.__init__c                 C   rx   r8   ry   rz   r+   r+   r7   r:     r{   zTFRegNetYLayer.callNc              	   C   r|   r}   r~   r   r+   r+   r7   rA     r   zTFRegNetYLayer.buildr   r8   r   r+   r+   r5   r7   r      r   r   c                       sL   e Zd ZdZ	ddededededef
 fdd	Zd
d ZdddZ  Z	S )TFRegNetStagez4
    A RegNet stage composed by stacked layers.
    r   rL   r   r   r   depthc                    s\   t  jdi |  jdkrtnt ||ddg fddt|d D | _d S )Nxzlayers.0rl   c              	      s&   g | ]} d |d  dqS )zlayers.r   rn   r+   ).0irL   r   r   r+   r7   
<listcomp>:  s   & z*TFRegNetStage.__init__.<locals>.<listcomp>r   r+   )r,   r-   Z
layer_typerj   r   ranger.   )r3   rL   r   r   r   r   r4   r5   r   r7   r-   1  s   
zTFRegNetStage.__init__c                 C   s   | j D ]}||}q|S r8   )r.   )r3   r9   rg   r+   r+   r7   r:   =  s   

zTFRegNetStage.callNc              	   C   sj   | j rd S d| _ t| dd d ur1| jD ]}t|j |d  W d    n1 s+w   Y  qd S d S )NTr.   )r>   r?   r.   r1   r@   r%   rA   r   r+   r+   r7   rA   B  s   
zTFRegNetStage.build)r   r   r8   r   r+   r+   r5   r7   r   ,  s     r   c                	       sL   e Zd Zdef fddZ	ddejdeded	efd
dZ	dddZ
  ZS )TFRegNetEncoderrL   c                    s   t  jdi | g | _| jt||j|jd |jrdnd|jd dd t	|j|jdd  }t
t	||jdd  D ]\}\\}}}| jt||||d|d  d q=d S )	Nr   r   r   zstages.0)r   r   r%   zstages.)r   r%   r+   )r,   r-   stagesappendr   rO   hidden_sizesZdownsample_in_first_stageZdepthszip	enumerate)r3   rL   r4   Zin_out_channelsr   r   r   r   r5   r+   r7   r-   M  s    
(&zTFRegNetEncoder.__init__FTr9   output_hidden_statesreturn_dictr]   c                 C   sb   |rdnd }| j D ]}|r||f }||}q	|r||f }|s+tdd ||fD S t||dS )Nr+   c                 s   s    | ]	}|d ur|V  qd S r8   r+   )r   vr+   r+   r7   	<genexpr>n  s    z'TFRegNetEncoder.call.<locals>.<genexpr>)last_hidden_statehidden_states)r   tupler
   )r3   r9   r   r   r   Zstage_moduler+   r+   r7   r:   _  s   



zTFRegNetEncoder.callNc              	   C   sV   | j rd S d| _ | jD ]}t|j |d  W d    n1 s#w   Y  qd S )NT)r>   r   r1   r@   r%   rA   )r3   rC   Zstager+   r+   r7   rA   r  s   
zTFRegNetEncoder.build)FTr8   )rE   rF   rG   r   r-   r1   r_   r`   r
   r:   rA   rJ   r+   r+   r5   r7   r   L  s    
r   c                       s^   e Zd ZeZ fddZe			ddejde	e
 de	e
 de
d	ef
d
dZdddZ  ZS )TFRegNetMainLayerc                    sJ   t  jdi | || _t|dd| _t|dd| _tjj	ddd| _
d S )NrM   rn   encoderTrc   rd   r+   )r,   r-   rL   rK   rM   r   r   r   r.   re   rc   rQ   r5   r+   r7   r-     s
   zTFRegNetMainLayer.__init__NFrV   r   r   r\   r]   c           
      C   s   |d ur|n| j j}|d ur|n| j j}| j||d}| j||||d}|d }| |}tj|dd}tj|dd}|rHtdd |d D }	|sT||f|dd   S t	|||r]|	d	S |j
d	S )
Nr^   r   r   r\   r   r   r   r   r   rR   c                 S   s   g | ]	}t j|d dqS )r   rR   )r1   rU   )r   hr+   r+   r7   r     s    z*TFRegNetMainLayer.call.<locals>.<listcomp>r   r   pooler_outputr   )rL   r   use_return_dictrM   r   rc   r1   rU   r   r   r   )
r3   rV   r   r   r\   Zembedding_outputZencoder_outputsr   pooled_outputr   r+   r+   r7   r:     s,   	
zTFRegNetMainLayer.callc                 C   s   | j rd S d| _ t| dd d ur-t| jj | jd  W d    n1 s(w   Y  t| dd d urRt| jj | jd  W d    n1 sMw   Y  t| dd d urzt| jj | jd W d    d S 1 ssw   Y  d S d S )NTrM   r   rc   rh   )	r>   r?   r1   r@   rM   r%   rA   r   rc   rB   r+   r+   r7   rA     s    "zTFRegNetMainLayer.buildNNFr8   )rE   rF   rG   r   config_classr-   r   r1   r_   r   r`   r   r:   rA   rJ   r+   r+   r5   r7   r   {  s&    &r   c                   @   s(   e Zd ZdZeZdZdZedd Z	dS )TFRegNetPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    regnetrV   c                 C   s    dt jd | jjddft jdiS )NrV      )shapeZdtype)r1   Z
TensorSpecrL   rN   Zfloat32)r3   r+   r+   r7   input_signature  s    z'TFRegNetPreTrainedModel.input_signatureN)
rE   rF   rG   rY   r   r   Zbase_model_prefixZmain_input_namepropertyr   r+   r+   r+   r7   r     s    r   ad  
    This model is a Tensorflow
    [keras.layers.Layer](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Layer) sub-class. Use it as a
    regular Tensorflow Module and refer to the Tensorflow documentation for all matter related to general usage and
    behavior.

    Parameters:
        config ([`RegNetConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~TFPreTrainedModel.from_pretrained`] method to load the model weights.
a>  
    Args:
        pixel_values (`tf.Tensor` of shape `(batch_size, num_channels, height, width)`):
            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
            [`ConveNextImageProcessor.__call__`] for details.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
zOThe bare RegNet model outputting raw features without any specific head on top.c                       s   e Zd Zdef fddZeeeee	e
eded			ddejd	ee d
ee dedee
eej f f
ddZdddZ  ZS )TFRegNetModelrL   c                    s,   t  j|g|R i | t|dd| _d S )Nr   rn   )r,   r-   r   r   r3   rL   r[   r4   r5   r+   r7   r-     s   zTFRegNetModel.__init__Zvision)
checkpointoutput_typer   Zmodalityexpected_outputNFrV   r   r   r\   r]   c                 C   sh   |d ur|n| j j}|d ur|n| j j}| j||||d}|s*|d f|dd   S t|j|j|jdS )N)rV   r   r   r\   r   r   r   )rL   r   r   r   r   r   r   r   )r3   rV   r   r   r\   outputsr+   r+   r7   r:     s    zTFRegNetModel.callc                 C   rW   )NTr   )r>   r?   r1   r@   r   r%   rA   rB   r+   r+   r7   rA     rX   zTFRegNetModel.buildr   r8   )rE   rF   rG   r   r-   r   r	   REGNET_INPUTS_DOCSTRINGr   _CHECKPOINT_FOR_DOCr   _CONFIG_FOR_DOC_EXPECTED_OUTPUT_SHAPEr1   r_   r   r`   r   r   r:   rA   rJ   r+   r+   r5   r7   r     s4    
r   z
    RegNet Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    c                       s   e Zd Zdef fddZeeeee	e
eed					ddeej deej d	ee d
ee dedee
eej f fddZdddZ  ZS )TFRegNetForImageClassificationrL   c                    sb   t  j|g|R i | |j| _t|dd| _tj |jdkr*tjj|jddnt	j
g| _d S )Nr   rn   r   zclassifier.1)r,   r-   Z
num_labelsr   r   r   r.   ZFlattenZDenser1   r2   
classifierr   r5   r+   r7   r-   "  s    
z'TFRegNetForImageClassification.__init__)r   r   r   r   NFrV   labelsr   r   r\   r]   c                 C   s   |dur|n| j j}|dur|n| j j}| j||||d}|r"|jn|d }| jd |}| jd |}	|du r:dn| j||	d}
|sW|	f|dd  }|
durU|
f| S |S t|
|	|jdS )a)  
        labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr   r   r   )r   logitsr   )lossr   r   )	rL   r   r   r   r   r   Zhf_compute_lossr   r   )r3   rV   r   r   r   r\   r   r   Zflattened_outputr   r   outputr+   r+   r7   r:   ,  s   z#TFRegNetForImageClassification.callc                 C   s   | j rd S d| _ t| dd d ur-t| jj | jd  W d    n1 s(w   Y  t| dd d urat| jd j | jd d d d | jj	d g W d    d S 1 sZw   Y  d S d S )NTr   r   r   )
r>   r?   r1   r@   r   r%   rA   r   rL   r   rB   r+   r+   r7   rA   W  s   ""z$TFRegNetForImageClassification.build)NNNNFr8   )rE   rF   rG   r   r-   r   r	   r   r   _IMAGE_CLASS_CHECKPOINTr   r   _IMAGE_CLASS_EXPECTED_OUTPUTr   r1   r_   r`   r   r   r:   rA   rJ   r+   r+   r5   r7   r     s:    
#r   )r   r   r   )6rY   typingr   r   r   Z
tensorflowr1   Zactivations_tfr   Z
file_utilsr   r   r	   Zmodeling_tf_outputsr
   r   r   Zmodeling_tf_utilsr   r   r   r   r   Ztf_utilsr   utilsr   Zconfiguration_regnetr   Z
get_loggerrE   loggerr   r   r   r   r   r.   ZLayerr   rK   rZ   ra   rj   r   r   r   r   r   ZREGNET_START_DOCSTRINGr   r   r   __all__r+   r+   r+   r7   <module>   sP   
/(%.. /@2B