o
    Zhfo                     @   sn  d dl mZ d dlmZmZ d dlmZ d dlZd dl	m
Z d dlmZmZmZ d dlmZmZ d dlmZ d dlmZmZmZmZ d dlmZmZmZmZ d d	lm Z m!Z! d
Z"dZ#G dd dej$Z%G dd dej$Z&G dd dej$Z'G dd dej$Z(G dd dej$Z)G dd dej$Z*G dd dej$Z+G dd dej$Z,G dd dej$Z-G dd dej$Z.G d d! d!ej$Z/G d"d# d#ej$Z0G d$d% d%ej$Z1G d&d' d'ej$Z2G d(d) d)eZ3G d*d+ d+ej$Z4e d,e"G d-d. d.e3Z5d/Z6ee5e6 ee5eed0 G d1d2 d2ej$Z7G d3d4 d4ej$Z8e d5e"G d6d7 d7e3Z9d8Z:ee9e: ee9eed0 g d9Z;dS ):    )partial)OptionalTupleN)
FrozenDictfreezeunfreeze)flatten_dictunflatten_dict)RegNetConfig)"FlaxBaseModelOutputWithNoAttentionFlaxBaseModelOutputWithPooling,FlaxBaseModelOutputWithPoolingAndNoAttention(FlaxImageClassifierOutputWithNoAttention)ACT2FNFlaxPreTrainedModel append_replace_return_docstringsoverwrite_call_docstring)add_start_docstrings%add_start_docstrings_to_model_forwarda  

    This model inherits from [`FlaxPreTrainedModel`]. Check the superclass documentation for the generic methods the
    library implements for all its model (such as downloading, saving and converting weights from PyTorch models)

    This model is also a
    [flax.linen.Module](https://flax.readthedocs.io/en/latest/api_reference/flax.linen/module.html) subclass. Use it as
    a regular Flax linen Module and refer to the Flax documentation for all matter related to general usage and
    behavior.

    Finally, this model supports inherent JAX features such as:

    - [Just-In-Time (JIT) compilation](https://jax.readthedocs.io/en/latest/jax.html#just-in-time-compilation-jit)
    - [Automatic Differentiation](https://jax.readthedocs.io/en/latest/jax.html#automatic-differentiation)
    - [Vectorization](https://jax.readthedocs.io/en/latest/jax.html#vectorization-vmap)
    - [Parallelization](https://jax.readthedocs.io/en/latest/jax.html#parallelization-pmap)

    Parameters:
        config ([`RegNetConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~FlaxPreTrainedModel.from_pretrained`] method to load the model weights.
        dtype (`jax.numpy.dtype`, *optional*, defaults to `jax.numpy.float32`):
            The data type of the computation. Can be one of `jax.numpy.float32`, `jax.numpy.float16` (on GPUs) and
            `jax.numpy.bfloat16` (on TPUs).

            This can be used to enable mixed-precision training or half-precision inference on GPUs or TPUs. If
            specified all the computation will be performed with the given `dtype`.

            **Note that this only specifies the dtype of the computation and does not influence the dtype of model
            parameters.**

            If you wish to change the dtype of the model parameters, see [`~FlaxPreTrainedModel.to_fp16`] and
            [`~FlaxPreTrainedModel.to_bf16`].
a@  
    Args:
        pixel_values (`numpy.ndarray` of shape `(batch_size, num_channels, height, width)`):
            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
            [`RegNetImageProcessor.__call__`] for details.

        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
c                   @   s   e Zd ZdZejdd ZdS )IdentityzIdentity function.c                 K   s   |S N )selfxkwargsr   r   ^/var/www/auris/lib/python3.10/site-packages/transformers/models/regnet/modeling_flax_regnet.py__call__b   s   zIdentity.__call__N)__name__
__module____qualname____doc__nncompactr   r   r   r   r   r   _   s    r   c                   @   s~   e Zd ZU eed< dZeed< dZeed< dZeed< dZe	e
 ed< ejZejed	< d
d ZddejdedejfddZdS )FlaxRegNetConvLayerout_channels   kernel_size   stridegroupsrelu
activationdtypec                 C   sz   t j| j| j| jf| j| jd | jdt jjdddd| jd| _	t j
dd	| jd
| _| jd ur7t| j | _d S t | _d S )N   F       @fan_outtruncated_normalmodedistribution)r&   stridespaddingZfeature_group_countuse_biaskernel_initr,   ?h㈵>Zmomentumepsilonr,   )r!   Convr$   r&   r(   r)   initializersvariance_scalingr,   convolution	BatchNormnormalizationr+   r   r   activation_funcr   r   r   r   setupo   s   

&zFlaxRegNetConvLayer.setupThidden_statedeterministicreturnc                 C   s&   |  |}| j||d}| |}|S N)Zuse_running_average)r?   rA   rB   )r   rE   rF   r   r   r   r   }   s   

zFlaxRegNetConvLayer.__call__NT)r   r   r   int__annotations__r&   r(   r)   r+   r   strjnpfloat32r,   rD   ndarrayboolr   r   r   r   r   r#   g   s   
  r#   c                   @   sJ   e Zd ZU eed< ejZejed< dd Zddej	de
dej	fd	d
ZdS )FlaxRegNetEmbeddingsconfigr,   c                 C   s"   t | jjdd| jj| jd| _d S )Nr%   r-   )r&   r(   r+   r,   )r#   rR   embedding_size
hidden_actr,   embedderrC   r   r   r   rD      s   zFlaxRegNetEmbeddings.setupTpixel_valuesrF   rG   c                 C   s0   |j d }|| jjkrtd| j||d}|S )NzeMake sure that the channel dimension of the pixel values match with the one set in the configuration.rF   )shaperR   num_channels
ValueErrorrU   )r   rV   rF   rZ   rE   r   r   r   r      s   
zFlaxRegNetEmbeddings.__call__NrI   )r   r   r   r
   rK   rM   rN   r,   rD   rO   rP   r   r   r   r   r   rQ      s
   
  	rQ   c                   @   sZ   e Zd ZU dZeed< dZeed< ejZ	ej	ed< dd Z
dd	ejd
edejfddZdS )FlaxRegNetShortCutz
    RegNet shortcut, used to project the residual features to the correct size. If needed, it is also used to
    downsample the input using `stride=2`.
    r$   r-   r(   r,   c              
   C   sD   t j| jd| jdt jjdddd| jd| _t jdd	| jd
| _	d S )Nr'   r'   Fr.   r/   r0   r1   )r&   r4   r6   r7   r,   r8   r9   r:   )
r!   r<   r$   r(   r=   r>   r,   r?   r@   rA   rC   r   r   r   rD      s   zFlaxRegNetShortCut.setupTr   rF   rG   c                 C   s   |  |}| j||d}|S rH   )r?   rA   )r   r   rF   rE   r   r   r   r         
zFlaxRegNetShortCut.__call__NrI   )r   r   r   r    rJ   rK   r(   rM   rN   r,   rD   rO   rP   r   r   r   r   r   r\      s   
  r\   c                   @   sL   e Zd ZU eed< eed< ejZejed< dd Zdej	dej	fdd	Z
d
S )FlaxRegNetSELayerCollectionin_channelsreduced_channelsr,   c                 C   sT   t j| jdt jjdddd| jdd| _t j| jdt jjdddd| jdd| _d S )	Nr]   r.   r/   r0   r1   0)r&   r7   r,   name2)	r!   r<   ra   r=   r>   r,   conv_1r`   conv_2rC   r   r   r   rD      s   z!FlaxRegNetSELayerCollection.setuprE   rG   c                 C   s,   |  |}t|}| |}t|}|S r   )re   r!   r*   rf   Zsigmoid)r   rE   	attentionr   r   r   r      s
   



z$FlaxRegNetSELayerCollection.__call__N)r   r   r   rJ   rK   rM   rN   r,   rD   rO   r   r   r   r   r   r_      s   
 r_   c                   @   sP   e Zd ZU dZeed< eed< ejZejed< dd Z	dej
dej
fd	d
ZdS )FlaxRegNetSELayerz|
    Squeeze and Excitation layer (SE) proposed in [Squeeze-and-Excitation Networks](https://arxiv.org/abs/1709.01507).
    r`   ra   r,   c                 C   s*   t tjdd| _t| j| j| jd| _d S )Nr   r   rj   r5   r,   )	r   r!   avg_poolpoolerr_   r`   ra   r,   rg   rC   r   r   r   rD      s   zFlaxRegNetSELayer.setuprE   rG   c                 C   sF   | j ||jd |jd f|jd |jd fd}| |}|| }|S )Nr'   r-   Zwindow_shaper4   )rn   rY   rg   )r   rE   Zpooledrg   r   r   r   r      s   
zFlaxRegNetSELayer.__call__N)r   r   r   r    rJ   rK   rM   rN   r,   rD   rO   r   r   r   r   r   rh      s   
 rh   c                   @   s^   e Zd ZU eed< eed< dZeed< ejZ	ej	ed< dd Z
dd	ejd
edejfddZdS )FlaxRegNetXLayerCollectionrR   r$   r'   r(   r,   c              	   C   sf   t d| j| jj }t| jd| jj| jddt| j| j|| jj| jddt| jdd | jddg| _d S )Nr'   rb   r&   r+   r,   rc   1r(   r)   r+   r,   rc   rd   )	maxr$   rR   groups_widthr#   rT   r,   r(   layerr   r)   r   r   r   rD      s0   
z FlaxRegNetXLayerCollection.setupTrE   rF   rG   c                 C   s   | j D ]}|||d}q|S NrX   rv   )r   rE   rF   rv   r   r   r   r     r^   z#FlaxRegNetXLayerCollection.__call__NrI   )r   r   r   r
   rK   rJ   r(   rM   rN   r,   rD   rO   rP   r   r   r   r   r   rp      s   
  rp   c                   @   j   e Zd ZU dZeed< eed< eed< dZeed< ej	Z
ej
ed< dd	 ZddejdedejfddZdS )FlaxRegNetXLayerzt
    RegNet's layer composed by three `3x3` convolutions, same as a ResNet bottleneck layer with reduction = 1.
    rR   r`   r$   r'   r(   r,   c                 C   f   | j | jkp
| jdk}|rt| j| j| jdnt | _t| j| j | j| j| jd| _	t
| jj | _d S Nr'   )r(   r,   )r`   r$   r(   r,   )r`   r$   r(   r\   r,   r   shortcutrp   rR   rv   r   rT   rB   r   Zshould_apply_shortcutr   r   r   rD      s"   	zFlaxRegNetXLayer.setupTrE   rF   rG   c                 C   2   |}|  |}| j||d}||7 }| |}|S rx   rv   r~   rB   r   rE   rF   Zresidualr   r   r   r   4     

zFlaxRegNetXLayer.__call__NrI   r   r   r   r    r
   rK   rJ   r(   rM   rN   r,   rD   rO   rP   r   r   r   r   r   r{     s   
  r{   c                   @   s`   e Zd ZU eed< eed< eed< dZeed< ejZ	ej	ed< dd Z
d	ejd
ejfddZdS )FlaxRegNetYLayerCollectionrR   r`   r$   r'   r(   r,   c              
   C   s   t d| j| jj }t| jd| jj| jddt| j| j|| jj| jddt| jt	t
| jd | jddt| jdd | jd	dg| _d S )
Nr'   rb   rq   rr   rs      rd   )ra   r,   rc   3)rt   r$   rR   ru   r#   rT   r,   r(   rh   rJ   roundr`   rv   rw   r   r   r   rD   D  s<   
z FlaxRegNetYLayerCollection.setuprE   rG   c                 C   s   | j D ]}||}q|S r   ry   )r   rE   rv   r   r   r   r   f  s   

z#FlaxRegNetYLayerCollection.__call__N)r   r   r   r
   rK   rJ   r(   rM   rN   r,   rD   rO   r   r   r   r   r   r   =  s   
 "r   c                   @   rz   )FlaxRegNetYLayerzC
    RegNet's Y layer: an X layer with Squeeze and Excitation.
    rR   r`   r$   r'   r(   r,   c                 C   r|   r}   )r`   r$   r(   r\   r,   r   r~   r   rR   rv   r   rT   rB   r   r   r   r   rD   w  s"   	zFlaxRegNetYLayer.setupTrE   rF   rG   c                 C   r   rx   r   r   r   r   r   r     r   zFlaxRegNetYLayer.__call__NrI   r   r   r   r   r   r   l  s   
  r   c                   @   v   e Zd ZU dZeed< eed< eed< dZeed< dZeed< e	j
Ze	jed< d	d
 Zdde	jdede	jfddZdS )FlaxRegNetStageLayersCollection4
    A RegNet stage composed by stacked layers.
    rR   r`   r$   r-   r(   depthr,   c                 C   sz   | j jdkrtnt}|| j | j| j| j| jddg}t| j	d D ]}|
|| j | j| j| jt|d d q!|| _d S )Nr   rb   )r(   r,   rc   r'   r,   rc   )rR   Z
layer_typer{   r   r`   r$   r(   r,   ranger   appendrL   layers)r   rv   r   ir   r   r   rD     s*   


z%FlaxRegNetStageLayersCollection.setupTr   rF   rG   c                 C   s    |}| j D ]}|||d}q|S rx   r   )r   r   rF   rE   rv   r   r   r   r     s   
z(FlaxRegNetStageLayersCollection.__call__NrI   r   r   r   r    r
   rK   rJ   r(   r   rM   rN   r,   rD   rO   rP   r   r   r   r   r   r     s   
  r   c                   @   r   )FlaxRegNetStager   rR   r`   r$   r-   r(   r   r,   c                 C   s&   t | j| j| j| j| j| jd| _d S )N)r`   r$   r(   r   r,   )r   rR   r`   r$   r(   r   r,   r   rC   r   r   r   rD     s   zFlaxRegNetStage.setupTr   rF   rG   c                 C   s   | j ||dS rx   r   )r   r   rF   r   r   r   r     s   zFlaxRegNetStage.__call__NrI   r   r   r   r   r   r     s   
  
r   c                	   @   sP   e Zd ZU eed< ejZejed< dd Z		ddej	de
d	e
d
efddZdS )FlaxRegNetStageCollectionrR   r,   c                 C   s   t | jj| jjdd  }t| j| jj| jjd | jjrdnd| jjd | jddg}tt || jjdd  D ]\}\\}}}|	t| j|||| jt
|d d q8|| _d S )Nr'   r   r-   rb   )r(   r   r,   rc   )r   r,   rc   )ziprR   Zhidden_sizesr   rS   Zdownsample_in_first_stageZdepthsr,   	enumerater   rL   stages)r   Zin_out_channelsr   r   r`   r$   r   r   r   r   rD     s    

*
zFlaxRegNetStageCollection.setupFTrE   output_hidden_statesrF   rG   c                 C   sF   |rdnd }| j D ]}|r||ddddf }|||d}q	||fS )Nr   r   r%   r'   r-   rX   )r   	transpose)r   rE   r   rF   hidden_statesZstage_moduler   r   r   r     s   
z"FlaxRegNetStageCollection.__call__N)FTr   r   r   r
   rK   rM   rN   r,   rD   rO   rP   r   r   r   r   r   r   r     s   
 r   c                   @   sV   e Zd ZU eed< ejZejed< dd Z			ddej	de
d	e
d
e
def
ddZdS )FlaxRegNetEncoderrR   r,   c                 C   s   t | j| jd| _d S )Nrl   )r   rR   r,   r   rC   r   r   r   rD     s   zFlaxRegNetEncoder.setupFTrE   r   return_dictrF   rG   c                 C   sT   | j |||d\}}|r||ddddf }|s$tdd ||fD S t||dS )	N)r   rF   r   r%   r'   r-   c                 s   s    | ]	}|d ur|V  qd S r   r   ).0vr   r   r   	<genexpr>!  s    z-FlaxRegNetEncoder.__call__.<locals>.<genexpr>)last_hidden_stater   )r   r   tupler   )r   rE   r   r   rF   r   r   r   r   r     s   
zFlaxRegNetEncoder.__call__N)FTTr   r   r   r   r   r     s$   
 r   c                       s   e Zd ZU dZeZdZdZdZe	j
ed< ddejdfd	ed
edejdef fddZddejjdededefddZee				ddee dedee dee fddZ  ZS )FlaxRegNetPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    regnetrV   Nmodule_class)r'      r   r%   r   TrR   seedr,   _do_initc                    sL   | j d||d|}|d u rd|j|j|jf}t j||||||d d S )NrR   r,   r'   )input_shaper   r,   r   r   )r   Z
image_sizerZ   super__init__)r   rR   r   r   r,   r   r   module	__class__r   r   r   5  s   	z"FlaxRegNetPreTrainedModel.__init__rngr   paramsrG   c                 C   sz   t j|| jd}d|i}| jj||dd}|d ur;tt|}tt|}| jD ]}|| ||< q(t | _t	t
|S |S )Nrl   r   F)r   )rM   Zzerosr,   r   initr   r   Z_missing_keyssetr   r	   )r   r   r   r   rV   rngsZrandom_paramsZmissing_keyr   r   r   init_weightsC  s   
z&FlaxRegNetPreTrainedModel.init_weightsFtrainr   r   c              	   C   s   |d ur|n| j j}|d ur|n| j j}t|d}i }| jj|d ur'|d n| jd |d ur3|d n| jd dtj|tj	d| ||||rLdgdS ddS )N)r   r-   r%   r'   r   batch_stats)r   r   rl   F)r   Zmutable)
rR   r   r   rM   r   r   applyr   arrayrN   )r   rV   r   r   r   r   r   r   r   r   r   U  s$   

z"FlaxRegNetPreTrainedModel.__call__r   )NFNN)r   r   r   r    r
   config_classZbase_model_prefixZmain_input_namer   r!   ModulerK   rM   rN   rJ   r,   rP   r   jaxrandomZPRNGKeyr   r   r   r   REGNET_INPUTS_DOCSTRINGr   dictr   __classcell__r   r   r   r   r   *  sD   
  r   c                	   @   sP   e Zd ZU eed< ejZejed< dd Z			dde	de	d	e	d
e
fddZdS )FlaxRegNetModulerR   r,   c                 C   s8   t | j| jd| _t| j| jd| _ttjdd| _	d S )Nrl   ri   rk   )
rQ   rR   r,   rU   r   encoderr   r!   rm   rn   rC   r   r   r   rD   {  s   zFlaxRegNetModule.setupTFrF   r   r   rG   c           	      C   s   |d ur|n| j j}|d ur|n| j j}| j||d}| j||||d}|d }| j||jd |jd f|jd |jd fddddd}|dddd}|sZ||f|dd   S t|||j	dS )	NrX   )r   r   rF   r   r'   r-   ro   r%   )r   pooler_outputr   )
rR   r   use_return_dictrU   r   rn   rY   r   r   r   )	r   rV   rF   r   r   Zembedding_outputZencoder_outputsr   pooled_outputr   r   r   r     s4   zFlaxRegNetModule.__call__N)TFT)r   r   r   r
   rK   rM   rN   r,   rD   rP   r   r   r   r   r   r   r   w  s    
 r   zOThe bare RegNet model outputting raw features without any specific head on top.c                   @      e Zd ZeZdS )FlaxRegNetModelN)r   r   r   r   r   r   r   r   r   r     s    r   at  
    Returns:

    Examples:

    ```python
    >>> from transformers import AutoImageProcessor, FlaxRegNetModel
    >>> from PIL import Image
    >>> import requests

    >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    >>> image = Image.open(requests.get(url, stream=True).raw)

    >>> image_processor = AutoImageProcessor.from_pretrained("facebook/regnet-y-040")
    >>> model = FlaxRegNetModel.from_pretrained("facebook/regnet-y-040")

    >>> inputs = image_processor(images=image, return_tensors="np")
    >>> outputs = model(**inputs)
    >>> last_hidden_states = outputs.last_hidden_state
    ```
)output_typer   c                   @   sD   e Zd ZU eed< ejZejed< dd Zdej	dej	fddZ
d	S )
FlaxRegNetClassifierCollectionrR   r,   c                 C   s   t j| jj| jdd| _d S )Nrr   r   )r!   ZDenserR   
num_labelsr,   
classifierrC   r   r   r   rD     s   z$FlaxRegNetClassifierCollection.setupr   rG   c                 C   s
   |  |S r   )r   )r   r   r   r   r   r     s   
z'FlaxRegNetClassifierCollection.__call__N)r   r   r   r
   rK   rM   rN   r,   rD   rO   r   r   r   r   r   r     s
   
 r   c                   @   sF   e Zd ZU eed< ejZejed< dd Z				d
de	fdd	Z
dS )&FlaxRegNetForImageClassificationModulerR   r,   c                 C   s@   t | j| jd| _| jjdkrt| j| jd| _d S t | _d S )Nr   r   rl   )r   rR   r,   r   r   r   r   r   rC   r   r   r   rD     s   z,FlaxRegNetForImageClassificationModule.setupNTrF   c           	      C   s~   |d ur|n| j j}| j||||d}|r|jn|d }| |d d d d ddf }|s8|f|dd   }|S t||jdS )N)rF   r   r   r'   r   r-   )logitsr   )rR   r   r   r   r   r   r   )	r   rV   rF   r   r   Zoutputsr   r   outputr   r   r   r     s   z/FlaxRegNetForImageClassificationModule.__call__)NTNN)r   r   r   r
   rK   rM   rN   r,   rD   rP   r   r   r   r   r   r     s   
 
r   z
    RegNet Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    c                   @   r   ) FlaxRegNetForImageClassificationN)r   r   r   r   r   r   r   r   r   r     s    r   aa  
    Returns:

    Example:

    ```python
    >>> from transformers import AutoImageProcessor, FlaxRegNetForImageClassification
    >>> from PIL import Image
    >>> import jax
    >>> import requests

    >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    >>> image = Image.open(requests.get(url, stream=True).raw)

    >>> image_processor = AutoImageProcessor.from_pretrained("facebook/regnet-y-040")
    >>> model = FlaxRegNetForImageClassification.from_pretrained("facebook/regnet-y-040")

    >>> inputs = image_processor(images=image, return_tensors="np")
    >>> outputs = model(**inputs)
    >>> logits = outputs.logits

    >>> # model predicts one of the 1000 ImageNet classes
    >>> predicted_class_idx = jax.numpy.argmax(logits, axis=-1)
    >>> print("Predicted class:", model.config.id2label[predicted_class_idx.item()])
    ```
)r   r   r   )<	functoolsr   typingr   r   Z
flax.linenZlinenr!   r   Z	jax.numpynumpyrM   Zflax.core.frozen_dictr   r   r   Zflax.traverse_utilr   r	   Ztransformersr
   Z"transformers.modeling_flax_outputsr   r   r   r   Z transformers.modeling_flax_utilsr   r   r   r   Ztransformers.utilsr   r   ZREGNET_START_DOCSTRINGr   r   r   r#   rQ   r\   r_   rh   rp   r{   r   r   r   r   r   r   r   r   r   ZFLAX_VISION_MODEL_DOCSTRINGr   r   r   ZFLAX_VISION_CLASSIF_DOCSTRING__all__r   r   r   r   <module>   sp   #((/)0+M7
'
