o
    Zh`                     @   sF  d dl mZ d dlmZmZ d dlmZ d dlZd dl	m
Z d dlmZmZmZ d dlmZmZ ddlmZmZmZ ddlmZmZmZmZ dd	lmZmZ d
dlm Z  dZ!dZ"G dd dej#Z$G dd dej#Z%G dd dej#Z&G dd dej#Z'G dd dej#Z(G dd dej#Z)G dd dej#Z*G dd dej#Z+G dd dej#Z,G d d! d!ej#Z-G d"d# d#ej#Z.G d$d% d%ej#Z/G d&d' d'eZ0G d(d) d)ej#Z1ed*e!G d+d, d,e0Z2d-Z3ee2e3 ee2ee d. G d/d0 d0ej#Z4G d1d2 d2ej#Z5ed3e!G d4d5 d5e0Z6d6Z7ee6e7 ee6ee d. g d7Z8dS )8    )partial)OptionalTupleN)
FrozenDictfreezeunfreeze)flatten_dictunflatten_dict   )"FlaxBaseModelOutputWithNoAttention,FlaxBaseModelOutputWithPoolingAndNoAttention(FlaxImageClassifierOutputWithNoAttention)ACT2FNFlaxPreTrainedModel append_replace_return_docstringsoverwrite_call_docstring)add_start_docstrings%add_start_docstrings_to_model_forward   )ResNetConfiga  

    This model inherits from [`FlaxPreTrainedModel`]. Check the superclass documentation for the generic methods the
    library implements for all its model (such as downloading, saving and converting weights from PyTorch models)

    This model is also a
    [flax.linen.Module](https://flax.readthedocs.io/en/latest/api_reference/flax.linen/module.html) subclass. Use it as
    a regular Flax linen Module and refer to the Flax documentation for all matter related to general usage and
    behavior.

    Finally, this model supports inherent JAX features such as:

    - [Just-In-Time (JIT) compilation](https://jax.readthedocs.io/en/latest/jax.html#just-in-time-compilation-jit)
    - [Automatic Differentiation](https://jax.readthedocs.io/en/latest/jax.html#automatic-differentiation)
    - [Vectorization](https://jax.readthedocs.io/en/latest/jax.html#vectorization-vmap)
    - [Parallelization](https://jax.readthedocs.io/en/latest/jax.html#parallelization-pmap)

    Parameters:
        config ([`ResNetConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~FlaxPreTrainedModel.from_pretrained`] method to load the model weights.
        dtype (`jax.numpy.dtype`, *optional*, defaults to `jax.numpy.float32`):
            The data type of the computation. Can be one of `jax.numpy.float32`, `jax.numpy.float16` (on GPUs) and
            `jax.numpy.bfloat16` (on TPUs).

            This can be used to enable mixed-precision training or half-precision inference on GPUs or TPUs. If
            specified all the computation will be performed with the given `dtype`.

            **Note that this only specifies the dtype of the computation and does not influence the dtype of model
            parameters.**

            If you wish to change the dtype of the model parameters, see [`~FlaxPreTrainedModel.to_fp16`] and
            [`~FlaxPreTrainedModel.to_bf16`].
aA  
    Args:
        pixel_values (`jax.numpy.float32` of shape `(batch_size, num_channels, height, width)`):
            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
            [`AutoImageProcessor.__call__`] for details.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
c                   @   s   e Zd ZdZejdd ZdS )IdentityzIdentity function.c                 K   s   |S N )selfxkwargsr   r   ^/var/www/auris/lib/python3.10/site-packages/transformers/models/resnet/modeling_flax_resnet.py__call__\   s   zIdentity.__call__N)__name__
__module____qualname____doc__nncompactr   r   r   r   r   r   Y   s    r   c                   @   sr   e Zd ZU eed< dZeed< dZeed< dZee	 ed< e
jZe
jed< d	d
 Zdde
jdede
jfddZdS )FlaxResNetConvLayerout_channelsr
   kernel_sizer   striderelu
activationdtypec                 C   sz   t j| j| j| jf| j| jd | jdt jjddd| jdd| _t j	dd	| jd
| _
| jd ur7t| j | _d S t | _d S )N   F       @fan_outnormal)modedistributionr*   )r&   stridespaddingr*   use_biaskernel_init?h㈵>Zmomentumepsilonr*   )r"   Convr%   r&   r'   r*   initializersvariance_scalingconvolution	BatchNormnormalizationr)   r   r   activation_funcr   r   r   r   setuph   s   
	&zFlaxResNetConvLayer.setupTr   deterministicreturnc                 C   s&   |  |}| j||d}| |}|S N)Zuse_running_average)r<   r>   r?   r   r   rB   hidden_stater   r   r   r   u   s   

zFlaxResNetConvLayer.__call__NT)r   r   r    int__annotations__r&   r'   r)   r   strjnpfloat32r*   rA   ndarrayboolr   r   r   r   r   r$   a   s   
  r$   c                   @   sN   e Zd ZU dZeed< ejZejed< dd Z	ddej
ded	ej
fd
dZdS )FlaxResNetEmbeddingszO
    ResNet Embeddings (stem) composed of a single aggressive convolution.
    configr*   c                 C   s6   t | jjdd| jj| jd| _ttjdddd| _d S )N   r+   )r&   r'   r)   r*   )r
   r
   )r+   r+   )r   r   rR   )window_shaper1   r2   )	r$   rP   embedding_size
hidden_actr*   embedderr   r"   max_poolr@   r   r   r   rA      s   zFlaxResNetEmbeddings.setupTpixel_valuesrB   rC   c                 C   s:   |j d }|| jjkrtd| j||d}| |}|S )NzeMake sure that the channel dimension of the pixel values match with the one set in the configuration.rB   )shaperP   num_channels
ValueErrorrV   rW   )r   rX   rB   r\   Z	embeddingr   r   r   r      s   

zFlaxResNetEmbeddings.__call__NrG   )r   r   r    r!   r   rI   rK   rL   r*   rA   rM   rN   r   r   r   r   r   rO   |   s   
  rO   c                   @   sZ   e Zd ZU dZeed< dZeed< ejZ	ej	ed< dd Z
dd	ejd
edejfddZdS )FlaxResNetShortCutz
    ResNet shortcut, used to project the residual features to the correct size. If needed, it is also used to
    downsample the input using `stride=2`.
    r%   r+   r'   r*   c              
   C   sD   t j| jd| jdt jjdddd| jd| _t jdd	| jd
| _	d S )NrR   Fr,   r-   Ztruncated_normal)r/   r0   )r&   r1   r3   r4   r*   r5   r6   r7   )
r"   r9   r%   r'   r:   r;   r*   r<   r=   r>   r@   r   r   r   rA      s   zFlaxResNetShortCut.setupTr   rB   rC   c                 C   s   |  |}| j||d}|S rD   )r<   r>   rE   r   r   r   r         
zFlaxResNetShortCut.__call__NrG   )r   r   r    r!   rH   rI   r'   rK   rL   r*   rA   rM   rN   r   r   r   r   r   r^      s   
  r^   c                   @   sV   e Zd ZU eed< dZeed< ejZejed< dd Z	ddej
d	ed
ej
fddZdS )FlaxResNetBasicLayerCollectionr%   r   r'   r*   c                 C   s,   t | j| j| jdt | jd | jdg| _d S )Nr'   r*   )r)   r*   )r$   r%   r'   r*   layerr@   r   r   r   rA      s   
z$FlaxResNetBasicLayerCollection.setupTrF   rB   rC   c                 C      | j D ]}|||d}q|S NrZ   rb   r   rF   rB   rb   r   r   r   r      r_   z'FlaxResNetBasicLayerCollection.__call__NrG   )r   r   r    rH   rI   r'   rK   rL   r*   rA   rM   rN   r   r   r   r   r   r`      s   
  r`   c                   @   sf   e Zd ZU dZeed< eed< dZeed< dZee	 ed< e
jZe
jed< d	d
 ZddefddZdS )FlaxResNetBasicLayerzO
    A classic ResNet's residual layer composed by two `3x3` convolutions.
    in_channelsr%   r   r'   r(   r)   r*   c                 C   sZ   | j | jkp
| jdk}|rt| j| j| jdnd | _t| j| j| jd| _t| j	 | _
d S )Nr   ra   )r%   r'   r*   )rh   r%   r'   r^   r*   shortcutr`   rb   r   r)   r?   r   Zshould_apply_shortcutr   r   r   rA      s   zFlaxResNetBasicLayer.setupTrB   c                 C   s@   |}| j ||d}| jd ur| j||d}||7 }| |}|S rd   )rb   ri   r?   r   rF   rB   Zresidualr   r   r   r      s   

zFlaxResNetBasicLayer.__call__NrG   )r   r   r    r!   rH   rI   r'   r)   r   rJ   rK   rL   r*   rA   rN   r   r   r   r   r   rg      s   
 rg   c                   @   sr   e Zd ZU eed< dZeed< dZee ed< dZ	eed< e
jZe
jed< d	d
 Zdde
jdede
jfddZdS )#FlaxResNetBottleNeckLayerCollectionr%   r   r'   r(   r)      	reductionr*   c              	   C   sL   | j | j }t|d| jddt|| j| jddt| j dd | jddg| _d S )Nr   0)r&   r*   name1)r'   r*   rp   2)r&   r)   r*   rp   )r%   rn   r$   r*   r'   rb   )r   Zreduces_channelsr   r   r   rA      s
   
z)FlaxResNetBottleNeckLayerCollection.setupTrF   rB   rC   c                 C   rc   rd   re   rf   r   r   r   r      r_   z,FlaxResNetBottleNeckLayerCollection.__call__NrG   )r   r   r    rH   rI   r'   r)   r   rJ   rn   rK   rL   r*   rA   rM   rN   r   r   r   r   r   rl      s   
  	rl   c                   @   s~   e Zd ZU dZeed< eed< dZeed< dZee	 ed< dZ
eed	< ejZejed
< dd ZddejdedejfddZdS )FlaxResNetBottleNeckLayera$  
    A classic ResNet's bottleneck layer composed by three `3x3` convolutions. The first `1x1` convolution reduces the
    input by a factor of `reduction` in order to make the second `3x3` convolution faster. The last `1x1` convolution
    remaps the reduced features to `out_channels`.
    rh   r%   r   r'   r(   r)   rm   rn   r*   c                 C   sb   | j | jkp
| jdk}|rt| j| j| jdnd | _t| j| j| j| j| jd| _	t
| j | _d S )Nr   ra   )r'   r)   rn   r*   )rh   r%   r'   r^   r*   ri   rl   r)   rn   rb   r   r?   rj   r   r   r   rA     s   zFlaxResNetBottleNeckLayer.setupTrF   rB   rC   c                 C   s>   |}| j d ur| j ||d}| ||}||7 }| |}|S rd   )ri   rb   r?   rk   r   r   r   r   !  s   

z"FlaxResNetBottleNeckLayer.__call__NrG   )r   r   r    r!   rH   rI   r'   r)   r   rJ   rn   rK   rL   r*   rA   rM   rN   r   r   r   r   r   rs     s   
  rs   c                   @   v   e Zd ZU dZeed< eed< eed< dZeed< dZeed< e	j
Ze	jed< d	d
 Zdde	jdede	jfddZdS )FlaxResNetStageLayersCollection4
    A ResNet stage composed by stacked layers.
    rP   rh   r%   r+   r'   depthr*   c                 C   s~   | j jdkrtnt}|| j| j| j| j j| jddg}t	| j
d D ]}||| j| j| j j| jt|d d q"|| _d S )NZ
bottleneckro   )r'   r)   r*   rp   r   )r)   r*   rp   )rP   Z
layer_typers   rg   rh   r%   r'   rU   r*   rangerw   appendrJ   layers)r   rb   rz   ir   r   r   rA   8  s*   


z%FlaxResNetStageLayersCollection.setupTr   rB   rC   c                 C   s    |}| j D ]}|||d}q|S rd   rz   )r   r   rB   rF   rb   r   r   r   r   T  s   
z(FlaxResNetStageLayersCollection.__call__NrG   r   r   r    r!   r   rI   rH   r'   rw   rK   rL   r*   rA   rM   rN   r   r   r   r   r   ru   ,  s   
  ru   c                   @   rt   )FlaxResNetStagerv   rP   rh   r%   r+   r'   rw   r*   c                 C   s&   t | j| j| j| j| j| jd| _d S )N)rh   r%   r'   rw   r*   )ru   rP   rh   r%   r'   rw   r*   rz   r@   r   r   r   rA   g  s   zFlaxResNetStage.setupTr   rB   rC   c                 C   s   | j ||dS rd   r|   )r   r   rB   r   r   r   r   q  s   zFlaxResNetStage.__call__NrG   r}   r   r   r   r   r~   [  s   
  
r~   c                	   @   sP   e Zd ZU eed< ejZejed< dd Z		ddej	de
d	e
d
efddZdS )FlaxResNetStageCollectionrP   r*   c                 C   s   t | jj| jjdd  }t| j| jj| jjd | jjrdnd| jjd | jddg}tt || jjdd  D ]\}\\}}}|	t| j|||| jt
|d d q8|| _d S )Nr   r   r+   ro   )r'   rw   r*   rp   )rw   r*   rp   )ziprP   Zhidden_sizesr~   rT   Zdownsample_in_first_stageZdepthsr*   	enumeratery   rJ   stages)r   Zin_out_channelsr   r{   rh   r%   rw   r   r   r   rA   y  s    

*
zFlaxResNetStageCollection.setupFTrF   output_hidden_statesrB   rC   c                 C   sF   |rdnd }| j D ]}|r||ddddf }|||d}q	||fS )Nr   r   r
   r   r+   rZ   )r   	transpose)r   rF   r   rB   hidden_statesZstage_moduler   r   r   r     s   
z"FlaxResNetStageCollection.__call__N)FTr   r   r    r   rI   rK   rL   r*   rA   rM   rN   r   r   r   r   r   r   r   u  s   
 r   c                   @   sV   e Zd ZU eed< ejZejed< dd Z			ddej	de
d	e
d
e
def
ddZdS )FlaxResNetEncoderrP   r*   c                 C   s   t | j| jd| _d S )Nr*   )r   rP   r*   r   r@   r   r   r   rA     s   zFlaxResNetEncoder.setupFTrF   r   return_dictrB   rC   c                 C   sT   | j |||d\}}|r||ddddf }|s$tdd ||fD S t||dS )	N)r   rB   r   r
   r   r+   c                 s   s    | ]	}|d ur|V  qd S r   r   ).0vr   r   r   	<genexpr>  s    z-FlaxResNetEncoder.__call__.<locals>.<genexpr>)last_hidden_stater   )r   r   tupler   )r   rF   r   r   rB   r   r   r   r   r     s   
zFlaxResNetEncoder.__call__N)FTTr   r   r   r   r   r     s$   
 r   c                       s   e Zd ZU dZeZdZdZdZe	j
ed< ddejdfd	ed
edejdef fddZddejjdededefddZee				ddee dedee dee fddZ  ZS )FlaxResNetPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    resnetrX   Nmodule_class)r      r   r
   r   TrP   seedr*   _do_initc                    sL   | j d||d|}|d u rd|j|j|jf}t j||||||d d S )NrP   r*   r   )input_shaper   r*   r   r   )r   Z
image_sizer\   super__init__)r   rP   r   r   r*   r   r   module	__class__r   r   r     s   	z"FlaxResNetPreTrainedModel.__init__rngr   paramsrC   c                 C   sz   t j|| jd}d|i}| jj||dd}|d ur;tt|}tt|}| jD ]}|| ||< q(t | _t	t
|S |S )Nr   r   F)r   )rK   Zzerosr*   r   initr   r   Z_missing_keyssetr   r	   )r   r   r   r   rX   rngsZrandom_paramsZmissing_keyr   r   r   init_weights  s   
z&FlaxResNetPreTrainedModel.init_weightsFtrainr   r   c              	   C   s   |d ur|n| j j}|d ur|n| j j}t|d}i }| jj|d ur'|d n| jd |d ur3|d n| jd dtj|tj	d| ||||rLdgdS ddS )N)r   r+   r
   r   r   batch_stats)r   r   r   F)r   Zmutable)
rP   r   r   rK   r   r   applyr   arrayrL   )r   rX   r   r   r   r   r   r   r   r   r     s$   

z"FlaxResNetPreTrainedModel.__call__r   )NFNN)r   r   r    r!   r   config_classZbase_model_prefixZmain_input_namer   r"   ModulerI   rK   rL   rH   r*   rN   r   jaxrandomZPRNGKeyr   r   r   r   RESNET_INPUTS_DOCSTRINGr   dictr   __classcell__r   r   r   r   r     sD   
  r   c                	   @   sP   e Zd ZU eed< ejZejed< dd Z			dde	de	d	e	d
e
fddZdS )FlaxResNetModulerP   r*   c                 C   s8   t | j| jd| _t| j| jd| _ttjdd| _	d S )Nr   )r   r   r   )r2   )
rO   rP   r*   rV   r   encoderr   r"   Zavg_poolpoolerr@   r   r   r   rA     s   zFlaxResNetModule.setupTFrB   r   r   rC   c           	      C   s   |d ur|n| j j}|d ur|n| j j}| j||d}| j||||d}|d }| j||jd |jd f|jd |jd fddddd}|dddd}|sZ||f|dd   S t|||j	dS )	NrZ   )r   r   rB   r   r   r+   )rS   r1   r
   )r   pooler_outputr   )
rP   r   use_return_dictrV   r   r   r[   r   r   r   )	r   rX   rB   r   r   Zembedding_outputZencoder_outputsr   pooled_outputr   r   r   r     s4   zFlaxResNetModule.__call__N)TFT)r   r   r    r   rI   rK   rL   r*   rA   rN   r   r   r   r   r   r   r   	  s    
 r   zOThe bare ResNet model outputting raw features without any specific head on top.c                   @      e Zd ZeZdS )FlaxResNetModelN)r   r   r    r   r   r   r   r   r   r   @  s    r   an  
    Returns:

    Examples:

    ```python
    >>> from transformers import AutoImageProcessor, FlaxResNetModel
    >>> from PIL import Image
    >>> import requests

    >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    >>> image = Image.open(requests.get(url, stream=True).raw)
    >>> image_processor = AutoImageProcessor.from_pretrained("microsoft/resnet-50")
    >>> model = FlaxResNetModel.from_pretrained("microsoft/resnet-50")
    >>> inputs = image_processor(images=image, return_tensors="np")
    >>> outputs = model(**inputs)
    >>> last_hidden_states = outputs.last_hidden_state
    ```
)output_typer   c                   @   sD   e Zd ZU eed< ejZejed< dd Zdej	dej	fddZ
d	S )
FlaxResNetClassifierCollectionrP   r*   c                 C   s   t j| jj| jdd| _d S )Nrq   )r*   rp   )r"   ZDenserP   
num_labelsr*   
classifierr@   r   r   r   rA   f  s   z$FlaxResNetClassifierCollection.setupr   rC   c                 C   s
   |  |S r   )r   )r   r   r   r   r   r   i  s   
z'FlaxResNetClassifierCollection.__call__N)r   r   r    r   rI   rK   rL   r*   rA   rM   r   r   r   r   r   r   b  s
   
 r   c                   @   sF   e Zd ZU eed< ejZejed< dd Z				d
de	fdd	Z
dS )&FlaxResNetForImageClassificationModulerP   r*   c                 C   s@   t | j| jd| _| jjdkrt| j| jd| _d S t | _d S )Nr   r   r   )r   rP   r*   r   r   r   r   r   r@   r   r   r   rA   q  s   z,FlaxResNetForImageClassificationModule.setupNTrB   c           	      C   s~   |d ur|n| j j}| j||||d}|r|jn|d }| |d d d d ddf }|s8|f|dd   }|S t||jdS )N)rB   r   r   r   r   r+   )logitsr   )rP   r   r   r   r   r   r   )	r   rX   rB   r   r   Zoutputsr   r   outputr   r   r   r   y  s   z/FlaxResNetForImageClassificationModule.__call__)NTNN)r   r   r    r   rI   rK   rL   r*   rA   rN   r   r   r   r   r   r   m  s   
 
r   z
    ResNet Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    c                   @   r   ) FlaxResNetForImageClassificationN)r   r   r    r   r   r   r   r   r   r     s    r   a]  
    Returns:

    Example:

    ```python
    >>> from transformers import AutoImageProcessor, FlaxResNetForImageClassification
    >>> from PIL import Image
    >>> import jax
    >>> import requests

    >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    >>> image = Image.open(requests.get(url, stream=True).raw)

    >>> image_processor = AutoImageProcessor.from_pretrained("microsoft/resnet-50")
    >>> model = FlaxResNetForImageClassification.from_pretrained("microsoft/resnet-50")

    >>> inputs = image_processor(images=image, return_tensors="np")
    >>> outputs = model(**inputs)
    >>> logits = outputs.logits

    >>> # model predicts one of the 1000 ImageNet classes
    >>> predicted_class_idx = jax.numpy.argmax(logits, axis=-1)
    >>> print("Predicted class:", model.config.id2label[predicted_class_idx.item()])
    ```
)r   r   r   )9	functoolsr   typingr   r   Z
flax.linenZlinenr"   r   Z	jax.numpynumpyrK   Zflax.core.frozen_dictr   r   r   Zflax.traverse_utilr   r	   Zmodeling_flax_outputsr   r   r   Zmodeling_flax_utilsr   r   r   r   utilsr   r   Zconfiguration_resnetr   ZRESNET_START_DOCSTRINGr   r   r   r$   rO   r^   r`   rg   rl   rs   ru   r~   r   r   r   r   r   ZFLAX_VISION_MODEL_DOCSTRINGr   r   r   ZFLAX_VISION_CLASSIF_DOCSTRING__all__r   r   r   r   <module>   sd   $%+/*L7
'
