import copy
from collections.abc import Iterable, Sequence
from typing import Any, Callable, NoReturn, Union

import torch
import torch.nn as nn
from torch import Tensor
from torch.nn.utils._named_member_accessor import NamedMemberAccessor


def raise_parameter_tying_error() -> NoReturn:
    raise RuntimeError(
        "make_functional(module): we don't yet support models that "
        "do parameter tying (also sometimes known as weight sharing). "
        "Please try to rewrite your model by replacing all instances of the "
        "tied parameter with another and/or comment your support in "
        "https://github.com/pytorch/functorch/issues/446"
    )


def create_names_map(
    named_params: Union[dict[str, Tensor], Iterable[tuple[str, Tensor]]],
    tied_named_params: Union[dict[str, Tensor], Iterable[tuple[str, Tensor]]],
) -> dict[str, list[str]]:
    """
    named_params is a dictionary of tensors: {'A': A, 'B': B}
    tied_named_params is another dictionary of tensors {'A': A, 'B': B, 'B_tied': B}
    with potentially tied (or 'duplicated') tensors

    This function creates a mapping from the names in named_params to the
    names in tied_named_params: {'A': ['A'], 'B': ['B', 'B_tied']}.
    """
    named_params = dict(named_params)
    tied_named_params = dict(tied_named_params)

    tensors_dict_keys = set(named_params.keys())
    tied_tensors_dict_keys = set(tied_named_params.keys())
    assert tensors_dict_keys.issubset(tied_tensors_dict_keys)

    tensor_to_mapping: dict[Tensor, tuple[str, list[str]]] = {}
    for key, tensor in named_params.items():
        tensor_to_mapping[tensor] = (key, [])
    for key, tensor in tied_named_params.items():
        assert tensor in tensor_to_mapping
        tensor_to_mapping[tensor][1].append(key)
    return dict(tensor_to_mapping.values())


def _extract_members(
    mod: nn.Module,
    named_members: Callable[..., Iterable[tuple[str, Tensor]]],
    subclass: Callable[[Tensor], Tensor],
) -> tuple[tuple[Tensor, ...], tuple[str, ...], dict[str, list[str]]]:
    all_named_members = tuple(named_members(remove_duplicate=False))
    unique_named_members = tuple(named_members(remove_duplicate=True))
    names_map = create_names_map(unique_named_members, all_named_members)

    # Remove all the members in the model, replacing each with a "meta"
    # placeholder so the module keeps its structure but carries no state.
    memo = {}
    accessor = NamedMemberAccessor(mod)
    for name, p in all_named_members:
        if p not in memo:
            memo[p] = subclass(torch.empty_like(p, device="meta"))
        replacement = memo[p]
        accessor.set_tensor(name, replacement)

    if len(unique_named_members) == 0:
        names, params = (), ()
    else:
        names, params = zip(*unique_named_members)
    return params, names, names_map
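

# Hedged usage sketch for create_names_map (this _demo_* helper is an
# assumption for illustration, not part of the original module): with a
# weight tied under two names, the unique name maps to every attribute path
# that shares the tensor.
def _demo_create_names_map() -> dict[str, list[str]]:
    a = torch.randn(3)
    b = torch.randn(3)
    named_params = {"A": a, "B": b}
    tied_named_params = {"A": a, "B": b, "B_tied": b}
    # Returns {'A': ['A'], 'B': ['B', 'B_tied']}
    return create_names_map(named_params, tied_named_params)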


def extract_weights(
    mod: nn.Module,
) -> tuple[tuple[Tensor, ...], tuple[str, ...], dict[str, list[str]]]:
    """
    This function removes all the Parameters from the model and
    return them as a tuple as well as their original attribute names.
    The weights must be re-loaded with `load_weights` before the model
    can be used again.
    Note that this function modifies the model in place and after this
    call, mod.parameters() will be empty.
    """
    return _extract_members(mod, mod.named_parameters, nn.Parameter)


def extract_buffers(
    mod: nn.Module,
) -> tuple[tuple[Tensor, ...], tuple[str, ...], dict[str, list[str]]]:
    return _extract_members(mod, mod.named_buffers, lambda x: x)
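

# A rough sketch of extract_weights in action (assumed demo helper, not
# original code): the real tensors are handed back to the caller, and the
# module is left holding "meta"-device placeholders.
def _demo_extract_weights() -> None:
    model = nn.Linear(3, 3)
    params, names, names_map = extract_weights(model)
    assert names == ("weight", "bias")
    assert all(p.device.type == "meta" for p in model.parameters())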


def load_weights(
    mod: nn.Module,
    names: Sequence[str],
    params: Sequence[Tensor],
    as_params: bool = False,
) -> None:
    """
    Reload a set of weights so that `mod` can be used again to perform a forward pass.
    Note that the `params` are regular Tensors (that can have history) and so are left
    as Tensors. This means that mod.parameters() will still be empty after this call.
    """
    accessor = NamedMemberAccessor(mod)
    if as_params:
        params = [nn.Parameter(p) for p in params]
    accessor.set_tensors(names, params)


def _swap_state(
    mod: nn.Module, names_map: dict[str, list[str]], elems: Iterable[Tensor]
) -> list[Tensor]:
    result: list[Tensor] = []
    accessor = NamedMemberAccessor(mod)
    for (_, attr_names), elem in zip(names_map.items(), elems):
        for i, attr_name in enumerate(attr_names):
            if i == 0:
                result.append(accessor.swap_tensor(attr_name, elem))
            else:
                accessor.set_tensor(attr_name, elem)
    return result


def load_buffers(
    mod: nn.Module,
    names: Sequence[str],
    buffers: Sequence[Tensor],
    as_params: bool = False,
) -> None:
    accessor = NamedMemberAccessor(mod)
    accessor.set_tensors(names, buffers)


def load_state(
    model: nn.Module,
    weights: Sequence[Tensor],
    weight_names: Sequence[str],
    buffers: Sequence[Tensor] = (),
    buffer_names: Sequence[str] = (),
) -> nn.Module:
    """load_state(model, weights, weight_names, buffers=(), buffer_names=()) -> model

    load_state takes `weights` and `buffers` and assigns them to the model.
    This is the inverse operation of `make_functional_deprecated_v1`.
    """
    assert len(weight_names) == len(weights)
    load_weights(model, weight_names, weights)
    if len(buffers) > 0:
        assert len(buffer_names) == len(buffers)
        load_buffers(model, buffer_names, buffers)
    return model
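

# Hedged round-trip sketch (assumed demo helper, not part of the original
# file): extract_weights strips the model, load_state puts the weights back
# under the same names, after which the model is usable again.
def _demo_load_state_round_trip() -> Tensor:
    model = nn.Linear(3, 3)
    weights, weight_names, _ = extract_weights(model)
    load_state(model, weights, weight_names)
    return model(torch.randn(4, 3))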


def make_functional_deprecated_v1(model: nn.Module):
    """make_functional_deprecated_v1(model) -> weights, func, weight_names

    Given an nn.Module, make_functional_deprecated_v1 extracts the state (weights)
    and returns a functional version of the model, `func`. This makes
    it so that it is possible use transforms over the parameters of
    `model`.

    `func` can be invoked as follows:
    ```
    x = torch.randn(4, 3)
    model = nn.Linear(3, 3)
    weights, func, _ = make_functional_deprecated_v1(model)
    func(weights, (x,))
    ```

    And here is an example of applying the grad transform:
    ```
    x = torch.randn(4, 3)
    model = nn.Linear(3, 3)
    weights, _, func = make_functional_deprecated_v1(model)
    grad_weights = grad(func)(weights, (x,))
    ```

    To put the state back into a model, use `load_state`.
    """
    buffers = list(model.buffers())
    if len(buffers) > 0:
        raise RuntimeError(
            "make_functional_deprecated_v1(model): `model` has buffers. Please use "
            "make_functional_with_buffers_deprecated_v1(model) instead."
        )
    weights, descriptors, _ = extract_weights(model)

    def fun(weights, data):
        mutable_model = copy.deepcopy(model)
        load_weights(mutable_model, descriptors, weights)
        return mutable_model(*data)

    return weights, fun, descriptors


def make_functional_with_buffers_deprecated_v1(model: nn.Module):
    """make_functional_with_buffers_deprecated_v1(model) -> weights, buffers, func, weight_names, buffer_names

    Given an nn.Module, make_functional_with_buffers_deprecated_v1 extracts the state (weights and buffers)
    and returns a functional version of the model, `func`.

    `func` can be invoked as follows:
    ```
    x = torch.randn(4, 3)
    model = nn.Linear(3, 3)
    weights, buffers, func, _, _ = make_functional_with_buffers_deprecated_v1(model)
    func(weights, buffers, (x,))
    ```

    And here is an example of applying the grad transform:
    ```
    x = torch.randn(4, 3)
    model = nn.Linear(3, 3)
    weights, buffers, func, _, _ = make_functional_with_buffers_deprecated_v1(model)
    func(weights, buffers, (x,))
    grad_weights = grad(func)(weights, buffers, (x,))
    ```

    To put the state back into a model, use `load_state`.
    """
    weights, weight_descriptors, _ = extract_weights(model)
    buffers, buf_descriptors, _ = extract_buffers(model)

    def fun(weights, buffers, data):
        mutable_model = copy.deepcopy(model)
        load_weights(mutable_model, weight_descriptors, weights)
        load_buffers(mutable_model, buf_descriptors, buffers)
        return mutable_model(*data)

    return weights, buffers, fun, weight_descriptors, buf_descriptors
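

# The two classes below implement a swap-in/swap-out calling convention:
# state is loaded onto a stateless module for the duration of one call, then
# restored. A minimal sketch of that pattern using the helpers defined above
# (this demo function is an assumption for illustration, not original code):
def _demo_swap_state_pattern() -> Tensor:
    model = nn.Linear(3, 3)
    params, names, names_map = extract_weights(model)
    # Swap the real tensors in, run a forward pass, then restore the
    # stateless (meta) placeholders.
    old_state = _swap_state(model, names_map, params)
    try:
        return model(torch.randn(4, 3))
    finally:
        _swap_state(model, names_map, old_state)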


class FunctionalModuleWithBuffers(nn.Module):
    """
    This is the callable object returned by :func:`make_functional_with_buffers`.
    """

    def __init__(
        self,
        stateless_model: nn.Module,
        param_names: tuple[str, ...],
        buffer_names: tuple[str, ...],
        param_names_map: dict[str, list[str]],
        buffer_names_map: dict[str, list[str]],
    ) -> None:
        super().__init__()
        self.stateless_model = stateless_model
        self.param_names = param_names
        self.buffer_names = buffer_names

        self.all_names_map = dict(param_names_map)
        self.all_names_map.update(buffer_names_map)

    @staticmethod
    def _create_from(
        model: nn.Module, disable_autograd_tracking: bool = False
    ) -> tuple["FunctionalModuleWithBuffers", tuple[Tensor, ...], tuple[Tensor, ...]]:
        # TODO: We don't need to copy the model to create a stateless copy
        model_copy = copy.deepcopy(model)
        params, param_names, param_names_map = extract_weights(model_copy)
        buffers, buffer_names, buffer_names_map = extract_buffers(model_copy)
        if disable_autograd_tracking:
            for param in params:
                param.requires_grad_(False)
        return (
            FunctionalModuleWithBuffers(
                model_copy,
                param_names,
                buffer_names,
                param_names_map,
                buffer_names_map,
            ),
            params,
            buffers,
        )

    def forward(
        self, params: Iterable[Tensor], buffers: Iterable[Tensor], *args, **kwargs
    ) -> Any:
        # Temporarily load the state back onto self.stateless_model
        old_state = _swap_state(
            self.stateless_model,
            self.all_names_map,
            tuple(params) + tuple(buffers),
        )
        try:
            return self.stateless_model(*args, **kwargs)
        finally:
            # Remove the loaded state on self.stateless_model
            _swap_state(self.stateless_model, self.all_names_map, old_state)
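

# Hedged sketch of the class above in action (demo helper assumed, not
# original code). _create_from is the private constructor used by
# make_functional_with_buffers, which is defined further below.
def _demo_functional_module_with_buffers() -> Tensor:
    model = nn.BatchNorm1d(3)
    fmodel, params, buffers = FunctionalModuleWithBuffers._create_from(model)
    # The stateless module is only populated for the duration of this call.
    return fmodel(params, buffers, torch.randn(4, 3))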


class FunctionalModule(nn.Module):
    """
    This is the callable object returned by :func:`make_functional`.
    """

    def __init__(
        self,
        stateless_model: nn.Module,
        param_names: tuple[str, ...],
        names_map: dict[str, list[str]],
    ) -> None:
        super().__init__()
        self.stateless_model = stateless_model
        self.param_names = param_names
        self.names_map = names_map

    @staticmethod
    def _create_from(
        model: nn.Module, disable_autograd_tracking: bool = False
    ) -> tuple["FunctionalModule", tuple[Tensor, ...]]:
        # TODO: We don't need to copy the model to create a stateless copy
        model_copy = copy.deepcopy(model)
        params, param_names, names_map = extract_weights(model_copy)
        if disable_autograd_tracking:
            for param in params:
                param.requires_grad_(False)
        return FunctionalModule(model_copy, param_names, names_map), params

    def forward(self, params: Iterable[Tensor], *args, **kwargs) -> Any:
        # Temporarily load the state back onto self.stateless_model
        old_state = _swap_state(self.stateless_model, self.names_map, params)
        try:
            return self.stateless_model(*args, **kwargs)
        finally:
            # Remove the loaded state on self.stateless_model
            _swap_state(self.stateless_model, self.names_map, old_state)


def make_functional(
    model: nn.Module, disable_autograd_tracking: bool = False
) -> tuple[FunctionalModule, tuple[Tensor, ...]]:
    """make_functional(model, disable_autograd_tracking=False) -> func, params

    Given a ``torch.nn.Module``, :func:`make_functional` extracts the state
    (params) and returns a functional version of the model, ``func``. This
    makes it so that it is possible use transforms over the parameters of
    ``model``.

    ``func`` can be invoked as follows:

    .. code-block:: python

        import torch
        import torch.nn as nn
        from functorch import make_functional

        x = torch.randn(4, 3)
        model = nn.Linear(3, 3)
        func, params = make_functional(model)
        func(params, x)

    And here is an example of applying the grad transform over the parameters
    of a model.

    .. code-block:: python

        import torch
        import torch.nn as nn
        from functorch import make_functional, grad

        x = torch.randn(4, 3)
        t = torch.randn(4, 3)
        model = nn.Linear(3, 3)
        func, params = make_functional(model)

        def compute_loss(params, x, t):
            y = func(params, x)
            return nn.functional.mse_loss(y, t)

        grad_weights = grad(compute_loss)(params, x, t)

    If the model has any buffers, please use :func:`make_functional_with_buffers` instead.

    Args:
        model (torch.nn.Module): Input model.
        disable_autograd_tracking (bool): Flag to disable gradients tracking for output parameters.
            The returned params are unrelated to the set of params from the original model. If False (default),
            the params will have ``requires_grad=True`` on them (aka they will be trackable with regular
            PyTorch autograd), matching the requires_grad-ness of the params from the original model.
            Otherwise, the returned params will have ``requires_grad=False``. Default, False.
            If you plan on using regular PyTorch autograd (e.g., if you want to call ``.backward()`` or
            ``torch.autograd.grad()``, then set ``disable_autograd_tracking=False``.
            Otherwise, if you're only planning on using functorch's gradient transforms,
            then please set ``disable_autograd_tracking=True`` to avoid unnecessarily tracking
            history with PyTorch autograd.

    """
    buffers = list(model.buffers())
    if len(buffers) > 0:
        raise RuntimeError(
            "make_functional(model): `model` has buffers. Please use "
            "make_functional_with_buffers(model) instead."
        )
    return FunctionalModule._create_from(
        model, disable_autograd_tracking=disable_autograd_tracking
    )


def make_functional_with_buffers(
    model: nn.Module, disable_autograd_tracking: bool = False
) -> tuple[FunctionalModuleWithBuffers, tuple[Tensor, ...], tuple[Tensor, ...]]:
    """make_functional_with_buffers(model, disable_autograd_tracking=False) -> func, params, buffers

    Given a ``torch.nn.Module``, make_functional_with_buffers extracts the
    state (params and buffers) and returns a functional version of the model
    ``func`` that can be invoked like a function.

    ``func`` can be invoked as follows:

    .. code-block:: python

        import torch
        import torch.nn as nn
        from functorch import make_functional_with_buffers

        x = torch.randn(4, 3)
        model = nn.Linear(3, 3)
        func, params, buffers = make_functional_with_buffers(model)
        func(params, buffers, x)

    And here is an example of applying the grad transform over the parameters
    of a model:

    .. code-block:: python

        import torch
        import torch.nn as nn
        from functorch import make_functional_with_buffers, grad

        x = torch.randn(4, 3)
        t = torch.randn(4, 3)
        model = nn.Linear(3, 3)
        func, params, buffers = make_functional_with_buffers(model)

        def compute_loss(params, buffers, x, t):
            y = func(params, buffers, x)
            return nn.functional.mse_loss(y, t)

        grad_weights = grad(compute_loss)(params, buffers, x, t)

    Args:
        model (torch.nn.Module): Input model.
        disable_autograd_tracking (bool): Flag to disable gradients tracking for output parameters.
            The returned params are unrelated to the set of params from the original model. If False (default),
            the params will have ``requires_grad=True`` on them (aka they will be trackable with regular
            PyTorch autograd), matching the requires_grad-ness of the params from the original model.
            Otherwise, the returned params will have ``requires_grad=False``. Default, False.
            If you plan on using regular PyTorch autograd (e.g., if you want to call ``.backward()`` or
            ``torch.autograd.grad()``, then set ``disable_autograd_tracking=False``.
            Otherwise, if you're only planning on using functorch's gradient transforms,
            then please set ``disable_autograd_tracking=True`` to avoid unnecessarily tracking
            history with PyTorch autograd.

    """
    return FunctionalModuleWithBuffers._create_from(
        model, disable_autograd_tracking=disable_autograd_tracking
    )
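

# Hedged sketch (assumed demo helper, not original code): unlike
# make_functional, make_functional_with_buffers accepts modules that carry
# buffers, such as batch norm's running statistics.
def _demo_make_functional_with_buffers() -> Tensor:
    model = nn.BatchNorm1d(3)
    func, params, buffers = make_functional_with_buffers(model)
    return func(params, buffers, torch.randn(4, 3))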


def transpose_stack(
    tuple_of_tuple_of_tensors: tuple[tuple[Tensor, ...], ...],
) -> tuple[Tensor, ...]:
    tuple_of_tuple_of_tensors = tuple(zip(*tuple_of_tuple_of_tensors))
    results = tuple(
        torch.stack(shards).detach() for shards in tuple_of_tuple_of_tensors
    )
    return results


def combine_state_for_ensemble(
    models: Sequence[nn.Module],
) -> tuple[FunctionalModuleWithBuffers, tuple[Tensor, ...], tuple[Tensor, ...]]:
    """combine_state_for_ensemble(models) -> func, params, buffers

    Prepares a list of torch.nn.Modules for ensembling with :func:`vmap`.

    Given a list of ``M`` ``nn.Modules`` of the same class, stacks all of their
    parameters and buffers together to make ``params`` and ``buffers``.
    Each parameter and buffer in the result will have an additional dimension
    of size ``M``.

    :func:`combine_state_for_ensemble` also returns ``func``, a functional
    version of one of the models in :attr:`models`. One cannot directly run
    ``func(params, buffers, *args, **kwargs)`` directly, you probably want to
    use ``vmap(func, ...)(params, buffers, *args, **kwargs)``

    Here's an example of how to ensemble over a very simple model:

    .. code-block:: python

        num_models = 5
        batch_size = 64
        in_features, out_features = 3, 3
        models = [torch.nn.Linear(in_features, out_features) for i in range(num_models)]
        data = torch.randn(batch_size, 3)

        fmodel, params, buffers = combine_state_for_ensemble(models)
        output = vmap(fmodel, (0, 0, None))(params, buffers, data)

        assert output.shape == (num_models, batch_size, out_features)

    .. warning::
        All of the modules being stacked together must be the same (except for
        the values of their parameters/buffers). For example, they should be in the
        same mode (training vs eval).

        This API is subject to change -- we're investigating better ways to
        create ensembles and would love your feedback how to improve this.
    """
    if len(models) == 0:
        raise RuntimeError(
            "combine_state_for_ensemble: Expected at least one model, got 0."
        )
    if not (all(m.training for m in models) or all(not m.training for m in models)):
        raise RuntimeError(
            "combine_state_for_ensemble: Expected all models to "
            "have the same training/eval mode."
        )
    model0_typ = type(models[0])
    if not all(type(m) == model0_typ for m in models):
        raise RuntimeError(
            "combine_state_for_ensemble: Expected all models to be of the same class."
        )
    funcs, params, buffers = zip(
        *[make_functional_with_buffers(model) for model in models]
    )
    params = transpose_stack(params)
    buffers = transpose_stack(buffers)
    return funcs[0], params, buffers


def functional_init(
    model_class: type[nn.Module],
    ensemble_shape: Union[tuple[()], tuple[int]] = (),
    device: torch.types.Device = "cpu",
):
    def wrapped(*args, **kwargs):
        if len(ensemble_shape) >= 2:
            raise ValueError("NYI: ensemble_shape with more than 1 element")
        if len(ensemble_shape) == 0:
            model = model_class(*args, **kwargs).to(device)
            return make_functional_deprecated_v1(model)
        num_models = ensemble_shape[0]
        if num_models <= 0:
            raise ValueError(f"num_models {num_models} should be > 0")
        # NB: Not very efficient, more of a POC
        models = tuple(
            model_class(*args, **kwargs).to(device) for _ in range(num_models)
        )
        _, fn, names = make_functional_deprecated_v1(model_class(*args, **kwargs))
        weights = tuple(make_functional_deprecated_v1(model)[0] for model in models)
        weights = tuple(zip(*weights))
        weights = tuple(torch.stack(shards).detach() for shards in weights)
        return weights, fn, names

    return wrapped


def functional_init_with_buffers(
    model_class: type[nn.Module],
    ensemble_shape: Union[tuple[()], tuple[int]] = (),
    device: torch.types.Device = "cpu",
):
    def wrapped(*args, **kwargs):
        if len(ensemble_shape) >= 2:
            raise ValueError("NYI: ensemble_shape with more than 1 element")
        if len(ensemble_shape) == 0:
            model = model_class(*args, **kwargs).to(device)
            return make_functional_deprecated_v1(model)
        num_models = ensemble_shape[0]
        if num_models <= 0:
            raise ValueError(f"num_models {num_models} should be > 0")
        # NB: Not very efficient, more of a POC
        models = tuple(
            model_class(*args, **kwargs).to(device) for _ in range(num_models)
        )
        (
            _,
            _,
            fn,
            weight_names,
            buffer_names,
        ) = make_functional_with_buffers_deprecated_v1(model_class(*args, **kwargs))
        weights, buffers = zip(
            *tuple(
                make_functional_with_buffers_deprecated_v1(model)[:2]
                for model in models
            )
        )
        weights = tuple(zip(*weights))
        weights = tuple(torch.stack(shards).detach() for shards in weights)
        buffers = tuple(zip(*buffers))
        buffers = tuple(torch.stack(shards).detach() for shards in buffers)
        return weights, buffers, fn, weight_names, buffer_names

    return wrapped
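

# Hedged usage sketch for the ensemble initializers above (demo helper
# assumed, not original code): functional_init builds num_models freshly
# initialized copies and returns stacked weights with a leading ensemble
# dimension.
def _demo_functional_init() -> None:
    init_fn = functional_init(nn.Linear, ensemble_shape=(5,))
    weights, fn, names = init_fn(3, 3)
    # Each stacked weight has a leading dimension of size 5.
    assert all(w.shape[0] == 5 for w in weights)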