a
    hp                  
   @   s   d dl Z d dlmZ d dlmZmZmZmZ d dlZd dl	m
Z
 d dlmZ d dlmZ ddgZeejee eed	f eeef f eej d
ddZdee ee eeeeef   eeeeeejf    ee dddZdS )    N)Sequence)AnycastOptionalUnion)ExceptionWrapper_get_device_index)Module	get_a_varparallel_apply.)objreturnc                 C   sx   t | tjr| S t | ttfrBtt| D ]}t |tjr(|  S q(t | trttt|  D ]}t |tjrZ|  S qZd S )N)	
isinstancetorchTensorlisttuplemapr   dictitems)r   result r   N/var/www/auris/lib/python3.9/site-packages/torch/nn/parallel/parallel_apply.pyr      s    


)modulesinputs
kwargs_tupdevicesr   c           
   	      s  t | t |ks,J dt |  dt | |durJt | t |ksfJ nttttf i ft |  }|durt | t |ksJ ndgt |  }dd |D }dd |D }t i t t	  dt
tttttf ttt
tjf  ttjj ddfdd	 t | d
krj fddtt| ||||D }|D ]}|  qD|D ]}|  qXn& d| d |d |d |d |d  g }tt |D ],}| }	t|	tr|	  ||	 q|S )a  Apply each `module` in :attr:`modules` in parallel on each of :attr:`devices`.

    Args:
        modules (Module): modules to be parallelized
        inputs (tensor): inputs to the modules
        devices (list of int or torch.device): CUDA devices

    :attr:`modules`, :attr:`inputs`, :attr:`kwargs_tup` (if given), and
    :attr:`devices` (if given) should all have same length. Moreover, each
    element of :attr:`inputs` can either be a single object as the only argument
    to a module, or a collection of positional arguments.
    zThe number of modules z& is not equal to the number of inputs Nc                 S   s   g | ]}t |d qS )Tr   .0xr   r   r   
<listcomp><       z"parallel_apply.<locals>.<listcomp>c                 S   s   g | ]}t j|qS r   )r   cudacurrent_streamr   r   r   r   r!   =   r"   )imoduleinputkwargsdevicestreamr   c                    s  t  |d u rht|}|d u r`& td|  dd| < W d    n1 sR0    Y  d S | }|d u r|t j|}zt j| t j|b t j	j
d d2 t|ttfs|f}||i |}W d    n1 s0    Y  W d    n1 s0    Y  W d    n1 s"0    Y   || < W d    n1 sP0    Y  W nT ty   * td|  d| d| < W d    n1 s0    Y  Y n0 d S )Nzin replica zQ, no device was provided and no tensor input was found; device cannot be resolved)wherer#   )Zenabledz on device )r   Zset_grad_enabledr   r   Z
get_devicer#   r$   r)   r*   ampZautocastr   r   r   	Exception)r%   r&   r'   r(   r)   r*   toutput)autocast_enabledgrad_enabledlockresultsr   r   _workerE   s:    

(

l,zparallel_apply.<locals>._worker   c              
      s4   g | ],\}\}}}}}t j ||||||fd qS ))targetargs)	threadingThread)r   r%   r&   r'   r(   r)   r*   )r4   r   r   r!   m   s   r   )NN)lenr   r   strr   r8   Lockr   Zis_grad_enabledZis_autocast_enabledintr
   r   r   r)   r#   ZStream	enumeratezipstartjoinranger   r   reraiseappend)
r   r   r   r   Zstreamsthreadsthreadoutputsr%   r/   r   )r4   r0   r1   r2   r3   r   r      sZ    
  

'
	&)NN)r8   collections.abcr   typingr   r   r   r   r   Ztorch._utilsr   Ztorch.cuda._utilsr	   Ztorch.nn.modulesr
   __all__r   r   r   r   r   r;   r=   r)   r   r   r   r   r   <module>   s&   $  