a
    ¢º”hU  ã                   @   s¼   U d dl Z d dlmZ d dlmZ d dlZd dlmZ d dlm	Z	 d dl
mZ G dd„ de	ƒZG d	d
„ d
e	ƒZG dd„ de	ƒZG dd„ de	ƒZdaeeeej   ed< ejdœdd„ZdS )é    N)Úchain)ÚOptional©Ú_get_device_index)ÚFunction)Úcommc                   @   s$   e Zd Zedd„ ƒZedd„ ƒZdS )Ú	Broadcastc                    s¸   t dd„ |D ƒƒsJ dƒ‚dd„ |D ƒ}|| _t|ƒdkr>dS t|ƒ| _|d  ¡ | _t || j¡}g }t| j	dd … ƒD ]$\‰ }|sz| 
‡ fd	d„|D ƒ¡ qz| j|Ž  tt |¡ƒS )
Nc                 s   s   | ]}|j jd kV  qdS ©ÚcpuN©ÚdeviceÚtype©Ú.0Úi© r   úJ/var/www/auris/lib/python3.9/site-packages/torch/nn/parallel/_functions.pyÚ	<genexpr>   ó    z$Broadcast.forward.<locals>.<genexpr>z2Broadcast function not implemented for CPU tensorsc                 S   s   g | ]}t |d ƒ‘qS ©Tr   ©r   Úxr   r   r   Ú
<listcomp>   r   z%Broadcast.forward.<locals>.<listcomp>r   r   é   c                 3   s   | ]}|ˆ  V  qd S ©Nr   )r   Úoutput©Úidxr   r   r      r   )ÚallÚtarget_gpusÚlenÚ
num_inputsÚ
get_deviceÚinput_devicer   Zbroadcast_coalescedÚ	enumerateZneeds_input_gradÚextendZmark_non_differentiableÚtupler   Úfrom_iterable)Úctxr   ÚinputsÚoutputsZnon_differentiablesZinput_requires_gradr   r   r   Úforward   s     ÿ

zBroadcast.forwardc                 G   s   dt j| j| jg|¢R Ž  S )Nr   )ÚReduceAddCoalescedÚapplyr#   r!   ©r(   Zgrad_outputsr   r   r   Úbackward   s
    ÿÿzBroadcast.backwardN©Ú__name__Ú
__module__Ú__qualname__Ústaticmethodr+   r/   r   r   r   r   r      s   
r   c                   @   s$   e Zd Zedd„ ƒZedd„ ƒZdS )r,   c                    sL   ‡ fdd„t dtˆ ƒˆƒD ƒ| _‡ ‡fdd„t dtˆ ƒˆƒD ƒ}t ||¡S )Nc                    s   g | ]}ˆ |   ¡ ‘qS r   ©r"   r   )Úgradsr   r   r   )   s   z.ReduceAddCoalesced.forward.<locals>.<listcomp>r   c                    s   g | ]}ˆ ||ˆ … ‘qS r   r   r   ©r6   r!   r   r   r   -   r   )Úranger    r   r   Zreduce_add_coalesced)r(   Zdestinationr!   r6   Zgrads_r   r7   r   r+   '   s
    
ÿ zReduceAddCoalesced.forwardc                 G   s   dt j| jg|¢R Ž  S )N©NN)r   r-   r   r.   r   r   r   r/   0   s    ýzReduceAddCoalesced.backwardNr0   r   r   r   r   r,   &   s   
r,   c                   @   s$   e Zd Zedd„ ƒZedd„ ƒZdS )ÚGatherc                    sÂ   t dd„ |D ƒƒsJ dƒ‚|dkr*dˆ _nt|dƒ}|ˆ _|ˆ _tdd„ |D ƒƒˆ _t dd„ |D ƒƒr’|dkr’td	d„ |D ƒƒ}t d
¡ dˆ _ndˆ _t‡ fdd„|D ƒƒˆ _	t
 |ˆ jˆ j¡S )Nc                 s   s   | ]}|j jd kV  qdS r	   r   r   r   r   r   r   ;   r   z!Gather.forward.<locals>.<genexpr>z/Gather function not implemented for CPU tensorsr
   Tc                 s   s   | ]}|  ¡ V  qd S r   r5   r   r   r   r   r   D   r   c                 s   s   | ]}|  ¡ d kV  qdS ©r   N)Údim©r   Útr   r   r   r   E   r   r   c                 s   s   | ]}|  d ¡V  qdS )r   N)Úviewr=   r   r   r   r   F   r   zvWas asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.Fc                 3   s   | ]}|  ˆ j¡V  qd S r   )Úsizer<   r   ©r(   r   r   r   O   r   )r   Útarget_devicer   r<   r&   Ú
input_gpusÚwarningsÚwarnÚunsqueezed_scalarÚinput_sizesr   Zgather)r(   rB   r<   r)   r   rA   r   r+   9   s$    ÿ
ÿzGather.forwardc                 C   s6   t  | j| j| j|¡}| jr.tdd„ |D ƒƒ}d| S )Nc                 s   s   | ]}|d  V  qdS r;   r   )r   Úgr   r   r   r   X   r   z"Gather.backward.<locals>.<genexpr>r9   )ÚScatterr-   rC   rG   r<   rF   r&   )r(   Úgrad_outputZscattered_gradsr   r   r   r/   R   s    ÿzGather.backwardNr0   r   r   r   r   r:   8   s   
r:   c                   @   s$   e Zd Zedd„ ƒZedd„ ƒZdS )rI   c           
   	   C   sØ   dd„ |D ƒ}|| _ |jjdkr(| ¡ nd| _d }tj ¡ rT| jdkrTdd„ |D ƒ}t 	|||| j |¡}|d urÔt
|ƒD ]Z\}}tj || ¡2 tj ¡ }	|	 || ¡ | |	¡ W d   ƒ qx1 sÈ0    Y  qx|S )Nc                 S   s   g | ]}t |d ƒ‘qS r   r   r   r   r   r   r   _   r   z#Scatter.forward.<locals>.<listcomp>r
   éÿÿÿÿc                 S   s   g | ]}t t |¡ƒ‘qS r   )Ú_get_streamÚtorchr   )r   r   r   r   r   r   e   r   )r<   r   r   r"   r#   rM   ÚacceleratorÚis_availabler   Zscatterr$   Zdevice_indexZcurrent_streamZwait_streamZrecord_stream)
r(   r   Zchunk_sizesr<   ÚinputZstreamsr*   r   r   Zmain_streamr   r   r   r+   ]   s    
*zScatter.forwardc                 G   s    d d d t j| j| jg|¢R Ž fS r   )r:   r-   r#   r<   )r(   rJ   r   r   r   r/   p   s    zScatter.backwardNr0   r   r   r   r   rI   \   s   
rI   Ú_streams©r   c                 C   sp   | j dkstj ¡ sdS tj ¡ j | j ks.J ‚tdu rFdgtj ¡  at| j du rft | j¡t| j< t| j S )zBGet a background stream for copying between CPU and target device.r
   N)	r   rM   rN   rO   Zcurrent_acceleratorrQ   Zdevice_countÚindexÚStreamrR   r   r   r   rL   y   s    rL   )rD   Ú	itertoolsr   Útypingr   rM   Ztorch._utilsr   Ztorch.autogradr   Ztorch.nn.parallelr   r   r,   r:   rI   rQ   ÚlistrT   Ú__annotations__r   rL   r   r   r   r   Ú<module>   s   
$