a
    h.                     @   sX   d dl Z d dlmZ d dlmZ G dd deZG dd deZG dd de jjZ	dS )	    N)Functionc                   @   s$   e Zd Zedd Zedd ZdS )SyncBatchNormc
              
      sr  |j tjds$|j tjds$| }|d ur4| }t| |d }
|
dkrh|	dk rhtd|
 |j	d }| dkrt
||\}}tjd| |d |j|jd}tj|||gdd ntjd| d |j|jd | d	krN  }tjd||	  j jd}tj| |d
d t||	|f tj |dd\}}}nL fddt|	D }tj| |d
d tj|dd tj |dd\}}}tj rtj s|ddk}|| }|| }|| }|d}|d ur|j|jkr||j}t||||||||\}}|  |||||tj! || _"| dkrdt#||||||S t$|S d S )NZmemory_format      zEExpected more than 1 value per channel when training, got input size r   )r   dtypedevicedimZglooFZasync_opc                    s   g | ]}t  qS  )torch
empty_like).0_combinedr   I/var/www/auris/lib/python3.9/site-packages/torch/nn/modules/_functions.py
<listcomp>Q       z)SyncBatchNorm.forward.<locals>.<listcomp>)%is_contiguousr   channels_lastchannels_last_3d
contiguousintnumelsize
ValueErrorshapeZbatch_norm_statsfullr   r	   catzerosZ_get_backend_nameemptydistZall_gather_into_tensorZreshapesplitrangeZ
all_gatherstackZcudaZis_availableZis_current_stream_capturingZsqueezeviewtoZ#batch_norm_gather_stats_with_countssave_for_backwardint32process_groupZbatch_norm_elemtr   )selfinputweightZbiasZrunning_meanZrunning_varZepsZmomentumr-   Z
world_sizer   num_channelsmeaninvstdcountZcombined_sizeZcombined_flatZmean_allZ
invstd_allZ	count_allZcombined_listmaskcountsr   r   r   forward   s    

	
zSyncBatchNorm.forwardc                 C   s  |j tjds$|j tjds$| }| j\}}}}}d  } }}	| j}
| dkrFt|||||| j	d | j	d | j	d \}}}}	| j	d r|j
d }tj||gdd}tjj|tjjj|
dd t||\}}|d ur|j|jkr||j}t||||||||}|d u s&| j	d s*d }|d u s@| j	d sd }	nH|j
d }| j	d rtjd| |j|jd}tjj|tjjj|
dd |||	d d d d d d f	S )	Nr   r   r   r   r
   Fr   r   )r   r   r   r   r   saved_tensorsr-   r   Zbatch_norm_backward_reduceZneeds_input_gradr    r"   distributedZ
all_reduceZReduceOpZSUMr&   r   r*   Zbatch_norm_backward_elemtr#   r	   )r.   grad_outputZsaved_inputr0   r2   r3   Zcount_tensor
grad_inputZgrad_weightZ	grad_biasr-   Zsum_dyZ
sum_dy_xmur1   r   r   r   r   backward}   s|    


zSyncBatchNorm.backwardN__name__
__module____qualname__staticmethodr7   r<   r   r   r   r   r      s   
tr   c                   @   s&   e Zd Zed	ddZedd ZdS )
CrossMapLRN2d-C6?      ?r   c                 C   s  || _ || _|| _|| _d | _| dkr>td|  d| jpJ| | _| }| d}|| | j| |}t	j
|d|d t| j d d d }	t|	|}
| jdd}|  t|
D ]}||d| qtd|D ]}| jd|d }| jd|}|| |||	 d k rL|d||	 d }|j|dd ||	kr|d||	 }|j|d	d q| j| j| j  | j t	j
| j| j |d || | || |S )
N   z,CrossMapLRN2d: Expected input to be 4D, got z
D instead.r   r   outr   alphar   )r   rI   betakscaler   r   new
resize_as_r   powr   minselectzero_r'   add_Zcopy_mul_r+   )ctxr/   r   rI   rJ   rK   outputchannelsZinput_squareZpre_padZpre_pad_cropZscale_firstcZscale_previousZscale_currentZsquare_nextZsquare_previousr   r   r   r7      sJ    




zCrossMapLRN2d.forwardc                 C   s  | j \}}| }|d}|d}|d}|d}||| j d ||}	|||}
d| j | j | j }t| j| jd d  }|| tj| j	| j |d
| |	  |	d||}t|D ]}tj|| || |d || j	|  tj|	dd| jd dd|
d t|D ]R}|
|	|| j d   || | j|| | |
| d |
j|	| d	d
 q&q|d d d d fS )Nr   r   r      rF   F)ZkeepdimrG   )valuer   rH   )r8   rM   r   rI   rJ   r   rN   r   rO   rL   rT   rR   Znarrowr'   mulZdiv_sumrS   Zaddcmul_)rU   r:   r/   rV   r;   Z
batch_sizerW   Zinput_heightZinput_widthZpaddded_ratioZaccum_ratioZcache_ratio_valueZinversePrePadZpadded_ratio_centernrX   r   r   r   r<     s<    





zCrossMapLRN2d.backwardN)rC   rD   r   r=   r   r   r   r   rB      s   6rB   c                   @   s$   e Zd Zedd Zedd ZdS )BackwardHookFunctionc                 G   s   | j dd |D   |S )Nc                 S   s   g | ]}|j s|qS r   )Zrequires_grad)r   argr   r   r   r   6  r   z0BackwardHookFunction.forward.<locals>.<listcomp>)Zmark_non_differentiablerU   argsr   r   r   r7   4  s    zBackwardHookFunction.forwardc                 G   s   |S )Nr   r`   r   r   r   r<   9  s    zBackwardHookFunction.backwardNr=   r   r   r   r   r^   3  s   
r^   )
r   Ztorch.distributedr9   r%   Ztorch.autograd.functionr   r   rB   Zautogradr^   r   r   r   r   <module>   s    M`