o
    wZh.                     @   sX   d dl Z d dlmZ d dlmZ G dd deZG dd deZG dd de jjZ	dS )	    N)Functionc                   @   $   e Zd Zedd Zedd ZdS )SyncBatchNormc
              
      sd  |j tjds|j tjds| }|d ur| }t| |d }
|
dkr4|	dk r4td|
 |j	d }| dkrdt
||\}}tjd| |d |j|jd}tj|||gdd ntjd| d |j|jd | d	kr  }tjd||	  j jd}tj| |d
d t||	|f tj |dd\}}}n& fddt|	D }tj| |d
d tj|dd tj |dd\}}}tj rtj s|ddk}|| }|| }|| }|d}|d ur|j|jkr||j}t||||||||\}}|  |||||tj! || _"| dkr-t#||||||S t$|S )NZmemory_format      zEExpected more than 1 value per channel when training, got input size r   )r   dtypedevicedimZglooFZasync_opc                    s   g | ]}t  qS  )torch
empty_like).0_combinedr   J/var/www/auris/lib/python3.10/site-packages/torch/nn/modules/_functions.py
<listcomp>Q       z)SyncBatchNorm.forward.<locals>.<listcomp>)%is_contiguousr   channels_lastchannels_last_3d
contiguousintnumelsize
ValueErrorshapeZbatch_norm_statsfullr	   r
   catzerosZ_get_backend_nameemptydistZall_gather_into_tensorZreshapesplitrangeZ
all_gatherstackcudaZis_availableZis_current_stream_capturingZsqueezeviewtoZ#batch_norm_gather_stats_with_countssave_for_backwardZint32process_groupZbatch_norm_elemtr   )selfinputweightZbiasZrunning_meanZrunning_varZepsZmomentumr.   Z
world_sizer   num_channelsmeaninvstdcountZcombined_sizeZcombined_flatZmean_allZ
invstd_allZ	count_allZcombined_listmaskcountsr   r   r   forward   s   

	

zSyncBatchNorm.forwardc                 C   s  |j tjds|j tjds| }| j\}}}}}d  } }}	| j}
| dkrt|||||| j	d | j	d | j	d \}}}}	| j	d r|j
d }tj||gdd}tjj|tjjj|
dd t||\}}|d urz|j|jkrz||j}t||||||||}|d u s| j	d sd }|d u s| j	d sd }	n#|j
d }| j	d rtjd| |j|jd}tjj|tjjj|
dd |||	d d d d d d f	S )	Nr   r   r   r   r   Fr   r   )r   r   r   r   r   saved_tensorsr.   r   Zbatch_norm_backward_reduceZneeds_input_gradr!   r#   distributedZ
all_reduceZReduceOpZSUMr'   r	   r,   Zbatch_norm_backward_elemtr$   r
   )r/   grad_outputZsaved_inputr1   r3   r4   Zcount_tensor
grad_inputZgrad_weightZ	grad_biasr.   Zsum_dyZ
sum_dy_xmur2   r   r   r   r   backward}   s~   




zSyncBatchNorm.backwardN__name__
__module____qualname__staticmethodr8   r=   r   r   r   r   r      s
    
tr   c                   @   s&   e Zd Zed	ddZedd ZdS )
CrossMapLRN2d-C6?      ?r   c                 C   s  || _ || _|| _|| _d | _| dkrtd|  d| jp%| | _| }| d}|| | j| |}t	j
|d|d t| j d d d }	t|	|}
| jdd}|  t|
D ]}||d| qdtd|D ]C}| jd|d }| jd|}|| |||	 d k r|d||	 d }|j|dd ||	kr|d||	 }|j|d	d qu| j| j| j  | j t	j
| j| j |d || | || |S )
N   z,CrossMapLRN2d: Expected input to be 4D, got z
D instead.r   r   outr   alphar   )r   rJ   betakscaler   r    new
resize_as_r   powr   minselectzero_r(   add_Zcopy_mul_r-   )ctxr0   r   rJ   rK   rL   outputchannelsZinput_squareZpre_padZpre_pad_cropZscale_firstcZscale_previousZscale_currentZsquare_nextZsquare_previousr   r   r   r8      sL   




zCrossMapLRN2d.forwardc                 C   s  | j \}}| }|d}|d}|d}|d}||| j d ||}	|||}
d| j | j | j }t| j| jd d  }|| tj| j	| j |d
| |	  |	d||}t|D ]T}tj|| || |d || j	|  tj|	dd| jd dd|
d t|D ](}|
|	|| j d   || | j|| | |
| d |
j|	| d	d
 qqh|d d d d fS )Nr   r   r      rG   F)ZkeepdimrH   )valuer   rI   )r9   rN   r   rJ   rK   r   rO   r   rP   rM   rU   rS   Znarrowr(   mulZdiv_sumrT   Zaddcmul_)rV   r;   r0   rW   r<   Z
batch_sizerX   Zinput_heightZinput_widthZpaddded_ratioZaccum_ratioZcache_ratio_valueZinversePrePadZpadded_ratio_centernrY   r   r   r   r=     s>   





zCrossMapLRN2d.backwardN)rD   rE   r   r>   r   r   r   r   rC      s
    6rC   c                   @   r   )BackwardHookFunctionc                 G   s   | j dd |D   |S )Nc                 S   s   g | ]}|j s|qS r   )Zrequires_grad)r   argr   r   r   r   6  r   z0BackwardHookFunction.forward.<locals>.<listcomp>)Zmark_non_differentiablerV   argsr   r   r   r8   4  s   zBackwardHookFunction.forwardc                 G   s   |S )Nr   ra   r   r   r   r=   9  s   zBackwardHookFunction.backwardNr>   r   r   r   r   r_   3  s
    
r_   )
r   Ztorch.distributedr:   r&   Ztorch.autograd.functionr   r   rC   Zautogradr_   r   r   r   r   <module>   s    M`