a
    h*                    @   s,  U d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dl m	Z	m
Z
 d dlmZmZ d dlmZmZmZmZmZ d dlZd dlZd dlmZ d dlmZ d dlm  mZ d dlmZm Z m!Z! d dl"m#Z# d d	l$m%Z% d d
lm&Z&m'Z'm(Z(m)Z)m*Z* d dl+m,Z,m-Z-m.Z.m/Z/ d dl0m1Z2 d dl3m4Z4 ej5j6Z6g Z7e8e9 e:d< ej;j<j=Z=G dd deZ>dneej?e@e@dddZAe	eAej?jBddZCe	eAej?jBdZDe	eAej?jBddZEe	eAej?jFdZGe!eHe!dddZIe#e=jJe/deDe!e!dddZJe#e=jKe/deDe!e!dd d!ZKe#e=jLe/deDe!e!eMeMd"d#d$ZLe#e=jNe/deDe!eMeMeMe@e!d%d&d'ZNe#e=jOjPgd(d) ZQe#e=jOj!ge!d*d+d,ZRe#e=jSe/ eDe!e!d-d.d/ZSe#e=jTe/deDe!e!d0d1d2ZTe#e=jUe/de!e!eMeMd3d4d5ZUe#e=jVe/ eDe!e!d-d6d7ZVe#e=jWe/ eDe!e!e!d8d9d:ZWe#e=jXe/de!e!eMd;d<d=ZXe#e=jYe/deDe!e!eMe@d>d?d@ZYe#e=jZe/deDdoe!e!e9dBdCdDZZe#e=j[eDe!e!dEdFdGZ[e#e=j\e/ eDe!e!d-dHdIZ\e#e=j]e/deDe!e!e!d8dJdKZ]e#e=j^e!e!e!dLdMdNZ^e#e=j_e!e!e!e`e!e!f dOdPdQZ_e#e=jae/ eDe!e!e!eMeMe@e@e!dRdSdTZae#e=jbe/deDe!e!e!e!dUdVdWZbe!eHdXdYdZZcejdd[d\d]Zee#e=jfe/ eDe>jgjhfe!e!eHe!d^d_d`Zfe#e=jie/deDe!e!e!eHdadbdcZie#e=jjdpdddeZke#e=jle/ eDe>jgjhdffe!e!eHeMdgdhdiZle#e=jmjneDe!e!e!eHeMdjdkdlZme#e=jmjoeDe!e!e!eHeMe!dmdndoZpe#e=jqjneDe!e!e!eHeMdpdqdrZqe#e=jqjreDe!e!e!eHeMe!dsdtduZse!e!e!ee! eHeHe!e!dvdwdxZte#e=jue/deDe!e!eHe!dydzd{Zue#e=jve/de!e!e!ee! eHeHe!e!dvd|d}Zve#e=jwe/de!e!e!ee! eHeHe!e!dvd~dZwe#e=jxe/ eDde>jgjhfe!e!ee! eHe!dddZxe#e=jye/deDde>jgjhfe!e!e!ee! eHe!dddZye#e=jze/ eDe>jgjhfe!e!eHe!dddZze#e=j{e/deDe>jgjhfe!e!e!eHe!dddZ{e#e=j|e/ dqe!e!eMdddZ|e#e=j}e/ e!e!e!dddZ}e#e=j~e/ e!e8eH eHeHeHeHdddZ~e#e=jj!dre!eHeeH eeH eHdddZe!eHeeH eeH e`eHeHf dddZe#e=je/ dse!e!eHeeH eeH eHdddZe#e=je/ e!e8eH eHeHdddZe#e=je/ e!e8eH eHeHeHdddZe!e!ejddddZe#e=je/deCe!e!eHejddddZe#e=je/ eCe!e!eHejddddZdd Ze#e=je/ e!e8eH e8eH e8eH e8eH e!dddZe#e=je/ eDe!e8eH e8eH e8eH e8eH e8eH e!dddZe#e=je/ e!e!eMdddZe#e=je/ e!e8eH eHeHeHe!dddZe#e=jjneDdte!e!eeM e!dddZe#e=je=jjne6je=jjne6je!eMee@ dddZe#e=je/ddÃe!eMee@ dddńZe#e=je/ e!eHe@dƜddȄZe#e=je/ddɍe!eHe@dƜdd˄Ze#e=je/ due!e!eHe@e@e!d͜ddτZe#e=je/ e!e!eHeHe@dМdd҄Ze8eH dӜddՄZe8e! eHeHe8e! d֜dd؄Ze8e! dٜddۄZe8e! eHdܜddބZe8e! eHeHdߜddZe#e=jjne=jjrgdve8e! eHeHee! e!dddZe#e=jjne=jjrgdwe!e8eH eHee8e!  ee8e!  dddZe#e=jj!dxe!eHeHe`e!df dddZe#e=jjndye!e8eH eHe`e!df dddZe#e=jj!dze!eHeHe`e!df dddZe=jje6jd{e!e!eHe`e!df dddZe#e=je/ddɍeDd|e!e!e!eHeHdddZe#e=je/ eDd}e!e!e!eHeHe@dddZe#e=je/ddɍeDd~e!e!e!eHeHdddZe#e=jjneDe!e!e!e!ee! eHeHeHeHe8e@ e`ee! ee! ee! f ddd Ze#e=jjre!e!e!e!ee! eHeHeHeHe8e@ ej!ej!ej!e`ee! ee! ee! f dddZee! ee! dddZe#e=jjne!e!e8eH e!e!ee! ee! e8e@ e`ee! ee! ee! f d	dd	Ze#e=jjre!e!e8eH e!e!ee! ee! e8e@ ej!ej!ej!e`ee! ee! ee! f d
ddZe!ee! ee! ee! ee! e@eMeMe@e`e!e!e!ee! ee! f d
ddZe#e=je/ddde!ee! ee! ee! ee! e@eMeMe`e!e!e!f d	ddZe=jjne6je=jjne6je!ee! ee! ee! ee! e@eMeMe`e!e!e!f d	ddZe=jjne6jde8e! dddZe#e=jjne!ee! ee! e!e!eMeMe`e!e!e!f dddZe#e=jjne!ee! ee! e!e!e@eMeMe`e!e!e!f d	ddZe#e=jje!ee! ee! e@eMeMe`e!e!e!f d d!d"Ze#e=jjne!ee! ee! e!e!e@eMeMe`e!e!e!e!e!f d	d#d$Ze!ee! ee! e!e!eMe@e!d%d&d'Ze#e=jjne!ee! ee! e!e!eMeMe`e!e!e!e!f dd(d)Ze#e=jjne!ee! ee! e!e!eMeMe`e!e!e!e!e!e!f dd*d+Ze#e=jjne!ee! ee! e!e!eMeMe`e!e!e!e!f dd,d-Ze#e=je/ddÃeDdd.d/Ze#e=je/ ddddddd0ee!e'f eejd eej e@e@eej d1d2d3Ze#e=je=je=jge/ d4d5 Ze=jjne6je#e=jÃe/ddÐd6d7e!e!ee! ee! ee! e@eMeMd8d9d:ZÐd;d< Ze#e=jjne!e!ee! ee! ee! ee! ee! e@eMe8e@ e!e`e!ee! ee! f d=d>d?Ze#e=jjne!e!ee! ee! ee! ee! ee! e@eMe8e@ e`e!ee! ee! f d@dAdBZe#e=jjre!e!ee! ee! ee! ee! ee! e@eMe8e@ ej!ej!ej!e`e!ee! ee! f dCdDdEZe#e=jȃe/ddÐd6e!e!e!ee! ee! ee! ee! eMdFdGdHZe#e=jɃe/ddÐd6e!e!e!ee! ee! ee! ee! eMe!dI	dJdKZe#e=jʃe/ eDe!e`eHeHf dLdMdNZe)e)e8eH eHdOdPdQZe#e=j̓e/ e)e)e8eH dRdSdTZe#e=j΃e/ e)e)e8eH e8eH e8eH dUdVdWZe#e=jσddXe)eHe)e)e'dYdZd[Ze#e=jЃe/ ddXe)eHe)e)e'dYd\d]ZddXe)eHe)e)e@e'd^d_d`Ze#e=jjne=jjne6jddbdcZe#e=jӃe)eHe)e)dddedfZe#e=jԃe/ e)eHe)e)dddgdhZe)eHe)e)e@didjdkZe#e=jփe/dldmeDe!e`e!e!f d-dndoZe#e=j׃e/ de!ee@eHeMf ee@eHeMf eej dpdqdrZe#e=jكddsdtZِdudv Zڐdwdx Ze#e=jj݃e#e=jj݃e#e=jj݃e=jjݠe6je=jjݠe6je=jjݠe6je=jjݠe6je=jjݠe6je=jjݠe6je!ee8eH  ee8eM  e!dydzd{Ze#e=jj݃e#e=jj݃e#e=jj݃e=jjݠe6je=jjݠe6je=jjݠe6je=jjݠe6je=jjݠe6je=jjݠe6je!ee8eH  ee8eM  e!dyd|d}Zdd~dZe#e=jjne=jjrge=jjne6je=jjne6je/dddde!e8eH eeM e!dddZe#e=jjne=jjrge=jjne6je=jjne6je/dddde!e8eH eeM e!dddZe#e=jjne=jjrge=jjne6je=jjne6je/dddde!e8eH eeM eeM e!dddZe#e=jjne=jjrge=jjne6je=jjne6je/dddde!e8eH eeM eeM e!dddZe#e=jjne=jjrge=jjne6je=jjne6je/dddde!e8eH eeM eeM eeM e!dddZe#e=jjne=jjrge=jjne6je=jjne6je/dddde!e8eH eeM eeM eeM e!dddZeDde!e8eH e8eeM  e@e!dddZdd Zdd Zdd Zdd ZdddZdd Zdd ZdddZdddZdd Ze#e=jje=jje6je=jje6jdd Ze#e=jje=jje6je=jje6jdd Ze#e=jje=jje6je=jje6jdd Ze#e=jje=jje6je=jje6jdd Zdd ZdddZdddZdd Ze#e=jje=jje6je=jje6jdd Ze#e=jje=jje6je=jje6jdd Z dd Zdd Ze#e=jje=jje6je=jje6jdd Ze#e=jje=jje6je=jje6jdd Ze#e=jj݃e=jjݠe6je=jjݠe6jdÐdĄ Ze#e=jj݃e=jjݠe6je=jjݠe6jdŐdƄ Z	e#e=j
j݃e#e=jj݃e=jjݠe6je=jjݠe6je=j
jݠe6je=j
jݠe6je=jjݠe6je=jjݠe6jdǐdȄ Ze#e=jjne=jjrge/ de!e8eH e@eeM e!dɜdʐd˄Ze#e=j
jne=j
jrge=j
jne6je/ de!e8eH e@eeM eeM e!d̜d͐d΄Z
e#e=jjne=jjrge/ de!e8eH e@eeM eeM eeM e!dϜdАdфZddҐdӄZdԐdՄ Zee! ee! e!e!d֜dאd؄Ze*e!dٜdڐdۄZeDe!e8eH e@e8eeM  e!dܜdݐdބZe#e=jjne!e!e@dߜddZe#e=je=jge/ dd Ze#e=jgdd Ze#e=jgdddZe#e=jgdd Ze#e=jgdd Ze!e!ee! eHeHe`e!e!f dddZe#e=je/dlde!e!ee! eHeHe`e!e!f dddZe#e=je/dlde!e!ee! eHeHe`e!e!f dddZe!eMe!dddZe!eMe!dddZe!e*dddZe*e!e!dddZ ee! e!dd dZ!eHe@ejdejdddZ"e!eHeHe@dddZ#e!eHeHeHe@dd	d
Z$e!e8eH e@dddZ%e!e8eH e@dddZ&e#e=j'e/ eDe!e8eH e@dddZ'de!e!eHeHe@e@e!dddZ(e#e=j)e/ eDde!e!eHeHe@e!dddZ)e#e=j*e/ddɍeDdd Z*e#e=j+e/ dde>jgjhfddZ+ej!ej!e@e@dddZ,e=j-jne6je=j-jre6je/dddd d!d"Z-e#e=j.jne=j.jrge=j.jne6je/ eDde!e`eHeHf e@eeM eeM e!d#d$d%Z/e#e=j.j݃e=j.jݠe6je=j.jݠe6je/ eDde!ee`eHeHf  e@ee`eMeMf  e!d&d'd(Z0e#e=j1e#e=j2e#e=j3eDe/ e!e`eHdf e!d)d*d+Z4e#e=j5e#e=j6e#e=j7eDe/ e!e`eHdf e!d)d,d-Z8e!e`eHdf eeHeHeHge!f e!d.d/d0Z9e#e=j:e#e=j;e#e=j<e/dd1d2 Z=e#e=j>e/d3d4ddd5d6d7Z>e#e=j?e/ ddd[d8d9Z?e#e=j@jne=j@jrge/ dejAddd:e'eejd ejBeej e@d;d<d=ZCe#e=j@jDgdejAddd:e'e'eejd ejBeej e@d>d?d@ZEe#e%dAdB ZFe#e=jGe=jGjne6je/ ddde>jgjhfe!e!e'e'ee! eHe!dCdDdEZGe#e=jHe=jHjne6je/dldFe!e!eHe`e!e!f ddGdHZHe#e=jIjnddddIe!e!e!eMe@ee! eeM e`e!e!f dJdKdLZJdMdN ZKe#e=jLge/ddɍeDddOdPZLe#e=jMe/ dQdR ZMe#e=jNdSdT ZNe#e=jOjne=jOjrgdddUe!eejd ee! e!dVdWdXZPe#e=jQjne=jQjRgde!eeH dYdZd[ZSe#ej<j=jTdd\d]ZTe#e=jUe/ ddd^d_d`ZUe#e=jVjnddaej!eej ej!dbdcddZVddedfdgZWddd^dhdiZXe#e=jYe/ djdk ZYe#e=jZddldmZZeKe=j[e=j\ eKe=j]e=j eKe=j^e=j eKe=j_e=jL eKe=j`e=jO eKe=jae=jb eKe=jce=jV eKe=jde=je eKe=jfe=jS eKe=jge=jh eKe=jie=jj eKe=jke=jl eKe=jme=jn eKe=joe=jp eKe=jqe=jr eKe=jse=jt eKe=jue=jv eKe=jwe=jx eKe=jye=jz eKe=j{e=j| eKe=j}e=j~ eKe=je=j eKe=je=j eKe=je=j eKe=je=j\ dS (      N)Iterable)Enum)partialreduce)chainproduct)AnyCallablecastOptionalUnion)	sym_floatsym_intTensorregister_decomposition)	out_dtype)IntLike
NumberTypesuggest_memory_format
TensorLikeTensorSequenceType)_maybe_convert_to_dtype_maybe_resize_out_safe_copy_outout_wrapper)_pytree)tree_map__all__c                   @   s   e Zd ZdZdZdZdS )	Reductionr         N)__name__
__module____qualname__NONEMEANSUM r(   r(   J/var/www/auris/lib/python3.9/site-packages/torch/_decomp/decompositions.pyr   0   s   r   F)ftype_promotioncompute_dtype_onlyinclude_non_tensor_argsc                    s    t  fdd}|S )Nc                     s   rt tjjfnt f  fddtj| i |D }tj|di\fdd}fdd}t|| i t||}r|S t||S d S )Nc                    s   g | ]}t | r|qS r(   )
isinstance.0x)allowed_typesr(   r)   
<listcomp>D   s   
z-type_casts.<locals>.inner.<locals>.<listcomp>type_promotion_kindc                    s   t | tr|  S | S d S Nr.   r   tor1   computation_dtyper(   r)   increase_precN   s    

z0type_casts.<locals>.inner.<locals>.increase_precc                    s   t | tr|  S | S d S r5   r6   r8   )result_dtyper(   r)   decrease_precT   s    

z0type_casts.<locals>.inner.<locals>.decrease_prec)	r   torchtypesZ_NumberpytreeZarg_tree_leavesutilselementwise_dtypesr   )argskwargsZ	flat_argsr;   r=   rr,   r*   r-   r+   )r2   r:   r<   r)   inner?   s     

ztype_casts.<locals>.inner)	functoolswraps)r*   r+   r,   r-   rG   r(   rF   r)   
type_casts9   s     rJ   T)r+   r,   )r+   )r+   r-   )r1   dimreturnc                 C   s$   t ||   D ]}| d} q| S N)rangerK   	unsqueeze)r1   rK   _r(   r(   r)   _unsqueeze_to_dimv   s    rR   
grad_inputout_gradyc                 C   s   | d||     S Nr    Zconj_physicalrT   r(   r(   r)   tanh_backward|   s    rY   c                 C   s   | |d|     S rW   rX   rT   r(   r(   r)   sigmoid_backward   s    rZ   )rU   r1   beta	thresholdc                 C   s.   ||   }t|| |k| | | |d  S N      ?)expr>   where)rU   r1   r[   r\   zr(   r(   r)   softplus_backward   s    rb   )grad_outputalphascaleinput_scale	is_resultself_or_resultc           	      C   sf   || }|}|}|r6t |dk| | ||  | | S t |dk| | | t ||  | | S d S Nr   )r>   r`   r_   )	rc   rd   re   rf   rg   rh   ZnegcoefZposcoefZ
negiptcoefr(   r(   r)   elu_backward   s    rj   c                 C   s   t | |S r5   )r>   Z	full_likeselfvaluer(   r(   r)   fill_scalar   s    rn   rm   c                    s(   t   dk fdd t|  S )Nr   c                      s   d    dS )Nz@fill only supports 0-dimension value tensor but got tensor with z dimensionsrK   r(   ro   r(   r)   <lambda>       zfill_tensor.<locals>.<lambda>)r>   _checkrK   atencopyrk   r(   ro   r)   fill_tensor   s
    

rv   )rl   rL   c                 C   s    t jt j| d ddddd S N   r   min   maxr>   clamprl   r(   r(   r)   hardsigmoid   s    r   rc   rl   c                 C   s   t |dk|dk @ | d dS )Ng      g      @gUUUUUU?        r>   r`   r   r(   r(   r)   hardsigmoid_backward   s
    r   rc   rl   Zmin_valZmax_valc                 C   s   t ||k||kB d| S )Nr   r   r   r(   r(   r)   hardtanh_backward   s    r   c                 C   s$   | t jt j| d dddd d S rw   r~   r   r(   r(   r)   	hardswish   s    r   )rc   rl   rL   c              
   C   s,   t |dkdt |dk | |d d  | S )Nr   rx         ?r   r   r(   r(   r)   hardswish_backward   s
    r   rc   rl   r\   c                 C   s   t ||kd| S ri   r   r   r(   r(   r)   threshold_backward   s    r   rc   rl   negative_slopeself_is_resultc                 C   s   t |dk| | | S ri   r   r   r(   r(   r)   leaky_relu_backward   s    r   none)gradrl   approximatec                 C   s   d}d}d}|dkr|| d }d}|| }|| }	||||	   }
t |
}d| }d| }d| }d||  }|dd| |   }|| | }| ||  S |}|| d }ddt ||   }|t || d	  }| |||   S d S )
Ng;f?g;f?gmBP?tanhr   gHm?r    rx   g      )r>   r   erfr_   )r   rl   r   ZM_SQRT2Z	M_SQRT1_2Z
M_2_SQRTPIZkBetaZkKappaZx_sqZx_cuberG   Z
tanh_innerleftrightZleft_derivativeZtanh_derivativeZinner_derivativeZright_derivativeZkAlphaZcdfZpdfr(   r(   r)   gelu_backward   s,    
r   )rc   inputc                 C   s:   t t|}t |}|| d||   }| ||  S rW   )r>   r   FZsoftplussigmoid)rc   r   Zinput_tanh_softplusZinput_sigmoidoutr(   r(   r)   mish_backward  s    
r   c                 C   s   | t |  S r5   )r>   r   r   r(   r(   r)   silu!  s    r   c                 C   s,   ddt |   }| | d|d|    S rW   )r>   r_   )rc   rl   r   r(   r(   r)   silu_backward(  s    r   )rl   weightrL   c                 C   s   t | dk| ||  S ri   r   )rl   r   r(   r(   r)   _prelu_kernel0  s    r   )rc   rl   r   rL   c                 C   s4   t |dk| ||  }t |dkd||  }||fS )Nr   r   r   )rc   rl   r   Z
input_gradZweight_gradr(   r(   r)   _prelu_kernel_backward5  s    r   )rc   rl   noiseloweruppertrainingr   rL   c                 C   s:   |r|| dkr|  |S || d }t| |||S d S )Ngư>r!   )mulrt   r   )rc   rl   r   r   r   r   r   r   r(   r(   r)   rrelu_with_noise_backward@  s    
r   )rc   rl   bufferrL   c                 C   sN   |dk }t |dd}t |dd}t t | }| |||d|     S )Nr   r    rN   )r>   r`   r_   abs)rc   rl   r   Zin_negativeZ	max_derivsignra   r(   r(   r)   log_sigmoid_backwardU  s
    r   loss	reductionc                 C   s4   |t jjkrt| S |t jjkr,t| S | S d S r5   )r   r&   rm   r>   meanr'   sumr   r(   r(   r)   apply_loss_reductionb  s
    

r   dtypec                 C   s4   | t jkrt jS | t jkr t jS | t jkr0t jS d S r5   )r>   Z	complex32float16Z	complex64float32Z
complex128float64r   r(   r(   r)   to_real_dtypek  s    


r   )rl   targetr   rL   c                 C   s   | | d }t ||S )Nr!   )r   )rl   r   r   r   r(   r(   r)   mse_lossz  s    r   )rc   r   r   r   c                 C   s,   |t jjkrd|  nd}|||  |  S )N       @)r   r&   rm   numel)rc   r   r   r   normr(   r(   r)   mse_loss_backward  s    r   c                 C   sF   t j| ||d}| td}t j||dd}t |}t |||S )N)rK   r   z-infTrK   keepdim)r>   Zsoftmaxeqfloatall
zeros_liker`   )rl   rK   r   r   ZmaskedZmasked_rowszerosr(   r(   r)   safe_softmax  s
    
r   r^   )rl   r   r   r[   c                 C   s<   | |   }t||k d|d  | |d|  }t||S )Nr   r!   )r   r>   r`   r   )rl   r   r   r[   r   r(   r(   r)   smooth_l1_loss  s    	&r   )rc   rl   r   r   r[   c           	      C   sZ   |t jjkrd|  nd}|| }t|}||  }t||k || | |t| S r]   )r   r&   rm   r   r>   r   r`   r   )	rc   rl   r   r   r[   r   r1   Zabs_xZ	norm_gradr(   r(   r)   smooth_l1_loss_backward  s    

r   )rc   rl   r   r   r[   rS   c                 C   s*   t | ||||}t||j t||ddS NTZ	copy_fromZcopy_toexact_dtype)r   r   shaper   )rc   rl   r   r   r[   rS   resultr(   r(   r)   smooth_l1_loss_backward_out  s    
r   )rc   rl   r   r   deltac              
   C   s`   |t jjkrd|  nd}|| }t|| k | |  | t||k||  | || |  S r]   )r   r&   rm   r   r>   r`   )rc   rl   r   r   r   r   r1   r(   r(   r)   huber_loss_backward  s     r   )rc   rl   r   r   r   rS   c                 C   s*   t | ||||}t||j t||ddS r   )r   r   r   r   )rc   rl   r   r   r   rS   r   r(   r(   r)   huber_loss_backward_out  s    
r   )rc   rl   r   r   r   ignore_indextotal_weightrL   c                 C   s   |  dk rdnd}|tjjkr(| | } ||}t||k|d}t|}	t|	||d}	|	  |     krzdkrn n
| |} |d urdd t	|  D }
|j
d |
|< ||
}| | } t||k| d} |	|  S )Nr!   r   r    g      c                 S   s   g | ]}d qS r    r(   r0   rQ   r(   r(   r)   r3     rr   z&_nll_loss_backward.<locals>.<listcomp>)rK   r   r&   rm   rP   r>   r`   r   scatterrO   r   reshape)rc   rl   r   r   r   r   r   channel_dimsafe_targetrS   Z	new_shaper(   r(   r)   _nll_loss_backward  s     	

 

r   )rc   rl   rK   rL   c           
      C   s   |  dksJ dt|  |}||}|d dksNJ d| d| |d }||d|}||||}t|}d| | | |  }	||  }tj||	g|dS )Nr   z*glu does not support 0-dimensional tensorsr!   z.Halving dimension must be even, but dimension z	 is size r^   rp   )rK   rA   canonicalize_dimsizenarrowr>   r   cat)
rc   rl   rK   Zwrap_dimZnInZ	inputSizeZ	firstHalfZ
secondHalfZgradInputFirstHalfZgradInputSecondHalfr(   r(   r)   glu_backward  s    

r   c                 C   sx  d|    krdks"n J d|  dks6J d|  dkoL|  dk}|s|jd |jd ksJ d|j d|j d| dksJ d	|j d
|  df|d u s| |jd ksJ d|tjjkr8|  dkr8|   dkr| jd |jd ksdJ d|jd  d|    d| jd  n,|   dkrT|  dksdJ d| j t| ||||||S )Nr   r!   input tensor should be 1D or 2Dr    ;0D or 1D target tensor expected, multi-target not supportedsize mismatch (got input: 
, target: ):expected total_weight to be a single element tensor, got: z (z
 elements)rN   z<weight tensor should be defined either for all or no classesz7Expected a tensor of dimension 1 and tensor.size[0] == z but got: dimension z and tensor.size[0] == z7Expected a single element grad_output tensor, but got: )rK   r   r   r   r%   rm   r   )rc   rl   r   r   r   r   r   no_batch_dimr(   r(   r)   nll_loss_backward  s:    "&
r   c                 C   s   |  dksJ d|   |  dks<J d|   |jd |jd krx|jd |jd krx|jd |jd ksJ d|j d	|j | dksJ d
|j d|  dt| ||||||S )N   zSonly batches of spatial inputs supported (4D tensors), but got input of dimension: rx   zUonly batches of spatial targets supported (3D tensors) but got targets of dimension: r   r!   r    r   r   r   z ( z, elements))rK   r   r   r   )rc   rl   r   r   r   r   r   r(   r(   r)   nll_loss2d_backwardC  s0    r   )rl   r   r   r   rL   c              	   C   s\   |d t t |  | dd |t t | | dd  }|d urR|| }t||S )Nr    r(   i)r>   maximumlog1pnew_fulllogr   )rl   r   r   r   r   r(   r(   r)   binary_cross_entropyf  s    
r   )rc   rl   r   r   r   rL   c                 C   sR   d}| ||  t j|d|  |d }|d ur6|| }|tjjkrN||  }|S )Ng-q=r    ry   )r>   r   r   r&   rm   r   )rc   rl   r   r   r   ZEPSILONr   r(   r(   r)   binary_cross_entropy_backward|  s    
"r   )r   r   r   rL   c                 C   s    t t |  | }t||S r5   )r>   r   r_   r   )r   r   r   r   r(   r(   r)   soft_margin_loss  s    r   )rc   rl   r   r   rL   c                 C   s6   ||  t || d  }|tjjkr2||  }|S rW   )r>   r   r   r&   rm   r   )rc   rl   r   r   rS   r(   r(   r)   soft_margin_loss_backward  s    	r   r!   r   otherpc                 C   s   t j| | |dS )N)r   )rt   r   r   r(   r(   r)   dist  s    r   )x1x2rL   c           	      C   s   |  ddd}tj|tjd}| ddd}tj|tjd}t| d||gd}t|||gd}||j}|	d
 S )Nr!   rN   Tmemory_formatr   )powr   r>   	ones_likecontiguous_formatr   r   matmulmT	clamp_minsqrt)	r   r   Zx1_normZx1_padZx2_normZx2_padZx1_Zx2_r   r(   r(   r)   _euclidean_dist  s    r  )rc   input_sizesrK   startendstepc                 C   s   |  |}t|| ||||S r5   )	new_zerosr>   slice_scatter)rc   r  rK   r  r  r  rS   r(   r(   r)   slice_backward  s    

r
  r    )rl   rK   r  r  r  c                 C   s  ddl m}m} |  }|dkr(tdt|  |}t|  }t| 	 }	|dkr`td|d url|nd}
|d ur||nt
j}||
dk r|
|| 7 }
||dk r||| 7 }||
dk rd}
n||
|| kr|| }
||t
jkr|| }n.|||
k r|
}n|||| kr"|| }|  |
|	|   }||
 }|| d | ||< |	|  |9  < | jrttdn| ||	|S d S )Nr   )guard_size_obliviousstatically_known_truez,slice() cannot be applied to a 0-dim tensor.zslice step must be positiver    z<Slice decomposition for quantized tensors aren't implemented)%torch.fx.experimental.symbolic_shapesr  r  rK   RuntimeErrorrA   r   listr   stridesysmaxsizestorage_offsetZis_quantizedNotImplementedErrorZ
as_strided)rl   rK   r  r  r  r  r  ndimsizesstridesZ	start_valZend_valr  lenr(   r(   r)   slice_forward  sD    	
r  )r1   rK   r  r  rL   c                    s@   | j |  td fdd}||d d}|||  }||fS )zn
    Normalize start and end such that both are in the range
    [0, x.get_size()[dim]] and start <= end.
    rL   c                    s,   | d u r|S | dk r|   } t t| ||S ri   rz   r}   )valr   r   defaultdim_sizer(   r)   
clamp_wrap  s
    z(_normalize_start_end.<locals>.clamp_wrapr   )r   int)r1   rK   r  r  r   r(   r  r)   _normalize_start_end  s
    
r"  )r   srcrK   r  r  r  c              	   C   sD  t | j|}| j| }t| |||\}}t| j}|| |d  | ||< ||}|dkrv||krv|dkrv| S d g|   }t	j
|| jd}	|	| | ||< t	j|| jt	jd}
|dkrt	|
|	|k}
||krt	|
|	|k }
|dkr
t	|
|	| | dk}
dg|   }d||< |
|}
t|
t||
|d| S )Nr    r   devicer%  r   rN   )rA   r   r  r   r"  r  expandclonerK   r>   aranger%  onesboollogical_andviewrt   r`   _unsafe_masked_index)r   r#  rK   r  r  r  r  Zsrc_sizeindicesidxmaskZ
mask_shaper(   r(   r)   r	    s,    





r	  )rc   r  rK   indexc                 C   s   |  |}t|| ||S r5   )r  r>   select_scatter)rc   r  rK   r2  rS   r(   r(   r)   select_backwardE  s    
r4  )rc   r  offsetdim1dim2c                 C   s   |  |}t|| |||S r5   )r  r>   Zdiagonal_scatter)rc   r  r5  r6  r7  rS   r(   r(   r)   diagonal_backwardL  s    
r8  rc   rS   input_dtypec                 C   s   | j |kr||}|S r5   )r   r7   r9  r(   r(   r)   _cast_grad_to_input_dtypeU  s    

r;  )rc   outputrK   r:  c                 C   s0   | | }||t j||dd  }t| || S NTr   )r>   r   r;  
contiguous)rc   r<  rK   r:  Znew_grad_outputrS   r(   r(   r)   _softmax_backward_data]  s
    
r?  c                 C   s*   | t |t j| |dd  }t| ||S r=  )r>   r_   r   r;  )rc   r<  rK   r:  rS   r(   r(   r)   _log_softmax_backward_datao  s    
r@  c           
      C   sZ   | |d  ||d   }t tjtj|d}|d||d}|d|| |d}	||	 S )z/Utility function to implement im2col and col2imr!   r    r   r%  r   rN   )r   r>   r)  int64rP   )
Zinput_dZkernel_dZ
dilation_dZ	padding_dZstride_dr%  Zblocks_dZ	arange_kwZblocks_d_indicesZkernel_gridr(   r(   r)    _im2col_col2im_indices_along_dim{  s
    rC  )r   kernel_sizedilationpaddingr  rL   c              	      s(  t tdkdd  t t dkdd  t tdkdd  t tdkdd  ddd	}|d
 | d | ddd |d | jt}t |dv otdd dd  D fdd tdd tdd   D t tdd D  fdd |dk}|s@| d} | j\}}	}
}\}}\}} \}}\}}t|
||||| j	}t|||||| j	}t
| ||||f}|dd}|d d d d ||f }|dddddd}|d}|d}|||	| | || }|s$|d}|S ) Nr!   c                   S   s   dS )Nz"im2col(): only 2D kernel supportedr(   r(   r(   r(   r)   rq     rr   zim2col.<locals>.<lambda>c                   S   s   dS )Nz$im2col(): only 2D dilation supportedr(   r(   r(   r(   r)   rq     rr   c                   S   s   dS )Nz#im2col(): only 2D padding supportedr(   r(   r(   r(   r)   rq     rr   c                   S   s   dS )Nz"im2col(): only 2D stride supportedr(   r(   r(   r(   r)   rq     rr   Tc                 S   s<   |rt dd | D nt dd | D }t|dd  d S )Nc                 s   s   | ]}|d kV  qdS r   Nr(   r0   r   r(   r(   r)   	<genexpr>  rr   z1im2col.<locals>.check_positive.<locals>.<genexpr>c                 s   s   | ]}|d kV  qdS rG  r(   rH  r(   r(   r)   rI    rr   c                   S   s   dS )Nz<{param_name} should be greater {'than' zero, but got {param}r(   r(   r(   r(   r)   rq     rr   z0im2col.<locals>.check_positive.<locals>.<lambda>r   r>   rs   param
param_namestrictcondr(   r(   r)   check_positive  s    (zim2col.<locals>.check_positiverD  rE  rF  FrN  r  rx   r   c                 s   s   | ]}|d kV  qdS rG  r(   r0   dr(   r(   r)   rI    rr   zim2col.<locals>.<genexpr>r   c                      s   dt   S )NzmExpected 3D or 4D (batch mode) tensor for input with possible 0 batch size and non-zero dimensions, but got: tupler(   r   r(   r)   rq     s   c                 s   s<   | ]4\}}}}}d |d|  ||d    d  |  V  qdS )r    r!   Nr(   r0   r   padZdilZkerstr(   r(   r)   rI    s   r   c                 s   s   | ]}|d kV  qdS rG  r(   )r0   cr(   r(   r)   rI    rr   c                      s6   dt dd   d d  d d d dS )	Nz!Given an input with spacial size r   , kernel_size=, dilation=
, padding=	, stride=z9, the calculated shape of the array of sliding blocks is z*, but its components must be at least one.rU  r(   rE  rD  output_sizerF  r   r  r(   r)   rq     s   r   r   rN   r    rx      )T)r>   rs   r  r   r   rV  ziprP   rC  r%  r   rY  permuter   r   squeeze)r   rD  rE  rF  r  rP  r  batched_inputZ	batch_dimr   Zinput_hZinput_wstride_hstride_w	padding_h	padding_w
dilation_h
dilation_wkernel_hkernel_wZblocks_row_indicesZblocks_col_indicesZpadded_inputr<  Znum_blocks_rowZnum_blocks_colr(   r`  r)   im2col  sd    	



 




ro  )r   ra  rD  rE  rF  r  rL   c              
      s  t tdkdd  t tdkdd  t tdkdd  t tdkdd  t tdkdd  d$d	d
}|d |d |ddd |d |d | jt}t |dv otdd dd  D fdd d d  }t d | dkfdd dd tD }	|	d |	d   t d  k fdd t  dk fdd |dk}
|
s| d} | j\}}\}}\}}\}}\}}| d d | g |	 } | dddd dd!} t	|||||| j
}t|d }t	|||||| j
}d"d tD }| d d t g| }d d ||f}tj||| dd#}t|| | | | f}|
s|d}|S )%Nr!   c                   S   s   dS )Nzonly 2D output_size supportedr(   r(   r(   r(   r)   rq     rr   zcol2im.<locals>.<lambda>c                   S   s   dS )Nzonly 2D kernel supportedr(   r(   r(   r(   r)   rq     rr   c                   S   s   dS )Nzonly 2D dilation supportedr(   r(   r(   r(   r)   rq     rr   c                   S   s   dS )Nzonly 2D padding supportedr(   r(   r(   r(   r)   rq     rr   c                   S   s   dS )Nzonly 2D stride supportedr(   r(   r(   r(   r)   rq     rr   Tc                 S   s<   |rt dd | D nt dd | D }t|dd  d S )Nc                 s   s   | ]}|d kV  qdS rG  r(   rH  r(   r(   r)   rI    rr   z1col2im.<locals>.check_positive.<locals>.<genexpr>c                 s   s   | ]}|d kV  qdS rG  r(   rH  r(   r(   r)   rI    rr   c                   S   s   dS )Nz9{param_name} should be greater than zero, but got {param}r(   r(   r(   r(   r)   rq     rr   z0col2im.<locals>.check_positive.<locals>.<lambda>rJ  rK  r(   r(   r)   rP    s    (zcol2im.<locals>.check_positiverD  rE  rF  FrQ  r  ra  )r!   rx   c                 s   s   | ]}|d kV  qdS rG  r(   rS  r(   r(   r)   rI     rr   zcol2im.<locals>.<genexpr>r   c                      s   dt   S )NzmExpected 2D or 3D (batch mode) tensor for input with possible 0 batch size and non-zero dimensions, but got: rU  r(   rW  r(   r)   rq     s   r   r    c                      s   dd  d  S )Nz|Expected size of input's first non-batch dimension to be divisible by the product of kernel_size, but got input.shape[-2] = r   z and kernel_size=r(   r(   )rD  r   r(   r)   rq     s   c                 S   s:   g | ]2\}}}}}d |d|  ||d    d  |  qS r    r!   r(   rX  r(   r(   r)   r3     s   zcol2im.<locals>.<listcomp>rN   c                      s4   d d d d d d  dd  d	S 
NzGiven output_size=r\  r]  r^  r_  z , expected input.size(-1) to be 	 but got rN   .r(   r(   LrE  rD  ra  rF  r   r  r(   r)   rq     s   c                      s4   d d d d d d  dd  d	S rq  r(   r(   rt  r(   r)   rq     s   rx   r   rb  c                 S   s   g | ]\}}|d |  qS r!   r(   )r0   or   r(   r(   r)   r3   6  rr   
accumulate)T)r>   rs   r  r   r   rc  rP   r   rd  rC  r%  rR   r  prodrt   _unsafe_index_putr   rY  re  )r   ra  rD  rE  rF  r  rP  r  Zprod_kernel_sizecolrf  Zout_hZout_wrg  rh  ri  rj  rk  rl  rm  rn  Zindices_rowZindices_colZoutput_padded_sizer<  r0  r(   rt  r)   col2im  s    




 



"

r}  )rc   r1  re   c                 C   s$   | | | |  jt| d}|S Nr   )type_asr(  rA   r   )rc   r1  re   rE   r(   r(   r)   native_dropout_backwardC  s    	r  )r   
input_size	dimensionr   r  rL   c           	      C   s   t |dkrt| dS tt ||}tj|| | jtjd}|d||	 }| 
d|d 	||d } | |}d| |f }tj||| dd S )Nr   r&  rN   r    r5   Trx  )r  r>   Zsqueeze_copyrA   r   r)  r%  int32ZunfoldflattenZmovedimr  rt   r{  r>  )	r   r  r  r   r  rK   r0  rS   r2  r(   r(   r)   unfold_backwardR  s    
r  )rc   rl   epsrL   c              	   C   sx   |d ur>|}d| }t t ||k||k| |d|   dS t t |dk|dk| |d|   |dtdS d S )Nr^   r   r(   nan)r>   r`   r,  r   r   )rc   rl   r  lohir(   r(   r)   logit_backwarde  s    r  r   r   trainc                 C   s*   |r|dkrt | ||d S |  S d S ri   )rt   native_dropoutr(  r  r(   r(   r)   dropoutz  s    r  out0out1c                 C   s   |rl|dkrl|dkr.t | t j| t jdfS | jjs>tdt | |k}||  tdd|   }||fS | t j| t jdfS d S )Nr   r    r   z?result type Float can't be cast to the desired output type Longr^   )	r>   r   r+  r   is_floating_pointr  	rand_liker   r   )r   r   r  Z	bool_maskresr(   r(   r)   r    s    r  )r1   rK   half_to_floatc                 C   s   |   } |r| jtjksJ tj| tjjd\}}| |} | 	 dkrTt
| }ntj| |dd}t
| | }|tj||dd }|s||}|S Nr4   r   T)r   )r>  r   r>   halfrA   rB   ELEMENTWISE_TYPE_PROMOTION_KINDDEFAULTr7   r   r_   amaxr   )r1   rK   r  r:   r<   Zunnormalizedx_maxr   r(   r(   r)   _softmax  s    


r  )r   c           	      C   s   |   } |r| jtjksJ tj| tjjd\}}| |} | 	 dkrN| }ntj
| |dd}| | }ttjt||dd}|| }|s||}|S r  )r>  r   r>   r  rA   rB   r  r  r7   r   r  r   r   r_   )	r1   rK   r  r:   r<   Zshiftedr  Zshifted_logsumexpr   r(   r(   r)   _log_softmax  s     


r  rN   )r   r/  padding_idxscale_grad_by_freqsparserL   c                 C   sN   |   dksJ d|jdkrB| d|}|jdkr>|d}|S | | S d S )Nr!   z'weight' must be 2-Dr    r   )rK   r  Zindex_selectre  )r   r/  r  r  r  r   r(   r(   r)   	embedding  s    	


r  )rc   r/  num_weightsr  r  c                 C   s   t j| t jjd\}}| |} t|tj}|rp||f}t	|}t
j||g|dd}|| }	| |	d } t||k| j}
| |
d}| |f| j|jd   }t
j||g|dd|S )Nr  Trx  rN   r   )rA   rB   r  r  r7   r   r>   longr  r   rt   r{  rP   rR   r  masked_fillr   )rc   r/  r  r  r  r:   r<   countsr*  Zgrad_weights_scaler1  r   grad_weightr(   r(   r)   embedding_dense_backward  s&    	


r  r8   c                 C   s   d}| D ]}||9 }q|S rW   r(   )r1   rE   ir(   r(   r)   rz    s    
rz  )tensorsrK   
num_chunksrL   c           	      C   s   g }| D ]}|  }|| | d | | }||| krndgd |j| d  d|||  g }t||d}|d | t|dg }||| q|S )Nr    r   r!   rN   )r   r  rt   constant_pad_ndr>   Sizeappendr   )	r  rK   r  padded_tensorstensortensor_sizeZpad_along_dimrY  Z	view_sizer(   r(   r)   
_pad_chunk  s    
r  )r  c                 C   s(   | d j }| D ]}|j |kr dS qdS )Nr   FTr  )r  r  r  r(   r(   r)   have_same_ndims  s
    

r  )r  rK   c                 C   sB   | d   d | }| D ]$}t|  d | |kdd  qd S )Nr   c                   S   s   dS )NzG_chunk_cat expects same sizes of 0,...,dim-1 dimensions for all tensorsr(   r(   r(   r(   r)   rq   !  rr   z+leading_dimension_matches.<locals>.<lambda>)r   r>   rs   )r  rK   Zleading_dim_sizesr  r(   r(   r)   leading_dimension_matches  s    r  )r  rK   r  c                 C   s   t |dkdd  t t| dkdd  | d j}| d j}| D ]H}t | dkdd  t |j|kdd  t |j|kdd  qDt| rt| d 	 |}n4t |dkd	d  | D ]}t ||j
k d
d  qt| | |S )Nr    c                   S   s   dS )Nz&_chunk_cat expects positive num_chunksr(   r(   r(   r(   r)   rq   *  rr   z._preprocess_chunk_cat_inputs.<locals>.<lambda>r   c                   S   s   dS )Nz0_chunk_cat expects a non-empty input tensor listr(   r(   r(   r(   r)   rq   ,  rr   c                   S   s   dS )Nz#_chunk_cat expects non-empty tensorr(   r(   r(   r(   r)   rq   1  rr   c                   S   s   dS )Nz8_chunk_cat expects all input tensors with the same dtyper(   r(   r(   r(   r)   rq   4  rr   c                   S   s   dS )Nz8_chunk_cat expects all inputs tensors on the same devicer(   r(   r(   r(   r)   rq   8  rr   c                   S   s   dS )NzK_chunk_cat expects non-negative dim when input tensors have different ndimsr(   r(   r(   r(   r)   rq   ?  rr   c                   S   s   dS )Nz3_chunk_cat expects dim < ndim for all input tensorsr(   r(   r(   r(   r)   rq   D  rr   )r>   rs   r  r   r%  r   r  rA   r   rK   r  r  )r  rK   r  Zexpected_dtypeZexpected_devicer  r(   r(   r)   _preprocess_chunk_cat_inputs%  s:    


r  )r  rK   r  r   rL   c                 C   sL   t | ||}t| ||}|d u r0t||d S tj||d |d |S d S )Nr    )r   )r  r  r>   r   )r  rK   r  r   r  r(   r(   r)   
_chunk_catJ  s    r  )rl   split_sizesrK   r   rL   c                 C   s\   t j| ||d}|d u r&dd |D S t||D ]"\}}t||j t||dd q0d S d S )Nrp   c                 S   s   g | ]}|j tjd qS )r   )r(  r>   r   )r0   sr(   r(   r)   r3   f  rr   z)split_with_sizes_copy.<locals>.<listcomp>Tr   )rt   split_with_sizesrc  r   r   r   )rl   r  rK   r   splitsr<  splitr(   r(   r)   split_with_sizes_copy[  s    	r  .)r   
split_sizerK   rL   c                 C   s   t j| ||S r5   )rt   r  r   )r   r  rK   r(   r(   r)   unsafe_splitn  s    r  )r   r  rK   rL   c                 C   s   t j| ||S r5   )rt   r  r  )r   r  rK   r(   r(   r)   unsafe_split_with_sizess  s    r  )rl   r  rK   rL   c                    s   | j }|| } dkr,|dks"J |  fS |  d   }ddlm} ||} fddt|D }  | |  |d< t| ||S )Nr   r    )	guard_intc                    s   g | ]} qS r(   r(   r0   r  r  r(   r)   r3     rr   zsplit.<locals>.<listcomp>rN   )r   detachr  r  rO   r>   r  )rl   r  rK   r  r  chunksr  r  r(   r  r)   r  z  s    
r  )rl   tensor_indices_or_sectionsrK   rL   c                    s   |j jdksJ |jtjks J |  t dkp: dk fdd  dkrr| }t|t	sfJ | 
||S dd |D }| 
||S d S )Ncpur    r   c                      s   d  dS )Nz{tensor_split expected tensor_indices_or_sections to be a zero-dimensional or one-dimensional tensor, but got a tensor with z dimsr(   r(   Z	split_dimr(   r)   rq     s   zAtensor_split_tensor_indices_or_sections_py_impl.<locals>.<lambda>c                 S   s   g | ]}|  qS r(   )itemr  r(   r(   r)   r3     rr   zCtensor_split_tensor_indices_or_sections_py_impl.<locals>.<listcomp>)r%  typer   r>   rB  rK   rs   r  r.   r   tensor_split)rl   r  rK   sectionsr/  r(   r  r)   /tensor_split_tensor_indices_or_sections_py_impl  s    

r  )rl   mat1mat2r[   rd   c                 C   sH   |   s |  s t|}t|}|t|| }|dkr<|S |||   S ri   )r  
is_complexr!  r>   mm)rl   r  r  r[   rd   r   r(   r(   r)   addmm  s    r  )rl   r  r  r[   rd   use_geluc                 C   s<   t | ||||}|r2| jr(tj|ddS t|S t|S )Nr   )r   )r  is_cudart   gelurelu)rl   r  r  r[   rd   r  r   r(   r(   r)   _addmm_activation  s    
r  )rl   r  vecr[   rd   c                 C   s\   |   s |  s t|}t|}|t|| }|dkr<|S | dkrP||  S |||   S ri   )r  r  r!  r>   mvr   )rl   r  r  r[   rd   r   r(   r(   r)   addmv  s    r  )rc   r   r   rstdgammaNCHxWgroupoutput_maskrL   c
              	      s  t j| ||dd t j|| dd t j|dd t|    k fdd tjfkfdd td u p  k fdd t \}
}t|dk fdd t| |	 j
d	gd
}| 	 j
d	gd
}d }d }d }|	d rvd|
  }d urt|d|

d	}t|d|

d	}t|dd|
}nL||

d	}||

d	}t|dtjd|
f|jd}| | | | | | }|  || |  }|d}t|d}t|d}t| |
|t||
| | }||j|j}|	d r|	|
|	|
d  |d j
dgd
 }|	d	 r|j
dgd
}|||fS )NF)Zallow_cpu_scalar_tensorsc                      s   d    dS )NzExpect input to have z	 elementsr(   r(   )r  r  r  r(   r)   rq     rr   z,native_group_norm_backward.<locals>.<lambda>c                      s   d  d dj  S )NzExpect mean to have shape (, z
, but got rW  r(   )r  r  r   r(   r)   rq     rr   c                      s    d  dd ur  nd S )NzExpect gamma to have z elements but got rN   )r   r(   )r  r  r(   r)   rq     rr   r   c                      s   d  d S )NzExpect number of channels z, to be evenly-divisible by number of groups r(   r(   )r  r  r(   r)   rq   	  rr   r!   rp   r^   rN   r    r$  r   )rA   Zcheck_same_deviceZcheck_same_shaper>   rs   r   r   divmodr   r-  r   rP   r   r*  r%  rR   r7   r   )rc   r   r   r  r  r  r  r  r  r  ZcpgZ_remZdsdbd_inputZd_gammad_biasr  Zds_valZdb_valc1c2c3r(   )r  r  r  r  r  r   r)   native_group_norm_backward  s    
 

""



$
r  )rc   r   r   r  r  r  r  r  r  r  r  r  out2rL   c
                C   sd   t | |||||||||	
}|
||f}t|D ]2\}}|d ur,t|| |j t||| dd q,|S r   )r  	enumerater   r   r   )rc   r   r   r  r  r  r  r  r  r  r  r  r  r   rS   r  rE   r(   r(   r)   native_group_norm_backward_out?  s    
r  )r1   rL   c                 C   s   | d ur|  |S | S r5   r7   )r1   r   r(   r(   r)   _maybe_cast\  s    
r  )	grad_outr   normalized_shaper   r  r   biasr  rL   c           "         sn  |j }| }	t|j  fdd| |||fD \}
}}}|
d usHJ |	t| }||d  }|d | }g }g }t|	D ]"}||kr|| q||| q|t|}t|}ddl	m
} ||dks||dkr*|d r||nd |d r|||d  nd |d r$|||d  nd fS t|| }t|| }|| | }|d urf|
| }n|
}|| }t||d}t||}t||d}t||}|| | }d }d } d }!|d r|| | }|d r|d urt|dkr
t|
| |d} n|
| } |d rL|d urLt|dkrDt|
|d}!n|
 }!t||jt| |jt|!|jfS )	Nc                 3   s*   | ]"}|d ur|j  tjdn|V  qd S r~  )r7   r>   r   r/   r9   r(   r)   rI  q  s   z-native_layer_norm_backward.<locals>.<genexpr>r   r  r    r!   TF)r   rK   rA   get_computation_dtyper   r  rO   r  rz  r  r  r  rR   r>   r   r   r(  r  )"r  r   r  r   r  r   r  r  input_shapeZ
input_ndimgrad_out_cast
input_castweight_castZ	bias_castaxisZ
inner_dimsZ
outer_dimsZinner_dim_indicesZouter_dim_indicesr  r  Mr  Zx_hatZ
grad_x_hatabr  r  r  rG   r  Zd_weightr  r(   r9   r)   native_layer_norm_backwardc  sj    







r  )r  r   r  r   r  r   r  r  r  r  r  rL   c             	   C   s`   t | |||||||}||	|
f}t|D ]2\}}|d ur(t|| |j t||| dd q(|S r   )r  r  r   r   r   )r  r   r  r   r  r   r  r  r  r  r  r   rS   r  rE   r(   r(   r)   native_layer_norm_backward_out  s    
r  )
r   r   r  running_meanrunning_varr   momentumr  
functionalrL   c	                 C   sb  dgt td|   }	t| j}
|}|}|rt| j}
| j|
d}tj||	ddd\}}t	|| }| | | }t
||	}t
||	}|d ur|| d| |  }|s|| |d ur|  | jd  }t
||	}|||d   }|| d| |  }|s|| n|d ur.|d us2J |j|
dd}|}|j|
dd}|}|}dt||  }| jjdkr|}|}n| d	}| d	}t||  d }t||  d }| | | }|d ur| }t||  d }|| }|d ur | }t||  d }|| }| jjdkrJ|j| jd}|j| jd}|j| jd||||fS )
Nr   r!   r   T)rK   Z
correctionr   r    )r   ru   r  r   )r  rO   rK   rA   r  r   r7   r>   Zvar_meanrsqrtre  copy_r   r   r  r%  r  r  rR   r  )r   r   r  r  r  r   r   r  r  Zreduction_dimsr:   new_running_meannew_running_varZ	input_accZ
biased_varr   r  r<  	save_mean	save_rstdnZsqueezed_varZunbiased_varinvstdr(   r(   r)   native_batch_norm_helper  sr    






r  r   r  save_invstd)	r   r   r  r  r  r   r   r  rL   c              
   C   s,   t | |||||||d	\}}	}
}}||	|
fS NFr  r   r   r  r  r  r   r   r  r<  r  r  rQ   r(   r(   r)   native_batch_norm#  s    r  c              
   C   sz   |d u r$|d u r$t | |||||S |d u r4td|d u rDtd|r`t | |||||||S t | ||||||S d S )Nz`running_mean is None, but running_var is provided. They should both be None or both be provided.z`running_var is None, but running_mean is provided. They should both be None or both be provided.)rt   _native_batch_norm_legitr  $_native_batch_norm_legit_no_training)r   r   r  r  r  r   r   r  r(   r(   r)   native_batch_norm_decomposition?  s&    r  r  c                    s|   |  |}|| d |   dkrh|dkrh fdd|D }  | |  ||d < tjjj| ||S tjjj|  |S )Nr    r   c                    s   g | ]} qS r(   r(   r   r  r(   r)   r3   j  rr   z(unsafe_chunk_py_impl.<locals>.<listcomp>)r   r>   opsrt   r  r  r  r   )r  r  rK   r  r  r(   r  r)   unsafe_chunk_py_impld  s    
r  )r   r   r  r  r  r   r  rL   c              
   C   s   t j| ||||d||S r  )rt   r  r  )r   r   r  r  r  r   r  r(   r(   r)   r  p  s    
r  c              
   C   s,   t | |||||||d	\}}	}
}}||	|
fS r  r  r  r(   r(   r)   r    s    r  )r   r   r  r   r   r  rL   c           
   
   C   s,   t | ||d d |||d	\}}}}	}	|||fS r  r  )
r   r   r  r   r   r  r<  r  r  rQ   r(   r(   r)   !_native_batch_norm_legit_no_stats  s    	r  c              
   C   sP   t | |||||||d	\}}	}
}}|d us2J d|d usBJ d||	|
||fS )NT#new_running_mean should not be None"new_running_var should not be Noner  )r   r   r  r  r  r   r   r  r<  r  r  r  r  r(   r(   r)   #_native_batch_norm_legit_functional  s    r  )r   r   r  r  r  r  r   rL   c           	   	   C   sP   t j| ||||d|}d}|t jjjkr8t j| |}t j|t j| j| j	dS )a  
    Return a reserve tensor for batch norm, used only by cudnn to pass forward state to the
    backward pass. This is needed for `_batch_norm_with_update` and `_batch_norm_no_update`,
    which support a variety of backends including cudnn. We create this tensor here to get
    the correct shape in the traced graph if we detect that will call the cudnn kernel,
    and rely on DCE to avoid materializing this tensor.
    Tr   )r   layoutr%  )
r>   _CZ_select_batch_norm_backendZ_BatchNormBackendZCudnnZ(_get_cudnn_batch_norm_reserve_space_sizeemptyuint8r  r%  )	r   r   r  r  r  r  r   backendZreserve_sizer(   r(   r)   _get_batch_norm_reserve_tensor  s    r  c              
   C   sD   t | ||||d||d	\}}}	}
}
t| |||||dd}|||	|fS )NTFr   r  r  r   r   r  r  r  r   r  r<  r  r  rQ   reserver(   r(   r)   _batch_norm_with_update  s    
r$  c              
   C   sh   t | ||||d||d	\}}}	}
}t| |||||dd}|
d usHJ d|d usXJ d|||	||
|fS )NTr   r  r  r!  )r   r   r  r  r  r   r  r<  r  r  Znew_rmZnew_rvr#  r(   r(   r)   "_batch_norm_with_update_functional  s    r%  c              
   C   sD   t | ||||d||d	\}}}	}
}
t| |||||dd}|||	|fS )NFr   r!  r"  r(   r(   r)   _batch_norm_no_update  s    
r&  c                 C   sB   |d u sJ t | |k jt jd}|| |  d|  }||fS )Nr   r^   )r>   r  r7   r  r  )r   r   	generatorr1  r  r(   r(   r)   _fused_dropout_decomposition-  s    r(  )r   r  r%  
pin_memorynon_blockingr   )r1   r   r%  r)  r*  r   c          	      C   s  |r|t jksJ d|r"J dt| t jttttfs<J |d u rl|d u rl|d u rlt| t jrh|  S | S d}t| t jr| }n
t 	| }|d ur||j
kr|d ur|jdkrt j||}d}t j|||}|d ur|st j||}d}|d urt j||dS |S )NZTODOFr  Tr   )r>   stridedr.   r   r!  r   r+  complexr(  scalar_tensorr%  r  _primsZconvert_element_typeZ
device_put)	r1   r   r  r%  r)  r*  r   Zdtype_convertedZx_tensorr(   r(   r)   _to_copy7  s,    

r/  c                 C   s
   t | S r5   )rt   aliasr8   r(   r(   r)   nop_decompositione  s    r1  r  Zout3)r   r   r  r  r  r   exponential_average_factorepsilonc              
   C   s^   t | |||||||\}}	}
|r:||	|
| jdtjdfS ||d|d| jdtjdfS )Nr  r   )rt   r  r  r>   r  )r   r   r  r  r  r   r2  r3  r  r  r[  r(   r(   r)   cudnn_batch_normm  s"    
r4  c                 C   s@   t |D ]2\}}|dkr|| jk r0| j| |ks| |} q| S rW   )r  r  r   rP   )r1   broadcast_maskr  r1  r(   r(   r)   _broadcast_batch_norm_backward  s     r6  )r  r   r   r  r  r  r  r  r  r  r#  rL   c                 C   s   t | |||||||||	
S r5   )native_batch_norm_backward)r  r   r   r  r  r  r  r  r  r  r#  r(   r(   r)   batch_norm_backward  s    r8  )r  r   r   r  r  r  r  r  r  r  rL   c
           &         s  |j }
|d ur|j }n|
}t|j   fdd| ||||||fD \}}}}}}}|j}| }|dksrJ dd}tt|||  }|}|}|r|d ur|d usJ n&|d ur|d usJ |}t|| }dg| }|| ||< g }t	|D ]}||kr|
| qt||}d| }t||}t|||  |}t|| |}tt|| || |} |d u rt||d }!nt|| |}!|r|| |  }"||" | |! }#n||! }#|	d r|| }$nd }$|	d r|}%nd }%|#|
t|$|t|%|fS )Nc                 3   s$   | ]}|d ur|  n|V  qd S r5   r  r/   r9   r(   r)   rI    s   z-native_batch_norm_backward.<locals>.<genexpr>r!   z$rank of the input must be at least 2r    r^   )r   rA   r  r   rK   rz  r  r>   r  rO   r  r6  r   r   r7   r  )&r  r   r   r  r  r  r  r  r  r  r:  Zweight_dtyper  r  r  Zrunning_mean_castZrunning_var_castZsave_mean_castZsave_invstd_castr  Z
input_rankr  Znum_featuresr   r
  r5  Zreduction_axesr  r   Zgrad_output_sumZdot_pZ	grad_meanZ
proj_scaleZ
grad_scaleZprojrS   r  Z	grad_biasr(   r9   r)   r7    s    	






r7  )r  r   r   r  r  r  r  r  r  r  r  r  r  rL   c
                C   sd   t | |||||||||	
}|
||f}t|D ]2\}}|d ur,t|| |j t||| dd q,|S r   )r7  r  r   r   r   )r  r   r   r  r  r  r  r  r  r  r  r  r  r   rS   r  rE   r(   r(   r)   native_batch_norm_backward_out	  s$    
r9  r   rc   r   r  r  r  save_varr3  c                 C   s    t || |||||d|g d
S NT)TTTrt   r7  r:  r(   r(   r)   miopen_batch_norm_backwardA	  s    r>  	r   rc   r   r  r  r  r;  r3  ZreserveSpacec	           	      C   s    t || |||||d|g d
S r<  r=  r?  r(   r(   r)   cudnn_batch_norm_backward[	  s    r@  )r   ra  c                    s  | j  | jttdv fdd | jdd  D ]}t|dkfdd q:d |d  dkrԈd |d  dkrtdd	 tdd  |D }td
d	 tdd  ||D }tjj	| ||S dd dd  fdd}|d |d \}}}}	|d |d \}
}}}| dt
|d|
f }|	sV|sVtj|ddS dd }|||||	dd\}}|||||dd\}}d }tt|jd t|jd D ]B\}}|d u r|d|d d |f }n||d|d d |f  }q|||  S )NrR  c                      s
   d  S )Nz9adaptive_avg_pool2d(): Expected 3D or 4D tensor, but got r(   r(   r  r(   r)   rq   	  rr   z%adaptive_avg_pool2d.<locals>.<lambda>r   r   c                      s   dt   dS )Nzjadaptive_avg_pool2d(): Expected input to have non-zero size for non-batch dimensions, but input has shape rs  rU  r(   rW  r(   r)   rq   	  s   rN   c                 s   s   | ]\}}|| V  qd S r5   r(   )r0   r  rw  r(   r(   r)   rI  	  rr   z&adaptive_avg_pool2d.<locals>.<genexpr>c                 s   s$   | ]\}}}||d  |  V  qdS r    Nr(   )r0   r  rw  r  r(   r(   r)   rI  	  s   c                 S   s   t j| | |ddS )NtruncZrounding_moder>   divr  r  r[  r(   r(   r)   start_index	  s    z(adaptive_avg_pool2d.<locals>.start_indexc                 S   s    t j| d | | d |ddS )Nr    rB  rC  rD  rF  r(   r(   r)   	end_index	  s    z&adaptive_avg_pool2d.<locals>.end_indexc                    s   t j| t jd}||| }| | d }| | }|dkpD|| dk }|rV|d7 }n|dkrf|d8 }t j| t jd}|d| }|rt j| d |j|jd}	t ||	}||| }
|
| }n|}||||fS )Nr&  r    r   rN   rA  )r>   r)  rB  rP   r-  r   r%  minimum)in_sizeout_sizeZorangeZi0Z	maxlengthZin_size_modadaptive	range_maxr0  maxvali1length)r%  rH  rG  r(   r)   compute_idx	  s(    

z(adaptive_avg_pool2d.<locals>.compute_idx.r   )r   rN   rp   c                 S   sd   t |tr| |fS |dk sJ ||dk}|dkr>t|d}t| |d} t|| }| |fS d S )Nr   rN   r   r   r   )r.   r   rP   rR   r>   r  )valsrP  rM  rL  rK   r1  r(   r(   r)   
maybe_mask	  s    

z'adaptive_avg_pool2d.<locals>.maybe_mask)rL  rK   r   )r%  r   r  r>   rs   rV  rc  nnr  Z
avg_pool2drR   r   r   rO   )r   ra  rT  r  kernelrQ  ZidxhZlength_hZrange_max_hZ
adaptive_hZidxwZlength_wZrange_max_wZ
adaptive_wrR  rS  retr  jr(   )r%  rH  r  r   rG  r)   adaptive_avg_pool2dv	  sN    

(  



&
rX  )rl   r/  ra  rK   c           	      C   s   t d| d ttj| jd |  }ttj|}dg| j }| jd |  |d | < |tj|| j	d
||  d}| t| jd |  t| }tj|d|g| ddd
|jS )NZmax_unpoolingZd_forward_outr    r$  rN   Frx  )rA   Zalert_not_deterministicr   operatorr   r   r  rt   r)  r%  r-  r   r  r  r{  )	rl   r/  ra  rK   ZncZhwZindices_nc_shapeZindices_flatr<  r(   r(   r)   _max_unpoolnd	  s    	"rZ  rl   r/  ra  c                    s   t jt jkfdd t tdkfdd t jdv fdd t jjkfdd tdjD ]$ t  d	k fd
d q|t	dS )Nc                      s   d j  S )Nz2elements in indices should be type int64 but got: r   r(   )r/  r(   r)   rq    
  rr   zmax_unpool2d.<locals>.<lambda>r!   c                      s   dt   dS )NzMThere should be exactly two elements (height, width) in output_size, but got 
 elements.r  r(   ra  r(   r)   rq   
  s    rR  c                      s   d j  dS )NzLInput to max_unpooling2d should be a 3d or 4d Tensor, but got a tensor with  dimensions.r  r(   r   r(   r)   rq   
  s    c                      s   dj  d j  S NzBExpected shape of indices to be same as that of the input tensor (z%) but got indices tensor with shape: rW  r(   )r/  rl   r(   r)   rq   
  s    
r    r   c                      s   dj  d  dS )NzZmax_unpooling2d(): Expected input to have non-zero size for non-batch dimensions, but got  with dimension  being empty.rW  r(   )r  rl   r(   r)   rq   
  s
    )
r>   rs   r   rB  r  r  r   rO   r   rZ  r[  r(   )r  r/  ra  rl   r)   max_unpool2d	  s,    





	rc  r   r/  ra  r  rF  c                    s  t jt jkdd  t jdv fdd t tdkfdd t tdkfdd t tdkfdd t jjkfd	d td
jD ]$ t  dk fdd qt d dkod
 dkod dkfdd t	dS )Nc                   S   s   dS )Nz(elements in indices should be type int64r(   r(   r(   r(   r)   rq   0
  rr   zmax_unpool3d.<locals>.<lambda>r   rb  c                      s   d j  dS )NzLInput to max_unpooling3d should be a 4d or 5d Tensor, but got a tensor with r_  r  r(   r   r(   r)   rq   4
  rr   rx   c                      s   dt   dS )NzVThere should be exactly three elements (depth, height, width) in output_size, but got r\  r]  r(   r^  r(   r)   rq   8
  s    c                      s   dt   dS )NzRThere should be exactly three elements (depth, height, width) in stride, but got: r\  r]  r(   r  r(   r)   rq   ?
  rr   c                      s   dt   dS )NzSThere should be exactly three elements (depth, height, width) in padding, but got: r\  r]  r(   )rF  r(   r)   rq   C
  rr   c                      s   dj  d j  S r`  rW  r(   )r/  r   r(   r)   rq   G
  s    
r    r   c                      s   dj  d  dS )NzZmax_unpooling3d(): Expected input to have non-zero size for non-batch dimensions, but got ra  rb  rW  r(   )r  r   r(   r)   rq   P
  s
    r!   c                      s
   d  S )Nz5strides should be greater than zero, but got stride: r(   r(   rg  r(   r)   rq   Y
  rr   )
r>   rs   r   rB  r  r  r   rO   r   rZ  rd  r(   )r  r/  r   ra  rF  r  r)   max_unpool3d&
  sB    	







	"
rh  )rd   r1   rK   r2  r  rd   c                C   s   t | |||d|dS )NTinplacerd   
_index_addri  r(   r(   r)   
index_add__
  s    	rn  c                C   s   t | |||d|dS )NFrj  rl  ri  r(   r(   r)   	index_addk
  s    
ro  )r1   rK   r2  r  rk  rd   c                   s*  t | jtjdkfdd jdkr<dnd|jdkrT|ndtkfdd  dkrt | jttkpt 	t
  fdd |  }| jdk}|r| dn| }d f }|rtjntj}	|	|||dd	}
|r| S |r|
dS |
 S d S )
Nr    c                      s   d j  dS Nz(Index should have dimension 1 or 0 (got r   r  r(   r2  r(   r)   rq   
  rr   z_index_add.<locals>.<lambda>r   c                      s   d d d S )NzNumber of indices (z') should be equal to tensor.size(dim) (z), for dim=r(   r(   )rK   
index_sizer  r(   r)   rq   
  rr   c                      s   dt   d dS )Nzalpha argument of type z cannot be safely cast to type !)r  r(   )rd   python_typer(   r)   rq   
  rr   r5   Trx  )rA   canonicalize_dimsr  r>   rs   r   Zdtype_to_typer   r+  Zis_weakly_lesser_typer  rP   rt   
index_put_	index_putre  r>  )r1   rK   r2  r  rk  rd   zero_dimr   r0  rw  r   r(   )rd   rK   r2  rr  rt  r  r)   rm  x
  s6    	

rm  r   c              
   C   s   t t| dkdd  t| }| d  }|dd  }tdd | D }|rX||f}n||f}|| }| d ||}dt| }	t|D ]V}
| |
 }t||	d||d f |}|rtj	||d|
d}qtj	||d|
d}q|S )	Nr   c                   S   s   dS )Nz#received an empty list of sequencesr(   r(   r(   r(   r)   rq   
  rr   zpad_sequence.<locals>.<lambda>r    c                 s   s   | ]}| d V  qdS rG  r   r/   r(   r(   r)   rI  
  rr   zpad_sequence.<locals>.<genexpr>)r   r   rK   r2  )
r>   rs   r  r   r}   r   rO   rt   r  r3  )	sequencesbatch_firstZpadding_valueZsequences_sizemax_sizeZtrailing_dimsmax_lenZout_dimsr   Zdim_paddingsr  Zcurrseqrowr(   r(   r)   pad_sequence
  s(    
r  r1   rK   r2  r  c                 C   s   t | |||ddS )NTrk  _index_copyr  r(   r(   r)   index_copy_
  s    r  c                 C   s   t | |||ddS )NFr  r  r  r(   r(   r)   
index_copy
  s    r  )r1   rK   r2  r  rk  c          
         s   t | j|}t jdk fdd | jdk}|r@| dn| } jdkrX dn  d|  f }|rttjntj}||||}	|r| S |r|		dS |	
 S d S )Nr    c                      s   d j  dS rp  r  r(   rq  r(   r)   rq   
  rr   z_index_copy.<locals>.<lambda>r   r5   )rA   ru  r  r>   rs   rP   rt   rv  rw  re  r>  )
r1   rK   r2  r  rk  rx  r   r0  rw  r   r(   rq  r)   r  
  s    

r  r<  r   c                 C   sR   t | d| }t t |  }| js0| jr<| d}n|}|t | |fS )Nr(   r  )r>   rI  r  r_   r   r  Zis_xpur   )rl   rz   ra   r   r(   r(   r)   log_sigmoid_forward
  s    r  r1   lowhighr'  c                 C   s$   t j| jt|t|| j| j|dS )N)r  r  r   r%  r'  )primsZ_uniform_helperr   r   r   r%  r  r(   r(   r)   uniform
  s    r  c                 C   s   |  t| |||S r5   )r  r  )rl   r  r  r'  r(   r(   r)   uniform_
  s    r  c                 C   s   t | d }|d urDt|d u dd  tt ||kdd  |S |d urt|d u dd  tt ||kdd  g }t|D ]J\}}t||kr|| |d  t|  q|t| |d  |  q|S tddd  d S )	Nr!   c                   S   s   dS Nz9Must specify exactly one of output_size and scale_factorsr(   r(   r(   r(   r)   rq     rr   z.upsample_compute_output_size.<locals>.<lambda>c                   S   s   dS N r(   r(   r(   r(   r)   rq     rr   c                   S   s   dS r  r(   r(   r(   r(   r)   rq     rr   c                   S   s   dS r  r(   r(   r(   r(   r)   rq     rr   Fc                   S   s   dS r  r(   r(   r(   r(   r)   rq     rr   )r  r>   rs   r  r!  r  r   )r  ra  scale_factorsZspatial_dimensionsr  r  r(   r(   r)   upsample_compute_output_size  s.    r  c                 C   s   | d u rd S | | S r5   r(   )scalesr0  r(   r(   r)   get_scale_value  s    r  )r   ra  r  rL   c                 C   s2   t |  ||}|r|nd gt| }t| ||S r5   r  r   r  _upsample_nearestr   ra  r  osizer  r(   r(   r)   _upsample_nearest_vec#  s    r  c                 C   s6   t |  ||}|r|nd gt| }t| ||ddS NTexactr  r  r(   r(   r)   _upsample_nearest_exact_vec8  s    r  c                 C   s   g }t |}|rdnd}t|D ]}|| }| j| |  }	|| d urX|	|	||   n|	| }
tj|tj| jd}|| |
 tj}t|d | D ]}|	d}q|
| q |S )Nr   r   rA  r    rN   )r  rO   r   r>   r)  r   r%  r7   rB  rP   r  )r   ra  r  r  r/  Znum_spatial_dimsr5  rT  r  isizere   Zoutput_indicesZinput_indicesrQ   r(   r(   r)   !_compute_upsample_nearest_indicesM  s    $r  )Zpreserve_memory_formatr   )r   ra  r  rL   c                 C   s   t | ||gS r5   r  r   ra  r  r(   r(   r)   upsample_nearest1dm  s    	r  c                 C   s   t | ||gddS r  r  r  r(   r(   r)   upsample_nearest_exact1dy  s    r  )r   ra  scales_hscales_wrL   c                 C   s   t | |||gS r5   r  r   ra  r  r  r(   r(   r)   upsample_nearest2d  s    
r  c                 C   s   t | |||gddS r  r  r  r(   r(   r)   _upsample_nearest_exact2d  s    r  )r   ra  scales_dr  r  rL   c                 C   s   t | ||||gS r5   r  r   ra  r  r  r  r(   r(   r)   upsample_nearest3d  s    r  c                 C   s   t | ||||gddS r  r  r  r(   r(   r)   _upsample_nearest_exact3d  s    r  )r   ra  r  r  rL   c           	      C   sp   t | |||d}d d g| }t| |}|jdkrlt| }| jd }| jjdkr`|dk r`t	j
}|j|d}|S )Nr  r   r    cudar   )r  rt   _unsafe_indexr  rA   r   r   r%  r  r>   r   r>  )	r   ra  r  r  Zspatial_indicesr/  r   r   
n_channelsr(   r(   r)   r    s    


r  c                    sb   |r|rd n|rd n|r"d nd t   dksBJ t  fddtdt  D S )Nrb  r   rx   r!   r   c                    s    g | ]}t ||   qS r(   rU  r  Z
group_sizeparamsr(   r)   r3     s   z!gather_params.<locals>.<listcomp>)r  rO   )r  
has_biaseshas_projectionsr(   r  r)   gather_params  s    r  c                 C   sh   |rB| d|  |d|   }}| d| d  |d| d   }}n| | ||  }}d\}}||||fS )Nr!   r    NNr(   )r  hiddensr  bidirectional
cur_params
cur_hiddenbidir_paramsbidir_hiddenr(   r(   r)   params_hiddens  s    $r  c                 C   s2   ||ksJ | | d|||  | dd|S ri   )r  r   )r  last_batch_size
batch_sizer  r(   r(   r)   update_hidden_for_packed  s    r  c              	   C   s4   ||kr| S ||k sJ t | |d||| fS ri   )r>   concatr   )r  r  r  Z
inp_hiddenr(   r(   r)    update_hidden_for_packed_reverse  s    r  c                 C   s&  |d }|d }|r|d nd }	|r,|d nd }
g }g }|rD|d n|d }| dd|}t| t|}|r||d d d }|D ]Z} | jd }||krn"|rt||||}nt||||}|| |||	||
}|}|| q|r|  n|| |  t	|d}|st	|dn|}||fS )Nr   r    r!   rx   rN   )
r   r>   r  r  r   r  r  r  reverser   )inphiddenr  r  	hidden_fnbatch_sizesr  	ih_weight	hh_weightih_biashh_biasstep_outputr  r  r  	split_inpr  r   
hidden_outr(   r(   r)   one_layer_rnn_data  s@    


r  c                    s    fdd}|S )Nc                    s    t ||||  S r5   r   linearr  r  r  r  r  r  nonlinearityr(   r)   rG   <  s    zrnn_cell.<locals>.innerr(   r  rG   r(   r  r)   rnn_cell;  s    r  c                    s    fdd}|S )Nc                    s$   t | ||}  t ||||  S r5   r  r  r  r(   r)   rG   C  s    zrnn_cell_data.<locals>.innerr(   r  r(   r  r)   rnn_cell_dataB  s    r  c                 C   s   |d }|d }|r|d nd }|r,|d nd }	t | ||}
|rL|
dn|
}
|d}g }|
D ] }|||||||	}|| qb|r|  t|d}||dfS )Nr   r    r!   rx   )	r   r  fliprP   r  r  r>   r   re  )r  r  r  r  r  r  r  r  r  r  precomputed_inputr  r  r  r   r(   r(   r)   one_layer_rnnJ  s    
r  c                 C   s   |d }|d }|r&|d }|d }nt | }t | }|d d}	|d d}
g }d}|	d}d}d}d}d}|  } |	 }	|
 }
t jjj| |||||	|
|||||||||}|d |d |d   }}}||	d|	dffS )Nr   r    r!   rx   F)
r>   r   r   rP   r>  r  rt   Zmkldnn_rnn_layerr  re  )r  r  r  r  r  w0w1w2w3hxcxr  modeZhidden_size
num_layersr  r|  r  outputsrV   hycyr(   r(   r)   mkldnn_one_layer_lstm`  sN    


r  c
                 C   s   |r|  ddn| } g }
t|D ]}t||||\}}}}|rN||d k rN|nd}|	| |||\}}|
| |r|	| |||dd\}}|
| |rt||g| d } n|} |dkr |r ||d k r tj| |dd} q |r|  ddn| } | |
fS )Nr   r    r   T)r  )r  )	transposerO   r  r  r>   r   rK   r  )r   r  r  r  r  r  r  r  r|  layer_fnfinal_hiddensr  r  r  r  r  Zfwd_inpZ
fwd_hiddenZbwd_inpZ
bwd_hiddenr(   r(   r)   _rnn_helper  s*    



r  c	                 C   sR   | d}	t||d}t| |	|||||||ttttjd
\}
}|
t|dfS Nr   Fr  )	unbindr  r  r   r  r  r>   r   stackr   r  r  r  r  r  r  r  r|  r  r   r  r(   r(   r)   rnn_tanh_input  s    
r  c	                 C   sR   | d}	t||d}t| |	|||||||ttttjd
\}
}|
t|dfS r  )	r  r  r  r   r  r  r>   r  r  r  r(   r(   r)   rnn_relu_input  s    
r  c	                 C   sT   | d}	t||d}t| |	||||||dtt|ttjd
\}
}|
t|dfS Nr   Fr  r  )	r  r  r  r   r  r  r>   r  r  datar  r  r  r  r  r  r  r  r  r   r  r(   r(   r)   rnn_relu_data  s&    
r  c	                 C   sT   | d}	t||d}t| |	||||||dtt|ttjd
\}
}|
t|dfS r  )	r  r  r  r   r  r  r>   r   r  r  r(   r(   r)   rnn_tanh_data  s&    
r  c                 C   s   t ||||  }|d|}|d  }	|d  }
|d  }|d  }|
| |	|  }||  }|d u rv|nt ||d }||fS )Nr   r   r    r!   rx   r   r  chunkr   r   )r  r  r  r  r  	hr_weight	chunk_dimZgatesZchunked_gatesZin_gateZforget_gateZ	cell_gateZout_gater  r  r(   r(   r)   	lstm_cell?  s    r  c              
   C   s   |d }|d }|r|d nd }|r,|d nd }t |dkrD|d nt |dkrX|d nd }	|d d}
|d d}t| ||}|r|dn|}g }|D ](} t| |
||||	dd\}
}||
 q|r|  t	|d}||

d|
dffS )Nr   r    r!   rx   rb  r   r  )r  rP   r   r  r  r  r  r  r>   r   re  )r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r(   r(   r)   one_layer_lstmM  s$    *r  c              
   C   s  |d }|d }|r|d nd }|r,|d nd }	t |dkrD|d nt |dkrX|d nd }
g }g }|rp|d n|d }t| t|}|r|d d d }|d }|d }|dd||dd| }}|D ]} | jd }t| ||} ||k r:||d||| |d||| f |dd||dd| }}||krt	||d||| fd}t	||d||| fd}t
| ||||	|
dd\}}|}|| q|r|  ||f}n:|||f |  t| \}}t|dt|df}t|d}||fS )	Nr   r    r!   rx   rb  r   rN   r  )r  r>   r  r  r   r   r   r  r  r  r  r  rc  r   )r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  Zorig_hxZorig_cxr  r  r  r  Zhidden0Zhidden1r   r(   r(   r)   one_layer_lstm_datah  s\    *



r  c                 C   s    dd }|| ||rt S tS dS )a*  Check whether we could use decompose lstm with mkldnn_rnn_layer.
    All the below conditions need to be met:
        * ``torch._C._get_mkldnn_enabled()`` returns ``True``.
        * All the input args are on CPU.
        * The dtypes of args are either torch.float or torch.bfloat16.
        * Inference.
        * ``has_projections`` returns ``False``.

    Args:
        * input: the input sequence to LSTM
        * hx: a tuple of the input hidden state and cell state ``(h_0, c_0)`` to LSTM
        * params: the weight and bias tensors of LSTM
    c           	      S   s   t j sdS | gt| tt| }dd |D }t|dkrHdS | }|t dkrbdS dd |D }|D ]}|t j	t j
fvrt dS qt| jrdS |d d|d dk}|rdS d	S )
NFc                 S   s   h | ]
}|j qS r(   r$  r0   tr(   r(   r)   	<setcomp>  rr   zEselect_one_layer_lstm_function.<locals>.use_mkldnn.<locals>.<setcomp>r    r  c                 S   s   h | ]
}|j qS r(   r   r  r(   r(   r)   r    rr   r   r!   T)r>   r  Z_get_mkldnn_enabledr  r   from_iterabler  popr%  r   bfloat16requires_gradr   )	r   r  r  r  Zdevicesr%  Zdtypesr   r  r(   r(   r)   
use_mkldnn  s&    
z2select_one_layer_lstm_function.<locals>.use_mkldnnN)r  r  )r   r  r  r  r(   r(   r)   select_one_layer_lstm_function  s    r  c	                 C   s   t |dksJ dt|||d d|d dk}tt|d |d }	t| ||}
t| |	||||||||

\}}tt| }|t|d dt|d dfS )Nr!   lstm expects two hidden statesr   r    )	r  r  r   r  rc  r  r  r>   r  )r   r  r  r  r  r  r  r  r|  r  r  r   r  r(   r(   r)   	lstm_impl  s$    $r	  c	                 C   s   t |dksJ dt|||d d|d dk}tt|d |d }	t| |	||||||dtt|d
\}
}tt| }|
t	|d dt	|d dfS )Nr!   r  r   r    F)r  )
r  r  r   r  rc  r  r   r  r>   r  r  r(   r(   r)   lstm_data_impl  s"    $
r
  c                 C   sr   |  dd}t||| dd}|d |d   }|d |d   }	|d |d |   }
||
 |	 |
 S )Nrx   r    r!   r   )r  r   r  r   r   r  r  r  r  r  r  Zchunked_igatesZchunked_hgatesZ
reset_gateZ
input_gateZnew_gater(   r(   r)   gru_cell  s    r  c                 C   s|   t | ||dd}t |||dd}|d |d   }|d |d   }	|d |d |   }
||
 |	 |
 S )Nrx   r    r   r!   r  r  r(   r(   r)   gru_cell_data&  s    r  c	                 C   sJ   t ||d}t| |d||||||dtt|td
\}	}
|	t|
dfS )NFr   r  )r  r  r  r   r  r  r>   r  )r  r  r  r  r  r  r  r  r  r   r  r(   r(   r)   gru_impl_data/  s    r  c	                 C   sH   t ||d}t| |d|||||||tttd
\}	}
|	t|
dfS )NFr   r  )r  r  r  r   r  r  r>   r  )r   r  r  r  r  r  r  r  r|  r   r  r(   r(   r)   gru_implM  s    
r  c                 C   s:   t |  ||}t|d}t|d}tjj| ||||S Nr   r    )r  r   r  r>   r  rt   _upsample_bilinear2d_aar   ra  align_cornersr  r  scale_hscale_wr(   r(   r)   upsample_bilinear2d_aa_veck  s    


r  c                 C   s:   t |  ||}t|d}t|d}tjj| ||||S r  )r  r   r  r>   r  rt   _upsample_bicubic2d_aar  r(   r(   r)   upsample_bicubic2d_aa_vecw  s    


r  c                 C   s4   t |  ||}|r|nd gt| }t| |||S r5   )r  r   r  _upsample_linear)r   ra  r  r  r  r  r(   r(   r)   _upsample_linear_vec  s    	r  )r   ra  r  r  rL   c                 C   s   t | |||gS r5   r  )r   ra  r  r  r(   r(   r)   upsample_linear1d  s    r  )r   ra  r  r  r  rL   c                 C   s   t | ||||gS r5   r  )r   ra  r  r  r  r(   r(   r)   upsample_bilinear2d  s    r  )r   ra  r  r  r  r  rL   c                 C   s   t | |||||gS r5   r  )r   ra  r  r  r  r  r(   r(   r)   upsample_trilinear3d  s    r  c                 C   sD   |r |dkr| d |d  S dS |d ur8|dkr8d| S | | S d S )Nr    r^   r   r(   )rJ  rK  r  re   r(   r(   r)   _compute_scale  s    r  c                 C   s    |r| | S | |d  d S d S Nr   r(   )re   Z	dst_indexr  r(   r(   r)   _compute_source_index  s    r!  )r#  weightsweights_precisionrL   c                 C   sB   t dd t| |D d|d >  }||? }t|ddtjS )Nc                 s   s*   | ]"\}}| tj| tj V  qd S r5   )r7   r>   r  )r0   r  r[  r(   r(   r)   rI    s   z%_sum_tensors_uint8.<locals>.<genexpr>r    r      )_sum_tensorsrc  r>   r   r7   r  )r#  r"  r#  r<  r(   r(   r)   _sum_tensors_uint8  s    
r&  )r"  rL   c                 C   sJ   t |  }d}t j||jd}d|d|d >   }|dk}||  S )N   r$  r   r    i   )r>   r  r}   r)  r%  r   )r"  Z
max_weightZmax_weight_precisionZ
precisionsvaluesr1  r(   r(   r)   _compute_weight_precision  s    r)  )r   ra  r  r  rL   c                    s  j d }j dd  }t|tjtjjd\}fddfddtt|||D }tt| \}g }	t	ddgg  D ]F d d g fd	dt
D  }
t|
}t|}|	| qtt
D ]N}|| |  d
dfddt|	d d d |	dd d D }	qt|	dksDJ |	d }t}jjdkrt|dk rttj}t|tjsJ |j|d} s| }|S )Nr    r!   r  c           	         s   t | | |}tj|jdjd}t|| jdd}|j|jd gdg| R  }|tj	}|d j| d d}|||fS )Nr$  r   r   ry   r   r    r|   )
r  r>   r)  r%  r7   r!  r   r   r   rB  )	inp_sizerK  r  ZnsqueezeZscale_factorr  Zx_f32r1   Zxp1)r  r   r   r(   r)   
get_values  s    z$_upsample_linear.<locals>.get_valuesc                    s,   g | ]$\}\}}} |||d  | qS r   r(   )r0   r  r*  rK  r  )r+  n_dimsr(   r)   r3     s   z$_upsample_linear.<locals>.<listcomp>r   c                    s(   g | ] } | d kr| n| qS r  r(   )r0   k)r  xp1sxsr(   r)   r3     rr   r   r^   c                    s$   g | ]\}}|t ||   qS r(   )r>   r   )r0   Zv1Zv2)xscaler(   r)   r3     s   r     r   )r   r  rA   rB   r  INT_TO_FLOATr  rc  r  r   rO   rt   r  r   r  reversedr   r7   r   r%  r  r>   r   r.   r   r>  r  round)r   ra  r  r  r  Z	inp_sizesrQ   r(  Zxs_f32vsr0  vr  r   r   r(   )	r  r  r   r+  r   r,  r.  r/  r0  r)   r    sF    


"



r  )r  r  rL   c                 C   s   | j |j kS r5   rW  )r  r  r(   r(   r)   is_same_size'  s    r7  c                 G   s   t | |S r5   )rt   r-  )r1   r   rC   r(   r(   r)   _reshape_alias,  s    r8  c                 C   s   t | |S r5   )rt   r2  )r1   r/  r(   r(   r)   r  2  s    r  c                 C   s   t | |||S r5   )rt   rw  )r1   r/  rm   ry  r(   r(   r)   r{  7  s    r{  c                 C   s   |D ]*}|d urt |jt jt jfv dd  qt |jt jkdd  ddlm} ||  dkrt j	
| |}| |j|S tt|D ]0}|| }|d ur|jd| |d d||< qt| || |S )Nc                   S   s   dS Nz3tensors used as indices must be long or int tensorsr(   r(   r(   r(   r)   rq   B  rr   z&_unsafe_masked_index.<locals>.<lambda>c                   S   s   dS Nz*tensors used as masks must be bool tensorsr(   r(   r(   r(   r)   rq   G  rr   r   r  r    r  )r>   rs   r   r  r!  r+  r  r  r   Z_meta_registrationsZmeta_index_Tensorr   r   rO   r  r   r   rt   r  r  )r1   r1  r/  fillr2  r  Zmeta_resultr  r(   r(   r)   r.  <  s&    
r.  c                 C   s   |D ]*}|d urt |jt jt jfv dd  qt |jt jkdd  |  dkr\|  S tt	|D ]8}|| }|d urh|j
| | | |d d||< qh|| d}tj| ||ddS )	Nc                   S   s   dS r9  r(   r(   r(   r(   r)   rq   ^  rr   z5_unsafe_masked_index_put_accumulate.<locals>.<lambda>c                   S   s   dS r:  r(   r(   r(   r(   r)   rq   c  rr   r   r    r  Trx  )r>   rs   r   r  r!  r+  r   r(  rO   r  r   r   r  rt   r{  )r1   r1  r/  r(  r2  r  Zmasked_valuer(   r(   r)   #_unsafe_masked_index_put_accumulateX  s$    
&r<  )rl   r   r   r   r   rL   c                 C   sV  |   }d}|dk rd}|d urX|dkrLdg| }|jd ||< ||}n|}| | } t||k|d}	|	|}
t| ||
| }t||k|d}|tj	j
kr|dkr| dd}||fS |d ur
|| j}t|||
|}t||k|d}| }n||k | }|tjj
kr4| }n|tjj
krN| | }||fS )Nr    r!   r   r(   r   )rK   r   r-  r>   r`   rP   gatherre  r   r%   rm   r   r'  r   r7   r'   r&   )rl   r   r   r   r   r,  r   r   wr   Zsafe_target_r   r   Zwsumr(   r(   r)   _nll_loss_forwardr  s@    




r?  r   c                 C   s   |   dkr|   dks J d|  dks4J d|   dkoJ|  dk}|s~| jd |jd ks~J d| j d|j d| jd	 }|d u s|  dkr| |ksJ d
| d|j t| ||||S )Nr   r!   r   r    r   r   r   r   rN   z/weight tensor should be defined either for all z7 classes or no classes but got weight tensor of shape: )rK   r   r   r?  )rl   r   r   r   r   r   Z	n_classesr(   r(   r)   nll_loss_forward  s    	 
"r@  c                 C   s   t | ||||S r5   )r?  )rl   r   r   r   r   r(   r(   r)   nll_loss2d_forward  s    	rA  )r1   ArL   c                 C   s    |d |  |d  |  |  d S )Nr!   rx   r    r(   r1   rB  r(   r(   r)   _upsample_cubic_convolution1  s    rD  c                 C   s(   ||  d|  |  d|  |  d|  S )Nrb     r   r(   rC  r(   r(   r)   _upsample_cubic_convolution2  s    rF  )r   rL   c           
      C   s   d}| j t dkrtj| d|  gdd}tj| d d|  gdd}t||}t||}tj|dd\}}tj|dd\}}	|||	|fS t| d |t| |td|  |td|  |fS d S )Ng      r  r^   r   rp   r   )r%  r>   r  rF  rD  r  )
r   rB  Ztt1Ztt2Zw03Zw12r  r  r  r  r(   r(   r)    _upsample_get_cubic_coefficients  s    

rG  )coeffstsrL   c                 C   s    t |}tdd t| |D S )Nc                 s   s   | ]\}}|| V  qd S r5   r(   r0   r  r  r(   r(   r)   rI    rr   z+_upsample_cubic_interp1d.<locals>.<genexpr>)rG  r%  rc  )rH  rI  Zcoeffs2r(   r(   r)   _upsample_cubic_interp1d  s    rK  )rI  rL   c                 C   s   t tj| S r5   )r   r>   add)rI  r(   r(   r)   r%    s    r%  )	num_stepsr  r   r%  c                 C   sB   | dkrt jd||dS |s(| d |  nd}t j| || ||dS )Nr    r   r&  )Zstepsr%  r   )r>   r  Zlinspace)rM  r  r   r%  r  r(   r(   r)   _linspace_from_neg_one  s    rN  )thetahr>  r  c           	      C   s   | j }| j}t||||d|d}t|||||dd}tjd||d}tjjj|dddd}tjjj|dddd}tjjj|d	ddd}|| | S )
Nr    )r    r    r    rA  )r   r!   constantr   rY  r  rm   )r    r    )r!   r   	r   r%  rN  r-  r>   r*  rT  r  rY  )	rO  rP  r>  r  r   r%  grid_xgrid_ygrid_oner(   r(   r)   _make_base_grid_4d  s    rW  )rO  rT  rP  r>  r  c                 C   s   | j }| j}t||||dd|d}t||||d|dd}t|||||ddd}	tjd||d}
tjjj|dddd}tjjj|dddd}tjjj|	d	ddd}	tjjj|
d
ddd}
|| |	 |
 S )Nr    )r    r    r    r    rA  )r   rx   rQ  r   rR  rp  r!   r    )rx   r   rS  )rO  rT  rP  r>  r  r   r%  rT  rU  Zgrid_zrV  r(   r(   r)   _make_base_grid_5d  s    rY  rO  r   r  c           	      C   sL   |\}}}}t | |||d}|ddd| jd d}||||dS )Nr  rN   rx   r    r   r!   )rW  r-  r   rP   r   )	rO  r   r  r	  rQ   rP  r>  	base_gridgridr(   r(   r)   _affine_grid_generator_4d$  s     r^  c           
      C   sR   |\}}}}}t | ||||d}|ddd| jd d}	|	||||dS )Nr[  rN   r   r    r   rx   )rY  r-  r   rP   r   )
rO  r   r  r	  rQ   rT  rP  r>  r\  r]  r(   r(   r)   _affine_grid_generator_5d.  s     r_  c                 C   sD   t t|dv dd  t|dkr2t| ||dS t| ||dS d S )Nre  c                   S   s   dS )NzCaffine_grid_generator needs 4d (spatial) or 5d (volumetric) inputs.r(   r(   r(   r(   r)   rq   >  rr   z'affine_grid_generator.<locals>.<lambda>r   r[  )r>   rs   r  r^  r_  rZ  r(   r(   r)   affine_grid_generator8  s    
r`  )r  r]  interpolation_modepadding_moder  _expand_gridrL   c                    s(  t dv fdd t dv fdd tttdfddttttdd	d
tttdfddtttdfdd}j\ |j\}}|dksJ r|d| d}tttdfddt jjddddt j jdd ddtttt	d fddtttdfdd
|d }	|d }
dkrB||	}||
}|
 |
  d  }}d  }}|| }}|| ||  }|| ||  }|| ||  }| |  }t
fdd|f|||f|||f|||ffD S dkr|||	}||
}| }| }
||dS |	}|
}|
 |
 | | }sʈd|d}tttd
fd d!ttd"fd#d$	t	fd%dtd&D }t||S d S )'N)r   r    r!   c                      s
   d  S )NzInvalid interpolation mode r(   r(   )ra  r(   r)   rq   W  rr   z"_grid_sampler_2d.<locals>.<lambda>c                      s
   d  S )NzInvalid padding mode r(   r(   )rb  r(   r)   rq   Z  rr   )coordsr   rL   c                    s0    r|d d n|d }|d d }| | | S r   r(   )rd  r   r   ofsr[  r(   r)   unnormalize]  s    z%_grid_sampler_2d.<locals>.unnormalize)rd  	twice_low
twice_highrL   c                 S   sv   ||krt | S |d }|| d }| |  }t ||}||  jt jd}t |d@ dk|| || | S )Nr!   r   r    r   )r>   r   r   fmodfloorr7   Zint8r`   )rd  rg  rh  Z
coords_minZcoords_spanZcoords2extraZflipsr(   r(   r)   reflect_coordinatesh  s    
z-_grid_sampler_2d.<locals>.reflect_coordinatesc                    sj   dkr| S dkr&t | d|d S  r@| dd|d  }n| dd| d }t |d|d S d S )Nr   r    r!   rN   r~   )rd  r   Zcoords_reflected)r  rb  rl  r(   r)   compute_coordinatest  s    z-_grid_sampler_2d.<locals>.compute_coordinatesc                    s   | |} ||S r5   r(   )rd  r   Z	coords_un)rm  rf  r(   r)   compute_source_index  s    
z._grid_sampler_2d.<locals>.compute_source_indexr!   r    )r/  ysrL   c                    s,   t d| kt | k t d|k| k S ri   r>   r,  )r/  ro  )iHiWr(   r)   in_bounds_cond  s    $z(_grid_sampler_2d.<locals>.in_bounds_condr$  )r/  ro  wsrL   c                    sN   | |rnd t  fdd| jtjd|jtjd|fD S )Nr    c                 3   s(   | ] }t |d  V  qdS rG  )r>   r`   r-  r  )r  r[  rO  oHoWr(   r)   rI    s   z1_grid_sampler_2d.<locals>.clip.<locals>.<genexpr>r   )rV  r7   r>   rB  )r/  ro  rt  )r  r  rc  rs  ru  rv  )r[  rO  r)   clip  s
    
z_grid_sampler_2d.<locals>.clip)ixiyrL   c                    s&   | ||\}}} ||f | S r5   r(   )rx  ry  r>  Zidx_xZidx_yZw_)C_idxN_idxr  rw  r(   r)   get_summand  s    z%_grid_sampler_2d.<locals>.get_summand).r   ).r    r   c                 3   s    | ]\}}} |||V  qd S r5   r(   )r0   rx  ry  r>  )r|  r(   r)   rI    s   z#_grid_sampler_2d.<locals>.<genexpr>c                    s     | } |}||dS rW   r(   )rx  ry  r1   rV   )rm  r|  rq  rr  r(   r)   get_value_bounded  s    

z+_grid_sampler_2d.<locals>.get_value_bounded)re  rL   c                    sF   | d  } d | | d | d |f}t |S )Nr    r!   )rK  )re  Ziy_ofscs)r}  ix_nwiy_nwtxr(   r)   	get_coeff  s    z#_grid_sampler_2d.<locals>.get_coeffc                 3   s   | ]} |V  qd S r5   r(   )r0   re  )r  r(   r)   rI    rr   r   )r>   rs   r   r!  r   r-  r'  r)  r%  r   rj  r%  r4  rP   rV  rO   rK  )r  r]  ra  rb  r  rc  rn  rQ   Ztwor1   rV   rx  ry  Zix_neZiy_neZix_swZiy_swZix_seZiy_seZw_nwZw_neZw_swZw_seZ
ix_nearestZ
iy_nearesttyrH  r(   )r  rz  r  r{  rc  r  r  rw  rm  r  r|  r}  rq  rr  rs  ra  r  r  ru  rv  rb  rl  r  rf  r)   _grid_sampler_2dF  sx    
 "



	







r  )r  r]  ra  rb  r  rL   c                 C   s   t | ||||dS )N)r]  ra  rb  r  )r  )r  r]  ra  rb  r  r(   r(   r)   grid_sampler_2d  s    
r  c                    s`   t   dko dk fdd t  ddk fdd   jddS )Nr!   r    c                      s   d    d   S )Nzmatrix @ vector expected, got r  rp   r(   rl   r  r(   r)   rq     rr   zmv.<locals>.<lambda>r   c                      s*   d  d d  d d d dS )Nzsize mismatch, got input (r   r1   r    z), vec (r   ry  r(   r  r(   r)   rq   	  rr   rp   )r>   rs   rK   r   r   r  r(   r  r)   r    s    r  c                 C   sd   |d ur4|d | d }d| |  |t |   }nd| |  t |  }|d urZ|| }t||S rW   )r   Z
logsigmoidr   )rl   r   r   Z
pos_weightr   Z
log_weightr   r(   r(   r)    binary_cross_entropy_with_logits  s    r  )tensor1tensor2is_outrL   c           	         s   | j |j kr| |fn|| f\}}ddlm  |j dkr@|j dksDdS |jrR|sRdS | j dkr`dS  | dkrtdS |j}| }dg}t|dd  D ]}|||d   qt	 fd	d
t
|tt||D S )Nr   r  rx   r!   FTr    rN   c                 3   s(   | ] \}}} |d kp||kV  qdS rA  r(   )r0   r   r   r   r  r(   r)   rI  7  s   zshould_fold.<locals>.<genexpr>)r  r  r  r  r   r   r  r3  r  r   rc  r  )	r  r  r  t1t2Zt1_shapeZ	t1_strideZexpected_strider   r(   r  r)   should_fold  s(     

r  )Zpass_is_out)r  c                C   s  |   }|  }|dkr |dks$J |dkr@|dkr@t| |S |dkr\|dkr\t| |S |dkr|dkrttt| d|dS |dkr|dkrt| |S t| ||r||k}|r|jn| }|s|n|dkr| 	 n| }|j
}t|d d }	ttj|	}
|  dk}|r(|	|j
d  ||
|d }|rhtjj|||	}|rd|j S |S tjj|||	S n|dkrz|dkrz|dkr| dnd}| d}| j
d d }|dkr|dn|d}|dkr|dnd}g }t|d D ]}||| q|dkr|dkr|d |d kr|d dkrp| jrpt| d|S |d dkr|jrt| |dS tt||}|||g }t|}| ||||}|dk}|r ||g }||||d}n |||g }|||||}|}	|dkr8|	| |dkrL|	| |rh||d|	S |||	S ntddd	  d S )
Nr   r    r!   rN   r   rx   Fc                   S   s   dS )Nz/both arguments to matmul need to be at least 1Dr(   r(   r(   r(   r)   rq     rr   zmatmul.<locals>.<lambda>)rK   r>   dotr  re  r  rP   r  r   r   r   r  r   rY  r   r  r   r  rt   _unsafe_viewr>  r   rO   r  r   Zbroadcast_shapesrz  r'  bmmr-  rs   )r  r  r  Zdim_tensor1Zdim_tensor2r  r  r  Zsizes_1Zoutput_shapeZfolded_dim1Zt2_is_matrixZ	t1_foldedr<  r	  m1Zbatch_tensor1m2r   Zbatch_tensor2r  Zexpand_batch_portionZtensor1_expand_sizeZexpand_batch_productZtensor1_expandedZ
vector_rhsZtensor2_expand_sizeZtensor2_expandedr(   r(   r)   r   ?  s    	









r   )r   ra  r  r  r  rL   c                    s
  j \}}t|d ||}t|d ||}tjtjjd\}}tj|d jdj	|d}	tj|d jdj	|d}
t
||
|}t
||	|}|d}| }| }|| dd}|| dd}|	tj}|	tj}|d ||d |d	 f}|d ||d |d	 ft|t|}d
\jtjkrttt|fddD fdd|D }fddfdd t fdd|D }jtjkr؈d usJ t||}ntdd t||D }t}|j|d}|S )Nr   r    r  r$  r   rN   r   r^   r!   r  c                    s.   g | ]&}|d  >  t |d  t jqS r    r   r>   r   r7   int16r0   r>  )weights_precision_xr(   r)   r3     s   z.upsample_bicubic2d_default.<locals>.<listcomp>c                    s.   g | ]&}|d  >  t |d  t jqS r  r  r  )weights_precision_yr(   r)   r3     s   c                    s<   t | d d }t |dd }td d ||g}|S r  )r>   r   rt   r  )ro  r/  Zy_idxZx_idxr6  )in_hin_wr   r(   r)   load_bounded  s    z0upsample_bicubic2d_default.<locals>.load_boundedc                    sT   t  fddD }jtjkr<d us0J t|S tdd t|D S )Nc                 3   s   | ]} |V  qd S r5   r(   )r0   Zx_ofs)r  rV   r(   r)   rI    rr   zCupsample_bicubic2d_default.<locals>.get_x_interp.<locals>.<genexpr>c                 s   s   | ]\}}|| V  qd S r5   r(   rJ  r(   r(   r)   rI    rr   )rV  r   r>   r  r&  r%  rc  )rV   Zsrc_x)r   ixs_ofsr  r  	weights_x)rV   r)   get_x_interp  s
    z0upsample_bicubic2d_default.<locals>.get_x_interpc                 3   s   | ]} |V  qd S r5   r(   )r0   Zy_ofs)r  r(   r)   rI    rr   z-upsample_bicubic2d_default.<locals>.<genexpr>c                 s   s   | ]\}}|| V  qd S r5   r(   rJ  r(   r(   r)   rI    rr   r   )r   r  rA   rB   r  r2  r>   r)  r%  r7   r!  rP   rj  r   rB  rG  r   r  r)  rV  r&  r%  rc  r   r>  )r   ra  r  r  r  rQ   Zh_scale_factorZw_scale_factorr   r  rW  Zx_floatZy_floatr1   rV   Zyscaler0  Ziys_ofsZ	weights_yZsrc_yr   r   r(   )	r  r  r  r   r  r  r  r  r  r)   upsample_bicubic2d_default  sR    




r  )r  ra  r  r  rL   c                 C   s   t t|t| dkdd  |d u rd|d us4J ttttf tdd t| jdd  |D }|rl|nd\}}t| ||||S )Nr    c                   S   s   dS )Nz:Must specify exactly one of output_size and scale_factors.r(   r(   r(   r(   r)   rq     rr   z(upsample_bicubic2d_vec.<locals>.<lambda>c                 s   s"   | ]\}}t t|| V  qd S r5   )r   r   )r0   r>  re   r(   r(   r)   rI  #  s   z)upsample_bicubic2d_vec.<locals>.<genexpr>r!   r  )	r>   rs   r+  r
   rV  r!  rc  r   r  )r  ra  r  r  r  r  r(   r(   r)   upsample_bicubic2d_vec  s    
r  )r  rF  rL   c                    s    fdd}t  ||S )Nc                    s4   t j|  ||  jd}|d |d |    S )Nr$  r    )r>   r)  r%  r   r   Zmiddler   Zdim_idxr  r(   r)   r0  2  s    z_reflection_pad.<locals>.idx_reflection_or_replication_padr  rF  r0  r(   r  r)   _reflection_pad,  s    r  c                    s    fdd}t  ||S )Nc                    s*   t j|  ||  jd}t |d|d S )Nr$  r   r    )r>   r)  r%  r   r  r  r(   r)   r0  C  s    z_replication_pad.<locals>.idxr  r  r(   r  r)   _replication_pad=  s    r  )r  rF  idx_fnrL   c                    s   t d  t|   d  d fv  fdd | j  d  }|    } fddt D } fddt D }| }t D ]>}d g|  }	||| || || |	|| < t||	}qt	|}
|j
|
d}|S )	Nr!   r    c                      s    d  d d  d d  dS )NZreflection_padzd requires r    zD or r!   zD inputr(   r(   rp   r(   r)   rq   V  rr   z0_reflection_or_replication_pad.<locals>.<lambda>c                    s    g | ]}d  d |   qS rX  r(   r  rK   rF  r(   r)   r3   [  rr   z2_reflection_or_replication_pad.<locals>.<listcomp>c                    s$   g | ]}d  d |  d  qS rX  r(   r  r  r(   r)   r3   \  rr   r   )r  r>   rs   rK   r   rO   rt   r  rA   r   r>  )r  rF  r  Z	inp_shapeZnc_dimpadding_leftpadding_rightr   r  r0  r   r(   r  r)   r  N  s"    
 
r  c                    sh  t d dd |j d  D fddtD fddtD g }t|jD ]:}dg|j }d||< |tj|j| |jd| qd|d    | d  
d	d
 
fddtD 
fddtD }
fddtD }fddtD 	t	
tj	fddtD }t|  d}	 fdd}
tjdd tD  D ]}|tdg krqg }g }tD ]}|| dkrڈ| }	| }nb|| dkr|| }
| d| f}n8|| dkr<|| }
| | |  | d f}|| || q|
|	||}	q|	S )Nr!   c                 S   s   g | ]}|d  qS r   r(   )r0   rP  r(   r(   r)   r3   q  rr   z,_reflection_pad_backward.<locals>.<listcomp>c                    s    g | ]}d  d |   qS rX  r(   r  r  r(   r)   r3   s  rr   c                    s$   g | ]}d  d |  d  qS rX  r(   r  r  r(   r)   r3   t  rr   r    rN   r$  c                 S   s   | \}}}t ||k||kS r5   rp  )index_ranger  ZlbZubr(   r(   r)   index_range_condition  s    
z7_reflection_pad_backward.<locals>.index_range_conditionc                    s   g | ]}|  |  qS r(   r(   r  r  xyzr(   r)   r3     rr   c                    s   g | ]} | |  qS r(   r(   r  r  r(   r)   r3     rr   c                    s(   g | ] }d  |  |  |  qS rv  r(   r  )dhwr  r  r(   r)   r3     rr   c                    s.   g | ]&} | d | |  |  fqS r  r(   r  )centerr  r  r  r(   r)   r3     s   c                    s   g | ]} | qS r(   r(   r  )r  range_cr(   r)   r3     rr   r   c                    st   t D ]2}|| d || d k }t|tr|r|   S qttjfdd|D }t| | d}| | S )Nr!   r    c                    s   g | ]} |qS r(   r(   )r0   r  )r  r(   r)   r3     rr   z@_reflection_pad_backward.<locals>.accumulate.<locals>.<listcomp>r   )rO   r.   r+  rH   r   rt   r,  r.  )r   r   index_rangesr  Zupper_less_than_lowerrO  g)r  rK   rc   r  r(   r)   ry    s    
z,_reflection_pad_backward.<locals>.accumulatec                 S   s   g | ]}g d qS ))rN   r   r    r(   r   r(   r(   r)   r3     rr   r   )r  r   rO   r  r  r>   r)  r%  r-  rH   r   rt   r,  r.  	itertoolsr   rV  )rc   r1   rF  r/  r  Z
view_shapeZleft_reflectZright_reflectrO  r   ry  areaZoutsr  r   r  r(   )r  r  r  rK   rc   r  rF  r  r  r  r  r)   _reflection_pad_backwardj  sT    $
"
r  rz   r}   r   c                C   s(   t j| ||d}t j| ||d}||fS )Nr   )r>   aminr  )rl   rK   r   r  r  r(   r(   r)   aminmax  s    r  c                C   s"   t jtt| d| |||dS )Nr   r   )rt   r   r>   r`   isnan)rl   rK   r   r   r(   r(   r)   nansum  s    r  r   r  r%  r)  r  r   r  r%  r)  c             	   C   s   t jjd| d||||dS )Nr   r    r  rt   r)  Z
start_stepr  r(   r(   r)   arange_default  s    
r  r  r  r   r  r%  r)  c             	   C   s   t jj| |d||||dS )Nr    r  r  r  r(   r(   r)   arange_start  s    
r  c                  O   s   ddl m} || i |S )Nr   )out_dtype_dense)!torch._higher_order_ops.out_dtyper  )rC   rD   r  r(   r(   r)   out_dtype_decomp  s    r  )r   r   r   marginr   r   rL   c           	         s  t t jd jd  t |dkp:|dkdd  t jdkoX dkfdd t jdko~ kfdd d urt t jdko  k fdd dt jdd	}||  }|	d}|dkr|n|| }d ur$|  }t j
 jd
}t |k|d}|tjjkr\| S |tjjkr|| |jd  S |jddS d S )Nr   r    r!   c                   S   s   dS )Nz only p == 1 and p == 2 supportedr(   r(   r(   r(   r)   rq     rr   z#multi_margin_loss.<locals>.<lambda>c                      s   d j  S NzMExpected non-empty vector or matrix with optional 0-dim batch size, but got: rW  r(   rf  r(   r)   rq   
  rr   c                      s   d  dj  S )Nz#inconsistent target size, expected rr  rW  r(   )nframer   r(   r)   rq     rr   c                      s   d  dj  S )Nz#inconsistent weight size, expected rr  rW  r(   )rK   r   r(   r)   rq     rr   rz  r$  rp   )r>   
atleast_2dZ
atleast_1dr   rs   r  r   rP   r=  r  r)  r%  r`   r   r&   rm   r   r'   r   )	r   r   r   r  r   r   ura   r0  r(   )rK   r   r  r   r   r)   multi_margin_loss  sB    








r  	is_targetc                    s  | j  |j t| } t|}| j d }tt dko@|dk fdd ttdkod k fdd tj||jd}|dk}tjt|||dd	d
}||k }t||d}tj	| d|d}	t||d}
tj
||
jddkdd}d|	jjdd |  }|d}|| }t|d|}|tjjkrJ|jdd }n$|tjjkrb| }n|jdd}|| j}||fS )Nr    r!   r   c                      s
   d  S r  r(   r(   )orig_input_shaper(   r)   rq   6  rr   z0multilabel_margin_loss_forward.<locals>.<lambda>c                      s   d d  S )Nzinconsistent target size: z for input of size: r(   r(   r  Zorig_target_shaper(   r)   rq   :  rr   r$  rN   Tr   rz  rp   r^   )r   rN   )r   r>   r  rs   r  r)  r%  r  r`   r=  anyrP   Tr  r   r&   rm   r   r   r'   r7   r   r   )r   r   r   rK   r0  Zis_endZend_idxZtarget_maskZtidx0r  Ztidx1r  ra   r(   r  r)   multilabel_margin_loss_forward'  s@    





r  )	attn_maskre   )querykeyrm   	dropout_p	is_causalr  re   rL   c          	   
      s   t t fdd t  dko@ dko@ dkfdd t  dk fdd t jd jd kojd jd kdd  tjj| |d |d	\}}|d
dddj	t j
ddd
dd}||fS )Nc                      s   d j  S )Nz-query must be FP32, FP64, BF16, FP16 but got r   r(   )r  r(   r)   rq   s  rr   z<scaled_dot_product_flash_attention_for_cpu.<locals>.<lambda>r   c                      s"   d   d    d   S )Nz,q, k, v must be a 4 dimensional tensor, got r  rp   r(   )r  r  rm   r(   r)   rq   w  rr   r   c                      s
   d  S )Nz&dropout probability must be zero, got r(   r(   )r  r(   r)   rq   z  rr   rx   c                   S   s   dS )Nz&q, k, v should have the same head sizer(   r(   r(   r(   r)   rq   ~  rr   )r  r  r  Zdropout_maskre   r!   r   r    r   )r>   rs   r  rK   r   rt   Z"_scaled_dot_product_attention_mathr  rd  r>  r   )	r  r  rm   r  r  r  re   r<  Zattnr(   )r  r  r  rm   r)   *scaled_dot_product_flash_attention_for_cpuf  s@    
"&
"r  c                    s   t |  fdd}|S )Nc                     s    | i |}| d  |S ri   )r  )rC   rD   r   outplace_opr(   r)   
inplace_op  s    z$register_inplace.<locals>.inplace_opr   )Zaten_opr  r  r(   r  r)   register_inplace  s    r  c                 C   sx   |   s |  s t|}t|}t||}t|tjr@|dkrH|| }|dkrT|S t|tjrh|dkrp| | } | | S )Nr    r   )r  r  r!  r>   r  r.   numbersNumber)rl   Zbatch1Zbatch2r[   rd   r   r(   r(   r)   baddbmm  s    r  c                 C   s   t j| |ddS )Nrj  rC  rD  )rl   r   r(   r(   r)   floor_divide  s    r  c                 C   s   t tj| jdS rW   )rH   r   rY  r   r   )r   r(   r(   r)   	sym_numel  s    r  r   r   )rl   r   r   rL   c                C   s2   |d u rt jj| g |dS t jj| g ||dS d S )Nr   r  )rt   r   Zdim_IntListZIntList_out)rl   r   r   r(   r(   r)   sum_default  s    r  rl   rK   c                 C   sF   t | tjs| S |d u r2tj| tt|  S tj| |gS d S r5   )	r.   r>   r   rt   re  dimsr  rO   rK   r  r(   r(   r)   squeeze_default  s
    r  c                    s`   t  fddtt| jD }|jtjkr2tjnd }| jd|d|d}| ||	|j  |fS )Nc                 3   s   | ]}| kr|V  qd S r5   r(   r  rp   r(   r)   rI    rr   z)_weight_norm_interface.<locals>.<genexpr>r!   T)r   r   )
rV  rO   r  r   r   r>   r  r   r   r7   )r6  r  rK   Zkeep_dimZ
norm_dtyper   r(   rp   r)   _weight_norm_interface  s     r  assume_uniqueinvertc                C   s   t | tjstj| |jd} t |tjsD|r8t| |S t| |S | dt|  d k rlt	| ||dS t
| |||dS d S )Nr$  g      $@g(\?r  r  )r.   r>   r   r  r%  ner   r   r   isin_defaultisin_sorting)elementstest_elementsr  r  r(   r(   r)   isin  s    r  )r'  )rl   r'  rL   c                C   sP   |d u r"t j|  t j| jd}nt j|  |t j| jd}|| k | j}|S )NrA  )r'  r   r%  )r>   Zrandr   r   r%  r7   r   )rl   r'  Zraw_pr   r(   r(   r)   	bernoulli  s    r  r  c                C   sl   |   dkrtj| tjdS | jd|j  }| |}ttd|j d d}||kj	|d}|rh| S |S )Nr   r   r   rN   r    rp   )
r   r>   
empty_liker+  r   r  r-  rV  rO   r  )r  r  r  Zexpanded_elem_shaper1   rK   r  r(   r(   r)   r    s    
r  c                C   s   |   }|  }|rt||g}tj|dd\}}|dd  |d d k}	t|	ddgd}	|rj|	 }	t|	}
|
d||	}
|
d|   S t|\}}t	||}t
|| k |d}|| |k}|r| n|}|| jS d S )NT)Zstabler    rN   r   F)r  r>   r   sortr  Zlogical_notr  r  r   Zsearchsortedr`   r   r   )r  r  r  r  Zelements_flatZtest_elements_flatZall_elementsZsorted_elementsZsorted_orderZduplicate_maskr1  Zsorted_test_elementsrQ   r0  Ztest_idxcmpr(   r(   r)   r  #  s$    
r  c                 C   s   |  d}|| S rM   )r   )rl   r2  Z	flattenedr(   r(   r)   take@  s    
r  c                 C   s2   |d u rt j}|t jkr t|}tj| |j|dS r~  )r>   r   Zpreserve_formatr   rt   resizer   )rl   r   r   r(   r(   r)   	resize_asG  s
    
r  )FF)r   )N)r!   )r   NNr    )r   NNr    )N)rN   FF)N)r   N)r   )r   )r   )r   )r    r    )r    r    F)r    r    )r   )N)Fr   )r   r^   N)r   r    N)F)N)N)NN)NN)NNN)NNN)F)F)F)F)F)F)N)NN)NNN)N)F)r   r   FT)r   r   F)NN)N)NF)r   F)r    r    )N)r   )N(  rH   r  r  rY  r  collections.abcr   enumr   r   r   r   r   typingr   r	   r
   r   r   r>   Ztorch._meta_registrationsZtorch._primsr.  r  Ztorch._prims_commonZ_prims_commonrA   Ztorch.nn.functionalrT  r  r   r   r   r   Ztorch._decompr   r  r   r   r   r   r   r   Ztorch._prims_common.wrappersr   r   r   r   Ztorch.utilsr   r@   Ztorch.utils._pytreer   r  ZDispatchKeyr   r  str__annotations__Z_opsr  rt   r   r  r+  rJ   r  Zcompute_only_pw_cast_for_opmathZpw_cast_for_opmathZ"pw_cast_for_opmath_non_tensor_argsr2  Zpw_cast_for_int_to_realr!  rR   rY   rZ   rb   r   rj   r;  ZScalarrn   rv   r   r   r   r   r   r   r   r   r   r   r   r   r   rV  r   r   r   r   r   r   r&   rm   r   Z_safe_softmaxr   r   r   r  rS   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r
  slicer  r"  r	  r4  r8  r;  r?  r@  rC  ro  r}  r  r  r  r  Zpy_implZCompositeImplicitAutogradZAutogradr  r  r  r  r  rz  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  Zunsafe_chunkr  r  r  Zno_statsr  r  r  r$  r%  r&  Z_fused_dropoutr(  r/  r%  r   r  ZliftZ
lift_freshr1  r4  r6  r8  r7  r9  r>  r@  Z_adaptive_avg_pool2drX  rZ  rc  rh  rn  ro  rm  r  r  r  r  r  r  	Generatorr  r  r  r  r  r  r  r  Z_upsample_nearest_exact1dr  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  Zrnn_tanhr   r  Zrnn_relur  r  r  r  r  r  r  r  Zlstmr	  r
  r  r  Zgrur  r  r  r  r  r  r  r  r  r  r  r!  r&  r)  r  r7  r8  r  r  r{  r.  r<  r?  r@  rA  rD  rF  rG  rK  r%  rN  rW  rY  r^  r_  r`  r  r  r  r  r  r   Zupsample_bicubic2dr  r  Zreflection_pad1dZreflection_pad2dZreflection_pad3dr  Zreplication_pad1dZreplication_pad2dZreplication_pad3dr  r  Zreflection_pad1d_backwardZreflection_pad2d_backwardZreflection_pad3d_backwardr  r  r  r)  r+  r  r  r  r  r  r  r  Z+_scaled_dot_product_flash_attention_for_cpur  r  r  r  r  r   r  re  rK   r  r  r  r  r  r  r  r  Zaddbmm_ZaddbmmZaddmm_Zaddmv_Zbaddbmm_Zfill_Zgelu_r  Z
hardswish_Z	hardtanh_ZhardtanhZhardsigmoid___iand____and____ilshift__
__lshift__rv  rw  Zindex_reduce_Zindex_reduce__ior____or____irshift__
__rshift____ixor____xor__Zleaky_relu_Z
leaky_reluZlogit_ZlogitZrelu_r  Zrenorm_ZrenormZround_r4  Zscatter_r   Zscatter_add_Zscatter_addZscatter_reduce_Zscatter_reduceZsilu_r(   r(   r(   r)   <module>   s
  

 *



	





 *!	
   :
   '

	P`

  
% 


"



"
     
W

R
R#







,

h
%$g-7(

"
  




$




$  	 	  
  
.2)


 
 @2

					 
  
	
I
5







$    '
  


  
w 
S


,


,



W


,

< 
C	

"	



