o
    Zh}                 -   @   sl3  d dl Z d dlmZ d dlmZ d dlmZ d dlmZm	Z	m
Z
mZ d dlmZ d dlZd dlmZ d dlmZmZmZ d dlmZmZmZmZ d d	lmZ d d
lmZmZ d dlmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z( d dl)m*Z*m+Z+m,Z,m-Z-m.Z. d dl/m0Z0m1Z1 d dl2m3Z4 d dl5m6Z7 e
dZ8edZ9ej:j;Z;ej<=dddZ>e?d\Z@ZAZBdeee9e8f gee9e8f f fddZCde#fddZDdd ZEdd ZFeCe;jGe;jHge. dddejId d fd!d"ZJeCe;jKjLe;jKjMge. d#d$ ZNeCe;jOjLe;jOjMge. d%d&d'd(ZOeCe;jPe. d)d* ZPeCe;jQjLe;jQjMe;jRjLe;jRjMge.d+d,d-d. ZSeCe;jTjLe;jTjMge. d/d0 ZTd1d2 ZUdBd3ed4eVeW d5eXfd6d7ZYeCe;jZjLe;jZjMge. d8d9 Z[dZ\d4eVeW fd:d;Z]eCe;j^jLe;j^jMge. d<d= Z_eCe;j`jadd>d?d@ZbeCe;j`jLejcddddAdBdCZdeCe;jejLe;jejMge. ejcddddAdDdEZfeCe;jejge;jejhge. ejcddddAdFdGZieCe;jjjLe;jjjMge. dddddAdHdIZkeCe;jljLe;jljMge. d3ed4eVeW dJeWdKeWfdLdMZmeCe;jnjLdBdNdOZodPdQ ZpeCe;jqjLdRdS ZreCe;js			dCdTedUedVedWe	e dXe	et dYe	eju fdZd[ZveCe;jw	dDd\ed]ed^edYe	eju fd_d`ZxeCe;jydadaddbdTed\ed]ed^edYe	eju f
dcddZzeCe;j{				 	 	a	 dEdeejdfejdWe	e dge	e dYe	eju dheXdieWdjeWdkeXfdldmZ|eCe;j}jLdndod3ed4eWdpedqejdretdseXdefdtduZ~eCe;jjLdndod3ed4eWdpedqejdretdseXdefdvdwZe. eCe;jjLdxdy ZeCe;jjLdddd d ddzd{edretd|e	e d,e	e d}e	e d~eWdeXdefddZeCe;jjLe;jjge. dd ZeCe;jjdBddZeCe;jjLe;jjge. dd ZeCe;jjdBddZeCe;jjLdd ZeCe;jjMdd ZeCe;jjLdd ZeCe;jjdd ZeCe;jjLdd ZeCe;jjLddddddddZeCe;jjLdFddZeCe;jjLdCddZeCe;jjLdFddZeCe;jjLdd ZeCe;jjdd Zd3edetfddZd3ededetfddZ	ndGdedetdeXfddZdHdedetdetfddZdededeXdetfddZ	dIdetdedTedetfddZdetfddZeCe;jjLe;jjge.dddJdedetdeXfddZeCe;jjLe;jjMge. dTedefddZeCe;jge.dddTefddĄZdedefddǄZeCe;je. d3ededeXdefddʄZeCe;je. dBd3ededeXdefdd̄ZeCe;je. dBd3edeXdefdd΄ZeCe;je. dBd3edeXdefddЄZeCe;jjLdKdedeXdeXfddӄZeCe;jjLe;jjMge. dTededefddքZeCe;jjLdBdedeXfdd؄ZeCe;jjLe;jjMge.dddۃd d dܜd3edeXdeXdeeeef fdd߄ZeCe;jjLe;jjMge. d ddedededeXdef
ddZeCe;jjLe;jjMge.ddddnddedeXdeeeef fddZeCe;jjLe;jjMge.dddۃdnd ddedeXdeXdeeeef fddZeCe;jjLe;jjMge. dnd ddedededeXdeXdefddZeCe;jÃe.ddd	n	ndLdededeXdeXdeeeef f
ddZdetdeeXeXf fddZeCe;jjLe;jjMge.dddMdedetdeeef fddZeCe;jjLe;jjge.ddddڃdedeeeeef fdd ZeCe;jjL	 	n	dNdedeXdeXde	et fddZːdededeeVeW eVeW f fdd	Z̐dedede	et deeef fd
dZdTededeXfddZeCe;jσdnd ddddddededeXdeXde	e de	e de	e de	e deeeeef fddZeCe;jjLe;jjMgdnd dddededeXdeXdeXde	e defddZeCe;j҃e.dddnd	n	 	 dOd3ededeXdeXdeXdeeef fddZeCe;jjLdd ZeCe;jփe. 	n	 dPdTedededeXdeXdefdd Z֐d!d" Zאd#d$ ZeCe;jكe. d%d& ZeCe;jۃe. d'd( Zܐd)d* ZeCe;jރe.d+d,d- ZeCe;je.d+d.d/ Zd0d1 ZeCe;je. d2d3 ZeCe;je. d4d5 ZeCe;jjLe;jje;jjLe;jjge.d+d6d7 Zd8d9 ZeCe;je. d:d; ZeCe;je. d<d= ZeCe;jjLe;jje;jjLe;jjge.d+d>d? ZeCe;je. dQd3edAedefdBdCZeCe;je. dDed3edAedEedef
dFdGZeCe;jjLe;jjMge. dadadHdIdJZeCe;jjLe;jjMge. dd>dKdLZeCe;jjdRdNdOZeCe;jjdRdPdQZeCe;j jLe;j jMge. dDdRdSZeCe;jjL	 	 dKdTdUZeCe;je. dVdW ZdXdY ZdSd[d\Z	dDd]ejdUejd^eeVeW eWf d_eeVeW eWf d`eeVeW eWf daeXdbeWdce	eeVeW eWf  fdddeZdfdg Z	eCe;j
jLd]ejdUejdWe	ej dhe	ej die	ej djeXdkedlefdmdnZeCe;jjLd]ejdUejdWejd^eVeW d_eVeW d`eVeW daeXdceVeW dbeWfdodpZejj
rjej<=dqddZeCej:jjjLdrds ZeCej:jjjLdtdu Zejj	rej<=dvddZeCej:jjdwdx Zej<=dyddZeCej:jjjLdzd{ ZeCej:jjjd|d} Z eCej:jj!jLeCej:jj!j"d~d Z#eCej:jj!jeCej:jj!j$dd Z%eCej:jj&jLeCej:jj'jLdd Z(ej<=dddZ)eCej:j*j+				 dTddZ,eCej:j*j-dd Z.dd Z/eCe;j0jL			 	n	dUddZ1dd Z2eCe;j3jLdd Z4eCe;j5e. 			 	n	dUddZ6eCe;j7e.d+dd Z8eCe;j9jLdd Z:eCe;j;jLdd Z<eCe;j=jLdd Z>eCe;j?e.d+dd Z@dedetfddZAeCe;jBe.dd,dd ZCeCe;jDe.d+dd ZEeCe;jFe.dd,dd ZGeCe;jHe.d+dd ZIeCe;jJjdDddZKeCe;jLjLe;jLjMge. dd ZMeCe;jNjLe;jNjMge. d%ddeWfddZNeCej:j;jOjLej:j;jOjMge. dd ZOeCe;jPje;jQjgdd ZReCe;jSjLgdd ZTeCe;jUjLe;jUjMge. dadadHddZVeCe;jWjLe;jXjLgdddddZYeCe;jZjLgdddddZ[eCe;j\ge. dd Z]eCe;j^gddÄ Z_eCe;j`gdĐdń ZaeCe;jbgdƐdǄ ZceCe;jdgdȐdɄ ZedeWdeWdeWfd̐d̈́Zfdΐdτ ZgeCe;jhgdWe	e fdАdфZieCe;jjgdҐdӄ ZkeCe;jlgdԐdՄ ZmeCe;jnjLd֐dׄ ZoeCe;jpe. dؐdل ZqeCe;jrjL	 	 	 		 	%dVdڐdۄZseCe;jtjLdܐd݄ ZudGdސd߄ZveCe;jwjLe;jwjMge. dWddddZxeCe;jyjLe;jzjLgdd Z{eCe;jyje;jyj|e;jzje;jzj|e;j}jLe;j}j~ge.d+d,dXddZeCe;jjLdd ZeCe;jjLdd ZeCe;jjLdd ZeCe;jje;jje;jje;jje;jjLe;jjLe;jjLgdd ZeCe;jje;jje;jje;jjgdddZeCe;jjLe;jjgdd Zdd ZeCe;jje;jjgdd ZeCe;jje;jjgdd ZeCe;jjLdd ZeCe;jje;jjgdd ZeCe;jje;jjgdd ZeCe;jjLdd  ZeCe;jje. ddefddZeCe;jge. 	dYddZeCe;jg	dYddZeCe;jg	dYd	d
ZeCe;jjLe;jjLgdBddZeCe;jjdd ZeCe;jjLdd ZeCe;jdd ZeCe;je. dd ZeCe;jdd ZeCe;jjLdBddZeCe;jjLdd ZdDddZeCe;jjLdd Zdd  Zd!d" Zd#d$ Zd%d& Z	 dBdTed'eWd(eWd)eWd*eWd+eWd,eWd-eWd.eWd/eWd0eWd1eWd2eWd3eWd4eWd5eWd6eWd7eWd8eWd9eWdetd:eXf,d;d<Zd=d> ZdTeded'eWd(eWd)eWd*eWd+eWd,eWd-eWd.eWd/eWd0eWd4eWd5eWd6eWd7eWd8eWd9eWdetf&d?d@ZdAdB ZeCe;jjLdCdD ZeCe;jjL				 dTdEdFZeCe;jjLdGdH ZeCe;jʃe.dd,				 dTdIdJZeCe;j̃e.d+dKdL ZdTedMefdNdOZG dPdQ dQeZdTedMedReWfdSdTZeCe;jjLdUdV ZeCe;jӃe. dWdX ZeCe;jԃe.d+dYdZd[ ZeCe;jjLgd\d] ZeCe;jjL					dZd^d_ZeCe;jjWd`da ZeCe;jjLdbdc ZeCe;jjLd[dddeZܐdGd4eWdfeWdgeXfdhdiZݐdjdk Zސdldm ZeCe;jjLdBdndoZdBdpdqZdDdrdsZdtdu ZdDdvdwZd\dxdyZeCe;jjLdzd{ ZeCe;jd|d} ZeCe;jje;jje;jje;jjge. dDd~dZeCe;jje;jje;jje;jjgdDddZeCe;jg		 	 	d]dededededeXdeXde	e fddZeCe;jg		 	 	d]dededede	e deXdedeXdeXde	e fddZeCe;jg			 	 	d^dededede	e dedeXdeXde	e fddZeCe;jg	dDdededededededededeWdeWdedeXdedede	e fddZeCe;jg		 		d_dededededeXde	e de	e fddZeCe;jg		dFdedededededededeXde	e de	e fddZeCe;jg		 	d`dededede	e deXdeXde	e fddZ eCe;jg	 	dadedededede	e dedededededeVeX deXde	e fddZeCe;jg	dDdedededededededededededeWdeWdedeXde	e f ddZeCe;jg					dZdededede	e de	e deWdeWdedeXdeXde	e de	eW de	eW de	e de	e fddZeCe;jg			dCdededededededededeWdeWdedeXdedede	e de	eW de	eW f"ddZeCe;j	g	 				dbdedededWe	e de	e de	e de	eW de	eW dedeWdeXde	e de	e de	e de	eW fddZ
eCe;jg			 d\dededededWe	e de	e de	e dejdejdededededeWdeXde	e de	eW deXf$ddZeCe;jjLg				 dcd3ejd^ejdejdejdWe	ej de	ej dYe	eju deXfdÐdĄZeCe;jje;jjge. dGdŐdƄZeCe;jjdGdǐdȄZeCe;jjLe;jjMge. dBdd>dɐdʄZdːd̄ Zd͐d΄ ZeCe;jjLe;jjLgdDdϐdЄZeCe;jjLe;jjLgdFdѐd҄ZeCe;jjLe;jjLg		dFdedeeeWejf  deeeWejf  de	e de	e f
dאd؄ZeCe;j jLe;j!jLgdCdِdڄZ eCe;j"jLe;j"j#e;j"j~e;j"j$gdddېd܄Z%dݐdބ Z&eCe;j'jL		dFdߐdZ(eCe;j)jLdd Z)eCe;j*jLdd Z*dd Z+dd Z,eCe;j-jLe;j.jLgdWddZ/eCe;j0jLdeddZ0eCe;j1jLdfddZ2eCe;j3e. 	dgddZ4eCe;j5jLe;j5j~ge.d+d,dXddZ6ej7Z8dd Z9eCe;j:jLdd Z:eCe;j;jLdd Z;eCe;j<jLdd Z=eCe;j>jLdd Z>eCe;j?je;j?j@ge. d d dddZAeCe;jBge. dhddZCeCe;jDjLe;jEjLg		dFddZFeCe;jGjLg		dFddZHeCe;jIjLdd ZIeCe;jJjLe;jJjMge. dCd	d
ZJeCej:j;jKdd ZKeCej:j;jLdd ZLeCe;jMe. d d dddddZNdd ZOeCe;jP	%diddZQeCe;jR	%diddZSeCe;jT	%diddZUeCe;jVe. d d dddZWeCe;jXe. deWd3edefddZYeCe;jZd3efd d!Z[eCe;j\e.dndd3edefd"d#Z\eCe;j]e. d3edefd$d%Z]eCe;j^e. d&ed4eWd'eXdefd(d)Z_eCe;j`e. 	%	 	 djdUed,ed*eWd+eXd,eXdefd-d.Z`eCe;jajL	dkd+ed}eVe d/eVeW d0efd1d2Zbd3d4 Zcd5d6 Zdece;je ece;jf ece;jg ece;jh ece;ji ece;jj ece;jk ece;jl ece;jm ede;jn ede;jo ede;jp ede;jq ede;jr ede;js ede;jt ede;ju ede;jv ede;jw ede;jx ede;jy d7d8 ZzeCe;j{e. d9d: Z{eCe;j|e. dad;d<d=Z|eCe;j}e. dad;d>d?Z}eze;j{Z~eze;j|Zeze;j}Zd dl/Zd dlZd dlZd@dA Ze  dS (l      N)Sequence)Enum)wraps)CallableOptionalTypeVarUnion)	ParamSpec)SymBoolSymFloatTensor)_add_op_to_registry_convert_out_paramsglobal_decomposition_table
meta_table)
OpOverload)_prim_elementwise_meta$ELEMENTWISE_PRIM_TYPE_PROMOTION_KIND)
BoolLikecorresponding_complex_dtypecorresponding_real_dtypeelementwise_dtypesELEMENTWISE_TYPE_PROMOTION_KIND	FloatLikeIntLikemake_contiguous_strides_forNumber
TensorLike)_maybe_convert_to_dtype_maybe_resize_out_resize_output_check_safe_copy_outout_wrapper)_broadcast_shapes_maybe_broadcast)_config)_pytree_T_PatenZIMPLMeta   returnc                    s    fdd}|S )Nc                    s$   t    fdd}t|  S )Nc                    s   t t|   d S N)r   r   opfn H/var/www/auris/lib/python3.10/site-packages/torch/_meta_registrations.pyregister9   s   z0register_meta.<locals>.wrapper.<locals>.register)r   pytreeZ	tree_map_)r1   r4   r.   r0   r3   wrapper6   s   zregister_meta.<locals>.wrapperr2   )r/   r6   r2   r.   r3   register_meta5   s   	r7   type_promotionc                    s>   t j|d| i\}  fdd|D }t| }t|dtjiS )Ntype_promotion_kindc                    s   g | ]}t | qS r2   )r   .0xresult_dtyper2   r3   
<listcomp>K       z$elementwise_meta.<locals>.<listcomp>r8   )utilsr   r$   r   r   DEFAULT)r8   args_r2   r=   r3   elementwise_metaB   s   
rE   c                 C   s(   t jt jt jt jt jt ji}|| | S r-   )torchZ	complex32halfcfloatfloatcdoubledoubleget)dtypeZfrom_complexr2   r2   r3   toRealValueTypeV   s
   rN   c                    s2   t tg|R   t k fdd d S )Nc                         d d  S )Nzoutput with shape z# doesn't match the broadcast shape r2   r2   Zbroadcasted_shape
self_shaper2   r3   <lambda>c       z)check_inplace_broadcast.<locals>.<lambda>)tupler#   rF   _check)rQ   Z
args_shaper2   rP   r3   check_inplace_broadcast_   s
   rV   Fc	           	         s  t tjrt dkdd  t tjr$t dkdd  tdd fD rMtt  d u r> ntt	 fdd npRt t tj
s[J tt tfdd t tsqJ tdkd	d  tjf|d
||dS )Nr   c                   S      dS Nz:linspace only supports 0-dimensional start and end tensorsr2   r2   r2   r2   r3   rR   w       z(meta_linspace_logspace.<locals>.<lambda>c                   S   rW   rX   r2   r2   r2   r2   r3   rR   |   rY   c                 s   s    | ]}t |tV  qd S r-   )
isinstancecomplex)r;   argr2   r2   r3   	<genexpr>   s    z)meta_linspace_logspace.<locals>.<genexpr>c                         d  d S )Nzlinspace(): inferred dtype z& can't be safely cast to passed dtype r2   r2   )default_complex_dtyperM   r2   r3   rR      rS   c                      s*   dt j dt  j dt j dS )Nz4received an invalid combination of arguments - got (, ))type__name__r2   )endstartstepsr2   r3   rR      s    c                   S   rW   )Nz$number of steps must be non-negativer2   r2   r2   r2   r3   rR      rY   meta)rM   layoutdevice
pin_memoryrequires_grad)rZ   rF   r   rU   dimanyrA   r   Zget_default_dtypeis_complex_dtyperM   _check_typer   empty)	re   rd   rf   baserM   ri   rh   rj   rk   r2   )r_   rM   rd   re   rf   r3   meta_linspace_logspaceg   sH   

rr   c                    sN   t  jt jk fdd t |  dko  dk dd  |  jS )Nc                         d j  S )Nz2take(): Expected a long tensor for index, but got rM   r2   indexr2   r3   rR          zmeta_take.<locals>.<lambda>r   c                   S   rW   )Nz*take(): tried to take from an empty tensorr2   r2   r2   r2   r3   rR      rY   )rF   rU   rM   long_check_indexnumel	new_emptyshape)selfrv   r2   ru   r3   	meta_take   s   

r~   rl   c                   sh   j }j }t||kdd  t dko dk fdd tjj}|S )Nc                   S   rW   )Nz=linalg.cross: inputs must have the same number of dimensions.r2   r2   r2   r2   r3   rR      rY   zlinalg_cross.<locals>.<lambda>r+   c                      s"   d  d   d   S )Nzlinalg.cross: inputs dimension z must have length 3. Got  and sizer2   rl   otherr}   r2   r3   rR      s
   )ndimrF   rU   r   r#   r|   r{   )r}   r   rl   Zx_dZy_d	out_shaper2   r   r3   linalg_cross   s   
r   c                 C   s$   t | d t| d tj| tjdS )Nzlinalg.matrix_expmemory_format)squareCheckInputscheckFloatingOrComplexrF   
empty_likecontiguous_formatr}   r2   r2   r3   linalg_matrix_exp   s   

r   valuesindicesc                 C   sV   t j| j| j| jd}t j| j| jt jd}|  dkr'| jdkr't|| j ||fS )Nri   rM   r   )	rF   rp   r|   ri   rM   int64rz   r   maybe_wrap_dim)r}   rl   r   r   r2   r2   r3   	cummaxmin   s
   r   c                 C   s   t || j tj| tjdS Nr   )r   r   rF   r   r   )r}   rl   r2   r2   r3   logcumsumexp   s   r   c                   s  |j }t|}|| }tt|}dd t|D }	|D ]}
d|	|
< qg g }}|D ]}
|	|
 s6||
 q*||
 q*|| }t|}|  |d | }|j fdddd |||d   }||}dgt|j|d   }|	|}|
d}||d< t|}tt|D ]}|||  ||d	 < q| j|tjd
 dd t|D }d	}|d	 }|dkr|| d ||| < ||||  9 }|d	8 }|dkst||D ]}| d	||  ||| < q| |||   | S )Nc                 S      g | ]}d qS Fr2   r;   rD   r2   r2   r3   r?      rS   z_exec_fft.<locals>.<listcomp>Tc                        |  S r-   r2   r<   Zself_stridesr2   r3   rR          z_exec_fft.<locals>.<lambda>keyreverser   r      r   c                 S   r   r   r2   r   r2   r2   r3   r?     rS   )r   lenlistrangeappendstridesortpermuter|   Zreshaper   resize_rF   r   as_strided_storage_offset)outr}   	out_sizesrl   forwardr   Zsignal_ndim
batch_dimsZdim_permuteZis_transformed_dimdleftrightZ	batch_endtmpinputZbatched_sizes
batch_sizeZbatched_out_sizesiZout_stridesZbatch_numelr2   r   r3   	_exec_fft   sN   




r   r}   rl   exclude_lastc                    s<   t |}|   |d t|t|  j fddd |S )Nc                    r   r-   r2   r   r   r2   r3   rR   !  r   z_sort_dims.<locals>.<lambda>)r   )r   r   r   intr   )r}   rl   r   sorted_dimsr2   r   r3   
_sort_dims  s   
r   c                 C   sH   t | jj |s|  S t| |}| |  }t|| |  ||dS )Nr   )	rF   rU   rM   
is_complexcloner   r{   r   r   )r}   rl   normalizationr   r   r   r2   r2   r3   meta_fft_c2c(  s   
r   c                 C   s8   t | tkst | dkr| d dkr| d dkrdS dS )N   r   r   FT)r   cufft_max_ndimr   r2   r2   r3   use_optimized_cufft_path7  s   0r   c                    s  t | jj t|  }t|}|d }|| d d }t|}|||< |r+|||< t| dkr| j|t	| jd}	| }
t
|rLt|	|
||dd ngt|dkrT|n|}t|	|
||gdd t|dkrq| j|t	| jd}
|d d }|r|
|	}	}
|
  |j fdd	dd
 ttt|}|t|| d  }t|	|
||dd |d t||  }|sy|s|	||| kr|
j|t jd |
}	|	S t| dkrt| |dd}| j|t	| jd}t|| ||ddS | j|t	| jdS )Nr   r   r   cudart   Tr   c                    r   r-   r2   r   stridesr2   r3   rR   d  r   zmeta_fft_r2c.<locals>.<lambda>r   r   Zxpu)r   )rF   rU   rM   is_floating_pointr   r   device_hintr{   rA   r   r   r   r   r   r   minr   r   r   r   )r}   rl   r   ZonesidedZinput_sizesr   Zlast_dimZlast_dim_halfsizeZonesided_sizesoutputZworking_tensorZtarget_sizesr   Zmax_dimsZ	last_dimsr   r2   r   r3   meta_fft_r2c>  sd   

r   )	generatorc                C   s   t |t| gS r-   )r   rF   Size)nr   r   r2   r2   r3   meta_randperm  s   r   rM   rh   ri   rj   c                C      t j| ||||dS Nr   rF   rp   )r   rM   rh   ri   rj   r2   r2   r3   meta_randperm_default  s   	
r   c                   s2   dt  k fdd t j|||||dS )Nr   c                      rO   Nz:random_ expects 'from' to be less than 'to', but got from=z >= to=r2   r2   highlowr2   r3   rR     rS   zmeta_randint.<locals>.<lambda>r   rF   rU   rp   )r   r   rM   rh   ri   rj   r2   r   r3   meta_randint  s   
r   c                   s.   t  k fdd t j|||||dS )Nc                      rO   r   r2   r2   r   r2   r3   rR     rS   z"meta_randint_low.<locals>.<lambda>r   r   )r   r   r   rM   rh   ri   rj   r2   r   r3   meta_randint_low  s   
r   c                C   r   r   r   )r   rM   rh   ri   rj   r2   r2   r3   meta_rand_default  s   
r   r   lastdimc           
      C   s*  t | jj t| dkrZt|  }|||d < | j|t| jd}t	|r5t
|| jt jd||ddS t|dkrGt| |d d d|}n| jt jd}t
||||d gddS | }t|dkrv|d d }t| ||dd}|dd  }t| }|||d < | j|t| jd}	t
|	|||ddS )	Nr   r   rt   r   Fr   r   r   )rF   rU   rM   r   r   r   r   r{   rN   r   r   r   r   r   r   )
r}   rl   r   r   r   r   tempr   Zc2c_dimsr   r2   r2   r3   meta_fft_c2r  s4   	r   c                 C   sf   ddl m} || st| dkrtdt|tr1|| |}|  | kr1t	j
||   | S )Nr   )free_unbacked_symbolsr   zQmore than one element of the written-to tensor refers to a single memory location)%torch.fx.experimental.symbolic_shapesr   rF   Z_debug_has_internal_overlapRuntimeErrorrZ   r   tor   r)   Zexpand_copydefault)r}   srcZnon_blockingr   Zintermediater2   r2   r3   
meta_copy_  s   
r   c                 C   sX   t |  }t |  }||  krdn|| ||  }||d ||| ||fS Nr   )r   r   r   rl   insert)tensorrl   Zresult_sizesZresult_strides
new_strider2   r2   r3   inferUnsqueezeGeometry  s    r   c                 C   s0   t ||  d }t| |\}}| || | S r   )r   rl   r   r   )r}   rl   Zg_sizesZ	g_stridesr2   r2   r3   meta_unsqueeze_  s   r   r   weight_metabias_activation_opt	out_dtypec           	      C   s   t | j}|d ur|d|dksJ d|d| dd ks%J |d|d< t| jdks7J dd| df}|d urQ| jtjkrM|tjksQJ d| j||d u r[| jn|d	||}|S )	Nr   zoutput size mismatchr   r   r   z*we can only handle the squashed input case9out_dtype is only supported for i8i8->i32 linear operatorrt   )
r   r|   r   r   rM   rF   int8int32r{   
as_strided)	r   r   r   r   r   r   output_sizesZtransposed_stridesr   r2   r2   r3   meta_sparse_structured_linear  s$   
	r   mat1	mat1_metamat2c                 C   s   t | jdks	J t |jdksJ t |jdksJ | d|dd ks)J | d|dg}|d urF|jtjkrB|tjksFJ d|j||d u rP|jn|d}|S )Nr   r   r   r   rt   r   r|   r   rM   rF   r   r   r{   )r   r   r   r   r   r   r2   r2   r3   meta_sparse_structured_mm:  s   r   r   )alphabetar   c          	      C   s   t | jdksJ dt |jdksJ t |jdksJ t |jdks&J | d|dks4J d|d|dd ksBJ |d|dg}|d ur_|jtjkr[|tjks_J d|j||d u ri|jn|d}|S )Nr   zEonly input broadcasted to columns of mat1 * mat2 product is supportedr   r   r   rt   r   )	r   r   r   r   r  r  r   r   r   r2   r2   r3   meta_sparse_structured_addmmS  s(   r  compressed_Adense_Br  transpose_resultalg_idsplit_ksplit_k_one_kernelc	                 C   s  |j tjtjtjtjtjhv sJ d| j |j ksJ dt|jdks(J d| j tjtjfv }	|	r5dnd}
|	rA|	 rAJ d|
d}|
d	}|  d
 |
|  }|d urb||
dksbJ |d urx|	rt|tjtjtjtjhv sxJ d|r~||fn||f}|j||dS )Nz;_cslt_sparse_mm only supports fp16, bf16, int8, and fp8e4m3zinputs must have the same dtyper   z'_cslt_sparse_mm only supports 2d inputs
   	   z.dense input must be transposed for 8bit dtypesr   r      z\out_dtype is not supported for {compressed_A.dtype} x {dense_B.dtype} -> {out_dtype} matmul!rt   )rM   rF   float32float16bfloat16r   float8_e4m3fnr   r|   is_contiguousr   rz   r   r{   )r  r  r   r  r   r  r  r  r	  Zis_8bit_input_typeZcompression_factorkr   moutput_shaper2   r2   r3   meta__cslt_sparse_mmv  sB   


r  T)include_selfrv   sourcereducer  c                C      t j| t jdS r   rF   r   r   r}   rl   rv   r  r  r  r2   r2   r3   meta_index_reduce  s   
r  c                C      | S r-   r2   r  r2   r2   r3   meta_index_reduce_  s   
r  c                 C   s.   t |  }|  dkr| ||< | |S Nr   )r   r   rl   rz   r{   )r}   rl   rv   result_sizer2   r2   r3   meta_index_select  s   
r!  )lengthsr   offsetsaxisunsafeinitialdatar"  r#  r$  r%  c          
         sf   |d urt d fdd}|d ur||jS |d ur/|jd d |jd d f }	||	S td)Nz?segment_reduce(): indices based reduction is not supported yet.c                    s(   t j| j d d   jdt jdS )Nr   rg   rM   ri   r   )rF   rp   r|   rM   r   )lengths_shaper$  r'  r2   r3   segment_reduce_lengths_tensor  s   z:meta_segment_reduce.<locals>.segment_reduce_lengths_tensorr   r   z<segment_reduce(): Either lengths or offsets must be defined.)NotImplementedErrorr|   r   )
r'  r  r"  r   r#  r$  r%  r&  r+  r)  r2   r*  r3   meta_segment_reduce  s   
r-  c                 C   
   |  dS Nr2   r{   r   r2   r2   r3   meta_max     
r1  c                 C   6   t | j|f}t| ||}| || j|tjdfS Nrt   rA   reduction_dimsr|   _compute_reduction_shaper{   rF   rx   r}   rl   keepdimr  r2   r2   r3   meta_max_dim  
   r:  c                 C   r.  r/  r0  r   r2   r2   r3   meta_min  r2  r<  c                 C   r3  r4  r5  r8  r2   r2   r3   meta_min_dim  r;  r=  c                 C   s4   |   r
t| j}n	t| tjd\}}tj| |dS Nr9   rt   )r   r   rM   r   r   INT_TO_FLOATrF   r   )r}   r>   rD   r2   r2   r3   
meta_angle  s   
rA  c                 C   s$   t ||  | j |t | S r-   )rF   Z_resize_output_r   ri   copy_angle)r}   r   r2   r2   r3   meta_angle_out  s   rD  c                 C      d S r-   r2   )valr2   r2   r3   assert_async!     rG  c                 C   rE  r-   r2   )rF  
assert_msgr2   r2   r3   assert_async_meta&  rH  rJ  c                 C   rE  r-   r2   )sr2   r2   r3   
print_meta+  rH  rL  rM   rh   ri   rj   r   c                 C   s   t jdddS )Nr   rg   ri   r   rM  r2   r2   r3   make_dep_token0  s   	rO  c                 C   s4   ddl m} t| ttfrtd|| ||d d S )Nr   )constrain_range'Constraining SymFloat or Symbool is nyir   max)r   rP  rZ   r   r
   
ValueError)r   r   rS  rP  r2   r2   r3   sym_constrain_range<  s   rU  c                 C      t j| ||d |S NrR  )r)   rU  r   r   rS  	dep_tokenr2   r2   r3   functional_sym_constrain_rangeF     rZ  c                 C   s   ddl m} |d u r|d u rt|  d S t| ttfr tdt| t	u r>|d ur1t
| |k |d ur<t
| |k d S || ||d d S )Nr   )_constrain_range_for_sizerQ  rR  )r   r\  rF   _check_is_sizerZ   r   r
   rT  rb   r   rU   )r   r   rS  r\  r2   r2   r3   sym_constrain_range_for_sizeL  s   
r^  c                 C   rV  rW  )r)   r^  rX  r2   r2   r3   'functional_sym_constrain_range_for_size`  r[  r_  c                 C   s   |S r-   r2   )rF  rI  rY  r2   r2   r3   functional_assert_async_metaf  rH  r`  f_namec                 C   sX   |   dksJ | d| d| dks*J | d| d d| d dd S )Nr   z3: The input tensor must have at least 2 dimensions.r   z5: A must be batches of square matrices, but they are  by 	 matrices)rl   r   )r}   ra  r2   r2   r3   r   l  s    r   Anamec                    s   t j jk fdd t j jk fdd t  d dk fdd t  ddk fdd d S )Nc                         dj  d j  dS )Nz:Expected b and A to be on the same device, but found b on z
 and A on 	 instead.rN  r2   re  r}   r2   r3   rR   {  
   z(linearSolveCheckInputs.<locals>.<lambda>c                      rg  )Nz=Expected b and A to have the same dtype, but found b of type z and A of type rh  rt   r2   ri  r2   r3   rR     rj  r   rb  c                      s   d  d d  d dS )Nz3A must be batches of square matrices, but they are rb  rc  r   rd  r   r2   re  r2   r3   rR     s
   c                      s:   d d  d d  d d d d d 
S )NzIncompatible matrix sizes for z: each A matrix is r   rc  z but each b matrix is rb  r   r2   re  rf  r}   r2   r3   rR     s   )rF   rU   ri   rM   r   )r}   re  rf  r2   rl  r3   linearSolveCheckInputsx  s    


rm  tallow_low_precision_dtypesc                    s^   | j  t|  p|   fdd |s-t tjtjtjtjfv  fdd d S d S )Nc                          d  S )Nz<: Expected a floating point or complex tensor as input. Got r2   r2   rM   ra  r2   r3   rR         z(checkFloatingOrComplex.<locals>.<lambda>c                      rp  )Nz*: Low precision dtypes not supported. Got r2   r2   rq  r2   r3   rR     rr  )	rM   rF   rU   r   r   rI   rK   rH   rJ   )rn  ra  ro  r2   rq  r3   r     s   r   arg_namec                    s"   t |  dk fdd d S )Nr   c                          d  dS )Nz: The input tensor z! must have at least 2 dimensions.r2   r2   rs  ra  r2   r3   rR     rS   zcheckIsMatrix.<locals>.<lambda>)rF   rU   rl   )re  ra  rs  r2   ru  r3   checkIsMatrix  s   
rv  Br   c                    sZ   t   t tr ddkn	 ddk fdd d S )Nrb  r   c                      sH    drdnd d  d d  d d d d d d	S )
Nz2: Incompatible shapes of A and B for the equation zAX = BzXA = Bz (rb  r<   r   r   ra   r   r2   re  rw  ra  r   r2   r3   rR     s   
z#checkInputsSolver.<locals>.<lambda>)r   rv  rF   rU   r   )re  rw  r   ra  r2   rx  r3   checkInputsSolver  s   

*ry  resultfn_nameresult_namec                    s&   t jjk fdd d S )Nc                	      s$     d d dj  dj  	S )Nz: Expected z5 and input tensors to be on the same device, but got z on z and input on rN  r2   r{  r   rz  r|  r2   r3   rR     s   z!checkSameDevice.<locals>.<lambda>)rF   rU   ri   )r{  rz  r   r|  r2   r}  r3   checkSameDevice  s   
r~  UPLOc                    s8      }tt dko|dkp|dk fdd d S )Nr   ULc                      
   d  S )Nz1Expected UPLO argument to be 'L' or 'U', but got r2   r2   r  r2   r3   rR        
 zcheckUplo.<locals>.<lambda>)upperrF   rU   r   )r  ZUPLO_uppercaser2   r  r3   	checkUplo  s
   
r  eigenvaluesZeigenvectorsr  	compute_vc                 C   sp   t | d t| t| j}|r | |}||t|dd n| dg}|  | j|t| j	d}||fS )Nzlinalg.eighFZ	row_majorr   rt   )
r   r  r   r|   r{   r   r   poprN   rM   )re  r  r  r|   Zvecsvalsr2   r2   r3   meta__linalg_eigh  s   


r  c                 C   s@   t | d t| jr| jnt| j}| j| jd d |dS )Nzlinalg.eigvalsr   rt   r   rA   rn   rM   r   r{   r|   )r   complex_dtyper2   r2   r3   meta__linalg_eigvals  s   


r  c                 C   sX   t | d t| jr| jnt| j}| j| jd d |d}| j| j|d}||fS )Nz
linalg.eigr   rt   r  )r   r  r   Zvectorsr2   r2   r3   meta_linalg_eig  s   


r  r   c                 C   s   | j jtjdddS )Nr   rb  r   )ZmTr   rF   r   	transpose)r   r2   r2   r3   cloneBatchedColumnMajor     r  r  c                 C   s   t | S r-   )r  )r}   re  r  r2   r2   r3   _cholesky_solve_helper
  s   r  c                    sP   t jdkfdd t  jdk fdd t d\}}t|||S )Nr   c                         d j  dS )Nz-b should have at least 2 dimensions, but has  dimensions insteadr   r2   r   r2   r3   rR     rr  z cholesky_solve.<locals>.<lambda>c                      r  )Nz-u should have at least 2 dimensions, but has r  r  r2   rk  r2   r3   rR     rr  cholesky_solve)rF   rU   r   !_linalg_broadcast_batch_dims_namer  )r}   re  r  Zself_broadcastedZA_broadcastedr2   ri  r3   r    s   

r  c                 C   s.   |   dkrtj| tjdS t| d t| S )Nr   r   cholesky)rz   rF   r   legacy_contiguous_formatr   r  r}   r  r2   r2   r3   r  !  s   
r  c                 C   s   t | d t| S )Ncholesky_inverse)r   r  r  r2   r2   r3   r  *  s   
r  check_errorsc                 C   sf   t | d t| d | j}t|}t|d}| |}||| | j|d|d  tjd}||fS )Nzlinalg.choleskyFr   r   rt   )	r   r   r|   r   r   r{   r   rF   r   )re  r  r  ZA_shaper   Z	L_stridesr  infosr2   r2   r3   linalg_cholesky_ex2  s   



r  tauc                    s  t jdkdd  t ddkdd  t ddkdd  t jj dkfd	d jdkr[jd d }jd d  t  |k fd
d t jjkfdd tdd t jjtjddjj	dS )Nr   c                   S   rW   )NzHtorch.linalg.householder_product: input must have at least 2 dimensions.r2   r2   r2   r2   r3   rR   K  rY   z,linalg_householder_product.<locals>.<lambda>rb  r   c                   S   rW   )Nzbtorch.linalg.householder_product: input.shape[-2] must be greater than or equal to input.shape[-1]r2   r2   r2   r2   r3   rR   O  rY   c                   S   rW   )Nz`torch.linalg.householder_product: input.shape[-1] must be greater than or equal to tau.shape[-1]r2   r2   r2   r2   r3   rR   S  rY   r   c                         dj  d j  S )Nzptorch.linalg.householder_product: Expected tau to have one dimension less than input, but got tau.ndim equal to  and input.ndim is equal to r  r2   r   r  r2   r3   rR   X  
   c                      r  )Nzltorch.linalg.householder_product: Expected batch dimensions of tau to be equal to input.shape[:-2], but got r2   r2   actual_batch_tau_shaper2   r3   rR   b     c                      r  )Nz,torch.linalg.householder_product: tau dtype z does not match input dtype rt   r2   r  r2   r3   rR   j  s   
z torch.linalg.householder_productr  Fr  r   r   rM   ri   )
rF   rU   r   r   r|   rM   r~  empty_stridedr   ri   )r   r  Zexpected_batch_tau_shaper2   )r  r   r  r3   linalg_householder_productD  sD   


r  c                 C   s^   t | d t| ddd | | j}|| jt| jdd | j| jd d tjd}||fS )Nzlinalg.inv_exF)ro  r  rb  rt   r   r   r{   r|   r   r   rF   r   )re  r  r  r  r2   r2   r3   linalg_inv_ex_metaz  s   
r  LDpivotsinfo)	hermitianr  r  c                C   st   t | d t| d tj| jt| jdd| j| jd}| j| jd d tj	d}| j| jd d tj	d}|||fS )Nztorch.linalg.ldl_factor_exFr  r  r   rt   rb  )
r   r   rF   r  r|   r   rM   ri   r{   r   )r}   r  r  r  r  r  r2   r2   r3   linalg_ldl_factor_ex_meta  s   


r  )r  c                   s   t d td t d t jdk fdd jd d }t|jkfdd ttj	fdd tj	 j	k fdd t
 \}}tj|t|d	d
 j	 jdS )Nztorch.linalg.ldl_solver   c                      r  )NzMtorch.linalg.ldl_solve: Expected B to have at least 2 dimensions, but it has r  r  r2   )rw  r2   r3   rR        z'linalg_ldl_solve_meta.<locals>.<lambda>r   c                      r  )Nzjtorch.linalg.ldl_solve: Expected LD.shape[:-1] and pivots.shape to be the same, but got pivots with shape  insteadr|   r2   r  r2   r3   rR     r  c                      rs   )Nz<torch.linalg.ldl_solve: Expected pivots to be integers. Got rt   r2   r  r2   r3   rR     rw   c                      r  )Nz!torch.linalg.ldl_solve: LD dtype z does not match b dtype rt   r2   )rw  r  r2   r3   rR         Fr  r  )r   r   rm  rF   rU   r   r|   rA   is_integer_dtyperM   _linalg_broadcast_batch_dimsr  r   ri   )r  r  rw  r  Zexpected_pivots_shapeB_broadcast_sizerD   r2   )rw  r  r  r3   linalg_ldl_solve_meta  s6   
	






r  Pr  )pivotr  c          	         s   t  jdk fdd t j}|d }|d }t||}||d< |r+ |}n dg}||d<  |}||d< ||d<  |}|||fS )Nr   c                      r  )Nz@linalg.lu: Expected tensor with 2 or more dimensions. Got size: r  r  r2   rk  r2   r3   rR     rr  z linalg_lu_meta.<locals>.<lambda>rb  r   r   )rF   rU   r   r   r|   r   r{   )	re  r  sizesr  r   r  r  r  r  r2   rk  r3   linalg_lu_meta  s$   





r  LU)r  r  c          	         s   t  jdk fdd t j}|d }|d }t j|t|dd j jd}|	  t
|||d<  j|t jd	}|	   j|t jd	}|||fS )
Nr   c                      r  )NzFtorch.lu_factor: Expected tensor with 2 or more dimensions. Got size: r  r  r2   rk  r2   r3   rR     rr  z*linalg_lu_factor_ex_meta.<locals>.<lambda>rb  r   Fr  r  rt   )rF   rU   r   r   r|   r  r   rM   ri   r  r   r{   r   )	re  r  r  r  r  r   r  r  r  r2   rk  r3   linalg_lu_factor_ex_meta  s&   



r  )r   adjointr  c                   s   t d tj jk fdd tjtjkdd  td t |d tddkdd  tjd d jkfdd t	 \}}tj
|t|| d	 j jd
}| dkru|su| ru| }|S )Nztorch.linalg.lu_solvec                      rg  )NzPlinalg.lu_solve: Expected LU and B to have the same dtype, but found LU of type  and B of type r  rt   r2   )rw  r  r2   r3   rR     rj  z&linalg_lu_solve_meta.<locals>.<lambda>c                   S   rW   )NzElinalg.lu_solve: pivots should be a Tensor of scalar type torch.int32r2   r2   r2   r2   r3   rR     rY   zlinalg.lu_solver   c                   S   rW   )NzYlinalg.lu_solve: Number of pivots per batch should be same as the dimension of the matrixr2   r2   r2   r2   r3   rR   $  rY   c                      r  )Nzclinalg.lu_solve: Expected LU.shape[:-1] and pivots.shape to be the same, but got pivots with shape r  r  r2   r  r2   r3   rR   *  r  r  r  r   )r   rF   rU   rM   r   r   ry  r   r|   r  r  r   ri   rz   r   Zconj)r  r  rw  r   r  r  rD   rz  r2   )rw  r  r  r3   linalg_lu_solve_meta  s<   




r  unpack_dataunpack_pivotsc                    s   t  jdk fdd |rt |jt jkdd  t j}|d }|d }t||}||d< |r9 |}n dg}|rX||d<  |}	||d< ||d<  |}
n dg}	 dg}
||	|
fS )Nr   c                      r  )NzFtorch.lu_unpack: Expected tensor with 2 or more dimensions. Got size: r  r  r2   r  r2   r3   rR   J  rr  z lu_unpack_meta.<locals>.<lambda>c                   S   rW   )Nztorch.lu_unpack: LU_pivots is expected to be a contiguous tensor of torch.int32 dtype.
Note: this function is intended to be used with the output produced by torch.linalg.lu_factorr2   r2   r2   r2   r3   rR   O     rb  r   r   )	rF   rU   r   rM   r   r   r|   r   r{   )r  r  r  r  r  r  r   r  r  r  r  r2   r  r3   lu_unpack_meta@  s4   





r  modec                    sd    dkrd}d}||fS  dkrd}d}||fS  dkr$d}d}||fS t d fdd ||fS )NreducedTZcompleteFrc                         d  dS )Nzqr received unrecognized mode 'z=' but expected one of 'reduced' (default), 'r', or 'complete'r2   r2   r  r2   r3   rR   w  s   z _parse_qr_mode.<locals>.<lambda>rF   rU   )r  	compute_qr  r2   r  r3   _parse_qr_modej  s"   	
r  QRr  c                 C   s   t | d t| d t|\}}| jd }| jd }t||}|r>t| j}|r*|n||d< | |}||t|dd n| dg}t| j}	|sM|sO|n||	d< | |	}
|
|	t|	dd ||
fS )Nz	linalg.qrrb  r   Fr  r   )	rv  r   r  r|   r   r   r{   r   r   )re  r  r  Zreduced_moder  r   r  ZQ_shaper  ZR_shaper  r2   r2   r3   linalg_qr_meta  s"   








r  sign	logabsdetc                 C   s   t | d t| dd | j}| |d d }| j|d d t| jd}tj|t|d| j| j	d}| j|d d tj
d}||||fS )Nzlinalg.slogdetFrb  rt   r  r   )r   r   r|   r{   rN   rM   rF   r  r   ri   r   )re  r|   r  r  r  r  r2   r2   r3   _linalg_slogdet  s   
r  full_matrices
compute_uvdriverc                 C   s   t | d t| d t| jd d }| jd }| jd }t||}|r]|||r*|n|g }| |}	|	|t|dd ||rB|n||g }
| |
}t| dk}||
t|
|d n| dg}	| dg}| j||g t	| j
d}|	||fS )	Nz
linalg.svdrb  r   Fr  r   r   rt   )rv  r   r   r|   r   r{   r   r   r   rN   rM   )re  r  r  r  r   r  r   r  ZU_shaper  ZV_shapeVZis_cudaSr2   r2   r3   _linalg_svd_meta  s$   







r  arg1arg2c                 C   sn   | j d d }|j d d }t||}t|}|| d| dg7 }t|}||d|dg7 }||fS )Nrb  r   )r|   r#   r   r   )r  r  Zarg1_batch_sizesZarg2_batch_sizesexpand_batch_portionarg1_expand_sizearg2_expand_sizer2   r2   r3   r    s   
r  c                 C   sV   |rt | || t| |\}}|| jkr| n| |}||jkr"|n||}||fS r-   )rm  r  r|   expand)r  r  rf  r  r  Zarg1_broadcastedZarg2_broadcastedr2   r2   r3   r    s   r  r   c                 C   s6   | j d d }|jdkp| jd |jko|j |k}|S )Nr   r   )r|   r   )r   r   Zexpected_batched_rhs_shapevector_caser2   r2   r3   linalg_solve_is_vector_rhs  s
   
r  )r   r  rz  r  r  r  c                   sh  t  d t jjk fdd t }|r dn}	t |	|d t|	 \}
}t|p6| dd  |rC|
d d n|
}tj|t	|| jj
d} j}tj|t	|d j j
d} j|d d tjd} j|d d	 tjd}||||f}||||f}td
d |D rt||D ]\}}t||j ||j|  t||dd q|S )Nzlinalg.solvec                      s   d j  dj  dS )NzKlinalg.solve: Expected A and B to have the same dtype, but found A of type r  r  rt   r2   re  rw  r2   r3   rR     rj  z"_linalg_solve_ex.<locals>.<lambda>r   c                   S   rW   )Nzlinalg.solve: Vector broadcasting of the left hand side is not supported for left=False. In this case linalg.solve is equivalent to B / A.squeeze(-1)r2   r2   r2   r2   r3   rR     r  r  Frt   rb  c                 s   s    | ]}|d uV  qd S r-   r2   r:   r2   r2   r3   r]   3      z#_linalg_solve_ex.<locals>.<genexpr>)	copy_fromcopy_toexact_dtype)r   rF   rU   rM   r  	unsqueezery  r  r  r   ri   r|   r{   r   allzipr   r   r   r!   )re  rw  r   r  rz  r  r  r  r  B_ZB_broad_shaperD   Zresult_shapeZresult_r|   ZLU_Zpivots_Zinfo_r   resr  or2   r  r3   _linalg_solve_ex  sJ   



r  )r   unitriangularr   r  r   c          	      C   s   |d u r
|  dg}t|tsJ t| ||d t|| d \}}|dd o+| }|r6t||j	}|S t
||j	rL||ddj	 |dd |S )Nr   zlinalg.solve_triangularrb  r   )r{   rZ   r   ry  r  r  r  Zis_conjr   r|   r    r   
transpose_)	re  rw  r  r   r  r   r  ZA_Zavoid_copy_Ar2   r2   r3   linalg_solve_triangular_meta=  s   
r  XM)r  r  c           	         s   t jdkfdd t  jdk fdd t d  jt jkrOt \}}t j|t|ddj	j
d}t j|t|dd j	 j
d}||fS  jt jks[ jt jkrjt }d	g}||fS t dd
d  ||fS )Nr   c                      r  )NzMtorch.triangular_solve: Expected b to have at least 2 dimensions, but it has r  r  r2   r   r2   r3   rR   b  r  z'triangular_solve_meta.<locals>.<lambda>c                      r  )NzMtorch.triangular_solve: Expected A to have at least 2 dimensions, but it has r  r  r2   rk  r2   r3   rR   i  r  triangular_solveFr  r  r   c                   S   rW   )Nz+triangular_solve: Got an unexpected layout.r2   r2   r2   r2   r3   rR     rY   )rF   rU   r   rm  rh   stridedr  r  r   rM   ri   
sparse_csr
sparse_bsrr   r{   )	r}   re  r  r  r  Zself_broadcast_sizeZA_broadcast_sizeZsolutionZcloned_coefficientr2   ri  r3   triangular_solve_metaW  s<   	




r  c                 C   sp   t | d t| d | | jd d }| | j}|| jt| jdd | j| jd d tjd}|||fS )Nz
linalg.detrb  Fr  r   rt   r  )re  Zdetr  r  r2   r2   r3   _linalg_det_meta  s   


r  c                    s  t jdkdd  t jdkdd  |rdndt j jd kfdd t j jd kfdd t jd jd kd	d  t jj d
kfdd t jjkfdd jdkrjd d }jd d t |kfdd jd d  t  |k fdd t jjkfdd t jjkfdd tdd tdd t jjtjddjjdS )Nr   c                   S   rW   )Nz3torch.ormqr: input must have at least 2 dimensions.r2   r2   r2   r2   r3   rR     rY   zormqr.<locals>.<lambda>c                   S   rW   )Nz3torch.ormqr: other must have at least 2 dimensions.r2   r2   r2   r2   r3   rR     rY   rb  r   c                      r  )Ntorch.ormqr: other.shape[z0] must be greater than or equal to tau.shape[-1]r2   r2   left_size_conditionr2   r3   rR     rw   c                      r  )Nr  z"] must be equal to input.shape[-2]r2   r2   r  r2   r3   rR     rw   c                   S   rW   )NzHtorch.ormqr: tau.shape[-1] must be less than or equal to input.shape[-1]r2   r2   r2   r2   r3   rR     rY   r   c                      r  )Nz[torch.ormqr: Expected tau to have one dimension less than input, but got tau.ndim equal to r  r  r2   r  r2   r3   rR     r  c                      r  )Nzhtorch.ormqr: Expected other to have the same number of dimensions as input, but got other.ndim equal to r  r  r2   r   r   r2   r3   rR     r  c                      r  )NzWtorch.ormqr: Expected batch dimensions of tau to be equal to input.shape[:-2], but got r2   r2   r  r2   r3   rR     r  c                      r  )NzYtorch.ormqr: Expected batch dimensions of other to be equal to input.shape[:-2], but got r2   r2   )actual_batch_other_shaper2   r3   rR     r  c                         d j  dj  S )NzPtorch.ormqr: Expected input and tau to have the same dtype, but input has dtype z and tau has dtype rt   r2   r  r2   r3   rR     r  c                      r  )NzRtorch.ormqr: Expected input and other to have the same dtype, but input has dtype z and other has dtype rt   r2   r   r2   r3   rR     r  ztorch.ormqrr  r   Fr  r  )	rF   rU   r   r|   rM   r~  r  r   ri   )r   r  r   r   r  Zexpected_batch_shaper2   )r  r  r   r  r   r  r3   ormqr  sn   	







r  c                   s   t td  k fdd j}| d k}|}| }|r3td|D ]}|o0|dk}q&ntd|D ]}|oB|dk}q8t |pI| fdd d S )Nr   c                      s   dd   dt  S )Nzpadding size is expected to be r   z, but got: r   r2   )rl   paddingr2   r3   rR         z,_padding_check_valid_input.<locals>.<lambda>r   r   c                      s    d d  d d  dj  S )Nz	Expected r   zD or r   zcD (batch mode) tensor with possibly 0 batch size and other non-zero dimensions for input, but got: r  r2   )rl   r   r2   r3   rR   
  s   )rF   rU   r   r   r   r   )r   r  rl   Z	input_dimZis_batch_modeZvalid_batch_modeZvalid_non_batch_moder   r2   )rl   r   r  r3   _padding_check_valid_input  s$   r  c                   s   d}d d}j dkrd} d7  |d7 }t|dd |\|}   |rHtk o>k  fdd tdkfdd j dkra|fS ||fS )	Nr   r   r+   r   c                         d d d  dj  S NzcArgument #4: Padding size should be less than the corresponding input dimension, but got: padding (r`   ) at dimension 
 of input r  r2   dim_wr   pad_lpad_rr2   r3   rR   &     z_pad1d_common.<locals>.<lambda>c                      r^   )Nz
input (W: z%) is too small. Calculated output W: r2   r2   )input_woutput_wr2   r3   rR   .  rS   r   )r   r   r  rF   rU   r{   )r   r  is_reflection	dim_planenbatchnplaner2   )r  r   r  r  r  r  r3   _pad1d_common  s0   




r  c                 C      t | |ddS NTr  r  r   r  r2   r2   r3   meta_reflection_pad1d7     r  c                 C   r  NFr  r  r  r2   r2   r3   meta_replication_pad1d=  r  r   c                   s   d |st t|dkdd  jdkr d7  |\ }|  |r=t |k o3|k  fdd t  k fdd jS )Nr   r   c                   S   rW   )Nz padding size is expected to be 2r2   r2   r2   r2   r3   rR   F  rY   z(_pad1d_backward_common.<locals>.<lambda>r+   c                      r  r	  r  r2   r  r2   r3   rR   S  r  c                         d d   S Nz(grad_output width unexpected. Expected: , Got: r   r2   r  grad_outputr  r2   r3   rR   [  r@   rF   rU   r   r   r   r{   r|   )r%  r   r  r  r  r2   )r  r%  r   r  r  r  r3   _pad1d_backward_commonC  s$   

r'  
grad_inputc                 C      t | ||ddS r  r'  r%  r   r  r2   r2   r3   meta_reflection_pad1d_backwarda     r,  c                 C   r)  r  r*  r+  r2   r2   r3   meta_replication_pad1d_backwardg  r-  r.  c                   s2  dd d}d}t |dd j}|dkr'd}d7  d7  |d7 }|\	
|} 
   	 |rptk oS	k 	fdd t
k ofk  
fdd tdkpydkfd	d jd
kr|fS ||fS )Nr   r   r   r      c                      r  r	  r  r2   r  r2   r3   rR     r  z_pad2d_common.<locals>.<lambda>c                         d d d  dj  S NzcArgument #6: Padding size should be less than the corresponding input dimension, but got: padding (r`   r
  r  r  r2   dim_hr   pad_bpad_tr2   r3   rR     r  c                      s   d  d d d S )Nz
input (H:  W: z%) is too small. Calculated output H: r2   r2   )input_hr  output_hr  r2   r3   rR     s
   r+   r  r   r   rF   rU   r{   )r   r  r  Z
dim_slicesr  r   r  r2   )r3  r  r   r7  r  r8  r  r4  r  r  r5  r3   _pad2d_commonm  sB   




r:  c                 C   r  r  r:  r  r2   r2   r3   meta_reflection_pad2d  r  r<  c                 C   r  r  r;  r  r2   r2   r3   meta_replication_pad2d  r  r=  c                    s   dd d}|j }| dkrd7  d7  |d7 }|\}}}}|  }	| }
|	| | |
| | tkfdd t k fdd ||j S )Nr   r   r   r/  c                      r!  r"  r   r2   r$  r2   r3   rR     r@   z%meta_pad2d_backward.<locals>.<lambda>c                      r!  Nz)grad_output height unexpected. Expected: r#  r   r2   r3  r%  r8  r2   r3   rR     r@   )r|   rl   rF   rU   r   r{   )r%  r}   r  r  rQ   r  r  r5  r4  r7  r  r2   )r3  r  r%  r8  r  r3   meta_pad2d_backward  s,   
r@  c             	      s  ddd d}t |dd jdk}|r+d}d7 d7  d7  |d7 }|\
|}    
   	|rtk odk fdd tk ow
k 
fd	d tk ok  fd
d t	dkpdkpdk	fdd |r||	fS |	fS )Nr+   r   r   r   r      c                      r  r	  r  r2   r  r2   r3   rR     r  z_pad3d_common.<locals>.<lambda>c                      r0  r1  r  r2   r2  r2   r3   rR     r  c                      r0  )NzcArgument #8: Padding size should be less than the corresponding input dimension, but got: padding (r`   r
  r  r  r2   )dim_dr   pad_bkpad_fr2   r3   rR     r  c                      s(   d  d d d d d S )Nz
input (D:  H: r6  z%) is too small. Calculated output D: r2   r2   )input_dr7  r  output_dr8  r  r2   r3   rR     s   r9  )r   r  r  r  Z
batch_moder  r  r2   )rB  r3  r  r   rF  r7  r  rG  r8  r  r4  rC  rD  r  r  r5  r3   _pad3d_common  sP   





rH  c                 C   r  r  rH  r  r2   r2   r3   meta_reflection_pad3d  r  rJ  c                 C   r  r  rI  r  r2   r2   r3   meta_replication_pad3d  r  rK  c                    s(  t t|dkdd  |jdksJ j|jksJ ddd |jdkr2d7 d7  d7  |\}}}}}}| }	|}
|}|	| | |
| | || | t kfdd t kfd	d t  k fd
d ||jS )N   c                   S   rW   )Nz padding size is expected to be 6r2   r2   r2   r2   r3   rR   )  rY   z%meta_pad3d_backward.<locals>.<lambda>r+   r   r   rA  c                      r!  r"  r   r2   r$  r2   r3   rR   A  r@   c                      r!  r>  r   r2   r?  r2   r3   rR   E  r@   c                      r!  )Nz(grad_output depth unexpected. Expected: r#  r   r2   )rB  r%  rG  r2   r3   rR   I  r@   r&  )r%  r   r  r  r  r5  r4  rD  rC  rF  r7  r  r2   )rB  r3  r  r%  rG  r8  r  r3   meta_pad3d_backward  s<   




rM  r   pc                 C   s^   t |  dd  | d}|dkr| dgjt jdS | ||d  d fjt jdS )Nc                   S   rW   )Nz(_pdist_forward requires contiguous inputr2   r2   r2   r2   r3   rR   S  rY   z%meta__pdist_forward.<locals>.<lambda>r   r   r   r   )rF   rU   r  r   r{   r   r  )r}   rN  r   r2   r2   r3   meta__pdist_forwardO  s   
rO  gradpdistc                 C   s8   t | dd  t | dd  t j|t jdS )Nc                   S   rW   )Nz._pdist_backward requires self to be contiguousr2   r2   r2   r2   r3   rR   b  rY   z&meta__pdist_backward.<locals>.<lambda>c                   S   rW   )Nz/_pdist_backward requires pdist to be contiguousr2   r2   r2   r2   r3   rR   e  rY   r   )rF   rU   r  r   r  )rP  r}   rN  rQ  r2   r2   r3   meta__pdist_backward^  s   rR  )r  r  c          	         s     d}  d} d}|||ft  dkdd  t dkdd  tj j  ko=jkn   fdd  j}j|d |d td kocd kfd	d   S )
Nr   r   r   r+   c                   S   rW   Nzbatch1 must be a 3D tensorr2   r2   r2   r2   r3   rR   q  rY   zmeta_baddbmm.<locals>.<lambda>c                   S   rW   Nzbatch2 must be a 3D tensorr2   r2   r2   r2   r3   rR   r  rY   c                      s   dj  d j  dj  S )Nz+Input dtypes must be the same, got: input: z
, batch1: z
, batch2: rt   r2   )batch1batch2r}   r2   r3   rR   u      c                	      &   d d d d  d d  d	S Nz@Expected size for first two dimensions of batch2 tensor to be: [r`   z] but got: [r   r   ].r2   r2   batch2_sizesbscontraction_sizer2   r3   rR   }  s   )r   r  rF   rU   rl   rM   r|   r{   )	r}   rU  rV  r  r  dim1dim2Zdim3batch1_sizesr2   )rU  rV  r\  r]  r^  r}   r3   meta_baddbmmj  s&   


rb  c                C   r  r   r  r}   r   r2   r2   r3   meta_bernoulli  s   rd        ?c                 C   r  r-   r2   r}   rN  r   r2   r2   r3   meta_bernoulli_  rH  rg  c                 C   r  r   r  rf  r2   r2   r3   meta_bernoulli_p  r-  rh  c                 C   
   t | S r-   rF   r   rc  r2   r2   r3   meta_poisson  r2  rk  c                 C   s6   t |
|  k dd  t j| t jd}t | |fS )Nc                   S   rW   )NzJError in fused_moving_avg_obs_fake_quant_cpu: ch_axis must be < self.dim()r2   r2   r2   r2   r3   rR     rY   z6meta__fused_moving_avg_obs_fq_helper.<locals>.<lambda>rt   )rF   rU   rl   r   bool)r}   Zobserver_onZfake_quant_onZrunning_minZrunning_maxscaleZ
zero_pointZaveraging_constZ	quant_minZ	quant_maxZch_axisZper_row_fake_quantZsymmetric_quantmaskr2   r2   r3   $meta__fused_moving_avg_obs_fq_helper  s   
ro  c                    sn   t |  dkdd  t | dkdd  | j\ |j\t  k fdd | S )Nr   c                   S   rW   )Nza must be 2Dr2   r2   r2   r2   r3   rR     rY   zmeta_mm.<locals>.<lambda>c                   S   rW   )Nzb must be 2Dr2   r2   r2   r2   r3   rR     rY   c                	      s   d d  d d d	S )Nz/a and b must have same reduction dim, but got [r`   z] X [rZ  r2   r2   ZM1ZM2Nr  r2   r3   rR     s    )rF   rU   rl   r|   r{   abr2   rp  r3   meta_mm  s   

ru  c                    s0   |rt  fddtjD S tj S )Nc                 3   s&    | ]}| vrj | nd V  qdS )r   Nr  r;   r   dimsr}   r2   r3   r]     s   $ z+_compute_reduction_shape.<locals>.<genexpr>)rT   r   r   rA   compute_reduction_output_shaper|   )r}   rx  r9  r2   rw  r3   r7    s   r7  strc                 C   sD   t | tjjr| jjS t| dr t| jdr | jjdkr | jjS dS )Nri   rb   rg   r   )rZ   rF   Z_subclassesZ
FakeTensorZfake_devicerb   hasattrri   )r   r2   r2   r3   r     s   
r   input_tensorr   r  dilationis_transposedgroupsoutput_paddingc                    s@  dt dt dt dt dt dt fdd}dt dt dt dt dt d	t dt fd
d}	|jdd  }
| jdd   |r<||jd  }n|jd }|jd | | jd krQtd| jd |gt|tre|gt  }nt|dkrt|d gt  }t|tr|gt  }nt|dkr|d gt  }t|tr|gt  }nt|dkr|d gt  }d }|rt|tr|gt  }nt|dkr|d gt  }n|}tt D ]2}|r|	 | || || |
| || ||  qՈ| | || || |
| ||  qt	t
dd dd  D  fdd S )NlnrN  r   r  rK  r,   c                 S   s$   | d|  ||d   d | d S )a  
        Formula to apply to calculate the length of some dimension of the output

        See: https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html

        Args:
            ln: length of the dimension
            p: padding in that dim
            d: dilation in that dim
            k: kernel size in that dim
            s: stride in that dim
        Returns:
            The output length
        r   r   r2   )r  rN  r   r  rK  r2   r2   r3   _formula  s   $z+calc_conv_nd_return_shape.<locals>._formular/   c                 S   s(   | d | d|  ||d   | d S )a  
        Formula to apply to calculate the length of some dimension of the output
        if transposed convolution is used.
        See: https://pytorch.org/docs/stable/generated/torch.nn.ConvTranspose2d.html

        Args:
            ln: length of the dimension
            p: padding in that dim
            d: dilation in that dim
            k: kernel size in that dim
            s: stride in that dim
            op: output padding in that dim

        Returns:
            The output length
        r   r   r2   )r  rN  r   r  rK  r/   r2   r2   r3   _formula_transposed  s   (z6calc_conv_nd_return_shape.<locals>._formula_transposedr   r   r   zInvalid channel dimensionsc                 s       | ]}|d kV  qdS r   Nr2   r:   r2   r2   r3   r]   =	  r  z,calc_conv_nd_return_shape.<locals>.<genexpr>c                      s   dt   ddd   dS )NzGiven input size per channel: z&. Calculated output size per channel: r   z. Output size is too small)r   r2   rx  Z	ret_shaper2   r3   rR   >	  s    
z+calc_conv_nd_return_shape.<locals>.<lambda>)r   r|   r   rZ   r   r   r   r   rF   rU   rm   )r|  r   r   r  r}  r~  r  r  r  r  kernel_sizeZout_channelsZoutput_padding_listr   r2   r  r3   calc_conv_nd_return_shape  sb   "
&




"r  c                 C      t j| t jkS r-   rF   _prims_commonsuggest_memory_formatchannels_lasttenr2   r2   r3   is_channels_lastF	     r  running_meanrunning_vartrainingexponential_average_factorepsilonc                    s    j }|d ur
|j n|j }	|d ur|j n|j }
 fdd} |j| d}|r4 |	} |
}n
 d} d}|||fS )Nc                      s(   t  rtjS  jtjdrtjS tjS r   )r  rF   r  r  r   r2   r|  r2   r3   pick_memory_format]	  s
   z2meta_miopen_batch_norm.<locals>.pick_memory_formatr   r   )r|   r{   r   )r|  r   r   r  r  r  r  r  r   Zsave_mean_shapeZsave_var_shaper  r   Z	save_meanZsave_varr2   r  r3   meta_miopen_batch_normJ	  s   



r  c	              	      sf    fdd}	t  ||||||r|nd }
d}d} |dkr%d|
|<  |
}|j|	 d}|S )Nc                      s^   t  dkrt strtjS nt rtjS  jtjdr#tjS  jtjdr-tjS d S Nr   r   )r   r  rF   r  r  r   preserve_formatr2   r|  r   r2   r3   r  |	  s   z%meta_conv.<locals>.pick_memory_formatr   r   r   )r  r   r{   r   )r|  r   r   r   r  r}  r~  r  r  r  	shape_outZinput_channels_dimZoutput_channels_dimr   r2   r  r3   	meta_convp	  s$   

r  mkldnnc
              	   C   sH   t | ||||d|g }
| |
}tj}|  dkrtj}|j|d}|S )NFrA  r   )r  r{   rF   r  rl   channels_last_3dr   )r|  r   r   r  r   r}  r  attrscalars	algorithmr  r   Zout_memory_formatr2   r2   r3   meta_mkldnn_convolution_default	  s   
r  c                 C   s$   |  g | jd d |jd R S Nr   r   r{   r|   )r|  r   r   r  r  r  r2   r2   r3   meta_linear_pointwise_default	  s   $r  mklc                 C   s$   |  g | jd d |jd R S r  r  )r|  Zpacked_weightZorig_weightr   r   r2   r2   r3   meta_mkl_linear	  s   r  onednnc              	   C   sR   t | ||||	d|
d }|tjtjtjtjfv sJ | j||d}|jtjd}|S )NFrt   r   )	r  rF   r  r  uint8r   r{   r   r  )r<   x_scalex_zpww_scalew_zpr   r   r  r}  r  output_scaleoutput_zero_pointoutput_dtyper  r  r  r  r   r2   r2   r3   meta_qconv2d_pointwise	  s   
r  c                 C   s   |dksJ |S )Nsumr2   )r<   r  r  r  r  r  accumr   r   r  r}  r  r  r  r  Zaccum_scaleZaccum_zero_pointbinary_op_namer  unary_op_nameunary_op_argsunary_op_algorithmr2   r2   r3   meta_qconv2d_pointwise_binary	  s   r  c                 C   sF   t | j}|jd |d< |	tjtjtjtjfv sJ | j||	d}|S )Nr   r   rt   )r   r|   rF   r  r  r   r  r{   )r<   r  r  r  r  r  r   r  r  r  Zpost_op_nameZpost_op_argsZpost_op_algorithmr  r   r2   r2   r3   meta_qlinear_pointwise
  s
   
r  c                 C   sR   |dkr|S t | j}|jd |d< |
tjtjtjtjfv s J | j||
d}|S )Nr  r   r   rt   )r   r|   rF   r  r  r  r   r{   )r<   r  r  r  r  r  Zx_2r   r  r  r  Zx2_scaleZx2_zpr  r  r  r  r  r  r   r2   r2   r3   meta_qlinear_pointwise_binary%
  s   
r  c                 C   s&   t | j}|jd |d< | |}|S )Nr   r   )r   r|   r{   )r<   r  r   r  r   r2   r2   r3   meta_linear_dynamic_fp16D
  s   

r  	quantizedr2   r   r   c                 C   sr   t | |||||\}}}|  dkr| dnd}	tj}
|  dkr(|||g}n|	|||g}tj|| j| j|
dS Nr/  r   r+   r(  )#max_pool2d_checks_and_compute_shaperl   r   rF   r  rp   rM   ri   r   r  r   r  r}  	ceil_modenInputPlaneoutputHeightoutputWidthr  r   r   r2   r2   r3   meta_quantized_max_pool2dU
  s$   r  c                 C   s   t |  dkd|   d t | dkd|  d t | jt jt jt jfv d| j  t |jt jkd|j  t |jt jkd|j  t |j| jkd|j  | j	| 
d	|
d	| jd
S )Nr   zx must be a 2D tensor, got Dzw must be a 2D tensor, got #expected x to be f32/f16/bf16, got expected w to be uint8, got z q_group_size must be int64, got z5q_scale_and_zeros must have the same dtype as x, got r   rt   )rF   rU   rl   rM   r  r  r  r  r   r{   r   r<   r  Zq_group_sizeZq_scale_and_zerosr2   r2   r3   meta_int4mm_packed_weight_cpur
  s      




r  c                    s4   t   koj k fdd d S )Nc                      s8   d  d d dd   d dj   S )NzExpected a tensor of dimension z and tensor.size[z] == r`   zbut got : dimension z] = rl   r|   r2   rl   dim_sizer   r   r2   r3   rR   
  s    z check_dim_size.<locals>.<lambda>)rF   rU   rl   r|   )r   rl   r  r   r2   r  r3   check_dim_size
  s   r  c                    s  dd }|d|\}}	t t|dv dd  t  jt jt jt jt jfv fdd t|dkr8||	}
}nt|d	krH|d |d }
}n|d
|\}
}|d|\}}t |d u p_|dkdd    dkro 	dnd	} 	d} 	d} 	d}t
||||
d	|}t
||	||d	|}t }t ||	|
|||d	d	||||||   dkr|||g}n||||g}t j| j j|dS )Nc                    D   t t|dv  fdd |d }t|dkr|n|d }||fS )Nr   r   c                      r  )Nzavg_pool2d: 4 must either be a single int, or a tuple of two intsr2   r2   rf  r2   r3   rR   
  rw   z1meta_avg_pool2d.<locals>.unpack.<locals>.<lambda>r   r   rF   rU   r   rf  rF  HWr2   r  r3   unpack
     

zmeta_avg_pool2d.<locals>.unpackr  r   r   r   c                   S   rW   NzOavg_pool2d: stride must either be omitted, a single int, or a tuple of two intsr2   r2   r2   r2   r3   rR   
  rY   z!meta_avg_pool2d.<locals>.<lambda>c                         d j   dS )Nz""avg_pool2d" not implemented for ''rM   __str__r2   r   r2   r3   rR   
      r   r   r   r  c                   S   rW   Nzdivisor must be not zeror2   r2   r2   r2   r3   rR   
  rY   r/  r  rb  r   r+   r(  )rF   rU   r   rM   r  uint16uint32uint64rl   r   pooling_output_shaperA   r  pool2d_shape_checkrp   ri   )r   r  r   r  r  count_include_paddivisor_overrider  kHkWdHdWpadHpadWr  r  inputHeight
inputWidthr  r  r   r   r2   r  r3   meta_avg_pool2d
  sj   
	





r  c                 C   sj   t | ||||||dd|	|
|||| |  }|	}t|||d | t|||d | t|||d | d S )Nr   r+   r   )r  rl   r  )r   Z
gradOutputr  r  r  r  r  r  r  r  r  r  r  r  
mem_formatr   nOutputPlaner2   r2   r3   avg_pool2d_backward_shape_check
  s,   r  c                 C   s  t t|dkpt|dkdd  |d }t|dkr|n|d }	t t|dkp5t|dkp5t|dkdd  t|dkrB|n|d }
t|dkrN|	nt|dkrV|
n|d }t t|dkpgt|dkdd  |d }t|dkrx|n|d }t |d u p|dkdd  |j}| d	kr|d
 nd}|d }|d }|d }t||||
d|}t||	||d|}t|}t|| |||	|
||||||||| t j	||j
|j|dS )Nr   r   c                   S   rW   )NzKavg_pool2d: kernel_size must either be a single int, or a tuple of two intsr2   r2   r2   r2   r3   rR     rY   z*meta_avg_pool2d_backward.<locals>.<lambda>r   c                   S   rW   r  r2   r2   r2   r2   r3   rR   !  rY   c                   S   rW   )NzGavg_pool2d: padding must either be a single int, or a tuple of two intsr2   r2   r2   r2   r3   rR   '  rY   c                   S   rW   r  r2   r2   r2   r2   r3   rR   .  rY   r/  r  r  rb  r   r(  )rF   rU   r   r|   rl   r  rA   r  r  rp   rM   ri   )ZgradOutput_r   r  r   r  r  r  r  r  r  r  r  r  r  
input_sizer  r  r  r  r  r  r  r2   r2   r3   meta_avg_pool2d_backward  sj   "(
r  c                    s6  t t|dv dd  |d }t|dkr|n|d }t|dkr$|n|d }	t | p2t|dv dd  t  jt jt jt jt jfv fdd |sP|n|d }
|sX|nt|dkr`|
n|d }|sh|	nt|dkrp|
n|d }t t|dv d	d  |d }t|dkr|n|d }t|dkr|n|d }t  jd
v dd  t | p|dkdd   	d} 	d} 	d} 	d} 	d}t
||||
d|}t
||||d|}t
||	||d|}t ||||	|
|||||ddd||||||ddd  jdkr ||||fS  |||||fS )Nr   r+   c                   S   rW   NzFavg_pool3d: kernel_size must be a single int, or a tuple of three intsr2   r2   r2   r2   r3   rR   c  rY   z!meta_avg_pool3d.<locals>.<lambda>r   r   r   c                   S   rW   NzJavg_pool3d: stride must be omitted, a single int, or a tuple of three intsr2   r2   r2   r2   r3   rR   k  rY   c                      r  )Nz""avg_pool3d" not implemented for 'r  r  r2   r  r2   r3   rR   o  r  c                   S   rW   NzBavg_pool3d: padding must be a single int, or a tuple of three intsr2   r2   r2   r2   r3   rR   w  rY   r/  rA  c                   S   rW   Nz9non-empty 4D or 5D (batch mode) tensor expected for inputr2   r2   r2   r2   r3   rR     rY   c                   S   rW   r  r2   r2   r2   r2   r3   rR     rY   r  r  rb  r   zavg_pool3d()T)check_input_sizer/  )rF   rU   r   rM   r  r  r  r  r   r   r  pool3d_shape_checkr{   )r   r  r   r  r  r  r  kTr  r  dTr  r  padTr  r  r  nslicesitimeiheightiwidthotimeoheightowidthr2   r  r3   meta_avg_pool3dV  s   

  





r  c                 C   s  t t|dv dd  |d }t|dkr|n|d }	t|dkr$|n|d }
t | p2t|dv dd  |s;|n|d }|sC|	nt|dkrK|n|d }|sS|
nt|dkr[|n|d }t t|dv dd  |d }t|dkrw|n|d }t|dkr|n|d }t |jd	v d
d  t | p|dkdd  |d}|d}|d}|d}t||||d|}t||	||d|}t||
||d|}t|| |||	|
||||||||||||d ||jS )Nr  c                   S   rW   r  r2   r2   r2   r2   r3   rR     rY   z*meta_avg_pool3d_backward.<locals>.<lambda>r   r   r   c                   S   rW   r  r2   r2   r2   r2   r3   rR     rY   c                   S   rW   r  r2   r2   r2   r2   r3   rR     rY   r   c                   S   rW   r  r2   r2   r2   r2   r3   rR     rY   c                   S   rW   r  r2   r2   r2   r2   r3   rR     rY   r  r  rb  r   zavg_pool3d_backward())	rF   rU   r   r   r   r  avg_pool3d_backward_shape_checkr{   r|   )r%  r   r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r	  r
  Zotime_for_shape_checkZoheight_for_shape_checkZowidth_for_shape_checkr2   r2   r3   meta_avg_pool3d_backward  st   
  




r  c                    sZ   t  jdkp jdk fdd  jd d t| }t }t j| j j	|dS )Nr+   r/  c                      rs   )Nz"Expected 3D or 4D tensor, but got r  r2   r   r2   r3   rR     rw   z*meta_adaptive_avg_pool2d.<locals>.<lambda>rb  r(  )
rF   rU   r   r|   rT   rA   r  rp   rM   ri   )r}   output_sizer  r   r2   r   r3   meta_adaptive_avg_pool2d   s   

r  c                    s@   t  jdkp jdk fdd   jd d t| S )Nr/  rA  c                      rs   )Nz"Expected 4D or 5D tensor, but got r  r2   r   r2   r3   rR     rw   z*meta_adaptive_avg_pool3d.<locals>.<lambda>r  )rF   rU   r   r{   r|   rT   )r}   r  r2   r   r3   meta_adaptive_avg_pool3d  s
   
r  c                    s    j }td|D ]t dk fdd qt|dkp$|dkfdd tj jk fdd tj}trDtj}	j
j|d	S )
Nr   r   c                      s   d j  d dS )Nz{adaptive_avg_pool2d_backward(): Expected grad_output to have non-zero                       size for non-batch dimensions,  with dimension  being emptyr  r2   )grad_outr   r2   r3   rR   !  s
    z4meta__adaptive_avg_pool2d_backward.<locals>.<lambda>r+   r/  c                      rs   )NzBadaptive_avg_pool2d_backward(): Expected 3D or 4D tensor, but got r  r2   r   r2   r3   rR   &  rw   c                      r  Nexpected dtype z! for `grad_output` but got dtype rt   r2   )r  r}   r2   r3   rR   *  r  r   )r   r   rF   rU   r   rM   r   r  r  r{   r|   r   )r  r}   r   r   r2   )r  r   r}   r3   "meta__adaptive_avg_pool2d_backward  s$   

r  c                 C   s   t | d tj|tjdS )NZadaptive_avg_pool3d_backwardr   )!_adaptive_pool_empty_output_checkrF   r   r  r%  r}   r2   r2   r3   "meta__adaptive_avg_pool3d_backward2  s   
r  r%  c                    s<   j }td|D ]tdk fdd qd S )Nr   r   c                      s     dj  d dS )Nzc(): Expected grad_output to have non-zero size for non-batch dimensions, but grad_output has sizes r  r  r  r2   rs  r%  r   r2   r3   rR   >  s
   z3_adaptive_pool_empty_output_check.<locals>.<lambda>)r   r   rF   rU   r   )r%  rs  r   r2   r  r3   r  9  s   r  c                    s"  j }t|dv fdd td|D ] t dk fdd qtt|dkdd  d}d}d}j d	krGd}|d7 }|d }|\}}j d
krm|||f}|}	j|tjd}
|	|
fS ||||f}t	}|j
|d}	j|tjdj
|d}
|	|
fS )Nr+   r/  c                      rs   )Nz:adaptive_max_pool2d(): Expected 3D or 4D tensor, but got: r  r2   r  r2   r3   rR   K  rw   z*meta_adaptive_max_pool2d.<locals>.<lambda>r   r   c                         dj  d  dS )Nzjadaptive_max_pool2d(): Expected input to have non-zero size for non-batch dimensions, but input has sizes r  r  r  r2   r   r   r2   r3   rR   P  
   r   c                   S   rW   )NzCadaptive_max_pool2d(): internal error: output_size.size() must be 2r2   r2   r2   r2   r3   rR   X  rY   r/  r+   rt   r   )r   rF   rU   r   r   r   r{   r   rA   r  r   )r   r  r   ZdimHsizeBsizeDosizeHosizeWr   r   r   r   r2   r   r3   meta_adaptive_max_pool2dE  sD   







r&  c                    sd    j }t|dv  fdd t d tj jk fdd t}jj	|dS )Nr  c                      rs   )NzKadaptive_max_pooling2d_backward(): Expected 3D or 4D grad_output, but got: r  r2   r%  r2   r3   rR   {  rw   z3meta_adaptive_max_pool2d_backward.<locals>.<lambda>adaptive_max_pool2d_backwardc                      r  r  rt   r2   r%  r   r2   r3   rR     r  r   )
r   rF   rU   r  rM   rA   r  r{   r|   r   )r%  r   r   r   r   r2   r)  r3   !meta_adaptive_max_pool2d_backwardu  s   



r*  c                    s   j }t|dv fdd td|D ] t dk fdd qtt|dkdd  d}d}d}|d	krFd}|d7 }|}|\}}}|d
kr[||||f}	n|||||f}	|	}
j|	tjd}|
|fS )Nr   c                      rs   )Nz:adaptive_max_pool3d(): Expected 4D or 5D tensor, but got: r  r2   r  r2   r3   rR     rw   z*meta_adaptive_max_pool3d.<locals>.<lambda>r   r   c                      r  )Nzjadaptive_max_pool3d(): Expected input to have non-zero size for non-batch dimensions, but input has sizes r  r  r  r2   r   r2   r3   rR     r!  r+   c                   S   rW   )NzCadaptive_max_pool3d(): internal error: output_size.size() must be 3r2   r2   r2   r2   r3   rR     rY   rA  r/  rt   )r   rF   rU   r   r   r   r{   r   )r   r  r   ZdimDr"  r#  ZosizeTr$  r%  r   r   r   r2   r   r3   meta_adaptive_max_pool3d  s8   





r+  c                 C   s   t | d ||jS )Nadaptive_max_pool3d_backward)r  r{   r|   )r%  r   r   r2   r2   r3   !meta_adaptive_max_pool3d_backward  s   
r-  c                 C   s   |d u rt d| |S )Nz:cannot repeat_interleave a meta tensor without output_size)r   r{   )repeatsr  r2   r2   r3   meta_repeat_interleave_Tensor  s   
r/  c                 C   s:   | j jsJ |j jsJ t| j|j}| j|t| j dS r4  )rM   r   r#   r|   r{   r   )realimagr   r2   r2   r3   meta_complex  s   r2  )
fill_valuer3  c                C   s   | j ||  ftjdS r4  )r{   rl   rF   rx   )r}   r   r3  r2   r2   r3   nonzero_static  s   r4  c                 C   s<   t tjdd  t j|  |  fd|  ft j| jdS )Nc                   S   rW   )NaY  The register_meta function for torch.nonzero() raises unimplemented by default, as a correct data-independent implementation does not exist. This implementation returns a fake value, assuming all elements of the tensor are non-zero. To enable this registration, please set 'torch.fx.experimental._config.meta_nonzero_assume_all_nonzero' to True.r2   r2   r2   r2   r3   rR     rY   znonzero.<locals>.<lambda>r   rM   ri   )	rF   Z_check_not_implemented
exp_configZmeta_nonzero_assume_all_nonzeror  rz   rl   rx   ri   r   r2   r2   r3   nonzero  s   
r7  c              
      s@  t tdd  g }tD ]q\d ur|t jt jt jt jt jfv dd  jt jt jfv rv }t	|t 
j jkfdd tjD ]#t 
j j  kfdd ||d qQq| q| q|t t	jkfdd dd lm} t|j t	jk rd  t	jk sd}d	}D ]|dkrǈd urd}q|dkr҈d u rd
}qd ur nqd}|sg }g }tD ]\d ur| | qtD ]\d u r| | q||g g  g tD ]&\}	d u rBr8 j|	  q"j|	  q"tjq" fdd}
   }ddlm} | dkrk|S |
}t|}t|ttt	|krt|j|}t|}t|t|}|| |}|S )Nc                   S   rW   )Nz#at least one index must be providedr2   r2   r2   r2   r3   rR     rY   z#meta_index_Tensor.<locals>.<lambda>c                   S   rW   )Nz?tensors used as indices must be long, int, byte or bool tensorsr2   r2   r2   r2   r3   rR     rY   c                      rs   )N)too many indices for tensor of dimension r  r2   r   r2   r3   rR     rw   c                	      s$   dj  d  dj  d  S )NzThe shape of the mask z
 at index z0 does not match the shape of the indexed tensor r  r2   )r   rv   jr  r}   r2   r3   rR     s
    r   c                      s   dj  dt  dS )Nr8  z (got ra   )r   r   r2   )r   r}   r2   r3   rR     r  r   Fr   Tc                    sL      }t |  }dgt |tt| jt  < | ||S )zI
        This follows restride_src in TensorAdvancedIndexing.cpp
        r   )r   r   r   r|   r   )r}   r|   r   )after_shapebefore_shapereplacement_shaper2   r3   _restride_srcG  s    z(meta_index_Tensor.<locals>._restride_srcguard_size_oblivious) rF   rU   rl  	enumeraterM   rx   r   r   r7  r   ry   r   r   r|   r   selecttorch._refsZ_refsr   r$   r   r{   r   r?  rz   rA   Z3compute_elementwise_output_logical_to_physical_permZ
apply_permr   Zinvert_permr   r   )r}   r   rz  r7  refsstateZhas_contiguous_subspacerx  Ztransposed_indicesrl   r=  r   r?  Zrestrided_selfpermZ
perm_shaper   r2   )	r:  r;  r   rv   r   r9  r  r<  r}   r3   meta_index_Tensor  s   










rF  c                 C   sT   d }d }d }|
d r|  | }|
d r|  | }|
d r%|  |}|||fS )Nr   r   r   r{   r   )grad_output_input_weight_Zbias_sizes_optr   r  r}  Z
transposedr  r  output_maskZbackend_grad_inputZbackend_grad_weightZbackend_grad_biasr2   r2   r3   meta_convolution_backwardh  s   

rL  c                   s     d} d}| ||f} t  dkdd  t dkdd  t  d dk fdd t  d dk fd	d t|  d|ko^|  d|kd
d  | |   S )Nr   r   r+   c                   S   rW   rS  r2   r2   r2   r2   r3   rR     rY   zmeta_addbmm.<locals>.<lambda>c                   S   rW   rT  r2   r2   r2   r2   r3   rR     rY   r   c                         d  d d d S )Nz8batch1 and batch2 must have same number of batches, got r   r   r   r2   rU  rV  r2   r3   rR     rW  c                
      6   d  d d  d d d d d d	S )Nz#Incompatible matrix sizes for bmm (r   r<   r   r   ra   r   r2   rN  r2   r3   rR     
   c                   S   rW   )Nz.self tensor does not match matmul output shaper2   r2   r2   r2   r3   rR     rY   )r   r  rF   rU   rl   r{   )r}   rU  rV  r  r  r_  r`  r2   rN  r3   meta_addbmm  s$   

rQ  )
grad_scale	found_infc       	            s4   | |||||fD ] t t t fdd qd S )Nc                         dt   S Nz'exponent must be a tensor list but got rb   r2   lr2   r3   rR     rr  z#meta__fused_adam_.<locals>.<lambda>rF   rU   rZ   r   )r}   gradsexp_avgsexp_avg_sqsmax_exp_avg_sqsstate_stepslrbeta1beta2weight_decayepsamsgradmaximizerR  rS  r2   rW  r3   meta__fused_adam_  s   
rf  c       	            sZ   | |||||fD ] t t t fdd qdd }|| ||||||||fS )Nc                      rT  rU  rV  r2   rW  r2   r3   rR     rr  z"meta__fused_adam.<locals>.<lambda>c                 S   s   dd | D S )Nc                 S   s   g | ]}t |qS r2   rj  )r;   rn  r2   r2   r3   r?     r@   z=meta__fused_adam.<locals>.empty_like_list.<locals>.<listcomp>r2   )Ztensor_listr2   r2   r3   empty_like_list  s   z)meta__fused_adam.<locals>.empty_like_listrY  )r}   rZ  r[  r\  r]  r^  r_  r`  ra  rb  rc  rd  re  rR  rS  rg  r2   rW  r3   meta__fused_adam  s   
rh  c                    s   t   dkdd  t  dkdd  t  jt ju  fdd t jt ju fdd t  ddk fd	d  j ddft jd
S )Nr   c                   S   rW   )Nza must be a 2D tensorr2   r2   r2   r2   r3   rR     rY   zmeta__int_mm.<locals>.<lambda>c                   S   rW   )Nzb must be a 2D tensorr2   r2   r2   r2   r3   rR     rY   c                      rs   )Nzexpected self to be int8, got rt   r2   )rs  r2   r3   rR     rw   c                      rs   )Nzexpected mat2 to be int8, got rt   r2   )rt  r2   r3   rR     rw   r   r   c                
      rO  )Nz'Incompatible matrix sizes for _int_mm (r   r<   r   r   ra   r   r2   rr  r2   r3   rR     rP  rt   )rF   rU   rl   rM   r   r   r{   r   rr  r2   rr  r3   meta__int_mm  s   



 ri  c                    st   t   dkdd  t  jt ju  fdd  d} dd } j|d ||d  d	|d ft jd
S )Nr   c                   S   rW   Nzw must be a 2D tensorr2   r2   r2   r2   r3   rR     rY   z2meta__convert_weight_to_int4pack.<locals>.<lambda>c                      rs   Nr  rt   r2   r  r2   r3   rR     rw   r   r      r      rt   )rF   rU   rl   rM   r  r   r{   r   r  Zinner_k_tilesr   r  r2   rl  r3    meta__convert_weight_to_int4pack  s   



rp  c                    s`   t   dkdd  t  jt ju  fdd  d} d} j||d ft jdS )Nr   c                   S   rW   rj  r2   r2   r2   r2   r3   rR     rY   z:meta__convert_weight_to_int4pack_for_cpu.<locals>.<lambda>c                      rs   Nzexpected w to be int32, got rt   r2   rl  r2   r3   rR     rw   r   r   rt   )rF   rU   rl   rM   r   r   r{   r  ro  r2   rl  r3   (meta__convert_weight_to_int4pack_for_cpu
  s   




rr  c                    s   t  dkdd  t   dkdd  t jt jt jt jfv fdd t  jt ju  fdd j	d 	dd	 jd
S )Nr   c                   S   rW   Nzx must be a 2D tensorr2   r2   r2   r2   r3   rR     rY   z*meta__weight_int4pack_mm.<locals>.<lambda>r/  c                   S   rW   )Nzw must be a 4D tensorr2   r2   r2   r2   r3   rR     rY   c                      rs   Nr  rt   r2   r   r2   r3   rR     rw   c                      rs   rq  rt   r2   rl  r2   r3   rR   #  rw   r   rm  rt   )
rF   rU   rl   rM   r  r  r  r   r{   r   r  r2   r  r<   r3   meta__weight_int4pack_mm  s   


"rv  c                    s   t  dkdd  t   dkdd  t jt jt jt jfv fdd t  jt ju  fdd j	d 	djdS )	Nr   c                   S   rW   rs  r2   r2   r2   r2   r3   rR   *  rY   z2meta__weight_int4pack_mm_for_cpu.<locals>.<lambda>c                   S   rW   rj  r2   r2   r2   r2   r3   rR   +  rY   c                      rs   rt  rt   r2   r   r2   r3   rR   .  rw   c                      rs   rk  rt   r2   rl  r2   r3   rR   2  rw   r   rt   )
rF   rU   rl   rM   r  r  r  r  r{   r   r  r2   ru  r3    meta__weight_int4pack_mm_for_cpu(  s   


rw  rs  rt  c                 C   s   | | d | | S r   r2   rr  r2   r2   r3   kai_roundup7  s   rx  c           	         s   | dkrv||kr/d}d}d}d
dddd 
fddfd	d
}||||||S |d dkrx|| dkrzd}d}d}d
ddd  fdd} 	
fdddd  fdd fdd	|||||||S d S d S d S )Nr/  rm  r  r   c                 S   s   t || d}t | |S )Nr/  rx  )r  krsrZkr_sr_roundedup4r2   r2   r3   kai_k_roundedupF  s   
z3get_kai_packed_weight_size.<locals>.kai_k_roundedupc                    s8    | ||}|d dksJ d||d     S )Nr   r   zk_internal must be evenr2   )r  nrrz  r{  Z
k_internal)r|  kai_num_bytes_biaskai_num_bytes_multiplier_rhskai_num_bytes_sum_rhsr2   r3   9kai_get_rhs_packed_stride_rhs_pack_nxk_qsi4cxp_qsu4cxs1s0L  s   z]get_kai_packed_weight_size.<locals>.kai_get_rhs_packed_stride_rhs_pack_nxk_qsi4cxp_qsu4cxs1s0c                    s    t | || }| |||| S r-   ry  )r   r  r}  rz  r{  num_rows)r  r2   r3   7kai_get_rhs_packed_size_rhs_pack_nxk_qsi4cxp_qsu4cxs1s0Z  s   z[get_kai_packed_weight_size.<locals>.kai_get_rhs_packed_size_rhs_pack_nxk_qsi4cxp_qsu4cxs1s0rn  r   c                    sR   || dksJ | dksJ |  dksJ t | || }|||||| S r  ry  )r   r  r}  rz  r{  blr  )kai_bl_multiple_of;kai_get_rhs_packed_stride_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0kai_nr_multiple_ofr2   r3   9kai_get_rhs_packed_size_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0r  s   
z]get_kai_packed_weight_size.<locals>.kai_get_rhs_packed_size_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0c                    s^   || dksJ | dksJ |  dksJ  }| |}||}|||    S r  r2   )r  r}  rz  r{  r  num_bytes_multiplier_rhsZnum_blocks_per_rowZnum_bytes_per_block)r  #kai_get_bf16_datatype_size_in_bytesr  kai_num_blocks_per_rowr~  kai_num_bytes_per_blockr  r2   r3   r    s   
z_get_kai_packed_weight_size.<locals>.kai_get_rhs_packed_stride_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0c                   S   rW   )Nr   r2   r2   r2   r2   r3   r    r  zGget_kai_packed_weight_size.<locals>.kai_get_bf16_datatype_size_in_bytesc                    s   |  dksJ t | || S r  ry  )r  r  r  r2   r3   r    s   z:get_kai_packed_weight_size.<locals>.kai_num_blocks_per_rowc                    s   |   dksJ | d | S )Nr   r   r2   )r  r  r  r2   r3   r    s   z;get_kai_packed_weight_size.<locals>.kai_num_bytes_per_blockr2   )	Zn_bitsrq  KZ	groupsizeZkai_nrZkai_krZkai_srr  r  r2   )r  r  r  r  r|  r  r  r~  r  r  r  r3   get_kai_packed_weight_size;  s@   
-r  c                    s   t  jt ju  fdd t jj rE||kr|jt jks4||k rE|d dkrE|| dkrE|jt jkrEt	d|||} j
t|t jdS   |  } j
|t jdS )Nc                      rs   rk  rt   r2   weightsr2   r3   rR     rw   z2meta__dyn_quant_pack_4bit_weight.<locals>.<lambda>rn  r   r/  rt   )rF   rU   rM   r  backendsZkleidiaiis_availablerI   r  r  r{   r   rz   )r  Zscales_zerosr   
block_sizein_featuresout_featuresZpacked_weight_sizer2   r  r3    meta__dyn_quant_pack_4bit_weight  s    

r  c                    sR   t   dkdd  t  jt jfv  fdd  d} j|| jdS )Nr   c                   S   rW   )Nzinput must be a 2D tensorr2   r2   r2   r2   r3   rR     rY   z-meta__dyn_quant_matmul_4bit.<locals>.<lambda>c                      rs   )Nzexpected input to be f32, got rt   r2   inpr2   r3   rR     rw   r   rt   )rF   rU   rl   rM   r  r   r{   )r  Zpacked_weightsr  r  r  r  r2   r  r3   meta__dyn_quant_matmul_4bit  s   

r  c                    s   t  dkdd  t jt jt jt jfv fdd t   dkdd  t  jt ju  fdd j	d 	djdS )	Nr   c                   S   rW   rs  r2   r2   r2   r2   r3   rR     rY   z*meta__weight_int8pack_mm.<locals>.<lambda>c                      rs   rt  rt   r2   r   r2   r3   rR     rw   c                   S   rW   rj  r2   r2   r2   r2   r3   rR     rY   c                      rs   )Nzexpected w to be int8, got rt   r2   rl  r2   r3   rR     rw   r   rt   )
rF   rU   rl   rM   r  r  r  r   r{   r   )r<   r  Zq_scalesr2   ru  r3   meta__weight_int8pack_mm  s   


r  c           	         s  t  dkfdd t  dkfdd t ddkfdd t tjdd  t tjdd  t |d	kd
d  t  dv  fdd d}d}jd d }jd d }tt 	||}|
||g |S )Nr   c                         d    dS )Nz1cdist only supports at least 2D tensors, X1 got: r  r   r2   )x1r2   r3   rR     rS   z$meta_cdist_forward.<locals>.<lambda>c                      r  )Nz1cdist only supports at least 2D tensors, X2 got: r  r   r2   )x2r2   r3   rR     rS   r   c                      rM  )Nz4X1 and X2 must have the same number of columns. X1: r   z X2: r   r2   )r  r  r2   r3   rR     rW  c                   S   rW   )Nz=cdist only supports floating-point dtypes, X1 got: {x1.dtype}r2   r2   r2   r2   r3   rR     rY   c                   S   rW   )Nz=cdist only supports floating-point dtypes, X2 got: {x2.dtype}r2   r2   r2   r2   r3   rR     rY   r   c                   S   rW   )Nz)cdist only supports non-negative p valuesr2   r2   r2   r2   r3   rR     rY   Nr   r   c                      r  )Nz%possible modes: None, 1, 2, but was: r2   r2   )compute_moder2   r3   rR     r  rb  )rF   rU   rl   r   rA   is_float_dtyperM   r|   r   broadcast_shapesextendr{   )	r  r  rN  r  r1r2batch_tensor1batch_tensor2r  r2   )r  r  r  r3   meta_cdist_forward  s@   









r  c                 C   s   |j d }|j d }|j d }|j d d }|j d d }	tt||	}
|
 }|||g t|
}|dksE|dksE|dksE|dkrJt|S |t|j krV|	|}tj
|tjdS )Nr   rb  r   r   )r|   r   rF   r  copyr  mathprod
zeros_liker  r   r   )rP  r  r  rN  Zcdistc1r  r  r  r  r  Ztensor1_expand_sizeZbatch_productr2   r2   r3   meta_cdist_backward  s   



 

r  c	                    s  t  jt jt jfv  fdd t jt jt jfv fdd t tjfdd d}	|rEt |	dkdd  |	d8 }	|	d}
d urzt |t	kdd  t j
dkfd	d t    k fd
d fdddd fdd}tdkr  d}  }|tkr |	d}nR d}nL||
|}|ttfv s|s̈ d}nd}|	}jd }|tkr|rt |dkdd  |d8 }|jd }n| }|
|||fS )Nc                      rs   )Nz(expected indices to be long or int, got rt   r2   )r   r2   r3   rR   *  rw   z$meta_embedding_bag.<locals>.<lambda>c                      rs   )Nz(expected offsets to be long or int, got rt   r2   )r#  r2   r3   rR   .  rw   c                      rs   )Nz/expected weight to be floating point type, got rt   r2   )r   r2   r3   rR   2  rw   r   r   c                   S   rW   Nz1include_last_offset: numBags should be at least 1r2   r2   r2   r2   r3   rR   9  rY   c                   S   rW   )Nz@embedding_bag: per_sample_weights only supported with mode='sum'r2   r2   r2   r2   r3   rR   B  rY   c                      r  )Nz1expected per_sample_weights to be 1D tensor, got r  r  r2   )per_sample_weightsr2   r3   rR   F  rr  c                      s   d   d    dS )Nz%expected per_sample_weights.numel() (z$ to be the same as indices.numel() (ra   rz   r2   )r   r  r2   r3   rR   J  s   c                    s    | ||o| ddkS Nr   r   r   r   rm  r   padding_idx)is_fast_path_index_selectr2   r3   is_fast_path_index_select_scaleP  s   z;meta_embedding_bag.<locals>.is_fast_path_index_select_scalec                 S   s<   | j tjks| j tjko| ddko|ddko|dk S Nr   r   )rM   rF   rI   rG   r   )r   r   r  r2   r2   r3   r  U  s   z5meta_embedding_bag.<locals>.is_fast_path_index_selectc                    s"   |d ur| |||S  | ||S r-   r2   r  )r  r  r2   r3   is_fast_path]  s   z(meta_embedding_bag.<locals>.is_fast_pathcpuc                   S   rW   r  r2   r2   r2   r2   r3   rR   w  rY   )rF   rU   rM   rx   r   rA   r  r   r{   MODE_SUMr   rz   r   MODE_MAX	MODE_MEANr|   )r   r   r#  scale_grad_by_freqr  sparser  Zinclude_last_offsetr  Znum_bagsr   r  
offset2bagbag_sizemax_indicesZfast_path_sumZnumBagsr2   )r   r  r  r#  r  r   r3   meta_embedding_bag  st   








r  c                 G   sB   t | ||g|R  \}}}}t|dkr|| }||||fS )Nr  )r  r   r{   r   )r   r   r#  rC   r   r  r  r  r2   r2   r3   meta_embedding_bag_forward_only  s   r  c                 C   s.   |r|S | j js| j jr| j S |rtjS | j S r-   )rM   r   r   rF   rx   )r   rM   promote_int_to_longr2   r2   r3   _get_reduction_dtype  s   r  rt   c                C   s6   t | |dd}t| j|}t| ||}| j||dS )NT)r  rt   )r  rA   r6  r|   r7  r{   )r   rx  r9  rM   r  r  r2   r2   r3   meta_nansum  s   r  c                 C   s$   t | jtt|  }| |S r-   )rA   ry  r|   rT   r   rl   r{   )r   r  r2   r2   r3   meta_median  s   
r  c                 C   sL   t | dkrtd t| j|f}t| ||}| || j|tjdfS )Nr   zmedian CUDA with indices outputrt   )	r   rA   Zalert_not_deterministicr6  r|   r7  r{   rF   rx   )r   rl   r9  r  r2   r2   r3   meta_median_mode_dim  s   
r  c                 C   r  r-   r2   r   r2   r2   r3   meta_logical_not_  rH  r  c                    sd   t t|  kdd  t|   }d| t| j   fddttD }| |S )Nc                   S   rW   )NzZNumber of dimensions of repeat dims can not be smaller than number of dimensions of tensorr2   r2   r2   r2   r3   rR     rY   zmeta_repeat.<locals>.<lambda>r  c                    s   g | ]
} | |  qS r2   r2   rv  Zpadded_sizer.  r2   r3   r?     rW  zmeta_repeat.<locals>.<listcomp>)rF   rU   r   rl   rT   r|   r   r{   )r}   r.  Znum_new_dimensionsZtarget_sizer2   r  r3   meta_repeat  s   
r  c                 C   r  r-   r2   r   r2   r2   r3   
meta_zero_  rH  r  c                 C   s   t |tjrt| j|j | S r-   )rZ   rF   r   rV   r|   r}   r   r2   r2   r3   meta_binop_inplace  s   r  c                 C   sf   dd }dd }dd }|| r||rt d|| r$||s$t dt|tjr1t| j|j | S )	a*  
    Some checks for inplace ops.
    Checks for promotion rules for some dtypes.
    int.add/sub_(float) and bool.add/sub_(others) are rejected.
    Promoting in these in-place operations would require reallocating
    and copying over elements, hence not allowed.
    Checks for alpha param.
    c                 S       t | trt| jS t | tS r-   )rZ   r   rA   r  rM   r   r\   r2   r2   r3   is_integeric     

z.meta_binop_inplace_alpha.<locals>.is_integericc                 S   r  r-   )rZ   r   rA   r  rM   r   r  r2   r2   r3   
is_floatic   r  z,meta_binop_inplace_alpha.<locals>.is_floaticc                 S   r  r-   )rZ   r   rA   Zis_boolean_dtyperM   r   r  r2   r2   r3   is_booleanic  r  z.meta_binop_inplace_alpha.<locals>.is_booleanicz]Promotion of int.add/sub_(float) in in-place ops are not possible due to element size change.z_Promotion of book.add/sub_(others) in in-place ops are not possible due to element size change.)r   rZ   rF   r   rV   r|   )r}   r   r  r  r  r  r2   r2   r3   meta_binop_inplace_alpha  s   r  c                 K      t | tjdS Nr8   rE   r   rB   )r}   kwargsr2   r2   r3   
meta_round  s   r  c                    sl   t tj fdd tt jr&t tj fdd d S t tt fdd d S )Nc                           dj  S )Nz7: Expected input tensor to have an integral dtype. Got rt   r2   )r{  r}   r2   r3   rR   '  rS   z#shift_dtype_check.<locals>.<lambda>c                      r  )Nz6: Expected shift value to have an integral dtype. Got rt   r2   r{  rF  r2   r3   rR   ,  rS   c                      s     d S )Nz): Expected shift value to be an int. Got r2   r2   r  r2   r3   rR   1  rr  )rF   rU   rA   r  rM   rZ   r   r   r{  r}   rF  r2   r  r3   shift_dtype_check$  s   

r  c                 C      t d| | t| |tjdS )Nrshiftr  r  rE   r   rB   r  r2   r2   r3   meta_rshifts5     r  c                 C   r  )Nlshiftr  r  r  r2   r2   r3   meta_lshifts=  r  r  c                 C      |  | jS r-   r  r   r2   r2   r3   	meta_zeroE     r  c                 C   r  r-   r2   r}   rF  r2   r2   r3   
meta_fill_J  rH  r  c                 C   ri  r-   rj  r  r2   r2   r3   	meta_fillO     
r  c                 C   r  r-   r2   r   r2   r2   r3   
meta_relu_T  rH  r  c                 C      t | |tjdS r  r  )r}   r   r  r2   r2   r3   meta__add_reluY     r        ?UUUUUU?c                 C   ri  r-   rj  r}   noiselowerr  r  r   r2   r2   r3   meta_rrelu_with_noisea  s   
r  c                 C   s   t | t |fS r-   rj  r  r2   r2   r3    meta_rrelu_with_noise_functionali  s   r  c                 C   r  r-   r2   )r}   r  r  r  r   r2   r2   r3   meta_rrelu_with_noise_p  s   r  c                 C   ri  r-   rj  r}   r   r   
accumulater2   r2   r3   meta_index_putw  r  r  c                 C   s   t | j|j | S r-   rV   r|   )r}   rn  valuer2   r2   r3   meta_masked_fill_|  s   r   c                 C   s    |  |  jt| d}|S r   )r{   r   r   rA   r  )r}   rn  rm  Zmasked_scaler2   r2   r3   meta__masked_scale  s   r  c                    s@   t |jt jt jfv dd  t  jjk fdd  S )Nc                   S   rW   )NzMask must be bool or uint8r2   r2   r2   r2   r3   rR     rY   z&meta_masked_scatter_.<locals>.<lambda>c                      r  )NzEmasked_scatter: expected self and source to have same dtypes but got r   rt   r2   r}   r  r2   r3   rR     s
    )rF   rU   rM   rl  r  )r}   rn  r  r2   r  r3   meta_masked_scatter_  s   
r  c                 C   s*   t | |\} }tj| tjd}t|||S r   )r$   rF   r   r   r  )r}   rn  r  r   r2   r2   r3   meta_masked_scatter  s   r  c                 C   s
   |  |S r-   r0  )r}   rn  r  r2   r2   r3   meta_masked_scatter_backward  r  r  c                 C   r  r-   r2   r  r2   r2   r3   meta_index_put_  rH  r  c                 C   r  r-   )viewr|   r   r2   r2   r3   
meta_alias  r  r  c                    s   t |  dkdd  t | dkdd  |  }|  |d |d |d } d }||ft  d koB d k fdd |}|sqd urqt  dkd	d  t  kfd
d |S )Nr+   c                   S   rW   rS  r2   r2   r2   r2   r3   rR     rY   z)common_meta_baddbmm_bmm.<locals>.<lambda>c                   S   rW   rT  r2   r2   r2   r2   r3   rR     rY   r   r   r   c                	      rX  rY  r2   r2   r[  r2   r3   rR     s    c                   S   rW   )Nzself must be a 3D tensorr2   r2   r2   r2   r3   rR     rY   c                      s   d  d   S )Nz*Expected an input tensor shape with shape z but got shape: r   r2   )r  self_baddbmmr2   r3   rR     r  )rF   rU   rl   r   r{   )rU  rV  Zis_bmmr	  ra  Zres_rowsZres_colsr   r2   )r\  r]  r^  r  r	  r3   common_meta_baddbmm_bmm  s*   


r
  c                 C   s   t | |dS )NT)r
  )r}   r   r2   r2   r3   meta_bmm  r  r  c                 C   s<   | | }| | }|dkrt |dk t |dk kr|d8 }|S r  )rl  )r<   yqr  r2   r2   r3   div_rtn  s
    r  c                 C   sZ   t | | | ||d   d |r|d nd |d }|r+|d | | | kr+|d8 }|S r  )r  )	inputSize
kernelSizer  r  r   r}  r  Z
outputSizer2   r2   r3   pooling_output_shape_pad_lr  s*   

	r  c                    sl   t |dkdd  t dkfdd t d   d d k fdd t| | |S )Nr   c                   S   rW   )Nzstride should not be zeror2   r2   r2   r2   r3   rR     rY   z&pooling_output_shape.<locals>.<lambda>c                      r  )Nz'pad must be non-negative, but got pad: r2   r2   )padr2   r3   rR     r  r   r   c                      s   d d d  S )NzApad should be at most half of effective kernel size, but got pad=z, kernel_size=z and dilation=r2   r2   r}  r  r  r2   r3   rR     s
   )rF   rU   r  )r  r  r  r   r}  r  r2   r  r3   r    s   r  c              	      sN     }tdkodkdd  t|dko|dkdd  t|dko+|dkdd   ddko= ddk}|tjkrWt|dkoQ|oQ d	dkd
d  n"t|d	krf ddkrf|pr|dkor|or d	dk fdd td 
kod 	k	
fdd tdkodkfdd d S )Nr   c                   S   rW   )NzCkernel size should be greater than zero, but got kH: {kH}, kW: {kW}r2   r2   r2   r2   r3   rR     rY   z$pool2d_shape_check.<locals>.<lambda>c                   S   rW   )Nz>stride should be greater than zero, but got dH: {dH}, dW: {dW}r2   r2   r2   r2   r3   rR   "  rY   c                   S   rW   )Nz\dilation should be greater than zero, but got dilationH: {dilationH}, dilationW: {dilationW}r2   r2   r2   r2   r3   rR   &  rY   r   r   r/  r+   c                   S   rW   )NzExpected 4D (batch mode) tensor expected for input with channels_last layout with optional 0 dim batch size for input, but got: {input.size()}r2   r2   r2   r2   r3   rR   .  rY   c                         d    S )NzYExpected 3D or 4D (batch mode) tensor with optional 0 dim batch size for input, but got: r   r2   r  r2   r3   rR   5  rr  c                      s   d d d d  S )NzKpad should be smaller than or equal to half of kernel size, but got padW = z	, padH = z, kW = z, kH = r2   r2   )r  r  r  r  r2   r3   rR   :  s    c                      s*   d d  d d d d dS NzGiven input size: (r<   z). Calculated output size: (z). Output size is too smallr2   r2   )r  r  r  r  r  r  r2   r3   rR   @  s    )rl   rF   rU   r   r  )r   r  r  r  r  r  r  	dilationH	dilationWr  r  r  r  r  r   r   Z
valid_dimsr2   )r   r  r  r  r  r  r  r  r  r  r  r3   r    sB   

r  r  r  r  r  r  r  r  pTpHpW	dilationTr  r  r  r	  r
  r  r  r  r  c              
      s  	j }tdkodkodkfdd tdko&dko& dk fdd tdko<dko<dkfdd t|dv 	fdd t|D ]|dkradkraqVt	dk	fd	d qV|rt
kokok
fd
d td kod kod kfdd tdkodkodk
fdd d S )Nr   c                         d d  d S )Nz5kernel size should be greater than zero, but got kT: z, kH: z, kW: r2   r2   )r  r  r  r2   r3   rR   b     z$pool3d_shape_check.<locals>.<lambda>c                      r  )Nz0stride should be greater than zero, but got dT: z, dH: z, dW: r2   r2   )r  r  r  r2   r3   rR   i  s   c                      r  )Nz9dilation should be greater than zero, but got dilationT: z, dilationH: z, dilationW: r2   r2   )r  r  r  r2   r3   rR   o  r  r   c                      r  )Nz/: Expected 4D or 5D tensor for input, but got: r  r2   )r{  r   r2   r3   rR   w  rS   rA  c                      s     dj  d dS )NzZ: Expected input's non-batch dimensions to have positive length, but input has a shape of z and non-batch dimension z has length zero!)r|   r   r2   )r{  r   r   r2   r3   rR     s
   c                      s*   d d  d d d d dS )Nzinput image (T: rE  r6  z ) smaller than kernel size (kT:  kH:  kW: ra   r2   r2   )r	  r  r
  r  r  r  r2   r3   rR     s   r   c                      s(   d d d  d d d S )NzHpad should be smaller than or equal to half of kernel size, but got kT: r  r  z padT: z padW: z padH: r2   r2   )r  r  r  r  r  r  r2   r3   rR     s   r   c                      s6   d d d  d d d d d dS r  r2   r2   )r	  r  r
  r  r  r  r  r2   r3   rR     s   )r   rF   rU   r   r   )r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r	  r
  r  r  r  r{  r  r   r2   )r  r  r  r  r  r  r{  r   r	  r   r  r
  r  r  r  r  r  r  r  r  r  r  r3   r  F  sJ   	"r  c                 C   s   | j }t| |||||||	|
|||||||||||| t|||d | t|||d | t|||d | t|||d | t|||d | t|||d | t|||d | t|||d | d S )Nr/  r+   r   r   r   r  r  )r   r%  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r	  r
  r  r  r  r{  r   r2   r2   r3   max_pool3d_backward_shape_check  s@   r!  c                 C   s   | j }t| ||||||||	|
|ddd|||||||d t|||d | t|||d | t|||d | t|||d | d S )Nr   Tr/  r+   r   r   )r   r%  r  r  r  r  r  r  r  r  r  r  r  r	  r
  r  r  r  r{  r   r2   r2   r3   r    s:   r  c                 C   sB  dd }|d|\}}t t|dv dd  t|dkr#||}	}
n|d|\}	}
|d	|\}}|d
|\}}| d}| d}| d}t| }|t jkr^t |  dkdd  n|t jkrpt |  dv dd  nt ddd  t	||||	||}t	||||
||}t
| |||	|
|||||||||| |||fS )Nc                    r  )Nr  c                      r  )Nzmax_pool2d: r  r2   r2   r  r2   r3   rR   "  rw   zEmax_pool2d_checks_and_compute_shape.<locals>.unpack.<locals>.<lambda>r   r   r  r  r2   r  r3   r    r  z3max_pool2d_checks_and_compute_shape.<locals>.unpackr  r  c                   S   rW   )NzOmax_pool2d: stride must either be omitted, a single int, or a tuple of two intsr2   r2   r2   r2   r3   rR   ,  rY   z5max_pool2d_checks_and_compute_shape.<locals>.<lambda>r   r   r  r}  r  rb  r   r/  c                   S   rW   )NzMnon-empty 4D (batch mode) tensor expected for input with channels_last layoutr2   r2   r2   r2   r3   rR   =  rY   r  c                   S   rW   )Nz9non-empty 3D or 4D (batch mode) tensor expected for inputr2   r2   r2   r2   r3   rR   B  rY   Fc                   S   rW   )Nz?Unsupport memory format. Supports only ChannelsLast, Contiguousr2   r2   r2   r2   r3   rR   G  rY   )rF   rU   r   r   rA   r  r  rl   r   r  r  )r   r  r   r  r}  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r2   r2   r3   r    sb   		









r  c                    s   t |||||\}tj jk fdd |jfdd}	|	  |	| t}
tjjjj	|
dS )Nc                      r  )NzExpected dtype z  for `gradOutput` but got dtype rt   r2   r  r2   r3   rR   w  r  z7meta_max_pool2d_with_indices_backward.<locals>.<lambda>c                    s:   t | d   t | d  t | d  d S )Nr+   r   r   )r  )rn  )r  r   r  r  r2   r3   _check_dim_size}  s   z>meta_max_pool2d_with_indices_backward.<locals>._check_dim_sizer(  )
r  rF   rU   rM   r   rA   r  rp   r|   ri   )r%  r}   r  r   r  r}  r  r   r  r"  r   r2   )r%  r  r   r  r  r}   r3   %meta_max_pool2d_with_indices_backwardb  s.   

r#  c                 C   s   t | |||||\}}}|  dkr| dnd}	t| }
|  dkr*|||g}n|	|||g}tj|| j| j|
dtj|tj	| j|
dfS r  )
r  rl   r   rA   r  rF   rp   rM   ri   r   r  r2   r2   r3   meta_max_pool2d_with_indices  s2   
r$  c           
   	      s  t jdv fdd j}t|d |D ] t  dkd  d  d qt td	kd
d  t t|d	kdd  d}dd|dkr_d}nd}t jjkdd  t jdkfdd d}d}d	 t ||kd t ||kdd  t  d	k fdd t |d d  d kfdd t |d d  d kfdd  dkr|||d |d g}	n	||d |d g}	t j|	jj	dt j|	t j
j	dfS )Nr  c                      rs   )Nz:fractional_max_pool2d: Expected 3D or 4D tensor, but got: r  r2   r   r2   r3   rR     rw   z,meta_fractional_max_pool2d.<locals>.<lambda>r+   r   z^fractional_max_pool2d: Expected input to have non-zero  size for non-batch dimenions, but got r  z emptyr   c                   S   rW   )NzNfractional_max_pool2d: kernel_size musteither be a single int or tuple of Intsr2   r2   r2   r2   r3   rR     rY   c                   S   rW   )NzOfractional_max_pool2d: output_size must either be a single int or tuple of Intsr2   r2   r2   r2   r3   rR     rY   r  rb  r   r/  r   c                   S   rW   )Nz6Expect _random_samples to have the same dtype as inputr2   r2   r2   r2   r3   rR     rY   c                      rs   )Nz1Expect _random samples to have 3 dimensions got, r  r2   )random_samplesr2   r3   rR     rw   z=Expect _random_samples.size(0) no less then input batch size.c                   S   rW   )Nz<Expect _random_samples.size(1) equals to input channel size.r2   r2   r2   r2   r3   rR     rY   c                      r  )Nz/Expect _random_samples.size(2) equals to 2 got .r2   r2   )r   r2   r3   rR     rw   c                         dd  d  S )Nz%fractional_max_pool2d: kernel height r   z' is too large relative to input height r2   r2   )input_heightr  r2   r3   rR     r  c                      r'  )Nz$fractional_max_pool2d: kernel width r   z& is too large relative to input width r2   r2   )input_widthr  r2   r3   rR     r  r5  )rF   rU   r   r   r   r   rM   rl   rp   ri   r   )
r}   r  r  r%  r   Zinput_channelsZinput_batchr   cr   r2   )r   r(  r)  r  r%  r}   r3   meta_fractional_max_pool2d  s   










r+  c                 C   s  t t|dv dd  |d }t|dkr|n|d }t|dkr$|n|d }t | p2t|dv dd  |s;|n|d }	|sC|nt|dkrK|	n|d }
|sS|nt|dkr[|	n|d }t t|dv dd  |d }t|dkrw|n|d }t|dkr|n|d }t t|dv d	d  |d }t|dkr|n|d }t|dkr|n|d }t | jd
v dd  | jdkr| dnd}| d}| d}| d}| d}t||||	||}t||||
||}t||||||}t| |||||	|
|||||||||||||d | jdkot| t j	k}| jdkr:| 
d}|  o2|jt j	d}||||f}n|||||f}| |}| j|t jd}|r_|jt j	d}|jt j	d}||fS )Nr  c                   S   rW   NzMmax_pool3d: kernel_size must either be a single int, or a tuple of three intsr2   r2   r2   r2   r3   rR     rY   z.meta_max_pool3d_with_indices.<locals>.<lambda>r   r   r   c                   S   rW   NzQmax_pool3d: stride must either be omitted, a single int, or a tuple of three intsr2   r2   r2   r2   r3   rR     rY   c                   S   rW   NzImax_pool3d: padding must either be a single int, or a tuple of three intsr2   r2   r2   r2   r3   rR   &  rY   c                   S   rW   NzJmax_pool3d: dilation must be either a single int, or a tuple of three intsr2   r2   r2   r2   r3   rR   .  rY   r   c                   S   rW   r  r2   r2   r2   r2   r3   rR   6  rY   rA  r  r  rb  r   zmax_pool3d_with_indices()r/  r   rt   )rF   rU   r   r   r   r  r  rA   r  r  r  r  r{   r   r   )r   r  r   r  r}  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r	  r
  r  r  r  r  input_channels_last_checkr   r   r   r2   r2   r3   meta_max_pool3d_with_indices
  s   

  







r2  c                 C   s^  t t|dv dd  |d }t|dkr|n|d }	t|dkr$|n|d }
t | p2t|dv dd  |s;|n|d }|sC|	nt|dkrK|n|d }|sS|
nt|dkr[|n|d }t t|dv dd  |d }t|dkrw|n|d }t|dkr|n|d }t t|dv d	d  |d }t|dkr|n|d }t|dkr|n|d }t |jd
v dd  |d}|d}|d}|d}| d}| d}| d}t|| ||||	|
|||||||||||||||d |jdkot|t jk}|jdkr|	d}|
  o|j
t jd}||j}|r-|jt jd}|S )Nr  c                   S   rW   r,  r2   r2   r2   r2   r3   rR     rY   z7meta_max_pool3d_with_indices_backward.<locals>.<lambda>r   r   r   c                   S   rW   r-  r2   r2   r2   r2   r3   rR     rY   c                   S   rW   r.  r2   r2   r2   r2   r3   rR     rY   c                   S   rW   r/  r2   r2   r2   r2   r3   rR     rY   r   c                   S   rW   r  r2   r2   r2   r2   r3   rR     rY   r  r  rb  r   z"max_pool3d_with_indices_backward()rA  r/  r   )rF   rU   r   r   r   r!  rA   r  r  r  r  r{   r|   r   )r%  r   r  r   r  r}  r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r	  r
  r  r  r  r  r1  r(  r2   r2   r3   %meta_max_pool3d_with_indices_backwards  s   
  









r3  gridc                    s   t j jk fdd t jt jko jt jk fdd t jd  jd k fdd t  jd jd k fdd tdjD ]t j dkfd	d qPd S )
Nc                      r  )NzNgrid_sampler(): expected input and grid to be on same device, but input is on z and grid is on rN  r2   r4  r   r2   r3   rR     r  z+check_grid_sampler_common.<locals>.<lambda>c                      r  )NzTgrid_sampler(): expected input and grid to have torch.strided layout, but input has z and grid has )rh   r2   r5  r2   r3   rR     r  r   c                      r  )NzZgrid_sampler(): expected grid and input to have same batch size, but got input with sizes  and grid with sizes r  r2   r5  r2   r3   rR     r  r   r   c                      s   dj d  d j S )Nz+grid_sampler(): expected grid to have size r   z, in last dimension, but got grid with sizes )r   r|   r2   r5  r2   r3   rR     s   c                      r  )NzYgrid_sampler(): expected input to have non-empty spatial dimensions, but input has sizes r  r  r  r2   r   r2   r3   rR     r!  )rF   rU   ri   rh   r  r|   r   r   )r   r4  r2   )r4  r   r   r3   check_grid_sampler_common  s,   
r7  c                   @   s   e Zd ZdZdZdZdS )GridSamplerInterpolationr   r   r   N)rc   
__module____qualname__ZBILINEARZNEARESTBICUBICr2   r2   r2   r3   r8    s    r8  interpolation_modec                    sP   t jdkoj jk fdd t jdko |tjjk dd  d S )NrA  c                      r  )Nzdgrid_sampler(): expected 5D input and grid with same number of dimensions, but got input with sizes r6  r  r2   r5  r2   r3   rR     s
   z'check_grid_sampler_3d.<locals>.<lambda>c                   S   rW   )Nz<grid_sampler(): bicubic interpolation only supports 4D inputr2   r2   r2   r2   r3   rR     rY   )rF   rU   r   r8  r;  r  )r   r4  r<  r2   r5  r3   check_grid_sampler_3d  s   

r=  c           
      C   s:   |d }|rt j|t jd}nd }t j|t jd}	||	fS Nr   r   )rF   r  r   r   
r%  r   r4  r<  padding_modealign_cornersrK  Zinput_requires_gradr(  	grad_gridr2   r2   r3   grid_sampler_2d_backward_meta  s   
rC  c           
      C   s\   t | | t| || | jd }| jd }|jd }|jd }|jd }	| |||||	fS )Nr   r   r   r+   )r7  r=  r|   r{   )
r   r4  r<  r@  rA  rq  CZout_DZout_HZout_Wr2   r2   r3   grid_sampler_3d-  s   
	




rE  rB  c           
      C   sP   t || t||| |d }|rtj|tjd}nd }tj|tjd}	||	fS r>  )r7  r=  rF   r  r  r   r?  r2   r2   r3   grid_sampler_3d_backward@  s   
rF  c                 O   s:   | dd }|st|}||d< tj| g|R i |S )NrM   )rL   rA   Z	get_dtyperF   rp   )r   r3  rC   r  rM   r2   r2   r3   fullX  s
   
rG  c                 C   s   |t jkrJt |d u dd  t jd|d u r| jn|||d u r"| jn||d}| jr8||  | 	 | 
  n||  |  d |d |S tjj| |||||d}|d |S )Nc                   S   rW   )Nz9memory format option is only supported by strided tensorsr2   r2   r2   r2   r3   rR   n  rY   zzeros_like.<locals>.<lambda>r   r   TrM  )rF   Z
sparse_coorU   rp   rM   ri   	is_sparseZsparse_resize_and_clear_r   
sparse_dim	dense_dimrl   Z_coalesced_r)   r   r   fill_)r}   rM   rh   ri   rj   r   r  r2   r2   r3   r  b  s:   
	

	r  c           	         s   ddl m}  }t|dkdd   dkr n |   }t| |kp1||k  fdd dkrAn| t }t } |    }| = | = 	|||S )Nr   r>  c                   S   rW   )Nz-select() cannot be applied to a 0-dim tensor.r2   r2   r2   r2   r3   rR     rY   zmeta_select.<locals>.<lambda>c                      s   d d   d  S )Nzselect(): index z! out of range for tensor of size z at dimension r   r2   rl   rv   r}   r2   r3   rR     s
    )
r   r?  rl   rF   ry   r   r   r   r   r   )	r}   rl   rv   r?  r   r   new_sizer   Znew_storage_offsetr2   rL  r3   meta_select  s(   
rN  c                 C   ri  r-   rA   Zclone_preserve_strides)r}   r   rl   rv   r2   r2   r3   meta_select_scatter  r  rP  c                 C   ri  r-   rO  )r}   r   rl   re   rd   stepr2   r2   r3   meta_slice_scatter  r  rR  dim_post_exprwrap_scalarc                 C   sb   |dkr
|sJ d}| }|d }| |k s| |kr'J d|  d| d| d| dk r/| |7 } | S )Nr   r   zdim z out of bounds (r`   ra   r2   )rl   rS  rT  r   rS  r2   r2   r3   r     s   ,r   c                 C   s   |   dkrdS | j| S r  r  )rn  rl   r2   r2   r3   ensure_nonempty_size  s   rU  c                    st   t  d}t  d}t||kdd  t|D ] kr7tttk fdd qd S )Nr   c                   S   rW   )NzDIndex tensor must have the same number of dimensions as input tensorr2   r2   r2   r2   r3   rR     rY   z$gather_shape_check.<locals>.<lambda>c                      s$   d dj  dj  d   S )Nz!Size does not match at dimension z expected index  to be no larger than self  apart from dimension r  r2   rl   r   rv   r}   r2   r3   rR     s    )rS  rl   rF   rU   r   rU  )r}   rl   rv   	self_dimsZ
index_dimsr2   rX  r3   gather_shape_check  s   rZ  c                    sb   ddl m} t||  }|  dk}|s+t jtjk fdd t	| |  | 
 jS )Nr   r>  c                      rs   )Nz2gather(): Expected dtype int64 for index, but got rt   r2   ru   r2   r3   rR     rw   zmeta_gather.<locals>.<lambda>)r   r?  r   rl   rz   rF   rU   rM   rx   rZ  r{   r|   )r}   rl   rv   Zsparse_gradr?  wrapped_dimZis_index_emptyr2   ru   r3   meta_gather  s   

r\  c                 C   s   |r*| dkrdS | dkrdS | dkrdS | dkrdS | d	kr d
S t ddd  d S | dkr0dS | dkr6dS t ddd  d S )Nr  Z
REDUCE_ADDr  ZREDUCE_MULTIPLYmeanZREDUCE_MEANZamaxZREDUCE_MAXIMUMZaminZREDUCE_MINIMUMFc                   S   rW   )Nz=reduce argument must be either sum, prod, mean, amax or amin.r2   r2   r2   r2   r3   rR     rY   z#get_operator_enum.<locals>.<lambda>addmultiplyc                   S   rW   )Nz/reduce argument must be either add or multiply.r2   r2   r2   r2   r3   rR     rY   r  )reduce_use_new_optionsr2   r2   r3   get_operator_enum  s,   rb  c                    sd   ddl m} || dkrt|jtjk fdd |d ur0t|j|jk fdd d S d S )Nr   r>  c                      
     dS )Nz"(): Expected dtype int64 for indexr2   r2   method_namer2   r3   rR     r  z,scatter_gather_dtype_check.<locals>.<lambda>c                      rc  )Nz0(): Expected self.dtype to be equal to src.dtyper2   r2   rd  r2   r3   rR     r  )r   r?  rz   rF   rU   rM   rx   )re  r}   rv   src_optr?  r2   rd  r3   scatter_gather_dtype_check  s   



rg  c                 C   s
   t | dS r   )rS  r   r2   r2   r3   ensure_nonempty_dim  s   
rh  c           	         s0  ddl m} | dkrd S tt t kdd  d}t }t|D ]}t|}| kr:q.|t|krEd} nq.|scd urct|D ]}t|}|t|krbd} nqPd urtt t kdd  t|  fdd d S t|  fd	d d S )
Nr   r>  c                   S   rW   NzCIndex tensor must have the same number of dimensions as self tensorr2   r2   r2   r2   r3   rR   #  rY   z%scatter_shape_check.<locals>.<lambda>FTc                   S   rW   ri  r2   r2   r2   r2   r3   rR   =  rY   c                      s&   dj  dj  d  dj   S )NExpected index rV  rW  z and to be no larger than src r  r2   rl   rv   r}   rf  r2   r3   rR   A  s    c                      s   dj  dj  d   S )Nrj  rV  rW  r  r2   rL  r2   r3   rR   G  s    )	r   r?  rz   rF   rU   rh  rl   r   rU  )	r}   rl   rv   rf  r?  Zis_wrong_shaperY  r   Zindex_d_sizer2   rk  r3   scatter_shape_check  sJ   

rl  c                 C   sD   t ||  }td| || t| ||| |d ur t|| d S d S )Nscatter)r   rl   rg  rl  rb  )r}   rl   rv   r   r`  ra  r[  r2   r2   r3   scatter_meta_implM  s   rn  c                 C   s   t | |||d | | jS Nr^  rn  r{   r|   r}   rl   rv   r   r2   r2   r3   meta_scatter_addV  s   rr  c                 C   s   t | |||d | S ro  rn  rq  r2   r2   r3   meta_scatter_add_\  r[  rt  c                 C   s0   t |tjr|nd }t| |||| | | jS r-   )rZ   rF   r   rn  r{   r|   r}   rl   rv   Zsrc_or_valuer  r   r2   r2   r3   meta_scatterb  s   
rv  c                 C   s(   t |tjr|nd }t| |||| | S r-   )rZ   rF   r   rn  ru  r2   r2   r3   meta_scatter_q  s   	rw          queryr   r  	dropout_p	is_causalreturn_debug_maskrm  c              	   C   sJ  |  d}|  d}|  d}	|  d}
| d}| dd}t|dd}tj|||	ftj| jd}|rb|
dkr=dnd}t|	| }|dkrMd}n|dkrSd}tj|||	|f| j	| jd}n
tjd| j	| jd}tj
jrtj rtjd	tjd
d}tjd	tjd
d}ntjdtjd
d}tjd	tjd
d}||d d |	||||f	S )Nr   r   r   r+   r5  @         r2   rg   )r   r  rF   r   rp   rI   ri   r  ceilrM   versionhipr   r  rx   r  )ry  r   r  rz  r{  r|  rm  r   	num_headsmax_seqlen_batch_qhead_dimmax_seqlen_batch_kZquery_t	attention	logsumexpblocksize_cmax_seqlen_k
debug_maskseedoffsetr2   r2   r3   (meta__scaled_dot_product_flash_attention  sP   






r  	attn_biascompute_log_sumexpc	              	   C   s   |  d}	|  d}
|  d}| d}| d}tj|	|
||f| j| jd}tj|	|
|ftj| jd}tjdtjdd}tjdtjdd}||d d ||||d f	S Nr   r   r   r   r5  r2   rg   r   rF   rp   rM   ri   rI   rx   )ry  r   r  r  r  rz  r{  r|  rm  rw  r  S_QS_KVD_Vr  
logsum_expr  r  r2   r2   r3   (meta__scaled_dot_product_cudnn_attention  s.   




r  c              	   C   s   |  d}|  d}	|  d}
| d}| d}tj||	|
|f| j| jd}tj||	|
ftj| jd}tjdtjdd}tjdtjdd}||d d |
|||d f	S r  r  )ry  r   r  r  rz  r{  r|  rm  rw  r  r  r  r  r  r  r  r  r2   r2   r3   5meta__scaled_dot_product_fused_attention_overrideable  s.   




r  r  r  	cum_seq_q	cum_seq_kmax_qmax_kphilox_seedphilox_offsetc                 C   sX   t |dddd}t |dddd}t |dddd}|||fS r  )rF   r   r  )r  ry  r   r  r   r  r  r  r  r  rz  r{  r  r  rm  grad_qgrad_kgrad_vr2   r2   r3   'meta__scaled_dot_product_flash_backward  s   
r  	attn_maskc                 C   sR   |  d}|  d}|  d}	t| }
tj||	|ftj| jddd}|
|fS )Nr   r   r   r5  )r   rF   r   rp   rI   ri   r  )ry  r   r  rz  r{  r  rm  r   r  r  r  r  r2   r2   r3   0meta__scaled_dot_product_flash_attention_for_cpu/  s"   




r  c
                 C   s   | d}
| d}| d}| d}| d}tj|
|||fd|j|jd}tj|
|||fd|j|jd}tj|
|||fd|j|jd}|||fS )Nr   r   r+   r   r   r   r   r+   r5  )r   rF   empty_permutedrM   ri   )r  ry  r   r  r   r  rz  r{  r  rm  r   r  r  len_qZlen_kr  r  r  r2   r2   r3   9meta__scaled_dot_product_flash_attention_for_cpu_backwardQ  s0   








r  c                 C   s   |  dd} | dd}| dd}| d}| d}	| d}
|d}tj||	|
|| j| jd}tjjrDtj	 rD	 |rA|	nd}n|rOt
|	d d nd}tj||
|ftj| jd}| dd}tjdtjd	d}tjdtjd	d}||||fS )
Nr   r   r   rb  r   r5  rn  r2   rg   )r  r   rF   rp   rM   ri   r  r  r   r  r  r  rI   rx   )ry  r   r  r  r  rz  r{  rm  rw  r  r  Kvr  logsumexp_dimr  r  r  r2   r2   r3   ,meta__scaled_dot_product_efficient_attention  s*   



r  grad_input_maskc                 C   s  | d}| d}| d}| d}| d}| d}tj||||fd|j|jd}tj||||fd|j|jd}tj||||fd|j|jd}d }|d ur|
d r| d}|d dkrb|n|d |d  }t|  }||d< tj||j|jd}|d	d |f }||||fS )
Nr   r   r   r+   r  r5  r   r  .)r   rF   r  rM   ri   r   rp   )r  ry  r   r  r  r   r  r  r  rz  r  r{  rm  r   r  r  r  Z
head_dim_vr  r  r  r  	grad_biaslastDimlastDimAligned	new_sizesr2   r2   r3   +meta__scaled_dot_product_efficient_backward  sF   









 
r  c                 C   s(   t |}t |}t |}|||fS r-   rj  )r  ry  r   r  r   r  r  r  r  r  r  r  r  rz  r{  rm  r  r  r  r2   r2   r3   'meta__scaled_dot_product_cudnn_backward  s   



r  window_size_leftwindow_size_right	seqused_kalibi_slopesc                 C   s  |d u r	|  dn| d }|d u r|  dn|}|d u r#| dn|}|  d}|  d}t| }|d u rFtj|||ftj| jd}n|  d}tj||ftj| jd}|	r|dkr_dnd}t|| }|dkrod}n|dkrud}tj||||f| j	| jd}n
tjd| j	| jd}d	\}}tj
jrtj rtjd
tjdd}tjd
tjdd}ntjdtjdd}tjd
tjdd}|||||fS )Nr   r   rb  r   r5  r}  r~  r  NNr2   rg   r   )r   rz   rF   r   rp   rI   ri   r  r  rM   r  r  r   r  rx   r  )ry  r   r  r  r  r  r  rz  r{  r|  rm  r  r  r  r  r   r  r  r  r  r  r  Ztotal_qr  r  r  r  r  r2   r2   r3   meta__flash_attention_forward  sR   




r  c                 C   s(   t |}t |}t |}|||fS r-   rj  )r  ry  r   r  r   r  r  r  r  r  rz  r{  r  r  rm  r  r  
grad_querygrad_key
grad_valuer2   r2   r3   meta__flash_attention_backwardV  s   



r  cu_seqlens_qcu_seqlens_kmax_seqlen_qr  custom_mask_typecausal_diagonalseqlen_kwindow_sizec                 C   s   |  d}|  d}| d}|  d}| d}tj||||| j| jd}|d ur1| dd n|}|}|d urA|d us?J |}|d urG|n|}|
rTt|d d nd}tj|||ftj| jd}tjdtjdd}tjdtjdd}||||||fS )	Nr   r   rb  r   r5  rn  r2   rg   )	r   rF   rp   rM   ri   r  r  rI   rx   )ry  r   r  r   r  r  r  r  rz  r  r  rm  r  r  r  rw  r  rq  r  r  r  Zlogsumexp_batch_dimZactual_max_seqlen_qZactual_max_seqlen_kr  r  r  r  r2   r2   r3   !meta__efficient_attention_forwardu  s,   




r  bias_requires_gradnum_splits_keyshared_storage_dqdkdvc                 C   sL  |rSt |jd |jd kdd  t |jd |jd kdd  t jg |jdd d|jd |jd R |j|jd	}|d
d}|d
d}|d
d}nt |}t |}t |}|d ur|d}|d dkrs|n|d |d  }t	| }||d< t j||j|jd	}|dd |f }nt jd|jd}||||fS )Nr   c                   S   rW   )Nz,seqlen must match for `shared_storage_dqdkdvr2   r2   r2   r2   r3   rR     rY   z4meta__efficient_attention_backward.<locals>.<lambda>r+   c                   S   rW   )Nz3embedding dim must match for `shared_storage_dqdkdvr2   r2   r2   r2   r3   rR     rY   r   rb  r   r5  r  r   r  .r2   rN  )
rF   rU   r|   rp   rM   ri   rA  r   r   r   )r  ry  r   r  r   r  r  r  r  r  rz  r  r  r  r  rm  r  r  chunkr  r  r  r  r  r  r  r2   r2   r3   "meta__efficient_attention_backward  s:   *



 r  scale_ascale_bscale_resultuse_fast_accumc                    s<  dd }t  dko dkfdd t |jo$|jfdd tdkrdd	 }	d
d }
dd }t |	 pJ|fdd t |
 p\|fdd t dd dkfdd t dd dkodd dkfdd t jt jkojt jkdd  j\ }d	 dkr	 dkrnMt  dkoĈ dkfdd d krddkrddkrdkrt 
 o
 dd  nt d fdd |d ur|nj}t jdd|jdS )Nc                 S   s   | t jt jt jt jfv S r-   )rF   r  Zfloat8_e5m2Zfloat8_e4m3fnuzZfloat8_e5m2fnuzrt   r2   r2   r3   is_fp8_type  s   z#meta_scaled_mm.<locals>.is_fp8_typer   c                      s   d   d    S )Nz%Inputs must be 2D but got self.dim()=z and mat2.dim()=r   r2   r   r}   r2   r3   rR     r  z meta_scaled_mm.<locals>.<lambda>c                      r  )Nz8Expected both inputs to be fp8 types but got self.dtype=z and mat2.dtype=rt   r2   r  r2   r3   rR     r  r   c                 S   s   | d | d ko| d dkS r  r2   r  r2   r2   r3   is_row_major     z$meta_scaled_mm.<locals>.is_row_majorc                 S   s   | d dko| d dkS r  r2   r  r2   r2   r3   is_col_major  r  z$meta_scaled_mm.<locals>.is_col_majorc                 S   s   |  ddkp|  ddkS r  r   )Z	tensor_2dr2   r2   r3   has_zero_dim	  r  z$meta_scaled_mm.<locals>.has_zero_dimc                      r  )Nz#self must be row_major, got stride r  r2   r   r2   r3   rR     rr  c                      r  )Nz#mat2 must be col_major, got stride r  r2   r   r2   r3   rR     rr  r   r  r   c                      s   d  d S )NzBExpected self.size(1) to be divisible by 16, but got self.size(1)=r   r   r2   r   r2   r3   rR     rS   c                      rs   )Nz>Expected both dimensions of mat2 to be divisble by 16 but got r  r2   r  r2   r3   rR     rw   c                   S   rW   )Nz6Both scale_a and scale_b must be float (fp32) tensors.r2   r2   r2   r2   r3   rR      rY   c                      s   d   d  S )NzLFor non-tensorwise scaling, scale tensors must be 2D, but got scale_a.dim()=z and scale_b.dim()=r   r2   )r  r  r2   r3   rR   +  r  c                   S   rW   )Nz@Both scale_a and scale_b must be contiguous for rowwise scaling.r2   r2   r2   r2   r3   rR   7  rY   Fc                      sB   d  d d d d d d d d d dS )	Nz}Invalid scaling configuration. For tensorwise scaling, both scales should be scalar. For rowwise scaling, scale_a should be (z, 1), scale_b should be (1, z). Got scale_a.size()=(r   r`   r   z) and scale_b.size()=(ra   r   r2   )r  r   r  r  r2   r3   rR   =  s   r5  )rF   rU   rl   rM   r   r   r   r  r|   rz   r  rp   ri   )r}   r   r  r  r   r  r   r  r  r  r  r  Z_kZ
_out_dtyper2   )r  r   r   r  r  r}   r3   meta_scaled_mm  sn   


"


 r  c                 C   s    t | ||||dd | | jS NT)ra  rp  r}   rl   rv   r   r  r  r2   r2   r3   meta_scatter_reduce_twoJ  s   r  c                 C   s   t | ||||dd | S r  rs  r  r2   r2   r3   meta_scatter_reduce__twoQ  s   r  c                   sh   t d    k odkn   fdd   dkr&t j|t j jdS t j d|t j jdS )Nr   r   c                      r  )Nz@The probabilty distributions dimensions must be 1 or 2, but got r   r2   r  r2   r3   rR   \  rr  z"meta_multinomial.<locals>.<lambda>r   r5  )rF   rU   rl   rp   rx   ri   r   )r   num_samplesreplacementr   r2   r  r3   meta_multinomialW  s   
r  c                 C   s   d}| D ]}||9 }q|S r   r2   )vsr  vr2   r2   r3   multiply_integerse  s   
r  c                    s   t tkfdd d  t t k fdd t tdd dd  D o9tdd D fdd d d \}}||gR S )Nc                         d  dt  S )Nz%It is expected output_size equals to , but got size r  r2   )num_spatial_dimsr  r2   r3   rR   o  r  z'upsample_common_check.<locals>.<lambda>r   c                      r  )Nz$It is expected input_size equals to r  r  r2   )expected_input_dimsr  r2   r3   rR   t  r  c                 s   r  r  r2   )r;   rK  r2   r2   r3   r]   x  r  z(upsample_common_check.<locals>.<genexpr>c                      r^   )NzDInput and output sizes should be greater than 0, but got input size z and output size r2   r2   )r  r  r2   r3   rR   y  s
    )rF   rU   r   r  )r  r  r  r  Zchannelsr2   )r  r  r  r  r3   upsample_common_checkl  s   

*r  c                    sZ   t   dkpt  dd   fdd t  |dd} |jt	 dS )Nr   r   c                      r  )Nz>Non-empty 3D data tensor expected but got a tensor with sizes r   r2   r  r2   r3   rR     rr  z$upsample_nearest1d.<locals>.<lambda>r  r   
rF   rU   rz   r  r   r  r{   r   rA   r  )r   r  scalesfull_output_sizer2   r  r3   upsample_nearest1d     


r  c           	         s   t   dkpt  dd   fdd t  |dd} |}t } j	\}}}} j
jdkr?|dk r?t j}|j|d	}|S )
Nr   r   c                      r  Nz>Non-empty 4D data tensor expected but got a tensor with sizes r   r2   r  r2   r3   rR     rr  z$upsample_nearest2d.<locals>.<lambda>r   r  r   r/  r   )rF   rU   rz   r  r   r  r{   rA   r  r|   ri   rb   r   
contiguous)	r   r  scales_hscales_wr  r   r   rD   Z
n_channelsr2   r  r3   upsample_nearest2d  s   



r  r  r  r  r  c                    st   t ||dd tjdkfdd tdD ]t  k fdd q|jt	dS )Nr   r  r/  c                      rs   NzFExpected grad_output to be a tensor of dimension 4 but got: dimension r  r2   r'  r2   r3   rR     rw   z-upsample_nearest2d_backward.<locals>.<lambda>c                
      &   d d   d d  S )NzCExpected grad_output to have the same shape as output; output.size() = z but got grad_output.size(r   r2   r  r%  r   r2   r3   rR     s   r   )
r  rF   rU   r   r   r   r{   r   rA   r  )r%  r  r  r  r  r2   r  r3   upsample_nearest2d_backward  s   

	r  c                    sZ   t   dkpt  dd   fdd t  |dd} |jt	 dS )Nr   r   c                      r  )Nz>Non-empty 5D data tensor expected but got a tensor with sizes r   r2   r  r2   r3   rR     rr  z$upsample_nearest3d.<locals>.<lambda>r+   r  r   r  )r   r  Zscales_dr  r  r  r2   r  r3   upsample_nearest3d  r  r  c           
      C   s   t | t j| t jd}}|d urQ|d urQt|tsJ t|ts$J |j}| }	t||}t||}|||	 |||	 t	||d t	||d ||fS ||fS )Nrt   )r  r  )
rF   r   r   rZ   r   r|   r   r   r   r!   )
r}   stablerl   Z
descendingr   r   r  r   r   Z
out_strider2   r2   r3   	meta_sort  s   	

r  c                    s  t jdkfdd t jjkfdd dd urPt jdkfdd t  kfdd t jjkfdd t jdkfd	d d
   t   k fdd t tfddfD dd  d S )Nr   c                          j  dS Nz != 2r  r2   input_gatesr2   r3   rR     rw   z%rnn_cell_checkSizes.<locals>.<lambda>c                         j  d j  S N != r  r2   )hidden_gatesr  r2   r3   rR     r  r   c                      r  )Nz != 1r  r2   )
input_biasr2   r3   rR     rw   c                      s      d  S r  r  r2   )
gates_sizer   r2   r3   rR     r  c                      r  r  r  r2   )hidden_biasr   r2   r3   rR     r  c                      r  r  r  r2   )prev_hiddenr2   r3   rR     rw   r   c                
      s,      dd d d d  d
S )Nr  r   z * z // z (aka ra   )rz   r   r2   )expected_prev_hidden_numelfactorr  r  r  r2   r3   rR     s   , c                 3   s    | ]	}|j  j kV  qd S r-   rN  r:   r  r2   r3   r]     s
    

z&rnn_cell_checkSizes.<locals>.<genexpr>c                   S   rW   )Nz%expected all inputs to be same devicer2   r2   r2   r2   r3   rR     rY   )rF   rU   r   r|   r   rz   r  )r  r  r   r  r  r  r2   )r  r  r  r  r  r   r  r  r3   rnn_cell_checkSizes  s8   





r  c                 C   sL   t | |||d| tj| tjd}tj|tjd}tj|tjd}|||fS )Nr/  r   )r  rF   r   r   )r  r  cxr   r  	workspacehycyr2   r2   r3   _thnn_fused_lstm_cell_meta   s
   
r  c                 C   s(  t |dk}|rt |}|d }| jd }n|
r| jd n| jd }|
r)| jd n| jd }d}|r4dnd}|dkr<|n|}|rG||| g}n|
rP|||| gn|||| g}| |}|	| ||g}|d u rptjd| jd}n||}||	| ||g}|rdnd}| j|tjd}|||||fS )Nr   r   r   r   rN  rt   )r   r|   r{   rF   rp   ri   r  )r   r   Zweight_stride0Z
weight_bufhxr  r  hidden_sizeZ	proj_size
num_layersbatch_firstZdropouttrainbidirectionalbatch_sizesZdropout_stateZis_input_packed
seq_length
mini_batchZbatch_sizes_sumZnum_directionsZout_sizer   r   Z
cell_shaper
  r	  Zreserve_shapeZreserver2   r2   r3   
_cudnn_rnn/  s2   

r  c                 C   s   |r| j d n| j d }|r| j d n| j d }|
}|r!|||gn|||g}| |}|d u r8tjd| jd}n||j }|d u rKtjd| jd}n||j }tjd| jtjd}||||fS )Nr   r   rN  r   )r|   r{   rF   rp   ri   r  )r   Zw0Zw1Zw2Zw3hx_Zcx_r   r  r  r  r  
has_biasesr  r  r  r  r  Zoutput_chanelsr   r   r	  r
  r  r2   r2   r3   mkldnn_rnn_layerg  s    
r  c                    sT   | j dkrt dkp dk fdd d S t|  dk fdd d S )Nr   r   c                      rp  )Nz4: Expected reduction dim -1 or 0 for scalar but got r2   r2   rl   r{  r2   r3   rR     rr  z'zero_numel_check_dims.<locals>.<lambda>c                      rt  )Nz: Expected reduction dim z to have non-zero size.r2   r2   r  r2   r3   rR     rS   )r   rF   ry   r   )r}   rl   r{  r2   r  r3   zero_numel_check_dims  s   
r  c                    sF   |d urt || }t||  d S t| dk fdd d S )Nr   c                      rc  )Nz@: Expected reduction dim to be specified for input.numel() == 0.r2   r2   r  r2   r3   rR     r  z%check_argmax_argmin.<locals>.<lambda>)r   rl   r  rF   rU   rz   )rf  r}   rl   r2   r  r3   check_argmax_argmin  s   

r  c                 C   sD   t d| | t| j|d ur|fnd }t| ||}| j|tjdS )Nargmaxrt   )r  rA   r6  r|   r7  r{   rF   r   )r}   rl   r9  rx  r|   r2   r2   r3   argmax_argmin_meta  s   r  c                 C   s$   |t jkrt j}t jd||||dS )Nr2   r   )rF   Zjaggedr  rp   )rK  rM   rh   ri   rj   r2   r2   r3   scalar_tensor  s
   

r  c                 C   s   t ||  dd}|  dkrdn| |}t| t||kdd  t| j}t|dkr4|||< | 	|| j	|tj
dfS )NTrT  r   r   c                   S   rW   )Nzk not in range for dimensionr2   r2   r2   r2   r3   rR     rY   ztopk_meta.<locals>.<lambda>rt   )r   rl   r   rF   r]  rU   r   r|   r   r{   r   )r}   r  rl   ZlargestsortedZ	sliceSizeZtopKSizer2   r2   r3   	topk_meta  s   

r!  c           
      C   s@   |d us|d usJ d|  }|   }	tj||	j|	j|	jdS )Nz;segment_reduce(): Either lengths or offsets must be defined)rM   ri   rh   )r  rF   r   rM   ri   rh   )
rP  r   r'  r  r"  r#  r$  r&  Zdata_contigZgrad_contigr2   r2   r3   meta__segment_reduce_backward  s   r"  c                    s   t  |  dd |  dkr|  nd}t|dko||k fdd t| jd   | j d d   }|rF|  dkrF| d | || j|tj	dfS )NTr  r   r   c                      r  )Nz9kthvalue(): selected number k out of range for dimension r2   r2   r   r2   r3   rR     r  zkthvalue_meta.<locals>.<lambda>rt   )
r   rl   r   rF   rU   r   r|   r   r{   r   )r}   r  rl   r9  ZdimSizer|   r2   r   r3   kthvalue_meta  s   
$r#  c                 C   s   | d ur| n|}t | dkdd  | }| d ur(t |  |kdd  |d ur8t | |kdd  t | |kdd  t | |kdd  t | dkdd  t | |d	 |d
  d kdd  d S )Nr   c                   S   rW   N r2   r2   r2   r2   r3   rR     rY   z(checkLSTMBackwardSizes.<locals>.<lambda>c                   S   rW   r$  r2   r2   r2   r2   r3   rR     rY   c                   S   rW   r$  r2   r2   r2   r2   r3   rR     rY   c                   S   rW   r$  r2   r2   r2   r2   r3   rR     rY   c                   S   rW   r$  r2   r2   r2   r2   r3   rR     rY   c                   S   rW   r$  r2   r2   r2   r2   r3   rR     rY   r   r   r/  c                   S   rW   r$  r2   r2   r2   r2   r3   rR     rY   )rF   rU   rl   r   rz   )grad_hygrad_cyr  r
  r  Zdefined_gradZexp_sizer2   r2   r3   checkLSTMBackwardSizes  s   ,r(  c           	      C   s`   | d u r
|d u r
dS t | |||| tj|td}tj|td}|r)|jdddnd }|||fS )NNNNr   r   F)r9  )r(  rF   r   legacy_contiguous_memory_formatr  )	r&  r'  r  r
  r  Zhas_biasZ
grad_gatesZgrad_cxr  r2   r2   r3   #_thnn_fused_lstm_cell_backward_impl  s   
r+  c                 C   sf   d }d }d }|d r| |  }|d s|d r.| |d| df}| |d}|||fS )Nr   r   r   r   rG  )rI  rH  rJ  rK  r(  Zgrad_weightr  r2   r2   r3   linear_backward  s   
r,  c                    s   t jdkrjd ||  dksJ dj d| dd   fdd	}jd ||  }jd
 | }jd | }g jd d |||R }|}|j| d}|S )Nr   r  r   z'Invalid input shape for pixel_shuffle: z with upscale_factor = c                 S   r  r-   r  r  r2   r2   r3   r  "  r  z,meta_pixel_shuffle.<locals>.is_channels_lastc                      sL    rt dkrtjS tjS jtjdrtjS jtjdr$tjS d S r  )r   rF   r   r  r  r  r2   r  r}   r2   r3   r  %  s   z.meta_pixel_shuffle.<locals>.pick_memory_formatrb  r   r   )r   r|   r{   r   )r}   Zupscale_factorr  rD  ZHrZWrr   r   r2   r-  r3   meta_pixel_shuffle  s   & 
r.  c                 C   sZ   |  | j}| |j}| |j}| |j}| |j}| |j}|||||||fS r-   r  )r   Zweight0Zweight1Zweight2Zweight3r  Zcx_tmpr   Zhy_Zcy_Zgrad_output_r_optZgrad_hy_r_optZgrad_cy_r_optr   r  r  r  r  r  r  r  r  r  Zdiff_xZdiff_hxZdiff_cxZdiff_w1Zdiff_w2Zdiff_br2   r2   r3   mkldnn_rnn_layer_backward:  s   r/  )	out_int32r   c                C   s   t j| |rt jnt jt jdS )NrM   r   )rF   r   r   r   r   )r}   Z
boundariesr0  r   r2   r2   r3   meta_bucketize]  s
   r2  d   c                    s   dt dkrt fdd tt t fdd t dk fdd tttfdd tttfd	d tkd
d  tj jj	dS )Nzhistc()r  c                      r  )Nz%"histogram_cpu" not implemented for 'r  rt   r2   r  r2   r3   rR   n  rr  zmeta_histc.<locals>.<lambda>c                      s    dt   S )Nz#: argument 'bins' must be int, not rV  r2   binsr{  r2   r3   rR   r  r  r   c                      rp  )Nz: bins must be > 0, but got r2   r2   r4  r2   r3   rR   t  rr  c                           dt  S )Nz%: argument 'min' must be Number, not rV  r2   )r{  r   r2   r3   rR   w  r  c                      r6  )Nz%: argument 'max' must be Number, not rV  r2   )r{  rS  r2   r3   rR   {  r  c                   S   rW   )Nz&{fn_name}: max must be larger than minr2   r2   r2   r2   r3   rR   }  rY   r   )
r   rF   rU   r   rZ   r   r   rp   ri   rM   )r   r5  r   rS  r2   )r5  r{  r   rS  r   r3   
meta_histcg  s*   
r7  c                    sd   t   |dd}t  dkptdd   dd  D  fdd  |jt	 d	S )
Nr   r  r   c                 s   r  r  r2   )r;   r   r2   r2   r3   r]     r  z,meta_upsample_bimode2d_aa.<locals>.<genexpr>r   c                      r  r  r   r2   r  r2   r3   rR     rr  z+meta_upsample_bimode2d_aa.<locals>.<lambda>r   )
r  r   rF   rU   rz   r  r{   r   rA   r  )r   r  rA  r  r  r  r2   r  r3   meta_upsample_bimode2d_aa  s   

(

r8  c                    st   t ||dd tjdkfdd tdD ]tj   k fdd q|jt	dS )Nr   r  r/  c                      rs   r  r  r2   r'  r2   r3   rR     rw   z4meta_upsample_bimode2d_aa_backward.<locals>.<lambda>c                
      r  )NzD
Expected grad_output to have the same shape as output; output.size(r  z
but got grad_output_size(r   r2   r  r2   r3   rR     s    r   )
r  rF   rU   r   r   r|   r{   r   rA   r  )r%  r  r  rA  r  r  r2   r  r3   "meta_upsample_bimode2d_aa_backward  s   	

r9  c                 C   s\   t | dkdd  t | dkdd  t |jjdd  t |jjdd  d S )Nr   c                   S   rW   )Nz%found_inf must be a 1-element tensor.r2   r2   r2   r2   r3   rR     rY   z<_amp_foreach_non_finite_check_and_unscale_.<locals>.<lambda>c                   S   rW   )Nz%inv_scale must be a 1-element tensor.r2   r2   r2   r2   r3   rR     rY   c                   S   rW   )Nz!found_inf must be a float tensor.r2   r2   r2   r2   r3   rR     rY   c                   S   rW   )Nz!inv_scale must be a float tensor.r2   r2   r2   r2   r3   rR     rY   )rF   rU   rz   rM   r   )r}   rS  Z	inv_scaler2   r2   r3   *_amp_foreach_non_finite_check_and_unscale_  s   r:  c                 C   s   t |  }| |S r-   )r   r   r{   )r}   nanZposinfZneginfr   r2   r2   r3   
nan_to_num  s   
r<  c                 C   s   | j tjtjtjtjhvsJ d| j  d| j}t||}t||}||kr)| S t| 	 }t| 
 }|| || ||< ||< || || ||< ||< | || | S )Nz>torch.transpose_: in-place transposition is not supported for z layout)rh   rF   r  Z
sparse_cscr  Z
sparse_bscr   r   r   r   r   r   )r}   Zdim0r_  ndimsr   r   r2   r2   r3   r    s&   	

r  c                 C   sz   | j }| jr"|  }|  }|dkr|dks!J d| d| dn|  dks0J d| dt| d|dk r:dS dS )	Nr   r   zEt_ expects a tensor with <= 2 sparse and 0 dense dimensions, but got z sparse and z dense dimensionsz6t_ expects a tensor with <= 2 dimensions, but self is r  r   )r   rH  rI  rJ  rl   r  )r}   r=  rI  rJ  r2   r2   r3   t_  s   
r>  )r0  r   sidesorterc                   s   t tjdkpjd d  jd d k fdd t d u p)jjkfdd t |dkp9| d |rAt jnt j}t t jrSt j |t j	dS t j
d	|jd
S )Nr   r   c                      s   dt j dt  j S )Nztorch.searchsorted(): boundaries tensor should be 1 dimension or the first N-1 dimensions of boundaries tensor and input value tensor must match, but we got boundaries tensor z and input value tensor r   r|   r2   )r}   sorted_sequencer2   r3   rR     s
   z#meta_searchsorted.<locals>.<lambda>c                      s,   dt  j dd urt j S g  S )Nz[torch.searchsorted(): boundary and sorter must have the same size, but got boundary tensor z and got sorter tensor rA  r2   )rB  r@  r2   r3   rR     s   r   zetorch.searchsorted(): side and right can't be set to opposites, got side of left while right was Truer1  r2   r5  )rF   rU   r   r|   r   r   rZ   r   r   r   rp   ri   )rB  r}   r0  r   r?  r@  rM   r2   )r}   rB  r@  r3   meta_searchsorted  s&   
rC  c                    s(   t  t jt jt jfv fdd d S )Nc                      r  )Nz/Unsupported input type encountered for isin(): r2   r2   rt   r2   r3   rR   3  r  z3_check_for_unsupported_isin_dtype.<locals>.<lambda>)rF   rU   rl  Z
complex128Z	complex64rt   r2   rt   r3   !_check_for_unsupported_isin_dtype0  s   
rD  c                 C   s:   |	rt | ||||||||
|
S t| ||||||||
|
S r-   )r)   Z_embedding_bag_sparse_backward!meta_embedding_bag_dense_backward)rP  r   r#  r  r  maximum_indicesnum_weightsr  r  r  r  r  r2   r2   r3   meta_embedding_bag_backward7  s2   rH  c
                    sX   t  jt jt jt jt jfv  fdd |tkr t |d u  | 	df}
|
S )Nc                      rs   )Nz$Unsupported input type encountered: rt   r2   rP  r2   r3   rR   q  rw   z3meta_embedding_bag_dense_backward.<locals>.<lambda>r   )
rF   rU   rM   r  r  r  Zfloat64r  r{   r   )rP  r   r  r  rF  rG  r  r  r  r  Zindex_grad_weightr2   rI  r3   rE  b  s   
rE  c           
      C   s~   |  d}t|tkd t|  dk t| dk | d}t| dk t| d|k | |f}	|	S )Nr   zHembedding_bag_backward: per_sample_weights only supported for mode='sum'r   r   )r   rF   rU   r  rl   r{   )
rP  r   r   r#  r  r  r  Zembedding_featuresr  r   r2   r2   r3   .meta_embedding_bag_per_sample_weights_backwardy  s   


rJ  )assume_uniqueinvertc                C   sx   t t| tpt|tdd  t| tst j| |jd} t|ts*t j|| jd}t| j t|j t j| t j	dS )Nc                   S   rW   )Nz<At least one of elements and test_elements must be a Tensor.r2   r2   r2   r2   r3   rR     rY   zmeta_isin.<locals>.<lambda>rN  rt   )
rF   rU   rZ   r   r   ri   rD  rM   r   rl  )elementsZtest_elementsrK  rL  r2   r2   r3   	meta_isin  s   



rN  r   c                 C   s4   t | dkdd  t|tjd\}}t j||dS )Nr   c                   S   rW   )Nz,polygamma(n, x) does not support negative n.r2   r2   r2   r2   r3   rR     rY   z meta_polygamma.<locals>.<lambda>r?  rt   )rF   rU   r   r   r@  r   )r   r}   rD   r>   r2   r2   r3   meta_polygamma  s   
rO  c                 C   s   t d)Nz.Tensor.item() cannot be called on meta tensors)r   r   r2   r2   r3   meta_local_scalar_dense  s   rP  c                 C   ri  r-   rj  r   r2   r2   r3   silu  r2  rQ  c                 C   s    t | tjd\}}tj| |dS r>  )r   r   r@  rF   r   )r}   rD   r>   r2   r2   r3   sigmoid  s
   
rR  r<   half_to_floatc                 C   sL   |r
| j tjks
J tj| tjjd\}}|s|n|}tj| |tjd}|S )Nr?  r1  )	rM   rF   rG   rA   r   r   rB   r   r   )r<   rl   rS  Zcomputation_dtyper>   r  r2   r2   r3   softmax  s   
rT  r  r  r  c           	      C   sx   |   dks
J d| j}|j}|jdkr|d f}n|jdkr)|d |d f}n	g ||d R }| j}| j||dS )Nr   z'weight' must be 2-Dr   r   rt   )rl   r|   r   rM   r{   )	r   r   r  r  r  Zweight_shapeZindices_shaper   r   r2   r2   r3   	embedding  s   	

rU  max_lengthspadding_valuec                 C   s\   t |dksJ t |dksJ |d jd d }|d }||g| jdd  R }| |S r  )r   r|   r{   )r   r#  rV  rW  rw  r  r  r2   r2   r3   $meta__jagged_to_padded_dense_forward  s   
rX  c                 C      t | t dd }|S )Nc                 S   r  r  rE   r   r@  r   r2   r2   r3   _f  s   z)_create_unary_float_meta_func.<locals>._fr7   r"   funcr[  r2   r2   r3   _create_unary_float_meta_func     r_  c                 C   rY  )Nc                 S   r  r  rZ  )r<   r  r2   r2   r3   r[    r  z*_create_binary_float_meta_func.<locals>._fr\  r]  r2   r2   r3   _create_binary_float_meta_func  r`  ra  c                    s<   t   fdd} j d}||_ttt||}|S )Nc                    s(    | g|R i |}t | j|j | S r-   r  )r}   rC   r  r   r0   r2   r3   _fn*  s   z#_register_inplace_meta.<locals>._fnrD   )r   rc   r7   getattrr)   )r1   rb  Zinplace_namer2   r0   r3   _register_inplace_meta)  s   rd  c                    sr   t j jk fdd  g}ttr1jdkr,t jjkfdd | t|dtj	iS )Nc                      r  )Nr  z for `end`, but got dtype rt   r2   )rd   re   r2   r3   rR   <  r  zlerp.<locals>.<lambda>r   c                      r  )Nr  z for `weight`, but got dtype rt   r2   )re   r   r2   r3   rR   C  r  r8   )
rF   rU   rM   rZ   r   r   r   rE   r   rB   )re   rd   r   rC   r2   )rd   re   r   r3   lerp7  s"   




re  )r  c                C   s   t | ||tjdS r  r  r   Ztensor1Ztensor2r  r2   r2   r3   addcmulK  s   
rg  c                C   s8   t t|jot|j dd  t| ||tjdS )Nc                   S   rW   )N)zFInteger division with addcdiv is no longer supported, and in a future zErelease addcdiv will perform a true division of tensor1 and tensor2. z4The historic addcdiv behavior can be implemented as zA(input + value * torch.trunc(tensor1 / tensor2)).to(input.dtype) zfor integer inputs and as z6(input + value * tensor1 / tensor2) for float inputs. z?The future addcdiv behavior is just the latter implementation: z4(input + value * tensor1 / tensor2), for all dtypes.r2   r2   r2   r2   r3   rR   [  rY   zaddcdiv.<locals>.<lambda>r  )rF   rU   rA   r  rM   rE   r   rB   rf  r2   r2   r3   addcdivS  s   

rh  c                  C   s4  i } dD ]}t | }|D ]}|| vr|| | |< qq|  D ]y\}}t|tjjr*qt|ts1J |tjj	j
| tj| drR|t d v rQt| dq|jrVq| dv r]qd| v rjt|| qd| v rwt|| qd| v rt|| qd	| v rt|| qt|| qd S )
N)rg   Zpost_autogradZpre_autogradZCompositeImplicitAutogradrg   z is a CompositeImplicitAutograd op, we shouldn't register meta function for it. Instead, we should let the decomposition run and write meta kernels for the base operators.>   zaten::constant_pad_ndzaten::clonezaten::rot90zaten::empty_stridedzaten::copy_zaten::as_strided_scatterzaten::_to_copyzmkldnn::zmkl::zonednn::zquantized::)r   itemsrZ   rF   Z_opsZHigherOrderOperatorr   Zpy_impl_CZDispatchKeyr*   Z%_dispatch_has_kernel_for_dispatch_keyrf  r   Zis_view2_meta_lib_dont_use_me_use_register_meta_for_mkldnnimpl/_meta_lib_dont_use_me_use_register_meta_for_mkl2_meta_lib_dont_use_me_use_register_meta_for_onednn5_meta_lib_dont_use_me_use_register_meta_for_quantized'_meta_lib_dont_use_me_use_register_meta)Zactivate_meta_tablerb   registryZopoZop_overloadr1   r2   r2   r3   activate_metaw  sN   rr  r   r)  r-   )NNNFr   r   Fr  )Trk  )rz  )r  T)FF)TT)r  )FTN)TFF)TF)r   )re  N)r,   rz  )r2   r   r  F)r2   r   FTN)Fr   FNFr   )NF)r   F)r  r  FN)NNNNN)r   NNr   )NNF)rx  FFN)Nrx  FFN)rx  FNN)rx  FN)FN)FNNNN)NNNF)Nr   FNN)NNNN)r   TT)NNr   N)r3  r   r   )r   )r   FF)rx  (  r  collections.abcr   enumr   	functoolsr   typingr   r   r   r   Ztyping_extensionsr	   rF   Ztorch._prims_commonr  rA   r
   r   r   Ztorch._decompr   r   r   r   Z
torch._opsr   Ztorch._primsr   r   r   r   r   r   r   r   r   r   r   r   Ztorch._prims_common.wrappersr   r   r    r!   r"   rB  r#   r$   Ztorch.fx.experimentalr%   r6  Ztorch.utilsr&   r5   r'   r(   opsr)   ZlibraryLibraryrp  r   r  r  r  r7   rE   rN   rV   ZlinspaceZlogspacer  rr   Ztaker   r   r~   r   r   ZcummaxZcumminr   r   r   r   r   rl  r   Z_fft_c2cr   r   r   Z_fft_r2cr   ZrandpermZgenerator_outr   rx   r   randintr   r   Zlow_outr   Zrandr   Z_fft_c2rr   rB  r   r   Z
unsqueeze_r   Z_sparse_semi_structured_linearrz  rM   r   Z_sparse_semi_structured_mmr   Z_sparse_semi_structured_addmmr  Z_cslt_sparse_mmr  Zindex_reducer  Zindex_reduce_r  Zindex_selectr!  Zsegment_reducer-  rS  Z	unary_outr1  rl   r:  r   r<  r=  rC  rA  rD  Z_assert_asyncrG  msgrJ  Z_printrL  Z_make_dep_tokenrO  rU  Z_functional_sym_constrain_rangerZ  r^  Z(_functional_sym_constrain_range_for_sizer_  Z_functional_assert_asyncr`  r   rm  r   rv  ry  r~  r  Z_linalg_eighr  r  Z_linalg_eigvalsZlinalg_eigvalsr  Z
linalg_eigr  r  r  r  r  r  r  r  Zlinalg_inv_exr  Zlinalg_ldl_factor_exrT   r  Zlinalg_ldl_solver  Z	linalg_lur  Zlinalg_lu_factor_exr  Zlinalg_lu_solver  Z	lu_unpackr  r  Z	linalg_qrr  r  r  Z_linalg_svdr  r  r  r  r  Zlinalg_solve_triangularr  r  r  Z_linalg_detr  r  r  r  Zreflection_pad1dr  Zreplication_pad1dr   r'  Zreflection_pad1d_backwardr,  Zreplication_pad1d_backwardr.  r:  Zreflection_pad2dr<  Zreplication_pad2dr=  Zreflection_pad2d_backwardr(  Zreplication_pad2d_backwardr@  rH  Zreflection_pad3drJ  Zreplication_pad3drK  Zreflection_pad3d_backwardZreplication_pad3d_backwardrM  Z_pdist_forwardrI   rO  Z_pdist_backwardrR  Zbaddbmmrb  Z	bernoullird  Z
bernoulli_rg  rN  rh  Zpoissonrk  Z_fused_moving_avg_obs_fq_helperro  mmru  r7  r   r  r  Zmiopen_batch_normr  Zconvolutionr  rj  Z_has_mkldnnrk  r  Z_convolution_pointwiser  Z_linear_pointwiser  Zhas_mklrm  r  Z_mkl_linearr  rn  r  Zqconv2d_pointwiser  binaryr  Zqlinear_pointwiser   r  Zbinary_tensorr  Zlinear_dynamic_fp16Zlinear_relu_dynamic_fp16r  ro  r  Z
max_pool2dr  Zint4mm_packed_weight_cpur  r  Z
avg_pool2dr  r  Zavg_pool2d_backwardr  Z
avg_pool3dr  Zavg_pool3d_backwardr  Z_adaptive_avg_pool2dr  Z_adaptive_avg_pool3dr  Z_adaptive_avg_pool2d_backwardr  Z_adaptive_avg_pool3d_backwardr  r  Zadaptive_max_pool2dr&  r(  r*  Zadaptive_max_pool3dr+  r,  r-  Zrepeat_interleaver/  r[   r2  r4  r7  rv   Z_unsafe_indexrF  Zconvolution_backwardrL  ZaddbmmrQ  Z_fused_adam_Z_fused_adamw_rf  Z_fused_adamrh  Z_int_mmri  Z_convert_weight_to_int4packrp  Z#_convert_weight_to_int4pack_for_cpurr  Z_weight_int4pack_mmrv  Z_weight_int4pack_mm_for_cpurw  rx  r  Z_dyn_quant_pack_4bit_weightr  Z_dyn_quant_matmul_4bitr  Z_weight_int8pack_mmr  Z_cdist_forwardr  Z_cdist_backwardr  Z_embedding_bagr  Z_embedding_bag_forward_onlyr  r  Znansumr  ZmedianZ	nanmedianr  Z
dim_valuesr  r   r  Zlogical_not_r  repeatr  Zzero_r  Zmul_ZScalarZdiv_Zlogical_and_Zlogical_or_Zlogical_xor_r  Zadd_Zsub_r  roundZdecimalsr  r  
__rshift__r  
__lshift__r  zeror  rK  r  fillr  Zrelu_r  Z	_add_relur  Zrrelu_with_noiser  Zrrelu_with_noise_functionalr  Zrrelu_with_noise_r  Z	index_putZ_unsafe_index_putr  Zmasked_fill_r   Z_masked_scaler  Zmasked_scatter_r  Zmasked_scatterr  Zmasked_scatter_backwardr  Z
index_put_r  aliasr  r
  Zbmmr  r  r  r  r  r  r!  r  r  Z max_pool2d_with_indices_backwardr#  Zmax_pool2d_with_indicesr$  Zfractional_max_pool2dr+  Zmax_pool3d_with_indicesr2  Z max_pool3d_with_indices_backwardr3  r7  r8  r=  Zgrid_sampler_2d_backwardrC  rE  rF  rG  r  rA  rN  Zselect_scatterrP  Zslice_scatterrR  r   rU  rZ  gatherr\  rb  rg  rh  rl  rn  Zscatter_addrr  Zscatter_add_rt  rm  r   r  r  Zvalue_reducerv  Zscatter_rw  Z#_scaled_dot_product_flash_attentionr  Z#_scaled_dot_product_cudnn_attentionr  Z0_scaled_dot_product_fused_attention_overrideabler  Z,_scaled_dot_product_flash_attention_backwardr  Z+_scaled_dot_product_flash_attention_for_cpur  Z4_scaled_dot_product_flash_attention_for_cpu_backwardr  Z'_scaled_dot_product_efficient_attentionr  Z0_scaled_dot_product_efficient_attention_backwardr  Z,_scaled_dot_product_cudnn_attention_backwardr  Z_flash_attention_forwardr  Z_flash_attention_backwardr  Z_efficient_attention_forwardr  Z_efficient_attention_backwardZSymIntr  Z
_scaled_mmr  Zscatter_reducetwoZtwo_outr  Zscatter_reduce_r  Zmultinomialr  r  r  r  Z_upsample_nearest_exact1dr  Z_upsample_nearest_exact2dr  Z"_upsample_nearest_exact2d_backwardr  Z_upsample_nearest_exact3dr   r  Zvalues_stabler  r  Z_thnn_fused_lstm_cellr  r  r  r  r  r  Zargminr  r  Ztopkr!  Z_segment_reduce_backwardr"  Zkthvaluer#  r   r*  r(  r+  r,  Zpixel_shuffler.  r/  Z	bucketizeZ
Tensor_outr2  Zhistcr7  Z_upsample_bilinear2d_aaZ_upsample_bicubic2d_aar8  Z _upsample_bilinear2d_aa_backwardr9  r:  r<  r  r>  ZsearchsortedrC  rD  Z_embedding_bag_backwardrH  Z_embedding_bag_dense_backwardrE  Z*_embedding_bag_per_sample_weights_backwardrJ  isinrN  Z	polygammarO  Z_local_scalar_denserP  rQ  rR  Z_softmaxrT  rU  Z_jagged_to_padded_dense_forwardrX  r_  ra  Zspecial_airy_aiZspecial_bessel_y0Zspecial_bessel_y1Zspecial_modified_bessel_i0Zspecial_modified_bessel_i1Zspecial_modified_bessel_k0Zspecial_modified_bessel_k1Z!special_scaled_modified_bessel_k0Z!special_scaled_modified_bessel_k1Zspecial_chebyshev_polynomial_tZspecial_chebyshev_polynomial_uZspecial_chebyshev_polynomial_vZspecial_chebyshev_polynomial_wZ&special_shifted_chebyshev_polynomial_tZ&special_shifted_chebyshev_polynomial_uZ&special_shifted_chebyshev_polynomial_vZ&special_shifted_chebyshev_polynomial_wZspecial_hermite_polynomial_hZspecial_hermite_polynomial_heZspecial_laguerre_polynomial_lZspecial_legendre_polynomial_prd  re  rg  rh  Zlerp_Zaddcmul_Zaddcdiv_Ztorch._refs.nn.functionalZtorch._refs.specialrr  r2   r2   r2   r3   <module>   s:  0(
	8	6

@
"
'
	
!"	
0



#
	

	











	




'



"

2
*
*
"7
(&$
%
	
:

/Z&5?'$,



k%	,"Q,
H
XN



.


*" $ m
#c	







-


!
T	
\>	
6L+&
T

ge( 

	, $1	








@	)(	
		
*,	
7	
	
K	
	
/	
7d



'7'

"
0

*


"
	



(





E