a
    h                    @  s2  U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlmZ d dlmZmZ d dlmZmZmZmZmZmZmZ d dlmZ d dlmZ d dlZd dlZd dlZd dl Zd dl!m"  m#Z$ d dl%m&Z& d d	l'm(Z( d d
l)m*Z* d dl+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6 d dl7m8Z8m9Z9 d dl:m;Z; d dl<m=Z= d dl>m?Z?m@Z@mAZAmBZB ddlCmDZD ddlEmFZFmGZGmHZHmIZI ddlJmKZKmLZL ddlHmMZMmNZNmOZOmPZPmQZQmRZRmSZSmTZTmUZUmVZVmWZWmXZXmYZYmZZZ ddl"m[Z[m\Z\m]Z]m^Z^m_Z_m`Z`maZambZbmcZcmdZdmeZemfZf ddlgmhZhmiZi erNddljmkZk edZledZme=ddgZneoepZqi Zrdesd< i Ztdesd < e=ejujv  ZwejhjxZxejhjyZyejhjzZze=ejujv  Z{e=ejujv ej|j}gZ~e=ejujv  Zi Zd!esd"< ejhjZd#d$ Zd%d&d'd(Zd)d*d+d,d-Zd.d/ Zd0d1 Zd2d3 Zd4d5 Zeexjexjexjexjexjexjexjexjexjexjexjexjexjexjg ejejejejejejejejejejejejejd6Zd7d8d9d:Zd;d< Zd=d> Zd?d@dAdBZdCdD ZdEdF ZdGdHdIdJdIdKdLdMdNZdOdP ZdJd@dQdRZdSe1jdSerfdJdTdUdVdWZdXdY ZddZd[Zdd\d]Zdd^d_Zdd`dadbdcddZeej|j}dd@dedf Z}eezjdd@d`dadbdgdhZdSdid`dadbdjdkZeexjjdd@d`dadbdldmZdSdSdnd`dodpdqdrZeezjdd@dd`dodpdsdtZddue1jdSdddSdfdvdwZdxdy Ze  ddzd{ZeexjdSdd|d}d~ ZeexjdSdd|dd Zeexjexjexjexjezjgdd ZeexdreexjɃeǃ eexjdd@dddZeexjdd@dddZeexjgdddZeexj̓dd Zeexj΃dd Zeexjσdd ZeexjЃdd Zeexjj҃dd ZeexjӃdd Zeexjdd@dd Zeezjdd@dd Zeexjdd@dd Zeexj׃dd Zeexjdd@eexjdd@eexjdd@dd Zeexjdd@dd Zeexjdd@dddZeexjdd@dddZeexjdd@dddZeexjdd@dddZdddZeejdd@d`d`d`d7d7d7dad`dddZeejdd@ddd`d`d`d7d7d7dadd`d	ddZeejjdd@d`dd7d7d7dad`dddZeejjdd@ddd`dd7d7d7dadd`dddZeejjdd@d`d`d`d7d7dad`dddZeejjdd@ddd`d`d`d7d7dadd`dddZeexjdddZeexjdd@dd7d7d7dddĄZeexjdd@dd7d7d7dddƄZeexjdd@dd7d7d7dddȄZeexjdd@ddʄ Zeexjdd@ddd̄Zeexjdd@ddd΄Zeexjdd@dddЄZeexjdd@dd҄ Zeexjdd@ddԄ Zeexjdd@ddք Zddd؄ZeexjdddۄZddd݄Zejdd߄ ZdddddZdddddZdddddZdddZdd Zeejhjj dd@dd Z eexjdd@dd Zeexjdd@dd Zeexjjdd@dd Zeexjdd Zeddd Zdd Z	eexj
j҃Zeexj
jZeexjj҃ZeexjjZeexj eexj
dd Z
eexjdd ZeeGjdd@d d ZeeGjdd@dodddZeeGjdd@dd ZeeGjdd@dd ZeeGjdd@d d	d
d`dd7dddZeeGjdd@d d	d7d7d
d`d7dddZd`ddddZd`ddddZeexjj dd@dSdSdddd`d`dIdIddd`dddZeexj!e1j"d@dSdSdd`d`dIdId d!d"Z!d#d$ Z#d%d& Z$d'd( Z%d)d* Z&d+d, Z'd-d. Z(d/d0 Z)d1d2 Z*eexj+ eexj, eexj- eexj.dSd3 eexj/jdSd3 eexj0 eexj1dSd3 eexj2dSd3 ej34 r(eexj5dSd3 eexj6 eexj7 eexj8 eexj9j҃ eexj:j҃ eexj; eexj<j= eexj>j҃ eexj?j҃ eexj@ eexjAdSd3 eexjBe# eexje) eexjCe# eexjDe$ eexjEe$ eexjFe$ eexjG eexjH eexjH eexjI eexjJ eexjK eexjLe# eexjM eexjN eexjO eexjP eexjQ eexjR eexjS eexjT eexjUe$ eexjV eexjWe# eexjX eexjY eexjYjZ eexj[ eexj\ eexj] eexj^ eexj_ eexj` eexja eexjb eexjc eexjd eexje eexjf eexjg eexjh eexji eexjj eexjk eexjl eexjm eexjn eexjo eexjp eexjq eexjr eexjs eexjtju eexjv eexjw eexjx eexjy eexjz eexj{ eexj| eexj}j҃ eexj~jdSd3 eexje# eejjj eejjj eejjj eexj eexj eexje$ eexj eexj eexj eexj eexj eexjj eexjjҐe*dSd3 eexjjҐe*dSd3 eexjjҐe*dSd3 eexjjҐe*dSd3 eexjjҐe*dSd3 eexjjҐe*dSd3 eexjjҐe*dSd3 eexjjҐe*dSd3 eexjjҐe*dSd3 eexjjҐe*dSd3 eexjjҐe* eexjjҐe* eexjjҐe* eexjjҐe* eexj eexjdd@dҐd4d5Zeexjdd6d7d8Zd9d: Zeexd;rPeexje eezjd<d= Zeexjdd@d7d7d>d?d@Zeexjdd@dӐdAdBZdCdD ZeejexjgddddSdEdFdGZeejdԐdHdIZeejdJdK ZeexjdLdM ZeexjdNdO ZeexjddddPdQdRZdSdT Zeexjdd@dUdV ZdWdX ZeejexjgdddddddYdZd[Zd\d] Zd^d_ ZeexjeeZeedZeed Zd`da ZeexjdddddbdcddZeexjdddddbdedfZeexjdddddbdgdhZeezjj҃didj Zeejexjgdkdl Zeexjdd@d֐dmdnZeexjdd@dאdodpZdqdr ZdؐdsdtZdudv ZdِdwdxZeexjdd@dydz Zeexjdd@d{d| Zeexjdd@dڐd}d~ZeexjŃdېddZŐdd ZƐdd Zeexjdd@dܐddZeeGjdd@dݐddZɐdސddZeexjjdSdZeexjjdSdZeexjdd@dd Zeexjdd@dd Zedd Zeexjdd@dߐddZeexjdd@d7dddZdduddd7ddIdddZeexjdd@ddd7ddddZeexjdd@d7dddZeexjdd@d7dddZeexjdd@d7dddZeexjdd@dudd7dIdddZאddd7dIdddZeexjj҃dddddZeexjj҃dddddZeexjj҃ddddddZeexjj҃ddddddZeexjj҃dddddddZeexjj҃dddddddZܐdd Zeezjj҃dd Zd`ddddÜdĐdńZeexjdd@ddƐdǄZdȐdɐdʜdːd̄ZdȐdȐd͜dΐdτZdȐdȐdȐdМdѐd҄ZddԐdՄZdd֜dאd؄Zdِdڄ Zddۜdܐd݄Zdސd߄ Zeezjdd@dddZd`dddd`dddZeezjdd@dd Zdd Zeexjdd@dddZeexjdd@dddZeexjjdSdZeexjdd@dd ZdddZdd Zdd Zdd ZeexjjdSdZeexjdd ZeexjjdSdZeexjdd Zdd Zeexjd d Zeexjdd Zdd Zeexjj҃dddZeexjjdSdZeexjjdSdZeexjdd@dd	d
Zeexjdd@dddZdd ZeexjjdSdZeexjdd@dddZeexj jdSdZeexj dd@dddZ dd Zdd ZdddddZdd Zeexjddd8ddZdd  Zd!d" Zd#d$ Z	d%d& Z
eexjezjgdddSd'd(d)ZeexjdddSd'd*d+Zd,d- Zed.d/ ZeexjjdSdZeexjjdSdZeexjjdSdZeexjdud0d1d2 Zdd3d4Zeexjd5d6 Zeexjdd@dd7d8Zed9d: Zed;d< Zeexjdud0dd=d>Zeexjgdud0d?d@ ZdAdBdCdDdEZeezjgdud0dFdG Z eexj!exjj gdue1j"d|dHdI Zeexj#ezj#gdud0dJdK Z#eexj$ezj$gddd8dLdMZ%eexj&j҃Z'eexj(j҃Z)eexj*j҃Z+eexj,j҃Z-eexj.j҃Z/eexj&ddNdOZ&eexj(ddPdQZ(eexj*dRdS Z*eexj,dd@ddTdUZ,eexj.dd@ddVdWZ.eexj0ddd8dXdYZ0eexj1ddZd[Z2eexj3dd@d d\d]Z4eexj5dd@dd^d_Z6eezj7ed` eexj8edaZ9eexj:edbZ;eexj<edcejddZ=eexj>edeejddZ?eexj@dudfdgZ@eexjYjZdSdZAeexjYjZdd@dddSdhdidjZBeexjYjdd@ddkdlZYddmdnZCdodpdqdrZDeCexjEZEeDexjFZFeCexjGZGeCexjHZHeexjIZIeDexjJZJeDexjKZKeexjLZLeexjMdudsZMeDexjN eDexjO eexjPZPeexjQZQeexjRZReexjSdtduZSeexjTZTeexjUZUeexjVZVeCexjW eCexjXZXeexjYe1j"d@eX eCexjZ eCexj[ eCexj\ eDexjq eexj]dduejdvZ]eexj^dduejdvZ^eexj_dduejdvZ_eexj`dduejdvZ`eexjaZaeexjbZbeexjcea eexjdeb eexjeZeeexjPZPeCexjfZfeexjg eexjhdwduZheexjσ eexjiejdd eexjjee eexjkejdd eexjlejdd eexjmejdd eexjnejddZneexjoejdd eexjpejdd eCexjq eCexjr eCexjs eCexjt eCexju eCexjv eCexjw eCexjx eCexjy eCexjz eCexj{ eCexj| eCexj} eCexj~ eCexj eCexj ddxlmZmZ dydz ZeD ]ZeexeD ],\ZZZeeeeed{ )qDeezeD ],\ZZZeeeeed{ )q)q0eexjje@dudsZeexjje@dudsZeexjj e@duds eexjjeZeexjj e eexjjeZeexjjeM eexjjeM eexjjҐee eexjjҐeP eexjje eexjje eexjje eexjjeZeexjj e eexjjeZeexjeK eexjeE eexjjea eexjjea eexjjeb eexjjeb eexjjea eexjjea eexjjeb eexjjeb eexjef eexjeh eexje d|d} Zeexjjexjje eexjjexjje eexjjexjje eexjjexjje eexjjexjje eexjjexjje d~d Zeexje@ eexjeQ eexjeR eexjeS eexjeT eexjeU eexjeV eexje eexjj e eexjje eexje] eexje^ eexje_ eexje` eexjeM eexjeI eexjeJ eexjeQ eexjeR eexjeT eexjeU eexjeV eexjexj eexjexj eexjexj eexjexj eexjexj eexjdddZeexjjÃdd ZeexjĐjÃdd ZeexjŃdd Ze8ơ D ]"\ZǐZee9eǃeȃ /qNeejɃdd Zeexjdd Zeejhjːj̃dd Zeejhjΐjσdd ZeejhjxjАjуdd ZeejhjӐd80r>eejhjӐjj҃dd ZeejhjxjՃdd6ddZd dl֐mאZ ee׃ ee*dd Zeejhjِjdd@dd Zeejhjِjdd@dd Zeejhjِjdd@dddddZeej|jdd@ddddddZee(dd@dddddZeejhjzjj҃dd Zeejhjِjdd@dd ZddlmZ e  eeGjdd@dd ZddlEmZ eDe ddlEmZ e  e  ddlEmZ e  ddlEmZ e  ejddpddZdS (      )annotationsN)defaultdict)IterableSequence)AnyCallablecastOptionalTYPE_CHECKINGTypeVarUnion)	ParamSpec)patch)counters)associative_scan_op)triton_kernel_wrapper_mutation)canonicalize_dimcanonicalize_dimscheckdtype_to_typeelementwise_dtypesELEMENTWISE_TYPE_PROMOTION_KINDget_computation_dtypeis_boolean_dtypeis_float_dtypeis_integer_dtypeNumber)magic_methodsmethod_to_operator)free_unbacked_symbols)
OrderedSet)CeilDivFloorDivIdentityModularIndexing   )import_submodule   )configinductor_primsirtest_operators)decompositionsget_decompositions)	DtypeView
ExpandViewIndexingConstantIRNode	is_tritonOnlineSoftmaxReductionops_wrapperPermuteView	Pointwise	ReductionSqueezeView	TensorBoxvalidate_irView)ceildivdecode_device
is_dynamicis_gpuis_pointwise_useis_view,needs_fallback_due_to_atomic_add_limitationspad_listlike#register_op_dtype_propagation_rules#register_op_requires_libdevice_fp64sympy_productuse_scatter_fallback)opsV)ReductionType_T_Pztorchvision::roi_alignzaten::index_addz8dict[Union[Callable[..., Any], str], Callable[..., Any]]	loweringsz9dict[torch._ops.OpOverload, Optional[Callable[..., Any]]]_maybe_layout_constraintsz2dict[torch._ops.OpOverload, torch._ops.OpOverload]inplaceable_foreach_opsc                  C  s<   t jjjD ],} | jD ] }|jdkr,|jtv s  dS qq
dS )Ncall_functionTF)rI   graphcurrent_nodeusersoptargetforeach_ops)nodeuser rY   F/var/www/auris/lib/python3.9/site-packages/torch/_inductor/lowering.pycur_node_has_non_foreach_usersx   s
    
r[   z%Iterable[Union[tuple[Any, Any], Any]])	arg_pairsc                 C  s   t t}d}t| D ]\}}t|ts0d}|f}t|  p>tj}d }|D ]}t|trH|j	
 } qfqH|d usvJ d|r|\}|||f ||f q|S )NFTz.foreach op should have at least one tensor arg)r   list	enumerate
isinstancer   r>   r(   Z#combo_kernel_foreach_dynamic_shapesr9   data
get_deviceappend)r\   outZunpack_argsiargsuse_foreachdevicetrY   rY   rZ   group_foreach_args   s$    


ri   zCallable[..., Any]zOptional[Callable[..., Any]])fnreturnc                 C  s&   t | tjjsdS | tv r"t|  S dS )zHGet layout constraints. Returns None if there are no layout constraints.N)r_   torch_ops
OpOverloadrN   rj   rY   rY   rZ   maybe_layout_constraints   s
    rp   c                 C  sZ   | t jjjkrtS | t jjjkr$tS | t jjjkr6tS | t jjj	krHd S t
d|  d S )NzUnknown layout constraint tag: )rl   Z_CTagZneeds_exact_stridesconstrain_to_fake_tensorsZneeds_contiguous_stridesrequire_contiguous_stridesZneeds_fixed_stride_orderconstrain_to_fx_stridesZflexible_layoutAssertionError)tagrY   rY   rZ   tag_to_layout_constraint   s    rw   c                 C  s   | st d| d S )Nzinductor does not support NotImplementedError)condmsgrY   rY   rZ   
assert_nyi   s    r|   c                   sX   t  ttttfr dd  D S t  t  tjj	rTt
 fdd  D  d S )Nc                 S  s   g | ]}t |qS rY   )add_needs_realized_inputs.0xrY   rY   rZ   
<listcomp>       z-add_needs_realized_inputs.<locals>.<listcomp>c                 3  s   | ]}t  |V  qd S N)getattr)r   overloadro   rY   rZ   	<genexpr>   s   z,add_needs_realized_inputs.<locals>.<genexpr>)r_   r]   settupler    needs_realized_inputsaddrl   rm   OpOverloadPacketupdate	overloadsro   rY   ro   rZ   r}      s    
r}   c                 C  s8   t | tjjr,|  D ]}|tt| |< qn|t| < d S r   )r_   rl   rm   r   r   rN   r   )rj   
constraintr   rY   rY   rZ   add_layout_constraint   s    r   )r   r'   r%                     	   
         intdtypec                 C  s2   t | ts| S | tv s&J d|  dt|  } | S )Nzid z missing from DTYPE_ID_LOOKUP)r_   r   DTYPE_ID_LOOKUPr   rY   rY   rZ   decode_dtype   s
    
r   c                 C  sF   t | tr"t|  p t|  S t | tjr8| jdu S t | tS d S )NT)	r_   r9   r   	get_dtyper   sympyExpr
is_integerr   r   rY   rY   rZ   is_integer_type   s
    

r   c                 C  s$   t | trt|  S t | tS d S r   )r_   r9   r   r   boolr   rY   rY   rZ   is_boolean_type  s    
r   r   type_promotion_kindc                   s0   dd   fdd|D }t |d| i\}}|S )Nc                 S  s<   t | ttjfr| S t|  }tjdg| |  dS d S )Nr'   r   )	r_   r   r   Basiclenget_sizerl   zerosr   )inpdimrY   rY   rZ   construct_input  s    z+get_promoted_dtype.<locals>.construct_inputc                   s   g | ]} |qS rY   rY   )r   argr   rY   rZ   r     r   z&get_promoted_dtype.<locals>.<listcomp>r   )r   )r   re   Zinps_r   rY   r   rZ   get_promoted_dtype  s    r   c                 C  sh   t | ttfs| g} nt| } t| D ]<}t |tjjr&| D ] }t||}|tvr@| 	| q@q&| S r   )
r_   r]   r   rl   rm   r   r   r   rM   rb   )aten_fnrj   r   Zother_fnrY   rY   rZ   get_overloads  s    
r   c                 C  s6   t | tjjr|| jv S t | tjjr2||  v S dS )NF)r_   rl   rm   r   Z_qualified_op_namern   name)rT   	namespacerY   rY   rZ   in_namespace)  s
    
r   z	list[Any]zdict[str, Any]r   z)Optional[ELEMENTWISE_TYPE_PROMOTION_KIND]z tuple[list[Any], dict[str, Any]])re   kwargs	broadcastr   convert_input_to_boolrk   c                   s  dd t  D }dd  D }|s4|s4 fS |s<|r|rHtjn4dd  D }|dd  D  t|d|i|r |d  n
|d   fd	d
fdd D  fdd D |rtt	t
 fdd|D fdd|D  }t	|d  }	t||d t| D ]\}
}| |
< q.t||t|d  D ]\}}||< qXtt D ],}
t |
 tjrxt |
 |	 |
< qxD ],}t| tjrt| |	|< q fS )Nc                 S  s   g | ]\}}t |tr|qS rY   r_   r9   r   rd   r   rY   rY   rZ   r   8  r   z"transform_args.<locals>.<listcomp>c                 S  s   g | ]\}}t |tr|qS rY   r   r   kvrY   rY   rZ   r   9  r   c                 S  s*   g | ]"}t |ttjfs"t|d r|qS r   )r_   r   r   r   hasattrr   arY   rY   rZ   r   C  s   c                 s  s   | ]}t |d r|V  qdS )r   N)r   r   rY   rY   rZ   r   I  r   z!transform_args.<locals>.<genexpr>r   r   c                   s:   t | trt| S t | tjr2tj| j dS | S d S )Nvaluer   rg   )r_   r9   to_dtyper*   Constantr   )r   )rg   r   rY   rZ   promoteT  s
    

ztransform_args.<locals>.promotec                   s   g | ]} |qS rY   rY   r   r   rY   rZ   r   \  r   c                   s   i | ]\}}| |qS rY   rY   r   r   rY   rZ   
<dictcomp>]  r   z"transform_args.<locals>.<dictcomp>c                 3  s   | ]} | V  qd S r   rY   r   rd   re   rY   rZ   r   c  r   c                 3  s   | ]} | V  qd S r   rY   r   r   r   rY   rZ   r   d  r   )r^   itemsrl   r   extendvaluesr   ra   broadcast_tensorsr]   	itertoolschainr   zipr   ranger_   r*   r   r/   create)re   r   r   r   r   Zargs_indicesZkwargs_indicesZpromoting_argsZbroadcastedsizerd   r   r   rY   )re   rg   r   r   r   rZ   transform_args1  sT    r   c                   s>   t   fdd}t| }t| tt|| |S )a  
    Add a foreach lowering to lowerings dict.

    Arguments:
        aten_fn: torch.ops.aten.* fn we are lowering
        decomp_fn: alternate implementation on our IR
        broadcast: True to apply broadcasting to tensor inputs
        type_promotion_kind: kind of type promotion applied to tensor inputs, `None` means no type promotion
        convert_input_to_bool: some logical ops require inputs are converted to bool
    c                    s*   t | dksJ  | i |}t| |S )Nr%   )r   r:   )re   r   rc   	decomp_fnrY   rZ   wrapped  s    z+_register_foreach_lowering.<locals>.wrapped)	functoolswrapsr   rV   r   rM   dictfromkeys)r   r   r   Zaten_fnsrY   r   rZ   _register_foreach_loweringy  s    
r   c                   s<   t  fdd}t  |t | |S )a  
    Add a lowering to lowerings dict

    Arguments:
        aten_fn: torch.ops.aten.* fn we are lowering
        decomp_fn: alternate implementation on our IR
        broadcast: True to apply broadcasting to tensor inputs
        type_promotion_kind: kind of type promotion applied to tensor inputs, `None` means no type promotion
        convert_input_to_bool: some logical ops require inputs are converted to bool
    c                    s   t | } t|}d}t| dkrBt| d t tfrBd}t | d } tdd  D srtdd | D rrJ dt| |\} }|r| g} | i |}t	| |S )	NFr'   r   Tc                 s  s    | ]}|t v pt|d V  qdS )Z_c10d_functionalN)	fallbacksr   )r   rj   rY   rY   rZ   r     s   z6_register_lowering.<locals>.wrapped.<locals>.<genexpr>c                 s  s   | ]}|d kV  qdS )rc   NrY   r~   rY   rY   rZ   r     r   zout= ops aren't yet supported)
r]   r   r   r_   r   allanykeysr   r:   )re   r   unpackedrc   r   r   r   r   r   rY   rZ   r     s(    
z#_register_lowering.<locals>.wrapped)r   r   r   r   r   r   )r   r   r   r   r   lowering_dictr   rY   r   rZ   _register_lowering  s
    r   Fz.Callable[[Callable[_P, _T]], Callable[_P, _T]])r   rk   c                 C  s   t jt| ||||dS )z+
    Shim to support decorator syntax.
    )r   r   r   r   )r   partialr   )r   r   r   r   r   rY   rY   rZ   register_lowering  s    r   c                 C  s   g }t jt| t|tjjdD ]\}}tjjj	j
t|dddrP|| q tjjj	j
t|dddrx|| q tjj|| tt|jtt|jk r|| q || q tt|S )z
    Broadcasting logic based on symbolic shapes.

    We give the shapes 0 and 1 concrete values, while all other shapes
    are symbolic sympy formulas.
    )	fillvaluer'   TZsize_oblivious)r   zip_longestreversedr   SOnerI   rQ   sizevars	shape_envevaluate_exprEqrb   guard_equalsr   expandZfree_symbolsr   )r   boutputr   yrY   rY   rZ   broadcast_symbolic_shapes  s    $

 r   c              
     s.  |d u s|d u sJ d|d u r.|d u r.t j}tdd | D sD| S tdd | D r|pft| d|ifdd  fdd	| D S td
d | D }g }| D ]}t|ttfr|	t
tj|| | dt|  qt|tjr|	t
t|| | dt|  q|	| q|S )NzEonly one of override_return_dtype or type_promotion_kind may be givenc                 s  s    | ]}t |tjttfV  qd S r   )r_   r   r   r   floatr~   rY   rY   rZ   r     r   z$promote_constants.<locals>.<genexpr>c                 s  s    | ]}t |tttjfV  qd S r   )r_   r   r   r   r   r~   rY   rY   rZ   r     r   r   c                   s8   t | tjr tj|  td dS tj|  td dS d S )Nindexr   rg   r   )r_   r   r   r*   r0   r=   r   r   r   rY   rZ   
const_func  s
    
z%promote_constants.<locals>.const_funcc                   s   g | ]} |qS rY   rY   r~   )r   rY   rZ   r     r   z%promote_constants.<locals>.<listcomp>c                 s  s$   | ]}t |tttjfr|V  qd S r   )r_   r9   r/   r*   r   r~   rY   rY   rZ   r     r   r   r   )r   DEFAULTr   r   r   nextr_   r   r   rb   r/   r   r*   r   r   get_device_or_errorr]   r   r   r   r0   )inputsoverride_return_dtyper   exrc   r   rY   )r   r   rZ   promote_constants  sL    

	r  c                   s&   d ddd fdd}|S )Nalphar9   r  c              	     s  d ur*t dd D r*r"J  S t
rd| d urp| dkrpttd | d< n| d u spJ dd D d  
pd   dd  D ]B}t|tjst	t	| ksJ d d	 d	|  qt
jt
jftjd uo@ttjd
d d uo@tjjjd uo@tjjjddo@ v  	fdd}sd }D ]$}t| jrj| } qqj|sd  }p|}tj| |dS )Nc                 s  s    | ]}t |tot|V  qd S r   r_   r1   r2   r   r   rY   rY   rZ   r   5  s   z0make_pointwise.<locals>.inner.<locals>.<genexpr>r'   c                 S  s   g | ]}|  qS rY   make_loaderr~   rY   rY   rZ   r   B  r   z1make_pointwise.<locals>.inner.<locals>.<listcomp>r   zndim mismatch  rR   Zlow_precision_pointwise_barrierFc                   s   t  t ks$J d  d tjkrLd urL fddD  S g }tD ]N\}}| }|  }r|v rtj||dd}t||}|| qX| }rtj|dd}t|S |S d S )Nzwrong ndim r  c                   s   g | ]}| qS rY   rY   )r   loadr   rY   rZ   r   Z  r   zCmake_pointwise.<locals>.inner.<locals>.inner_fn.<locals>.<listcomp>F)Zuse_compute_types)r   rl   r   r^   r   rH   r   rb   )r   Zinputs_loadedZ	inp_indexr  rc   Z	inp_dtypeZdowncast)r   emulate_precision_castsrj   r  loaders	low_pr_fpoverride_fn_when_input_boolrangesr  rZ   inner_fnW  s     $z/make_pointwise.<locals>.inner.<locals>.inner_fnrg   r   r  r  )r   r  r]   mulr   r   r_   r*   BaseConstantr   rl   bfloat16float16rI   rQ   r   rR   metagetr?   ra   typer6   r   )r  r  otherr  rg   rd   allow_alpharj   override_devicer  r  triton_fallback)r   r  r  r  r  r  rZ   inner4  s\    


zmake_pointwise.<locals>.innerrY   )rj   r  r#  r  r"  r$  r%  rY   r!  rZ   make_pointwise,  s    "Kr&  c                   s   dddd fdd}|S )Nr'   r  zlist[list[TensorBox]]r	  c                   sV  t tjjjdkp$tjjjtv p$t }d }|D ]}t|t	t
fr.|} qJq.|d usZJ dg }|D ]2}t|t	t
fs||gt |  qb|| qbtt| }d gt | }| D ]\\}}	}
g }|
D ]Z\}} r|d| i}n| }|||< tj|tjr|	r|r|  ||  q|rtj| qtdd |D sRJ |S )Nr   z1at least one input must be a list to a foreach opr  c                 s  s   | ]}|d uV  qd S r   rY   r~   rY   rY   rZ   r     r   z8make_foreach_pointwise.<locals>.inner.<locals>.<genexpr>)r   rI   rQ   rR   rS   rU   inplace_foreach_opsr[   r_   r]   r   rb   ri   r   r   has_featureBackendFeatureFOREACHrealizeget_operation_nameregister_operation_listr   )r  r  Zrealize_outputsZa_list_inputinputZbroadcast_inputsgroupsoutputsrg   rf   groupoperation_list
output_indre   r   r"  pw_fnrY   rZ   r%    sT    
z%make_foreach_pointwise.<locals>.innerrY   )r5  r"  r%  rY   r4  rZ   make_foreach_pointwise  s    6r6  r9   ztorch.dtyper   r   c                   s>   |    kr |rt| S | S  fdd}t| d| S )Nc                   s   t j|  dS )N)	src_dtype)rH   r   r   r   r8  rY   rZ   	_to_dtype  s    zto_dtype.<locals>._to_dtyper  )r   cloner&  )r   r   copyr:  rY   r9  rZ   r     s
    r   c                 O  s   ddl m} |}| j}||tjd}t| |j|  W d   n1 sN0    Y  |j}|sfJ t|}dgt	| }	|
 D ]`\\}
}}g }|D ]:\}}||	|< tj|
tjr|r|  ||  q|rtj| qtdd |	D sJ |	S )aI  
    This lowers an invocation of foreach_map
    The way this works is that an arbitrary N-arg func is provided by the user, looped over by the
    polyfill with the same semantics as a foreach op (a loop applying an n-ary function to n args)
    and then traced into a subgraph by dynamo.
    This code allows us to inline the subgraph into the main graph lowering using the PontwiseSubgraphLowering.
    The graph outputs represent the vertically fused sequence of ops, and then register_operation_list
    below registers the buffers as horizontally fuseable in the scheduler.
    r'   )PointwiseSubgraphLowering)Zroot_graph_loweringNc                 s  s   | ]}|d uV  qd S r   rY   r~   rY   rY   rZ   r     r   z_foreach_map.<locals>.<genexpr>)subgraph_loweringr>  graph_modulerI   rQ   Zset_graph_handlerrunZgraph_outputsri   r   r   r(  r)  r*  r+  rb   r,  r-  r   )Zsubgraphre   r   r>  r  ZgmZpw_subgraphZsub_outputsr/  r0  rg   rf   r1  r2  r3  r   rY   rY   rZ   _foreach_map  s0    (rB  c                 C  sZ   |j s|  j rL|  r6t| |d}tj||  |S ttj	j
dd| |S t| |ddS )Nr   Fadd_to_fallback_setTr=  )
is_complexr   r   
empty_liker*   ZInplaceCopyFallbackr   fallback_handlerprimsconvert_element_typedefaultr   )r   r   dstrY   rY   rZ   _convert_element_type  s    rM  rE  c                C  sf   |   }||kr |rt| S | S dd }||}||}||krRttjj| |S tt| |S d S )Nc                 S  s"   | j rt| jS t| jS d S r   )is_floating_pointrl   Zfinfobitsiinfor   rY   rY   rZ   _get_primitive_bitwidth	  s    z1to_dtype_bitcast.<locals>._get_primitive_bitwidth)	r   r<  rH  atenviewr   r9   r.   r   )r   r   r=  Zx_dtyperQ  Zsrc_bitsZdst_bitsrY   rY   rZ   to_dtype_bitcast  s    rT  c                 C  s8   |j s|  j r.ttjtjjj	j
| |S t| |S r   )rF  r   r9   r   r*   ZComplexViewrl   rH   rR  rS  r   rT  r7  rY   rY   rZ   _view_dtype  s
    rU  r=  non_blockingztorch.device)r   rg   c                C  s:   t |}|  |kr$|r t| S | S ttj| ||S r   )r=   ra   r<  r9   r   r*   Z
DeviceCopy)r   rg   r=  rW  rY   rY   rZ   	to_device!  s    rX  c                 C  s   t | |d|dS )NTrV  )rX  )r   rg   rW  rY   rY   rZ   _device_put(  s    rY  Tc	           
      C  sz   |p| j }t|}	t||| |dur.t|}t|	||||d}	t| |||d|	}	tt|rvttt|d|d|	 |	S )z3A pointwise function that maps ops.{name} to inputsN)r  r  r"  r$  )r   r   r   )r   r   )__name__r4   rD   r&  r   r   rI  r   )
r   r   r   r   r   r  r  r"  r$  rj   rY   rY   rZ   register_pointwise-  s>    

r[  c                    sx   d} t d  fdd} fdd}t|t|tjdgfdd}ttj|}tt| rttt	t| d	d
| |S )z2A pointwise function that maps ops.frexp to inputsfrexpc                    s    | i |d S Nr   rY   re   r   r\  rY   rZ   frexp0_  s    zregister_frexp.<locals>.frexp0c                    s    | i |d S Nr'   rY   r^  r_  rY   rZ   frexp1b  s    zregister_frexp.<locals>.frexp1r;  c                    s$    d | i | d | i |fS Nr   r'   rY   r^  )pw_fnsrY   rZ   rj   j  s    zregister_frexp.<locals>.fnNr   )
r4   r&  rl   int32r   rR  r\  r   rI  r   )r   r`  rb  rj   rY   )r\  rd  rZ   register_frexpZ  s*    
rf  c                 C  s   t ||d}t| |}|S )Nr"  )r6  r   )r   Zpointwise_lowering_fnr"  rj   rY   rY   rZ   register_foreach_pointwise|  s    
rh  )r   r   c                   s  dd }t |ttfr"t||}t |ttfr<t||}| ||g t d  d tjd}dd t D }t|t	 fdd|D  D ]\}}| |< qt
t D ]8}t  | tjrt | t |d	    |< qt||d
 d	 t d |t d |S )Nc                  W  s
   t j|  S r   )rH   wherer   rY   rY   rZ   rj     s    zwhere.<locals>.fnr'   r%   r   c                 S  s   g | ]\}}t |tr|qS rY   r   r   rY   rY   rZ   r     r   zwhere.<locals>.<listcomp>c                   s   g | ]} | qS rY   rY   r   r   rY   rZ   r     r   r   r;  )r_   r   r   constant_liker   r   r   r^   r   r   r   r   r*   r   r/   r   r]   r   r&  r   )rz   r   r   rj   r   indicesrd   r   rY   r   rZ   ri    s$    
$
&
ri  c                  G  s   t | dkr*t| d ttfr*t| d  S ttdd | D g }g }| D ]H}| }t |t |ks~t	dd t
||D rt||}|| qJ|S )Nr'   r   c                 S  s   g | ]}|  qS rY   )r   r~   rY   rY   rZ   r     r   z%broadcast_tensors.<locals>.<listcomp>c                 s  s   | ]~\}}t jjjjt|d ddrDt jjjjt|d dd p|t jjjjt|d dd o|t jjjjt|d ddV  qdS )r'   Tr   N)rI   rQ   r   r   r   r   r   r   r   r   rY   rY   rZ   r     s   

	

z$broadcast_tensors.<locals>.<genexpr>)r   r_   r]   r   r   r   reducer   r   r   r   r   rb   )r  rU   r0  r   sizesrY   rY   rZ   r     s    
r   c                 C  s   | S r   rY   r   rY   rY   rZ   nop  s    ro  
lift_freshc                 C  s   t | tsJ |d u r&tt| jS t |ttjfrDtj	j
|ntdd |D }tt|  |}tt |tsz|fn|}g }t|  D ]4\}}||v rtj	j
jt|ddds|| q||  krt| |S | S )Nc                 s  s   | ]}t jj|V  qd S r   rI   rQ   r   evaluate_static_shaper   drY   rY   rZ   r     r   zsqueeze.<locals>.<genexpr>r'   Tr   )r_   r9   r8   r   r`   r   r   r   rI   rQ   r   rr  r   r   r   r   r    r^   r   r   rb   rS  )r   r   dims	new_shapert  srY   rY   rZ   squeeze  s"    rx  c                 C  s   t t| |S r   )r<  rx  )r   r   rY   rY   rZ   squeeze_copy  s    ry  c                 C  s2   t | |}t| tsJ t|ts&J |j| _| S r   )rx  r_   r9   r`   r   r   valrY   rY   rZ   squeeze_  s
    
r|  c                 C  s2   t | rt| dtjdS td}t|tjd| S )NFr   isinfr;  r   	full_likerl   r   r4   r&  r   rj   rY   rY   rZ   r}    s    r}  c                 C  s2   t | rt| dtjdS td}t|tjd| S )NFr   isnanr;  r~  r  rY   rY   rZ   r    s    r  c                 C  s$   t | rt| S td}t|| S )Nceilr   r<  r4   r&  r  rY   rY   rZ   r    s    r  c                 C  s$   t | rt| S td}t|| S )Nfloorr  r  rY   rY   rZ   r    s    r  c                 C  s(   t | rt| S td}t|| S d S )Nroundr  r  rY   rY   rZ   r    s    r  c                 C  s$   t | rt| S td}t|| S )Ntruncr  r  rY   rY   rZ   r    s    r  c                 C  s   t | g\} t| tjr(t| t|S t| ts6J t|ttfsHJ t| 	 t|kr`| S t
| 	 stjjt| 	 }|dkrt
|s| tjjt||  tt| jt|S r]  )r  r_   r*   r  r/   r   r   r9   r]   r   r   rI   rQ   r   size_hint_or_throwrF   
mark_reuser`   )r   rn  Zx_size_productrY   rY   rZ   r   %  s$    
r   c                 C  sL   t |}|D ]}d||< q| }t|D ]\}}|dkr&t||}q&t||S Nr  )r]   r^   	unsqueezer   )r   shapeZbroadcast_dimensionsrw  Zbroadcast_dimensionr   idxr   rY   rY   rZ   broadcast_in_dim@  s    
r  c                 C  s   t | | S r   )r   r   )r   r   rY   rY   rZ   	expand_asN  s    r  c                   sh  t |   tt krHtjjgtt      t| t  } tt|  ks`J t |  }d}ttD ](}| dkrd}|| |  ||< q||rt|| 	 | 
 dS tdd t D rtt| |S  fdd}t sBt|sBtjjt }|dkrB| tjjt||  |  tj| 
 | 	 |t |d	S )
NFr   Tr   rg   c                 s  s"   | ]\}}|d kp|d kV  qdS r'   NrY   rl  rY   rY   rZ   r   e  r   zrepeat.<locals>.<genexpr>c                   st   t | t ksJ t| } tt D ]B}| dkr( | dkrRtjj| |< q(t| | d | | |< q(| S ra  )r   r]   r   r   r   ZZeror$   )r   rd   Zold_sizerepeatsx_loaderrY   rZ   r  j  s    zrepeat.<locals>.inner_fnr  )r]   r   r   r   r   r   rS  r   emptyr   ra   r   r   r<  r   r   rI   rQ   r   r  rF   r  r  r6   r   )r   r  new_sizeZzero_tensorrd   r  Zold_size_productrY   r  rZ   repeatS  s>    
r  c                 C  s2   t | tsJ t |ttfs J tt| j|S r   )r_   r9   r]   r   r;   r   r`   )r   rn  rY   rY   rZ   rS    s    rS  c                 C  s6   t | tsJ t |ttfs J tt| jt|S r   )r_   r9   r]   r   r5   r   r`   )r   ru  rY   rY   rZ   permute  s    r              c              	   C  s8   t | tsJ t| |d}ttjj| j|||||dS )Nr   clamp)r_   r9   _validate_dimr*   	SliceViewr   r`   )r   r   startendstepr  rY   rY   rZ   slice_  s    r  c              	   C  s   t | tr"t | jtjr"| j } |   t| sDtd|  dt	| \}}t
|j|jdd |D dd |D t|pd}ttj||dS )Nzunrealized as_strided(z, ...)c                 S  s   g | ]}t |qS rY   r   r   r   rw  rY   rY   rZ   r     r   zas_strided.<locals>.<listcomp>c                 S  s   g | ]}t |qS rY   r  r  rY   rY   rZ   r     r   r   r`   layout)r_   r9   r`   r*   BaseViewunwrap_viewr+  is_storage_and_layoutry   as_storage_and_layoutFixedLayoutrg   r   r   r   ReinterpretView)r   r   stridestorage_offsetstorageZ
old_layoutZ
new_layoutrY   rY   rZ   
as_strided  s    

r  c                 C  s$   t | tsJ t| |||j| _| S r   )r_   r9   r  r`   )r   r   r  r  rY   rY   rZ   as_strided_  s    r  c                 C  s   t | |||}t|S r   )r  r<  )r   r   r  r  resultrY   rY   rZ   as_strided_copy  s    r  c                   s   g d}D ]*} |||    f d d }qdd D  fdd}td  }d d | < tjd  d  ||dS )Nr   r  c                 S  s   g | ]}|  qS rY   r  r  rY   rY   rZ   r     r   z!pointwise_cat.<locals>.<listcomp>c           
   	     sB  t |  tj}g }g }ttD ]܉  dkr@t dtjnt   d tj}t   d tj}t ||}t ||} dkr|}n" td kr|}nt 	||}|
| t| t   d  < |
t | fddd q&|d }	ttd ddD ] t |  |  |	}	q |	S )Nr   r'   c                     s     S r   rY   rY   )rd   idx_loadinputs_loadersrY   rZ   <lambda>  r   z1pointwise_cat.<locals>.inner_fn.<locals>.<lambda>        r  r%   )rH   
index_exprrl   int64r   r   constantgeltand_rb   r]   r#   maskedri  )
r  idx_dimmasksZmasked_loadsr  r  Z
start_condZend_condmaskZnext_valr   r  r  Zinputs_ranges)rd   r  rZ   r    sD    
zpointwise_cat.<locals>.inner_fnr  )rb   r   r]   r6   r   ra   r   )r  r   Zprev_endr   r  r  rY   r  rZ   pointwise_cat  s    0

r  )r.  scaleszero_pointsaxis	quant_min	quant_maxr   rk   c              	     s   t  dksJ dt  dks0J d|  tjkrJt| tj} |  tjksjJ d|    t |  k sJ dt |   |     f	dd}tj	| 
 ||  dS )	Nr'   expect scales 1 dimexpect zero_points 1 dim<Expecting input to have dtype torch.float32, but got dtype: Expecting axis to be < c           
        s   |   f}| }|}|}t tjd\}}jtjkrPt|tj}jtjkrjt|tj}t|}t|| | }t	|t
||}	t|	S Nr   )_create_constantsrl   float32r   rH   r   re  
reciprocalr  maximumminimum)
r  channel_idxr.  scale
zero_pointqminqmax	inv_scaler{  clamped	r  r   input_loaderr  r  r  scales_loaderr  zero_points_loaderrY   rZ   r    s    

z;quantized_decomposed_quantize_per_channel.<locals>.inner_fnr  )r   r   r   rl   r  r   r  r  r6   r   ra   )r.  r  r  r  r  r  r   r  rY   r  rZ   )quantized_decomposed_quantize_per_channel  s(    
r  	out_dtypezOptional[torch.dtype])	r.  r  r  r  r  r  r   r  rk   c          	        s   t  dksJ dt  dks0J d|  |ksTJ d| d|    t |  k szJ dt |   d u rtj|     fdd}tj|  ||  d	S )
Nr'   r  r  Expecting input to have dtype , but got dtype: r  c                   s   |   f}| }|}|}j tjkr<t|tj}j tjkrVt|tj}tt|tj|| }t|}|S r   )r   rl   r  rH   r   sub)r  r  r.  r  r  r{  r  r  r  r  r  r  r  rY   rZ   r  U  s    
z=quantized_decomposed_dequantize_per_channel.<locals>.inner_fnr  	r   r   r   rl   r  r  r6   r   ra   )	r.  r  r  r  r  r  r   r  r  rY   r  rZ   +quantized_decomposed_dequantize_per_channel7  s(    r  r   )r.  r  r  r  r  r   rk   c                   s   |   tjkrt| tj} |   tjks:J d|    |   fdd}tj|   t	j
|t|t|d|  dS )Nr  c           	        sf   | }t d| |tjd\}}t|| | }t tjd\}}tt|||}t| S )N      ?r   )r  rl   r  rH   r  r  r  r   )	r  r  r  r.  r  r{  r  r  r  r   r  r  r  rY   rZ   r    s    
zBquantized_decomposed_quantize_per_tensor_default.<locals>.inner_fnr  r  r  )r   rl   r  r   r  r  r6   r   ra   r   r   r   r   r   r.  r  r  r  r  r   r  rY   r  rZ   0quantized_decomposed_quantize_per_tensor_defaultl  s    
r  )r.  r  r  r  r  r   r  rk   c                  sv   |   |ks$J d| d|    d u r2tj|    fdd}tj|  tj|t	|t
|d|  dS )Nr  r  c                   sF    | }t ||tjd\}}tt|tj|| }t|}|S r  )r  rl   r  rH   r  r   )r  r  r  r.  r{  r  r  rY   rZ   r    s
    zDquantized_decomposed_dequantize_per_tensor_default.<locals>.inner_fnr  r  )r   rl   r  r  r6   r   ra   r   r   r   r   r   r.  r  r  r  r  r   r  r  rY   r  rZ   2quantized_decomposed_dequantize_per_tensor_default  s    r  c                   s   |   tjkrt| tj} |   tjks:J d|    t dksrt dkrj d dksrJ dt dkst dkr d dksJ d|     fdd}tj	| 
  ||  dS )	Nr  r   r'   expect scale as scalar tensor"expect zero_point as scalar tensorc                   s   | }t  dkrdnd}t  dkr:dnd}jtjkrZt|tj}jtjkrtt|tj}t|t| | }t	tjd\}}t
t|||}t| S )Nr'   r   rY   r   )r   r   r   rl   r  rH   r   r  r  r  r  r  )r  r.  _scale_zero_pointr{  r  r  r  r   r  r  r  r  scale_loaderr  zero_point_loaderrY   rZ   r    s    zAquantized_decomposed_quantize_per_tensor_tensor.<locals>.inner_fnr  )r   rl   r  r   r  r   r   r  r6   r   ra   r  rY   r  rZ   /quantized_decomposed_quantize_per_tensor_tensor  s:    r  c                  s   t  dks8t  dkr0 d dks8J dt  dkspt  dkrh d dkspJ d|  |ksJ d| d|   d u rtj|      fdd}tj|  ||  d	S )
Nr   r'   r  r  r  r  c                   s    | }t  dkrdnd}t  dkr:dnd}jtjkrZt|tj}jtjkrtt|tj}tt|tj|| }t|}|S )Nr'   r  rY   )r   r   r   rl   r  rH   r   r  )r  r.  r  r  r{  r  r  r  r  r  r  rY   rZ   r    s    zCquantized_decomposed_dequantize_per_tensor_tensor.<locals>.inner_fnr  r  r  rY   r  rZ   1quantized_decomposed_dequantize_per_tensor_tensor  s:    r  c           
        s*  | d   jdk}|rttdd | D rt| D ]}|  q,tdd | D rbttjg| R  \} }ttjj| |S t	| dkrt
| d S t| d |d}t| dtjifdd	| D } d
dddddd fddtfdd| D }ddfddtjr"t| |S |r:ttj| |S fddd}d ddddt	| |kst	| tjkrt fd d| D rtfd!dtjjD }tfd"d| D o|}tfd#d| D otfd$d| D  }	|s|	r|st| |S ttj| |S )%Nr   cpuc                 s  s"   | ]}|  tjtjfv V  qd S r   )r   rl   int8uint8r   r.  rY   rY   rZ   r     s   zcat.<locals>.<genexpr>c                 s  s   | ]}t | d kV  qdS )r   N)r   r   r  rY   rY   rZ   r   &  r   r'   r   c                   s   g | ]}t | qS rY   r   r  r   rY   rZ   r   1  r   zcat.<locals>.<listcomp>zUnion[TensorBox, ir.StorageBox]	ir.IRNoder   rk   c                 S  s>   t | tr(t | jtjr"| j S | jS t | tjr:| jS | S r   )r_   r9   r`   r*   r  r  
StorageBoxr   rY   rY   rZ   unwrap_tensor3  s    

zcat.<locals>.unwrap_tensorc                 S  s   t | tjot | jtjS r   )r_   r*   ComputedBufferr`   r7   rh   rY   rY   rZ   is_reduction?  s    zcat.<locals>.is_reductionc                   sJ   t | ttjfr | S | pHt | tjoHt fdd|  D S )Nc                 3  s   | ]} t j|V  qd S r   )rI   rQ   
get_buffer)r   readcan_fuse_reductionrY   rZ   r   H  s   z2cat.<locals>.can_fuse_reduction.<locals>.<genexpr>)r_   r9   r*   r  r6   r   get_read_namesr  )r  r  r   rY   rZ   r  B  s    zcat.<locals>.can_fuse_reductionc                 3  s   | ]} |V  qd S r   rY   r   rh   r  rY   rZ   r   O  r   r   )rk   c                   sZ   t | r*t j| dd\}}t j| S t| tt jfrF | S t| t jrVdS dS )NF)freezeT)	r*   r  r  ConcatKernelZcan_realize_into_without_copyr_   r9   r  r6   )r   r  r   )should_lower_cat_inputr   rY   rZ   r  Q  s    
z#cat.<locals>.should_lower_cat_inputc                   s\   t | ttjfr | S t | tjs,dS |  j}|  D ]}| tj	
|7 }q>|S r]  )r_   r9   r*   r  r6   Zinner_fn_opcountZnum_opsr  rI   rQ   r  )r   countr  )op_countr   rY   rZ   r  h  s    
zcat.<locals>.op_countr   r%   torch._ops.OpOverloadrT   c                 S  s   | t jjt jjfv S r   )rR  catrK  constant_pad_ndr  rY   rY   rZ   additional_pointwise_ops}  s    z%cat.<locals>.additional_pointwise_opsc                 3  s   | ]}| kV  qd S r   rY   r	  )MAX_SIMPLE_OP_COUNTr  rY   rZ   r     r   c                 3  s   | ]}t | V  qd S r   )r@   )r   Zuse)r  rY   rZ   r     s   c                 3  s   | ]} |V  qd S r   rY   r  r  rY   rZ   r     r   c                 3  s   | ]} |V  qd S r   rY   r  r  rY   rZ   r     s   c                 3  s   | ]} |V  qd S r   rY   r	  r  rY   rZ   r     r   )ra   r  r   r+  require_channels_lastrR  r  rH  rK  r   r<  r  r   r   r   r   r(   Zforce_pointwise_catr  r9   r*   r  r   Zmax_pointwise_cat_inputsrI   rR   rS   )
r  r   Z
cpu_devicer.  r   Zfusable_reductionZMAX_COMPLEX_POINTWISE_CATZpointwise_usesZfuse_pointwise_useZhorizontal_fuse_catrY   )r  r  r  r   r  r  r  r   rZ   r    sb    



r  )offsetdim1dim2c                   s  |   ttdtdtkfdd tjjt	|d}|rtjj
tjj |  d}n(tjj
tjj  | d}d |r| df nd|f fddtD }||  fdd	}ttj| ||S )
N)r  rankc                     s   d  d S )Nz(diagonal dimensions cannot be identical z, rY   rY   r  r  rY   rZ   r    r   zdiagonal.<locals>.<lambda>r   )r   r   c                   s    g | ]\}}| fvr|qS rY   rY   )r   rd   rw  r  rY   rZ   r     r   zdiagonal.<locals>.<listcomp>c                   s   | d }dgt  }d}tD ]L}|kr@| d  ||< q"|krZ| d  ||< q"| | ||< |d7 }q"|t d ksJ |S )Nr  r   r'   r%   )r   r   )r  Zdiag_idxZoriginal_idxZcur_dimrt  Zbase_idxr  r  Znum_dimsZoriginal_shaperY   rZ   	reindexer  s    
zdiagonal.<locals>.reindexer)r   r   r   r   rI   rQ   r   r   r   LtZevaluate_maxZevaluate_minr^   rb   r9   r*   GenericViewr   )r.  r  r  r  Zoffset_negativeZ	diag_sizern  r  rY   r  rZ   diagonal  s:    
r   c                 C  s   t t| |||S r   )r<  r   )r.  r  r  r  rY   rY   rZ   diagonal_copy  s    r!  c                 C  s$   t | }t||||}t|| |S r   )r<  r   	mutate_to)r.  srcr  r  r  r   rU   rY   rY   rZ   diagonal_scatter  s    
r$  c                 C  s,   t ||  | }tt| |||d |S ra  )r;   Zhandle_negative_indexr   rx  r  )r   r   r  rY   rY   rZ   select  s    r%  c           
   
   C  s   t | |d}|}t|ttfsd|  | }tjjt	|| d |}|g| }||d |  |d< g }d}|D ](}|| }	|
t| |||	dd |	}qp|S )Nr   r'   r  Fr  )r  r_   r]   r   r   rI   rQ   r   rr  r"   rb   r  )
r   rn  r   Zsizes_x_sizechunksr  r  r   r  rY   rY   rZ   split  s     
r(  c                 C  s   t | ||S r   )r(  )r   rn  r   rY   rY   rZ   split_with_sizes   s    r)  c                   s>   t  d tjj   } fddt|D }|S )Nr   c                   s   g | ]}t  |qS rY   )r%  r   r   r   rY   rZ   r   	  r   zunbind.<locals>.<listcomp>)r  rI   rQ   r   rr  r   r   )r   r   r&  r  rY   r*  rZ   unbind  s    r+  c                   s   |   }t|}t|| |dkr4tt| d|dS |  }tjj}||| |	d t
|| d }||dkr| |t|| | g |d   || d d  |}	 fdd}
ttj| |	|
S )Nr   )r  r'   c                   s:   | d |     }g | d   ||  d d R S )Nr  r'   rY   )r  Zdim_idxr   r  rY   rZ   r  #  s    zunfold.<locals>.reindexer)r   r   r   r  r  rI   rQ   r   	guard_leqguard_ltr"   r  r  r!   r9   r*   r  r   )r   Z	dimensionr   r  rn  ndimdim_sizer   Znew_dim_sizeout_sizer  rY   r,  rZ   unfold  s"    
(r2  c                 C  s2   t | |d}t|  }||tjj t| |S ra  )r  r]   r   insertr   r   r   rS  )r   r   rv  rY   rY   rZ   r  *  s    r  c                 C  s2   t | |}t| tsJ t|ts&J |j| _| S r   )r  r_   r9   r`   rz  rY   rY   rZ   
unsqueeze_2  s
    
r4  c                 C  sX   t jjjt|}t|  }|dk r6||| 7 }d|  krN|| k sTn J |S r]  )	rI   rQ   r   r   r   r   sympifyr   r   )r   r   r  r/  rY   rY   rZ   r  ;  s    r  r  c                 C  sT   t | |d}tjj|  | d }t| |d|}t| |||d }t|t|S )Nr   r%   )	r  rI   rQ   r   rr  r   r  r  sigmoid)r   r   Znew_lenr   r   rY   rY   rZ   gluD  s
    r7  c                   s$   |rt    fdd}d|_|S )Nc                    s*   dd }t |tjj g| R i |S )Nc                 S  s   t | tjrt| S | S r   )r_   r*   r1   r9   r   r   rY   rY   rZ   wrap_tensorsS  s    z7fallback_handler.<locals>.handler.<locals>.wrap_tensors)pytreetree_mapr*   FallbackKernelr   )re   r   r8  kernelrY   rZ   handlerR  s    z!fallback_handler.<locals>.handlerT)r   r   Z_is_fallback_handler)r=  rD  r>  rY   r<  rZ   rH  N  s
    
	rH  c                   C  s   t d d S )NzjTorchinductor does not support code generation for complex operators. Performance may be worse than eager.)warningswarnrY   rY   rY   rZ   _warn_complex_not_supported`  s    rA  ztorch.Tensorr  c                 C  s   |   rt  dS | jrdS | jtjkr~|s0dS t|jtjj	rb|jt
jjt
jjt
jjt
jjfv pzt|jtjj	ozt|j S dS )z0Do not support reading or writing to this tensorTF)rF  rA  is_metar   rl   Zfloat8_e8m0fnur_   rU   rm   rn   rR  rS  r  rK  r<  Z
_scaled_mmrA   )rh   rW   rY   rY   rZ   unsupported_input_tensori  s(    rC  c                 C  sL   t jjtjjjjf}|dur2|j|v r2| 	 r2dS t
| |r@dS | joJtjS )z2Do not support writing tensor but can read from itNFT)rR  rS  r   rl   rH   rI  rJ  rK  rU   rF  rC  Zis_cpur(   Zdisable_cpp_codegen)rh   rW   Zsupported_complex_viewsrY   rY   rZ   unsupported_output_tensor  s    

rD  ztorch.fx.NoderW   c                   sv    j tjju rdS  jdkr dS  j tjju r2dS  fdd}tj ji  j	D ]}||ddrR dS qR| ddS )NFplaceholderc                   sp   t | tjjsdS d| jvr dS t| jd D ]:}t |tjjsDq0|rZt	| rj dS q0t
| r0 dS q0dS )NFr{  T)r_   rl   fxNoder  r9  tree_leavesZ_subclassesZ
FakeTensorrD  rC  )Zinp_out_node	is_outputr  rE  rY   rZ   check_skip_condition  s    


zCfallback_node_due_to_unsupported_type.<locals>.check_skip_condition)rJ  T)
rU   rR  view_as_complexrK  rT   lift_fresh_copyr9  Zarg_tree_leavesre   r   )rW   Zallow_cpu_inputsrK  r   rY   rE  rZ   %fallback_node_due_to_unsupported_type  s    
rN  c                   s   | t vs|sJ d|  |r|ttdr|t| gr|tjrJ| tjj	j
v s||s|tjjjrldtjj_td td|  d fdd}t| tjjr|  D ]}t| |}|| qn8t| tjjtjjfr||  ntd	|  d
t|  d S )Nz*both a fallback and a decomp for same op: CIFznA make_fallback error occurred in suppress_errors config, and suppress_errors is being disabled to surface it.zmake_fallback(a.  ): a decomposition exists, we should switch to it. To fix this error, either add a decomposition to core_aten_decompositions (preferred) or inductor_decompositions, and delete the corresponding `make_fallback` line. Get help from the inductor team if unsure, don't pick arbitrarily to unblock yourself.c                   s.   t |   d urt|   t| d dt| S Nr   )r}   r   r   rH  )op_overloadlayout_constraintrY   rZ   register_fallback  s    

z(make_fallback.<locals>.register_fallbackzUnsupported fallback z with type )r,   r   osgetenvr-   r(   fallback_randomrl   _decompZdecompositions_for_rngZextra_random_decompsZ_dynamoZsuppress_errorslogwarningru   r_   rm   r   r   r   rn   ZHigherOrderOperatorRuntimeErrorr  )rT   rS  r@  Zoverride_decomprT  olrQ  rY   rR  rZ   make_fallback  s>    	




r]  c                 C  s$   d}| D ]}|| }qt |tjdS )z
    TorchInductor offset calculation differs from PyTorch eager offset
    calculation for random ops (tl.rand vs torch.rand). In future, we should
    strive for same impl for tl.rand and torch.rand.
    r'   r   tensorrl   r  )r  Znumelrw  rY   rY   rZ   philox_rand_offset  s    
r`  c           	        sd   t | | t j|  | |  fdd}tj| |t| d}t	| }||fS )Nc                   sV   t g tj}t g tj}t t | tj|}t ||}t | S r   )rH   r   rl   re  r   r  rand)r   Zseed_index_exprZoffset_index_exprZrand_index_exprr  r   Zoffset_loader
random_posseed_loaderrY   rZ   r    s    zphilox_rand.<locals>.inner_fnr  )
r*   r  FlexibleLayoutcontiguous_stridesmake_indexerr  r6   r   r]   r`  )	r   seedr  r  rg   r   r  Zrandom_values_nodeZoffset_noderY   rb  rZ   philox_rand  s"    
ri  c              	   C  s2   t jr&ttjtjtj	j
| ||S tdd S )Nz&should be handled in replace_random.py)r(   rW  r9  r:  r9   r   r*   r;  rR  native_dropoutrK  ru   )r   pZtrainrY   rY   rZ   rj  "  s    rj  c                 G  sj   t js |  tdks J d|   t|dksBt|d trJt	j
jnt	j
j}tj|| g|R   | S )Nr  Tthis should be handled in decomps unless config.fallback_random or the device is CPUr   )r(   rW  ra   rl   rg   r+  r   r_   r   rR  
bernoulli_Tensorr*   ZInplaceBernoulliFallback)r   re   rQ  rY   rY   rZ   rm  -  s    

rm  c                 G  s4   t js |  tdks J dtt| g|R  S )Nr  rl  )r(   rW  ra   rl   rg   rm  r<  )r   re   rY   rY   rZ   bernoulli_p<  s    ro  c                 C  s   t d S r   ru   r   rY   rY   rZ   _foobarE  s    rr  c                 C  s   t d d S )Nz1using triton random, expect difference from eager)rY  info)ZsaltrY   rY   rZ   _warn_triton_randomJ  s    rt  c                   C  s   t tjj d S r   )rt  rI   rQ   Zcreation_timerY   rY   rY   rZ   warn_triton_randomO  s    ru  c                  O  sJ   | dd d urt| i |S tjr>|dd  t| i |S tdd S N	generatorz-should have been handled in replace_random.py)r  fallback_rand_generatorr(   rW  popfallback_rand_defaultru   r^  rY   rY   rZ   ra  [  s    ra  c                  O  sJ   | dd d urt| i |S tjr>|dd  t| i |S tdd S rv  )r  fallback_randn_generatorr(   rW  ry  fallback_randn_defaultru   r^  rY   rY   rZ   randne  s    r}  c                 C  s   t |}t j| |S r   )r*   get_stride_orderExternKernelrequire_stride_order)Zinput_tensorr  stride_orderrY   rY   rZ   inductor_force_stride_ordero  s    
r  rg   c                 C  s   t dd S )Nz.should be handled in fuse_seed_creation_pass()rp  r  rY   rY   rZ   inductor_seedu  s    r  c                 C  s   t   tt| t|S r   )ru  r9   r   r*   ZRandomSeedsr=   )r  rg   rY   rY   rZ   inductor_seedsz  s    r  c                   s(    fdd}t j  |g dS )Nc                   s   t   S r   )rH   Z	load_seedget_namerq  r   seedsrY   rZ   r    s    z&inductor_lookup_seed.<locals>.inner_fnr  )r6   r   ra   r   )r  r   r  rY   r  rZ   inductor_lookup_seed  s    r  r  z	list[int]str)r   rh  moder  c                  s   t jr
J  dv sJ g | } tj}| }tj||| tj| |d	 |
  fdd}tj|||g | d}|  |S )N)ra  r}  r  c                   s"   t t g t| tjS r   )r   rH   r  rl   re  r  r  rc  rd  rY   rZ   r    s    z!inductor_random.<locals>.inner_fnr  )r(   rW  rl   r  r  r*   r  re  rf  rg  r  r6   r   r+  )r   rh  r  r  r   rg   r  r  rY   r  rZ   inductor_random  s$    

r  )lowhighr   rh  r  c                  sp   t jr
J g |}tj}| }tj|||tj||d	 |
  fdd}tj|||g |dS )Nr  c              	     s6   t g t | tjt tjt  tjS r   )rH   Z	randint64r  rl   re  r  r  r  r  rc  rd  rY   rZ   r    s    z"inductor_randint.<locals>.inner_fnr  )r(   rW  rl   r  r  r*   r  re  rf  rg  r  r6   r   )r  r  r   rh  r  r   rg   r  rY   r  rZ   inductor_randint  s    

r  z.tuple[str, sympy.Expr, sympy.Expr, sympy.Expr])tbrk   c                 C  s4   |   |  d |  d |  d  |  d fS Nr  r   )r  r   
get_strider  rY   rY   rZ   _boundaries_helper  s
    

r  ztuple[str, sympy.Expr]c                 C  s   |   |  d fS r  )r  r  r  rY   rY   rZ   _sorter_helper  s    r  	out_int32rightsidesorterzOptional[str]zOptional[TensorBox])sorted_sequenceselfr  r  r  r  rk   c          
        s   dd }|r(||r(d urH|sHt tjjdd|||dS |d ur\|dkr\d|rftjntj |   d ur  t	
 dkr fd	d
}n fdd
}| }tj| ||jd}	|	  |	S )Nc                 S  s   t j| tjS r   )rI   rQ   r(  r)  	BUCKETIZEr  rY   rY   rZ   r    s   zsearchsorted.<locals>.<lambda>FrC  r  r  Tr'   c              	     s@   | }t j|td d u r&d ntd u r8d nddS )Nr   r  Zsorter_indicesrH   	bucketizer  r  )r  r{  index_dtyper  r  r  Zvalues_loaderrY   rZ   r  	  s    zsearchsorted.<locals>.inner_fnc              	     s\    }dd fdd}t j|t|d u r>d ntd u rPd n|dS )Nr9   r  c                   s>   |   }tttjdd t|d d  d d D S )Nc                 s  s   | ]\}}|| V  qd S r   rY   )r   rw  rd   rY   rY   rZ   r   	  r   zNsearchsorted.<locals>.inner_fn.<locals>.get_flattened_index.<locals>.<genexpr>r  )r  rH   r  r   rm  operatorr   r   )r  strides)r  r  rY   rZ   get_flattened_index	  s    &z;searchsorted.<locals>.inner_fn.<locals>.get_flattened_indexr  r  )r  r{  r  r  r  rZ   r  	  s    	r  )rH  rR  searchsortedrn  rl   re  r  r  r+  r   r   ra   r6   r   r  )
r  r  r  r  r  r  Zvalidate_bucketizer  rg   r  rY   r  rZ   r    sH    
r  r  r  )r.  
boundariesr  r  c                  s   t   dksJ tj| tjr4tj tjsPttj	j
dd|  |dS    |  }|  |rrtjntj fdd}tj|||  d}|  |S )Nr'   FrC  r  c                   s"   | }t |t d}|S r]  )rH   r  r  )r   r{  rk  r  r  r  r  rY   rZ   r  S	  s    zbucketize.<locals>.inner_fnr  )r   r   rI   rQ   r(  r)  r  rH  rR  r  rn  r+  ra   r  rl   re  r  r6   r   )r.  r  r  r  rg   r  r  rY   r  rZ   r  5	  s*    
r  c                 O  s$   t tjtjj||f\}}||fS r   )r9  tree_map_onlyr*   r1   r  Zrequire_stride1r   re   r   rY   rY   rZ   require_denser	  s    r  c                 O  s$   t tjtjj||f\}}||fS r   )r9  r  r*   r1   r  require_contiguousr  rY   rY   rZ   r  y	  s    r  c                 O  s$   t tjtjj||f\}}||fS r   )r9  r  r*   r1   r  rs   r  rY   rY   rZ   rs   	  s    rs   c                 O  s$   t tjtjj||f\}}||fS r   )r9  r  r*   r1   r  r  r  rY   rY   rZ   r  	  s    r  c                   s|   t  tjr,dd  D }tj |S t  trN fdd  D S t  tt	frxt
 dd t D S  S )Nc                 S  s$   g | ]}t |tjr|jjn|qS rY   r_   rl   SymIntrW   exprr  rY   rY   rZ   r   	  s   z,constrain_to_fake_tensor.<locals>.<listcomp>c                   s    i | ]}|t  | | qS rY   constrain_to_fake_tensorr   keyr   fake_argrY   rZ   r   	  s   z,constrain_to_fake_tensor.<locals>.<dictcomp>c                 s  s   | ]\}}t ||V  qd S r   r  )r   r   Zf_arY   rY   rZ   r   	  s   z+constrain_to_fake_tensor.<locals>.<genexpr>)r_   r*   r1   r  r  require_exact_stridesr   r   r   r]   r  r   )r   r  meta_stride_exprrY   r  rZ   r  	  s    
r  c                   s6   t dd t| |D }  fdd| D }| |fS )Nc                 s  s   | ]\}}t ||V  qd S r   r  )r   r   r  rY   rY   rZ   r   	  s   z,constrain_to_fake_tensors.<locals>.<genexpr>c                   s    i | ]\}}|t | | qS rY   r  r   fake_kwargsrY   rZ   r   	  r   z-constrain_to_fake_tensors.<locals>.<dictcomp>)r   r   r   )re   r   Z	fake_argsr  rY   r  rZ   rr   	  s
    rr   c                   sJ    fdd t  fddt|jD } fdd| D }||fS )Nc                   s^   t  tjr6tjd  tjjj	}tj
 |S t  trZ fdd  D S  S )Nr{  c                   s    i | ]}| | | qS rY   rY   r  )apply_constraintr   fx_argrY   rZ   r   	  r   zEconstrain_to_fx_strides.<locals>.apply_constraint.<locals>.<dictcomp>)r_   r*   r1   r~  r  r  rI   rQ   r   r   r  r  r   r   )r   r  r  r  )r   r  rZ   r  	  s    
z1constrain_to_fx_strides.<locals>.apply_constraintc                 3  s   | ]\}} ||V  qd S r   rY   )r   r   r  r  rY   rZ   r   	  s   z*constrain_to_fx_strides.<locals>.<genexpr>c                   s"   i | ]\}}| |j | qS rY   r   r   r  fx_noderY   rZ   r   	  r   z+constrain_to_fx_strides.<locals>.<dictcomp>)r   r   re   r   r  re   r   rY   r  rZ   rt   	  s    

rt   c                   sN   fdd t  fddtt|jD } fdd| D }||fS )Nc                   s2  t |tjs|S |jd }dd | D }t| }|rb|d dkrbtttt	|
 }jtjjkr| dv rt	|dksJ d}|jstj||S d	 jtjjjjko| d
k}t |tsJ t	|
 dvr|S t| rttj||S t |tr>| d ur>t| r>ttj||S |r~t|
 }g }| }	tt	|
 d D ]F}
tjj||
 ds|	d urptjj|	|
 drp||
 qpdgt	| }d|d< d}tt	|d ddD ]}
||
d  dkr|||
d   }|
|v r@tjj||
d    dr@d||
< qtjj|  dsdt|   }|||
< qtj ||S t| rttj||S t |tr| d urt| rttj||S  fdd}t |j!tj"r$||s$||# r$ttj||S tj||S )Nr{  c                 S  s$   g | ]}t |tjr|jjn|qS rY   r  r  rY   rY   rZ   r   	  s   z=sdpa_constraint.<locals>.apply_constraint.<locals>.<listcomp>r  r   )r   r   r   )r   r'   r%   r   r   r   r   r   r'   r%   c                   s   t jj|  d   dkS r  )rI   rQ   r   	size_hintr   r   Z	ALIGNMENTrY   rZ   
is_aligned.
  s    z=sdpa_constraint.<locals>.apply_constraint.<locals>.is_aligned)$r_   r*   r1   r  r  r~  r]   r   r   r   r   rU   rR  0_scaled_dot_product_efficient_attention_backwardrK  Zis_cudar  r  rl   rH   '_scaled_dot_product_efficient_attentionr9   Zis_aligned_realized_tensorZtry_match_insignificant_stridesZrealize_inputmaybe_get_striderI   rQ   r   statically_known_equalsrb   r<   r  r`   r  r  )r  r   r  Zmeta_valr  r  Zeffn_attn_fwd_biasr1  Zexpanded_dimsZmaybe_striderd   Zout_stridesr  r  )r  r  rZ   r  	  s    







z)sdpa_constraint.<locals>.apply_constraintc                 3  s"   | ]\}\}} |||V  qd S r   rY   )r   r  r   r  r  rY   rZ   r   :
  s   
z"sdpa_constraint.<locals>.<genexpr>c                   s$   i | ]\}}| d |j | qS r  r   r   r  rY   rZ   r   >
  r   z#sdpa_constraint.<locals>.<dictcomp>)r   r^   r   re   r   r  rY   r  rZ   sdpa_constraint	  s    {r  )r@  c                 C  sn   |}|   |  kr"t||   }|  | kr@t||  }|  | krft||  }t|S t|S r   )ra   rX  r   r   r   r   r<  )r  r#  rW  r   rc   rY   rY   rZ   r=    s    r=  )memory_formatc                C  s&   t j|  |  |  t|  dS Nr  )r6   r   ra   r   r  r]   r   )r   r  rY   rY   rZ   r<  $  s    
r<  c                 C  s   g }t | trLt | jtjrL| j} t | tjrD||   | j} q"t| } t| } |r| j} |d d d D ]}tj| |d} qlt| } | S )Nr  r  )r_   r9   r`   r*   r  rb   Z
get_layoutr<  )r   Zreinterpret_view_layoutsr  rY   rY   rZ   clone_preserve_reinterpret_view/  s    r  rM  c                  s(    fdd}t jt| || gdS )Nc                   s   t j| d    dS )Nr   r   rH   r  r  r   r  r  rY   rZ   rj   Q  s    ziota.<locals>.fnr  )r6   r   r=   )lengthr  r  r   rg   requires_gradrj   rY   r  rZ   iotaG  s    
r  )r   r   c                   s   |   |  ksJ |  t|  d tjjtdrN| 	    tjj
d tjj| 	    tt| | 	 }|  fdd}tj|  |   |t| 	 dS )Nr   c              	     s6   t t t |   tjt tj| | S r   )rH   ri  eqr  rl   re  r  r   r   
src_loaderr  rY   rZ   r  h  s    z select_scatter.<locals>.inner_fnr  )r   r  r  rI   rQ   r   r   r   r  r   r-  r.  r   r  r6   r   ra   r]   )r   r#  r   r   r  rY   r  rZ   select_scatter\  s     

r  c                   s     |  ksJ  t d    tj \t }t d  | < t	||}|  fdd}t
j   |t dS )Nr   r'   c              
     s4  dkr kr dkr | S t |  tj}t|  t|    < g }dkr~|t |t t	tj kr|t 
|t t	tj dkr|t t t|   dtjt dtj |sJ tt j|}t | fddtrdnd}t ||| S )Nr   r'   c                     s    S r   rY   rY   )src_idxr  rY   rZ   r    r   z1slice_scatter.<locals>.inner_fn.<locals>.<lambda>r  )rH   r  rl   r  r]   r"   rb   r  r   r   r  r  r$   r  r   rm  r  r  r   ri  )r  r  r  Zsrc_valr   r0  r  r  r  r  r   r  )r  rZ   r    sR    zslice_scatter.<locals>.inner_fnr  )r   r  r  r   r*   r  Znormalize_start_endr]   r"   r   r6   r   ra   )r   r#  r   r  r  r  Zsrc_sizer  rY   r  rZ   slice_scatterz  s     
.
r  c                 C  s*   t | ttfr&t| dkr&t| d S | S r]  )r_   r]   r   r   _unwrapr   rY   rY   rZ   r    s    r  r   rg   r  
pin_memoryc                  s  t |d tjfv d|  t | d tt tr@p<tjnpJt g }t tj	rl fdd}nt t
tfr fdd}nft dkst d t
tfrt dkr|tt   fdd}ntjtj |d	S tjt|||d
S )Nlayout=r  c                   s   t  S r   r  r  r`   r   rY   rZ   r    s    ztensor.<locals>.inner_fnc                   s   t  S r   rH   r  r  r  rY   rZ   r    s    r   r   c                   s8    fdd t dkr*tdS  dt S )Nc              	     sr   | |k sJ ||  dkr(t |  S ||  d |  }t t t d tjt |tj | | ||S )Nr'   r%   r   )rH   r  ri  r  r  rl   r  )r  r  Zmid)binary_searchr`   r   r   rY   rZ   r    s    z/tensor.<locals>.inner_fn.<locals>.binary_searchr   )r   rH   r  r  r  )r  r   rZ   r    s    r  r  )r|   rl   stridedr_   r  r   r  get_default_dtyper   r   r   r   rb   IntegerrI   rQ   Zadd_tensor_constantr_  r6   r   r=   )r`   r   rg   r  r  r  r  rY   r  rZ   r_    s,    *r_  c                 C  s@   t | tr2|d urt| |} |d ur.t| |} | S t| ||dS )Nr  )r_   r9   r   rX  r_  )r`   r   rg   rY   rY   rZ   	as_tensor  s    


r  c                 C  s   t | tjdS r  r^  r`   rY   rY   rZ   long_tensor  s    r  c                 C  s   ddl m} |tjjjtjjjd }|d us2J t|dksFJ |t	t
| \}}t||| }tj||_tj| tjjjd }t|tjtjtjfr|jjS t|S d S )Nr   )resolve_unbacked_bindingsunbacked_bindingsr'   r{  )%torch.fx.experimental.symbolic_shapesr  rI   rQ   r   r   rR   r  r   r  iterr   r*   ZDynamicScalarregister_bufferr   register_operationr_   rl   r  ZSymFloatZSymBoolrW   r  r   r5  )r`   r  r  Zbinding_symZkeypathbufferr{  rY   rY   rZ   _local_scalar_dense  s    r  c                 C  s   d S r   rY   )r`   r{   rY   rY   rZ   _assert_scalar<  s    r  )rg   r  c                C  s   d S r   rY   )r   r   r  r   rg   r  rY   rY   rZ   _assert_tensor_metadataF  s    r  c                   s   | t | ttfs"tdr"jt ttfr@ fdd}nDt tjr\ fdd}n(t dkspJ 	 fdd}t
j| |t|dS )Nr   c                   s   t  S r   r  r  r   r   rY   rZ   r  T  s    z_full.<locals>.inner_fnc                   s   t  S r   r  r  r  rY   rZ   r  Y  s    r   c                   s    g S r   rY   r  )value_loaderrY   rZ   r  `  s    r  )r_   r   r   r   r   r   r   r   r   r  r6   r   r]   )
fill_valuerg   r   r   r  rY   )r   r   r  rZ   _fullM  s     r  c                 K  s   t t|| fi |S r   create_tensor_liketensor_constructor)r   r  r   rY   rY   rZ   r  k  s    r  c                   s    d d d d dd d fdd
}|S )NF)namesr   rg   r  r  r  c                   s   t | d u d t |d tjfv d|  t | d t|}|pFt }t|dkrvt|d tttj	frvt|d }|D ]}t|tj
rzJ qzdd |D }t |||S )Nnamed tensorsr  r  r'   r   c                 S  s   g | ]}t |qS rY   r  r  rY   rY   rZ   r     r   z5tensor_constructor.<locals>.inner.<locals>.<listcomp>)r|   rl   r  r=   r  r   r_   r]   r   Sizer  r  )r  r   rg   r  r  r  r   rw  r  rY   rZ   r%  r  s    	"z!tensor_constructor.<locals>.innerrY   )r  r%  rY   r  rZ   r  p  s    r  )r  r   r  rg   r  r  c                 G  sX   t | d u d t|}t|dkrDt|d tttjfrDt|d }t|d ||||dS )Nr  r'   r   r   r  rg   r  )	r|   r=   r   r_   r]   r   rl   r  empty_strided)r  r   r  rg   r  r  r   rY   rY   rZ   r    s    
"r  c                   s   dddddd fdd
}|S )zZ
    Shim to convert X_like(...) into X(...).  For example zeros_like() into zeros().
    NF)r   rg   r  r  r  c                  sj   t | d t |d tjfv d|  |d u r8|  }nt|}|pJ|  }t|  } |||||dS )Nr  r  r  )r|   rl   r  r   r   ra   r]   r   )r   r   rg   r  r  r  r   creation_fnrY   rZ   _constant_like  s    

z*create_tensor_like.<locals>._constant_likerY   )r  r  rY   r  rZ   r    s    
r  c                 C  s   t t| S r   r  r  rY   rY   rZ   rj    s    rj  c                   s   d d d d d fdd
}|S )Nr   c                  st   t |ttfsJ t| d t|d tjfv d|  t|pF|  }|pR|  }dd |D }t	 t
|||S )Nr  r  c                 S  s   g | ]}t |qS rY   )r   r  r  rY   rY   rZ   r     r   z7new_constant.<locals>._new_constant.<locals>.<listcomp>)r_   r]   r   r|   rl   r  r   r   ra   r  r=   r   r   r   r  rg   r  r  rY   rZ   _new_constant  s    z#new_constant.<locals>._new_constantrY   )r  r  rY   r  rZ   new_constant  s    r  r   c                C  s8   |d u r|   }|d u r |  }t|d ||t||dS Nr   r   ra   r  r=   r  rY   rY   rZ   	new_empty  s    r
  c                C  s  t | ttfsJ t |tttd fs*J t| d t|d tjfv d|  t|p^t }|pnt	dj
}t|}td||| d}|  |jj}tj|jdgt|  d|_t |tjsJ dd | D } |rd	d |D n
tj| }tj||| |d
|_|S )Nr  r  r  r   )r  rg   r   r   )r  c                 S  s   g | ]}t |qS rY   r  r  rY   rY   rZ   r     r   z!empty_strided.<locals>.<listcomp>c                 S  s   g | ]}t |qS rY   r  r  rY   rY   rZ   r     r   )rg   r   r   r  )r_   r]   r   r  r|   rl   r  r   r  r_  rg   r=   r  r+  r`   dataclassesreplacer   r*   r  re  rf  r  r  )r   r  r   r  rg   r  Z	pointwiser  rY   rY   rZ   r    s0    
r  c                C  s8   |d u r|   }|d u r |  }t||||t||dS r  r	  )r   r   r  r   r  rg   r  rY   rY   rZ   new_empty_strided  s    r  c                 C  s2   dd |D }t tt||jd}tj| |S )Nc                 S  s   g | ]}t jj|qS rY   )rI   rQ   r   r  r  rY   rY   rZ   r     r   z copy_strided.<locals>.<listcomp>)r  )sortedr   r   __getitem__r*   r  r  )r   r  r  rY   rY   rZ   copy_strided  s    r  c                 K  s*   | dd usJ dt|| fi |S )Nr   z(dtype should be handled by decomposition)r  r  )r   r  r   rY   rY   rZ   full  s    r  c                   s   t | tsJ | dkr(t| | S |  tdk}t|  | |r^t| dg} dg|  |  fdd}t	j
|  |  || dS )Nr   r'   c                   s@   t | } t|   }t| dkr0|g} n||  < | S r]  )r]   rH   indirect_indexingr   )r  Z
gather_idxr   index_loaderr   r  rY   rZ   rj   1  s    zgather.<locals>.fnr  )r_   r9   	get_numelr
  r   r   r  r   r  r6   r   ra   r   )r   r   r   Zsparse_gradr  rj   rY   r  rZ   gather  s$    	r  c                   s   |rt tjj| ||||S |r$J t| ts2J t|ts@J dt| v sTJ |  |  t	|
 | 
 g |
 dd   fdd}tj|  |  |dS )Nr   r'   c                   s\   t | t ks"J |  d  | d  }t|d gg | d   }|S )Nz != r   )r   rH   r  )r  Z	var_indexZ
weight_idxindices_loaderZindices_ndimr  Zweight_loaderZweight_sizerY   rZ   rj   T  s    "
zembedding.<locals>.fnr  )rH  rR  	embeddingrK  r_   r9   r  r   r  r   r   r6   r   ra   )Zweightrk  Zpadding_idxZscale_grad_by_freqsparserj   rY   r  rZ   r  B  s(    

r  c                   s   t dd  D s*J ddd  D  tdd  D rDtddd t D }t|d	ksjJ d
d gt  }t|t fdd|D  D ]$\}}| |krtd|||< q||fS )Nc                 s  s2   | ]*}|d ur|  tjtjtjtjfv V  qd S r   )r   rl   r  re  r   r  r   rY   rY   rZ   r   e  s   z.check_and_broadcast_indices.<locals>.<genexpr>z)indices must be int64, byte or bool. Got c                 S  s   g | ]}|d ur|  qS r   r   r   rY   rY   rZ   r   j  r   z/check_and_broadcast_indices.<locals>.<listcomp>c                 s  s*   | ]"}|d ur|  tjtjfv V  qd S r   )r   rl   r   r  r   rY   rY   rZ   r   l  s   zFallback for bool indicesc                 S  s   g | ]\}}t |tr|qS rY   r   r   rY   rY   rZ   r   q  r   r   z"requires at least 1 non-None indexc                   s   g | ]} | qS rY   rY   r   rk  rY   rZ   r   t  r   z.Fallback when indices is on a different device)r   r   ry   r^   r   r   r   ra   )rk  rg   Z
valid_idxsZnew_indicesrd   r   rY   r  rZ   check_and_broadcast_indicesd  s"    
$
r  c	              
     s   dt dd  D ]\}	}
|
|	 dkrdq
fddtD g 
tt d  d }r| nd |  |d    	f
dd}|fS )	NFr'   Tc                   s    g | ]\}}|d u r | qS r   rY   )r   rd   r{  r&  rY   rZ   r     r   z2index_output_size_and_inner_fn.<locals>.<listcomp>r   c           	   	     s  t | t ksJ t t ks(J t }g }d }rDdn|}d}td d D ]}||krp||7 }| d u r|t | k sJ || |  |d7 }q\| }|d usJ | }|tj|| |||  | d q\g || |d  }	d u r|S 	|S )Nr   r  r'   r   wrap_neg)r   r   rb   rH   r  )	r  r  	new_indexfirst_tensor_indexZstart_offsetZnext_idxrd   loaderr   )
r   indexed_sizerk  indices_loadersnon_consecutive_tensorsoutput_sizetensor_indicestensor_sizer   r  rY   rZ   rj     s>    

z*index_output_size_and_inner_fn.<locals>.fn)r   r^   r   )r&  rk  r(  r)  r%  r$  r  r   r   previouscurrentr"  rj   rY   )r   r$  rk  r%  r&  r'  r(  r)  r   r  r&  rZ   index_output_size_and_inner_fn~  s"     


"r,  c                 C  s,   t | ||\}}}tj|  |  ||dS r  )index_impl_helperr6   r   ra   r   )r   rk  r   r'  r  r   rY   rY   rZ   
index_impl  s    r.  c           
        s   t ttfsJ |  t|  \}t|dks@J ddd D }t|d   }|  fddttD }|rd|v rd|vrt	dfddttD }t
||||d ||d	\}  fd	d
}	||	 fS )Nr   z Must have at least one valid idxc                 S  s    g | ]}|d ur|  nd qS r   r  r   rY   rY   rZ   r     r   z%index_impl_helper.<locals>.<listcomp>c                   s    g | ]} | d ur| qS r   rY   r   )rk  r&  rY   rZ   r     r   z0index is out of bounds for dimension with size 0c                   s   g | ]} | qS rY   rY   r   r  rY   rZ   r     r   r  c                   s    | S r   rY   r  )index_inner_fnr  rY   rZ   r    s    z#index_impl_helper.<locals>.inner_fn)r_   r]   r   r  r  ra   r   r   r   
IndexErrorr,  )
r   rk  r   r   r(  r%  r)  r$  r'  r  rY   )r/  rk  r  r&  rZ   r-    s0    
r-  c                 C  sF   zt | |ddW S  ty@   |   ttjjdd| | Y S 0 d S )NTr   FrC  )r.  ry   r+  rH  rR  r   rn  r   rk  rY   rY   rZ   r     s    r   c                 C  s   t | |ddS )NFr1  )r.  r2  rY   rY   rZ   _unsafe_index  s    r3  c                 C  s   t t| |||dddS )NTFr   may_realizeindex_put_impl_r<  r   rk  r   
accumulaterY   rY   rZ   	index_put  s    r:  c                 C  s   t t| |||dddS )NFr4  r6  r8  rY   rY   rZ   _unsafe_index_put  s    r;  c                 C  sB   |  |   krt||   }|r,t| |}t| t|d || S r]  )ra   rX  r   r"  ri  )r  rk  r   r9  rY   rY   rZ   index_put_as_masked_fill  s
    
r<  c                 C  s   t tjjj| ||| | S r   )r*   ZIndexPutFallbackrI   rQ   rR   rU   r  rk  r   r9  rY   rY   rZ   index_put_fallback&  s    r>  c                 C  s   t | |||dddS )NTr4  r7  r=  rY   rY   rZ   
index_put_+  s    r@  c                 C  s   t | |||dddS )NFTr4  r?  r=  rY   rY   rZ   _unsafe_index_put_2  s    rA  c              
     s  |rBdd }dd  || |  v rBt fdd|D sB|  | dkrt|dkr|d  tjtjfv r|d }t	t|
 t| 
 D ]}t|d	}qt| |g||S t rt| |||S |D ]2}	|	d ur|	 tjtjfv rt| |||  S q| 
 t}
|rbt|  rb|
dkr<t| dg} t| |||} |
dkr^t| g } | S t||  }zt||  \}}W n" ty   t| ||| Y S 0 d
d |D }t| tsJ |   |
dkrt| dg} t||d  
 }fddt	t|D }t|||||d |d\}}t||}tj|  |  | |||r^dnd d}tjd t| |d}tj !||_"tj #| |
dkrt| g } | S )Nc                 S  sP   t | tjr| j} t | tjr&|  } t | tjr8| j} t | tjrL|  S d S r   )	r_   r*   r9   r`   r  r  r  ZBufferr  r   rY   rY   rZ   try_get_name<  s    z%index_put_impl_.<locals>.try_get_namec                 S  sd   t | tr`t | jtjr`| j } t | tjo^t | jtjo^t| jdd o^| jj	j
tjjjjkS dS )Nr  F)r_   r9   r`   r*   r  r  r  r  r   r  rU   rl   rH   rR  randpermrK  )indicerY   rY   rZ   indice_slice_from_randpermE  s    
z3index_put_impl_.<locals>.indice_slice_from_randpermc                 3  s   | ]} |V  qd S r   rY   )r   rD  )rE  rY   rZ   r   S  s   z"index_put_impl_.<locals>.<genexpr>r'   r   r  c                 S  s    g | ]}|d ur|  nd qS r   r  r   rY   rY   rZ   r     r   z#index_put_impl_.<locals>.<listcomp>c                   s   g | ]} | qS rY   rY   r   r  rY   rZ   r     r   r1  
atomic_addrg   r   r  r  output_indexerZscatter_moder   r  r`   )$r  r   r+  r  r   r   rl   r   r  r   r   r  r<  $are_deterministic_algorithms_enabledr>  rB   rS  r   r  ra   ry   r_   r9   r]   r,  r   r*   Scatterr  r  MutationLayoutSHOULDREMOVErI   rQ   r  r   r  )r  rk  r   r9  r   r5  rB  r  r   r   Zx_ndimr(  r%  r)  r$  Zexpected_vals_sizer  scatterr  rY   )rE  r&  rZ   r7  9  s    	









r7  rC  c                   sT   t | |ddd\}}  |   fdd}tj|  |  ||dS )NFr  c                   sB   j tjkr t tj}n }t| fddS )Nc                     s    S r   rY   rY   )_unsafe_index_fnr  self_loaderrY   rZ   r    r   z8_unsafe_masked_index.<locals>.inner_fn.<locals>.<lambda>)r   rl   r   rH   r   r  )r  Zmask_valrN  fillr  Zmask_loaderrO  r  rZ   r    s    z&_unsafe_masked_index.<locals>.inner_fnr  )r-  r  r6   r   ra   r   )r  r  rk  rQ  r  r   r  rY   rP  rZ   _unsafe_masked_index  s    rR  c                   s@   t ||d}|   fddtt D }t| ||ddS )Nr   c                   s6   g | ].} | r.t  | |  | d  ndqS r  r  r   rk  r  rY   rZ   r     s   z7_unsafe_masked_index_put_accumulate.<locals>.<listcomp>T)r9  )ri  r   r   r   r;  )r   r  rk  r   Zmasked_valueZclamped_indicesrY   rS  rZ   #_unsafe_masked_index_put_accumulate  s    
rT  c                 C  s   t |t || S r   )rH   r  r  r   minmaxrY   rY   rZ   r    s    r  c                 C  s$   t | }t||||}t|| |S r   )r<  r  copy_)r  r#  r   r  r  r   Zoutput_viewrY   rY   rZ   as_strided_scatter  s    
rY  r   c                 K  s   t t| |||fi |S r   )scatter_r<  )r   r   r   r#  r   rY   rY   rZ   rM    s    rM  rm  include_selfr  )rQ  r   rm  r]  c             	   C  sf   t |t}t| || ttj|r(| nt||r>| jnd|rbt	j
| ||||||d |S d S )Nznot implr\  )r_   r9   rG   r   r   rl   r   r  ra   r*   ZScatterFallback)rQ  r  r   r   r#  rm  r]  Zsrc_is_tensorrY   rY   rZ   scatter_fallback  s(    

	r^  rm  )r   rm  c                C  sr   |dv sJ |d u rHt tjtjjjj}t|| ||||d}|d urH|S |dkrVd}n|dkrbd}t	| ||||S )N)Nr   multiplyr_  r   sumr`  prod)
r   rR  r[  rI   rQ   rR   rU   Z_overloadnamer^  scatter_reduce_)r  r   r   r#  rm  rQ  fallback_resultrY   rY   rZ   r[    s    r[  c                 C  s   t t| |||S r   )scatter_add_r<  r   r   r   r#  rY   rY   rZ   scatter_add)  s    rg  c                 C  s   t | |||dS )Nra  )rc  rf  rY   rY   rZ   re  .  s    re  c                 K  s   t t| ||||fi |S r   )rc  r<  )r   r   r   r#  reduction_typer   rY   rY   rZ   scatter_reduce3  s    ri  )r]  )r   r]  c             	     s2  |dv sJ t tj dkr,dtj v s4J dttrHtttjj |||d}|rj|S tt	sxJ dt
| v sJ t  }|dkrtdgtt	rt  dkrtdgt|t	rt | dkrt|dg}| dkr
S t    | tt	r8 nd  fdd	}fd
d}	dd }
|stj  fdd| |d d}tjd t|d}tj||_tj| tj  |	| ||
|d}tjd t|d}tj||_tj| |dkr.tg S )N)Nra  rb  meanamaxaminr'   twozKaten.scatter_reduce_.two is not the unique overload of aten.scatter_reduce_r\  r   r   c                   sD     }t|}t| }tj| |dkr.dn|  dd| < |S )Nr   r'   F)r   )r   r   r]   rH   r  )r  r  r/  Zindirect_idx)r   r  r  rY   rZ   rH  f  s    
z'scatter_reduce_.<locals>.output_indexerc                   s    r| S t   S d S r   rH   r  r   r  )r  r#  r  rY   rZ   rj   p  s    zscatter_reduce_.<locals>.fnc                 S  s    | dkrdS | d u sJ d S d S )Nra  rF  rY   r_  rY   rY   rZ   backend_reduce_strw  s    z+scatter_reduce_.<locals>.backend_reduce_strc                   s   t d  S r]  rn  r  )r  rY   rZ   r    r   z!scatter_reduce_.<locals>.<lambda>rG  rI  )r   rR  rc  r   r_   r   r  r^  rm  r9   r  r   r   rS  r  r  r+  r  r*   rK  ra   r  rL  rI   rQ   r  r   r  )r  r   r   r#  rm  r]  rd  r/  rH  rj   ro  Zzero_outr  rM  rY   )r   r  r  r#  r  rZ   rc  8  s    







rc  ztuple[Optional[float], ...])scales_xnexactc           
        s   |    |  |   d  |  d   }dd D t|ksRJ |}dd t|D t|D ]\}}|d urrd| |< qr fddfdd}	tj|  | 	 |	g ||d	S )
Nc                 S  s   g | ]}t jj|qS rY   rq  r   rY   rY   rZ   r     r   z&upsample_nearestnd.<locals>.<listcomp>c                 S  s   g | ]\}}|| qS rY   rY   )r   rd   orY   rY   rZ   r     r   r  c                   s\   t | tj}  r(t | t dtj} t | t |tj} t | tj} t j	| |ddS )N      ?Fr1  )
rH   r  rl   r  r   r  r  r   re  r  )r   r  r   )rr  rY   rZ   scale_fn  s    z$upsample_nearestnd.<locals>.scale_fnc                   sB   |  d  }| d   }g |fddt | D S )Nc                   s   g | ]\}}} |||qS rY   rY   )r   rd   rw  r   )ru  rY   rZ   r     r   z2upsample_nearestnd.<locals>.fn.<locals>.<listcomp>)r   )r  r   r   )i_sizes
inv_scalesrq  ru  r  rY   rZ   rj     s
     zupsample_nearestnd.<locals>.fnr  )
realize_hintr  r   r   r   r^   r6   r   ra   r   )
r   r'  rp  rq  rr  batchZo_sizesrd   r  rj   rY   )rr  rv  rw  rq  ru  r  rZ   upsample_nearestnd  s&    
rz  zOptional[float])r  c                 C  s   t | ||fddS )Nr'   rq  rz  r   r'  r  rY   rY   rZ   upsample_nearest1d  s    r~  c                 C  s   t | ||fdddS )Nr'   Trq  rr  r|  r}  rY   rY   rZ   _upsample_nearest_exact1d  s    r  )scales_hscales_wc                 C  s   t | |||fddS )Nr%   r{  r|  r   r'  r  r  rY   rY   rZ   upsample_nearest2d  s    r  c                 C  s   t | |||fdddS )Nr%   Tr  r|  r  rY   rY   rZ   _upsample_nearest_exact2d  s    r  )scales_dr  r  c                 C  s   t | ||||fddS )Nr   r{  r|  r   r'  r  r  r  rY   rY   rZ   upsample_nearest3d  s    r  c                 C  s   t | ||||fdddS )Nr   Tr  r|  r  rY   rY   rZ   _upsample_nearest_exact3d  s    r  c                   s   t  fdd|D S )Nc                 3  s   | ]}t | V  qd S r   r  r   r   rY   rZ   r   	  r   z$_create_constants.<locals>.<genexpr>)r   )r   re   rY   r   rZ   r    s    r  c                   s:   |   |   fdd}tj|  |  |dS )Nc                   sF   t | } t| tksJ  D ]}| d | |  | |< q | S ra  )r]   r   )r  r   ru  rn  r  rY   rZ   r#    s
    zrev.<locals>.loaderr  )r  r   r6   r   ra   r   )r   ru  r#  rY   r  rZ   rev  s    r  zSequence[int])r   paddingr  rk   c                 C  s  dd }| rdS t |dks.t |  dkr2dS |   t| tjrt| jtjrt| jjtjszt	j
rt| jjtjr| jjjsdS |   t| \}}|j}|d dkrdS |d dks|d dks|d dkrdS |d }|dkrdS |d }|jd }	||	| k rdS | jjj}
|jd |jd | g}|tjj|
< t| ||j|j}t|d|	|	| d	}t|| td
 d  d7  < |S )z
    This optimization changes the semantics of padding from 'clone'
    style to 'view' style.

    Thanks to functionalization, this change can still maintain numerical
    correctness.
    c                  S  sL   t jj} | du rdS t| j}t|dkrH|d jtjj	tj
j	fv rHdS dS )a  
        Conservatively check if padding can be fused with downstream op.
        1. if the downstream op is a sum, then there is little benefit to
           do inplace padding
        2. if the downstream op is a matmul, doing inplace padding can
           save membw.
        NTr'   r   F)rI   rQ   rR   r   rS   r   rU   rR  mmrK  Zaddmm)rR   rS   rY   rY   rZ   _padding_can_be_fused-  s    
z6inplace_constant_pad_nd.<locals>._padding_can_be_fusedNr   r%   r'   r   r   )r   r  r  inductorinplace_padding)r   r   r+  r_   r*   r9   r`   r  r  r(   Zcan_inplace_pad_graph_inputZInputBufferr   Zfreeze_layoutr  r  r   rI   rQ   Zbuffer_to_padded_sizer  r  r  fill_r   )r   r  r  r  r   r  r  ZnpadZstride0ZrowsizeZbufnameZpadded_sizeZ	resized_xZsliced_xrY   rY   rZ   inplace_constant_pad_nd"  s\    

$


r  c              	     sv  t |d dksJ tdd |D r.t| S tjrHt| |}|rH|S |  }tttt	|d d d |dd d  t |t   g  D ] \}}
tjj||f qt|d  }g t	 |d  D ].\\}}	}

|
 |
t|
| |	  qt |t |ks J t|   fddfdd	}|  tj|  |  ||d
S )Nr%   r   c                 s  s   | ]}|d kV  qdS r   NrY   r   rk  rY   rY   rZ   r     r   z"constant_pad_nd.<locals>.<genexpr>r'   c                   s~   g }t  d  D ]>\}\}}}|dkr>|t|d |dkr|t|| qttj|}t| fddS )Nr   c                     s    S r   rY   rY   )r   r  rY   rZ   r    r   z/constant_pad_nd.<locals>.mask.<locals>.<lambda>)	r   rb   range_mask_lowrange_mask_highr   rm  rH   r  r  )r   r  r  r  r  r  )boundsr  
mask_sizesrq  r  r  rZ   r    s    "zconstant_pad_nd.<locals>.maskc                   sZ   t | d  }t| d   D ]\}\}}|||  q"t|t| ksRJ |S r   )r]   r   rb   r   )r   r!  r  r  Z_high)bounds_precompr  rq  rY   rZ   	offset_fn  s
    z"constant_pad_nd.<locals>.offset_fnr  )r   r   r<  r(   r  r  r   r]   r   r   rb   rI   rQ   r   Zlookup_precomputed_sizer   r   r   r   r  r6   r   ra   )r   r  r  rc   rn  lhr'  r  r  r   r  rY   )r  r  r  r  r  rq  r  rZ   r    s:    *

r  z
sympy.ExprzUnion[sympy.Expr, int]rd   r  c                 C  s&   t t | tjt t|tjS r   )rH   r  r  rl   r  r   r  r  rY   rY   rZ   r    s    r  rd   r  c                 C  s    t t | tjt |tjS r   )rH   r  r  rl   r  r  rY   rY   rZ   r    s    r  rd   r  r  c                 C  s   t t| |t| |S r   )rH   r  r  r  r  rY   rY   rZ   
range_mask  s    r  r  c                   sF       d   p&dg   fdd}|S )Nr   c                   s|   | d   |  d   t tj fddtD }rbt| fddS t| 	fddS )Nc                   s.   g | ]&}t |  | |  |  qS rY   )r  r   )r  ih	padding_hrY   rZ   r     r   z=constant_boundary_condition.<locals>.load.<locals>.<listcomp>c                     s   t  dg S )NrZ  )constant_boundary_conditionrY   )r   r  pad_fill_valueprefixr   rY   rZ   r    s   
z;constant_boundary_condition.<locals>.load.<locals>.<lambda>c                     s   g  S r   rY   rY   )r  r  r  rY   rZ   r    r   )r   rm  rH   r  r   r  )r   r  r   r  r  r  r  r  r   r  )r  r  rZ   r    s    z)constant_boundary_condition.<locals>.load)r   r  )r   r  r  r  r   r  rY   r  rZ   r    s
    r  dilationc          	      C  s$  |d u rdgt | }t| d||   || || d   || d  || }|rt| d||   || || d   d|| d   || }tjj|d ||  |  ||  dkr|d8 }tjjd|||  |  ||   tjj|| dkrtjj|| d}n|}||fS )Nr'   r%   r   F)r   r"   rI   rQ   r   r  r-  r   )	r   rd   kernel_sizer  r  	ceil_moder  Zx_outZx_altrY   rY   rZ   pooling_size  s2    .
*$r  c                C  s    t | |} ttj| }|dkS )N   )rC   r   rm  r  r  )r  n_dimwindow_sizerY   rY   rZ   %should_fallback_max_pool_with_indices
  s    
r  assert_fallbackc                C  s   |dkrdg| }|dkr$dg| }|s,|}t ||}t ||}t ||}t ||}t| tsbJ t||ksrJ t||ksJ t||ksJ t||ksJ t|  |d |d fv sJ t||d}|d ur||ksJ |||||fS )Nr   r'   r%   r  )rC   r_   r9   r   r   r  )r   r  r  r  r  r  r  Zuse_fallbackrY   rY   rZ   max_pool_checks  s(    





 r  c             
     sR  |    | jd   }| j d  t fddtD  \} | j}	|	tju rddn|	jrrtdn
t	|	j
}
t|t| }tst stdd D rt| |
dn|  fdd	}tjd
| |  |	|	||d}tjd| |  tj|	||d}t|jjtr2|  t|jjtrJ|  ||fS )Nc                   s&   g | ]}t | | d qS )r  r  rs  )r  dhwr  r  r  r  rY   rZ   r   <  s   z*_max_pool_with_offsets.<locals>.<listcomp>F-infc                 s  s   | ]}|d kV  qdS r  rY   rs  rY   rY   rZ   r   L  r   z)_max_pool_with_offsets.<locals>.<genexpr>rZ  c                   sJ   | d   }|  d    fddt D }g ||S )Nc                   s4   g | ],} | |  | |   |  qS rY   rY   r   bhr  r  reduction_idxr  rY   rZ   r   T  s   z<_max_pool_with_offsets.<locals>.fn_inner.<locals>.<listcomp>r   )r  r  r  r  )r  r  r  r  r  r  r  rZ   fn_innerQ  s    z(_max_pool_with_offsets.<locals>.fn_innerrW  rh  
input_noderg   	dst_dtyper8  r  r  reduction_rangesargmax)rx  r  r   r   r   rl   r   rN  r   rP  rV  r]   r   r  r  r7   r   ra   r  r_   r`   r+  )r   r  r  r  r  r  r  ry  Zdhw_outr   Z	min_valuer  r  r  offsetsrY   )r  r  r  r  r  r  r  r  rZ   _max_pool_with_offsets-  sV    
	"	

r  c           
   
   C  s   t |}t| |||||dd\}}}}}tjdd: t| ||||||d\}}	|t|	tjfW  d    S 1 st0    Y  d S )NFr  r  unroll_reductions_thresholdr  )r   r  r(   r   r  r   rl   r  )
r   r  r  r  r  r  r  r   r  r  rY   rY   rZ   !_low_memory_max_pool_with_offsetsx  s*    	

	r  z"Sequence[Union[int, torch.SymInt]]zxCallable[[Sequence[Union[int, torch.SymInt]], Sequence[Union[int, torch.SymInt]]], torch._inductor.virtualized.OpsValue])r  r  
input_sizeincrements_to_indexrk   c                   sZ   t |  tttj fdd}tj	| 
 tj||  d}|S )Nc                   sJ   | }t |}t|} | |}t t| d  tjS r   )rH   r  r)   Z_flattened_index_to_ndr  Z_flatten_indexrl   r  )r  r  Zoffset_sympyr  Zidhwr  r  r  r  Zoffsets_loaderr  rY   rZ   offsets_to_indices  s    
z4_pool_offsets_to_indices.<locals>.offsets_to_indicesr  )r   r  r   r5  r   rm  r  r  r6   r   ra   rl   r  r   )r  r  r  r  r  rk  rY   r  rZ   _pool_offsets_to_indices  s    		r  c                   s(   t | fdd}t| |||S )Nc                   s,   |  d    fddt D S )Nc                   s4   g | ],} | |  | |   |  qS rY   rY   r   r  rY   rZ   r     s   zX_low_memory_max_pool_offsets_to_indices.<locals>.increments_to_index.<locals>.<listcomp>r  r  r  r  r  r  r  r  rZ   r    s    zD_low_memory_max_pool_offsets_to_indices.<locals>.increments_to_index)r   r  )r  r  r  r  r  r  r  rY   r  rZ   '_low_memory_max_pool_offsets_to_indices  s
    r  c              	   C  s^   t | |||||d\}}}}}t| ||||||d\}}	t|	|| j| d  |||}
||
fS )Nr  )r  r  r  r  )r   r  r  r  r  r  r  r   rc   r  rk  rY   rY   rZ   _max_pool_with_indices  s    	
	r  c              	   C  s   t | |||||ddS Nr%   r  r  r   r  r  r  r  r  rY   rY   rZ   max_pool2d_with_indices  s    	r  c              	   C  s   t | |||||ddS Nr   r  r  r  rY   rY   rZ   max_pool3d_with_indices  s    	r  c                   s\  dkrddg|dkr ddg}s(t |ts6J tdksFJ tdksVJ tdksfJ t|dksvJ t| dv sJ |   |  }t |trt |jjtr|jj}	tj	d tj
|	 |	 |	 d|	d}
|
  |
 }n| }|d ur|d dkp(|d uo(|d dk}tdd |D rTt| ||||S | ^ }}
|  ^ }| |   t| }tfd	dtd d D tfd
dtd d D 		 }|dkr t| ||||S |  	
fdd}tj|  |  ||d}|rTtj|S |S d S )Nr   r'   r%   r  )rg   r   r   rI  c                 s  s   | ]}|d kV  qdS r  rY   rs  rY   rY   rZ   r   9  r   z3max_pool2d_with_indices_backward.<locals>.<genexpr>c                 3  s:   | ]2}t |d   t d | d   d    dV  qdS r   r'   NrW  r   r  r  r  rY   rZ   r   F  s   c                 3  s:   | ]2}t |d   t d| d   d    d V  qdS r'   r   Nr  r   wr  rY   rZ   r   J  s   r  c                   s^  | ^ }}}t |
 | tj}|d  }|d  }t t|d  d  d tj}t t|d  d  d tj}t t|d d tj}t t|d d tj}t |t dtj}t |t dtj}t |t tj}t |t tj}d }	tD ]"}
t	D ]}t 	|t |
tj}t 	|t |tj}g |t j
t |t |t dtjd ddt j
t |t |t dtjd dd}|} |}t ||}|	d u r
t ||t dtj}	n:t t t ||t |||}t |t 	|	||	}	q4q&|	d usZJ |	S )Nr   r'   Fr1  r  r  )rH   r  rl   re  r"   r  r  r  r   r   r  r  r  ri  r  r  r  )r  r  r  r  Z
index_testphstartpwstartphendpwendgradientph_pw_phpwZ
grad_indexZindex_actualZ	grad_partr   r  grad_loaderh_window_sizer  Zindices_sizer  r  pooled_heightpooled_widthr  w_window_sizewidthrY   rZ   rj   Y  sj      


z,max_pool2d_with_indices_backward.<locals>.fnr  )r_   r9   r   r   rx  r  r`   r6   r*   r  re  ra   r   Zdecide_layoutr  r   )fallback_max_pool2d_with_indices_backwardr  r]   rW  r   r   r  r  )grad_outputr   r  r  r  r  r  rk  Z	gO_strider`   Zx_bufferZx_strideZis_channels_last_batchZ_heightr   r  r  rj   rc   rY   r  rZ    max_pool2d_with_indices_backward  s|    	

 ;r  r  c                   s   |    fdd}|S )Nc              
     s   |\|\ |\}}t t t   tjt |tjt t  tjt |tj}t | fddS )Nc                     s   g    S r   rY   rY   )h_start_indexr  iwr  w_start_indexr  rY   rZ   r    r   z3pad_adaptive_loader.<locals>.load.<locals>.<lambda>)rH   r  r  r  rl   r  r  )r  Z
incrementsZstart_indicesZend_indicesh_end_indexw_end_indexr  pad_valr  )r  r  r  r  r  rZ   r    s$    z!pad_adaptive_loader.<locals>.loadr  )r   r  r  rY   r  rZ   pad_adaptive_loader  s    r  c           
      C  sL   t j| ||d}t j|||d}t j| ||d}t j|||d}	||||	fS )N)out_diminp_dim)r   r   )
start_index	end_indexh_inw_inh_outw_outr  r  r  r  rY   rY   rZ    compute_indices_adaptive_pooling  s
    r  c                   sD   |\}}|\}}	t | |||||	\  fdd}
|
S )Nc                   s   | ^ }}}|} |}|}|}d }	t td td D ]:\}
}|||
|g||g||g}|	d u r||}	qL||	}	qL|	S rc  )r   productr   )r  r#  r  r  bwr  r  r  r  r  r  r  r{  h_end_index_fnh_start_index_fnkernel_maxes
pooling_fnw_end_index_fnw_start_index_fnrY   rZ   rj     s"    $z _adaptive_pooling_fn.<locals>.fnr  )r  r  r  in_sizes	out_sizesr	  r  r  r   r  rj   rY   r  rZ   _adaptive_pooling_fn  s    r  c           
        sF   |\}|\}}t | ||||\  fdd}	|	S )Nc                   s   | ^ }}}|} |}|}|}d }	d }
t td td D ]|\}}||||g||g||g}t||  | | tj}|
d u r|}
ntt||	||
}
|	d u r|}	qP||	}	qP|
S rc  )	r   r  r   rH   r  rl   r  ri  gt)r  r#  r  r  r  r  r  r  r  maxvalZmaxindexr  r  r{  r   r  r  r  r	  r
  r  r  rY   rZ   rj     s0    $z)_adaptive_pooling_fn_with_idx.<locals>.fnr  )
r  r  r  r  r  r	  r  r   r  rj   rY   r  rZ   _adaptive_pooling_fn_with_idx  s    #r  c                   s    tjkrtdtts$J t|dks4J    ^ }}}t	j
j|}t	j
j|}|\}}||kr||krtS |dks|dkrg |||}t|   dS || dkr|| dkr|| || g}t|S t|| d |}	t|| d |}
t|||g }  }|	|
 }|dkrHt|S dd }d	d
 }t|||	|
g||g||gtjd tt fdd}tj |||d}|S )Nz0'adaptive_avg_pool2d' not implemented for 'Long'r%   r   r  r'   r  c                 S  s   t | | |S r   r"   r   r  r  rY   rY   rZ   r  O  s    z)_adaptive_avg_pool2d.<locals>.start_indexc                 S  s   t | d | | d |S ra  r  r  rY   rY   rZ   r  R  s    z'_adaptive_avg_pool2d.<locals>.end_indexr  r  r  r  r  r	  c                   s   t  | t | S r   )rH   truedivr  r  fn_sumZones_loaderr   rY   rZ   rj   `  s    z _adaptive_avg_pool2d.<locals>.fnr  )r   rl   r  r[  r_   r9   r   rx  r   rI   rQ   r   rr  r<  r  ra   
avg_pool2dr<   r]   fallback_adaptive_avg_pool2dr  rH   r   r  	ones_liker6   r   )r   r'  ry  r  r  r   r  o_sizer  h_kernel_maxw_kernel_maxr  r   r  r  r  rj   rvrY   r  rZ   _adaptive_avg_pool2d)  sV    


	r!  c                   s    tjkrtdtts$J t|dks4J    ^ }}}t	j
j|}t	j
j|}|\}}|dks|dkrg |||}t|   dt|tj dfS || dkr|| dkrtt|| d |}t|| d |}	t|||g }
  }||	 }|dkr.t|S dd }d	d
 }t||||	g||g||gtjdt||||	g||g||gtjd fdd} fdd}tj |||
d}tj tj||
d}||fS )Nz,adaptive_max_pool2d not implemented for Longr%   r   r  r'   r  c                 S  s   t | | |S r   r  r  rY   rY   rZ   r    s    z(adaptive_max_pool2d.<locals>.start_indexc                 S  s   t | d | | d |S ra  r  r  rY   rY   rZ   r    s    z&adaptive_max_pool2d.<locals>.end_indexr  c                   s    | t tdS Nr  r  r   r  )inner_func_max_valr   rY   rZ   inner_fn_max_val  s    z-adaptive_max_pool2d.<locals>.inner_fn_max_valc                   s    | t tdS r"  r#  r  )inner_func_max_idxr   rY   rZ   inner_fn_max_idx  s    z-adaptive_max_pool2d.<locals>.inner_fn_max_idxr  )r   rl   r  r[  r_   r9   r   rx  r   rI   rQ   r   rr  r  ra   
ValueErrorr<   r]   fallback_adaptive_max_pool2dr  rH   r  r  r6   r   )r   r'  ry  r  r  r   r  r  r  r  r  r   r  r  r  r%  r'  r   rirY   )r&  r$  r   rZ   adaptive_max_pool2dt  sp    

		r+  c                   s<            fdd}|S )Nc           	        s   g | d   }t | }t  tj}t d tj}t t |tjt |tj}t t 	|dd|}t 
|| | t 
||  }t |tj}t ||}t t |||tS )Nr'   r   )rH   r  r   rl   r  r  r   float64ri  r  r  r  r  r   r5  )	r  rd   sampleZi_exprdiffZout_sz_exprr  Zseq_ir  r   in_sz	kernel_szndimsout_szsamplesZsamples_loaderrY   rZ   r    s     z)_fractional_pooling_offsets.<locals>.loadr  )r4  r0  r3  r1  r   r2  r  rY   r/  rZ   _fractional_pooling_offsets  s    r5  c                 C  s   t | |||ddS r  _fractional_max_poolr   r  r'  random_samplesrY   rY   rZ   fractional_max_pool2d  s    r:  c                 C  s   t | |||ddS r  r6  r8  rY   rY   rZ   fractional_max_pool3d  s    r;  c                   s<  |    | jd   | j d   }tjdd fddtD  |  fdd} fddt|t }|  }tj	d	| | 
 ||||d
}	tj	d| | 
 tj|||d
}
t|	jjtr|	  t|
jjtr |
  t|
| j}|	|fW  d    S 1 s.0    Y  d S )Nr  r  c              
     s    g | ]}t  |d qS ))r4  r0  r3  r1  r2  r   )r5  rs  )inp_dhwr  r  r'  r9  rY   rZ   r     s   	z(_fractional_max_pool.<locals>.<listcomp>c                   s$   | d   }g | | |S r   rY   )r  r  r  )r  r  r  rY   rZ   r    s    z&_fractional_max_pool.<locals>.fn_innerc                   s8   | d   |  d    fddt D S )Nc                   s&   g | ]}|  | |  qS rY   rY   rs  )bdhwdhw_index_fnr  r  rY   rZ   r     s   zE_fractional_max_pool.<locals>.increments_to_index.<locals>.<listcomp>r  r  )r>  r  )r=  r  r  rZ   r    s
    z1_fractional_max_pool.<locals>.increments_to_indexrW  r  r  )rx  r  r(   r   r   r  r]   r   r7   r   ra   rl   r  r_   r`   r+  r  )r   r  r'  r9  r  ry  r  r  r   r  r  rk  rY   )r>  r  r<  r  r  r'  r9  r  rZ   r7    sN    "	


r7  c                   s       ^ }}}tjj|}tjj|}|^ }}}	|| dkrr||	 dkrrt|| ||	 gddS t||}
t||	}dd fdd}t||
|g||g||	gt	j
d  fd	d
}tj  |t|d}|S )Nr   r'   )divisor_overridec                 S  s   t | | t|S r   )r!   r   r5  r  rY   rY   rZ   r  ;  s    z0upsample_nearest2d_backward.<locals>.start_indexc                   s    | d ||S ra  rY   r  )r  rY   rZ   r  >  s    z.upsample_nearest2d_backward.<locals>.end_indexr  c                   s    | t S r   )r  r  )r  r   rY   rZ   rj   J  s    z'upsample_nearest2d_backward.<locals>.fnr  )rx  r   rI   rQ   r   rr  r  r<   r  rH   r   r6   r   ra   r   r]   )r   r'  r  r  r  r  Zinp_hZinp_wZout_hZout_wr  r  r  rj   r   rY   )r  r  r   rZ   upsample_nearest2d_backward)  s6    

	r@  rY   c              
   C  s   t | ||||||ddS )Nr%   rZ  _avg_poolndr   r  r  r  r  count_include_padr?  rY   rY   rZ   r  _  s    
r  c              
   C  s   t | ||||||ddS )Nr   rZ  rA  rC  rY   rY   rZ   
avg_pool3du  s    
rE  c              	     s"  
s
sdg t t 

t t| tsBJ tksRJ t
ksbJ tksrJ t|  d d fv sJ |   |  d   }|   d  t 
fddtD  \}	}
tst|
r
t	| ddd}n| 
 d	}t|t|	 }|  ttj}|d
krdkrVt}ndkrft}ntd || 
 |S 
fdd|r|r|r|n|jrd 		fdd}nfdd}n
f	dd}tj|  ||d}|S )Nr   r'   r%   c              	     s"   g | ]}t | | qS rY   r  r   )r  r  r  r  r  rY   rZ   r     s   z_avg_poolnd.<locals>.<listcomp>r  rZ  TFr  r   zUnknown dim: c                   s   | d   }|  d   d }t jfddtD  D ]J fddtD }|g ||}|d u rz|}q<t||}q<|S )Nc                   s   g | ]}t  | qS rY   r  r   )r  rY   rZ   r     r   z/_avg_poolnd.<locals>.fn_sum.<locals>.<listcomp>c                   s,   g | ]$} | |  |  |  qS rY   rY   r   )r   r  r  r  rY   rZ   r     r   )r   r  r   rH   r   )r  r#  r  totalr   r{  )r   r  r  r  )r   r  rZ   r    s     z_avg_poolnd.<locals>.fn_sumc                   s   t | t  S r   )rH   r  r  r  )r   r  r  r  rY   rZ   rj     s    z_avg_poolnd.<locals>.fnc                   s   t | t  S r   )rH   truncdivr  r  )divisorr   r  r  rY   rZ   rj     s    c                   s   |  d  }g }t D ]x}|| |  |  }t||  | |  } svt|d}t|| }t|| tj}|| qt	
tj|}jrt| |S t| |S r]  )r   r   ZMinMaxrH   r  rl   re  rb   r   rm  r  rN  r  rG  )r  r  Zdivide_factorsrd   hstarthendZfactordivide_factor)	rD  r   r   r  r  r  r  r  r  rY   rZ   rj     s     r  )rC   r_   r9   r   r   rx  r   r   r   r  r  r]   r   r   rm  r  r  fallback_avg_pool2dfallback_avg_pool3dr(  rN  r6   r   ra   )r   r  r  r  r  rD  r?  r   ry  r   Z
ceil_modeshad_paddingr  r  fallbackrj   r   rY   )r  rD  r   rH  r   r  r  r  r  r  r  r  rZ   rB    st    




 



rB  c                   s  d u sdksJ ds s,ddgt | ts:J t |tsHJ tdksXJ tdkshJ tdksxJ t| dv sJ |   | ^ }td|\}	}
td|\}}|  d pd p|
p||  ^ }	
t| }| }t	fddt
d d D t	fddt
d d D  }|d	krt| ||S fd
d  	
fdd}tj|  |||d}|S )Nr   divisor must be not zeror%   r  r'   c                 3  s:   | ]2}t |d   t d | d   d    dV  qdS r  r  r  r  rY   rZ   r   3  s   z&avg_pool2d_backward.<locals>.<genexpr>c                 3  s:   | ]2}t |d   t d| d   d    d V  qdS r  r  r  r  rY   rZ   r   7  s   r  c              	     sX  t d tj}t d tj}t d tj}t d tj}t d tj}t d tj}t t | ||}t t |||}	t t ||t t  tj|}
t t |	|t t tj|}t 	|t dtj}t 	|	t dtj}	t |
t  tj}
t |t tj}t t |
|t ||	}|S )z{
        This computes the scaling factor that we will divide an element
        by when `count_include_pad=False`
        r   r'   )
rH   r  rl   re  r  r  r  r   r  r  )r  r  stride_hstride_wpad_hpad_wkernel_hkernel_wrJ  wstartrK  wendrL  )heightr  r  r  r  rY   rZ   !compute_pool_size_without_paddingJ  s,    

z>avg_pool2d_backward.<locals>.compute_pool_size_without_paddingc                   s`  | ^ }}}|d  }|d  }t t|d  
d  
d tj}t t|d  
d  
d tj}t t|
d d tj}t t|
d d tj}t |t dtj}t |t dtj}t |t tj}t |t 	tj}d }tD ]:}	tD ](}
t 	|t |	tj}t 	|t |
tj}d ur`}n(sls~d d  }n
 ||}t 
g |t jt |t |t dtjddt jt |t |t dtj	dd|}t t ||t ||}|d u r0t ||t dtj}nt |t 	|||}qq|d us\J |S )Nr   r'   Fr1  r  )rH   r  r"   rl   re  r  r  r  r   r   r  r  r  r  r  ri  r  )r  r  r  r  r  r  r  r  r  r  r  r  r  r  partr  )r[  rD  r?  r  r  rO  r  r  r  r  r  r  rY   rZ   rj   f  st      

	


zavg_pool2d_backward.<locals>.fnr  )r_   r9   r   r   rx  r  r  r]   r   rW  r   fallback_avg_pool2d_backwardr6   r   ra   )r  r   r  r  r  r  rD  r?  r   _h_outZ
ceil_mode1_w_outZ
ceil_mode2r  r   r  rj   r   rY   )r[  rD  r?  r  r  rO  rZ  r  r  r  r  r  r  r  rZ   avg_pool2d_backward
  sb    
"Ar`  c                   s  d u sdksJ ds 	
s,g d
t | ts:J t |tsHJ t	dksXJ tdkshJ t
dksxJ t| dv sJ |   | ^ }td	
|\}	}
td	
|\}}td	
|\}}|  t
p|
p|p||  ^ }t| }|	 }	fdd	t
dD \  }|d
krzt| |	
|S 	
fdd  	
fdd}tj|  |||d}|S )Nr   rQ  )r   r   r   r   )r   r   r'   r%   c                 3  s4   | ], t  fd dt  d D V  qdS )c                 3  s:   | ]2}t |   t d |       dV  qdS r  r  rs  )rd   r  r  rY   rZ   r     s   z0avg_pool3d_backward.<locals>.<genexpr>.<genexpr>r%   N)rW  r   )r   r  )rd   rZ   r     s   z&avg_pool3d_backward.<locals>.<genexpr>}   c              	     s  dd D \}}}dd D \}}}dd D \}	}
}dd t | ||g|||g|||gD \}}}dd t |||g|	|
|g g|||gD \}}}dd |||fD \}}}dd t |||g gD \}}}ttt||t||t||}|S )	Nc                 s  s   | ]}t |tjV  qd S r   rH   r  rl   re  r  rY   rY   rZ   r     r   zQavg_pool3d_backward.<locals>.compute_pool_size_without_padding.<locals>.<genexpr>c                 s  s   | ]}t |tjV  qd S r   rb  r  rY   rY   rZ   r     r   c                 s  s   | ]}t |tjV  qd S r   rb  r   rY   rY   rZ   r     s   c                 s  s(   | ] \}}}t t |||V  qd S r   )rH   r  r  )r   rk  rw  padrY   rY   rZ   r     s   c              
   s  s<   | ]4\}}}}t t ||t t |tj|V  qd S r   )rH   r  r   r  rl   re  )r   r  r   r   rc  rY   rY   rZ   r     s   
c                 s  s$   | ]}t |t d tjV  qdS r  rH   r  r  rl   re  )r   r  rY   rY   rZ   r     s   c                 s  s(   | ] \}}t |t |tjV  qd S r   rH   r  r  rl   re  )r   r  r   rY   rY   rZ   r     s   )r   rH   r  r  )pdr  r  Zstride_drR  rS  Zpad_drT  rU  Zkernel_drV  rW  ZdstartrJ  rX  ZdendrK  rY  rL  )depthrZ  r  r  r  r  rY   rZ   r[    s8    $z>avg_pool3d_backward.<locals>.compute_pool_size_without_paddingc                   sV  | ^ }}}}dd t |||gD \}}}dd t |||gD \}}}dd t |||gD \}}	}
dd |||fD \}}}dd t ||	|
g	
gD \}}	}
d }tD ]}tD ]t}tD ]d}dd t |||g|||gD \}}}d ur}n2s s:d d	  d
  }n |||}tg |tjt|t|td	tj		ddtjt|t|	td	tj	
ddtjt|t|
td	tj	dd|}t
t
t||t||	t||
}|d u r(t||tdtj}qt|t|||}qqq|d usRJ |S )Nc                 s  s   | ]\}}|| V  qd S r   rY   )r   r   rc  rY   rY   rZ   r     r   z2avg_pool3d_backward.<locals>.fn.<locals>.<genexpr>c                 s  s0   | ](\}}}t t|| | |tjV  qd S r   rH   r  r"   rl   re  )r   r   r   rw  rY   rY   rZ   r   !  s   c                 s  s*   | ]"\}}t t||d  tjV  qdS r  rh  )r   r   rw  rY   rY   rZ   r   &  s   c                 s  s$   | ]}t |t d tjV  qdS r  rd  )r   pstartrY   rY   rZ   r   +  s   c                 s  s(   | ] \}}t |t |tjV  qd S r   re  )r   ZpendZ
pooled_dimrY   rY   rZ   r   /  s   c                 s  s(   | ] \}}t |t |tjV  qd S r   )rH   r   r  rl   re  )r   ri  Zp_rY   rY   rZ   r   ;  s   r   r'   r%   Fr1  r  )r   r   rH   r  r  r  r  r  rl   re  r  r  ri  r  r   )r  r  rt  r  r  Zpdstartr  r  Zpdendr  r  r  Zpd_r  r  rf  r  r  r  r\  r  )r[  rD  d_window_sizer?  r  r  rO  r  r  pooled_depthr  r  r  r  rY   rZ   rj     s     
	

zavg_pool3d_backward.<locals>.fnr  )r_   r9   r   r   rx  r  r  r   r]   r   r   fallback_avg_pool3d_backwardr6   r   ra   )r  r   r  r  r  r  rD  r?  r  Z_d_outZceil_mode_dr^  Zceil_mode_hr_  Zceil_mode_wr   r  r   r  rj   r   rY   )r[  rD  rj  rg  r?  r  r  rO  rZ  r  r  rk  r  r  r  r  r  rZ   avg_pool3d_backward  sf    
%&Wrm  c                 C  s   |   }t|tr|g}n|s*tt|}t|dkrTt|dv sPJ d| g S t|}tt|D ]j}|| dk r||  t|rt|nd7  < d||   krt|k shn t|dkr|| dkshJ qhtt|t|ksJ d|S )Nr   )rY   r  r  zinvalid axis: r'   zreduction axis not unique)r   r_   r   r   r   r   r]   r    )r   r  r   rd   rY   rY   rZ   _validate_reduction_axis}  s    
 <rn  c          
        s   |d urt | |} |  tt t| |}g }g g }g ttD ]>}||v rn| ||  qH| ||  qH fdd}rt}	D ]}t	j
j|	|< qn|}	|   t|  |p|  |  ||	|dS )Nc                   s   t |t ksJ r>t  t ks,J  fddD  t  t ksRJ d gt  t |  }tt t|D ]\}}|||< q|S )Nc                   s   g | ]} | qS rY   rY   r   r  rY   rZ   r     r   z9_make_reduction_inner.<locals>.loader.<locals>.<listcomp>)r   r   r   r   )r   Zreduction_indexr!  r  varZinner_loaderkeepdimsZkept_idxZreduced_idxr   r  rZ   r#    s    
z%_make_reduction_inner.<locals>.loader)rg   r  r8  r  r  r  )r   r   r    r   rn  r   r   rb   r]   r   r   r   r  r   ra   r   )
r   r  rq  r   r  Z
kept_sizesZreduced_sizesrd   r#  r  rY   rp  rZ   _make_reduction_inner  s:    



rr  rJ   )rh  c                   s   dd d fdd}|S )NFr   c                  sB   t | ||| d}tjf | d|}t|jjtr>|  |S )Nr  rq  r   r  )rh  r  )rr  r7   r   r_   r`   r+  )r   r  rq  r   r   r  r  rh  rY   rZ   r%    s    zmake_reduction.<locals>.inner)NFrY   )rh  r  r%  rY   rt  rZ   make_reduction  s    ru  c                C  sB   |d urt | |} t| |}t|  |  f|  f|  |dS )N)rg   dtypes	inner_fnsr   r  )r   r  r   ra   r   r  r   )r   r  r   rY   rY   rZ   _make_scan_inner  s    

rx  c                  s   |d urt | |} |   t| |}|  }|tjtjfv rHt | tj} t| ||}t	 fdd|D }t
j||  |  d}t|t| }t t|||S )Nc                 3  s   | ]} | V  qd S r   rY   r   r   rY   rZ   r     r   zmean.<locals>.<genexpr>r   )r   r   rn  r   rl   r  r  r   sum_rF   r*   r0   ra   r/   r   r]   div)r   r  keepdimr   Zoutput_dtype
sum_resultdenomrY   ry  rZ   rj    s    

rj  c           
        s   |d u rd}|    t| |}t| |dd}|r8|  tt| |}t|||}t fdd|D }|r|t	|| d}t
j||  |  d}t|t|  }t||}	|s|	fS |r|nt||}|	|fS )Nr'   T)r|  c                 3  s   | ]} | V  qd S r   rY   r   ry  rY   rZ   r      r   z var_mean_sum_.<locals>.<genexpr>r   r   )r   rn  rj  r+  squarer  rz  rF   r   rI  r*   r0   r   ra   r/   r   r]   r{  rx  )
r   r  
correctionr|  return_meanZx_meanZdiffsr}  r~  Zx_varrY   ry  rZ   var_mean_sum_  s&    

r  c                 C  sV   t | |}t| ||d d d}|d }t|d }t|tjoTt|tjk oTt|dkS )Nrs  r  r  r'   )	rn  rr  rF   r_   r   r  r   r(   r  )r   r  r|  r   r  reduction_numelrY   rY   rZ   use_two_step_variance  s    


r  c                  s    d u rd t | ||d d d}|d}|d |d tjjf |fd|  d|\}}}	|  |  |  t| |}t	fdd	|D d
d  fdd}
t
|
|}|r|  ||fS |fS )Nr'   rs  r  r  r8  Zwelford_reduce)rw  rh  r   c                 3  s   | ]} | V  qd S r   rY   r   ry  rY   rZ   r   3  r   z$var_mean_welford_.<locals>.<genexpr>c                 S  s4   t | tjr(| js(tt| tj|S t	| |S r   )
r_   r   r   Z	is_numberrH   r   r  rl   r  r  r7  rY   rY   rZ   get_constant_or_index_expr5  s    z5var_mean_welford_.<locals>.get_constant_or_index_exprc                   s4    }}t d}| t |||  S r]  )rH   r  r  )r`   cNZzero)r  r   r  rnumelrY   rZ   ru  :  s    

z#var_mean_welford_.<locals>.scale_fn)rr  ry  r*   ZWelfordReductionr   r   r+  r   rn  rF   r&  )r   r  r  r|  r  r   r#  rj  m2r   ru  ro  rY   )r  r   r  r  r   rZ   var_mean_welford_  s6    




r  c                  s   |    t }t| |dd} t| ||||d}t| ||drLtf i |ntf i |}t fdd|D }|s||d S |S )NFrE  )r   r  r  r|  r  )r  r|  c                 3  s   | ]}t | d dV  qdS )FrE  Nr  r~   r  rY   rZ   r   X  r   z#var_mean_helper_.<locals>.<genexpr>r   )r   r   r   r   r  r  r  r   )r   r  r  r|  r  Zcompute_dtyper   r   rY   r  rZ   var_mean_helper_H  s     	r  )r  r|  c                C  s   t | |||ddS )NFr  r  r|  r  r  r   r  r  r|  rY   rY   rZ   var_\  s    
r  c                C  s   t | |||ddS )NTr  r  r  rY   rY   rZ   var_meanc  s    
r  c                 C  st   |dk rt t| | |S |dkr0td|S |dkr<| S t | |d |}t||}|d dkrpt|| }|S )Nr   r'   r%   )pow_recursiverH   r  r  r  )r   r   r   r  rY   rY   rZ   r  j  s    r  c                 C  s   t | |S r   )rH   powr   r   rY   rY   rZ   
pow_nativey  s    r  )r   c                   sd  t tr$tkr$t tS t tr>dkr>t S t trXdkrXt S tdd  fD }t|}t tod  k odk n  p|odk}|rވ   fdd	}t	j
    |  d
S t  tr dk rtdS  dkrt rtS |rZt  tr:t S t trPt S t S t S )Nrt  r'   c                 s  s"   | ]}t |tjr| V  qd S r   )r_   r*   r9   r   r~   rY   rY   rZ   r     r   zpow.<locals>.<genexpr>i    r   c                   s   t |   S r   )r  r   r  r   r   r#  rY   rZ   rj     s    zpow.<locals>.fnr  r%   )r_   r   r   r  sqrtr<  r  r   r  r6   r   ra   r   r   r   r  r   exp2fallback_pow_scalarfallback_pow_tensor_scalarfallback_pow_tensor_tensorr  )r   r   r   Zis_integer_powZembed_exponentrj   rY   r  rZ   r    s@    
"




r  c                 C  s   t | tr| j}n| }t |tr&|j}t |tjsftj|  |  |	 | 
 dj}t |tjsfJ t |tjr| s| st |jtjs|  |j|_| S tjj|||d | S )Nr  unsafe_alias)r_   r9   r`   r*   r  r6   r   ra   r   r  r   Zis_input_bufferZis_module_bufferZ	NopKernelr+  rL  Zrealize_into)changedr{  r  Zchanged_datarY   rY   rZ   r"    s6    

r"  c                 C  s   t | t| |S r   )r"  r  )r   r  rY   rY   rZ   r    s    r  c                 C  s@   | |u r| S t ||  }t||  }t||  }t| |S r   rX  ra   r   r   r   r   r"  )rL  r#  rW  rY   rY   rZ   rX    s    rX  c                 C  s   t | |S r   )rH   floordivr  rY   rY   rZ   r    s    r  c                 C  s   t | |S r   )rH   rG  r  rY   rY   rZ   rG    s    rG  c                 C  s   t | ot |}t| ot|}|dkrP|r4J d|rBt| |S tt| |S |dkr|rdJ d|rrt| |S tt| |S t| |S )Nr  z5floordiv operands can not be boolean at the same timer  z5truncdiv operands can not be boolean at the same time)r   r   r  r  r{  rG  r  )r   r   Zrounding_modeZboth_integerZboth_booleanrY   rY   rZ   div_mode  s    r  c                 C  s<   t | ot |}|rt| |S ttjj}t|| |S d S r   )r   logical_andr4   rR  r  rZ  r&  )r   r   Z	both_boolrj   rY   rY   rZ   r    s
    
r  r  zOptional[ir.Constant]r  c              	   C  s   t | tjrt| jS t | tjr.t|  S t | tjr>| S t | tjsNdS t	j
j|  }t|N ttjdd | j|   }W d   n1 s0    Y  W d   n1 s0    Y  t |t	j
jjsJ t |jtjr|jS dS )z:Try convert an arbitrary IR node into an ir.Constant valueNZallow_indexingT)r_   r*   Z
MutableBoxget_constant_valuer`   r  r  r   ZLoopsrl   Z	_inductorops_handlerZExtractConstantsHandlerra   rI   Zset_ops_handlerr   objectre  r  Zinner_fn_argsvirtualizedZOpsValuer   )r   r>  rc   rY   rY   rZ   r    s$    
Jr  c                 C  s|   t dd | |fD }|r$t| |S t| }d urf|jdkrRttd|j}n
d|j }t| |S dd }t|| |S )Nc                 s  s   | ]}t |pt|V  qd S r   )r   r   r~   rY   rY   rZ   r   0  r   zdiv_prim.<locals>.<genexpr>r   infr  c                  W  s
   t j|  S r   )rH   r  r   rY   rY   rZ   rj   =  s    zdiv_prim.<locals>.fn)	r   rG  r  r   mathcopysignr   r  r&  )r   r   is_integralrH  r  rj   rY   rY   rZ   div_prim.  s    



r  c                 C  s    t | |ftjd\} }t| |S rP  )r  r   INT_TO_FLOATr  r  rY   rY   rZ   r{  C  s    

r{  c                 C  s4   t | pt| }|rdd }ndd }t|| |S )Nc                 S  s   t | |S r   )rH   modr  rY   rY   rZ   rj   U  s    zfmod.<locals>.fnc                 S  s   t | |S r   )rH   fmodr  rY   rY   rZ   rj   Z  s    )r   r   r&  )r   r   r  rj   rY   rY   rZ   r  O  s
    
r  c                C  sB   t |  st|  r&|d u r&tj}td|d}|| |||dS )Nra  r;  r   r   r   r   rl   r  ru  r   r  rq  r   rj   rY   rY   rZ   rz  `  s    

rz  c                 C  s   t |  st|  r&|d u r&tj}t|  dkr\|dv sBJ |pL|  }t| |ddS dd }t| ||d}t	j
jf i |d|i\}|d u rt| ||d	S |S )
Nr   r   r  TrE  c                 S  s   | \}|\}t ||fS r   )rH   r   a_tupleb_tupler   r   rY   rY   rZ   
combine_fn~  s    zcumsum.<locals>.combine_fnr  r   r  r   r   )r   r   r   rl   r  r   r   r   rx  r*   Scanr   fallback_cumsumr   r  r   r  r   r  rY   rY   rZ   cumsumr  s"    

r  c                 C  s   t |  st|  r&|d u r&tj}t|  dkr\|dv sBJ |pL|  }t| |ddS dd }t| ||d}t	j
jf i |d|i\}|d u rt| ||d	S |S )
Nr   r  TrE  c                 S  s   | \}|\}t ||fS r   )rH   r  r  rY   rY   rZ   r    s    zcumprod.<locals>.combine_fnr  r  r  )r   r   r   rl   r  r   r   r   rx  r*   r  r   fallback_cumprodr  rY   rY   rZ   cumprod  s"    

r  c                 C  sv   dd }|   }t|  dkr4|dv s,J t| S t| ||d}tjjf i |d|i\}|d u rrt| |dS |S )Nc              	   S  s\   | \}|\}t ||}t ||}||kt | B }t |t t || | |fS r   )rH   r  r  r}  ri  log1pexp)r  r  r   r   Zmin_vZmax_vr  rY   rY   rZ   log_add_exp_helper  s    z(logcumsumexp.<locals>.log_add_exp_helperr   r  r  r  rZ  )	r   r   r   r<  rx  r*   r  r   fallback_logcumsumexp)r   r   r  r   r   r  rY   rY   rZ   logcumsumexp  s    r  c                   s   t |  dkr2 dv sJ t| t| tjdfS |  }tjd|dd}t	|  |d}|tjf|d< | 
  fd	d
f|d< tjjf i |d|i\}}|d u rt|  dS ||fS )Nr   r  r   r  Fr   Zarg_break_ties_leftr  rv  c                   s   t |   tjS r   rH   r  rl   r  r  r  rY   rZ   r    r   zcummax.<locals>.<lambda>rw  r  rZ  )r   r   r<  rG  rl   r  r   r*   get_reduction_combine_fnrx  r  r  r   fallback_cummaxr   r  r   r  r   r   rk  rY   r  rZ   cummax  s     
r  c                   s   t |  dkr2 dv sJ t| t| tjdfS |  }tjd|dd}t	|  |d}|tjf|d< | 
  fd	d
f|d< tjjf i |d|i\}}|d u rt|  dS ||fS )Nr   r  r   argminFr  r  rv  c                   s   t |   tjS r   r  r  r  rY   rZ   r    r   zcummin.<locals>.<lambda>rw  r  rZ  )r   r   r<  rG  rl   r  r   r*   r  rx  r  r  r   fallback_cumminr  rY   r  rZ   cummin  s     
r  c                C  sB   t |  st|  r&|d u r&tj}td|d}|| |||dS )Nrb  r;  r   r  r  rY   rY   rZ   rb    s    

rb  c                 C  s   t | tj} td| ||dS )Nr   r  rq  )r   rl   r   ru  r   r   r|  rY   rY   rZ   
reduce_any  s    r  c                 C  s2   |d ur$t | ||dt| ||dfS t | d |dS Nr  )reduce_amaxreduce_argmaxr  rY   rY   rZ   
reduce_max  s
    r  c                 C  s2   |d ur$t | ||dt| ||dfS t | d |dS r  )reduce_aminreduce_argminr  rY   rY   rZ   
reduce_min  s
    r  xor_sumrW  rV  r  r;  r  
logical_or)r"  r  stabler   
descendingc          
   	   C  s>  |d u rd}|   }|  }tt||}t|dkrNt| td|tj|fS t|r^|| nd}tj	j
|ttjjst| |||dS t|ddtj|dd}dgt| }t|r|||< t||}t||}tjj|| j|jf|  | f||||d\}	}|	d u r t| |||dS |d us.J |	t|tjfS )NFr   r'   r  )r  r  r   rg   r  )rg   rv  rw  r   r  r  r  )r   ra   r   r   r<  r  rl   r  rI   rQ   r   Zstatically_known_ltrP  int16rW  sort_fallbackr  rS  r   r*   ZSortr   r   r  r   )
r   r  r   r  r  rg   r0  rk  Z
view_shaper   rY   rY   rZ   sort_stable  s>    



	
r  c                 C  s   t | d||dS )NFr  )r  )r   r   r  rY   rY   rZ   sortF  s    r  c                 C  s   t | |tj|dS )Nr   r   r$  )r[  r   r  )rT   r   r$  rY   rY   rZ   register_pointwise_numericK  s    r  ztorch._ops.OpOverloadPacketr  c                 C  s   t | j t| tjdS rP  )rE   rZ  r[  r   r  r  rY   rY   rZ    register_pointwise_numeric_ldf64T  s
    
r  rg  logical_not)r  )r   r   r  identity)r)  pointwise_overrides_datac                 #  s~   t |  t|  jd }|d u r"d S  fdd}t|tjjrh| D ] }t||}| j||fV  qDn| j||fV  d S )Nc                   s    j d u rt| S d S r   )ZtritonrH  r  r  rY   rZ   make_triton_fallback  s    
z6_get_pointwise_overrides.<locals>.make_triton_fallback)	r  r   r   r_   rl   rm   r   r   r   )nsr   rT   r  Zolnamer\  rY   r  rZ   _get_pointwise_overrides  s    
r  r  c                   s,   | t |< t|   fdd}t| | d S )Nc                    sB    | i |}g }t | d |D ]\}}|t||dd q |S )Nr   Tr  )r   rb   r"  )re   r   resultsZmut_resultsr   r  outplace_oprY   rZ   rj     s
    z$register_foreach_inplace.<locals>.fn)rO   r'  r   r   )aten_opZoutplace_aten_opr  rj   rY   r  rZ   register_foreach_inplace	  s    
r  c                   s   t | d d fdd}|S )Nr   c                    s.    | i |}t || d  }t| d |S r]  )r   r   r"  )re   r   r  r  rY   rZ   rj   -  s    zregister_inplace.<locals>.fn)r   )r  r  rj   rY   r  rZ   register_inplace,  s    
r  c                 C  s   d S r   rY   rU  rY   rY   rZ   sym_constrain_rangeV  s    r  c                 C  s&   t jjjd }t|tjsJ |jjS Nr{  	rI   rQ   rR   r  r_   rl   r  rW   r  r   r   r{  rY   rY   rZ   sym_size[  s    r  c                 C  s&   t jjjd }t|tjsJ |jjS r  r  r  rY   rY   rZ   
sym_stridel  s    r  c                 C  s   |   S r   )r  )r   rY   rY   rZ   	sym_numelt  s    r  c                 C  s
   t j|  S r   )r   Addr   rY   rY   rZ   sym_sum}  s    r  c                 O  s   t dd S )NzHelpful for debuggingrx   )r  re   r   rY   rY   rZ   foobar  s    r  c                 C  s   |    t| S r   )r+  r<  r   rY   rY   rZ   _realize  s    r  c                 C  s   |    t| | | S r   )r+  r*   ZResizeStorageBytes)variabler  rY   rY   rZ   resize_storage_bytes_  s    r  c                 C  s"   |    |   tt| |S r   )r+  r9   r   r*   ZSetSourceTensorKernel)r  Zsource_tensorrY   rY   rZ   set__source_tensor  s    r  c                 C  s@   | |u r| S t ||  }t||  }t||  }t| |S r   r  )rL  r#  rY   rY   rZ   
fsdp_copy_  s    r  c          	        sx  t | tsJ t |ttfs J |d u r.tj}|tjkrFtd| |tjkr`t	|dks`J |tj
krzt	|dkszJ |  |  }|  }t | jtjr| j | _t rtjjjrt|rtdqt|rt|jqdndtjjdrt|||dS t | gd	g}|!  tj"#||}t$||||%  fd
d}t&j'|||t|d}|S )Nzunsupported memory format: r   r   nanTr  r   r  r'   c                   sH   |  t  tj}t tj}t ||}t | fddS )Nc                     s
    gS r   rY   rY   )
flat_indexflat_loaderrY   rZ   r    r   z*resize.<locals>.inner_fn.<locals>.<lambda>)rH   r  rl   r  r  r  )r  Zflat_index_exprlimitr  r   Z	old_numelZout_indexerZuninitalized_val)r  rZ   r    s
    zresize.<locals>.inner_fnr  )(r_   r9   r]   r   rl   Zcontiguous_formatZpreserve_formatr[  Zchannels_lastr   Zchannels_last_3dr  r   r  r`   r*   r  r  rJ  utilsZdeterministicZfill_uninitialized_memoryr   r   r   rP  rW  rI   rQ   r   r  r  r  r  re  Z stride_ordered_for_memory_formatr  rg  r6   r   )	r   r   r  r   rg   Zx_flatZ
out_strider  rc   rY   r  rZ   resize  sT    



	r  )auto_functionalizedc                 C  sB   ddl m} ||}tj| ||i ||d dd | D S )Nr   )kernel_side_table)
kernel_idxgridtma_descriptor_metadataZkernel_argsc                 S  s    i | ]\}}t |tr||qS rY   r   )r   r  r{  rY   rY   rZ   r     r   z'triton_kernel_wrap_.<locals>.<dictcomp>)*torch._higher_order_ops.triton_kernel_wrapr  Zget_constant_argsr*   ZUserDefinedTritonKernelr   )r  Zconstant_args_idxr  r	  r   r  Zconstant_argsrY   rY   rZ   triton_kernel_wrap_  s    	

r  c                 C  sj   t dd | g|D rHd}tjjjdd  }r@| d| }|tj_tj	| |||}t
ttj	|S )Nc                 s  s    | ]}t |tot|V  qd S r   r
  r~   rY   rY   rZ   r     r   zcond.<locals>.<genexpr>z"control flow operator: torch.cond.stack_trace Found from : 
 )r   rI   rQ   rR   r  r  disable_cudagraphs_reasonr*   ZConditionalr   r]   mapr9   )predZtrue_fnZfalse_fnoperandsr{   r  r  rY   rY   rZ   rz     s    rz   c                 C  st   t dd || D rFd}tjjjdd  }r>| d| }|tj_dddd	}tj	| |||}t
t||S )
Nc                 s  s    | ]}t |tot|V  qd S r   r
  r~   rY   rY   rZ   r     s   zwhile_loop.<locals>.<genexpr>z(control flow operator: torch.while_loop.r  r  r   rc   c                 S  sN   t | tr| S t | tjr"t| S t | tjr8t| S tdt|  d S )NzNYI unsupported output type: )r_   r9   r*   r  MultiOutputr   r[  r  r  rY   rY   rZ   _map_output  s    

zwhile_loop.<locals>._map_output)r   rI   rQ   rR   r  r  r  r*   Z	WhileLoopr   r]   r  )Zcond_fnZbody_fnZcarried_inputsadditional_inputsr{   r  r  r  rY   rY   rZ   
while_loop  s    
r  zir.Subgraph)subgraph_fn
identifierc                 G  s$   t jj| g|R  }tttj|S r   )r*   ZInvokeSubgraphr   r]   r  r9   )r  r  r  r  rY   rY   rZ   invoke_subgraph)  s    r  )scheme)r  c          
      G  s   d }t jjjdd }|d us"J t| jjjD ]\}}|jdkrV|| t jj	|< q0q0|jdkrt j
|\}}t|| D ]6}	|	  |jrt jj|	  t jj|	  qtjjt j|||}q0t j|t jj	|< q0|S )Nquant_optionsrF  r   )rI   rQ   rR   r  r  r^   r@  nodesrT   envZfetch_args_kwargs_from_envr   r   r   r+  Zcodegen_low_precisionZlow_precision_codegen_opsr   r,  Zinvoke_quant_opsrl   rG  ZInterpreterr   Zrun_node)
r  r  r  r   r  rd   rW   re   r   r   rY   rY   rZ   invoke_quant_tracer/  s"    

r  ztuple[torch.Tensor])r  r  c                   s   ddl m m} t|dkr$td fddt||D }|| |fdd}t|d dd d	}td
d |D |d< tdd |D |d< t	j
jf |dd|}|d d u rtd|S )Nr'   )InputDescriptorlower_pointwise_subgraphr   zSUnable to generate code for associative_scan op, because there are lifted argumentsc                   s    g | ]} |  | d qS )r  )r   ra   r~   )r  rY   rZ   r   W  s   z$associative_scan.<locals>.<listcomp>c                   s    g t | t |R  S r   )r9  rI  )lhsrhs)lowered_combine_fnrY   rZ   wrapped_combine_fn]  s
    z,associative_scan.<locals>.wrapped_combine_fnr  c                 s  s   | ]}|  V  qd S r   r  r~   rY   rY   rZ   r   d  r   z#associative_scan.<locals>.<genexpr>rv  c                 s  s   | ]}|  V  qd S r   r  r~   rY   rY   rZ   r   e  r   rw  F)r  Zcan_fallback_to_atenz/Unable to generate code for associative_scan op)r?  r  r   r   r[  r   r   rx  r   r*   r  r   )r  xsr  r   Zsubgraph_inputsr$  r   r  rY   )r  r#  rZ   associative_scanL  s,    


r&  c                 C  s   d S r   rY   )tokensrY   rY   rZ   _sink_tokensp  s    r(  c                 O  s   t jj|g|R i |}ddlm} ||||}|d us>J tjj| }|d u rX|fS t	t j
tj|}t|ts|||fS |g|R S d S )Nr   )get_effect_key)r*   ZEffectfulKernelr   Ztorch._higher_order_ops.effectsr)  rI   rQ   Zeffectful_opsr9  r  r  r9   r_   r   )tokenrT   re   r   r  r)  Zeffect_typeZeffectful_kernelrY   rY   rZ   with_effectsu  s    
r+  )register_comm_loweringsc                 C  s   t | |dddd}|d }tjjt|}tjjf i |d|d\}}|dkrtjj	|t
jkrtjf | d|d	|\}}||fS ttd
 t| |dd}	ttj t| |	}
t|
|dd}|	|fS dS )zn
    Lowering inductor_prims.prepare_softmax_online to compute max/sum in one pass if no split is needed.
    TNrs  r  Zonline_softmax_reduce)rh  r  r'   r%   )r  Z
num_outputZreduction_hintz
            Online softmax is disabled on the fly since Inductor decides to
            split the reduction. Cut an issue to PyTorch if this is an
            important use case and you want to speed it up with online
            softmax.
            )rq  )rr  rI   rQ   r   ZsimplifyrF   r*   r7   Z
num_splitsr  r(   r  r3   r   r?  r@  textwrapdedentr  rM   rR  r  r  rz  )r   r   r   r  r  hintZ	num_splitZ
max_tensorZ
sum_tensorrk  r  ZxsumrY   rY   rZ   prepare_softmax_online  s<    



r0  r<  )quantized_lowerings)mkldnn_lowerings)jagged_loweringsc              	   c  sr   t | tjjsJ dt| }z2t| t|  dV  W |rH|t| < qnt|  n|rb|t| < n
t|  0 dS )z^
    A context manager to force fallback an op. Used in unit test
    for FallbackKernel.
    z+Only OpOverload to make the clean up easierN)	r_   rl   rm   rn   rM   r  r   rH  ry  )rT   Zold_handlerrY   rY   rZ   force_fallback  s    


r4  )NN)NNNFN)F)F)F)F)N)N)N)r   r   r  r'   T)N)N)N)r   )r   )r   r   r'   )r   r   r'   )r   r   r'   )r   )r   )r   )r   )r  )T)N)N)T)NTF)F)r   NNr'   )NN)NNN)F)r  FF)T)T)F)F)F)F)F)N)r%   F)N)N)NN)NN)NNN)NNN)r   )Nr  N)F)Nr   r'   F)Nr   r'   F)r  )NNNN)rY   r   FTN)rY   r   FTN)N)N)N)NF)N)N)F)F)N)NF)NN)NN)N)N)NF)NF)NF)NF)r  F)NN)NN(  
__future__r   
contextlibr  r   r   loggingr  r  rU  r-  r?  collectionsr   collections.abcr   r   typingr   r   r   r	   r
   r   r   Ztyping_extensionsr   Zunittest.mockr   r   rl   Z$torch.ao.quantization.fx._decomposedZtorch.fxZtorch.utils._pytreer  Z_pytreer9  Ztorch._dynamo.utilsr   Z(torch._higher_order_ops.associative_scanr   r
  r   Ztorch._prims_commonr   r   r   r   r   r   r   r   r   r   r   Ztorch.fx.experimental.sym_noder   r   r  r   Ztorch.utils._ordered_setr    Ztorch.utils._sympy.functionsr!   r"   r#   r$   Z_dynamo.utilsr&    r(   r)   r*   r+   decompositionr,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   rG   r  rH   rI   r  rJ   rK   rL   ZFALLBACK_ALLOW_LIST	getLoggerrZ  rY  rM   __annotations__rN   rm   rn   r   rR  Ztr_c10drI  r   Z_higher_order_opsrB  rV   r'  rO   Zquantized_decomposedr[   ri   rp   rw   r|   r}   r   r  r  r  r`  ZbmmZconvolutionZconvolution_backwardr  r  r  r  r  r  Z_int_mmr  r  r  re  r  r  r  r,  Z	complex32Z	complex64r   r  r   r   r   r   r   r   r   r   r   r   r   r   r   r  r&  r6  r   rJ  rM  rT  rS  r   rU  rX  Z
device_putrY  r[  rf  rh  ri  r   aliasdetachZdetach_ZliftZview_ofro  r   rp  rx  ry  r|  r}  r  r  r  r  rK  r  r   r  r  r  Z_unsafe_viewZreshaper  slicer  r  r  Zquantize_per_channelr  Zdequantize_per_channelr  Zquantize_per_tensorr  Zdequantize_per_tensorr  r_  r  r  r  r   r!  r$  r%  r(  r)  r+  r2  r  r4  r  r7  rH  cacherA  rC  rD  rN  r]  r`  Zrngprimsri  rj  rm  Z	bernoullirk  ro  rr  	lru_cachert  ru  ra  rz  rw  rx  r}  r|  r{  randintZforce_stride_orderr  rh  r  r  r  Zlookup_seedr  randomr  r  r  r  r  rn  r  Z	NO_OPMATHr  r  rs   r  r  rr   rt   r  Z_adaptive_avg_pool3dZadaptive_max_pool3dZ*_scaled_dot_product_attention_math_for_mpsuniformZexponentialZ_pdist_forwardZsoft_margin_loss_backwardZ_fused_rms_normZxpuZis_availableZembedding_dense_backwardZ_cdist_forwardZ_cdist_backwardZ
_trilinearZsegment_reduceZ_segment_reduce_backwardZhistcZ	histogramZbin_ctZ_histogramdd_bin_edgesZ_histogramdd_from_bin_ctsZaddbmmZ_addmm_activationZ_grouped_mmZ
_cudnn_rnnZ_cudnn_rnn_backwardZ_embedding_bagZ_embedding_bag_forward_onlyZ_embedding_bag_backwardZ*_embedding_bag_per_sample_weights_backwardZ_fused_moving_avg_obs_fq_helperZ*_fused_moving_avg_obs_fq_helper_functionalZ max_pool3d_with_indices_backwardZ_adaptive_avg_pool2d_backwardZ_adaptive_avg_pool3d_backwardZadaptive_max_pool2d_backwardZadaptive_max_pool3d_backwardZfractional_max_pool2d_backwardZfractional_max_pool3d_backwardZreplication_pad1d_backwardZreplication_pad2d_backwardZupsample_linear1d_backwardZupsample_bicubic2d_backwardZupsample_trilinear3d_backwardZgrid_sampler_2d_backwardZ_pdist_backwardr  r  ZkthvalueZtopkr  ZmedianZ	nanmedianrC  Zresize_Z
resize_as_Z_linalg_detZlinalg_householder_productZlinalg_inv_exZlinalg_ldl_factor_exZlinalg_ldl_solveZ	linalg_luZlinalg_lu_factor_exZlinalg_lu_solveZlinalg_matrix_expZ	linalg_qrZ_linalg_slogdetZ_linalg_solve_exZlinalg_solve_triangularZ_linalg_svdZ	lu_unpackZormqrZ_linalg_check_errorsZlinalg_pinvZatol_rtol_tensorZ_linalg_eighZtriangular_solveZlinalg_cholesky_exZcholesky_inverseZcholesky_solveZgeqrfZ_fft_r2cZnonzerogcdZ_thnn_fused_lstm_cellZ_primsZ	rng_primsZrun_and_save_rng_stateZrun_with_rng_stateZgraphsafe_run_with_rng_stateZmasked_scatterZmasked_scatter_backwardrL  ZangleZ_efficientzerotensorZ(_sparse_coo_tensor_with_dims_and_tensorsZ	to_sparseZ
_to_sparser   r  r  r  Z#_scaled_dot_product_flash_attentionZ,_scaled_dot_product_flash_attention_backwardZ#_scaled_dot_product_cudnn_attentionZ,_scaled_dot_product_cudnn_attention_backwardZ+_scaled_dot_product_flash_attention_for_cpuZ4_scaled_dot_product_flash_attention_for_cpu_backwardZ0_scaled_dot_product_fused_attention_overrideableZ9_scaled_dot_product_fused_attention_overrideable_backwardZ_flash_attention_forwardZ_flash_attention_backwardZ_efficient_attention_forwardZ_efficient_attention_backwardZindex_reducer=  r<  r  rM  r  r  r  r  Zscalar_tensorr  Z
LongTensorr  r  r  r  r  r  r  r  r  rj  rG  r  Z
zeros_liker  r
  r  r  r  r  r  r  r  r,  r.  r-  r   r3  r:  r;  r<  r>  r@  rA  r7  rR  Zfallback__unsafe_masked_indexrT  Z,fallback__unsafe_masked_index_put_accumulater  rY  rM  r^  r[  rg  re  ri  rc  rz  r~  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r!  r  r+  r)  r5  r:  r;  r7  r@  rM  rE  rN  rB  r]  rm  rl  rn  rr  ru  rx  rj  r  r  r  r  ro  r  r  r  r  r  ZTensor_Tensorr  ZScalarr  ZTensor_Scalarr  r"  r  rX  r  rG  r{  r  r  r  r  Ztrue_divider  r  ra  rz  r  r  r  r  r  r  r  r  r  r  rb  r   r  rW  r  rV  r  r  rk  r  rl  r  r  r  r  r  r   r  r  r  r  Zrsqrtr  r  expm1Zrelur6  r  r  r  cossinabsZbitwise_andZbitwise_left_shiftZbitwise_notZ
bitwise_orZbitwise_right_shiftZbitwise_xorlgammaerfZspecial_erfr  tantanhr  r  r  logical_xorr  r  Z	clamp_minZ	clamp_maxnegr  	remaindersignZsignbitZ	_neg_viewler  r  r  r  necoshsinhacosacoshasinasinhatan2atanatanhr  erfcZerfinvhypotlog10log2	nextafterZcodegen.commonr)  r  r  r   rT   r   r$  Z_foreach_addListZforeach_add_listZforeach_add_scalarZ_foreach_mulZforeach_mul_listZforeach_mul_scalarZ_foreach_subZ_foreach_negZ_foreach_absZ_foreach_powZScalarAndTensorZ_foreach_divZforeach_div_listZforeach_div_scalarZ_foreach_sqrtZ_foreach_rsqrtZ_foreach_maximumZ_foreach_minimumZ_foreach_clamp_minZ_foreach_clamp_maxZ_foreach_reciprocalZ_foreach_signZ_foreach_copyr  Z_foreach_add_Z_foreach_mul_Z_foreach_div_r  Zadd_Zbitwise_and_Zbitwise_left_shift_Zbitwise_not_Zbitwise_or_Zbitwise_right_shift_Zbitwise_xor_Zmul_Zdiv_ZTensor_modeZlogical_and_Zlogical_not_Zlogical_or_Zlogical_xor_Zsub_Zrelu_Zsigmoid___and__
__lshift____or__
__rshift____xor____iand____ilshift____ior____irshift____ixor__r  r  r   r  r  r   methodfuncr  r  Z_inductor_testr+  r  r  r  set_Zsource_Tensorr  Zfsdpr  r  Z*torch._higher_order_ops.auto_functionalizer  r  Zhigher_orderrz   r  r  Zinvoke_quantr  r&  r(  r+  Zcomm_loweringr,  r0  r=  r1  Zregister_quantized_opsZregister_woq_mm_opsr2  Zregister_onednn_fusion_opsr3  Zregister_jagged_opscontextmanagerr4  rY   rY   rY   rZ   <module>   s   $4@8




	H95    
V:
,
-



#












4
E2$2$"$-"/|6



		
(2&




	
	*(	.a$:		 


C8



.
	



$!#I%
~



&!$"t /

  $
  $ _7 K"$      
 
)5


E


Q

B-


        z
	 &

	 H1
+


/#

	 











'	














@
"
#6
