o
    Zh                    @  sZ0  U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlmZ d dlmZmZ d dlmZmZmZmZmZmZmZ d dlmZ d dlmZ d dlZd dlZd dlZd dl Zd dl!m"  m#Z$ d dl%m&Z& d d	l'm(Z( d d
l)m*Z* d dl+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6 d dl7m8Z8m9Z9 d dl:m;Z; d dl<m=Z=m>Z>m?Z?m@Z@mAZA ddlBmCZC ddlDmEZEmFZFmGZGmHZH ddlImJZJmKZK ddlGmLZLmMZMmNZNmOZOmPZPmQZQmRZRmSZSmTZTmUZUmVZVmWZWmXZXmYZY ddl"mZZZm[Z[m\Z\m]Z]m^Z^m_Z_m`Z`maZambZbmcZcmdZd ddlemfZfmgZg er!ddlhmiZi edZjedZke;ddgZlemenZoi Zpdeqd< i Zrdeqd< e;ejsjt  ZuejfjvZvejfjwZwejfjxZxe;ejsjt  Zye;ejsjt ejzj{gZ|e;ejsjt  Z}i Z~d eqd!< ejfjZd"d# Zdd&d'Zdd,d-Zd.d/ Zd0d1 Zd2d3 Zd4d5 Zeevjevjevjevjevjevjevjevjevjevjevjevjevjg ejejejejejejejejejejejejejd6Zdd9d:Zd;d< Zd=d> ZddAdBZdCdD ZdEdF ZddPdQZdRdS ZddTdUZdVe1jdVfddXdYZdZd[ Zdd\d]Z					V	dd^d_Zdd`daZdddedfZeejzj{ddgdhdi Z{eexjddgddjdkZdVdlddmdnZeevjjddgddodpZdVdVdqddtduZeexjddgdddvdwZddxe1jdVdddVdVdf	dydzZd{d| Ze  	Vdd}d~ZeevjdVdddd ZeevjdVdddd Zeevjevjevjevjexjgdd Zeevdreevjƃeă eevjddgdddZeevjddgdddZeevjgdddZeevjʃdd Zeevj˃dd Zeevj̃dd Zeevj̓dd Zeevjjσdd ZeevjЃdd Zeevjddgdd Zeexjddgdd Zeevjddgdd Zeevjԃdd Zeevjddgeevjddgeevjddgdd Zeevjddgdd ZeevjddgdddZeevjddgdddZeevjddgdddZeevjddgdddZdddZeejddgdddZeejddgdddddZeejjddgdddZeejjddgdddddÄZeejjddgdddńZeejjddgdddddǄZeevjdddɄZeevjddgdʐddd΄ZeevjddgdʐdddЄZeevjddgdʐddd҄ZeevjddgddԄ ZeevjddgdddքZeevjddgddd؄ZeevjddgdddڄZeevjddgdd܄ Zeevjddgddބ Zeevjddgdd ZdddZeevjdddZdddZeddd ZddddZddddZd͐dddZdddZdd Zeejfjjddgdd Zeevjddgdd Zeevjddgdd Zeevj jddgdd Zeevjd d Zeddd Zdd ZeevjjσZeevjjZ	eevj
jσZeevj
jZeevj eevjdd Zeevj
dd	 Z
eeFjddgd
d ZeeFjddgdѐddZeeFjddgdd ZeeFjddgdd ZeeFjddgd ddҐddZeeFjddgd ddӐddZdԐd d!ZdՐd#d$ZeevjjddgdVdVddd%d֐d.d/ZeevjddgdVdVd0dאd2d3Zd4d5 Zd6d7 Zd8d9 Z d:d; Z!d<d= Z"d>d? Z#eevj$ eevj% eevj& eevj' eevj(dVd@ eevj)jdVd@ eevj* eevj+dVd@ eevj, eevj- eevj. eevj/jσ eevj0jσ eevj1 eevj2j3 eevj4jσ eevj5jσ eevj6 eevj7dVd@ eevje" eevj8e eevj9e eevj:e eevj;e eevj< eevj= eevj= eevj> eevj? eevj@ eevjAe eevjB eevjC eevjD eevjE eevjF eevjG eevjH eevjI eevjJe eevjK eevjLe eevjM eevjN eevjNjO eevjP eevjQ eevjR eevjS eevjT eevjU eevjV eevjW eevjX eevjY eevjZ eevj[ eevj\ eevj] eevj^ eevj_ eevj` eevja eevjb eevjc eevjd eevje eevjf eevjg eevjh eevjijj eevjk eevjl eevjm eevjn eevjo eevjp eevjq eevjrjσ eevjsjdVd@ eevjte eejujvjw eejujvjx eejujvjy eevjz eevj{ eevj|e eevj} eevj~ eevj eevj eevj eevjj eevjjϐe#dVd@ eevjjϐe#dVd@ eevjjϐe#dVd@ eevjjϐe#dVd@ eevjjϐe#dVd@ eevjjϐe#dVd@ eevjjϐe#dVd@ eevjjϐe#dVd@ eevjjϐe#dVd@ eevjjϐe#dVd@ eevjjϐe# eevjjϐe# eevjjϐe# eevjjϐe# eevj eevjddgddAdBZeevjddCdDdEZdFdG ZeevdH	reevje eexjdIdJ ZeevjddgdؐdMdNZeevjddgdِdOdPZdQdR ZeejevjgddddVdSdTdUZeejddVdWZeejdXdY ZeevjdZd[ Zeevjd\d] Zeevj	dddd^d_d`Zdadb Zeevjddgdcdd Zdedf ZeejevjgdddddddgdhdiZdjdk Zdldm ZeevjeeZeedZeed Zdndo ZeevjdddddpdqdrZeevjdddddpdsdtZeevjdddddpdudvZeexjjσdwdx Zeejevjgdydz Zeevjddgdd{d|Zeevjddgdېd}d~Zdd Z	xd͐ddZdd Zd͐ddZeevjddgddL Zeevjddgdd ZeevjdddZeevjdddZdd Zdd ZeevjddgdddZeeFjddgdddZdddZeevjjdVdZeevjjdVdZeevjddgdd Zeevjddgdd Zedd ZeevjddgdddZeevjddgdܐddZddxddݐddZeevjddgdddސddZeevjddgdܐddZeevjddgdܐddZeevjddgdܐddZeevjddgdxddߐddZ		VddddZeevjjσddddZeevjjσddddZeevjjσ	dddÐdĄZeevjjσ	dddŐdƄZeevjjσ			dڐddȐdɄZeevjjσ			dڐddʐd˄Zѐd̐d̈́ Zeexjjσdΐdτ ZӐddӐdԄZeevjddgdÐdՐdքZՐddڐdۄZ֐ddܐd݄Zאddސd߄Z	dddZِdd Zڐdd ZddddZ	VdddZeexjddg	VdddZeexjddgdd ZeevjjdVdZeevjddg		 		VdddZeevjjdVdZeevjddgdd ZdddZdd Zdd Zdd ZeevjjdVdZeevjdd ZeevjjdVdZeevjdd  ZeevjjdVdZdd Zeevjdd Zeevjjσ	dddZeevjjdVdZeevjjdVdZeevjddg		 	V	x	ddd	Zeevjddg		 	V	x	dd
dZdd ZeevjjdVdZeevjddg	dddZeevjjdVdZeevjddg	dddZdd Zdd ZddddZdd ZeevjdddddZdd  Zd!d" Zd#d$ Zd%d& ZeevjexjgdddVd'd(d)Zeevj dddVd'd*d+Z d,d- Zed.d/ ZeevjjdVdZeevjjdVdZeevjjdVdZ	eevjdxd0d1d2 Zdd3d4Z
eevjd5d6 Zeevjddgdd7d8Zed9d: Zed;d< Zeevjdxd0dd=d>Zeevjgdxd0d?d@ ZddCdDZeexjgdxd0dEdF Zeevjevjjgdxe1jddGdH Zeevjexjgdxd0dIdJ ZeevjexjgddddKdLZeevjjσZeevjjσZeevjjσZeevjjσZ eevj!jσZ"eevjddMdNZeevjddOdPZeevjdQdR ZeevjddgddSdTZeevj!ddgddUdVZ!eevj#ddddWdXZ#eevj$ddYdZZ%eevj&ddgdd[d\Z'eevj(ddgdd]d^Z)eexj*ed_ eevj+ed`Z,eevj-edaZ.eevj/edbejdcZ0eevj1eddejdcZ2eevj3dxdedfZ3eevjNjOdVdZ4eevjNjOddgdddVdgdhdiZ5eevjNjddgddjdkZNddldmZ6dndo Z7e6evj8Z8e7evj9Z9e6evj:Z:e6evj;Z;eevj<Z<e7evj=Z=e7evj>Z>eevj?Z?eevj@dxdpZ@e7evjA e7evjB eevjCZCeevjDZDeevjEZEeevjFdqdrZFeevjGZGeevjHZHeevjIZIe6evjJ e6evjKZKeevjLe1jdgeK e6evjM e6evjN e6evjO e7evjo eevjPddxejdsZPeevjQddxejdsZQeevjRddxejdsZReevjSddxejdsZSeevjTZTeevjUZUeevjVeT eevjWeU eevjXZXeevjCZCe6evjYZYeevjZ eevj[dtdrZ[eevj̃ eevj\ejdc eevj]eX eevj^ejdc eevj_ejdc eevj`ejdc eevjaejdcZaeevjbejdc eevjcejdc e6evjd e6evje e6evjf e6evjg e6evjh e6evji e6evjj e6evjk e6evjl e6evjm e6evjn e6evjo e6evjp e6evjq e6evjr e6evjs ddultmuZumvZv dvdw ZwevD ]@ZxewevexD ]\ZyZzZ{eeyexeze{dx qewexexD ]\ZyZzZ{eeyexeze{dx qqeevj|j}e3dxdpZ~eevj|je3dxdpZeevj|je3dxdp eevjj}eZeevjje eevjjeZeevjj}e@ eevjje@ eevjjϐeX eevjjϐeC eevjje eevjj}e eevjje eevjj}eZeevjje eevjjeZeevje> eevje8 eevjj}eT eevjjeT eevjj}eU eevjjeU eevjj}eT eevjjeT eevjj}eU eevjjeU eevjeY eevje[ eevje dydz Zeevjj}evj|j}e~ eevjjevj|je eevjj}evjj}e eevjjevjje eevjj}evjj}e eevjjevjje d{d| Zeevje3 eevjeD eevjeE eevjeF eevjeG eevjeH eevjeI eevje eevjje eevjje eevjeP eevjeQ eevjeR eevjeS eevje@ eevje< eevje= eevjeD eevjeE eevjeG eevjeH eevjeI eevjevj eevjevj eevjevj eevjevj eevjevj eevjdd}d~Zeevjjdd Zeevjjdd Zeevjdd Ze8 D ]\ZZee9ee qeejdd Zeevjdd Zeejfjjdd Zeejfjjdd ZeejfjvjÐjădd ZeejfjƐd8reejfjƐjjσdd ZeejfjvjȃddCddZd dlɐmʐZ eeʃ ee*dd Zeejfj̐jddgdd Zeejfj̐jddgdd Zeejfj̐jddgdddZeejzjddgdddddZee(ddgdddZeejfjxjjσdd Zeejfj̐jddgdd ZddlՐm֐Z eփ  eeFjddgdd ZddlDmؐZ eCe؃ ddlDmِZ eِڡ  eِۡ  ddlDmܐZ eܐݡ  ddlDmސZ eސߡ  ejdddZdS (      )annotationsN)defaultdict)IterableSequence)AnyCallablecastOptionalTYPE_CHECKINGTypeVarUnion)	ParamSpec)patch)counters)associative_scan_op)triton_kernel_wrapper_mutation)canonicalize_dimcanonicalize_dimscheckdtype_to_typeelementwise_dtypesELEMENTWISE_TYPE_PROMOTION_KINDget_computation_dtypeis_boolean_dtypeis_float_dtypeis_integer_dtypeNumber)magic_methodsmethod_to_operator)
OrderedSet)CeilDivFloorDivIdentity
IntTrueDivModularIndexing   )import_submodule   )configinductor_primsirtest_operators)decompositionsget_decompositions)	DtypeView
ExpandViewIndexingConstantIRNode	is_tritonOnlineSoftmaxReductionops_wrapperPermuteView	Pointwise	ReductionSqueezeView	TensorBoxvalidate_irView)ceildivdecode_device
is_dynamicis_gpuis_pointwise_useis_view,needs_fallback_due_to_atomic_add_limitationspad_listlike#register_op_dtype_propagation_rulessympy_productuse_scatter_fallback)opsV)ReductionType_T_Pztorchvision::roi_alignzaten::index_addz8dict[Union[Callable[..., Any], str], Callable[..., Any]]	loweringsz9dict[torch._ops.OpOverload, Optional[Callable[..., Any]]]_maybe_layout_constraintsz2dict[torch._ops.OpOverload, torch._ops.OpOverload]inplaceable_foreach_opsc                  C  s<   t jjjD ]} | jD ]}|jdkr|jtv s  dS q
qdS )NZcall_functionTF)rH   graphcurrent_nodeusersoptargetforeach_ops)nodeuser rW   G/var/www/auris/lib/python3.10/site-packages/torch/_inductor/lowering.pycur_node_has_non_foreach_users|   s   
rY   	arg_pairs%Iterable[Union[tuple[Any, Any], Any]]c                 C  s   t t}d}t| D ]A\}}t|tsd}|f}t|  ptj}d }|D ]}t|tr2|j	
 } nq$|d us;J d|r@|\}|||f ||f q
|S )NFTz.foreach op should have at least one tensor arg)r   list	enumerate
isinstancer   r>   r(   Z#combo_kernel_foreach_dynamic_shapesr9   data
get_deviceappend)rZ   outZunpack_argsiargsuse_foreachdevicetrW   rW   rX   group_foreach_args   s&   


rh   fnCallable[..., Any]returnOptional[Callable[..., Any]]c                   sR   t  tjjs	dS  tv rt  S  tv rdt < dS  fdd}t }||S )zHGet layout constraints. Returns None if there are no layout constraints.Nc                   sF   | t jjju rtt < t  S | t jjju rd t < d S td|  )NzUnknown layout constraint tag: )torch_CTagneeds_fixed_stride_orderconstrain_to_fx_stridesrM   flexible_layoutAssertionError)tagri   rW   rX   handle_layout_constraint_tag   s   z>maybe_layout_constraints.<locals>.handle_layout_constraint_tag)r^   rm   _ops
OpOverloadrM   rL   get_layout_constraint_tag)ri   rv   rt   rW   ru   rX   maybe_layout_constraints   s   
rz   c                 C  sX   t jjjt jjjg}|D ]}|| jv r|  S qt jj| r$t jjjS t	t jjt
jS N)rm   rn   ro   rp   rr   tagsZ_libraryutils
is_builtingetattrr(   Z#custom_op_default_layout_constraint)ri   Ztags_by_priorityrt   rW   rW   rX   ry      s   

ry   c                 C  s   | s	t d| d S )Nzinductor does not support NotImplementedError)condmsgrW   rW   rX   
assert_nyi   s   r   c                   s\   t  ttttfrdd  D S t  t  tjj	r,t
 fdd  D  d S d S )Nc                 S  s   g | ]}t |qS rW   )add_needs_realized_inputs.0xrW   rW   rX   
<listcomp>       z-add_needs_realized_inputs.<locals>.<listcomp>c                 3  s    | ]}t  |V  qd S r{   )r   )r   overloadru   rW   rX   	<genexpr>   s    

z,add_needs_realized_inputs.<locals>.<genexpr>)r^   r\   settupler   needs_realized_inputsaddrm   rw   OpOverloadPacketupdate	overloadsru   rW   ru   rX   r      s   
r   c                 C  s:   t | tjjr|  D ]	}|tt| |< qd S |t| < d S r{   )r^   rm   rw   r   r   rM   r   )ri   
constraintr   rW   rW   rX   add_layout_constraint   s
   r   )r   r'   r%                     	   
         dtypeintc                 C  s2   t | ts| S | tv sJ d|  dt|  } | S )Nzid z missing from DTYPE_ID_LOOKUP)r^   r   DTYPE_ID_LOOKUPr   rW   rW   rX   decode_dtype  s
   
r   c                 C  sB   t | trt|  pt|  S t | tjr| jdu S t | tS )NT)	r^   r9   r   	get_dtyper   sympyExpr
is_integerr   r   rW   rW   rX   is_integer_type  s
   


r   c                 C  s    t | trt|  S t | tS r{   )r^   r9   r   r   boolr   rW   rW   rX   is_boolean_type  s   

r   type_promotion_kindr   c                   s0   dd   fdd|D }t |d| i\}}|S )Nc                 S  s8   t | ttjfr
| S t|  }tjdg| |  dS )Nr'   r   )	r^   r   r   Basiclenget_sizerm   zerosr   )inpdimrW   rW   rX   construct_input!  s   z+get_promoted_dtype.<locals>.construct_inputc                      g | ]} |qS rW   rW   )r   argr   rW   rX   r   )  r   z&get_promoted_dtype.<locals>.<listcomp>r   )r   )r   rd   Zinps_r   rW   r   rX   get_promoted_dtype   s   r   c                 C  sh   t | ttfs| g} nt| } t| D ]}t |tjjr1| D ]}t||}|tvr0| 	| q q| S r{   )
r^   r\   r   rm   rw   r   r   r   rL   ra   )aten_fnri   r   Zother_fnrW   rW   rX   get_overloads.  s   

r   c                 C  s6   t | tjjr|| jv S t | tjjr||  v S dS NF)r^   rm   rw   r   Z_qualified_op_namerx   name)rR   	namespacerW   rW   rX   in_namespace>  s
   
r   rd   	list[Any]kwargsdict[str, Any]	broadcastr   )Optional[ELEMENTWISE_TYPE_PROMOTION_KIND]convert_input_to_bool tuple[list[Any], dict[str, Any]]c                   s  dd t  D }dd  D }|s|s fS |s|ri|r$tjndd  D }|dd  D  t|d|i|rF |d  n|d   fd	d
fdd D  fdd D |rtt	t
 fdd|D fdd|D  }t	|d  }	t||d t| D ]\}
}| |
< qt||t|d  D ]\}}||< qtt D ]}
t |
 tjrt |
 |	 |
< qD ]}t| tjrt| |	|< qЈ fS )Nc                 S     g | ]\}}t |tr|qS rW   r^   r9   r   rc   r   rW   rW   rX   r   M      z"transform_args.<locals>.<listcomp>c                 S  r   rW   r   r   kvrW   rW   rX   r   N  r   c                 S  s*   g | ]}t |ttjfst|d r|qS r   )r^   r   r   r   hasattrr   arW   rW   rX   r   X  s    c                 s  s    | ]
}t |d r|V  qdS )r   N)r   r   rW   rW   rX   r   ^      z!transform_args.<locals>.<genexpr>r   r   c                   s6   t | tr
t| S t | tjrtj| j dS | S )Nvaluer   rf   )r^   r9   to_dtyper*   Constantr   )r   )rf   r   rW   rX   promotei  s
   

ztransform_args.<locals>.promotec                   r   rW   rW   r   r   rW   rX   r   q  r   c                   s   i | ]	\}}| |qS rW   rW   r   r   rW   rX   
<dictcomp>r      z"transform_args.<locals>.<dictcomp>c                 3      | ]} | V  qd S r{   rW   r   rc   rd   rW   rX   r   x      c                 3  r   r{   rW   r   r   r   rW   rX   r   y  r   )r]   itemsrm   r   extendvaluesr   r`   broadcast_tensorsr\   	itertoolschainr   zipr   ranger^   r*   r   r/   create)rd   r   r   r   r   Zargs_indicesZkwargs_indicesZpromoting_argsZbroadcastedsizerc   r   r   rW   )rd   rf   r   r   r   rX   transform_argsF  sZ   

r   c                   s>   t   fdd}t| }t| tt|| |S )a  
    Add a foreach lowering to lowerings dict.

    Arguments:
        aten_fn: torch.ops.aten.* fn we are lowering
        decomp_fn: alternate implementation on our IR
        broadcast: True to apply broadcasting to tensor inputs
        type_promotion_kind: kind of type promotion applied to tensor inputs, `None` means no type promotion
        convert_input_to_bool: some logical ops require inputs are converted to bool
    c                    s*   t | dksJ  | i |}t| |S )Nr%   )r   r:   )rd   r   rb   	decomp_fnrW   rX   wrapped  s   z+_register_foreach_lowering.<locals>.wrapped)	functoolswrapsr   rT   r   rL   dictfromkeys)r   r   r   Zaten_fnsrW   r   rX   _register_foreach_lowering  s   
r   c                   s<   t  fdd}t  tt | |S )a  
    Add a lowering to lowerings dict

    Arguments:
        aten_fn: torch.ops.aten.* fn we are lowering
        decomp_fn: alternate implementation on our IR
        broadcast: True to apply broadcasting to tensor inputs
        type_promotion_kind: kind of type promotion applied to tensor inputs, `None` means no type promotion
        convert_input_to_bool: some logical ops require inputs are converted to bool
    c                    s   t | } t|}d}t| dkr!t| d t tfr!d}t | d } tdd  D s9tdd | D r9J dt| |\} }|rH| g} | i |}t	| |S )	NFr'   r   Tc                 s  s"    | ]}|t v pt|d V  qdS )Z_c10d_functionalN)	fallbacksr   )r   ri   rW   rW   rX   r         
z6_register_lowering.<locals>.wrapped.<locals>.<genexpr>c                 s      | ]}|d kV  qdS )rb   NrW   r   rW   rW   rX   r     r   zout= ops aren't yet supported)
r\   r   r   r^   r   allanykeysr   r:   )rd   r   unpackedrb   r   r   r   r   r   rW   rX   r     s(   
z#_register_lowering.<locals>.wrapped)r   r   r   rL   r   r   r   )r   r   r   r   r   r   rW   r   rX   _register_lowering  s
   r   F.Callable[[Callable[_P, _T]], Callable[_P, _T]]c                 C  s   t jt| |||dS )z+
    Shim to support decorator syntax.
    r   r   r   )r   partialr   )r   r   r   r   rW   rW   rX   register_lowering  s   r   c                 C  s   g }t jt| t|tjjdD ]O\}}tjjj	j
t|dddr(|| qtjjj	j
t|dddr<|| qtjj|| tt|jtt|jk rZ|| q|| qtt|S )z
    Broadcasting logic based on symbolic shapes.

    We give the shapes 0 and 1 concrete values, while all other shapes
    are symbolic sympy formulas.
    )	fillvaluer'   TZsize_oblivious)r   zip_longestreversedr   SOnerH   rO   sizevars	shape_envevaluate_exprEqra   guard_equalsr   expandZfree_symbolsr   )r   boutputr   yrW   rW   rX   broadcast_symbolic_shapes  s   $

 r  c              
     s,  |d u s|d u sJ d|d u r|d u rt j}tdd | D s"| S tdd | D rC|p3t| d|ifdd  fdd	| D S td
d | D }g }| D ]C}t|ttfrq|	t
tj|| | dt|  qPt|tjr|	t
t|| | dt|  qP|	| qP|S )NzEonly one of override_return_dtype or type_promotion_kind may be givenc                 s  s"    | ]}t |tjttfV  qd S r{   )r^   r   r   r   floatr   rW   rW   rX   r          z$promote_constants.<locals>.<genexpr>c                 s  s"    | ]}t |tttjfV  qd S r{   )r^   r   r  r   r   r   rW   rW   rX   r     r  r   c                   s4   t | tjrtj|  td dS tj|  td dS )Nindexr   rf   r   )r^   r   r   r*   r0   r=   r   r   r   rW   rX   
const_func  s
   
z%promote_constants.<locals>.const_funcc                   r   rW   rW   r   )r  rW   rX   r   "  r   z%promote_constants.<locals>.<listcomp>c                 s  s&    | ]}t |tttjfr|V  qd S r{   )r^   r9   r/   r*   r   r   rW   rW   rX   r   #  s   $ r   r  )r   DEFAULTr   r   r   nextr^   r   r  ra   r/   r   r*   r   r   get_device_or_errorr\   r   r   r   r0   )inputsoverride_return_dtyper   exrb   r   rW   )r  r   rX   promote_constants  sL   

	r  c              	     s$   d dd fdd}|S )Nalphar  r9   c              
     s  d urt dd D rrJ  S tr2| d ur1| dkr1ttd | d< n| d u s8J dd D d  pLd   ttd  j	dd  D ]!}t
|tjstt| ksJ d d	 d	|  q^tjtjftjd uottjd
d d uotjjjd uotjjjddo v  
f
dd}	sd }D ]}t| j	r| } nq|sՈd  }	p|}tj| |dS )Nc                 s  "    | ]}t |tot|V  qd S r{   r^   r1   r2   r   r   rW   rW   rX   r   H  r   z0make_pointwise.<locals>.inner.<locals>.<genexpr>r'   c                 S     g | ]}|  qS rW   make_loaderr   rW   rW   rX   r   U  r   z1make_pointwise.<locals>.inner.<locals>.<listcomp>r   zndim mismatch  rP   Zlow_precision_pointwise_barrierFc                   s   t  t 
ksJ d  d
 tjkr&	d ur&	 fddD  S r:r:tjkr: fddD  S g }tD ]'\}}| }|  }rb|v rbtj||dd}t||}|| q@| }r|tj|dd}t|S |S )Nzwrong ndim r%  c                      g | ]}| qS rW   rW   r   loadr  rW   rX   r   n  r   zCmake_pointwise.<locals>.inner.<locals>.inner_fn.<locals>.<listcomp>c                   r&  rW   rW   r'  r)  rW   rX   r   t  r   F)Zuse_compute_types)	r   rm   r   float64r]   r   rG   r   ra   )r  Zinputs_loadedZ	inp_indexr(  rb   Z	inp_dtypeZdowncast)
r   emulate_precision_castsri   r  is_gpu_deviceloaders	low_pr_fpoverride_fn_when_gpu_float64override_fn_when_input_boolrangesr)  rX   inner_fnk  s,   $
z/make_pointwise.<locals>.inner.<locals>.inner_fnrf   r   r2  r1  )r   r  r\   mulr   r   r?   r=   r`   typer^   r*   BaseConstantr   rm   bfloat16float16rH   rO   r   rP   metagetr6   r   )r  r  otherr2  rf   rc   allow_alphari   override_devicer/  r0  r  triton_fallback)r   r+  r  r,  r-  r.  r1  rX   innerG  sd   

zmake_pointwise.<locals>.inner)r  r9   rW   )ri   r  r>  r0  r/  r=  r?  r@  rW   r<  rX   make_pointwise>  s    	RrA  c                   s   ddd fdd}|S )Nr'   r  r  list[list[TensorBox]]c                   sT  t tjjjdkptjjjtv pt }d }|D ]}t|t	t
fr$|} nq|d us-J dg }|D ]}t|t	t
fsE||gt |  q1|| q1tt| }d gt | }| D ]@\\}}	}
g }|
D ]-\}} rt|d| i}n| }|||< tj|tjr|	r|r|  ||  qf|rtj| q\tdd |D sJ |S )Nr   z1at least one input must be a list to a foreach opr  c                 s      | ]}|d uV  qd S r{   rW   r   rW   rW   rX   r     r   z8make_foreach_pointwise.<locals>.inner.<locals>.<genexpr>)r   rH   rO   rP   rQ   rS   inplace_foreach_opsrY   r^   r\   r   ra   rh   r   r   has_featureBackendFeatureFOREACHrealizeget_operation_nameregister_operation_listr   )r  r  Zrealize_outputsZa_list_inputinputZbroadcast_inputsgroupsoutputsrf   re   groupoperation_list
output_indrd   r  r=  pw_fnrW   rX   r@    sZ   
z%make_foreach_pointwise.<locals>.inner)r  rB  rW   )rR  r=  r@  rW   rQ  rX   make_foreach_pointwise  s   6rS  r   r9   torch.dtypec                   s>   |    kr|rt| S | S  fdd}t| d| S )Nc                   s   t j|  dS )N)	src_dtype)rG   r   r   r   rU  rW   rX   	_to_dtype     zto_dtype.<locals>._to_dtyper  )r   clonerA  )r   r   copyrW  rW   rV  rX   r     s
   r   r   c                 O  s   ddl m} |}| j}||tjd}t| |j|  W d   n1 s'w   Y  |j}|s3J t|}dgt	| }	|
 D ]0\\}
}}g }|D ]\}}||	|< tj|
tjri|ri|  ||  qL|rrtj| qBtdd |	D s~J |	S )aI  
    This lowers an invocation of foreach_map
    The way this works is that an arbitrary N-arg func is provided by the user, looped over by the
    polyfill with the same semantics as a foreach op (a loop applying an n-ary function to n args)
    and then traced into a subgraph by dynamo.
    This code allows us to inline the subgraph into the main graph lowering using the PontwiseSubgraphLowering.
    The graph outputs represent the vertically fused sequence of ops, and then register_operation_list
    below registers the buffers as horizontally fuseable in the scheduler.
    r'   )PointwiseSubgraphLowering)Zroot_graph_loweringNc                 s  rC  r{   rW   r   rW   rW   rX   r   
  r   z_foreach_map.<locals>.<genexpr>)subgraph_loweringr]  graph_modulerH   rO   Zset_graph_handlerrunZgraph_outputsrh   r   r   rE  rF  rG  rH  ra   rI  rJ  r   )Zsubgraphrd   r   r]  r  ZgmZpw_subgraphZsub_outputsrL  rM  rf   re   rN  rO  rP  r  rW   rW   rX   _foreach_map  s6   ra  c                 C  sZ   |j s|  j r&|  rt| |d}tj||  |S ttj	j
dd| |S t| |ddS )Nr   Fadd_to_fallback_setTr[  )
is_complexr   r   
empty_liker*   ZInplaceCopyFallbackr   fallback_handlerprimsconvert_element_typedefaultr   )r   r   dstrW   rW   rX   _convert_element_type  s   rl  rd  c                C  sb   |   }||kr|rt| S | S dd }||}||}||kr)ttjj| |S tt| |S )Nc                 S  s   | j r	t| jS t| jS r{   )is_floating_pointrm   Zfinfobitsiinfor   rW   rW   rX   _get_primitive_bitwidth#  s   z1to_dtype_bitcast.<locals>._get_primitive_bitwidth)	r   rZ  rg  atenviewr   r9   r.   r   )r   r   r[  Zx_dtyperp  Zsrc_bitsZdst_bitsrW   rW   rX   to_dtype_bitcast  s   rs  c                 C  s8   |j s|  j rttjtjjj	j
| |S t| |S r{   )re  r   r9   r   r*   ZComplexViewrm   rG   rq  rr  r   rs  r   r   rW   rW   rX   _view_dtype2  s
   
ru  r[  non_blockingrf   torch.devicec                C  s:   t |}|  |kr|rt| S | S ttj| ||S r{   )r=   r`   rZ  r9   r   r*   Z
DeviceCopy)r   rf   r[  rw  rW   rW   rX   	to_device;  s   ry  c                 C  s   t | |d|dS )NTrv  )ry  )r   rf   rw  rW   rW   rX   _device_putB     rz  Tc
                 C  s   |p| j }t|}
|rtd| }td| || t||| |dur't|}t|
|||r/|nd||	d}
t| |||d|
}
tt|rPttt|d|d|
 |
S )z3A pointwise function that maps ops.{name} to inputsZ
libdevice_N)r  r0  r/  r=  r?  r   )r   r   )__name__r4   rD   rA  r   r   rh  r   )r   r   r   r   r   r  r0  r=  use_libdevice_for_f64r?  ri   Zfn_libdevicerW   rW   rX   register_pointwiseG  sJ   



r~  c                    sx   d} t d  fdd} fdd}t|t|tjdgfdd}ttj|}tt| r:tt	t| d	d
| |S )z2A pointwise function that maps ops.frexp to inputsfrexpc                        | i |d S Nr   rW   rd   r   r  rW   rX   frexp0     zregister_frexp.<locals>.frexp0c                    r  Nr'   rW   r  r  rW   rX   frexp1  r  zregister_frexp.<locals>.frexp1rY  c                    s$    d | i | d | i |fS Nr   r'   rW   r  )pw_fnsrW   rX   ri     s   $zregister_frexp.<locals>.fnNr\  )
r4   rA  rm   int32r   rq  r  r   rh  r   )r   r  r  ri   rW   )r  r  rX   register_frexp{  s*   
r  c                 C  s   t ||d}t| |}|S )Nr=  )rS  r   )r   Zpointwise_lowering_fnr=  ri   rW   rW   rX   register_foreach_pointwise  s   
r  )r   r   c                   s  dd }t |ttfrt||}t |ttfrt||}| ||g t d  d tjd}dd t D }t|t	 fdd|D  D ]\}}| |< qFt
t D ]}t  | tjrqt | t |d	    |< qUt||d
 d	 t d |t d |S )Nc                  W  
   t j|  S r{   )rG   wherer   rW   rW   rX   ri        
zwhere.<locals>.fnr'   r%   r\  c                 S  r   rW   r   r   rW   rW   rX   r     r   zwhere.<locals>.<listcomp>c                      g | ]} | qS rW   rW   r   r   rW   rX   r     r   r   rY  )r^   r  r   constant_liker   r   r  r]   r   r   r   r   r*   r   r/   r   r\   r   rA  r   )r   r   r  ri   r   indicesrc   r   rW   r   rX   r    s&   
$
$
r  c                  G  s   t | dkrt| d ttfrt| d  S ttdd | D g }g }| D ]$}| }t |t |ks?t	dd t
||D rDt||}|| q%|S )Nr'   r   c                 S  r"  rW   )r   r   rW   rW   rX   r     r   z%broadcast_tensors.<locals>.<listcomp>c                 s  s    | ]?\}}t jjjjt|d ddo"t jjjjt|d dd p?t jjjjt|d dd o?t jjjjt|d ddV  qdS )r'   Tr  N)rH   rO   r  r  r  r   r	  r   r   r  rW   rW   rX   r     s"    


	

z$broadcast_tensors.<locals>.<genexpr>)r   r^   r\   r   r   r   reducer  r   r   r   r  ra   )r  rS   rM  r   sizesrW   rW   rX   r     s   
r   c                 C  s   | S r{   rW   r   rW   rW   rX   nop     r  
lift_freshc                 C  s   t | tsJ |d u rtt| jS t |ttjfr"tj	j
|ntdd |D }tt|  |}tt |ts=|fn|}g }t|  D ]\}}||v r]tj	j
jt|dddsb|| qH||  krnt| |S | S )Nc                 s  s    | ]
}t jj|V  qd S r{   rH   rO   r  evaluate_static_shaper   drW   rW   rX   r     r   zsqueeze.<locals>.<genexpr>r'   Tr  )r^   r9   r8   r   r_   r   r   r   rH   rO   r  r  r   r   r   r   r   r]   r  r	  ra   rr  )r   r   dims	new_shaper  srW   rW   rX   squeeze  s"   
r  c                 C  s   t t| |S r{   )rZ  r  )r   r   rW   rW   rX   squeeze_copy     r  c                 C  2   t | |}t| tsJ t|tsJ |j| _| S r{   )r  r^   r9   r_   r   r   valrW   rW   rX   squeeze_  
   
r  c                 C  2   t | rt| dtjdS td}t|tjd| S )NFr   isinfrY  r   	full_likerm   r   r4   rA  r   ri   rW   rW   rX   r       r  c                 C  r  )NFr   isnanrY  r  r  rW   rW   rX   r    r  r  c                 C  $   t | rt| S td}t|| S )Nceilr   rZ  r4   rA  r  rW   rW   rX   r  %     r  c                 C  r  )Nfloorr  r  rW   rW   rX   r  -  r  r  c                 C  r  )Nroundr  r  rW   rW   rX   r  5  s   r  c                 C  r  )Ntruncr  r  rW   rW   rX   r  >  r  r  c                 C  s   ddl m} t| g\} t| tjrt| t|S t| t	s!J t|t
tfs*J t|  t|kr6| S ||  s]tjjt|  }|dkr]||s]| tjjt||  t	t| jt|S )Nr   )free_unbacked_symbols)%torch.fx.experimental.symbolic_shapesr  r  r^   r*   r6  r/   r   r   r9   r\   r   rH   rO   r  	size_hintrE   
mark_reuser_   )r   r  r  Zx_size_productrW   rW   rX   r  F  s   r  c                 C  sL   t |}|D ]}d||< q| }t|D ]\}}|dkr t||}qt||S Nr!  )r\   r]   	unsqueezer  )r   shapeZbroadcast_dimensionsr  Zbroadcast_dimensionr   idxr   rW   rW   rX   broadcast_in_dim`  s   


r  c                 C  s   t | | S r{   )r  r   )r   r  rW   rW   rX   	expand_asn  r  r  c                   sR  t |   tt kr$tjjgtt      t| t  } tt|  ks0J t |  }d}ttD ]}| dkrHd}|| |  ||< q>|r`t|| 	 | 
 dS tdd t D rstt| |S  fdd}tjjt }|dkr| tjjt||  |  tj| 
 | 	 |t |d	S )
NFr   Tr   rf   c                 s  s$    | ]\}}|d kp|d kV  qdS r'   NrW   r  rW   rW   rX   r        " zrepeat.<locals>.<genexpr>c                   st   t | t ks
J t| } tt D ]!}| dkr5 | dkr)tjj| |< qt| | d | | |< q| S r  )r   r\   r   r   r  ZZeror$   )r  rc   Zold_sizerepeatsx_loaderrW   rX   r2    s   zrepeat.<locals>.inner_fnr3  )r\   r   r   r   r  r  rr  r   emptyr   r`   r   r   rZ  r  rH   rO   r  r  rE   r  r$  r6   r   )r   r  new_sizeZzero_tensorrc   r2  Zold_size_productrW   r  rX   repeats  s8   r  c                 C  s2   t | tsJ t |ttfsJ tt| j|S r{   )r^   r9   r\   r   r;   r   r_   )r   r  rW   rW   rX   rr    s   rr  c                 C  s6   t | tsJ t |ttfsJ tt| jt|S r{   )r^   r9   r\   r   r5   r   r_   )r   r  rW   rW   rX   permute  s   r              c              	   C  s8   t | tsJ t| |d}ttjj| j|||||dS )Nr   clamp)r^   r9   _validate_dimr*   	SliceViewr   r_   )r   r   startendstepr  rW   rW   rX   slice_  s   r  c              	   C  s   t | trt | jtjr| j } |   t| s"td|  dt	| \}}t
|j|jdd |D dd |D t|p@d}ttj||dS )Nzunrealized as_strided(z, ...)c                 S     g | ]}t |qS rW   r   r  r   r  rW   rW   rX   r         zas_strided.<locals>.<listcomp>c                 S  r  rW   r  r  rW   rW   rX   r     r  r   r_   layout)r^   r9   r_   r*   BaseViewunwrap_viewrH  is_storage_and_layoutr   as_storage_and_layoutFixedLayoutrf   r   r   r  ReinterpretView)r   r   stridestorage_offsetstorageZ
old_layoutZ
new_layoutrW   rW   rX   
as_strided  s   

r  c                 C  s$   t | tsJ t| |||j| _| S r{   )r^   r9   r  r_   )r   r   r  r  rW   rW   rX   as_strided_  s   r  c                 C  s   t | |||}t|S r{   )r  rZ  )r   r   r  r  resultrW   rW   rX   as_strided_copy  s   r  c                   s   g d}D ]} |||    f d d }qdd D  fdd}td  }d d | < tjd  d  ||dS )Nr   r!  c                 S  r"  rW   r#  r   rW   rW   rX   r     r   z!pointwise_cat.<locals>.<listcomp>c           
   	     s@  t |  tj}g }g }ttD ]n  dkr t dtjn
t   d tj}t   d tj}t ||}t ||} dkrI|}n td krT|}nt 	||}|
| t| t   d  < |
t | fddd q|d }	ttd ddD ] t |  |  |	}	q|	S )Nr   r'   c                     s     S r{   rW   rW   )rc   idx_loadinputs_loadersrW   rX   <lambda>      z1pointwise_cat.<locals>.inner_fn.<locals>.<lambda>        r!  r%   )rG   
index_exprrm   int64r   r   constantgeltand_ra   r\   r"   maskedr  )
r  idx_dimmasksZmasked_loadsr  r  Z
start_condZend_condmaskZnext_valr   r  r  Zinputs_ranges)rc   r  rX   r2    sD   
zpointwise_cat.<locals>.inner_fnr3  )ra   r   r\   r6   r   r`   r   )r  r   Zprev_endr   r2  r  rW   r  rX   pointwise_cat  s   0

r  rK  scaleszero_pointsaxis	quant_min	quant_maxc              	     s   t  dksJ dt  dksJ d|  tjkr%t| tj} |  tjks5J d|    t |  k sHJ dt |   |     f	dd}tj	| 
 ||  dS )	Nr'   expect scales 1 dimexpect zero_points 1 dim<Expecting input to have dtype torch.float32, but got dtype: Expecting axis to be < c           
        s   |   f}| }|}|}t tjd\}}jtjkr(t|tj}jtjkr5t|tj}t|}t|| | }t	|t
||}	t|	S Nr   )_create_constantsrm   float32r   rG   r   r  
reciprocalr  maximumminimum)
r  channel_idxrK  scale
zero_pointqminqmax	inv_scaler  clamped	r  r   input_loaderr  r  r  scales_loaderr  zero_points_loaderrW   rX   r2  ;  s   

z;quantized_decomposed_quantize_per_channel.<locals>.inner_fnr3  )r   r   r   rm   r7  r   r  r$  r6   r   r`   )rK  r  r  r  r  r  r   r2  rW   r  rX   )quantized_decomposed_quantize_per_channel!  s(   
r  	out_dtyper  Optional[torch.dtype]c          	        s   t  dksJ dt  dksJ d|  |ks*J d| d|    t |  k s=J dt |   d u rDtj|     fdd}tj|  ||  d	S )
Nr'   r  r  Expecting input to have dtype , but got dtype: r   c                   s   |   f}| }|}|}j tjkrt|tj}j tjkr+t|tj}tt|tj|| }t|}|S r{   )r   rm   r  rG   r   sub)r  r  rK  r  r	  r  r  r  r  r  r  r  r  rW   rX   r2  r  s   
z=quantized_decomposed_dequantize_per_channel.<locals>.inner_fnr3  	r   r   r   rm   r  r$  r6   r   r`   )	rK  r  r  r  r  r  r   r  r2  rW   r  rX   +quantized_decomposed_dequantize_per_channelT  s(   r  r  r  r	  c                   s   |   tjkrt| tj} |   tjksJ d|    |   fdd}tj|   t	j
|t|t|d|  dS )Nr  c           	        sf   | }t d| |tjd\}}t|| | }t tjd\}}tt|||}t| S )N      ?r   )r  rm   r  rG   r  r  r  r   )	r  r  r	  rK  r  r  r
  r  r  r   r  r  r  rW   rX   r2    s   
zBquantized_decomposed_quantize_per_tensor_default.<locals>.inner_fnr  r	  r3  )r   rm   r7  r   r  r$  r6   r   r`   r   r   r  r   r   rK  r  r	  r  r  r   r2  rW   r  rX   0quantized_decomposed_quantize_per_tensor_default  s   
r   c                  sv   |   |ksJ d| d|    d u rtj|    fdd}tj|  tj|t	|t
|d|  dS )Nr  r  c                   sF    | }t ||tjd\}}tt|tj|| }t|}|S r  )r  rm   r  rG   r  r   )r  r  r	  rK  r  r  r  rW   rX   r2    s
   zDquantized_decomposed_dequantize_per_tensor_default.<locals>.inner_fnr  r3  )r   rm   r  r$  r6   r   r`   r   r   r  r   r   rK  r  r	  r  r  r   r  r2  rW   r!  rX   2quantized_decomposed_dequantize_per_tensor_default  s   r#  c                   s   |   tjkrt| tj} |   tjksJ d|    t dks9t dkr5 d dks9J dt dksUt dkrQ d dksUJ d|     fdd}tj	| 
  ||  dS )	Nr  r   r'   expect scale as scalar tensor"expect zero_point as scalar tensorc                   s   | }t  dkrdnd}t  dkrdnd}jtjkr-t|tj}jtjkr:t|tj}t|t| | }t	tjd\}}t
t|||}t| S )Nr'   r   rW   r   )r   r   r   rm   r  rG   r   r  r  r  r  r  )r  rK  _scale_zero_pointr  r
  r  r  r   r  r  r  r  scale_loaderr	  zero_point_loaderrW   rX   r2    s   zAquantized_decomposed_quantize_per_tensor_tensor.<locals>.inner_fnr3  )r   rm   r7  r   r  r   r   r$  r6   r   r`   r  rW   r)  rX   /quantized_decomposed_quantize_per_tensor_tensor  s.   ""r,  c                  s   t  dkst  dkr d dksJ dt  dks8t  dkr4 d dks8J d|  |ksJJ d| d|   d u rQtj|      fdd}tj|  ||  d	S )
Nr   r'   r$  r%  r  r  c                   s    | }t  dkrdnd}t  dkrdnd}jtjkr-t|tj}jtjkr:t|tj}tt|tj|| }t|}|S )Nr'   r&  rW   )r   r   r   rm   r  rG   r   r  )r  rK  r'  r(  r  r  r  r  r*  r	  r+  rW   rX   r2  %  s   zCquantized_decomposed_dequantize_per_tensor_tensor.<locals>.inner_fnr3  r  r"  rW   r-  rX   1quantized_decomposed_dequantize_per_tensor_tensor  s.   ""r.  c           
        s  | d   jdk}|r:tdd | D r:| D ]}|  qtdd | D r1ttjg| R  \} }ttjj| |S t	| dkrFt
| d S t| d |d}t| dtjifdd	| D } d%dddd fddtfdd| D }d&fddtjrt| |S |rttj| |S fddd}d d'ddt	| |kst	| tjkrt fd d| D rtfd!dtjjD }tfd"d| D o|}tfd#d| D otfd$d| D  }	|s|	r|st| |S ttj| |S )(Nr   cpuc                 s  s$    | ]}|  tjtjfv V  qd S r{   )r   rm   int8uint8r   rK  rW   rW   rX   r   <  s    
zcat.<locals>.<genexpr>c                 s  s     | ]}t | d kV  qdS )r   N)r   r   r2  rW   rW   rX   r   C      r'   r   c                   s   g | ]}t | qS rW   r   r   r   rW   rX   r   N  r  zcat.<locals>.<listcomp>r   Union[TensorBox, ir.StorageBox]rk   	ir.IRNodec                 S  s>   t | trt | jtjr| j S | jS t | tjr| jS | S r{   )r^   r9   r_   r*   r  r  
StorageBoxr   rW   rW   rX   unwrap_tensorP  s   

zcat.<locals>.unwrap_tensorc                 S  s   t | tjot | jtjS r{   )r^   r*   ComputedBufferr_   r7   rg   rW   rW   rX   is_reduction\     zcat.<locals>.is_reductionc                   sJ   t | ttjfr | S | p$t | tjo$t fdd|  D S )Nc                 3  s     | ]} t j|V  qd S r{   )rH   rO   
get_buffer)r   readcan_fuse_reductionrW   rX   r   e  s
    
z2cat.<locals>.can_fuse_reduction.<locals>.<genexpr>)r^   r9   r*   r7  r6   r   get_read_namesr:  )r@  r;  r8  rW   rX   r@  _  s   zcat.<locals>.can_fuse_reductionc                 3      | ]} |V  qd S r{   rW   r   rg   r?  rW   rX   r   l  r   r   c                   sZ   t | rt j| dd\}}t j| S t| tt jfr# | S t| t jr+dS dS )NF)freezeT)	r*   r  r  ConcatKernelZcan_realize_into_without_copyr^   r9   r7  r6   )r   r  r   )should_lower_cat_inputr8  rW   rX   rF  n  s   
z#cat.<locals>.should_lower_cat_inputc                   s\   t | ttjfr | S t | tjsdS |  j}|  D ]}| tj	
|7 }q|S r  )r^   r9   r*   r7  r6   Zinner_fn_opcountZnum_opsrA  rH   rO   r=  )r   countr>  )op_countr8  rW   rX   rH    s   
zcat.<locals>.op_countr   r%   rR   torch._ops.OpOverloadc                 S  s   | t jjt jjfv S r{   )rq  catrj  constant_pad_ndrR   rW   rW   rX   additional_pointwise_ops     z%cat.<locals>.additional_pointwise_opsc                 3  s    | ]	}| kV  qd S r{   rW   rC  )MAX_SIMPLE_OP_COUNTrH  rW   rX   r         c                 3  s    | ]}t | V  qd S r{   )r@   )r   Zuse)rM  rW   rX   r     s
    
c                 3  rB  r{   rW   r   rF  rW   rX   r     r   c                 3  rB  r{   rW   r   rQ  rW   rX   r         
c                 3  rB  r{   rW   rC  r?  rW   rX   r     r   )r   r5  rk   r6  )rk   r   rR   rI  )r`   r5  r   rH  require_channels_lastrq  rJ  rg  rj  r   rZ  r  r   r   r  r   r(   Zforce_pointwise_catr  r9   r*   rE  r   Zmax_pointwise_cat_inputsrH   rP   rQ   )
r  r   Z
cpu_devicerK  r   Zfusable_reductionZMAX_COMPLEX_POINTWISE_CATZpointwise_usesZfuse_pointwise_useZhorizontal_fuse_catrW   )rO  rM  r@  r   r;  rH  rF  r8  rX   rJ  9  s`   




rJ  offsetdim1dim2c                   s  |   ttdtdtkfdd tjjt	|d}|rBtjj
tjj |  d}ntjj
tjj  | d}d |r`| df nd|f fddtD }||  fdd	}ttj| ||S )
N)r  rankc                     s   d  d S )Nz(diagonal dimensions cannot be identical z, rW   rW   rV  rW  rW   rX   r        zdiagonal.<locals>.<lambda>r   )r   r   c                   s    g | ]\}}| fvr|qS rW   rW   )r   rc   r  rY  rW   rX   r          zdiagonal.<locals>.<listcomp>c                   s   | d }dgt  }d}tD ]&}|kr | d  ||< q|kr-| d  ||< q| | ||< |d7 }q|t d ksBJ |S )Nr!  r   r'   r%   )r   r   )r  Zdiag_idxZoriginal_idxZcur_dimr  Zbase_idxrV  rW  Znum_dimsZoriginal_shaperW   rX   	reindexer  s   
zdiagonal.<locals>.reindexer)r   r   r   r   rH   rO   r  r  r   LtZevaluate_maxZevaluate_minr]   ra   r9   r*   GenericViewr   )rK  rU  rV  rW  Zoffset_negativeZ	diag_sizer  r]  rW   r\  rX   diagonal  s:   
r`  c                 C  s   t t| |||S r{   )rZ  r`  )rK  rU  rV  rW  rW   rW   rX   diagonal_copy     ra  c                 C  $   t | }t||||}t|| |S r{   )rZ  r`  	mutate_to)rK  srcrU  rV  rW  r  rS   rW   rW   rX   diagonal_scatter     
rf  c                 C  s,   t ||  | }tt| |||d |S r  )r;   Zhandle_negative_indexr   r  r  )r   r   r  rW   rW   rX   select  s   rh  c           
   
   C  s   t | |d}|}t|ttfs2|  | }tjjt	|| d |}|g| }||d |  |d< g }d}|D ]}|| }	|
t| |||	dd |	}q8|S )Nr   r'   r!  Fr  )r  r^   r\   r   r   rH   rO   r  r  r!   ra   r  )
r   r  r   Zsizes_x_sizechunksr  r  r   r  rW   rW   rX   split   s    
rk  c                 C  s   t | ||S r{   )rk  )r   r  r   rW   rW   rX   split_with_sizes     rl  c                   s>   t  d tjj   } fddt|D }|S )Nr   c                   s   g | ]}t  |qS rW   )rh  r   r   r   rW   rX   r   &      zunbind.<locals>.<listcomp>)r  rH   rO   r  r  r   r   )r   r   ri  r  rW   rn  rX   unbind"  s   rp  c                   s   |   }t|}t|| |dkrtt| d|dS |  }tjj}||| |	d t
|| d }||dkrK| |t|| | g |d   || d d  |}	 fdd}
ttj| |	|
S )Nr   )r  r'   c                   s:   | d |     }g | d   ||  d d R S )Nr!  r'   rW   )r  Zdim_idxr   r  rW   rX   r]  >  s   &zunfold.<locals>.reindexer)r   r   r   r  r  rH   rO   r  	guard_leqguard_ltr!   r  r  r    r9   r*   r_  r   )r   	dimensionr   r  r  ndimdim_sizer  Znew_dim_sizeout_sizer]  rW   rq  rX   unfold*  s   
(rx  c                 C  s2   t | |d}t|  }||tjj t| |S r  )r  r\   r   insertr   r  r  rr  )r   r   r  rW   rW   rX   r  E  s   
r  c                 C  r  r{   )r  r^   r9   r_   r  rW   rW   rX   
unsqueeze_M  r  rz  c                 C  sZ   t jjjt|}t|  }|dk r||| 7 }d|  kr(|| k s+J  J |S r  )	rH   rO   r  r  r  r   sympifyr   r   )r   r   rU  ru  rW   rW   rX   r  V  s    r  r!  c                 C  sT   t | |d}tjj|  | d }t| |d|}t| |||d }t|t|S )Nr   r%   )	r  rH   rO   r  r  r   r  r4  sigmoid)r   r   Znew_lenr   r  rW   rW   rX   glu_  s
   r}  c                   s$   |rt    fdd}d|_|S )Nc                    s*   dd }t |tjj g| R i |S )Nc                 S  s   t | tjrt| S | S r{   )r^   r*   r1   r9   r   r   rW   rW   rX   wrap_tensorsn  r<  z7fallback_handler.<locals>.handler.<locals>.wrap_tensors)pytreetree_mapr*   FallbackKernelr   )rd   r   r~  kernelrW   rX   handlerm  s   z!fallback_handler.<locals>.handlerT)r   r   Z_is_fallback_handler)r  rc  r  rW   r  rX   rg  i  s
   
	rg  c                   C     t d d S )NzjTorchinductor does not support code generation for complex operators. Performance may be worse than eager.)warningswarnrW   rW   rW   rX   _warn_complex_not_supported{  s   r  rg   torch.Tensorc                 C  sr   |   r|r|jtjjjjtjjjj	fv rdS t
  dS | jtjkr7|s&dS |jtjjtjj	fv p5t|j S dS )z0Do not support reading or writing to this tensorFT)re  rS   rm   rG   rq  rr  r   rh  ri  rj  r  Zfloat8_e8m0fnurJ  rA   rg   parentrU   rW   rW   rX   unsupported_input_tensor  s&   

	r  c                 C  s   t | |rdS | jotjS )z2Do not support writing tensor but can read from itT)r  Zis_cpur(   Zdisable_cpp_codegenr  rW   rW   rX   unsupported_output_tensor  s   
r  rU   torch.fx.Nodec                 C  sh   | j tjju r	dS | j tjju rdS dd }tj| ji | jD ]}||| ddr, dS q || | ddS )NFc                 S  st   t | tjjs	dS d| jvrdS t| jd D ]}t |tjjs"q|r.t	||| r- dS qt
||| r7 dS qdS )NFr  T)r^   rm   fxNoder9  r  tree_leavesZ_subclassesZ
FakeTensorr  r  )rU   r  	is_outputr9  rW   rW   rX   check_skip_condition  s   
zCfallback_node_due_to_unsupported_type.<locals>.check_skip_condition)r  T)	rS   rq  view_as_complexrj  lift_fresh_copyr  Zarg_tree_leavesrd   r   )rU   Zallow_cpu_inputsr  r   rW   rW   rX   %fallback_node_due_to_unsupported_type  s   r  c                   s   | t vs|sJ d|  |r>ttdr>t| gr>tjr%| tjj	j
v s>|s>tjjjr6dtjj_td td|  d fdd}t| tjjr]|  D ]}t| |}|| qOd S t| tjjtjjfrn||  d S td	|  d
t|  )Nz*both a fallback and a decomp for same op: CIFznA make_fallback error occurred in suppress_errors config, and suppress_errors is being disabled to surface it.zmake_fallback(a.  ): a decomposition exists, we should switch to it. To fix this error, either add a decomposition to core_aten_decompositions (preferred) or inductor_decompositions, and delete the corresponding `make_fallback` line. Get help from the inductor team if unsure, don't pick arbitrarily to unblock yourself.c                   s.   t |   d urt|   t| d dt| S Nr\  )r   r   r   rg  )op_overloadlayout_constraintrW   rX   register_fallback  s   

z(make_fallback.<locals>.register_fallbackzUnsupported fallback z with type )r,   r   osgetenvr-   r(   fallback_randomrm   _decompZdecompositions_for_rngZextra_random_decompsZ_dynamoZsuppress_errorslogwarningrs   r^   rw   r   r   r   rx   ZHigherOrderOperatorRuntimeErrorr5  )rR   r  r  Zoverride_decompr  olr  rW   r  rX   make_fallback  s>   




r  c                 C  s$   d}| D ]}|| }qt |tjdS )z
    TorchInductor offset calculation differs from PyTorch eager offset
    calculation for random ops (tl.rand vs torch.rand). In future, we should
    strive for same impl for tl.rand and torch.rand.
    r'   r   tensorrm   r  )r  Znumelr  rW   rW   rX   philox_rand_offset   s   
r  c           	        sd   t | | t j|  | |  fdd}tj| |t| d}t	| }||fS )Nc                   sV   t g tj}t g tj}t t | tj|}t ||}t | S r{   )rG   r   rm   r  r   r  rand)r  Zseed_index_exprZoffset_index_exprZrand_index_exprr  r   Zoffset_loader
random_posseed_loaderrW   rX   r2    s   zphilox_rand.<locals>.inner_fnr3  )
r*   r  FlexibleLayoutcontiguous_stridesmake_indexerr$  r6   r   r\   r  )	r   seedrU  r  rf   r   r2  Zrandom_values_nodeZoffset_noderW   r  rX   philox_rand  s&   
r  c              	   C  s.   t jrttjtjtj	j
| ||S td)Nz&should be handled in replace_random.py)r(   r  r  r  r9   r   r*   r  rq  native_dropoutrj  rs   )r   ptrainrW   rW   rX   r  3  s   r  c                 G  sj   t js|  tdksJ d|   t|dks!t|d tr%t	j
jnt	j
j}tj|| g|R   | S )Nr/  Tthis should be handled in decomps unless config.fallback_random or the device is CPUr   )r(   r  r`   rm   rf   rH  r   r^   r  rq  
bernoulli_Tensorr*   ZInplaceBernoulliFallback)r   rd   r  rW   rW   rX   r  >  s   r  c                 G  s4   t js|  tdksJ dtt| g|R  S )Nr/  r  )r(   r  r`   rm   rf   r  rZ  )r   rd   rW   rW   rX   bernoulli_pM  s   r  c                 C  s   t r{   rs   r   rW   rW   rX   _foobarV  r  r  c                 C  r  )Nz1using triton random, expect difference from eager)r  info)saltrW   rW   rX   _warn_triton_random[  r  r  c                   C  s   t tjj d S r{   )r  rH   rO   Zcreation_timerW   rW   rW   rX   warn_triton_random`  r{  r  c                  O  F   | dd d urt| i |S tjr|dd  t| i |S tdN	generatorz-should have been handled in replace_random.py)r:  fallback_rand_generatorr(   r  popfallback_rand_defaultrs   r  rW   rW   rX   r  l     r  c                  O  r  r  )r:  fallback_randn_generatorr(   r  r  fallback_randn_defaultrs   r  rW   rW   rX   randnv  r  r  c                 C  s   t |}t j| |S r{   )r*   get_stride_orderExternKernelrequire_stride_order)Zinput_tensorr  stride_orderrW   rW   rX   inductor_force_stride_order  s   
r  c                 C     t d)Nz.should be handled in fuse_seed_creation_pass()r  )rf   rW   rW   rX   inductor_seed     r  c                 C  s   t   tt| t|S r{   )r  r9   r   r*   ZRandomSeedsr=   )rG  rf   rW   rW   rX   inductor_seeds  s   r  c                   s(    fdd}t j  |g dS )Nc                   s   t   S r{   )rG   Z	load_seedget_namer  r  seedsrW   rX   r2    rX  z&inductor_lookup_seed.<locals>.inner_fnr3  )r6   r   r`   r   )r  r  r2  rW   r  rX   inductor_lookup_seed  s   r  rU  r   	list[int]r  modestrc                  s   t jrJ  dv sJ g | } tj}| }tj||| tj| |d	 |
  fdd}tj|||g | d}|  |S )N)r  r  r  c                   s"   t t g t| tjS r{   )r   rG   r  rm   r  r)  r  r  r  rW   rX   r2    s   z!inductor_random.<locals>.inner_fnr3  )r(   r  rm   r  r  r*   r  r  r  r  r$  r6   r   rH  )r   r  r  rU  r   rf   r2  r  rW   r  rX   inductor_random  s(   
r  lowhighc                  sp   t jrJ g |}tj}| }tj|||tj||d	 |
  fdd}tj|||g |dS )Nr  c              	     s6   t g t | tjt tjt  tjS r{   )rG   Z	randint64r  rm   r  r  r)  r  r  r  r  rW   rX   r2    s   z"inductor_randint.<locals>.inner_fnr3  )r(   r  rm   r  r  r*   r  r  r  r  r$  r6   r   )r  r  r   r  rU  r   rf   r2  rW   r  rX   inductor_randint  s"   
r  tb.tuple[str, sympy.Expr, sympy.Expr, sympy.Expr]c                 C  s4   |   |  d |  d |  d  |  d fS Nr!  r   )r  r   
get_strider  rW   rW   rX   _boundaries_helper  s
   

r  tuple[str, sympy.Expr]c                 C  s   |   |  d fS r  )r  r  r  rW   rW   rX   _sorter_helper  rN  r  	out_int32rightsidesortersorted_sequenceselfr  r  r  Optional[str]r  Optional[TensorBox]c          	        s   dd }|r||rd ur$|s$t tjjdd|||dS |d ur.|dkr.d|r3tjntj |   d urF  t	
 dkrY fd	d
}n
 fdd
}| }tj| ||jdS )Nc                 S  s   t j| tjS r{   )rH   rO   rE  rF  	BUCKETIZEr  rW   rW   rX   r    s    zsearchsorted.<locals>.<lambda>Frb  r  r  Tr'   c              	     sD   | }t j|td d u rd ntd u rd dS ddS )Nr   r  Zsorter_indicesrG   	bucketizer  r  )r  r  index_dtyper  r  r  Zvalues_loaderrW   rX   r2  	  s   
zsearchsorted.<locals>.inner_fnc              	     s\    }d fdd}t j|t|d u rd ntd u r(d dS |dS )Nr  r9   c                   s>   |   }tttjdd t|d d  d d D S )Nc                 s  s    | ]	\}}|| V  qd S r{   rW   )r   r  rc   rW   rW   rX   r   *	  rP  zNsearchsorted.<locals>.inner_fn.<locals>.get_flattened_index.<locals>.<genexpr>r!  )r  rG   r  r   r  operatorr   r   )r  strides)r  r  rW   rX   get_flattened_index&	  s   &z;searchsorted.<locals>.inner_fn.<locals>.get_flattened_indexr  )r  r9   r  )r  r  r  r  r  rX   r2  !	  s   	
r3  )rg  rq  searchsortedr  rm   r  r  r$  rH  r   r   r`   r6   r   r  )	r  r  r  r  r  r  Zvalidate_bucketizer2  rf   rW   r  rX   r    s>   
r  r  r  
boundariesc                  s   t   dks
J tj| tjrtj tjs(ttj	j
dd|  |dS    |  }|  |r9tjntj fdd}tj|||  dS )Nr'   Frb  r  c                   s"   | }t |t d}|S r  )rG   r  r  )r  r  r  r  r  r  r  rW   rX   r2  ^	  s   zbucketize.<locals>.inner_fnr3  )r   r   rH   rO   rE  rF  r  rg  rq  r  r  rH  r`   r$  rm   r  r  r6   r   )rK  r  r  r  rf   r2  rW   r  rX   r  B	  s&   r  c                 O  $   t tjtjj||f\}}||fS r{   )r  tree_map_onlyr*   r1   r  Zrequire_stride1r   rd   r   rW   rW   rX   require_denser	     r  c                 O  r   r{   )r  r  r*   r1   r  require_contiguousr  rW   rW   rX   r  y	  r  r  c                 O  r   r{   )r  r  r*   r1   r  rT  r  rW   rW   rX   rT  	  r  rT  c                   sH    fdd t  fddt| |D }  fdd| D }| |fS )Nc                   s   t  tjrdd  D }tj |S t  tr( fdd  D S t  tt	fr?t
 fddt D S  S )Nc                 S  $   g | ]}t |tjr|jjn|qS rW   r^   rm   SymIntrU   exprr  rW   rW   rX   r   	  s    zGconstrain_to_fake_tensors.<locals>.apply_constraint.<locals>.<listcomp>c                       i | ]}| | | qS rW   rW   r   key)apply_constraintr   fake_argrW   rX   r   	  s    zGconstrain_to_fake_tensors.<locals>.apply_constraint.<locals>.<dictcomp>c                 3      | ]
\}} ||V  qd S r{   rW   )r   r   Zf_ar  rW   rX   r   	      
zFconstrain_to_fake_tensors.<locals>.apply_constraint.<locals>.<genexpr>)r^   r*   r1   r  r  require_exact_stridesr   r   r   r\   r5  r   )r   r  meta_stride_exprr  )r   r  rX   r  	  s   
z3constrain_to_fake_tensors.<locals>.apply_constraintc                 3  r  r{   rW   )r   r   r  r  rW   rX   r   	  r  z,constrain_to_fake_tensors.<locals>.<genexpr>c                   s    i | ]\}}| || qS rW   rW   r   r  fake_kwargsrW   rX   r   	  r[  z-constrain_to_fake_tensors.<locals>.<dictcomp>)r   r   r   )rd   r   Z	fake_argsr  rW   r  rX   constrain_to_fake_tensors	  s   r  c                   sJ    fdd t  fddt|jD } fdd| D }||fS )Nc                   s^   t  tjrtjd  tjjj	}tj
 |S t  tr- fdd  D S  S )Nr  c                   r
  rW   rW   r  )r  r   fx_argrW   rX   r   	  r[  zEconstrain_to_fx_strides.<locals>.apply_constraint.<locals>.<dictcomp>)r^   r*   r1   r  r9  r  rH   rO   r  r  r  r  r   r   )r   r  r  r  )r   r  rX   r  	  s   
z1constrain_to_fx_strides.<locals>.apply_constraintc                 3  r  r{   rW   )r   r   r  r  rW   rX   r   	  r  z*constrain_to_fx_strides.<locals>.<genexpr>c                   s"   i | ]\}}| |j | qS rW   r   r   r  fx_noderW   rX   r   	  s   " z+constrain_to_fx_strides.<locals>.<dictcomp>)r   r   rd   r   r  rd   r   rW   r  rX   rq   	  s   

rq   c                   sN   fdd t  fddtt|jD } fdd| D }||fS )Nc                   s  t |tjs|S |jd }dd | D }t| }|r1|d dkr1tttt	|
 }jtjjkrF| dv rFt	|dksDJ d}|jsPtj||S d	 jtjjjjko^| d
k}t |tsfJ t	|
 dvrp|S t| rttj||S t |tr| d urt| rttj||S |r4t|
 }g }| }	tt	|
 d D ]}
tjj||
 ds|	d urtjj|	|
 dr||
 qdgt	| }d|d< d}tt	|d ddD ]A}
||
d  dkr|||
d   }|
|v rtjj||
d    drd||
< qtjj|  ds(t|   }|||
< qtj ||S t| rEttj||S t |trc| d urct| rcttj||S  fdd}t |j!tj"r||s||# rttj||S tj||S )Nr  c                 S  r  rW   r  r  rW   rW   rX   r   	  s    z=sdpa_constraint.<locals>.apply_constraint.<locals>.<listcomp>r!  r   )r   r   r   )r   r'   r%   r   r   r   r   r   r'   r%   c                   s   t jj|  d   dkS r  )rH   rO   r  r  r   r   Z	ALIGNMENTrW   rX   
is_aligned$
  s   z=sdpa_constraint.<locals>.apply_constraint.<locals>.is_aligned)$r^   r*   r1   r9  r  r  r\   r  r   r   r   rS   rq  0_scaled_dot_product_efficient_attention_backwardrj  Zis_cudar  r  rm   rG   '_scaled_dot_product_efficient_attentionr9   Zis_aligned_realized_tensorZtry_match_insignificant_stridesZrealize_inputmaybe_get_striderH   rO   r  statically_known_equalsra   r<   r  r_   r  r  )r  r   r  Zmeta_valr  r  Zeffn_attn_fwd_biasrw  Zexpanded_dimsZmaybe_striderc   Zout_stridesr  r  )r  r  rX   r  	  s   







z)sdpa_constraint.<locals>.apply_constraintc                 3  s$    | ]\}\}} |||V  qd S r{   rW   )r   r  r   r  r  rW   rX   r   0
  s
    


z"sdpa_constraint.<locals>.<genexpr>c                   s$   i | ]\}}| d |j | qS r!  r   r   r  rW   rX   r   4
  s   $ z#sdpa_constraint.<locals>.<dictcomp>)r   r]   r   rd   r   r  rW   r  rX   sdpa_constraint	  s   {r#  )r  c                 C  sn   |}|   |  krt||   }|  | kr t||  }|  | kr3t||  }t|S t|S r{   )r`   ry  r   r   r   r  rZ  )r  re  rw  r   rb   rW   rW   rX   r[    s   r[  )memory_formatc                C  s&   t j|  |  |  t|  dS Nr3  )r6   r   r`   r   r$  r\   r   )r   r$  rW   rW   rX   rZ    s   
rZ  c                 C  s   g }t | tr+t | jtjr+| j} t | tjr'||   | j} t | tjst| } t| } |rI| j} |d d d D ]	}tj| |d} q;t| } | S )Nr!  r  )r^   r9   r_   r*   r  ra   Z
get_layoutrZ  )r   Zreinterpret_view_layoutsr  rW   rW   rX   clone_preserve_reinterpret_view  s   r&  r  c                  s(    fdd}t jt| || gdS )Nc                   s   t j| d    dS )Nr   r   rG   r  r)  r   r  r  rW   rX   ri   A  r<  ziota.<locals>.fnr3  )r6   r   r=   )lengthr  r  r   rf   requires_gradri   rW   r(  rX   iota7  s   
r+  r   r  c                   s   |   |  ks
J |  t|  d tjjtdr'| 	    tjj
d tjj| 	    tt| | 	 }|  fdd}tj|  |   |t| 	 dS )Nr   c              	     s6   t t t |   tjt tj| | S r{   )rG   r  eqr  rm   r  r  r   r  
src_loaderr  rW   rX   r2  X  s   z select_scatter.<locals>.inner_fnr3  )r   r$  r  rH   rO   r  r  r   r^  r   rr  rs  r  r  r6   r   r`   r\   )r   re  r   r  r2  rW   r-  rX   select_scatterL  s    

r/  c                   s     |  ks
J  t d    tj \t }t d  | < t	||}|  fdd}t
j   |t dS )Nr   r'   c              
     s2  dkrkrdkr| S t |  tj}t|  t|    < g }dkr?|t |t t	tj krT|t 
|t t	tj dkrs|t t t|   dtjt dtj |swJ tt j|}t | fddtrdnd}t ||| S )Nr   r'   c                         S r{   rW   rW   )src_idxr.  rW   rX   r        z1slice_scatter.<locals>.inner_fn.<locals>.<lambda>r  )rG   r  rm   r  r\   r!   ra   r  r   r  r  r,  r$   r  r   r  r  r  r   r  )r  r  r  Zsrc_valr   rv  r  r.  r  r  r   r  )r1  rX   r2  x  sR   zslice_scatter.<locals>.inner_fnr3  )r   r$  r  r   r*   r  Znormalize_start_endr\   r!   r  r6   r   r`   )r   re  r   r  r  r  Zsrc_sizer2  rW   r3  rX   slice_scatterj  s    
.
r4  c                 C  s*   t | ttfrt| dkrt| d S | S r  )r^   r\   r   r   _unwrapr   rW   rW   rX   r5    s   r5  r   rf   r  
pin_memoryc                  s  t |d tjfv d|  t | d tt tr ptjnp%t g }t tj	r6 fdd}nBt t
tfrE fdd}n3t dksZt d t
tfrlt dkrl|tt   fdd}ntjtj |d	S tjt|||d
S )Nlayout=r7  c                      t  S r{   r'  r)  r_   r   rW   rX   r2       ztensor.<locals>.inner_fnc                   r9  r{   rG   r  r)  r:  rW   rX   r2    r;  r   r   c                   s8    fdd t dkrtdS  dt S )Nc              	     sr   | |k sJ ||  dkrt |  S ||  d |  }t t t d tjt |tj | | ||S )Nr'   r%   r   )rG   r  r  r  r  rm   r  )r  r  mid)binary_searchr_   r   r  rW   rX   r>    s   z/tensor.<locals>.inner_fn.<locals>.binary_searchr   )r   rG   r  r)  r:  )r>  r  rX   r2    s   r  r3  )r   rm   stridedr^   r5  r   r  get_default_dtyper   r   r  r   ra   IntegerrH   rO   Zadd_tensor_constantr  r6   r   r=   )r_   r   rf   r  r7  r1  r2  rW   r:  rX   r    s,   *r  c                 C  s@   t | tr|d urt| |} |d urt| |} | S t| ||dS )Nr  )r^   r9   r   ry  r  )r_   r   rf   rW   rW   rX   	as_tensor  s   


rB  c                 C  s   t | tjdS r  r  r_   rW   rW   rX   long_tensor  r  rD  c                 C  s   ddl m} |tjjjtjjjd }|d usJ t|dks#J |t	t
| \}}t||| }tj||_tj| tjjjd }t|tjtjtjfrW|jjS t|S )Nr   )resolve_unbacked_bindingsunbacked_bindingsr'   r  )r  rE  rH   rO   r  r  rP   r9  r   r  iterr   r*   ZDynamicScalarregister_bufferr   register_operationr^   rm   r  ZSymFloatZSymBoolrU   r	  r   r{  )r_   rE  rF  Zbinding_symZkeypathbufferr  rW   rW   rX   _local_scalar_dense  s   
rK  c                 C     d S r{   rW   )r_   r   rW   rW   rX   _assert_scalar,  s   rM  )rf   r  c                C  rL  r{   rW   )r   r   r  r   rf   r  rW   rW   rX   _assert_tensor_metadata5  s   rN  c                   s   | t | ttfstdrjt ttfr  fdd}n"t tjr. fdd}nt dks8J 	 fdd}t
j| |t|dS )Nr   c                      t  S r{   r<  r)  r   r   rW   rX   r2  C  r;  z_full.<locals>.inner_fnc                   rO  r{   r'  r)  rP  rW   rX   r2  H  r;  r   c                   s    g S r{   rW   r)  )value_loaderrW   rX   r2  O  s   r3  )r^   r   r  r   r   r   r   r   r   r$  r6   r   r\   )
fill_valuerf   r   r   r2  rW   )r   r   rQ  rX   _full<  s    rS  c                 K  s   t t|| fi |S r{   create_tensor_liketensor_constructor)r   rR  r   rW   rW   rX   r  Z  s   r  c                   s    d d d d dd d fdd
}|S )NF)namesr   rf   r  r7  r$  c                   s   t | d u d t |d tjfv d|  t | d t|}|p#t }t|dkr;t|d tttj	fr;t|d }|D ]
}t|tj
rGJ q=dd |D }t |||S )Nnamed tensorsr8  r7  r'   r   c                 S  r  rW   r  r  rW   rW   rX   r   u  r  z5tensor_constructor.<locals>.inner.<locals>.<listcomp>)r   rm   r?  r=   r@  r   r^   r\   r   Sizer  rS  )rW  r   rf   r  r7  r$  r   r  rR  rW   rX   r@  a  s   	"z!tensor_constructor.<locals>.innerrW   )rR  r@  rW   rZ  rX   rV  _  s   rV  )rW  r   r  rf   r7  r$  c                 G  sX   t | d u d t|}t|dkr"t|d tttjfr"t|d }t|d ||||dS )NrX  r'   r   r   r  rf   r7  )	r   r=   r   r^   r\   r   rm   rY  empty_strided)rW  r   r  rf   r7  r$  r   rW   rW   rX   r  {  s   
"r  c                   s   dddddd fdd
}|S )zZ
    Shim to convert X_like(...) into X(...).  For example zeros_like() into zeros().
    NF)r   rf   r  r7  r$  c                  sj   t | d t |d tjfv d|  |d u r|  }nt|}|p%|  }t|  } |||||dS )Nr7  r8  r6  )r   rm   r?  r   r   r`   r\   r   )r   r   rf   r  r7  r$  r   creation_fnrW   rX   _constant_like  s   

z*create_tensor_like.<locals>._constant_likerW   )r^  r_  rW   r]  rX   rU    s   
rU  c                 C  s   t t| S r{   rT  rZ  rW   rW   rX   r    r;  r  c                   s   d d d d d fdd
}|S )Nr[  c                  st   t |ttfs	J t| d t|d tjfv d|  t|p#|  }|p)|  }dd |D }t	 t
|||S )Nr7  r8  c                 S  r  rW   )r   rA  r  rW   rW   rX   r     r  z7new_constant.<locals>._new_constant.<locals>.<listcomp>)r^   r\   r   r   rm   r?  r   r   r`   rS  r=   r   r   r   r  rf   r7  rZ  rW   rX   _new_constant  s   z#new_constant.<locals>._new_constantrW   )rR  ra  rW   rZ  rX   new_constant  s   rb  r[  c                C  s8   |d u r|   }|d u r|  }t|d ||t||dS Nr[  r   r`   r\  r=   r`  rW   rW   rX   	new_empty  s   re  c                C  s  t | ttfs	J t |tttd fsJ t| d t|d tjfv d|  t|p/t }|p7t	dj
}t|}td||| d}|  |jj}tj|jdgt|  d|_t |tjsbJ dd | D } |rrd	d |D ntj| }tj||| |d
|_|S )Nr7  r8  r  r   )rR  rf   r   r   )r1  c                 S  r  rW   r  r  rW   rW   rX   r     r  z!empty_strided.<locals>.<listcomp>c                 S  r  rW   r  r  rW   rW   rX   r     r  )rf   r   r   r  )r^   r\   r   r5  r   rm   r?  r   r@  r  rf   r=   rS  rH  r_   dataclassesreplacer   r*   r9  r  r  r  r  )r   r  r   r  rf   r7  Z	pointwiserJ  rW   rW   rX   r\    s0   
r\  c                C  s8   |d u r|   }|d u r|  }t||||t||dS rc  rd  )r   r   r  r   r  rf   r7  rW   rW   rX   new_empty_strided  s   rh  c                 C  s2   dd |D }t tt||jd}tj| |S )Nc                 S     g | ]	}t jj|qS rW   )rH   rO   r  r  r  rW   rW   rX   r     r   z copy_strided.<locals>.<listcomp>)r  )sortedr   r   __getitem__r*   r  r  )r   r  r  rW   rW   rX   copy_strided  s   rl  c                 K  s*   | dd usJ dt|| fi |S )Nr   z(dtype should be handled by decomposition)r:  rV  )r   rR  r   rW   rW   rX   full  s   rm  c                   s   t | tsJ | dkrt| | S | tjksJ |  tdk}t	|  | |r8t
| dg} dg|  |  fdd}tj|  |  || dS )Nr   r'   c                   sF   t | } t|   }t| dkr|g} | S ||  < | S r  )r\   rG   indirect_indexingr   )r  Z
gather_idxr   index_loaderr   r  rW   rX   ri   !  s   zgather.<locals>.fnr3  )r^   r9   	get_numelre  r   r   rm   r  r   r  r  r$  r6   r   r`   )r   r   r  Zsparse_gradrU  ri   rW   ro  rX   gather  s&   	rr  c                   s   |rJ t | tsJ t |tsJ dt| v sJ |  |  t| |  g | dd   fdd}tj| 	 |  |dS )Nr   r'   c                   s\   t | t ksJ |  d  | d  }t|d gg | d   }|S )Nz != r   )r   rG   rn  )r  Z	var_indexZ
weight_idxindices_loaderZindices_ndimr  Zweight_loaderZweight_sizerW   rX   ri   ?  s   "
zembedding.<locals>.fnr3  )
r^   r9   r  r   r$  r   r   r6   r   r`   )weightr  Zpadding_idxZscale_grad_by_freqsparseri   rW   rs  rX   	embedding2  s    rw  c                   s   t dd  D sJ ddd  D  tdd  D r"tddd t D }t|d	ks5J d
d gt  }t|t fdd|D  D ]\}}| |krXtd|||< qJ||fS )Nc                 s  s4    | ]}|d ur|  tjtjtjtjfv V  qd S r{   )r   rm   r  r  r   r1  r   rW   rW   rX   r   P  s    z.check_and_broadcast_indices.<locals>.<genexpr>z)indices must be int64, byte or bool. Got c                 S  s   g | ]
}|d ur|  qS r{   r   r   rW   rW   rX   r   U      z/check_and_broadcast_indices.<locals>.<listcomp>c                 s  s,    | ]}|d ur|  tjtjfv V  qd S r{   )r   rm   r   r1  r   rW   rW   rX   r   W  s    "zFallback for bool indicesc                 S  r   rW   r   r   rW   rW   rX   r   \  r   r   z"requires at least 1 non-None indexc                   r  rW   rW   r   r  rW   rX   r   _  r   z.Fallback when indices is on a different device)r   r   r   r]   r   r   r   r`   )r  rf   Z
valid_idxsZnew_indicesrc   r   rW   rz  rX   check_and_broadcast_indicesO  s"   
$
r{  c	              
     s   dt dd  D ]\}	}
|
|	 dkrdq
fddtD g 
tt d  d }r> nd |  |d    	f
dd}|fS )	NFr'   Tc                   s    g | ]\}}|d u r | qS r{   rW   )r   rc   r  ri  rW   rX   r     r[  z2index_output_size_and_inner_fn.<locals>.<listcomp>r   c           	   	     s  t | t ks
J t t ksJ t }g }d }r"dn|}d}td d D ]F}||kr8||7 }| d u rR|t | k sFJ || |  |d7 }q.| }|d us\J | }|tj|| |||  | d q.g || |d  }	d u r|S 	|S )Nr   r!  r'   r   wrap_neg)r   r   ra   rG   rn  )	r  rX  	new_indexfirst_tensor_indexZstart_offsetZnext_idxrc   loaderr   )
r   indexed_sizer  indices_loadersnon_consecutive_tensorsoutput_sizetensor_indicestensor_sizer~  r  rW   rX   ri     s>   

z*index_output_size_and_inner_fn.<locals>.fn)r   r]   r   )ri  r  r  r  r  r  r  r   r~  previouscurrentr  ri   rW   )r   r  r  r  r  r  r  r  r~  r  ri  rX   index_output_size_and_inner_fni  s$    


"r  c                 C  s,   t | ||\}}}tj|  |  ||dS r%  )index_impl_helperr6   r   r`   r   )r   r  r   r  r2  r   rW   rW   rX   
index_impl  s   r  c           
        s   t ttfs	J |  t|  \}t|dks J ddd D }t|d   }|  fddttD }|rQd|v rQd|vrQt	dfddttD }t
||||d ||d	\}  fd	d
}	||	 fS )Nr   z Must have at least one valid idxc                 S      g | ]}|d ur|  nd qS r{   r#  r   rW   rW   rX   r     r[  z%index_impl_helper.<locals>.<listcomp>c                   s    g | ]} | d ur| qS r{   rW   r   )r  ri  rW   rX   r     r[  z0index is out of bounds for dimension with size 0c                   r  rW   rW   r   r|  rW   rX   r     r   r}  c                   s    | S r{   rW   r  )index_inner_fnr  rW   rX   r2    r;  z#index_impl_helper.<locals>.inner_fn)r^   r\   r   r$  r{  r`   r   r   r   
IndexErrorr  )
r   r  r   r~  r  r  r  r  r  r2  rW   )r  r  r  ri  rX   r    s0   

r  c                 C  sB   zt | |ddW S  ty    |   ttjjdd| | Y S w )NTr   Frb  )r  r   rH  rg  rq  r  r  r   r  rW   rW   rX   r    s   c                 C  s   t | |ddS )NFr  )r  r  rW   rW   rX   _unsafe_index  r  r  c                 C  s   t t| |||dddS )NTFr   may_realizeindex_put_impl_rZ  r   r  r   
accumulaterW   rW   rX   	index_put     r  c                 C  s   t t| |||dddS )NFr  r  r  rW   rW   rX   _unsafe_index_put  r  r  c                 C  sB   |  |   krt||   }|rt| |}t| t|d || S r  )r`   ry  r   rd  r  )r  r  r   r  rW   rW   rX   index_put_as_masked_fill	  s
   
r  c                 C  sl   t  }t|r(|s|r(|sdnd}tjjjdd  }r$| d| }|tj_t	
tjjj| ||| | S )Nzindex put with accumulate.zdeterministic index put.stack_trace Found from : 
 )rm   $are_deterministic_algorithms_enabledr2   rH   rO   rP   r9  r:  disable_cudagraphs_reasonr*   ZIndexPutFallbackrS   )r  r  r   r  deterministicr   r  rW   rW   rX   index_put_fallback  s   r  c                 C  s   t | |||dddS )NTr  r  r  r  r   r  rW   rW   rX   
index_put_!     r  c                 C  s   t | |||dddS )NFTr  r  r  rW   rW   rX   _unsafe_index_put_(  r  r  c              
     s  |r!dd }dd  || |  v r!t fdd|D s!|  | dkrZt|dkrZ|d  tjtjfv rZ|d }t	t|
 t| 
 D ]}t|d	}qJt| |g||S t ret| |||S |D ]}	|	d ur|	 tjtjfv rt| |||  S qg| 
 t}
|rt|  r|
dkrt| dg} t| |||} |
dkrt| g } | S t||  }zt||  \}}W n ty   t| ||| Y S w d
d |D }t| tsJ |   |
dkrt| dg} t||d  
 }fddt	t|D }t|||||d |d\}}t||}tj|  |  | |||r(dnd d}tjd t| |d}tj !||_"tj #| |
dkrNt| g } | S )Nc                 S  sP   t | tjr	| j} t | tjr|  } t | tjr| j} t | tjr&|  S d S r{   )	r^   r*   r9   r_   r  r  r7  ZBufferr  r   rW   rW   rX   try_get_name2  s   z%index_put_impl_.<locals>.try_get_namec                 S  sd   t | tr0t | jtjr0| j } t | tjo/t | jtjo/t| jdd o/| jj	j
tjjjjkS dS )Nr  F)r^   r9   r_   r*   r  r  r7  r  r   r  rS   rm   rG   rq  randpermrj  )indicerW   rW   rX   indice_slice_from_randperm;  s   
z3index_put_impl_.<locals>.indice_slice_from_randpermc                 3  rB  r{   rW   )r   r  )r  rW   rX   r   I  rR  z"index_put_impl_.<locals>.<genexpr>r'   r   r!  c                 S  r  r{   r#  r   rW   rW   rX   r   }  r[  z#index_put_impl_.<locals>.<listcomp>c                   r  rW   rW   r   r|  rW   rX   r     r   r  
atomic_addrf   r   r2  r1  output_indexerZscatter_moder   r  r_   )$rA  r   rH  rq  r   r   rm   r   r1  r   r   r  r  r  r  rB   rr  r   r{  r`   r   r^   r9   r\   r  r  r*   Scatterr$  r9  MutationLayoutSHOULDREMOVErH   rO   rH  r   rI  )r  r  r   r  r   r  r  r  r   r  Zx_ndimr  r  r  r  Zexpected_vals_sizer2  scatterrJ  rW   )r  ri  rX   r  /  s   	




r  rb  c                   sT   t | |ddd\}}  |   fdd}tj|  |  ||dS )NFr}  c                   sB   j tjkrt tj}n }t| fddS )Nc                     s    S r{   rW   rW   )_unsafe_index_fnr  self_loaderrW   rX   r    r  z8_unsafe_masked_index.<locals>.inner_fn.<locals>.<lambda>)r   rm   r   rG   r   r  )r  Zmask_valr  fillr  Zmask_loaderr  r  rX   r2    s   z&_unsafe_masked_index.<locals>.inner_fnr3  )r  r$  r6   r   r`   r   )r  r  r  r  r1  r   r2  rW   r  rX   _unsafe_masked_index  s   r  c                   s@   t ||d}|   fddtt D }t| ||ddS )Nr   c                   s6   g | ]} | rt  | |  | d  ndqS r  r  r   r  r  rW   rX   r     s    (z7_unsafe_masked_index_put_accumulate.<locals>.<listcomp>T)r  )r  r   r   r   r  )r   r  r  r   Zmasked_valueZclamped_indicesrW   r  rX   #_unsafe_masked_index_put_accumulate  s   
r  c                 C  s   t |t || S r{   )rG   r  r  r   minmaxrW   rW   rX   r       r  c                 C  rc  r{   )rZ  r  copy_)r  re  r   r  r  r  Zoutput_viewrW   rW   rX   as_strided_scatter  rg  r  c                 K  s   t t| |||fi |S r{   )scatter_rZ  )r   r   r  re  r   rW   rW   rX   r       r  r  include_selfr  rI  r  r  c             	   C  sf   t |t}t| || ttj|r| nt||r| jnd|r1t	j
| ||||||d |S d S )Nznot implr  )r^   r9   rF   r   r   rm   r   r5  r`   r*   ZScatterFallback)r  r  r   r  re  r  r  Zsrc_is_tensorrW   rW   rX   scatter_fallback  s(   

	r  r  c                C  sr   |dv sJ |d u r$t tjtjjjj}t|| ||||d}|d ur$|S |dkr+d}n|dkr1d}t	| ||||S )N)Nr   multiplyr  r   sumr  prod)
r   rq  r  rH   rO   rP   rS   Z_overloadnamer  scatter_reduce_)r  r   r  re  r  r  fallback_resultrW   rW   rX   r    s   r  c                 C  s   t t| |||S r{   )scatter_add_rZ  r   r   r  re  rW   rW   rX   scatter_add  rb  r  c                 C  s   t | |||dS )Nr  )r  r  rW   rW   rX   r  $  r{  r  c                 K  s   t t| ||||fi |S r{   )r  rZ  )r   r   r  re  reduction_typer   rW   rW   rX   scatter_reduce)  s   r  )r  c             	     s,  |dv sJ t tj dkrdtj v sJ dttr$tttjj |||d}|r5|S tt	s<J dt
| v sFJ t  }|dkrVtdgtt	rit  dkritdgt|t	r|t | dkr|t|dg}| dkrS t    | tt	r nd  fdd	}fd
d}	dd }
|stj  fdd| |d d}tjd t|d}tj||_tj| tj  |	| ||
|d}tjd t|d}tj||_tj| |dkrtg S )N)Nr  r  meanamaxaminr'   twozKaten.scatter_reduce_.two is not the unique overload of aten.scatter_reduce_r  r   r   c                   sD     }t|}t| }tj| |dkrdn|  dd| < |S )Nr   r'   F)r~  )r   r   r\   rG   rn  )r  r  ru  Zindirect_idx)r   rp  r  rW   rX   r  \  s   
z'scatter_reduce_.<locals>.output_indexerc                   s   r| S t   S r{   rG   r  r   r  )r  re  r.  rW   rX   ri   f  s   zscatter_reduce_.<locals>.fnc                 S  s   | dkrdS | d u sJ d S )Nr  r  rW   r  rW   rW   rX   backend_reduce_strm  s   z+scatter_reduce_.<locals>.backend_reduce_strc                   s   t d  S r  r  r)  )r  rW   rX   r  z  rZ  z!scatter_reduce_.<locals>.<lambda>r  r  )r   rq  r  r   r^   r   r  r  r  r9   r  r   r   rr  rq  r  rH  r$  r*   r  r`   r9  r  rH   rO   rH  r   rI  )r  r   r  re  r  r  r  ru  r  ri   r  Zzero_outrJ  r  rW   )r   rp  r  re  r.  rX   r  .  s   







r  scales_xtuple[Optional[float], ...]nexactc           
        s   |    |  |   d  |  d   }dd D t|ks)J |}dd t|D t|D ]\}}|d urGd| |< q9 fddfdd}	tj|  | 	 |	g ||d	S )
Nc                 S  ri  rW   r  r   rW   rW   rX   r     r   z&upsample_nearestnd.<locals>.<listcomp>c                 S  s   g | ]\}}|| qS rW   rW   )r   rc   orW   rW   rX   r     ro  r  c                   s\   t | tj}  rt | t dtj} t | t |tj} t | tj} t j	| |ddS )N      ?Fr  )
rG   r  rm   r  r   r  r4  r   r  rn  )r   r  r   )r  rW   rX   scale_fn  s   z$upsample_nearestnd.<locals>.scale_fnc                   sB   |  d  }| d   }g |fddt | D S )Nc                   s   g | ]\}}} |||qS rW   rW   )r   rc   r  r   )r  rW   rX   r     r   z2upsample_nearestnd.<locals>.fn.<locals>.<listcomp>)r   )r  r   r  )i_sizes
inv_scalesr  r  r  rW   rX   ri     s
    zupsample_nearestnd.<locals>.fnr3  )
realize_hintr$  r   r   r   r]   r6   r   r`   r   )
r   r  r  r  r  batchZo_sizesrc   r  ri   rW   )r  r  r  r  r  r  rX   upsample_nearestnd  s(   
r  Optional[float]c                 C  s   t | ||fddS )Nr'   r  r  r   r  r  rW   rW   rX   upsample_nearest1d  rb  r  c                 C  s   t | ||fdddS )Nr'   Tr  r  r  r  rW   rW   rX   _upsample_nearest_exact1d  r  r  scales_hscales_wc                 C  s   t | |||fddS )Nr%   r  r  r   r  r  r  rW   rW   rX   upsample_nearest2d  s   r  c                 C  s   t | |||fdddS )Nr%   Tr  r  r  rW   rW   rX   _upsample_nearest_exact2d  s   r  scales_dc                 C  s   t | ||||fddS )Nr   r  r  r   r  r  r  r  rW   rW   rX   upsample_nearest3d  s   r  c                 C  s   t | ||||fdddS )Nr   Tr  r  r  rW   rW   rX   _upsample_nearest_exact3d  s   r  c                   s   t  fdd|D S )Nc                 3  s    | ]	}t | V  qd S r{   r<  r   r   rW   rX   r     rP  z$_create_constants.<locals>.<genexpr>)r   )r   rd   rW   r   rX   r    s   r  c                   s:   |   |   fdd}tj|  |  |dS )Nc                   sF   t | } t| tksJ  D ]}| d | |  | |< q| S r  )r\   r   )r  r   r  r  r  rW   rX   r    s
   zrev.<locals>.loaderr3  )r$  r   r6   r   r`   r   )r   r  r  rW   r  rX   rev  s   r  paddingSequence[int]rR  c                 C  s  dd }| r	dS t |dkst |  dkrdS |   t| tjrBt| jtjrBt| jjtjs=t	j
rBt| jjtjrB| jjjsDdS |   t| \}}|j}|d dkrZdS |d dksl|d dksl|d dkrndS |d }|dkrxdS |d }|jd }	||	| k rdS | jjj}
|jd |jd | g}|tjj|
< t| ||j|j}t|d|	|	| d	}t|| td
 d  d7  < |S )z
    This optimization changes the semantics of padding from 'clone'
    style to 'view' style.

    Thanks to functionalization, this change can still maintain numerical
    correctness.
    c                  S  sL   t jj} | du r
dS t| j}t|dkr$|d jtjj	tj
j	fv r$dS dS )a  
        Conservatively check if padding can be fused with downstream op.
        1. if the downstream op is a sum, then there is little benefit to
           do inplace padding
        2. if the downstream op is a matmul, doing inplace padding can
           save membw.
        NTr'   r   F)rH   rO   rP   r   rQ   r   rS   rq  mmrj  Zaddmm)rP   rQ   rW   rW   rX   _padding_can_be_fused#  s   
z6inplace_constant_pad_nd.<locals>._padding_can_be_fusedNr   r%   r'   r   r   )r   r  r  inductorinplace_padding)r   r   rH  r^   r*   r9   r_   r7  r9  r(   Zcan_inplace_pad_graph_inputZInputBufferr   Zfreeze_layoutr  r  r   rH   rO   Zbuffer_to_padded_sizer  rU  r  fill_r   )r   r  rR  r  r   r  r  ZnpadZstride0ZrowsizeZbufnameZpadded_sizeZ	resized_xZsliced_xrW   rW   rX   inplace_constant_pad_nd  s\   

$


r  c              	     st  t |d dks
J tdd |D rt| S tjr$t| |}|r$|S |  }tttt	|d d d |dd d  t |t   g  D ]\}}
tjj||f qIt|d  }g t	 |d  D ]\\}}	}

|
 |
t|
| |	  qmt |t |ksJ t|   fddfdd	}|  tj|  |  ||d
S )Nr%   r   c                 s  r   r   NrW   r   r  rW   rW   rX   r   z  r   z"constant_pad_nd.<locals>.<genexpr>r'   c                   s~   g }t  d  D ]\}\}}}|dkr|t|d |dkr+|t|| qttj|}t| fddS )Nr   c                     r0  r{   rW   rW   )r  r  rW   rX   r    r2  z/constant_pad_nd.<locals>.mask.<locals>.<lambda>)	r   ra   range_mask_lowrange_mask_highr   r  rG   r  r  )r  r  r  r  r  r)  )boundsrR  
mask_sizesr  r  r)  rX   r    s   "zconstant_pad_nd.<locals>.maskc                   sZ   t | d  }t| d   D ]\}\}}|||  qt|t| ks)J |S r{   )r\   r   ra   r   )r  r  r  r  Z_high)bounds_precompr  r  rW   rX   	offset_fn  s
   z"constant_pad_nd.<locals>.offset_fnr3  )r   r   rZ  r(   r  r  r   r\   r  r   ra   rH   rO   r  Zlookup_precomputed_sizer   r  r   r   r$  r6   r   r`   )r   r  rR  rb   r  lhr  r  r  r   r  rW   )r   r  rR  r  r  r  r  rX   rK  w  s:   *

rK  rc   
sympy.ExprUnion[sympy.Expr, int]c                 C  s&   t t | tjt t|tjS r{   )rG   r  r  rm   r  r   rA  )rc   r  rW   rW   rX   r    s   r  c                 C  s    t t | tjt |tjS r{   )rG   r  r  rm   r  )rc   r  rW   rW   rX   r    s   r  c                 C  s   t t| |t| |S r{   )rG   r  r  r  )rc   r  r  rW   rW   rX   
range_mask  s   r  r  c                   sF       d   pdg   fdd}|S )Nr   c                   s|   | d   |  d   t tj fddtD }r1t| fddS t| 	fddS )Nc                   s.   g | ]}t |  | |  |  qS rW   )r  r   )r  ih	padding_hrW   rX   r     s   . z=constant_boundary_condition.<locals>.load.<locals>.<listcomp>c                     s   t  dg S )Nr   )constant_boundary_conditionrW   )r   r	  pad_fill_valueprefixr   rW   rX   r    s    
z;constant_boundary_condition.<locals>.load.<locals>.<lambda>c                     s   g  S r{   rW   rW   )r	  r  r  rW   rX   r    rZ  )r   r  rG   r  r   r  )r  r  r   rR  r  r  r  r
  r   r  )r	  r  rX   r(    s   	z)constant_boundary_condition.<locals>.load)r   r$  )r   rR  r  r  r   r(  rW   r  rX   r    s
   r  c                 C  s   t | d||   || d  || d  || }|r|t | d||   || d  d|| d   || }tjj|d ||  |  ||  dkra|d8 }tjjd|||  |  ||   tjj|| dkrztjj|| d}||fS |}||fS )Nr%   r'   r   F)r!   rH   rO   r  r  rr  r
  )r   rc   kernel_sizer  r  	ceil_modeZx_outZx_altrW   rW   rX   pooling_size  s    ,0*$r  c                 C  s4   t | d} | d | d  }|dkptdd |D S )Nr%   r   r'      c                 s      | ]}|d kV  qdS r  rW   r  rW   rW   rX   r     r   z:should_fallback_max_pool2d_with_indices.<locals>.<genexpr>)rC   r   )r  dilationwindow_sizerW   rW   rX   'should_fallback_max_pool2d_with_indices  s   
r  assert_fallbackc                C  s   |dkrddg}|dkrddg}|s|}t |d}t |d}t |d}t |d}t| ts/J t|dks7J t|dks?J t|dksGJ t|dksOJ t|  dv sYJ t||}|d urh||kshJ |||||fS )Nr   r'   r%   r  )rC   r^   r9   r   r   r  )r   r  r  r  r  r  Zuse_fallbackrW   rW   rX   max_pool2d_checks  s(   




r  c              
     s:  |    |  ^ }}}t|d||\}	}
t|d||\}}| j}|tju r,dn|jr3tdnt|j	}t
||	|g }d sMd sM|
sM|rUt| |ddn|  d  fdd}tjd	| |  |||||d
}tjd| |  tj||||d
}t|jjtr|  t|jjtr|  ||fS )Nr   r'   F-infr%   r  c                   sH   | d   }|  d    fddt D }g ||S )Nc                   s,   g | ]} | |  |  |  qS rW   rW   r   )bhr  reduction_idxr  rW   rX   r   ;     , z>_max_pool2d_with_offsets.<locals>.fn_inner.<locals>.<listcomp>r   )r  r  r  r	  r   r  r  r  )r  r  rX   fn_inner8  s   z*_max_pool2d_with_offsets.<locals>.fn_innerr  )r  
input_noderf   	dst_dtyperU  r2  r1  reduction_rangesargmax)r  r   r  r   rm   r   rm  r  ro  r  r\   r  r$  r7   r   r`   r  r^   r_   rH  )r   r  r  r  r  r  r  r  wh_out
ceil_mode1w_out
ceil_mode2r   Z	min_valuer  r!  r  offsetsrW   r   rX   _max_pool2d_with_offsets  sP   


r,  c           	      C  st   t | ||||dd\}}}}}tjdd t| |||||\}}|t|tjfW  d    S 1 s3w   Y  d S )NFr  r  )unroll_reductions_threshold)r  r(   r   r,  r   rm   r0  )	r   r  r  r  r  r  r   r  r+  rW   rW   rX   #_low_memory_max_pool2d_with_offsets\  s&   
	$r.  c                   sH   |   fdd  fdd}tj|  tj||  d}|S )Nc           	        sf   t  tj}t |d  d  tj}t |d  d  tj}||  }|| }|| | S r  )rG   r  rm   r  )	h_incw_incr  bww_inZhbaseZwbaser	  iw)input_widthr  r  rW   rX   increments_to_index  s   zF_low_memory_max_pool2d_offsets_to_indices.<locals>.increments_to_indexc                   sP   | ^ }}}g |||}t tj}|| }|||  } ||||S r{   rG   r  rm   r  )r  r  r  r1  rU  Zkw_constr/  r0  )r5  kernel_widthoffsets_loaderrW   rX   offsets_to_indices  s   zE_low_memory_max_pool2d_offsets_to_indices.<locals>.offsets_to_indicesr3  )r$  r6   r   r`   rm   r  r   )r+  r7  r4  r  r  r9  r  rW   )r5  r4  r7  r8  r  r  rX   )_low_memory_max_pool2d_offsets_to_indices{  s   r:  c           
      C  sx   t | ||||\}}}}}tdd |D r t| |||||dS t| |||||\}}t||d | jd ||}	||	fS )Nc                 s  r  r  rW   r  rW   rW   rX   r     r   z*max_pool2d_with_indices.<locals>.<genexpr>)r  r!  )r  r    fallback_max_pool2d_with_indicesr,  r:  r  )
r   r  r  r  r  r  r   rb   r+  r  rW   rW   rX   max_pool2d_with_indices  s   	
r<  c                   sN  dkrddg|dkrddg}st |tsJ tdks#J tdks+J tdks3J t|dks;J t| dv sEJ |   |  }t |trzt |jjtrz|jj}	tj	d tj
|	 |	 |	 d|	d}
|
  |
 }n| }|d ur|d dkp|d uo|d dk}tdd |D rt| ||||S | ^ }}
|  ^ }| |   t| }tfd	dtd d D tfd
dtd d D 		 }|dkrt| ||||S |  	
fdd}tj|  |  ||d}|r%tj|S |S )Nr   r'   r%   r  )rf   r   r   r  c                 s  s    | ]}|d kV  qdS r  rW   r  rW   rW   rX   r     r   z3max_pool2d_with_indices_backward.<locals>.<genexpr>c                 3  <    | ]}t |d   t d | d   d    dV  qdS r   r'   Nr  r   r  r  r  rW   rX   r     
    *
c                 3  <    | ]}t |d   t d| d   d    d V  qdS r'   r   Nr?  r   r&  rA  rW   rX   r     rB  r  c                   sV  | ^ }}}t |
 | tj}|d  }|d  }t t|d  d  d tj}t t|d  d  d tj}t t|d d tj}t t|d d tj}t |t dtj}t |t dtj}t |t tj}t |t tj}d }	tD ]}
t	D ]}t 	|t |
tj}t 	|t |tj}g |t j
t |t |t dtjd ddt j
t |t |t dtjd dd}|} |}t ||}|	d u rt ||t dtj}	qt t t ||t |||}t |t 	|	||	}	qq|	d us)J |	S )Nr   r'   Fr  r!  r  )rG   r  rm   r  r!   r  r  r  r   r   rn  r  r,  r  r  r  r  )r  r  r  r&  Z
index_testphstartpwstartphendpwendgradientph_pw_phpwZ
grad_indexZindex_actualZ	grad_partr   r  grad_loaderh_window_sizert  Zindices_sizer  r  pooled_heightpooled_widthr  w_window_sizewidthrW   rX   ri     sl     


#z,max_pool2d_with_indices_backward.<locals>.fnr3  )r^   r9   r   r   r  r   r_   r6   r*   r9  r  r`   r   Zdecide_layoutr  r   )fallback_max_pool2d_with_indices_backwardr$  r\   r  r   r   r  rT  )grad_outputr   r  r  r  r  r  r  Z	gO_strider_   Zx_bufferZx_strideZis_channels_last_batch_heightr   r  r  ri   rb   rW   rP  rX    max_pool2d_with_indices_backward  s|   	
 ;r[  r  c                   s   |    fdd}|S )Nc              
     s   |\|\ |\}}t t t   tjt |tjt t  tjt |tj}t | fddS )Nc                     s   g    S r{   rW   rW   )h_start_indexr	  r3  r  w_start_indexr  rW   rX   r  m  ry  z3pad_adaptive_loader.<locals>.load.<locals>.<lambda>)rG   r  r  r  rm   r  r  )r  Z
incrementsZstart_indicesZend_indicesh_end_indexw_end_indexr  pad_valr  )r\  r	  r3  r  r]  rX   r(  [  s$   z!pad_adaptive_loader.<locals>.loadr#  )r   ra  r(  rW   r`  rX   pad_adaptive_loaderX  s   rb  c           
      C  sL   t j| ||d}t j|||d}t j| ||d}t j|||d}	||||	fS )N)out_diminp_dim)r   r   )
start_index	end_indexh_inr2  r'  r)  r\  r^  r]  r_  rW   rW   rX    compute_indices_adaptive_poolingt  s
   rh  c                   sD   |\}}|\}}	t | |||||	\  fdd}
|
S )Nc                   s   | ^ }}}|} |}|}|}d }	t td td D ]\}
}|||
|g||g||g}|	d u r>|}	q&||	}	q&|	S r  )r   productr   )r  r  r  r  r1  r\  r^  r]  r_  r  r	  r3  r  h_end_index_fnh_start_index_fnkernel_maxes
pooling_fnw_end_index_fnw_start_index_fnrW   rX   ri     s"   $z _adaptive_pooling_fn.<locals>.fnrh  )re  rf  rm  in_sizes	out_sizesrn  rg  r2  r'  r)  ri   rW   rj  rX   _adaptive_pooling_fn~  s   rt  c           
        sF   |\}|\}}t | ||||\  fdd}	|	S )Nc                   s   | ^ }}}|} |}|}|}d }	d }
t td td D ]>\}}||||g||g||g}t||  | | tj}|
d u rO|}
ntt||	||
}
|	d u ra|}	q(||	}	q(|
S r  )	r   ri  r   rG   r  rm   r  r  gt)r  r  r  r  r1  r\  r^  r]  r_  maxvalmaxindexr	  r3  r  r  rk  rl  rm  rn  ro  r2  rp  rW   rX   ri     s0   $z)_adaptive_pooling_fn_with_idx.<locals>.fnrq  )
re  rf  rm  rr  rs  rn  rg  r'  r)  ri   rW   rx  rX   _adaptive_pooling_fn_with_idx  s   #ry  c                   s    tjkrtdttsJ t|dksJ    ^ }}}t	j
j|}t	j
j|}|\}}||krD||krDtS |dksL|dkr_g |||}t|   dS || dkrx|| dkrx|| || g}t|S t|| d |}	t|| d |}
t|||g }  }|	|
 }|dkrt|S dd }d	d
 }t|||	|
g||g||gtjd tt fdd}tj |||d}|S )Nz0'adaptive_avg_pool2d' not implemented for 'Long'r%   r   r  r'   r  c                 S     t | | |S r{   r!   r  rc  rd  rW   rW   rX   re       z)_adaptive_avg_pool2d.<locals>.start_indexc                 S     t | d | | d |S r  r{  r|  rW   rW   rX   rf  
  r<  z'_adaptive_avg_pool2d.<locals>.end_indexre  rf  rm  rr  rs  rn  c                   s   t  | t | S r{   )rG   truedivrb  r  fn_sumZones_loaderr   rW   rX   ri     s   z _adaptive_avg_pool2d.<locals>.fnr3  )r   rm   r  r  r^   r9   r   r  r   rH   rO   r  r  rZ  r  r`   
avg_pool2dr<   r\   fallback_adaptive_avg_pool2drt  rG   r   rb  	ones_liker6   r   )r   r  r  rg  r2  r'  r)  o_sizer  h_kernel_maxw_kernel_maxr  r   r  re  rf  ri   rvrW   r  rX   _adaptive_avg_pool2d  sV   

	r  c                   s    tjkrtdttsJ t|dksJ    ^ }}}t	j
j|}t	j
j|}|\}}|dks@|dkr]g |||}t|   dt|tj dfS || dkrk|| dkrktt|| d |}t|| d |}	t|||g }
  }||	 }|dkrt|S dd }d	d
 }t||||	g||g||gtjdt||||	g||g||gtjd fdd} fdd}tj |||
d}tj tj||
d}||fS )Nz,adaptive_max_pool2d not implemented for Longr%   r   r  r'   r  c                 S  rz  r{   r{  r|  rW   rW   rX   re  Q  r}  z(adaptive_max_pool2d.<locals>.start_indexc                 S  r~  r  r{  r|  rW   rW   rX   rf  T  r<  z&adaptive_max_pool2d.<locals>.end_indexr  c                       | t tdS Nr  rb  r  r  )inner_func_max_valr   rW   rX   inner_fn_max_vali  rN  z-adaptive_max_pool2d.<locals>.inner_fn_max_valc                   r  r  r  r  )inner_func_max_idxr   rW   rX   inner_fn_max_idxl  rN  z-adaptive_max_pool2d.<locals>.inner_fn_max_idxr3  )r   rm   r  r  r^   r9   r   r  r   rH   rO   r  r  r  r`   
ValueErrorr<   r\   fallback_adaptive_max_pool2drt  rG   r  ry  r6   r   )r   r  r  rg  r2  r'  r)  r  r  r  r  r   r  re  rf  r  r  r  rirW   )r  r  r   rX   adaptive_max_pool2d,  sp   
		r  c                   sP      t  d    fdd}|S )Nr'   c                   s   g | d  }t | }t   }t || | t ||  }t |tj}t |t d tj}t ||t  tjS r  )	rG   r  r   r  r   rm   r  r  r  )r  rc   sampleZi_exprZ
alpha_exprZseq_ir  r  r   in_sz	kernel_szndimsout_szsamplesZsamples_loaderrW   rX   r(    s   z)_fractional_pooling_offsets.<locals>.load)r#   r$  )r  r  r  r  r   r  r(  rW   r  rX   _fractional_pooling_offsets  s   r  c                   s   |    |  ^ }\}}|\}}|| dkr!t| ||S tjt|g|dd}	|	dd |	dd|   fdd}
t|||g }tj	| 
 |  tj|
d	d
|d}tj	| 
 tjtj|
dd
|d}||fS )Nr  r%   )r  r  r  r  r  r   r  r'   c              	     s   | ^ }}}t  ||}t ||}d }d }ttd td D ]I\}	}
g |||	 ||
 }|rft ||	  | |
 tj}|d u rT|}nt t 	t 
||t |||}|d u rm|}q*t ||}q*|rx|S |S r  )rG   rn  r   ri  r   r  rm   r  r  or_ru  r  r  )r  return_indexr  r  r1  r\  r]  rv  rw  r	  r3  r  r  Z
h_index_fninp_hinp_wr  Z
w_index_fnr  rW   rX   ri     s,   $z!fractional_max_pool2d.<locals>.fnF)r  r3  T)r  r   fallback_fractional_max_pool2dr   r   r  r$  r\   r6   r   r`   r   rm   r  )r   r  r  Zrandom_samplesr  kernel_hkernel_wr'  r)  Zgen_offsets_for_dimri   r  r  r  rW   r  rX   fractional_max_pool2d  sD   
	
r  c                   s       ^ }}}tjj|}tjj|}|^ }}}	|| dkr9||	 dkr9t|| ||	 gddS t||}
t||	}dd fdd}t||
|g||g||	gt	j
d  fd	d
}tj  |t|d}|S )Nr   r'   )divisor_overridec                 S  s   t | | t|S r{   )r    r   r{  r|  rW   rW   rX   re    rN  z0upsample_nearest2d_backward.<locals>.start_indexc                   s    | d ||S r  rW   r|  )re  rW   rX   rf    rX  z.upsample_nearest2d_backward.<locals>.end_indexr  c                   s    | t S r{   )rb  r  )r  r   rW   rX   ri     r}  z'upsample_nearest2d_backward.<locals>.fnr3  )r  r   rH   rO   r  r  r  r<   rt  rG   r   r6   r   r`   r   r\   )r   r  Z
input_sizer  r  rY  r  r  Zout_hZout_wr  r  rf  ri   r  rW   )r  re  r   rX   upsample_nearest2d_backward  s6   

	r  rW   c              
   C     t | ||||||ddS )Nr%   r  _avg_poolndr   r  r  r  r  count_include_padr  rW   rW   rX   r       
r  c              
   C  r  )Nr   r  r  r  rW   rW   rX   
avg_pool3d/  r  r  c              	     s  
s
sdg t t 

t t| ts!J tks)J t
ks1J tks9J t|  d d fv sIJ |   |  d   }|   d  t 
fddtD  \}	}
tszt|
rt	| ddd}n| 
 d	}t|t|	 }|  ttj}|d
krdkrt}ndkrt}ntd || 
 |S 
fdd|r|r|r|n|jrd 		fdd}nfdd}n
f	dd}tj|  ||d}|S )Nr   r'   r%   c              	     s"   g | ]}t | | qS rW   )r  r   )r  r  r  r  r  rW   rX   r   b  s    z_avg_poolnd.<locals>.<listcomp>r  r  TFr  r   zUnknown dim: c                   s   | d   }|  d   d }t jfddtD  D ]% fddtD }|g ||}|d u r=|}qt||}q|S )Nc                   s   g | ]}t  | qS rW   r  r   )r  rW   rX   r     ro  z/_avg_poolnd.<locals>.fn_sum.<locals>.<listcomp>c                   s,   g | ]} | |  |  |  qS rW   rW   r   )r  r	  r  r  rW   rX   r     r  )r   ri  r   rG   r   )r  r  r  totalr   r  )r   r  r  r  )r  r	  rX   r    s    z_avg_poolnd.<locals>.fn_sumc                   s   t | t  S r{   )rG   r4  r  r  )r   r  r  r  rW   rX   ri     r<  z_avg_poolnd.<locals>.fnc                   s   t | t  S r{   )rG   truncdivr  r  )divisorr   r  r  rW   rX   ri     r  c                   s   |  d  }g }t D ]<}|| |  |  }t||  | |  } s;t|d}t|| }t|| tj}|| qt	
tj|}jr]t| |S t| |S r  )r   r   ZMinMaxrG   r  rm   r  ra   r   r  r4  rm  r  r  )r  r  Zdivide_factorsrc   hstarthendfactordivide_factor)	r  r   r   r  r  r  r  r  r  rW   rX   ri     s    r3  )rC   r^   r9   r   r   r  r   r   r   r  r$  r\   r   r   r  r  r4  fallback_avg_pool2dfallback_avg_pool3dr  rm  r6   r   r`   )r   r  r  r  r  r  r  r   r  r'  Z
ceil_modeshad_paddingr  r  fallbackri   r  rW   )r  r  r   r  r   r  r  r  r  r  r  r  rX   r  E  st   




 
r  c                   s  d u sdksJ dssddgt | tsJ t |ts$J tdks,J tdks4J tdks<J t| dv sFJ |   | ^ }td|\}	}
td|\}}|  d pwd pw|
pw||  ^ }	
t| }| }t	fddt
d d D t	fddt
d d D  }|d	krt| ||S fd
d  	
fdd}tj|  |||d}|S )Nr   divisor must be not zeror%   r  r'   c                 3  r=  r>  r?  r@  rA  rW   rX   r     rB  z&avg_pool2d_backward.<locals>.<genexpr>c                 3  rC  rD  r?  rE  rA  rW   rX   r     rB  r  c              	     sX  t d tj}t d tj}t d tj}t d tj}t d tj}t d tj}t t | ||}t t |||}	t t ||t t  tj|}
t t |	|t t tj|}t 	|t dtj}t 	|	t dtj}	t |
t  tj}
t |t tj}t t |
|t ||	}|S )z{
        This computes the scaling factor that we will divide an element
        by when `count_include_pad=False`
        r   r'   )
rG   r  rm   r  r  r4  r  r   r  r  )rN  rO  stride_hstride_wpad_hpad_wr  r  r  wstartr  wendr  )heightr  r  r  rV  rW   rX   !compute_pool_size_without_padding  s,   

z>avg_pool2d_backward.<locals>.compute_pool_size_without_paddingc                   sR  | ^ }}}|d  }|d  }t t|d  
d  
d tj}t t|d  
d  
d tj}t t|
d d tj}t t|
d d tj}t |t dtj}t |t dtj}t |t tj}t |t 	tj}d }tD ]}	tD ]}
t 	|t |	tj}t 	|t |
tj}d ur}nssd d  }n ||}t 
g |t jt |t |t dtjddt jt |t |t dtj	dd|}t t ||t ||}|d u rt ||t dtj}qt |t 	|||}qq|d us'J |S )Nr   r'   Fr  r  )rG   r  r!   rm   r  r  r  r  r   r   r  rn  r  r  r  r  r  )r  r  r  r&  rG  rH  rI  rJ  rK  rL  rM  rN  rO  r  partr  )r  r  r  rQ  rR  r  r  r  rS  rT  r  rU  rW   rX   ri      sv     
	


*zavg_pool2d_backward.<locals>.fnr3  )r^   r9   r   r   r  r  r$  r\   r   r  r   fallback_avg_pool2d_backwardr6   r   r`   )rX  r   r  r  r  r  r  r  r   _h_outr(  _w_outr*  r  r   r  ri   r  rW   )r  r  r  rQ  rR  r  r  r  r  rS  rT  r  rU  rV  rX   avg_pool2d_backward  sb   "Ar  c                   s  d u sdksJ ds	
sg d
t | tsJ t |ts$J t	dks,J tdks4J t
dks<J t| dv sFJ |   | ^ }td	
|\}	}
td	
|\}}td	
|\}}|  t
p|
p|p||  ^ }t| }|	 }	fdd	t
dD \  }|d
krt| |	
|S 	
fdd  	
fdd}tj|  |||d}|S )Nr   r  )r   r   r   r   )r   r   r'   r%   c                 3  s6    | ] t  fd dt  d D V  qdS )c                 3  s<    | ]}t |   t d |       dV  qdS r>  r?  r  )rc   r  r  rW   rX   r     rB  z0avg_pool3d_backward.<locals>.<genexpr>.<genexpr>r%   N)r  r   )r   rA  )rc   rX   r     s    
z&avg_pool3d_backward.<locals>.<genexpr>}   c              	     s  dd D \}}}dd D \}}}dd D \}	}
}dd t | ||g|||g|||gD \}}}dd t |||g|	|
|g g|||gD \}}}dd |||fD \}}}dd t |||g gD \}}}ttt||t||t||}|S )	Nc                 s      | ]
}t |tjV  qd S r{   r6  r  rW   rW   rX   r     r   zQavg_pool3d_backward.<locals>.compute_pool_size_without_padding.<locals>.<genexpr>c                 s  r  r{   r6  r  rW   rW   rX   r     r   c                 s  r  r{   r6  r   rW   rW   rX   r     r  c                 s  s*    | ]\}}}t t |||V  qd S r{   )rG   r  r4  )r   r  r  padrW   rW   rX   r     s
    
c              
   s  s>    | ]\}}}}t t ||t t |tj|V  qd S r{   )rG   r  r   r  rm   r  )r   r  r   r   r  rW   rW   rX   r     s    

c                 s  &    | ]}t |t d tjV  qdS r  rG   r  r  rm   r  )r   r  rW   rW   rX   r     
    
c                 s  *    | ]\}}t |t |tjV  qd S r{   rG   r  r  rm   r  )r   r  r   rW   rW   rX   r     
    
)r   rG   r4  r  )pdrN  rO  Zstride_dr  r  Zpad_dr  r  Zkernel_dr  r  Zdstartr  r  Zdendr  r  r  )depthr  r  r  r  rV  rW   rX   r    s8   $z>avg_pool3d_backward.<locals>.compute_pool_size_without_paddingc                   sJ  | ^ }}}}dd t |||gD \}}}dd t |||gD \}}}dd t |||gD \}}	}
dd |||fD \}}}dd t ||	|
g	
gD \}}	}
d }tD ]}tD ]}tD ]}dd t |||g|||gD \}}}d ur}nssd d	  d
  }n |||}tg |tjt|t|td	tj		ddtjt|t|	td	tj	
ddtjt|t|
td	tj	dd|}t
t
t||t||	t||
}|d u rt||tdtj}qjt|t|||}qjqdq^|d us#J |S )Nc                 s  s    | ]	\}}|| V  qd S r{   rW   )r   r   r  rW   rW   rX   r     rP  z2avg_pool3d_backward.<locals>.fn.<locals>.<genexpr>c                 s  s2    | ]\}}}t t|| | |tjV  qd S r{   rG   r  r!   rm   r  )r   r   r   r  rW   rW   rX   r     s
    
c                 s  s,    | ]\}}t t||d  tjV  qdS r  r  )r   r   r  rW   rW   rX   r     s
    
c                 s  r  r  r  )r   pstartrW   rW   rX   r     r  c                 s  r  r{   r  )r   ZpendZ
pooled_dimrW   rW   rX   r     r  c                 s  r  r{   )rG   r   r  rm   r  )r   r  Zp_rW   rW   rX   r     r  r   r'   r%   Fr  r  )r   r   rG   r  rn  r  r  r  rm   r  r  r  r  r  r   )r  r  r  r  r&  ZpdstartrG  rH  ZpdendrI  rJ  rK  Zpd_rL  rM  r  rN  rO  r  r  r  )r  r  d_window_sizer  rQ  rR  r  r  r  pooled_depthrS  rT  r  rU  rW   rX   ri     s    	

8zavg_pool3d_backward.<locals>.fnr3  )r^   r9   r   r   r  r  r$  r   r\   r   r   fallback_avg_pool3d_backwardr6   r   r`   )rX  r   r  r  r  r  r  r  rY  Z_d_outZceil_mode_dr  Zceil_mode_hr  Zceil_mode_wr   r  r   r  ri   r  rW   )r  r  r  r  r  rQ  rR  r  r  r  r  r  rS  rT  r  rU  rV  rX   avg_pool3d_backwardo  sf   &%Wr  c                 C  s   |   }t|tr|g}n|stt|}t|dkr*t|dv s(J d| g S t|}tt|D ]5}|| dk rL||  t|rHt|nd7  < d||   krZt|k sin t|dkrg|| dksiJ q4tt|t|ksxJ d|S )Nr   )rW   r&  r"  zinvalid axis: r'   zreduction axis not unique)r   r^   r   r   r   r   r\   r   )r   r  r   rc   rW   rW   rX   _validate_reduction_axis7  s    
 :r  c          
        s   |d ur	t | |} |  tt t| |}g }g g }g ttD ]}||v r7| ||  q$| ||  q$ fdd}r`t}	D ]}t	j
j|	|< qVn|}	|   t|  |po|  |  ||	|dS )Nc                   s   t |t ks
J rt  t ksJ  fddD  t  t ks)J d gt  t |  }tt t|D ]\}}|||< q@|S )Nc                   r  rW   rW   r   r)  rW   rX   r   _  r   z9_make_reduction_inner.<locals>.loader.<locals>.<listcomp>)r   r   r   r   )r  Zreduction_indexr  r  varZinner_loaderkeepdimsZkept_idxZreduced_idxr   r)  rX   r  [  s   
z%_make_reduction_inner.<locals>.loader)rf   r#  rU  r2  r1  r$  )r   r   r   r   r  r   r   ra   r\   r   r  r  r$  r   r`   r   )
r   r  r  r   r  Z
kept_sizesZreduced_sizesrc   r  r  rW   r  rX   _make_reduction_innerI  s<   



r  r  rI   c                   s   dd d fdd}|S )NFr   c                  sB   t | ||| d}tjd| d|}t|jjtr|  |S )Nr  r  r   r  )r  r"  rW   )r  r7   r   r^   r_   rH  )r   r  r  r   r   r  r  r  rW   rX   r@  {  s   zmake_reduction.<locals>.innerr   rW   )r  r  r@  rW   r  rX   make_reductionz  s   r  c                C  sB   |d ur	t | |} t| |}t|  |  f|  f|  |dS )N)rf   dtypes	inner_fnsr   r  )r   r  r   r`   r   r$  r   )r   r  r   rW   rW   rX   _make_scan_inner  s   

r  r   c                  s   |d ur	t | |} |   t| |}|  }|tjtjfv r$t | tj} t| ||}t	 fdd|D }t
j||  |  d}t|t| }t t|||S )Nc                 3  r   r{   rW   r   r   rW   rX   r     r   zmean.<locals>.<genexpr>r  )r   r   r  r   rm   r8  r7  r  sum_rE   r*   r0   r`   r/   r   r\   div)r   r  keepdimr   Zoutput_dtype
sum_resultdenomrW   r  rX   r    s   

r  c           
        s   |d u rd}|    t| |}t| |dd}|r|  tt| |}t|||}t fdd|D }|r>t	|| d}t
j||  |  d}t|t|  }t||}	|s^|	fS |rb|nt||}|	|fS )Nr'   T)r  c                 3  r   r{   rW   r   r  rW   rX   r     r   z var_mean_sum_.<locals>.<genexpr>r   r  )r   r  r  rH  squarer  r  rE   r   r  r*   r0   r   r`   r/   r   r\   r  r  )
r   r  
correctionr  return_meanZx_meanZdiffsr  r  Zx_varrW   r  rX   var_mean_sum_  s&   

r  c                 C  sV   t | |}t| ||d d d}|d }t|d }t|tjo*t|tjk o*t|dkS )Nr  r1  r$  r'   )	r  r  rE   r^   r   rA  r   r(   r-  )r   r  r  r   r1  reduction_numelrW   rW   rX   use_two_step_variance  s   


r  c                  s    d u rd t | ||d d d}|d}|d |d tjjd|fd|  d|\}}}	|  |  |  t| |}t	fdd	|D d
d  fdd}
t
|
|}|rj|  ||fS |fS )Nr'   r  r2  r#  rU  Zwelford_reduce)r  r  r   c                 3  r   r{   rW   r   r  rW   rX   r     r   z$var_mean_welford_.<locals>.<genexpr>c                 S  s4   t | tjr| jstt| tj|S t	| |S r{   )
r^   r   r   Z	is_numberrG   r   r  rm   r  r  rt  rW   rW   rX   get_constant_or_index_expr  s   z5var_mean_welford_.<locals>.get_constant_or_index_exprc                   s4    }}t d}| t |||  S r  )rG   r  r  )r_   cNzero)r  r   r  rnumelrW   rX   r    s   

z#var_mean_welford_.<locals>.scale_fnrW   )r  r  r*   ZWelfordReductionr   r   rH  r   r  rE   rA  )r   r  r  r  r  r   r  r  m2r   r  r  rW   )r  r   r  r  r   rX   var_mean_welford_  s6   




r  c                  s   |    t }t| |dd} t| ||||d}t| ||dr&tdi |ntdi |}t fdd|D }|s>|d S |S )	NFrd  )r   r  r  r  r  )r  r  c                 3  s    | ]
}t | d dV  qdS )Frd  Nr4  r   r  rW   rX   r     r   z#var_mean_helper_.<locals>.<genexpr>r   rW   )r   r   r   r   r  r  r  r   )r   r  r  r  r  Zcompute_dtyper   r  rW   r  rX   var_mean_helper_  s    	r  )r  r  c                C     t | |||ddS )NFr  r  r  r  r  r   r  r  r  rW   rW   rX   var_     
r  c                C  r  )NTr  r  r  rW   rW   rX   var_mean  r  r   c                 C  st   |dk rt t| | |S |dkrtd|S |dkr| S t | |d |}t||}|d dkr8t|| }|S )Nr   r'   r%   )pow_recursiverG   r  r  r4  )r   r  r   r  rW   rW   rX   r  $  s   r  c                 C     t | |S r{   )rG   powr   r  rW   rW   rX   
pow_native3  rm  r  )r   c                   sV  t trtkrt tS t trdkrt S t tr,dkr,t S tdd  fD }t|}t toQd  k oIdk n  pQ|oQdk}|ro   fdd	}t	j
    |  d
S t  tr dkr}tdS  dkrt rtS |rt  trt S t trt S t S t S )Nr  r'   c                 s  s$    | ]}t |tjr| V  qd S r{   )r^   r*   r9   r   r   rW   rW   rX   r   K  r  zpow.<locals>.<genexpr>i    r   c                   s   t |   S r{   )r  r   r  r   r  r  rW   rX   ri   U  rN  zpow.<locals>.fnr3  r%   )r^   r  r   r  sqrtrZ  r  r   r$  r6   r   r`   r   r   r   r  r   exp2fallback_pow_scalarfallback_pow_tensor_scalarfallback_pow_tensor_tensorr  )r   r  r   Zis_integer_powZembed_exponentri   rW   r  rX   r  A  s@   
"







r  c                 C  s   t | tr	| j}n| }t |tr|j}t |tjs3tj|  |  |	 | 
 dj}t |tjs3J t |tjrR| sR| sRt |jtjsR|  |j|_| S tjj|||d | S )Nr3  unsafe_alias)r^   r9   r_   r*   r7  r6   r   r`   r   r$  r   Zis_input_bufferZis_module_bufferZ	NopKernelrH  r  Zrealize_into)changedr  r  Zchanged_datarW   rW   rX   rd  q  s:   

rd  c                 C  s   t | t| |S r{   )rd  r  )r   rR  rW   rW   rX   r    r{  r  c                 C  @   | |u r| S t ||  }t||  }t||  }t| |S r{   ry  r`   r   r   r  r   rd  )rk  re  rw  rW   rW   rX   r       
r  c                 C  r  r{   )rG   floordivr  rW   rW   rX   r    rm  r  c                 C  r  r{   )rG   r  r  rW   rW   rX   r    rm  r  c                 C  s   t | ot |}t| ot|}|dkr(|rJ d|r!t| |S tt| |S |dkr@|r2J d|r9t| |S tt| |S t| |S )Nr  z5floordiv operands can not be boolean at the same timer  z5truncdiv operands can not be boolean at the same time)r   r   r  r  r  r  r  )r   r  Zrounding_modeZboth_integerZboth_booleanrW   rW   rX   div_mode  s   
r  c                 C  s8   t | ot |}|rt| |S ttjj}t|| |S r{   )r   logical_andr4   rq  r4  r|  rA  )r   r  Z	both_boolri   rW   rW   rX   r4    s
   
r4  r6  Optional[ir.Constant]c              	   C  s   t | tjrt| jS t | tjrt|  S t | tjr| S t | tjs'dS t	j
j|  }t|' ttjdd | j|   }W d   n1 sPw   Y  W d   n1 s_w   Y  t |t	j
jjsnJ t |jtjrx|jS dS )z:Try convert an arbitrary IR node into an ir.Constant valueNZallow_indexingT)r^   r*   Z
MutableBoxget_constant_valuer_   r  r  r   ZLoopsrm   Z	_inductorops_handlerZExtractConstantsHandlerr`   rH   Zset_ops_handlerr   objectr  r2  Zinner_fn_argsvirtualizedZOpsValuer   )r   r  rb   rW   rW   rX   r    s*   
 r  c                 C  s|   t dd | |fD }|rt| |S t| }d ur3|jdkr)ttd|j}nd|j }t| |S dd }t|| |S )Nc                 s  s     | ]}t |pt|V  qd S r{   )r   r   r   rW   rW   rX   r     r3  zdiv_prim.<locals>.<genexpr>r   infr  c                  W  r  r{   )rG   r  r   rW   rW   rX   ri     r  zdiv_prim.<locals>.fn)	r   r  r  r   mathcopysignr  r4  rA  )r   r  is_integralr  r  ri   rW   rW   rX   div_prim  s   



r  c                 C  s    t | |ftjd\} }t| |S r  )r  r   INT_TO_FLOATr  r  rW   rW   rX   r    s   


r  c                 C  s4   t | pt| }|rdd }ndd }t|| |S )Nc                 S  r  r{   )rG   modr  rW   rW   rX   ri     r;  zfmod.<locals>.fnc                 S  r  r{   )rG   fmodr  rW   rW   rX   ri     r;  )r   r   rA  )r   r  r  ri   rW   rW   rX   r"  	  s
   
r"  c                C  B   t |  st|  r|d u rtj}td|d}|| |||dS )Nr  rY  r   r   r   r   rm   r  r  r   r  r  r   ri   rW   rW   rX   r       

r  c                 C     t |  st|  r|d u rtj}t|  dkr.|dv s!J |p&|  }t| |ddS dd }t| ||d}t	j
jd
i |d|i\}|d u rRt| ||d	S |S )Nr   r   r!  Trd  c                 S     | \}|\}t ||fS r{   )rG   r   a_tupleb_tupler   r  rW   rW   rX   
combine_fn8     zcumsum.<locals>.combine_fnr  r   r-  r   r   rW   )r   r   r   rm   r  r   r   r   r  r*   Scanr   fallback_cumsumr   r  r   r-  r   r  rW   rW   rX   cumsum,      

r4  c                 C  r'  )Nr   r(  Trd  c                 S  r)  r{   )rG   r4  r*  rW   rW   rX   r-  P  r.  zcumprod.<locals>.combine_fnr/  r-  r0  rW   )r   r   r   rm   r  r   r   r   r  r*   r1  r   fallback_cumprodr3  rW   rW   rX   cumprodD  r5  r7  c                 C  sv   dd }|   }t|  dkr|dv sJ t| S t| ||d}tjjdi |d|i\}|d u r9t| |dS |S )	Nc              	   S  s\   | \}|\}t ||}t ||}||kt | B }t |t t || | |fS r{   )rG   r  r  r  r  log1pexp)r+  r,  r   r  Zmin_vZmax_vr  rW   rW   rX   log_add_exp_helper^  s   $z(logcumsumexp.<locals>.log_add_exp_helperr   r(  r/  r-  r  rW   )	r   r   r   rZ  r  r*   r1  r   fallback_logcumsumexp)r   r   r:  r   r   r  rW   rW   rX   logcumsumexp\  s   r<  c                 C     t |  dkr|dv sJ t| t| tjdfS |  }tjd|dd}t	| ||d}|tjf|d< | 
 d	d
 f|d< tjjdi |d|i\}}|d u rVt| |dS ||fS )Nr   r(  r   r%  Fr   Zarg_break_ties_leftr/  r  c                 S     dS NrindexrW   r  rW   rW   rX   r        zcummax.<locals>.<lambda>r  r-  r  rW   )r   r   rZ  rf  rm   r  r   r*   get_reduction_combine_fnr  r$  r1  r   fallback_cummaxr   r  r   r-  r   r   r  rW   rW   rX   cummaxr     rF  c                 C  r=  )Nr   r(  r   argminFr>  r/  r  c                 S  r?  r@  rW   r  rW   rW   rX   r    rB  zcummin.<locals>.<lambda>r  r-  r  rW   )r   r   rZ  rf  rm   r  r   r*   rC  r  r$  r1  r   fallback_cumminrE  rW   rW   rX   cummin  rG  rJ  c                C  r#  )Nr  rY  r   r$  r%  rW   rW   rX   r    r&  r  c                 C  s   t | tj} td| ||dS )Nr   r  r  )r   rm   r   r  r   r   r  rW   rW   rX   
reduce_any  s   rM  c                 C  2   |d urt | ||dt| ||dfS t | d |dS NrK  )reduce_amaxreduce_argmaxrL  rW   rW   rX   
reduce_max  
   rR  c                 C  rN  rO  )reduce_aminreduce_argminrL  rW   rW   rX   
reduce_min  rS  rV  xor_sumr  r  r%  rY  rH  
logical_or)r=  r0  stabler   
descendingc          
   	   C  s:  |d u rd}|   }|  }tt||}t|dkr't| td|tj|fS t|r/|| nd}tj	j
|ttjjsFt| |||dS t|ddtj|dd}dgt| }t|r`|||< t||}t||}tjj|| j|jf|  | f||||d\}	}|	d u rt| |||dS |d usJ |	t|tjfS )NFr   r'   rY  )r  r  r   rf   r*  )rf   r  r  r   r  rZ  r[  )r   r`   r   r   rZ  rS  rm   r  rH   rO   r  Zstatically_known_ltro  int16r  sort_fallbackr+  rr  r  r*   ZSortr   r   r$  r   )
r   rZ  r   r[  r  rf   rv  r  Z
view_shaper   rW   rW   rX   sort_stable  s>   



	r^  c                 C  s   t | d||dS )NFrY  )r^  )r   r   r[  rW   rW   rX   sort  r{  r_  c                 C  s   t | |tj|dS )Nr   r   r?  r~  r   r   )rR   r   r?  rW   rW   rX   register_pointwise_numeric  s   rb  c                 C  s   t | tjddS )NT)r   r}  ra  rL  rW   rW   rX    register_pointwise_numeric_ldf64  s
   rc  r  logical_not)r0  )r   r   r  identity)rF  pointwise_overrides_datac                 #  s    t |  t|  jd }|d u rd S  fdd}t|tjjr6| D ]}t||}| j||fV  q#d S | j||fV  d S )Nc                   s    j d u r	t| S d S r{   )Ztritonrg  rL  rC  rW   rX   make_triton_fallbacku  s   
z6_get_pointwise_overrides.<locals>.make_triton_fallback)	rf  r   r   r^   rm   rw   r   r   r   )nsr   rR   rg  Zolnamer  rW   rC  rX   _get_pointwise_overrideso  s   
ri  r`  c                   s,   | t |< t|   fdd}t| | d S )Nc                    sB    | i |}g }t | d |D ]\}}|t||dd q|S )Nr   Tr  )r   ra   rd  )rd   r   resultsZmut_resultsr   r  outplace_oprW   rX   ri     s
   z$register_foreach_inplace.<locals>.fn)rN   rD  r   r   )aten_opZoutplace_aten_oprl  ri   rW   rk  rX   register_foreach_inplace  s   
rn  c                   s   t | d d fdd}|S )Nr\  c                    s.    | i |}t || d  }t| d |S r  )r   r   rd  )rd   r   r  rk  rW   rX   ri     s   zregister_inplace.<locals>.fn)r   )rm  rl  ri   rW   rk  rX   register_inplace  s   
ro  c                 C  rL  r{   rW   r  rW   rW   rX   sym_constrain_range
  r  rp  c                 C  &   t jjjd }t|tjsJ |jjS Nr  	rH   rO   rP   r9  r^   rm   r  rU   r	  r   r   r  rW   rW   rX   sym_size  s   ru  c                 C  rq  rr  rs  rt  rW   rW   rX   
sym_stride   s   rv  c                 C  s   |   S r{   )rq  )r   rW   rW   rX   	sym_numel(  r  rw  c                 C  r  r{   )r   Addr   rW   rW   rX   sym_sum1  s   
ry  c                 O  r  )NzHelpful for debuggingr   )r  rd   r   rW   rW   rX   foobar6  r  rz  c                 C  s   |    t| S r{   )rH  rZ  r   rW   rW   rX   _realize;  s   r{  c                 C  s   |    t| | | S r{   )rH  r*   ZResizeStorageBytes)variabler  rW   rW   rX   resize_storage_bytes_A  s   r}  c                 C  s"   |    |   tt| |S r{   )rH  r9   r   r*   ZSetSourceTensorKernel)r  Zsource_tensorrW   rW   rX   set__source_tensorH  s   r~  c                 C  r  r{   r  )rk  re  rW   rW   rX   
fsdp_copy_Q  r  r  c          	        sv  t | tsJ t |ttfsJ |d u rtj}|tjkr#td| |tjkr0t	|dks0J |tj
kr=t	|dks=J |  |  }|  }t | jtjrV| j | _t rvtjjjrvt|rhtdnt|rst|jndndtjjdrt|||dS t | gd	g}|!  tj"#||}t$||||%  fd
d}t&j'|||t|d}|S )Nzunsupported memory format: r   r   nanTr  r   r  r'   c                   sH   |  t  tj}t tj}t ||}t | fddS )Nc                     s
    gS r{   rW   rW   )
flat_indexflat_loaderrW   rX   r    s   
 z*resize.<locals>.inner_fn.<locals>.<lambda>)rG   r  rm   r  r  r  )r  Zflat_index_exprlimitr  r  Z	old_numelZout_indexerZuninitalized_val)r  rX   r2    s
   zresize.<locals>.inner_fnr3  )(r^   r9   r\   r   rm   Zcontiguous_formatZpreserve_formatr  Zchannels_lastr   Zchannels_last_3drq  r   r  r_   r*   r  r  r  r}   r  Zfill_uninitialized_memoryr   r  r   ro  r  rH   rO   r  r!  rm  r  r$  r  Z stride_ordered_for_memory_formatr  r  r6   r   )	r   r   r$  r   rf   Zx_flatZ
out_strider2  rb   rW   r  rX   resize\  sT   



	r  )auto_functionalizedc                 C  sB   ddl m} ||}tj| ||i ||d dd | D S )Nr   )kernel_side_table)
kernel_idxgridtma_descriptor_metadataZkernel_argsc                 S  s    i | ]\}}t |tr||qS rW   r   )r   r  r  rW   rW   rX   r     r[  z'triton_kernel_wrap_.<locals>.<dictcomp>)*torch._higher_order_ops.triton_kernel_wrapr  Zget_constant_argsr*   ZUserDefinedTritonKernelr   )r  Zconstant_args_idxr  r  r   r  Zconstant_argsrW   rW   rX   triton_kernel_wrap_  s   	

r  c                 C  sj   t dd | g|D r$d}tjjjdd  }r | d| }|tj_tj	| |||}t
ttj	|S )Nc                 s  r  r{   r  r   rW   rW   rX   r     r  zcond.<locals>.<genexpr>z"control flow operator: torch.cond.r  r  )r   rH   rO   rP   r9  r:  r  r*   ZConditionalr   r\   mapr9   )predZtrue_fnZfalse_fnoperandsr   r  r  rW   rW   rX   r     s   r   c                 C  sh   t dd || D r#d}tjjjdd  }r| d| }|tj_tj	| |||}t
ttj	|S )Nc                 s  r  r{   r  r   rW   rW   rX   r     s
    
zwhile_loop.<locals>.<genexpr>z(control flow operator: torch.while_loop.r  r  )r   rH   rO   rP   r9  r:  r  r*   Z	WhileLoopr   r\   r  r9   )Zcond_fnZbody_fnZcarried_inputsadditional_inputsr   r  r  rW   rW   rX   
while_loop  s   r  subgraph_fnir.Subgraph
identifierc                 C  s   t j| |}tttj|S r{   )r*   ZInvokeSubgraphr   r\   r  r9   )r  r  r  r  rW   rW   rX   invoke_subgraph  s   r  )schemec          
      G  s   d }t jjjdd }|d usJ t| jjjD ]Y\}}|jdkr*|| t jj	|< q|jdkrgt j
|\}}t|| D ]}	|	  |jrQt jj|	  t jj|	  q?tjjt j|||}qt j|t jj	|< q|S )Nquant_optionsplaceholderr  )rH   rO   rP   r9  r:  r]   r_  nodesrR   envZfetch_args_kwargs_from_envr   r   r   rH  Zcodegen_low_precisionZlow_precision_codegen_opsr   rI  Zinvoke_quant_opsrm   r  ZInterpreterr  Zrun_node)
r  r  r  r  r  rc   rU   rd   r   r   rW   rW   rX   invoke_quant_tracer  s"   

r  r-  r  tuple[torch.Tensor]c                   s   ddl m m} t|dkrtd fddt||D }|| |fdd}t|d dd d	}td
d |D |d< tdd |D |d< t	j
jd|dd|}|d d u r_td|S )Nr'   )InputDescriptorlower_pointwise_subgraphr   zSUnable to generate code for associative_scan op, because there are lifted argumentsc                   s    g | ]} |  | d qS )r  )r   r`   r   )r  rW   rX   r     s    z$associative_scan.<locals>.<listcomp>c                   s    g t | t |R  S r{   )r  r  )lhsrhs)lowered_combine_fnrW   rX   wrapped_combine_fn  s
   z,associative_scan.<locals>.wrapped_combine_fnr/  c                 s      | ]}|  V  qd S r{   rx  r   rW   rW   rX   r     r   z#associative_scan.<locals>.<genexpr>r  c                 s  r  r{   r#  r   rW   rW   rX   r     r   r  F)r-  Zcan_fallback_to_atenz/Unable to generate code for associative_scan oprW   )r^  r  r  r   r  r   r   r  r   r*   r1  r   )r-  Zxsr  r  Zsubgraph_inputsr  r   r  rW   )r  r  rX   associative_scan  s,   


r  c                 C  rL  r{   rW   )tokensrW   rW   rX   _sink_tokens  r  r  c                 O  s   t jj|g|R i |}ddlm} ||||}|d usJ tjj| }|d u r,|fS t	t j
tj|}t|ttfs@||fS |g|R S )Nr   )get_effect_key)r*   ZEffectfulKernelr   Ztorch._higher_order_ops.effectsr  rH   rO   Zeffectful_opsr  r  ZMultiOutputr9   r^   r\   r   )tokenrR   rd   r   r  r  Zeffect_typeZeffectful_kernelrW   rW   rX   with_effects  s   r  )register_comm_loweringsc                 C  s   t | |dddd}|d }tjjt|}tjjdi |d|d\}}|dkrFtjj	|t
jkrFtjd| d|d	|\}}||fS ttd
 t| |dd}	ttj t| |	}
t|
|dd}|	|fS )zn
    Lowering inductor_prims.prepare_softmax_online to compute max/sum in one pass if no split is needed.
    TNr  r$  Zonline_softmax_reduce)r  r  r'   r%   )r"  Z
num_outputZreduction_hintz
            Online softmax is disabled on the fly since Inductor decides to
            split the reduction. Cut an issue to PyTorch if this is an
            important use case and you want to speed it up with online
            softmax.
            )r  rW   )r  rH   rO   r  simplifyrE   r*   r7   Z
num_splitsr  r(   r-  r3   r   r  r  textwrapdedentrP  rL   rq  r9  r  r  )r   r   r   r$  r  hintZ	num_splitZ
max_tensorZ
sum_tensorr  r9  ZxsumrW   rW   rX   prepare_softmax_online9  s8   



r  r  )quantized_lowerings)mkldnn_lowerings)jagged_loweringsrR   c              	   c  st    t | tjjsJ dt| }zt| t|  dV  W |r&|t| < dS t|  dS |r4|t| < w t|  w )z^
    A context manager to force fallback an op. Used in unit test
    for FallbackKernel.
    z+Only OpOverload to make the clean up easierN)	r^   rm   rw   rx   rL   r:  r   rg  r  )rR   Zold_handlerrW   rW   rX   force_fallback  s   

r  )rZ   r[   )ri   rj   rk   rl   )r   r   )r   r   )rd   r   r   r   r   r   r   r   r   r   rk   r   )r   r   )r   r   rk   r   )NN)NNNNFN)F)r   r9   r   rT  )r   r9   rf   rx  r{   )r   r   r  r'   Tr&  )rK  r9   r  r9   r  r9   r  r   r  r   r  r   r   rT  rk   r9   )rK  r9   r  r9   r  r9   r  r   r  r   r  r   r   rT  r  r  rk   r9   )rK  r9   r  r  r	  r   r  r   r  r   r   rT  rk   r9   )rK  r9   r  r  r	  r   r  r   r  r   r   rT  r  r  rk   r9   )rK  r9   r  r9   r	  r9   r  r   r  r   r   rT  rk   r9   )rK  r9   r  r9   r	  r9   r  r   r  r   r   rT  r  r  rk   r9   )r   r   r'   )rU  r   rV  r   rW  r   r"  )T)rg   r  )rU   r  )NTF)rf   rx  )r   r  r  r9   r  r  rU  r   )
r  r   r  r   r   r  r  r9   rU  r   )r  r9   rk   r  )r  r9   rk   r  )r  r9   r  r9   r  r   r  r   r  r  r  r  rk   r9   )rK  r9   r  r9   r  r   r  r   )r   r   r  r   )r   NNr'   )NNN)r!  FF)r   r   )r  rI  r   r   r  r  r  r   )r   r   r  r  )r   r   r  r   )r%   F)r  r  r  r   r  r   )r  r  )r  r  r  r  )r  r  r  r  r  r  )r   r9   r  r  rR  r  rk   r  )rc   r  r  r  )rc   r  r  r  )rc   r  r  r  r  r  )Nr  N)Nr   r'   F)r  )NNNN)rW   r   FTN)r  rI   r   )r   r6  rk   r  )r!  F)r  r  r  r  )r  r  )r-  r  r  r  rS  (  
__future__r   
contextlibrf  r   r   loggingr  r  r  r  r  collectionsr   collections.abcr   r   typingr   r   r   r	   r
   r   r   Ztyping_extensionsr   Zunittest.mockr   r   rm   Z$torch.ao.quantization.fx._decomposedZtorch.fxZtorch.utils._pytreer}   Z_pytreer  Ztorch._dynamo.utilsr   Z(torch._higher_order_ops.associative_scanr   r  r   Ztorch._prims_commonr   r   r   r   r   r   r   r   r   r   r   Ztorch.fx.experimental.sym_noder   r   Ztorch.utils._ordered_setr   Ztorch.utils._sympy.functionsr    r!   r"   r#   r$   Z_dynamo.utilsr&    r(   r)   r*   r+   decompositionr,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   r  rG   rH   r  rI   rJ   rK   ZFALLBACK_ALLOW_LIST	getLoggerr|  r  rL   __annotations__rM   rw   rx   r   rq  Ztr_c10drh  r   Z_higher_order_opsra  rT   rD  rN   Zquantized_decomposedrY   rh   rz   ry   r   r   r   r  r  r  r  ZbmmZconvolutionZconvolution_backwardr<  r[  r  r  r  Z_int_mmr1  r0  r\  r  r  r8  r  r*  Z	complex32Z	complex64r   r7  r   r   r   r   r   r   r   r   r   r   r  r   r  r  rA  rS  r   ri  rl  rs  rr  r   ru  ry  Z
device_putrz  r~  r  r  r  r   aliasdetachZdetach_ZliftZview_ofr  r   r  r  r  r  r  r  r  r  r  rj  r  r  r  r  r  Z_unsafe_viewZreshaper  slicer  r  r  Zquantize_per_channelr  Zdequantize_per_channelr  Zquantize_per_tensorr   Zdequantize_per_tensorr#  r  r,  r.  rJ  r`  ra  rf  rh  rk  rl  rp  rx  r  rz  r  r}  rg  	lru_cacher  r  r  r  r  r  Zrngprimsr  r  r  Z	bernoullir  r  r  r  r  r  r  r  r  r  r  r  randintZforce_stride_orderr  r  r  r  r  Zlookup_seedr  randomr  r  r  r  r  r  r  r  r  rT  r  rq   r#  Z_adaptive_avg_pool3dZadaptive_max_pool3dZfractional_max_pool3dZmax_pool3d_with_indicesuniformZexponentialZ_pdist_forwardZsoft_margin_loss_backwardZ_cdist_forwardZ_cdist_backwardZ
_trilinearZsegment_reduceZ_segment_reduce_backwardZhistcZ	histogramZbin_ctZ_histogramdd_bin_edgesZ_histogramdd_from_bin_ctsZaddbmmZ_addmm_activationZ
_cudnn_rnnZ_cudnn_rnn_backwardZ_embedding_bagZ_embedding_bag_forward_onlyZ_embedding_bag_backwardZ*_embedding_bag_per_sample_weights_backwardZ_fused_moving_avg_obs_fq_helperZ*_fused_moving_avg_obs_fq_helper_functionalZ max_pool3d_with_indices_backwardZ_adaptive_avg_pool2d_backwardZ_adaptive_avg_pool3d_backwardZadaptive_max_pool2d_backwardZadaptive_max_pool3d_backwardZfractional_max_pool2d_backwardZfractional_max_pool3d_backwardZreplication_pad1d_backwardZreplication_pad2d_backwardZupsample_linear1d_backwardZupsample_bicubic2d_backwardZupsample_trilinear3d_backwardZgrid_sampler_2d_backwardZ_pdist_backwardr_  rZ  ZkthvalueZtopkr  ZmedianZ	nanmedianr  Zresize_Z
resize_as_Z_linalg_detZlinalg_householder_productZlinalg_inv_exZlinalg_ldl_factor_exZlinalg_ldl_solveZ	linalg_luZlinalg_lu_factor_exZlinalg_lu_solveZlinalg_matrix_expZ	linalg_qrZ_linalg_slogdetZ_linalg_solve_exZlinalg_solve_triangularZ_linalg_svdZ	lu_unpackZormqrZ_linalg_check_errorsZlinalg_pinvZatol_rtol_tensorZ_linalg_eighZtriangular_solveZlinalg_cholesky_exZcholesky_inverseZcholesky_solveZgeqrfZ_fft_r2cZnonzerogcdZ_thnn_fused_lstm_cellZ_primsZ	rng_primsZrun_and_save_rng_stateZrun_with_rng_stateZgraphsafe_run_with_rng_stateZmasked_scatterZmasked_scatter_backwardr  ZangleZ_efficientzerotensorZ(_sparse_coo_tensor_with_dims_and_tensorsZ	to_sparseZ
_to_sparser   rW  r  r  Z#_scaled_dot_product_flash_attentionZ,_scaled_dot_product_flash_attention_backwardZ#_scaled_dot_product_cudnn_attentionZ,_scaled_dot_product_cudnn_attention_backwardZ+_scaled_dot_product_flash_attention_for_cpuZ4_scaled_dot_product_flash_attention_for_cpu_backwardZ0_scaled_dot_product_fused_attention_overrideableZ9_scaled_dot_product_fused_attention_overrideable_backwardZ_flash_attention_forwardZ_flash_attention_backwardZ_efficient_attention_forwardZ_efficient_attention_backwardZindex_reducer[  rZ  r&  r  r+  r/  r4  r5  Zscalar_tensorrB  Z
LongTensorrD  rK  rM  rN  rS  r  rV  r  rU  r  rf  r  Z
zeros_likerb  re  r\  rh  rl  rm  rr  rw  r{  r  r  r  r  r  r  r  r  r  r  r  r  r  Zfallback__unsafe_masked_indexr  Z,fallback__unsafe_masked_index_put_accumulater  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rK  r  r  r  r  r  r  r  r,  r.  r:  r;  rW  rb  rh  rt  ry  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  ZTensor_Tensorr  ZScalarr
  ZTensor_Scalarr  rd  r  r  r  r  r  r  r4  r  r  Ztrue_divider   r"  r  r  r4  r2  r7  r6  r<  r;  rF  rD  rJ  rI  r  r   rM  r  rR  r  rV  rW  r  rP  r  rT  r%  rQ  rH  rU  r   r]  r^  rb  rc  Zrsqrtr9  r	  expm1Zrelur|  r  r  r  cossinabsZbitwise_andZbitwise_left_shiftZbitwise_notZ
bitwise_orZbitwise_right_shiftZbitwise_xorlgammaerfZspecial_erfr8  tantanhr  rd  rX  logical_xorr  r  Z	clamp_minZ	clamp_maxnegr  	remaindersignZsignbitZ	_neg_viewler  r  ru  r,  necoshsinhacosacoshasinasinhatan2atanatanhr  erfcZerfinvhypotlog10log2	nextafterZcodegen.commonrF  rf  ri  r   rR   r   r?  Z_foreach_addListZforeach_add_listZforeach_add_scalarZ_foreach_mulZforeach_mul_listZforeach_mul_scalarZ_foreach_subZ_foreach_negZ_foreach_absZ_foreach_powZScalarAndTensorZ_foreach_divZforeach_div_listZforeach_div_scalarZ_foreach_sqrtZ_foreach_rsqrtZ_foreach_maximumZ_foreach_minimumZ_foreach_clamp_minZ_foreach_clamp_maxZ_foreach_reciprocalZ_foreach_signZ_foreach_copyrn  Z_foreach_add_Z_foreach_mul_Z_foreach_div_ro  Zadd_Zbitwise_and_Zbitwise_left_shift_Zbitwise_not_Zbitwise_or_Zbitwise_right_shift_Zbitwise_xor_Zmul_Zdiv_ZTensor_modeZlogical_and_Zlogical_not_Zlogical_or_Zlogical_xor_Zsub_Zrelu_Zsigmoid___and__
__lshift____or__
__rshift____xor____iand____ilshift____ior____irshift____ixor__rp  ru  r   rv  rw  r   methodfuncry  rz  Z_inductor_testrH  r{  r  r}  set_Zsource_Tensorr~  Zfsdpr  r  Z*torch._higher_order_ops.auto_functionalizer  r  Zhigher_orderr   r  r  Zinvoke_quantr  r  r  r  Zcomm_loweringr  r  r  r  Zregister_quantized_opsZregister_woq_mm_opsr  Zregister_onednn_fusion_opsr  Zregister_jagged_opscontextmanagerr  rW   rW   rW   rX   <module>   s
  $4@4



	H85^:
,
4


#












1
E22$$-/|6



		
%2
&





			]/ 


C
8



.



%#I%

~



!t/


_7#A 

 
)5


E


Q


F-


z
	 &

	 H1
+


/
#
	 











'	














@

#6
