o
    vZhE                     @  s  d dl mZ d dlmZ d dlmZ d dlm  mZ d dl	m
Z
 d dlmZmZmZmZmZmZmZmZmZ d dlmZ d dlmZmZmZmZmZmZmZ d d	lm Z  ercd d
l!m"Z" d dlm#Z# eddG dd dZ$eddG dd dZ%d5ddZ&d6ddZ'eddG dd dZ(e(d dej)de(dd ej*dgZ+d7d(d)Z,ed8d*d+Z-eddG d,d- d-Z.ed8d.d/Z/d9d1d2Z0ed8d3d4Z1dS ):    )annotations)	dataclass)TYPE_CHECKINGN)	translate)		BaseCTypeBindingCTypeExpr
NamedCTypeopmath_tscalar_tStructuredImplSignatureVectorizedCType)with_native_function)ArgumentBaseTyBaseTypeDispatchKeyNativeFunctionsGroup
ScalarTypeUfuncKey)
OrderedSet)Sequence)UfunctorBindingsT)frozenc                   @  sb   e Zd ZU ded< ded< ded< dd	d
ZdddZdddZdddZdddZdddZ	dS )UfunctorSignaturer   gz
int | Nonescalar_tensor_idxstrnamereturnr   c                 C  s   t j| j| jtdS )N)r   r   )ufuncZufunctor_argumentsr   r   r   self r$   B/var/www/auris/lib/python3.10/site-packages/torchgen/dest/ufunc.py	argumentsI   s   
zUfunctorSignature.argumentslist[Binding]c                 C  s   dd |   jD S )Nc                 S  s   g | ]}| |j d qS )_)renamer   .0br$   r$   r%   
<listcomp>P   s    z,UfunctorSignature.fields.<locals>.<listcomp>)r&   ctorr"   r$   r$   r%   fieldsN   s   zUfunctorSignature.fieldsr   c                 C  s   t tS N)r   r   r"   r$   r$   r%   returns_typeR   s   zUfunctorSignature.returns_typec                 C  s   d dd |  D S )N
c                 s  s$    | ]}|j  d |j dV  qdS ) ;N)typer   )r+   fr$   r$   r%   	<genexpr>X      " z0UfunctorSignature.decl_fields.<locals>.<genexpr>)joinr/   r"   r$   r$   r%   decl_fieldsW   s   zUfunctorSignature.decl_fieldsc                 C  sL   d dd |  jD }d dd |  jD }| j d| d| dS )N, c                 s      | ]}|  V  qd S r0   declr+   ar$   r$   r%   r7   [       z5UfunctorSignature.inline_defn_ctor.<locals>.<genexpr>c                 s  s$    | ]}|j  d |j  dV  qdS )z_()Nr   r?   r$   r$   r%   r7   ^   r8   (z) : z {})r9   r&   r.   r   )r#   args_strZinit_strr$   r$   r%   inline_defn_ctorZ   s   z"UfunctorSignature.inline_defn_ctorc                 C  s2   d dd |  jD }|    d| dS )Nr;   c                 s  r<   r0   r=   r?   r$   r$   r%   r7   b   rA   z/UfunctorSignature.decl_apply.<locals>.<genexpr>z operator()(z) const)r9   r&   applyr1   Zcpp_type)r#   rE   r$   r$   r%   
decl_applya   s   zUfunctorSignature.decl_applyN)r    r   r    r'   )r    r   r    r   )
__name__
__module____qualname____annotations__r&   r/   r1   r:   rF   rH   r$   r$   r$   r%   r   C   s   
 




r   c                   @  s:   e Zd ZU ded< ded< ded< dd	d
ZdddZdS )UfuncSignaturer   r   r   r   r   	compute_tr    r'   c                 C  s   t j| j| jdS )N)rP   )r!   Zufunc_argumentsr   rP   r"   r$   r$   r%   r&   l   s   zUfuncSignature.argumentsctxSequence[Binding | Expr]c              	   C  ,   | j  dddd t||  D  dS )NrD   r;   c                 s      | ]}|j V  qd S r0   exprr?   r$   r$   r%   r7   p       z&UfuncSignature.call.<locals>.<genexpr>rB   r   r9   r   r&   r#   rQ   r$   r$   r%   callo      ,zUfuncSignature.callNrI   )rQ   rR   r    r   )rK   rL   rM   rN   r&   rZ   r$   r$   r$   r%   rO   f   s   
 
rO   r   r   r    boolc                 C  s"   t dd | jjjjD }|dkS )Nc                 s  s    | ]
}|j  rd V  qdS )   N)r5   is_tensor_liker?   r$   r$   r%   r7      s    

z<eligible_for_binary_scalar_specialization.<locals>.<genexpr>   )sum
functionalfuncr&   flat_non_out)r   Znum_tensorsr$   r$   r%   )eligible_for_binary_scalar_specialization   s   
rd   ?tuple[dict[ScalarType, dict[UfuncKey, UfunctorSignature]], str]c                 C  s  i }g }| j j}tjdtjdtjd i}t| r tjtjtjg}ntjg}tjtjfD ]}||vs8J d| dq*|D ]}||v r^t| || || jd}|| j	D ]
}||
|i |< qRq;d }	t }
tjtjfD ]#}||vrpqi|	d u rz|| j}	n|	|| jksJ d|
|| j	O }
qi|	d usJ | d|	 }t| || |d}|
D ]
}||
|i |< qt| d|	 ttd	}| | j }|d
|j d|  d|  d|  d|| d q;|d|fS )Nr]   r   zcannot use z on non-binary function)r   r   z0ScalarOnly and Generic must have same ufunc namer(   ufunc::r   rP   z%
template <typename scalar_t>
struct z3 {
  using opmath_t = at::opmath_type<scalar_t>;
  z
  z
  __device__ z {
    return z	;
  }
};
r2   )outufunc_inner_loopr   CUDAFunctorOnSelfCUDAFunctorOnOtherCUDAFunctorrd   r   r   supported_dtypes
setdefaultr   
ScalarOnlyGenericrO   r   r   r/   r&   rG   appendr:   rF   rH   rZ   r9   )r   ufunctor_sigs	ufunctorsloopsZscalar_tensor_idx_lookupkeyskufunctor_sigdtypeZ
ufunc_namerm   lkr   Z	ufunc_sigZ	apply_ctxr$   r$   r%   compute_ufunc_cuda_functors   sv   	
rz   c                   @  s&   e Zd ZU ded< ded< ded< dS ) BinaryScalarSpecializationConfigint
scalar_idxr   ctor_tensorr   	ufunc_keyN)rK   rL   rM   rN   r$   r$   r$   r%   r{      s   
 r{   r#   )r}   r~   r   r]   otherrx   r   inner_loops!dict[UfuncKey, UfunctorSignature]
parent_ctxSequence[Binding]r   c           
      C  s   d}|d7 }t D ]K}|j|vrq||j }|jd }t|}|td| dt|jtt	d d
dd	 t|| jD }	|d
| d|j d|	 d| d	7 }q|tj }d
dd	 t|| jD }	|d|j d|	 d7 }|S )Nz+using opmath_t = at::opmath_type<scalar_t>;zif (false) {}
r]   ziter.scalar_value<opmath_t>(rB   )rV   r5   r;   c                 s  rT   r0   rU   r?   r$   r$   r%   r7   
      
z0compute_ufunc_cuda_dtype_body.<locals>.<genexpr>zelse if (iter.is_cpu_scalar(z)) {
  z<scalar_t> ufunctor(z);
  iter.remove_operand(z");
  gpu_kernel(iter, ufunctor);
}c                 s  rT   r0   rU   r?   r$   r$   r%   r7     r   z
else {
  gpu_kernel(iter, z<scalar_t>(z
));
}
    )!BinaryScalarSpecializationConfigsr   r}   listrq   r	   r
   r~   r   r   r9   r   r&   r.   r   r   rl   )
r   rx   r   r   bodyconfigrw   r}   rQ   Zufunctor_ctor_exprs_strr$   r$   r%   compute_ufunc_cuda_dtype_body   sH   







r   c           	      C  s   t | \}}t| t| tj}g }| D ]\}}|d| dt| |||	  d qd
|}t| }d| d|  d|  d|  d|j d| d	|j d
|j d|  d||	  dS )N"
AT_DISPATCH_CASE(at::ScalarType::,
  [&]() {
    
  }
)
r2   z

;
. {
  AT_DISPATCH_SWITCH(iter.common_dtype(), "",
    z
  );
}
REGISTER_DISPATCH(, &z)

 {
  ;
}
)rz   r   r!   kernel_namer   CUDAitemsrq   r   r&   r9   StubSignature	type_defndispatch_declkernel_defnr   defndirect_call)	r   rr   rs   sigdtype_casesrx   inner_ufunc_sigsdtype_cases_strstub_sigr$   r$   r%   compute_ufunc_cuda#  sD   

r   c                   @  s   e Zd ZU ded< edddZedddZedd	d
Zd ddZdddZ	dddZ
dddZdddZdddZd!ddZd!ddZdS )"r   r   r   r    r   c                 C     t | jjjjj dS )NZ_stubr   r   ra   rb   r   r"   r$   r$   r%   r   X     zStubSignature.namec                 C  r   )NZ_kernelr   r"   r$   r$   r%   r   \  r   zStubSignature.kernel_namec                 C  r   )N_fnr   r"   r$   r$   r%   	type_name`  r   zStubSignature.type_namer'   c                 C  s   t | jS r0   )r!   Zstub_argumentsr   r"   r$   r$   r%   r&   d  s   zStubSignature.argumentsc                 C  s$   |   }dddd |D  dS )Nzvoid(*)(TensorIteratorBase&, r;   c                 s  rT   r0   )r5   r?   r$   r$   r%   r7   i  rW   z%StubSignature.type.<locals>.<genexpr>rB   )r&   r9   )r#   Zcpp_argsr$   r$   r%   r5   g  s   zStubSignature.typec                 C  s   d| j  d| j dS )NzDECLARE_DISPATCH(r;   rB   )r   r   r"   r$   r$   r%   r   k     zStubSignature.dispatch_declc                 C  s   d| j  dS )NzDEFINE_DISPATCH(rB   rC   r"   r$   r$   r%   dispatch_defnn  s   zStubSignature.dispatch_defnc                 C  s(   d| j  dddd |  D  dS )Nzvoid z(TensorIteratorBase& iter, r;   c                 s  r<   r0   )r   r?   r$   r$   r%   r7   r  rA   z,StubSignature.kernel_defn.<locals>.<genexpr>rB   )r   r9   r&   r"   r$   r$   r%   r   q  s   (zStubSignature.kernel_defnc                 C  s   d| j  d|   S )Nzusing  = )r   r5   r"   r$   r$   r%   r   t  r   zStubSignature.type_defnrQ   r   c              	   C  rS   )Nz(device_type(), *this, r;   c                 s  rT   r0   rU   r?   r$   r$   r%   r7   y  rW   z%StubSignature.call.<locals>.<genexpr>rB   rX   rY   r$   r$   r%   rZ   x  r[   zStubSignature.callc              	   C  rS   )Nz(*this, r;   c                 s  rT   r0   rU   r?   r$   r$   r%   r7   }  rW   z,StubSignature.direct_call.<locals>.<genexpr>rB   )r   r9   r   r&   rY   r$   r$   r%   r   |  r[   zStubSignature.direct_callNrJ   rI   )rQ   r   r    r   )rK   rL   rM   rN   propertyr   r   r   r&   r5   r   r   r   r   rZ   r   r$   r$   r$   r%   r   T  s    
 






r   c                 C  sZ   t | }t| t| tj}d|  d|  d|  d|	  d|
|  dS )Nr2   r   z;

r   r   )r   r   r!   r   r   ZCPUr   r   r   r   rZ   r&   )r   r   r   r$   r$   r%   compute_ufunc_cpu  s   r   dict[UfuncKey, UfuncSignature]c                   s|  t j|v sJ | d|  | t jt jhksJ |t j }d }t j|v r-|t j }g }g  |D ]3}t|jtrE|jjtt	j
krEq3|d|j d|j d  td|j t|jjtt q3|d ur|D ]5}t|jtr|jjtt	j
krqm|d|j d|j d  td	|j t|jjttt qmg }g }	| jjjjD ]:}
|
j sq|
jtt	jksJ |t|
jt|
jtt|
d
 |d ur|	t|
jt|
jttt|
d
 qd fdd}d|}|d ur%d| dddd |D  d||| dddd |	D  d|||	 dS d| dddd |D  d||| dS )Nr;   zauto _s_r   z.to<scalar_t>();Z_s_zauto _v_z$ = at::vec::Vectorized<scalar_t>(_s_z);Z_v_)r   nctypeargumentr,   r   r    list[Expr | Binding]c                   s   g }|   | |  |S r0   )extend)r,   rrQ   r$   r%   with_ctx  s   

z.compute_ufunc_cpu_dtype_body.<locals>.with_ctxr2   z
cpu_kernel_vec(iter,
  [=](c                 s  r<   r0   r=   r*   r$   r$   r%   r7     rA   z/compute_ufunc_cpu_dtype_body.<locals>.<genexpr>z) { return z; },
  [=](c                 s  r<   r0   r=   r*   r$   r$   r%   r7     rA   z; }
);
z
cpu_kernel(iter,
  [=](c                 s  r<   r0   r=   r*   r$   r$   r%   r7     rA   )r,   r   r    r   )r   	CPUScalarru   	CPUVector
isinstancer   r   r5   r   r   ZScalarrq   r   r	   r
   r   r   r   r   ra   rb   r&   rc   r^   ZTensorr   r9   rZ   )r   rx   r   r   Zscalar_loopZvec_loopr   r,   Zscalar_bindingsZvec_bindingsr@   r   Zbody_strr$   r   r%   compute_ufunc_cpu_dtype_body  s    


	(
	


r   c                 C  sv  t | }| jj}i }tjtjfD ]f}g }||v r|| tj|v r-|tju r-|tj tj|v r8|tj |D ];}|| j	D ]3}|tju rMt
t}n|tju rYtt
t}nt||i }	||	vrtt| d|| j |d|	|< qAq:qg }
| D ]\}}	|
d| dt| ||	|  d q}d|
}d|  d|j d	| d
|  d|  d|j d|j dS )Nrf   rg   r   r   r   r2   z
namespace {

r   r   z#
  );
}

} // anonymous namespace

r   z
REGISTER_DISPATCH(r   z)
)r   rh   ri   r   r   r   rq   ro   rp   rm   r   r   r   AssertionErrorrn   rO   r   r   r   r&   r9   r   r   r   r   )r   r   rt   Z
ufunc_sigsrv   Zlksry   rx   rP   r   r   r   r$   r$   r%   compute_ufunc_cpu_kernel  sf   







r   )r   r   r    r\   )r   r   r    re   )
r   r   rx   r   r   r   r   r   r    r   )r   r   r    r   )
r   r   rx   r   r   r   r   r   r    r   )2
__future__r   dataclassesr   typingr   Ztorchgen.api.ufuncapir!   Ztorchgen.api.translater   Ztorchgen.api.typesr   r   r   r	   r
   r   r   r   r   Ztorchgen.contextr   Ztorchgen.modelr   r   r   r   r   r   r   Ztorchgen.utilsr   collections.abcr   r   r   rO   rd   rz   r{   rk   rj   r   r   r   r   r   r   r   r$   r$   r$   r%   <module>   sR    ,$	 "

S
/0+
]