a
    h8                     @   s  d dl mZ d dlZd dlm  mZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZmZ d dlmZ d d	lmZmZmZ d d
lmZmZ dd Zd ddZejdddZejdddZdd Zdd Z dd Z!dd Z"e! Z#e" Z$dd Z%e% Z&dd Z'dS )!    )OptionalN)_prims)DispatchKey)autograd_not_implemented)HigherOrderOperator)CUDARngStateHelpermake_contiguous_strides_for)FakeTensorMode)disable_proxy_modes_tracingProxyTorchDispatchModetrack_tensor_tree)_device_dtypec                 C   s&   t d| j d| j d| j dd S )Nz"You are trying to functionalize a z RNG operator but zE does not use Philox/counter-based RNG. Therefore, functionalizing a zo RNG operator is not supported. We are discussing the possibility of a Philox-based RNG implementation for CPU.)RuntimeErrortype)device r   D/var/www/auris/lib/python3.9/site-packages/torch/_prims/rng_prims.pythrow_on_non_cuda   s
    r   c           
      C   s|   t jjd|  |d|d}|| tt jjj| }|j}|rB||_	||fD ],}	||	_
t jjj|	_| | |	_||	_||	_qJd S )Nz
rngprims::r   )Zmutates_argsschema)torchlibraryZ	custom_opZregister_fakegetattrZ_opsopsZrngprimsdefaultZ_tags__doc__Z_prims_commonZRETURN_TYPEZNEWreturn_typer   	impl_atenZprim_meta_impl)
namer   r   	impl_metadoctagsZrngprim_defZprim_packetZprimpr   r   r   register_rng_prim   s    

r#   shapec                 C   s   t tjdtjdS )Nr   dtype)r   Z
TensorLiker   Ztensorint64r$   r   r   r   philox_rand_offset_meta2   s    r)   c                 C   s   d}| D ]}||9 }qt j|t jd}d}d}d}t jt j }|j| }|| d | }	t|	|j| }	|d ||	 |  d | }
|
S )N   r&         )	r   Zscalar_tensorr(   cudaZget_device_propertiesZcurrent_deviceZmax_threads_per_multi_processorminZmulti_processor_count)r%   Znumel_scalarZdim_sizeZnumel
block_sizeZunrollZcurand4_engine_callsZdevice_propertyZblocks_per_smZ	grid_sizeoffsetr   r   r   philox_rand_offset8   s    

r1   c                  C   sz   d} d}t jt jt jtttdf  ttddd}t jt jt jtttdf  ttddd}t| |||d	t j	j
fd
 d S )NZphilox_randz{(SymInt[] size, Tensor seed, Tensor offset, int[]? stride, Device? device=None, ScalarType? dtype=None) -> (Tensor, Tensor).)r%   seedr0   strider   r'   c                 S   s6   |d u sJ t | }tj| |||d}t| }||fS )N)r%   stridesr'   r   )r   r   Z
TensorMetar)   )r%   r2   r0   r3   r   r'   random_valuesr   r   r   _philox_rand_metaT   s    	z/register_philox_rand.<locals>._philox_rand_metac                 S   s   |d u sJ |j dkrg }n|g}|j dkr4t|tj|, t|| tj| ||d}W d    n1 sr0    Y  |t| fS )Ncpur-   )r   r'   )	r   r   r   randomZfork_rngr   Zset_torch_state_tensorZrandr1   )r%   r2   r0   r3   r   r'   devicesr5   r   r   r   _philox_rande   s    	

.z*register_philox_rand.<locals>._philox_randz$Philox based stateless rand operator)r   r   r   r   r    r!   )r   SizeTensorr   tupleintr   r   r#   TagZnondeterministic_seeded)r   r   r6   r:   r   r   r   register_philox_randP   s0    r@   c                 C   s   | dr.| d}t|tr(t|}|jS dd | D }tdd |D rRdS tdd |D rhdS td	d |D r~d
S tdd |D rdS d S )Nr   c                 S   s    h | ]}t |tjr|jjqS r   )
isinstancer   r<   r   r   ).0argr   r   r   	<setcomp>       zget_device.<locals>.<setcomp>c                 s   s   | ]}|d kV  qdS )r-   Nr   rB   devr   r   r   	<genexpr>   rE   zget_device.<locals>.<genexpr>r-   c                 s   s   | ]}|d kV  qdS )xpuNr   rF   r   r   r   rH      rE   rI   c                 s   s   | ]}|d kV  qdS )hpuNr   rF   r   r   r   rH      rE   rJ   c                 s   s   | ]}|d kV  qdS )r7   Nr   rF   r   r   r   rH      rE   r7   )getrA   strr   r   r   any)argskwargsr   r9   r   r   r   
get_device   s    



rP   c                     s   G dd dt } |  tjtdd tjdd tjdd tjd	d
 tjdd tj	fdd t
 fdd}t fdd}S )Nc                       s(   e Zd Z fddZ fddZ  ZS )z>register_run_and_save_rng_state_op.<locals>.RunAndSaveRngStatec                    s   t  d d S )Nrun_and_save_rng_statesuper__init__self	__class__r   r   rT      s    zGregister_run_and_save_rng_state_op.<locals>.RunAndSaveRngState.__init__c                    s   t  j|g|R i |S NrS   __call__)rV   oprN   rO   rW   r   r   r[      s    zGregister_run_and_save_rng_state_op.<locals>.RunAndSaveRngState.__call____name__
__module____qualname__rT   r[   __classcell__r   r   rW   r   RunAndSaveRngState   s   rb   TZdeferred_errorc                 _   s   t j | |i |fS rY   )r   r-   get_rng_stater\   rN   rO   r   r   r   	impl_cuda   s    z5register_run_and_save_rng_state_op.<locals>.impl_cudac                 _   s   t  | |i |fS rY   )r   rd   re   r   r   r   impl_cpu   s    z4register_run_and_save_rng_state_op.<locals>.impl_cpuc                 _   s.   t tdr"tj | |i |fS tdd S NrJ   z2functionalize a hpu RNG operator is not supported.)hasattrr   rJ   rd   r   re   r   r   r   impl_hpu   s    
z4register_run_and_save_rng_state_op.<locals>.impl_hpuc                 _   s   t j | |i |fS rY   )r   rI   rd   re   r   r   r   impl_xpu   s    z4register_run_and_save_rng_state_op.<locals>.impl_xpuc                    sL    d}t ||}||v s.J d| || }|| g|R i |S N)r-   r7   rJ   rI   zBackend not supported for rP   )r\   rN   rO   impl_mapr   implrg   rf   rj   rk   r   r   impl_backend_select   s    
z?register_run_and_save_rng_state_op.<locals>.impl_backend_selectc                    s>   | &  |g|R i |W  d    S 1 s00    Y  d S rY   r   )moder\   rN   rO   )rq   r   r   impl_fake_tensor_mode   s    zAregister_run_and_save_rng_state_op.<locals>.impl_fake_tensor_modec                    sb    |g|R i |}t | jj|g|R }t | jj|}| jd||}t||d | jdS Ncall_functionZconstanttracer)pytreetree_maprw   unwrap_proxycreate_proxyr   )rr   r\   rN   rO   out
proxy_argsproxy_kwargs	out_proxy)rq   rQ   r   r   impl_proxy_dispatch_mode   s    zDregister_run_and_save_rng_state_op.<locals>.impl_proxy_dispatch_mode)r   py_implr   Autogradr   CUDACPUHPUXPUBackendSelectr	   r   )rb   rs   r   r   )rq   rg   rf   rj   rk   rQ   r   "register_run_and_save_rng_state_op   s(    










	r   c                     s   G dd dt } |  tjtdd tjdd tjdd  tjd	d
 tjdd t	fdd}tj
 fdd}tdd }jfdd}S )Nc                       s(   e Zd Z fddZ fddZ  ZS )z7register_run_with_rng_state_op.<locals>.RunWithRngStatec                    s   t  d d S )Nrun_with_rng_staterR   rU   rW   r   r   rT      s    z@register_run_with_rng_state_op.<locals>.RunWithRngState.__init__c                    s   t  j||g|R i |S rY   rZ   )rV   	rng_stater\   rN   rO   rW   r   r   r[      s    z@register_run_with_rng_state_op.<locals>.RunWithRngState.__call__r]   r   r   rW   r   RunWithRngState   s   r   Trc   c                 _   s8   t j }t j|   ||i |}t j| |S rY   )r   r-   rd   set_rng_stater7   r   r\   rN   rO   current_stater|   r   r   r   rf      s
    
z1register_run_with_rng_state_op.<locals>.impl_cudac                 _   s.   t  }t |  ||i |}t | |S rY   )r   rd   r   r   r   r   r   rg      s
    

z0register_run_with_rng_state_op.<locals>.impl_cpuc                 _   sJ   t tdr>tj }tj|  ||i |}tj| |S tdd S rh   )ri   r   rJ   rd   r   r   r   r   r   r   rj      s    

z0register_run_with_rng_state_op.<locals>.impl_hpuc                 _   s4   t j }t j|  ||i |}t j| |S rY   )r   rI   rd   r   r   r   r   r   rk     s
    
z0register_run_with_rng_state_op.<locals>.impl_xpuc           	         s   t  (  ||g|R i |}W d    n1 s40    Y  t| jj||g|R }t| jj|}| jd ||}t||d | jdS rt   r
   rx   ry   rw   rz   r{   r   )	rr   r   r\   rN   rO   r|   r}   r~   r   r   r   r   r   
  s    6z@register_run_with_rng_state_op.<locals>.impl_proxy_dispatch_modec                    sN    d}t ||}||v s.J d| || }|| |g|R i |S rl   rm   )r   r\   rN   rO   rn   r   ro   rp   r   r   rq     s    
z;register_run_with_rng_state_op.<locals>.impl_backend_selectc                 _   s6   |  ||i |W  d    S 1 s(0    Y  d S rY   r   )rr   r   r\   rN   rO   r   r   r   rs   $  s    z=register_run_with_rng_state_op.<locals>.impl_fake_tensor_modec           	         sl   |  |}|  |}|  |}|  2  ||g|R i |}| |W  d    S 1 s^0    Y  d S rY   Zunwrap_tensorsZredispatch_to_nextZwrap_tensors)	ctxr   r\   rN   rO   unwrapped_rng_stateunwrapped_argsunwrapped_kwargsr|   r   r   r   impl_functional+  s    



z7register_run_with_rng_state_op.<locals>.impl_functional)r   r   r   r   r   r   r   r   r   r   r   r	   py_functionalize_impl)r   r   rq   rs   r   r   )rg   rf   rj   rk   r   r   register_run_with_rng_state_op   s,    







	



r   c                     s   G dd dt } |    tjt dd  tjd ddd tjd dfdd	
} td dd
d} td d fdd
} j	d d fdd
} S )Nc                       s.   e Zd Z fddZdd fdd
Z  ZS )zJregister_graphsafe_run_with_rng_state_op.<locals>.GraphSafeRunWithRngStatec                    s   t  d d S )Ngraphsafe_run_with_rng_staterR   rU   rW   r   r   rT   @  s    zSregister_graphsafe_run_with_rng_state_op.<locals>.GraphSafeRunWithRngState.__init__Nr   c                   s   t  j|g|R d|i|S Nr   rZ   )rV   r\   r   rN   rO   rW   r   r   r[   C  s    zSregister_graphsafe_run_with_rng_state_op.<locals>.GraphSafeRunWithRngState.__call__r]   r   r   rW   r   GraphSafeRunWithRngState?  s   r   Trc   r   c                _   sB   |j j}tjj| }| }|| | |i |}|| |S rY   )r   indexr   r-   Zdefault_generatorsZgraphsafe_get_stateZgraphsafe_set_state)r\   r   rN   rO   Z
device_idx	generatorr   r|   r   r   r   rf   L  s    

z;register_graphsafe_run_with_rng_state_op.<locals>.impl_cudac                   s:   t ||}|dks J d|  | g|R d|i|S )Nr-   z6GraphSafe RNG operations only supported for CUDA, got r   rm   )r\   r   rN   rO   r   )rf   r   r   rq   V  s    
zEregister_graphsafe_run_with_rng_state_op.<locals>.impl_backend_selectc                _   s6   |  ||i |W  d    S 1 s(0    Y  d S rY   r   )rr   r\   r   rN   rO   r   r   r   rs   ^  s    zGregister_graphsafe_run_with_rng_state_op.<locals>.impl_fake_tensor_modec          	         s   t  *  |g|R d|i|}W d    n1 s60    Y  t| jj|g|R }t| jjd|i|}| jd ||}t||d | jdS )Nr   ru   rv   r   )	rr   r\   r   rN   rO   r|   r}   r~   r   r   r   r   r   c  s    8zJregister_graphsafe_run_with_rng_state_op.<locals>.impl_proxy_dispatch_modec          	         sz   |d ur|  |nd }|  |}|  |}|  4  |g|R d|i|}| |W  d    S 1 sl0    Y  d S r   r   )	r   r\   r   rN   rO   r   r   r   r|   r   r   r   r   p  s    


zAregister_graphsafe_run_with_rng_state_op.<locals>.impl_functional)
r   r   r   r   r   r   r   r	   r   r   )r   rq   rs   r   r   r   )r   rf   r   (register_graphsafe_run_with_rng_state_op>  s     


	
r   c                   C   s
   t   d S rY   )r@   r   r   r   r   register_rng_prims  s    r   )N)(typingr   r   Ztorch.utils._pytreeutilsZ_pytreerx   r   Ztorch._Cr   Ztorch._higher_order_ops.utilsr   Z
torch._opsr   Ztorch._prims_commonr   r   Ztorch._subclasses.fake_tensorr	   Z"torch.fx.experimental.proxy_tensorr
   r   r   Ztorch.typesr   r   r   r#   r;   r)   r1   r@   rP   r   r   rQ   r   r   r   r   r   r   r   r   <module>   s2   
7@`C