
    [Th8                     <   S SK Jr  S SKrS SKJs  Jr  S SKJr  S SKJ	r	  S SK
Jr  S SKJr  S SKJrJr  S SKJr  S S	KJrJrJr  S S
KJrJr  S rSS jrS\R8                  4S jrS\R8                  4S jrS rS r S r!S r"\!" 5       r#\"" 5       r$S r%\%" 5       r&S r'g)    )OptionalN)_prims)DispatchKey)autograd_not_implemented)HigherOrderOperator)CUDARngStateHelpermake_contiguous_strides_for)FakeTensorMode)disable_proxy_modes_tracingProxyTorchDispatchModetrack_tensor_tree)_device_dtypec           	      h    [        SU R                   SU R                   SU R                   S35      e)Nz"You are trying to functionalize a z RNG operator but zE does not use Philox/counter-based RNG. Therefore, functionalizing a zo RNG operator is not supported. We are discussing the possibility of a Philox-based RNG implementation for CPU.)RuntimeErrortype)devices    N/var/www/auris/envauris/lib/python3.13/site-packages/torch/_prims/rng_prims.pythrow_on_non_cudar      sB    

,V[[M9KFKK= YFFLkk] Si	i     c                    [         R                  R                  SU -   USUS9nUR                  U5        [	        [         R
                  R                  R                  U 5      nUR                  nU(       a  XXl	        Xx4 HG  n	XIl
        [         R                  R                  R                  U	l        X-   U	l        X)l        X9l        MI     g )Nz
rngprims:: )mutates_argsschema)torchlibrary	custom_opregister_fakegetattr_opsopsrngprimsdefault_tags__doc___prims_commonRETURN_TYPENEWreturn_typer   	impl_atenprim_meta_impl)
namer   r*   	impl_metadoctagsrngprim_defprim_packetprimps
             r   register_rng_primr4      s    --))tYR * K i(%**..1148KD
 	++77;;=$ !r   shapec                 p    [         R                  " [        R                  " S[        R                  S95      $ )Nr   dtype)r   
TensorLiker   tensorint64)r5   s    r   philox_rand_offset_metar<   2   s$     U\\!5;;?@@r   c                 t   SnU  H  nX-  nM	     [         R                  " U[         R                  S9nSnSnSn[         R                  R	                  [         R                  R                  5       5      nUR                  U-  nX4-   S-
  U-  n	[        XR                  U-  5      n	US-
  XI-  U-  -  S-   U-  n
U
$ )N   r7         )	r   scalar_tensorr;   cudaget_device_propertiescurrent_devicemax_threads_per_multi_processorminmulti_processor_count)r5   numel_scalardim_sizenumel
block_sizeunrollcurand4_engine_callsdevice_propertyblocks_per_sm	grid_sizeoffsets              r   philox_rand_offsetrR   8   s     L  EKK@EJFjj66uzz7P7P7RSO#CCzQM#a'J6IIDD}TUI	
.781<F Mr   c                     Sn SnS[         R                  S[         R                  S[         R                  S[        [        [
        S4      S[        S	[        4S
 jnS[         R                  S[         R                  S[         R                  S[        [        [
        S4      S[        S	[        4S jn[        U UUUS[         R                  R                  4S9  g )Nphilox_randz{(SymInt[] size, Tensor seed, Tensor offset, int[]? stride, Device? device=None, ScalarType? dtype=None) -> (Tensor, Tensor)r5   seedrQ   stride.r   r8   c                 h    Ub   e[        U 5      n[        R                  " XXTS9n[        U 5      nXb4$ )N)r5   stridesr8   r   )r	   r   
TensorMetar<   )r5   rU   rQ   rV   r   r8   random_valuess          r   _philox_rand_meta/register_philox_rand.<locals>._philox_rand_metaT   sB     ~~,U3))u
 )/&&r   c                 P   Ub   eUR                   S:X  a  / nOU/nUR                   S:w  a  [        U5      e[        R                  R	                  U5         [
        R                  " X5        [        R                  " XUS9nS S S 5        W[        U 5      4$ ! , (       d  f       N= f)NcpurB   )r   r8   )	r   r   r   randomfork_rngr   set_torch_state_tensorrandrR   )r5   rU   rQ   rV   r   r8   devicesrZ   s           r   _philox_rand*register_philox_rand.<locals>._philox_rande   s     ~~;;%GhG;;& #F++\\""7+55dC!JJu5IM , 0777	 ,+s   ,B
B%z$Philox based stateless rand operator)r,   r   r*   r-   r.   r/   )r   SizeTensorr   tupleintr   r   r4   Tagnondeterministic_seeded)r,   r   r[   rd   s       r   register_philox_randrl   P   s    D KF'zz'll' ' sCx)	'
 ' '"8zz8ll8 8 sCx)	8
 8 80 #2ii//1r   c                    UR                  S5      (       aH  UR                  S5      n[        U[        5      (       a  [        R                  " U5      nUR
                  $ U  Vs1 s H:  n[        U[        R                  5      (       d  M$  UR                  R
                  iM<     nn[        S U 5       5      (       a  g[        S U 5       5      (       a  g[        S U 5       5      (       a  g[        S U 5       5      (       a  g	g s  snf )
Nr   c              3   *   #    U  H	  oS :H  v   M     g7f)rB   Nr   .0devs     r   	<genexpr>get_device.<locals>.<genexpr>   s     
,GS&=G   rB   c              3   *   #    U  H	  oS :H  v   M     g7f)xpuNr   ro   s     r   rr   rs           -WcE\Wrt   rv   c              3   *   #    U  H	  oS :H  v   M     g7f)hpuNr   ro   s     r   rr   rs      rw   rt   ry   c              3   *   #    U  H	  oS :H  v   M     g7f)r^   Nr   ro   s     r   rr   rs      rw   rt   r^   )get
isinstancestrr   r   r   rg   any)argskwargsr   argrc   s        r   
get_devicer      s    zz(H%fc""\\&)F{{*.P$3*S%,,2Oszz$GP

,G
,,,	-W-	-	-	-W-	-	-	-W-	-	- Qs   ##D
Dc                    ^^^^^^  " S S[         5      n U " 5       mTR                  [        R                  5      " [	        TSS95        TR                  [        R
                  5      S 5       mTR                  [        R                  5      S 5       mTR                  [        R                  5      S 5       mTR                  [        R                  5      S 5       mTR                  [        R                  5      UUUU4S	 j5       mTR                  [        5      U4S
 j5       nTR                  [        5      UU4S j5       nT$ )Nc                   4   ^  \ rS rSrU 4S jrU 4S jrSrU =r$ )>register_run_and_save_rng_state_op.<locals>.RunAndSaveRngState   c                 $   > [         TU ]  S5        g )Nrun_and_save_rng_statesuper__init__self	__class__s    r   r   Gregister_run_and_save_rng_state_op.<locals>.RunAndSaveRngState.__init__   s    G56r   c                 ,   > [         TU ]  " U/UQ70 UD6$ Nr   __call__)r   opr   r   r   s       r   r   Gregister_run_and_save_rng_state_op.<locals>.RunAndSaveRngState.__call__   s    7#B8888r   r   __name__
__module____qualname____firstlineno__r   r   __static_attributes____classcell__r   s   @r   RunAndSaveRngStater      s    	7	9 	9r   r   Tdeferred_errorc                 N    [         R                  R                  5       U " U0 UD64$ r   )r   rB   get_rng_stater   r   r   s      r   	impl_cuda5register_run_and_save_rng_state_op.<locals>.impl_cuda   s$    zz'')2t+>v+>>>r   c                 <    [         R                  " 5       U " U0 UD64$ r   )r   r   r   s      r   impl_cpu4register_run_and_save_rng_state_op.<locals>.impl_cpu   s     ""$b$&9&&999r   c                     [        [        S5      (       a&  [        R                  R                  5       U " U0 UD64$ [	        S5      eNry   z2functionalize a hpu RNG operator is not supported.)hasattrr   ry   r   r   r   s      r   impl_hpu4register_run_and_save_rng_state_op.<locals>.impl_hpu   s=    5%  99**,b$.A&.AAAOPPr   c                 N    [         R                  R                  5       U " U0 UD64$ r   )r   rv   r   r   s      r   impl_xpu4register_run_and_save_rng_state_op.<locals>.impl_xpu   s$    yy&&("d*=f*===r   c                 f   > TTTT	S.n[        X5      nXC;   d
   SU 35       eX4   nU" U /UQ70 UD6$ N)rB   r^   ry   rv   zBackend not supported for r   )
r   r   r   impl_mapr   implr   r   r   r   s
         r   impl_backend_select?register_run_and_save_rng_state_op.<locals>.impl_backend_select   sZ     	
 D)!H%?x#HH!B((((r   c                 V   > U    T" U/UQ70 UD6sS S S 5        $ ! , (       d  f       g = fr   r   )moder   r   r   r   s       r   impl_fake_tensor_modeAregister_run_and_save_rng_state_op.<locals>.impl_fake_tensor_mode   s$     &r;D;F; TTs   
(c                 4  > T" U/UQ70 UD6n[         R                  " U R                  R                  U/UQ75      n[         R                  " U R                  R                  U5      nU R                  R	                  ST	XV5      n[        XGS U R                  S9$ Ncall_functionconstanttracer)pytreetree_mapr   unwrap_proxycreate_proxyr   )
r   r   r   r   out
proxy_argsproxy_kwargs	out_proxyr   r   s
           r   impl_proxy_dispatch_modeDregister_run_and_save_rng_state_op.<locals>.impl_proxy_dispatch_mode   s    !"6t6v6__T[[%=%={T{K
t{{'?'?HKK,,3Z
	 !$t{{SSr   )r   py_implr   Autogradr   CUDACPUHPUXPUBackendSelectr
   r   )	r   r   r   r   r   r   r   r   r   s	      @@@@@@r   "register_run_and_save_rng_state_opr      s=   90 9 01"";#7#78 !7M ##K$4$45? 6? ##KOO4: 5: ##KOO4Q 5Q
 ##KOO4> 5> ##K$=$=>
) ?
) ##N3< 4<
 ##$:;T <T "!r   c                    ^^^^^	  " S S[         5      n U " 5       m	T	R                  [        R                  5      " [	        T	SS95        T	R                  [        R
                  5      S 5       mT	R                  [        R                  5      S 5       mT	R                  [        R                  5      S 5       mT	R                  [        R                  5      S 5       mT	R                  [        5      U	4S	 j5       nT	R                  [        R                  5      UUUU4S
 j5       nT	R                  [        5      S 5       nT	R                  U	4S j5       nT	$ )Nc                   4   ^  \ rS rSrU 4S jrU 4S jrSrU =r$ )7register_run_with_rng_state_op.<locals>.RunWithRngState   c                 $   > [         TU ]  S5        g )Nrun_with_rng_stater   r   s    r   r   @register_run_with_rng_state_op.<locals>.RunWithRngState.__init__   s    G12r   c                 ,   > [         TU ]  " X/UQ70 UD6$ r   r   )r   	rng_stater   r   r   r   s        r   r   @register_run_with_rng_state_op.<locals>.RunWithRngState.__call__   s    7#ICDCFCCr   r   r   r   s   @r   RunWithRngStater      s    	3	D 	Dr   r   Tr   c                     [         R                  R                  5       n[         R                  R                  U R	                  5       5        U" U0 UD6n[         R                  R                  U5        U$ r   )r   rB   r   set_rng_stater^   r   r   r   r   current_stater   s         r   r   1register_run_with_rng_state_op.<locals>.impl_cuda   sR    

002

  1$!&!

  /
r   c                     [         R                  " 5       n[         R                  " U 5        U" U0 UD6n[         R                  " U5        U$ r   )r   r   r   r   s         r   r   0register_run_with_rng_state_op.<locals>.impl_cpu   s@    ++-I&$!&!M*
r   c                    [        [        S5      (       af  [        R                  R                  5       n[        R                  R	                  U 5        U" U0 UD6n[        R                  R	                  U5        U$ [        S5      er   )r   r   ry   r   r   r   r   s         r   r   0register_run_with_rng_state_op.<locals>.impl_hpu   se    5%  !II335MII##I.d%f%CII##M2JOPPr   c                     [         R                  R                  5       n[         R                  R                  U 5        U" U0 UD6n[         R                  R                  U5        U$ r   )r   rv   r   r   r   s         r   r   0register_run_with_rng_state_op.<locals>.impl_xpu  sL    		//1			*$!&!		.
r   c                 ~  > [        5          T	" X/UQ70 UD6nS S S 5        [        R                  " U R                  R                  X/UQ75      n[        R                  " U R                  R                  U5      nU R                  R                  ST	Xg5      n[        WUS U R                  S9$ ! , (       d  f       N= fr   r   r   r   r   r   r   r   )
r   r   r   r   r   r   r   r   r   r   s
            r   r   @register_run_with_rng_state_op.<locals>.impl_proxy_dispatch_mode
  s     )*$YDTDVDC +__T[[%=%=	?UPT?UV
t{{'?'?HKK,,/
	 !i$t{{SS +*s   B..
B<c                 f   > TTT	T
S.n[        X#5      nXT;   d
   SU 35       eXE   nU" X/UQ70 UD6$ r   r   )r   r   r   r   r   r   r   r   r   r   r   s          r   r   ;register_run_with_rng_state_op.<locals>.impl_backend_select  sZ     	
 D)!H%?x#HH!I3D3F33r   c                 L    U    U" U0 UD6sS S S 5        $ ! , (       d  f       g = fr   r   )r   r   r   r   r   s        r   r   =register_run_with_rng_state_op.<locals>.impl_fake_tensor_mode$  s     t&v& TT   
#c                    > U R                  U5      nU R                  U5      nU R                  U5      nU R                  5          T	" XR/UQ70 UD6nU R                  U5      sS S S 5        $ ! , (       d  f       g = fr   unwrap_tensorsredispatch_to_nextwrap_tensors)
ctxr   r   r   r   unwrapped_rng_stateunwrapped_argsunwrapped_kwargsr   r   s
            r   impl_functional7register_run_with_rng_state_op.<locals>.impl_functional+  sz    !00;++D1--f5##%$#*8<LC ##C(	 &%%s   A,,
A:)r   r   r   r   r   r   r   r   r   r   r   r
   py_functionalize_impl)
r   r   r   r   r   r   r   r   r   r   s
        @@@@@r   register_run_with_rng_state_opr      sY   D- D )*{334 !3DI  0 01 2 0 1 0Q 1Q 0 1  67
T 8
T  9 9:
4 ;
4 /' 0' --	) .	) r   c                    ^^  " S S[         5      n U " 5       mTR                  [        R                  5      " [	        TSS95        TR                  [        R
                  5      S S.S j5       mTR                  [        R                  5      S S.U4S jj5       nTR                  [        5      S S.S j5       nTR                  [        5      S S.U4S	 jj5       nTR                  S S.U4S
 jj5       nT$ )Nc                   <   ^  \ rS rSrU 4S jrSS.U 4S jjrSrU =r$ )Jregister_graphsafe_run_with_rng_state_op.<locals>.GraphSafeRunWithRngStatei?  c                 $   > [         TU ]  S5        g )Ngraphsafe_run_with_rng_stater   r   s    r   r   Sregister_graphsafe_run_with_rng_state_op.<locals>.GraphSafeRunWithRngState.__init__@  s    G;<r   Nr   c                0   > [         TU ]  " U/UQ7SU0UD6$ Nr   r   )r   r   r   r   r   r   s        r   r   Sregister_graphsafe_run_with_rng_state_op.<locals>.GraphSafeRunWithRngState.__call__C  s"    7#BMMMfMMr   r   r   r   s   @r   GraphSafeRunWithRngStater  ?  s    	= 15 	N 	Nr   r	  Tr   r  c                    UR                   R                  n[        R                  R                  U   nUR                  5       nUR                  U5        U " U0 UD6nUR                  U5        U$ r   )r   indexr   rB   default_generatorsgraphsafe_get_stategraphsafe_set_state)r   r   r   r   
device_idx	generatorr   r   s           r   r   ;register_graphsafe_run_with_rng_state_op.<locals>.impl_cudaL  se    %%++
JJ11*=	!557%%i0$!&!%%m4
r   c                V   > [        X#5      nUS:X  d
   SU 35       eT" U /UQ7SU0UD6$ )NrB   z6GraphSafe RNG operations only supported for CUDA, got r   r   )r   r   r   r   r   r   s        r   r   Eregister_graphsafe_run_with_rng_state_op.<locals>.impl_backend_selectV  sH    D)f	MCF8L	MBdBiB6BBr   c                L    U    U" U0 UD6sS S S 5        $ ! , (       d  f       g = fr   r   )r   r   r   r   r   s        r   r   Gregister_graphsafe_run_with_rng_state_op.<locals>.impl_fake_tensor_mode^  s    t&v& TTr   c                  > [        5          T	" U/UQ7SU0UD6nS S S 5        [        R                  " U R                  R                  U/UQ75      n[        R                  " U R                  R                  SU0UE5      nU R                  R                  ST	Xg5      n[        WUS U R                  S9$ ! , (       d  f       N= f)Nr   r   r   r   )
r   r   r   r   r   r   r   r   r   r  s
            r   r   Jregister_graphsafe_run_with_rng_state_op.<locals>.impl_proxy_dispatch_modec  s    (*.rXDXIXQWXC +__T[[%=%={T{K
KK$${I&H&H
 KK,,9:
	 !i$t{{SS +*s   B44
Cc                  > Ub  U R                  U5      OS nU R                  U5      nU R                  U5      nU R                  5          T	" U/UQ7SU0UD6nU R                  U5      sS S S 5        $ ! , (       d  f       g = fr  r   )
r   r   r   r   r   r   r   r   r   r  s
            r   r   Aregister_graphsafe_run_with_rng_state_op.<locals>.impl_functionalp  s     .7-BCy) 	 ++D1--f5##%.#/BFVC ##C(	 &%%s   
A33
B)
r   r   r   r   r   r   r   r
   r   r   )r	  r   r   r   r   r  r   s        @@r   (register_graphsafe_run_with_rng_state_opr  >  s   N#6 N $<#=  (()=)=> !=dS "))+*:*:;'+  < "))+*C*CD15 C EC ")).99= ' :' "))*@A<@ 
T B
T "7726 ) 8) ('r   c                      [        5         g r   )rl   r   r   r   register_rng_primsr    s    r   r   )(typingr   r   torch.utils._pytreeutils_pytreer   r   torch._Cr   torch._higher_order_ops.utilsr   
torch._opsr   torch._prims_commonr   r	   torch._subclasses.fake_tensorr
   "torch.fx.experimental.proxy_tensorr   r   r   torch.typesr   r   r   r4   rf   r<   rR   rl   r   r   r   r   r   r  r  r  r   r   r   <module>r(     s      $ $    B * O 8 
 (%,A::A::04n&="@]@ <= 35 @(F  HI r   