
    (Th2W                   b   S SK Jr  S SKrS SKJrJrJrJrJr  S SK	r	SSK
Jr  SSKJr  \" S5      r " S	 S
\5      rStS jrStS jrSuS jr    SvS jrSwSxS jjrSyS jr  Sz S{S jjrS|S jr    S}S jr    S}S jr    S}S jrS~S jrS~S jr    SS jrS~S jr SS jr!SS jr"SS jr#    SS jr$SS jr%SS jr&SS  jr'SS! jr(SS" jr)SS# jr*SS$ jr+SS% jr,SS& jr-SS' jr.SS( jr/SS) jr0SS* jr1SS+ jr2SS, jr3SS- jr4SS. jr5SS/ jr6SS0 jr7SS1 jr8SS2 jr9SS3 jr:SS4 jr;SS5 jr<SS6 jr=SS7 jr>SS8 jr?SS9 jr@SS: jrASS; jrBSS< jrCSS= jrD S   SS> jjrES? rFS@ rGSA rHSB rISC rJSD rKSE rLSF rMSG rN            SSH jrOSSI jrPSJ rQ    SSK jrR    SSL jrS    SSM jrTSSN jrU                        SSO jrVSSP jrWSQ rXSR rY      SSS jrZSST jr[    SSU jr\SSV jr]SSW jr^SSX jr_SSY jr`SSZ jraSS[ jrb    SS\ jrcS] rd      SS^ jreSS_ jrfSS` jrg            SSa jrhSSb jriSc rjSSd jrk    SSe jrlSSf jrmSSg jrnSSh jroSSi jrpSSj jrqSSk jrrSSl jrsSSm jrtSSn jruSo rvSwSp jrwSSq jrxSSr jry            SSs jrzg)    )annotationsN)ListOptionalSequenceTupleTypeVar   )ir   )coreTc                  (   ^  \ rS rSrU 4S jrSrU =r$ )IncompatibleTypeErrorImpl   c                   > Xl         X l        SU R                   R                  5       -   S-   U R                  R                  5       -   U l        [        [
        U ]  U R                  5        g )Nzinvalid operands of type  and )type_atype_b__repr__messagesuperr   __init__)selfr   r   	__class__s      P/var/www/auris/envauris/lib/python3.13/site-packages/triton/language/semantic.pyr   "IncompatibleTypeErrorImpl.__init__   sT    2T[[5I5I5KKgUX\XcXcXlXlXnn'7E    )r   r   r   )__name__
__module____qualname____firstlineno__r   __static_attributes____classcell__)r   s   @r   r   r      s    F Fr   r   c                    U S;  a  [        SU  35      e[        R                  " UR                  U 5      [        R                  5      $ )Nr   r   r	   z+program_id axis must be 0, 1, or 2 but got )
ValueErrortltensorcreate_get_program_idint32axisbuilders     r   
program_idr.      s=    9FtfMNN99W2248"((CCr   c                    U S;  a  [        SU  35      e[        R                  " UR                  U 5      [        R                  5      $ )Nr%   z-num_programs axis must be 0, 1, or 2 but got )r&   r'   r(   create_get_num_programsr*   r+   s     r   num_programsr1   !   s=    9HOPP99W44T:BHHEEr   c                d   U R                   nUR                   nU R                  nUR                  nXE:X  a	  X#:  a  U $ U$ U[        R                  R                  R
                  :X  a	  X#:  a  U $ U$ U[        R                  R                  R
                  :X  a	  X2:  a  U$ U $ [        SU SU 35      e)Nzunexpected signedness r   )int_bitwidthint_signednessr'   dtype
SIGNEDNESSUNSIGNED	TypeError)a_tyb_tya_rankb_ranka_snb_sns         r   integer_promote_implr?   ,   s    FFDD |t0D0	$$--	-'t1T1	$$--	-'t1T1
,TF%v>
??r   c                f   X:w  a  U(       a  X4OX 4u  pVUR                  5       R                  UR                  5       R                  ::  a=  U(       a4  U[        R                  [        R                  4;   a  [        R
                  $ U$ U R                  5       (       d  UR                  5       (       a  [        R                  $ U R                  5       (       d  UR                  5       (       a  [        R
                  $ U R                  5       (       d  UR                  5       (       a'  U(       a  [        R
                  $ [        R                  $ U R                  5       (       a<  UR                  5       (       a'  U(       a  [        R
                  $ [        R                  $ U R                  5       (       d  UR                  5       (       a  [        R
                  $ U R                  5       (       a,  UR                  5       (       a  X:X  a  U $ [        R                  $ U R                  5       (       a  UR                  5       (       d  [        SU  SU 35      eU(       aM  U R                  UR                  :w  a3  [        SU R                  5       -   S-   UR                  5       -   S-   5      e[!        X5      $ )Nunexpected type r   zCannot use /, #, or % with x because they have different signedness;this is unlikely to result in a useful answer. Cast them to the same signedness.)kindvaluer'   float16bfloat16float32is_fp64float64is_fp32is_fp16is_bf16is_fp8is_intr8   r4   r   r?   )r9   a_is_scalarr:   b_is_scalar
div_or_mod	scalar_ty	tensor_tys          r   computation_type_implrT   <   s   
 !/:|	>>!!Y^^%5%;%;;yRZZ,EEzz! ||~~zz ||~~zz ||~~::::||~~$,,..::;;||~~zz{{}}|t33;;==*4&dV<== d))T-@-@@5G'QTXTaTaTcckk l 	l  ++r   c                   [        U [        5      (       a4  [        R                  " UR	                  U 5      [        R
                  5      $ [        U [        5      (       a  SU s=::  a  S:  a  O  O[        R                  nOrSU s=::  a  S:  a  O  O[        R                  nOQSU s=::  a  S:  a  O  O[        R                  nO0SU s=::  a  S:  a  O  O[        R                  nO[        SU  S35      e[        S	XUS
9$ [        U [        5      (       an  SnSSS-  -  n[        S   " U 5      nU[        S5      :X  d  US:X  d  X :w  d  XFs=::  a  U::  a  O  O[        R                  nO[        R                   n[        S	XUS
9$ [        U [        R"                  5      (       a  [%        U R&                  U5      $ [        U [        R                  5      (       a  U $ U(       a  [)        SU  S[+        U 5       S35      eU $ )N           l                             l            zNonrepresentable integer . r5   r-   g      8g   ?r	      absinf        zcannot convert z	 of type z
 to tensor)
isinstanceboolr'   r(   get_int1int1intr*   uint32int64uint64r&   fullfloat__builtins__rG   rI   	constexpr	to_tensorrD   r8   type)xr-   
check_typer5   min_float32max_float32abs_xs          r   rm   rm   o   s   !Tyy))!,bgg66	As		QHHEa%IIEq 5 HHEa%IIE81=>>B88	Au		!QV+U#A&E%L C<6.;.JJEJJEB88	Ar||	$	$'**	Aryy	!	!/!Id1gYjIJJHr   c                    U R                  5       (       aX  U(       d  [        X5      eUR                  5       (       a  X:w  a  [        X5      eUR                  5       (       a  [        X5      eg g N)is_ptrr   is_floating)r   r   allow_ptr_as      r   check_ptr_type_implry      sZ    }}+F;;==?? 0+F;;+F;;   r   c                2   [        U [        R                  5      n[        U[        R                  5      nU(       a  U n	[        X5      n U(       a  Un
[        X5      nU R                  R
                  nUR                  R
                  n[        XU5        [        XU5        U(       Ga_  UR                  5       (       GdI  UR                  5       (       Gd3  [        XXU5      nU(       a  W	S:  a  UR                  5       (       d"  U(       a&  W
S:  a   UR                  5       (       a  [        S5      eUR                  5       (       a  U(       a<  UR                  5       W	s=::  a  UR                  5       ::  d  O  [        SU	 SU 35      eU(       a<  UR                  5       W
s=::  a  UR                  5       ::  d  O  [        SU
 SU 35      eU(       a  [        SW	XS9O[        XU5      n U(       a  [        SW
XS9O[        XU5      n[!        XU5      u  pX4$ )Nr   z{Cannot perform a binary operation between an unsigned tensor and a negative scalar. Perform a explicit cast on one of them.zScalar z is out of range for type r[   r\   )ra   numbersNumberrm   rn   scalarry   rv   rT   is_int_unsignedr&   rN   get_int_min_valueget_int_max_valueri   castbroadcast_impl_value)lhsrhsr-   allow_lhs_ptrallow_rhs_ptrarithmetic_checkrQ   lhs_is_scalarrhs_is_scalar
lhs_scalar
rhs_scalar
lhs_sca_ty
rhs_sca_ty
ret_sca_tys                 r   binary_op_type_checking_implr      s    sGNN3MsGNN3M
%
% JJ
>
>
 1 1 3 3J<M<M<O<O*:jakl
j1n1K1K1M1M Z!^
8R8R8T8T G H Hj&B&B&D
&tV`VrVrVt&t 7:,6PQ[P\!]^^j&B&B&D
&tV`VrVrVt&t 7:,6PQ[P\!]^^BO 
*?UYZ]krUs 	 CP 
*?UYZ]krUs 	 $Cg6HC8Or   c                :   U R                   R                  R                  S:  d  UR                  R                  (       d  g U R                   R                  nUR                   R                  nXE:X  d   eUR                  5       (       d   e[        U [        R                  U5      n [        U[        R                  U5      nU" XSU5      nUR                  5       n[        R                  " UR                  U5      [        R                  5      nUR                  5       n[        R                  " UR                  U5      [        R                  5      n[        [        XgU5      [        XhU5      U5      n	SUR                   SUR                    3n
[#        XU5        g )N@   Fre   z! overflow detected for operation )rn   r}   r3   optionssanitize_overflowrN   r   r'   rg   r   r(   	get_int64r   and_
less_equalgreater_equalr   device_assert)r   r   r-   	binary_opr   r   ret	max_value	min_valuecondmsgs              r    binary_op_sanitize_overflow_implr      s7   
xx##r)1R1RJJ###
sBHHg
&C
sBHHg
&C
CeW
-C,,.I		'++I6AI,,.I		'++I6AI
373]3SZ5[]deD
''((I)J\J\I]
^C$W%r   c                   [        XUSS5      u  pU R                  R                  nUR                  R                  nUR                  5       (       a   UR                  5       (       a  [	        S5      eUR                  5       (       aC  UR                  5       (       d.  XpU R                  R                  nUR                  R                  nUR                  5       (       Ga(  UR
                  nUR                  R                  5       (       a  UR                  R                  S:  a  UR                  R                  5       (       aM  [        R                  " [        R                  UR                  R                  5       5      R                  U5      nO[        R                  R                  U5      nUR                  UR
                  US5      n[        R                   " UR#                  U R
                  U5      U R                  5      $ UR%                  5       (       aE  [        R                   " UR'                  U R
                  UR
                  5      U R                  5      $ UR)                  5       (       a]  U(       a  [+        XU[,        5        [        R                   " UR/                  U R
                  UR
                  5      U R                  5      $ [	        SU 35      e)NTzcannot add pointers togetherr   FrA   )r   rn   r}   rv   r8   handler5   r~   r3   is_blockr'   
block_typerg   get_block_shapesto_ircreate_int_castr(   create_addptrrw   create_faddrN   r   add
create_add)inputotherr   r-   input_scalar_tyother_scalar_tyother_handlei64_tys           r   r   r      s   /gtTRLEjj''Ojj''OO$:$:$<$<677 (>(>(@(@u**++**++||;;&&((U[[-E-E-Jzz""$$rxx1L1L1NOUUV]^0"225<<OLyy..u||\JEJJWW		$	$	&	&yy,,U\\5<<H%**UU				!	!,U7CHyy++ELL%,,GTT
&&78
99r   c           	        [        XUSS5      u  pU R                  R                  nUR                  5       (       aN  [        R
                  " UR                  U R                  [        X5      R                  5      U R                  5      $ UR                  5       (       aE  [        R
                  " UR                  U R                  UR                  5      U R                  5      $ UR                  5       (       a]  U(       a  [        XU[        5        [        R
                  " UR                  U R                  UR                  5      U R                  5      $ [        SU 35      e)NTFrA   )r   rn   r}   rv   r'   r(   r   r   minusrw   create_fsubrN   r   sub
create_subr8   r   r   r   r-   rR   s        r   r   r     s    /gtUSLE

!!Iyy..u||U5=R=Y=YZ\a\f\fggyy,,U\\5<<H%**UU					,U7CHyy++ELL%,,GTT
&yk2
33r   c                   [        XU5      u  pU R                  R                  nUR                  5       (       aE  [        R
                  " UR                  U R                  UR                  5      U R                  5      $ UR                  5       (       a]  U(       a  [        XU[        5        [        R
                  " UR                  U R                  UR                  5      U R                  5      $ [        SU 35      eNrA   )r   rn   r}   rw   r'   r(   create_fmulr   rN   r   mul
create_mulr8   r   s        r   r   r     s    /gFLE

!!Iyy,,U\\5<<H%**UU					,U7CHyy++ELL%,,GTT
&yk2
33r   c           	        [        XUSSSS5      u  pU R                  R                  nUR                  R                  nUR                  5       (       a#  UR	                  5       (       a  [        XU5      nGOUR	                  5       (       a"  UR                  5       (       a  [        XU5      n OUR	                  5       (       aL  UR	                  5       (       a7  [        U [        R                  U5      n [        U[        R                  U5      nOlUR                  5       (       aI  UR                  5       (       a4  UR                  UR                  :  a  [        XU5      nO[        XU5      n O[        SU 35      e[        R                  " UR                  U R                  UR                  5      U R                  5      $ NFTrA   )r   rn   r}   rw   rN   r   r'   rG   fp_mantissa_widthr8   r(   create_fdivr   )r   r   r-   r   r   s        r   truedivr   #  sT   /gueUY[_`LEjj''Ojj''O""$$)?)?)A)AUW5				!	!o&A&A&C&CUW5				!	!o&<&<&>&>UBJJ0UBJJ0		$	$	&	&?+F+F+H+H,,/P/PP9E9E *?*;<==99W((u||DejjQQr   c           	     r   [        XUSSSS5      u  pU R                  R                  nUR                  R                  nUR                  5       (       a  UR                  5       (       a  [	        X45      n[        XU5      n [        XU5      nUR                  5       (       aE  [        R                  " UR                  U R                  UR                  5      U R                  5      $ [        R                  " UR                  U R                  UR                  5      U R                  5      $ [        SU 35      er   )r   rn   r}   rN   r?   r   is_int_signedr'   r(   create_sdivr   create_udivr8   )r   r   r-   r   r   ret_tys         r   floordivr   =  s    /gueUY[_`LEjj''Ojj''OO$:$:$<$<%oGUG,UG,!!99W00u||LejjYY99W00u||LejjYY
&&78
99r   c           	     t   U R                   R                  nUR                   R                  nUR                  5       (       a  UR                  5       (       d  [        S5      e[	        XUSSSS5      u  pUR                  U R                  UR                  5      n[        R                  " X`R                   5      $ )Nz4both operands of fdiv must have floating scalar typeFT)	rn   r}   rw   r8   r   r   r   r'   r(   )r   r   ieee_roundingr-   r   r   r   s          r   fdivr   L  s    jj''Ojj''O&&((0K0K0M0MNOO/gueUZ\`aLE


ellELL
9C99S**%%r   c           	     P   [        XUSSSS5      u  pU R                  R                  nUR                  R                  nUR                  5       (       aE  [        R
                  " UR                  U R                  UR                  5      U R                  5      $ UR                  5       (       a  UR                  UR                  :w  a3  [        SUR                  5       -   S-   UR                  5       -   S-   5      eUR                  5       (       aE  [        R
                  " UR                  U R                  UR                  5      U R                  5      $ [        R
                  " UR                  U R                  UR                  5      U R                  5      $ [        SU 35      e)NFTzCannot mod z by rB   rA   )r   rn   r}   rw   r'   r(   create_fremr   rN   r4   r8   r   r   create_sremcreate_urem)r   r   r-   rR   r   s        r   modr   W  sK   /gueUY[_`LE

!!Ijj''Oyy,,U\\5<<H%**UU					##'E'EEMI,>,>,@@6IOLdLdLff jo o p p ""$$99W00u||LejjYY99W00u||LejjYY
&yk2
33r   c                   [        XU5      u  pU R                  nUR                  5       (       a  U[        R                  R
                  :X  aE  [        R                  " UR                  U R                  UR                  5      U R                  5      $ U[        R                  R                  :X  aE  [        R                  " UR                  U R                  UR                  5      U R                  5      $ [        SU 35      eUR                  5       (       aE  [        R                  " UR                  U R                  UR                  5      U R                  5      $ UR                  5       (       aE  [        R                  " UR!                  U R                  UR                  5      U R                  5      $ [#        SU 35      eNzUnexpected propagate_nan Unexpected dtype )r   r5   rw   r'   PropagateNanALLr(   create_minimumfr   rn   NONEcreate_minnumfr&   r   create_minsir~   create_minuir8   ro   ypropagate_nanr-   r5   s        r   minimumr   p  8   'g6DAGGEBOO///99W44QXXqxxH!&&QQboo22299W33AHHahhGPP8HII					yy--ahhA166JJ				 	 yy--ahhA166JJ+E7344r   c                   [        XU5      u  pU R                  nUR                  5       (       a  U[        R                  R
                  :X  aE  [        R                  " UR                  U R                  UR                  5      U R                  5      $ U[        R                  R                  :X  aE  [        R                  " UR                  U R                  UR                  5      U R                  5      $ [        SU 35      eUR                  5       (       aE  [        R                  " UR                  U R                  UR                  5      U R                  5      $ UR                  5       (       aE  [        R                  " UR!                  U R                  UR                  5      U R                  5      $ [#        SU 35      er   )r   r5   rw   r'   r   r   r(   create_maximumfr   rn   r   create_maxnumfr&   r   create_maxsir~   create_maxuir8   r   s        r   maximumr     r   r   c                X   [        XU5      u  p[        XU5      u  p[        XU5      u  pU R                  nUR                  5       (       aQ  [        R                  " UR                  U R                  UR                  UR                  U5      U R                  5      $ [        SU S35      e)Nr   z(. Only floating point clamp is supported)	r   r5   rw   r'   r(   create_clampfr   rn   r8   )ro   minmaxr   r-   r5   s         r   clampr     s    +Cg>HC)!':FA)!':FAGGEyy..qxxSZZQ^_abagaghh+E72Z[\\r   c                @   [        XU5      u  pU R                  R                  nUR                  R                  nUR                  5       (       a  UR                  5       (       d  [	        X45      e[        X45      nXS:w  a  [        XU5      n XT:w  a  [        XU5      nX4$ ru   )r   rn   r}   rN   r   r?   r   )r   r   r-   input_sca_tyother_sca_tyr   s         r   bitwise_op_type_checking_implr     s    /gFLE::$$L::$$L  (;(;(=(='CC%lAJ!U0!U0<r   c                    [        XU5      u  p[        R                  " UR                  U R                  UR                  5      U R
                  5      $ ru   )r   r'   r(   
create_andr   rn   r   r   r-   s      r   r   r     :    0wGLE99W''ellCUZZPPr   c                    [        XU5      u  p[        R                  " UR                  U R                  UR                  5      U R
                  5      $ ru   )r   r'   r(   	create_orr   rn   r   s      r   or_r     s:    0wGLE99W&&u||U\\BEJJOOr   c                    [        XU5      u  p[        R                  " UR                  U R                  UR                  5      U R
                  5      $ ru   )r   r'   r(   
create_xorr   rn   r   s      r   xor_r     r   r   c                   U R                   R                  5       (       d!  [        U [        R                  " S5      U5      n UR                   R                  5       (       d!  [        U[        R                  " S5      U5      n[        XU5      $ Nrd   )rn   is_int1bitcastr'   r5   r   r   s      r   logical_andr     sc    ::rxx/9::rxx/9g&&r   c                   U R                   R                  5       (       d!  [        U [        R                  " S5      U5      n UR                   R                  5       (       d!  [        U[        R                  " S5      U5      n[        XU5      $ r   )rn   r   r   r'   r5   r   r   s      r   
logical_orr     sc    ::rxx/9::rxx/9uW%%r   c                    U R                   R                  5       (       d!  [        U [        R                  " S5      U5      n [        X5      $ r   )rn   r   r   r'   r5   invert)r   r-   s     r   not_r     s7    ::rxx/9%!!r   c                    [        XU5      u  p[        R                  " UR                  U R                  UR                  5      U R
                  5      $ ru   )r   r'   r(   create_lshrr   rn   r   s      r   lshrr    :    0wGLE99W((u||DejjQQr   c                    [        XU5      u  p[        R                  " UR                  U R                  UR                  5      U R
                  5      $ ru   )r   r'   r(   create_ashrr   rn   r   s      r   ashrr    r  r   c                    [        XU5      u  p[        R                  " UR                  U R                  UR                  5      U R
                  5      $ ru   )r   r'   r(   
create_shlr   rn   r   s      r   shlr
    r   r   c                    U $ ru   r[   )r   s    r   plusr    s    Lr   c                   U R                   R                  nUR                  5       (       a  [        SUR	                  5       -   S-   5      e[
        R                  " UR                  UR                  U5      5      U5      n[        X0SU5      $ )Nz$wrong type argument to unary minus ()T)
rn   r}   rv   r&   r   r'   r(   get_null_valuer   r   )r   r-   r   _0s       r   r   r     su    ::$$L?,BWBWBYY\__``	7)),*<*<W*EF	UBr$((r   c                B   U R                   R                  nUR                  5       (       d  UR                  5       (       a  [	        SUR                  5       -   S-   5      e[        R                  " UR                  UR                  U5      5      U5      n[        XU5      $ )Nz%wrong type argument to unary invert (r  )rn   r}   rv   rw   r&   r   r'   r(   get_all_ones_valuer   r   )r   r-   r   _1s       r   r   r     s    ::$$L 8 8 : :@<CXCXCZZ]``aa	7--l.@.@.IJL	YB7##r   c                    U R                   R                  5       (       d  [        R                  $ U R                   R                  n[        R
                  " [        R                  U5      $ ru   )rn   r   r'   rd   shaper   )vr  s     r   
_bool_liker    s>    66??wwFFLLE==%((r   c                |   [        XU5      u  pU R                  R                  nUR                  5       (       aD  [        R
                  " UR                  U R                  UR                  5      [        U 5      5      $ UR                  5       (       a  UR                  5       (       aD  [        R
                  " UR                  U R                  UR                  5      [        U 5      5      $ [        R
                  " UR                  U R                  UR                  5      [        U 5      5      $ [        SU 35      er   )r   rn   r}   rw   r'   r(   create_fcmpOGTr   r  rN   r   create_icmpSGTcreate_icmpUGTr8   r   r   r-   rR   s       r   greater_thanr        /gFLE

!!Iyy//ellKZX]M^__					""$$99W33ELL%,,OQ[\aQbcc99W33ELL%,,OQ[\aQbcc
&yk2
33r   c                |   [        XU5      u  pU R                  R                  nUR                  5       (       aD  [        R
                  " UR                  U R                  UR                  5      [        U 5      5      $ UR                  5       (       a  UR                  5       (       aD  [        R
                  " UR                  U R                  UR                  5      [        U 5      5      $ [        R
                  " UR                  U R                  UR                  5      [        U 5      5      $ [        SU 35      er   )r   rn   r}   rw   r'   r(   create_fcmpOGEr   r  rN   r   create_icmpSGEcreate_icmpUGEr8   r  s       r   r   r     r  r   c                |   [        XU5      u  pU R                  R                  nUR                  5       (       aD  [        R
                  " UR                  U R                  UR                  5      [        U 5      5      $ UR                  5       (       a  UR                  5       (       aD  [        R
                  " UR                  U R                  UR                  5      [        U 5      5      $ [        R
                  " UR                  U R                  UR                  5      [        U 5      5      $ [        SU 35      er   )r   rn   r}   rw   r'   r(   create_fcmpOLTr   r  rN   r   create_icmpSLTcreate_icmpULTr8   r  s       r   	less_thanr'  )  r  r   c                |   [        XU5      u  pU R                  R                  nUR                  5       (       aD  [        R
                  " UR                  U R                  UR                  5      [        U 5      5      $ UR                  5       (       a  UR                  5       (       aD  [        R
                  " UR                  U R                  UR                  5      [        U 5      5      $ [        R
                  " UR                  U R                  UR                  5      [        U 5      5      $ [        SU 35      er   )r   rn   r}   rw   r'   r(   create_fcmpOLEr   r  rN   r   create_icmpSLEcreate_icmpULEr8   r  s       r   r   r   8  r  r   c                   [        XU5      u  pU R                  R                  nUR                  5       (       aD  [        R
                  " UR                  U R                  UR                  5      [        U 5      5      $ UR                  5       (       aD  [        R
                  " UR                  U R                  UR                  5      [        U 5      5      $ [        SU 35      er   )r   rn   r}   rw   r'   r(   create_fcmpOEQr   r  rN   create_icmpEQr8   r  s       r   equalr/  G      /gFLE

!!Iyy//ellKZX]M^__					yy..u||U\\JJW\L]^^
&yk2
33r   c                   [        XU5      u  pU R                  R                  nUR                  5       (       aD  [        R
                  " UR                  U R                  UR                  5      [        U 5      5      $ UR                  5       (       aD  [        R
                  " UR                  U R                  UR                  5      [        U 5      5      $ [        SU 35      er   )r   rn   r}   rw   r'   r(   create_fcmpUNEr   r  rN   create_icmpNEr8   r  s       r   	not_equalr4  S  r0  r   c                   [        U [        5      (       a  [        U[        5      (       d  [        S5      e[        U S-	  5      n[        US-	  5      nU(       d  U(       a  [        S5      eX::  a  [        S5      eX-
  nXUS-
  -  S:w  a  [        S5      eU/n[        R
                  " [        R                  U5      n[        R                  " UR                  X5      U5      $ )Nz/arange's arguments must be of type tl.constexpr    zarange must fit in int32z=arange's end argument must be greater than the start argumentr   r   z#arange's range must be a power of 2)	ra   re   r&   rb   r'   r   r*   r(   create_make_range)startendr-   is_start_int64is_end_int64ranger  r   s           r   aranger=  d  s    eS!!C)=)=JKK%2+&Nr	?L344
|XYYKE!>??GE]]288U+F99W..u:FCCr   c                   [        U[        R                  5      (       a.  UR                  R                  S:X  d   S5       e[        XU5      nOlUc  [        S5      eUS:X  a!  UR                  UR                  U5      5      nO![        USUR                   35      nU" U5      n[        R                  " X5      n[        XU5      $ )Nr   zonly accepts size-1 tensorz2dtype must be specified when value is not a tensorr   get_)ra   r'   r(   numelrD   r   r&   r  r   getattrnamesplat)r  rD   r5   r-   get_value_fns        r   ri   ri   u  s    %##{{  A%C'CC%U7+ =QRRA:**5;;w+?@E"7d5::,,?@L 'E		%'w''r   c                   U R                   R                  5       (       a   S5       e[        U5      S:X  a  U $ [        R                  " U R
                  U5      n[        R                  " UR                  U R                  U5      U5      $ )NzCannot splat a block tensorr   )	rn   r   lenr'   r   r5   r(   create_splatr   )rD   r  r-   r   s       r   rC  rC    sg    zz""$$C&CC$
5zQ]]5;;.F99W))%,,>GGr   c                "   SnU H  nXE-  nM	     U R                   R                  U:w  a  [        S5      e[        R                  " U R                   R
                  U5      n[        R                  " UR                  U R                  X5      U5      $ )Nr   z:reshape() cannot change total number of elements in tensor)	rn   r@  r&   r'   r   r}   r(   create_reshaper   )r   	dst_shapecan_reorderr-   r@  sr   s          r   reshaperM    ss    E
 zz5 UVV]]5::,,i8F99W++ELL)QSYZZr   c                   U R                    Vs/ s H  n[        R                  " U5      PM     nnUR                  US5        U R                  R                  5       (       d
  [        XUS9$ [        R                  " U R                  R                  U5      n[        R                  " UR                  U R                  U5      U5      $ s  snf )Nr   )r  r-   )r  r'   _constexpr_to_valueinsertrn   r   rC  r   r}   r(   create_expand_dimsr   )r   r,   r-   ro   rJ  r   s         r   expand_dimsrR    s    49KK@Kq''*KI@T1::  UW==]]5::,,i8F99W//dCVLL As    Cc                \   U(       d   S5       e[        U R                  5      S:X  d   e[        R                  " U R                  R
                  U R                  S   UR                  S   -   /5      n[        R                  " UR                  U R                  UR                  5      U5      $ )Nz;current implementation of `cat` always may reorder elementsr   r   )	rF  r  r'   r   rn   r}   r(   
create_catr   )r   r   rK  r-   ret_types        r   catrV    s|    UUU;syy>Q}}SXX__syy|ciil/J.KLH99W''

CJJ?JJr   c                    [        XU5      u  pU R                  / :H  nU(       a  [        U SU5      n [        USU5      n[        U R                  S   [        R
                  5      (       a  [        R
                  " S5      nOSnU R                  U/-   n[        R                  " U R                  R                  U5      n[        R                  " UR                  U R                  UR                  5      U5      nU(       a  [        US/SUS9nU$ )Nr   r	   FrK  r-   )r   r  rR  ra   r'   rl   r   rn   r}   r(   create_joinr   rM  )abr-   
was_rank_1two	new_shaperU  r   s           r   joinr`    s    g.DA BJ1g&1g&!''"+r||,,ll1o3%I}}QVV]]I6H
))G''!((;X
FCcA3E7CJr   c                   [        U R                  5      S:  d   e[        R                  " U R                  S   5      S:X  d   eU R                  S S n[        R                  " U R
                  R                  U5      nUR                  U R                  5      u  pE[        R                  " XC5      [        R                  " XS5      4$ )Nr   rX  r	   )
rF  r  r'   rO  r   rn   r}   create_splitr   r(   )r[  r-   r_  rU  outLHSoutRHSs         r   splitre    s    L1""1772;/1454I}}QVV]]I6H))!((3NF
		&#
		&# r   c                   [        U R                  5      [        U5      :w  a  [        S5      e[        S U 5       5      [	        [        [        U5      5      5      :w  a  [        SU 35      e[        R                  " U R                  R                  U Vs/ s H  o0R                  U   PM     sn5      n[        R                  " UR                  U R                  U5      U5      $ s  snf )Nz5permute dims must have the same length as input shapec              3  N   #    U  H  n[         R                  " U5      v   M     g 7fru   )r'   rO  ).0ds     r   	<genexpr>permute.<locals>.<genexpr>  s     6Ab$$Q''s   #%z?permute dims must be a permutation of 0, 1, ..., n-1, but were )rF  r  r&   sortedlistr<  r'   r   rn   r}   r(   create_transr   )r   dimsr-   ri  rU  s        r   permuterp    s    
5;;3t9$PQQ666$uSY?O:PPZ[_Z`abb}}UZZ..0NAQ0NOH99W))%,,=xHH 1Os   C&
c                   U R                   R                  5       (       dR  [        R                  " U R                   U5      n[        R                  " UR                  U R                  U5      U5      $ U R                   R                  5       n[        U5      [        U5      :w  a  [        SU SU 35      eX:X  a  U $ [        U5       H1  u  pVX   U:w  d  M  US:w  d  M  [        SX    SU SU SU SU 3
5      e   [        R                  " U R                   R                  U5      n[        R                  " UR                  U R                  U5      U5      $ )Nz!Cannot broadcast, rank mismatch: z, r   z3Cannot broadcast, the expanded size of the tensor (z ) must match the existing size (z) at non-singleton dimension : )rn   r   r'   r   r(   rG  r   r   rF  r&   	enumerater}   create_broadcast)r   r  r-   r   	src_shapeiitems          r   broadcast_impl_shaperx    s0   ::  uzz51yy--ellEBFKK

++-I
9~U#<YKr%QRRY'8t	RSXS[R\ ]??Cf E!!"2i[5'; < < (
 ]]5::,,e4F99W--ellEBFKKr   c           	        U R                   nUR                   nUR                  5       (       a  UR                  5       (       dm  [        R                  " UR                  UR
                  5      n[        R                  " UR                  UR                  UR                  5       5      U5      nX4$ UR                  5       (       d  UR                  5       (       am  [        R                  " UR                  UR
                  5      n[        R                  " UR                  U R                  UR                  5       5      U5      n X4$ UR                  5       (       Ga  UR                  5       (       Ga  UR                  5       nUR                  5       n[        U5      [        U5      :  a  [        [        U5      [        U5      5       H}  n[        R                  " UR                  U R                  S5      [        R                  " UR                  S/UR                  -   5      5      n U R                   nUR                  5       nM     O[        U5      [        U5      :  a  [        [        U5      [        U5      5       H}  n[        R                  " UR                  UR                  S5      [        R                  " UR                  S/UR                  -   5      5      nUR                   nUR                  5       nM     [        U5      [        U5      :X  d   e/ n[        U5       Hs  u  pXi   nU
S:X  a  UR                  U5        M"  US:X  d  X:X  a  UR                  U
5        M@  [!        S[#        U	5      -   S-   [#        U
5      -   S-   [#        U5      -   5      e   XX:w  aR  [        R                  " UR                  U5      n[        R                  " UR%                  U R                  U5      U5      n Xh:w  aR  [        R                  " UR                  U5      n[        R                  " UR%                  UR                  U5      U5      nX4$ )Nr   r   z?Cannot make_shape_compatible: incompatible dimensions at index rr  r   )rn   r   r'   r   r}   r  r(   rG  r   r   rF  r<  rQ  valuesrs  appendr&   strrt  )r   r   r-   lhs_tyrhs_ty	lhs_shape	rhs_shape_	ret_shaperv  leftrightr   s                r   r   r     sP   XXFXXF !2!2v}}fll;ii,,SZZ9P9P9RSU[\V 8OS __6??#4#4v}}fll;ii,,SZZ9P9P9RSU[\N 8OK 
		v00++-	++-	y>C	N*3y>3y>:ii : :3::q I "fmmaS9CSCS=S TV"335		 ;
 ^c)n,3y>3y>:ii : :3::q I "fmmaS9CSCS=S TV"335		 ;
 9~Y///	 +GALEqy  '1*%-  &  "-/21v"68<"=?B4y"IKR"SUXY^U_"` a a , !]]6==)<F))G44SZZKVTC!]]6==)<F))G44SZZKVTC8Or   c                    U c  g U S:X  a  [         R                  R                  $ U S:X  a  [         R                  R                  $ [	        SU  S35      e)NrtnertzzInvalid rounding mode: z0. Supported rounding modes are 'rtne' and 'rtz'.)r
   ROUNDING_MODERTNERTZr&   )rounding_modes    r   _str_to_rounding_moder  ,  sU    $$$###
.}o=mn
oor   c                f   U R                   nUR                  5       (       a9  [        R                  " UR                  U R                   R                  5       5      nX1:X  a  U $ UR                  nUR                  nUR                  5       (       d  UR                  5       (       a  [        XU5      $ UR                  nUR                  nXg:w  a&  [        S[        U5      -   S-   [        U5      -   5      e[        R                  " UR                  U R                  UR                  U5      5      U5      $ )Nz!Cannot bitcast data-type of size z to data-type of size )rn   r   r'   r   r}   r   rv   r   primitive_bitwidthr&   r|  r(   create_bitcastr   r   )r   dst_tyr-   src_ty
src_sca_ty
dst_sca_tysrc_bitsdst_bitss           r   r   r   6  s    ZZFv}}ejj.I.I.KLJJj//11E7++,,H,,H<s8}L P. .03H> ? 	?99W++ELL&,,w:OPRXYYr   c                v   U R                   nUR                  5       (       a9  [        R                  " UR                  U R                   R                  5       5      nXA:X  a  U $ UR                  nUR                  n[        U5      nSnUR                  5       (       an  UR                  5       (       aY  UR                  UR                  :  a?  Uc  [        R                  R                  nOJU[        R                  R                  :w  a  SnO)Ub&  [        S[        U5      -   S-   [        U5      -   5      eUR                  5       (       d  UR                  5       (       a8  UR                  R!                  S5       c   S5       eUR                  S   " XX2S9$ UR#                  5       (       a  UR                  5       (       d1  UR                  5       (       a  UR#                  5       (       d  U(       aA  [        R$                  " UR'                  U R(                  UR+                  U5      U5      U5      $ UR-                  5       (       a  UR/                  5       (       a*  UR1                  5       (       a:  UR/                  5       (       d%  [3        [3        U [        R4                  U5      Xb5      $ UR                  5       =(       a0    UR                  5       =(       a    UR                  UR                  :  nU(       a@  [        R$                  " UR7                  U R(                  UR+                  U5      5      U5      $ UR                  5       =(       a0    UR                  5       =(       a    UR                  UR                  :  n	U	(       a@  [        R$                  " UR9                  U R(                  UR+                  U5      5      U5      $ UR;                  5       (       Ga#  UR;                  5       (       Ga  UR<                  UR<                  :w  d  UR>                  UR>                  :w  a  URA                  5       =(       a    URC                  5       (       + n
URC                  5       (       aW  U RD                  R+                  U5      n[        R$                  " URG                  U5      U RD                  5      n[I        XU5      $ [        R$                  " URK                  U R(                  UR+                  U5      U
5      U5      $ URM                  5       (       Ga  UR;                  5       (       Ga  URC                  5       (       aW  U RD                  R+                  U5      n[        R$                  " URG                  U5      U RD                  5      n[I        XU5      $ URA                  5       (       a@  [        R$                  " URO                  U R(                  UR+                  U5      5      U5      $ [        R$                  " URQ                  U R(                  UR+                  U5      5      U5      $ UR;                  5       (       a  URM                  5       (       a  URC                  5       (       d  URA                  5       (       d@  [        R$                  " URS                  U R(                  UR+                  U5      5      U5      $ [        R$                  " URU                  U R(                  UR+                  U5      5      U5      $ URW                  5       (       a  UR;                  5       (       a  UR<                  nUS:X  a@  [        R$                  " URY                  U R(                  UR+                  U5      5      U5      $ US	:X  aX  [I        [3        U [        RZ                  U5      [        R$                  " UR]                  S
5      [        RZ                  5      U5      $ UR;                  5       (       aU  URW                  5       (       a@  [        R$                  " UR_                  U R(                  UR+                  U5      5      U5      $ URW                  5       (       aU  URW                  5       (       a@  [        R$                  " URa                  U R(                  UR+                  U5      5      U5      $  SU  SU 35       e)NFTz]fp_downcast_rounding should be set only for truncating fp conversions. Source scalar type is z and destination type is convert_custom_typesz0target doesn't provide conversion for this type._builderr   r   r   zcannot cast z to )1rn   r   r'   r   r}   r   r  rw   r  r
   r  r  r&   r|  is_fp8e4b15codegen_fnsgetrM   r(   create_fp_to_fpr   r   rK   rJ   rL   r   rG   create_fp_trunccreate_fp_extrN   r3   r4   r   is_boolr5   r  r4  r   is_standard_floatingcreate_fp_to_sicreate_fp_to_uicreate_ui_to_fpcreate_si_to_fprv   create_ptr_to_intrg   r   create_int_to_ptrr  )r   r  r-   fp_downcast_roundingr  r  r  use_custom_roundingtruncate_fpext_fpsign_extendtyr  bitwidths                 r   r   r   I  s   ZZFv}}ejj.I.I.KLJJ 11EFJ$:$: % %

'
'**G*G
G'@P@P@U@U)=!R%5%5%:%::RV<O+ 68;JHJefhklvhwx y y 	  J$:$:$<$<""&&"$+/0 	d1c	d 0""#9:5J^qq 	
 6 6 8 8  Z%6%6%8%8yy00v||G?TVjkmstt 	Z%7%7%9%9Z%7%7%9%9D

G4jJJ
 ((* F F%%
(E(EE  yy00v||G?TUW]^^ ##% F F%%
(E(EE  yy..u||V\\'=RSU[\\ z0022:#:#::j>W>W[e[t[t>t ..0M9K9K9M5M""7+B711"5u{{CBU0099W44U\\6<<PWCXZefhnoo &&((Z->->-@-@""7+B711"5u{{CBU00%%''99W44U\\6<<PWCXY[abb99W44U\\6<<PWCXY[abb z>>@@z'?'?'A'A99W44U\\6<<PWCXY[abb99W44U\\6<<PWCXY[abb z0022**r>99W66u||V\\RYEZ[]cddq=T%7;RYYwGXGXYZG[]_]e]e=fhopp z0022yy225<<gAVWY_`` z0022yy//fll7>STV\]]4LtF8445r   c                2   [         R                  R                  nU (       au  U S:X  a  [         R                  R                  nU$ U S:X  a  [         R                  R                  nU$ U S:X  a  [         R                  R
                  nU$ [        SU  S35      eU$ )Nz.ca.cgz.cvCache modifier  not supported)r
   CACHE_MODIFIERr   CACGCVr&   cache_modifiercaches     r   _str_to_load_cache_modifierr    s    ""EU"%%((E L u$%%((E
 L	 u$%%((E L ~.>nMNNLr   c                v   [         R                  R                  nU (       a  U S:X  a  [         R                  R                  nU$ U S:X  a  [         R                  R                  nU$ U S:X  a  [         R                  R
                  nU$ U S:X  a  [         R                  R                  nU$ [        SU  S35      eU$ )Nz.wbr  z.csz.wtr  r  )r
   r  r   WBr  CSWTr&   r  s     r   _str_to_store_cache_modifierr    s    ""EU"%%((E L u$%%((E L u$%%((E
 L	 u$%%((E L ~.>nMNNLr   c                    [         R                  R                  nU (       aS  U S:X  a  [         R                  R                  nU$ U S:X  a  [         R                  R                  nU$ [        SU  S35      eU$ )N
evict_lastevict_firstzEviction policy r  )r
   EVICTION_POLICYNORMAL
EVICT_LASTEVICT_FIRSTr&   )eviction_policyevictions     r   _str_to_eviction_policyr    su    !!((Hl*))44H
 O	 -))55H O //@OPPOr   c                    S nU (       aS  U S:X  a  [         R                  R                  nU$ U S:X  a  [         R                  R                  nU$ [	        SU  S35      eU$ )NzeronanzPadding option r  )r
   PADDING_OPTIONPAD_ZEROPAD_NANr&   )padding_optionpaddings     r   _str_to_padding_optionr    sh    GV#''00G
 N	 u$''//G N ~.>nMNNNr   c                v   [         R                  R                  nU (       a  U S:X  a  [         R                  R                  nU$ U S:X  a  [         R                  R                  nU$ U S:X  a  [         R                  R                  nU$ U S:X  a  [         R                  R
                  nU$ [        SU  S35      eU$ )Nacquirereleaseacq_relrelaxedMemory semantic r  )r
   MEM_SEMANTICACQUIRE_RELEASEACQUIRERELEASERELAXEDr&   )
sem_optionsems     r   _str_to_semr    s    
//
)
)C"//))C J 9$//))C J 9$//11C
 J	 9$//))C J /
|>JKKJr   c                2   [         R                  R                  nU (       au  U S:X  a  [         R                  R                  nU$ U S:X  a  [         R                  R                  nU$ U S:X  a  [         R                  R                  nU$ [        SU  S35      eU$ )Ngpuctasysr  r  )r
   MEM_SYNC_SCOPEGPUCTASYSTEMr&   )scope_optionscopes     r   _str_to_scoper    s    !!E5 %%))E L U"%%))E
 L	 U"%%,,E L /~^LMMLr   c                   U (       a  [        U S5      (       d  U /n U  Vs/ s H0  n[        U[        R                  5      (       a  UR                  OUPM2     n nU  H3  n[        U[
        5      (       a  SUs=::  a  [        U5      :  a  M0   e   e   [        U 5      S:  d   e[        U 5      [        [        U 5      5      :X  d   S5       e[        U 5      $ gs  snf )N__iter__r   z'Duplicate dimension in `boundary_check`r[   )	hasattrra   r'   rl   rD   re   rF  setrl  )boundary_checkblock_shapeelemdims       r   _canonicalize_boundary_checkr    s    ~z22,-N]kl]kUY
4(F(F$**DP]kl!Cc3''A,Gs;7G,GGG,GGG ">"Q&&&>"c#n*=&>>i@ii>n%% ms   7Cc	           
        Uc  Ub  [        S5      eU R                  R                  R                  n	U	[        R                  :w  d   S5       eU	R                  5       (       a)  U[        R                  R                  :X  a  [        S5      eU R                  R                  n
[        X:R                  5       5      n[        R                  " UR                  U R                  X4XVU5      U
5      $ )NK`mask` and `other` arguments cannot be specified for loading block pointers4`tl.int1` should be rewritten in `tl.make_block_ptr`z@Padding option `nan` is not supported for integer block pointers)r&   rn   
element_tyr'   rd   rN   r
   r  r  r  r   r(   create_tensor_pointer_loadr   )ptrmaskr   r  r  r  r  is_volatiler-   elt_tyr  s              r   _load_block_pointerr    s     5,fggXX  ++FRWWTTT}}7b&7&7&?&??[\\ XX  F 2.BYBYB[\N 99**3::~PUalmouw wr   c	           
        U R                   R                  R                  5       (       d'  [        SU R                   R	                  5        S35      eUc  Ub  [        S5      eU(       d  U(       a  [        S5      eU R                   R                  5       (       db  U(       a*  UR                   R                  5       (       a  [        S5      eU(       a*  UR                   R                  5       (       a  [        S5      eU R                   R                  5       (       aN  Ub$  [        XR                   R                  5       U5      nUb$  [        X R                   R                  5       U5      nU R                   R                  n	U	R                  n
U
[        R                  :H  nU(       a<  [        R                  n
[        R                  " XR                  5      n	[        X	U5      n Ub  [        X*U5      nU R                   R                  5       (       a1  U R                   R                  5       n[        R                  " X5      nOU
nUc3  [        R                   " UR#                  U R$                  XVU5      U5      nOQ[        R                   " UR'                  U R$                  UR$                  U(       a  UR$                  OS XVU5      U5      nU(       a  [        U[        R                  U5      nU$ )NUnsupported ptr type z in `tl.load`z)`other` cannot be provided without `mask`z`padding_option` or `boundary_check` argument is not supported for loading a tensor ofpointers or loading a scalar. Because the compiler does not know the boundary; please use block pointers (defined by `make_block_ptr`) insteadEMask argument cannot be block type if pointer argument is not a blockzFOther argument cannot be block type if pointer argument is not a block)rn   r}   rv   r&   r   r   rx  r   r  r'   rd   int8pointer_typeaddress_spacer   r   r(   create_loadr   create_masked_load)r  r  r   r  r  r  r  r  r-   ptr_tyr  r  r  r  r   s                  r   _load_legacyr  0  sB   88??!!##01B1B1D0E]STT |)DEE. T U 	U
 88DII&&((deeUZZ((**eff xx'hh.G.G.I7SD(0I0I0KWUE XX__FF G)=)=>3( UG, xx))+v-  |ii++CJJUW]^ii&&szz4;;PU[_af'245;= 3)Jr   c	                   [        U5      n	[        U5      n
[        U5      nU R                  R	                  5       (       a8  U R                  R
                  R                  5       (       a  [        XX#XXU5	      $ [        XX#XXU5	      $ ru   )	r  r  r  rn   rv   r  r   r  r  )r  r  r   r  r  r  r  r  r-   r  r  r  s               r   loadr
  n  sv     (7E&7H$^4G
xxSXX0099;;"3eWU]lstt Cughelmmr   c                    UR                  U R                  UR                  U5      5      n[        R                  " X15      $ ru   )$create_reinterpret_tensor_descriptorr   r   r'   $_experimental_tensor_descriptor_base)desc_ptrblock_tyr-   r   s       r   reinterpret_tensor_descriptorr  ~  s3    99(//8>>ZaKbcF226DDr   c                    [        U 5      S:w  a  g U S   S:  d   SU S    35       eSUR                  -  S-  nU S   U:  d   U SU SU S    35       eg )	Nr	   r      zAtensor descriptor block shape must have at least 8 rows, but got r6  r   z2 tensor descriptor block shape must have at least  columns, but got )rF  r  )r  r5   min_colss      r   validate_descriptor_blockr    s    
5zQ 8q=h]^cde^f]ghh=U---1H	 { '!ST\S]]opuvwpxoyz{ r   c                   [        U [        R                  5      (       d   e[        U R                  U R
                  5        [        U R                  5      n[        U5      U:X  d   SU S[        U5       35       e[        XASS9nUR                  U R                  U[        U5      [        U5      5      n[        R                  " X`R                  5      $ Nz	expected z offsets, but got Frequire_i64)ra   r'   r  r  r  r5   rF  _convert_to_ir_valuescreate_descriptor_loadr   r  r  r(   r   )descoffsetsr  r  r-   ndimro   s          r   descriptor_loadr    s    dBCCDDDDd..

;t Dw<4S9TF2DS\N!SS#G%HG&&t{{G=XYg=h'>'O	QA99Q((r   c                   [        U [        R                  5      (       d   e[        U R                  U R
                  5        [        U R                  5      n[        U5      U:X  d   SU S[        U5       35       eUR                  U R                  :X  d   e[        X2SS9n[        R                  " UR                  U R                  UR                  U5      [        R                  5      $ r  )ra   r'   r  r  r  r5   rF  r  r  r(   create_descriptor_storer   void)r  rD   r  r-   r  s        r   descriptor_storer#    s    dBCCDDDDd..

;t Dw<4S9TF2DS\N!SS;;$*****#G%HG99W44T[[%,,PWXZ\ZaZabbr   c                   [        U [        R                  5      (       d   eUS:X  d   S5       eUS:X  d   S5       e[        U R                  5      S:X  d   SU R                   35       eU R                  S   S:X  d   SU R                   35       e[        UR
                  5      S:X  d   S	UR
                   35       eUR
                  S   S
:  d   SUR
                   35       eU R                  nSUR                  -  S
-  nU R                  S   U:  d   SU SU SU R                  S    35       e[        R                  " U R                  UR
                  S   U R                  S   /5      n[        XR4SS9S   nUR                  U R                  UR                  X(R                  U5      5      n	[        R                  " X5      $ )N z#cache modifier is not supported yetz$eviction policy is not supported yetr	   descriptor must be 2D, but got r   r   *descriptor block must have 1 row, but got x offsets must be 1D, but got r  z5descriptor gather must have at least 8 rows, but got r6  zdescriptor gather of  must have at least r  Fr  )ra   r'   r  rF  r  r  r5   r  r   r  create_descriptor_gatherr   r   r(   )
r  	x_offsetsy_offsetr  r  r-   r5   r  rn   ro   s
             r   descriptor_gatherr-    s   dBCCDDDDRF!FFb H"HH  t A%[)HIYIYHZ'[[%A!#d'QRVRbRbQc%dd# y1$X(FyFW&XX$ ??1"m&[\e\k\k[l$mm"JJEU---1H	 }/w6J8*Tfgkgwgwxygzf{|}  ==iooa&8$:J:J1:M%NOD$WlNqQH((i6F6FR\R\]dRefA99Qr   c                   [        U [        R                  5      (       d   e[        U R                  5      S:X  d   SU R                   35       eU R                  S   S:X  d   SU R                   35       e[        UR
                  5      S:X  d   SUR                   35       eUR
                  S   S:  d   SUR
                   35       eU R                  nS	UR                  -  S-  nU R                  S   U:  d   S
U SU SU R                  S    35       e[        XC4SS9S   nUR                  U R                  UR                  UR                  U5        [        R                  " S [        R                  5      $ )Nr	   r&  r   r   r'  r(  r  z6descriptor scatter must have at least 8 rows, but got r6  zdescriptor scatter of r)  r  Fr  )ra   r'   r  rF  r  r  shapaer5   r  r  create_descriptor_scatterr   r(   r"  )r  rD   r+  r,  r-   r5   r  s          r   descriptor_scatterr1    s   dBCCDDDD t A%[)HIYIYHZ'[[%A!#d'QRVRbRbQc%dd# y1$Y(FyGWGWFX&YY$ ??1"n&\]f]l]l\m$nn"JJEU---1H	 ~07KH:Ughlhxhxyzh{g|}~  %WlNqQH%%dkk5<<AQAQS[\99T277##r   c                   U(       a#  US   R                   [        R                  :X  d   e[        R                  " U
R	                  U R
                  UR
                  U Vs/ s H  oR
                  PM     snU Vs/ s H  oR
                  PM     snU Vs/ s H  oR
                  PM     snU Vs/ s H  oR
                  PM     snUUUU	5
      [        R                  5      $ s  snf s  snf s  snf s  snf )Nr   )r5   r'   rg   r(   create_tensormap_creater   r"  )r  global_addressbox_dim
global_dimglobal_strideelement_stride	elem_typeinterleave_layoutswizzle_mode	fill_moder-   ro   s               r   tensormap_creater=    s     a 0 6 6"(( BBB99''OO!!&'w!XXw')*z!XXz*,-}!XX}--.~!XX~.	
 	  (*-.s    C$;C)C.1C3c                ~    [         R                  " UR                  U R                  5      [         R                  5      $ ru   )r'   r(   #create_tensormap_fenceproxy_acquirer   r"  )r  r-   s     r   tensormap_fenceproxy_acquirer@    s)    99W@@QSUSZSZ[[r   c           	        Ub  [        S5      eU R                  R                  R                  5       nUR                  R	                  5       (       d  [        XU5      nUR                  R	                  5       (       d   S5       eXqR                  R                  5       :X  d&   SU SUR                  R                  5        S35       eU R                  R                  R                  UR                  R                  :X  d@   SU R                  R                  R                   SUR                  R                   S35       eU R                  R                  R                  nU[        R                  :w  d   S5       e[        X75      n[        XU5      n[        R                  " UR                  U R                  UR                  X4U5      [        R                  5      $ )	Nr  z-Value argument must be block type or a scalarzBlock shape(z) and value shape(z
) mismatchzBlock element type(z) and value element type(r  )r&   rn   r  r   r   rx  r'   rd   r  r   r(   create_tensor_pointer_storer   r"  )	r  valr  r  r  r  r-   r  r  s	            r   _store_block_pointerrD    s    fgg ((%%668K88"3W=88O OO((33   ]	k]"4SXX5N5N5P4QQ[\] 88))SXX-@-@@  qDWX[X`X`XkXkXvXvWw  xQ  RU  RZ  RZ  Re  Re  Qf  fp  Cq  q@XX  ++FRWWTTT 2.NN sG
$C 99W88SZZQ_hpqWW r   c           	     6   U R                   R                  R                  5       (       d'  [        SU R                   R	                  5        S35      eU(       a  [        S5      eU R                   R                  5       (       d[  UR                   R                  5       (       a  [        S5      eU(       a*  UR                   R                  5       (       a  [        S5      eU R                   R                  5       (       aK  [        XR                   R                  5       U5      nUb$  [        X R                   R                  5       U5      nU R                   R                  nUR                  nU[        R                  :X  a<  [        R                  n[        R                  " XR                  5      n[        XU5      n [        XU5      nUcJ  [        R                  " UR!                  U R"                  UR"                  XE5      [        R$                  5      $ UR                   R                  R'                  5       (       d  [        S5      e[        R                  " UR)                  U R"                  UR"                  UR"                  XE5      [        R$                  5      $ )Nr   z in `tl.store`z`boundary_check` argument is not supported for storing a tensor of pointers or storing a scalar. Because the compiler does not know the boundary; please use block pointers (defined by `make_block_ptr`) insteadzFValue argument cannot be block type if pointer argument is not a blockr  z"Mask must have boolean scalar type)rn   r}   rv   r&   r   r   rx  r   r  r'   rd   r  r  r  r   r(   create_storer   r"  r  create_masked_store)	r  rC  r  r  r  r  r-   r  r  s	            r   _store_legacyrH    s   88??!!##01B1B1D0E^TUU  A B 	B
 8888effDII&&((dee xx"3(A(A(CWM'hh.G.G.I7SDXX__FF )=)=>3( sG
$C |yy--cjj#**eVXZX_X_``99##%%=>>99W00SZZV[fhjhohoppr   c           	        [        U5      n[        U5      nU R                  R                  5       (       d)  U R                  R                  R                  5       (       a  [        S5      eU R                  R                  5       (       a7  U R                  R                  R                  5       (       a  [        XX#XxU5      $ [        XX#XxU5      $ )N"Cannot store to a constant pointer)r  r  rn   is_constr}   r&   rv   r  r   rD  rH  )	r  rC  r  r  r  r  r-   r  r  s	            r   storerL  C  s     )8E&7H
xxchhoo6688=>>
xxSXX0099;;#CdEU\]] StUgVVr   c           	     F   [        U5      n[        U5      nU R                  R                  R                  nUR
                  S;  a  [        S5      e[        R                  " UR                  U R                  UR                  UR                  X45      UR                  5      $ )N)   r6  r   z9atomic_cas only supports elements with width {16, 32, 64})r  r  rn   r}   r  r  r&   r'   r(   create_atomic_casr   )r  cmprC  r  r  r-   r  s          r   
atomic_casrQ  Y  sy    
c
C% E++J$$L8TUU99W..szz3::szzSV^`c`h`hiir   c                L   U R                   R                  R                  5       (       d&  [        SU R                   R	                  5       -   5      eU R                   R                  5       (       d)  U R                   R                  R                  5       (       a  [        S5      eU R                   R                  R                  nU[        R                  L a  US:w  a  [        SU-   S-   5      eU[        R                  [        R                  [        R                  [        R                  4;   a  [        SU-   S-   [        U5      -   5      eU R                   R                  5       (       aN  Ub$  [        X R                   R!                  5       U5      nUb$  [        XR                   R!                  5       U5      n[#        XR                   R                  R                  U5      nUc  UR%                  S5      n[        R                  nU R                   R                  5       (       af  UR'                  X`R                   R!                  5       5      n[        R(                  " [        R                  U R                   R!                  5       5      n[        R*                  " Xg5      nXU4$ )Nz)Pointer argument of store instruction is rJ  r   atomic_z does not support fp16z does not support T)rn   r}   rv   r&   r   rK  r  r'   rE   rd   r  int16rF   r|  r   rx  r   r   rc   rG  r   r(   )r  rC  r  opr-   r  mask_irmask_tys           r   atom_red_typechecking_implrX  b  s   88??!!##DsxxGXGXGZZ[[
xxchh11::<<=>>++JRZZB%KR*BBCCbggrww"++>>R*>>ZPQQ
xx'hh.G.G.I7SD?&sHH,E,E,GQC
sHHOO..
8C|""4(''88**7HH4M4M4OPGmmBGGSXX-F-F-HIGyy*T>r   c                   [        XUSU5      u  pn[        U5      n[        U5      nUR                  R                  nUR                  5       (       a  UR                  5       (       aj  [        R                  " UR                  [        R                  R                  U R                  UR                  UR                  X45      UR                  5      $ [        R                  " UR                  [        R                  R                  U R                  UR                  UR                  X45      UR                  5      $ U[        R                  [        R                   1;  a  [#        SU 35      e[%        / SXe5      nU[        R                  :X  a  [        R&                  O[        R(                  n[+        XU5      n	[+        U [        R,                  " US5      U5      n
U[        R                  :X  a  [        R.                  O[        R0                  n[+        XU5      n[+        U [        R,                  " US5      U5      n[3        XU5      n[5        XU5      n[        R                  " UR                  [        R                  R                  U
R                  U	R                  [7        X.U5      R                  X45      U	R                  5      n[        R                  " UR                  [        R                  R8                  UR                  UR                  [7        X/U5      R                  X45      UR                  5      n[;        UUUU5      n[+        UXe5      $ )Nr   z#atomic_max not supported for dtype r`   r   )rX  r  r  rn   r}   rN   r   r'   r(   create_atomic_rmwr
   	ATOMIC_OPMAXr   UMAXrG   rI   r8   ri   r*   rg   r   r  rf   rh   r   r'  r   UMINwherer  rC  r  r  r  r-   sca_tyr  i_typei_vali_ptrui_typeui_valui_ptrposnegpos_retneg_retr   s                      r   
atomic_maxrl  }  |   /$wONCd
c
C% EXX__F}}!!99))",,*:*:CJJ

TXT_T_adlnqnvnvx x 99))",,*;*;SZZUYU`U`bemorowowy y
 bjj"**--=fXFGGC)D2::-RXX288FC)EC3W=E!RZZ/biiRYYGS7+FS"//'15w?F
7
+C
Cw
'Cii!!",,"2"2ELL%,,"&t'":"A"A3	OPUPZPZ\G ii!!",,"3"3V]]FMM"&t'":"A"A3	OPVP[P[]G Wgw
/C3((r   c                   [        XUSU5      u  pn[        U5      n[        U5      nUR                  R                  nUR                  5       (       a  UR                  5       (       aj  [        R                  " UR                  [        R                  R                  U R                  UR                  UR                  X45      UR                  5      $ [        R                  " UR                  [        R                  R                  U R                  UR                  UR                  X45      UR                  5      $ U[        R                  [        R                   1;  a  [#        SU 35      e[%        / SXe5      nU[        R                  :X  a  [        R&                  O[        R(                  n[+        XU5      n	[+        U [        R,                  " US5      U5      n
U[        R                  :X  a  [        R.                  O[        R0                  n[+        XU5      n[+        U [        R,                  " US5      U5      n[3        XU5      n[5        XU5      n[        R                  " UR                  [        R                  R                  U
R                  U	R                  [7        X.U5      R                  X45      U	R                  5      n[        R                  " UR                  [        R                  R8                  UR                  UR                  [7        X/U5      R                  X45      UR                  5      n[;        UUUU5      n[+        UXe5      $ )Nr   z#atomic_min not supported for dtype r`   r   )rX  r  r  rn   r}   rN   r   r'   r(   rZ  r
   r[  MINr   r^  rG   rI   r8   ri   r*   rg   r   r  rf   rh   r   r'  r   r]  r_  r`  s                      r   
atomic_minrp    rm  r   c           
        [        XUSU5      u  pn[        U5      n[        U5      nUR                  R                  nUR                  5       (       a  [        R                  R                  O[        R                  R                  n[        R                  " UR                  XpR                  UR                  UR                  X45      UR                  5      $ )Nr   )rX  r  r  rn   r}   rw   r
   r[  FADDADDr'   r(   rZ  r   )r  rC  r  r  r  r-   ra  rU  s           r   
atomic_addrt    s    /$wONCd
c
C% EXX__F$0022		8H8HB99W..r::szz4;;X[cehememnnr   c           
     $   [        XUSU5      u  pn[        U5      n[        U5      n[        R                  " UR                  [        R                  R                  U R                  UR                  UR                  X45      UR                  5      $ )Nand)rX  r  r  r'   r(   rZ  r
   r[  ANDr   rn   r  rC  r  r  r  r-   s         r   
atomic_andry    q    /$wONCd
c
C% E99W..r||/?/?SZZY]YdYdfiqXX r   c           
     $   [        XUSU5      u  pn[        U5      n[        U5      n[        R                  " UR                  [        R                  R                  U R                  UR                  UR                  X45      UR                  5      $ )Nor)rX  r  r  r'   r(   rZ  r
   r[  ORr   rn   rx  s         r   	atomic_orr~    so    /$gNNCd
c
C% E99W..r||

CJJX\XcXcehpXX r   c           
     $   [        XUSU5      u  pn[        U5      n[        U5      n[        R                  " UR                  [        R                  R                  U R                  UR                  UR                  X45      UR                  5      $ )Nxor)rX  r  r  r'   r(   rZ  r
   r[  XORr   rn   rx  s         r   
atomic_xorr    rz  r   c           
     $   [        XUSU5      u  pn[        U5      n[        U5      n[        R                  " UR                  [        R                  R                  U R                  UR                  UR                  X45      UR                  5      $ )Nxchg)rX  r  r  r'   r(   rZ  r
   r[  XCHGr   rn   rx  s         r   atomic_xchgr    sq    /$PNCd
c
C% E99W..r||/@/@#**cjjZ^ZeZegjrXX r   c                    U R                  5       UR                  R                  ;   d!   SUR                  R                   SU  35       eU R                  5       n U S:X  a  Sn [	        [
        R                  U 5      $ )Nzinput_precision must be one of . Got TF32X3TF32x3)lowerr   allowed_dot_input_precisionsupperrA  r
   INPUT_PRECISION)input_precisionr-   s     r   _str_to_dot_input_precisionr    sx      "goo&R&RR p
)'//*V*V)WW]^m]nopR%++-O(""2%%77r   c           
        U R                   R                  5       (       a  UR                   R                  5       (       d   eU R                  R                  5       (       a!  UR                  R                  5       (       a  GOU R                  [        R
                  [        R                  [        R                  [        R                  [        R                  4;   d   SU R                   35       eUR                  [        R
                  [        R                  [        R                  [        R                  [        R                  4;   d   SUR                   35       eU R                  UR                  :X  d!   SU R                   SUR                   35       eU R                  R                  5       (       d  UR                  R                  5       (       a6  [        U [        R                  U5      n [        U[        R                  U5      nUc  UR                  R                  n[        X65      n[        U R                   5      n[        UR                   5      nXxs=:X  a  S:X  d2  O  Xxs=:X  a  S:X  d$  O   SU R                    SUR                    S	35       eU R                   S
   R"                  UR                   S   R"                  :X  dV   SU R                    SUR                    SU R                   S
   R"                   SUR                   S   R"                   S	3	5       eUR$                  R'                  S5      c   S5       eUR$                  S   " U R                   UR                   5      n	U R                   S   R"                  U	S   :  a@  U R                   S
   R"                  U	S   :  a   UR                   S
   R"                  U	S   :  d   SU	S    SU	S    SU	S    35       eU R                   R(                  R+                  5       (       aQ  U R                   R(                  [        R
                  :X  d   S5       eUR-                  S5      n
[        R.                  nOUR1                  5       (       a  [3        S5      eU R                   R(                  R5                  5       (       d)  U R                   R(                  R1                  5       (       a"  UR7                  S5      n
[        R                  nO9UR9                  5       (       a  UR;                  S5      OUR7                  S5      n
UnU R                   R                   S   nUR                   R                   S
   nU R                   R                   S
   nUS:X  a  U R                   R                   S   OS n[        R<                  " X(       a  XU/OX/5      nUc  UR?                  X(       a  XU/OX/5      nOUR@                  nUR                   U:X  d   eUcX  U R                  R                  5       (       a6  UR                  R                  5       (       a  UR                  RB                  nOXSnOUU R                  R                  5       (       a6  UR                  R                  5       (       a  XN:  a  [3        SU SU S	35      e[        RD                  " URG                  U R@                  UR@                  UX45      U5      $ )NzUnsupported lhs dtype zUnsupported rhs dtype z&Both operands must be same dtype. Got r   r	      +Both inputs must be either 2D or 3D; (lhs: 	 vs rhs: r  rX  zFirst input shape (z) and second input shape z= are not compatible for matmul (second index of first shape (z0) must be equal to first index of second shape (min_dot_sizez2target doesn't provide lower shape bounds for dot.r   r   zInput shapes should have M >= z, N >= z
 and K >= zonly int8 supported!zhout_dtype=bfloat16 is unsupported. Please use out_dtype=float32/float16 and cast with `.to(tl.bfloat16)`zmax_num_imprecise_acc (z) must be <= K ()$rn   r   r5   rM   r'   r  uint8rE   rF   rG   r  r   r   default_dot_input_precisionr  rF  r  rD   r  r  r}   rN   	get_int32r*   rL   r&   rJ   get_fp32rK   get_fp16r   rG  r   max_num_imprecise_acc_defaultr(   
create_dot)r   r   accr  max_num_imprecise_acc	out_dtyper-   lhs_rankrhs_rankr  r  ret_scalar_tyMNKBr   
acc_handles                     r   dotr    s   88388#4#4#6#666
yycii..00yyRWWbhh

BKKZZ) ) 	O,B399+*N	O )yyRWWbhh

BKKZZ) ) 	O,B399+*N	O )yyCII%k)OPSPYPY{Z_`c`i`i_j'kk%
yy#))"7"7"9"93

G,3

G,!//EE1/KO399~H399~H$1$(A(A  REpqtqzqzp{  |E  FI  FO  FO  EP  PQ  DR  RA99R=#))
#E q(3LSYYK  XU  VY  V_  V_  `b  Vc  Vi  Vi  Uj  jZ  [^  [d  [d  eg  [h  [n  [n  Zo  op  qq "">2>t@tt>&&~6sxxJL99R=,q/1ciim6I6I\Z[_6\IIbM<?2r,\!_,=W\RS_DUU_`lmn`o_pqr 3 xxxx"'')A+AA)q!					vx 	x		 	 	"	"chhoo&=&=&?&?a 

$-$5$5$7$7Wa W=M=Ma=P!rArArA%]qA]]=q1)qfEF
{))"1qQi1&I
ZZ
xx6!!! $99#))"2"2"4"4$+OO$Q$Q!$%!99#))"2"2"4"49N9R67L6MM]^_]``abcc99W''

CJJ
Os r   c                |    [        [        R                  U R                  5       S 5      nUc  [	        SU  S35      eU$ )NzInvalid float format: rZ   )rA  r
   ScaleDotElemTypeTYr  r&   )float_formatty_enums     r   _str_to_fp_typer  L  s>    b++\-?-?-A4HG1,qABBNr   c                :   [         R                  [         R                  [         R                  [         R                  S.R                  U5      nUcD  US:X  d
   SU 35       eU R                  [         R                  :X  d   SU R                   35       eU $ U R                  U:X  a  U $ [         R                  [         R                  [         R                  [         R                  S.U   nU R                  U:X  d   SU SU R                   35       e[        XU5      $ )z
If float_format is subbyte, make sure it's packed as uint8 and return it.
Otherwise, return a tensor (perhaps bitcasting) of the specified float format.
)e5m2e4m3bf16fp16e2m1z)Internal Error: Unexpected float format: z)e2m1 format must be packed as uint8. Got zUnexpected dtype for r  )
r'   float8e5
float8e4nvrF   rE   r  r5   r  uint16r   )rC  r  r-   	triton_tyunsigned_tys        r   _bitcast_to_fp_typer  S  s    
 bmmR[[Z\ZdZdeiijvwIv%a)RS_R`'aa%yyBHH$](QRUR[R[Q\&]]$

yyI
!xx299VXV_V_`amnyyK'`+@fUXU^U^T_)``'sw//r   c
                *   U R                   R                  5       (       a  UR                   R                  5       (       d   e[        U R                  5      n
[        UR                  5      nXs=:X  a  S:X  d2  O  Xs=:X  a  S:X  d$  O   SU R                   SUR                   S35       eUR                  nUR                  n[        U5      n[        U5      n1 SknX.;   d
   SU 35       eX^;   d
   SU 35       eUS L =(       d/    [        U[        R                  5      =(       a    UR                  S L nUS L =(       d/    [        U[        R                  5      =(       a    UR                  S L n[        XU	5      n [        X5U	5      nU R                   R                  S	   nUR                   R                  S	S  u  nnUS
:X  a  SOSnUS
:X  a  SOSnUU-  UU R                   R                  S   -  :X  d"   SU R                   SUR                   S35       eU
S:X  a  U R                   R                  S   OS n[        R                  " UU(       a  UUU/OUU/5      nU	R                  S5      nUc!  U	R                  UU(       a  UUU/OUU/5      nOUR                  nUR                   U:X  d   eU(       a  S OUR                  nU(       a  S OUR                  n[        R                  " U	R                  U R                  UXR                  UXU5      U5      $ )Nr	   r  r  r  r  >   r  r  r  r  r  zNYI: lhs_format zNYI: rhs_format r  r  r   rX  zCReduction dimension should pack the same number of elements; (lhs: r   )rn   r   rF  r  rD   r  ra   r'   rl   r  r   r  rG  r   r(   create_dot_scaled)r   	lhs_scale
lhs_formatr   	rhs_scale
rhs_formatr  	fast_mathr  r-   r  r  lhs_format_enumrhs_format_enumallowed_formatsrhs_scale_is_nonelhs_scale_is_noner  r  r  PACKED_APACKED_Br  r   r  r  rhs_scale_handlelhs_scale_handles                               r   
dot_scaledr  e  s    88388#4#4#6#666399~H399~H$1$(A(A  REpqtqzqzp{  |E  FI  FO  FO  EP  PQ  DR  RA &&J &&J%j1O%j1O>O(I,<ZL*II((I,<ZL*II(!T)njBLL.Q.mV_VeVeimVm!T)njBLL.Q.mV_VeVeimVm
cw
7C
cw
7CrA88>>"#DAq&(qaH&(qaHx<8chhnn
'   tRSVS\S\R]]fgjgpgpfqqrst  &]qA]]91q!Qi1a&AF			!	B
{))"1q!Qi1a&I
ZZ
xx6!!!0ti6F6F0ti6F6F99!!#**.>Q[Q[]m"1j	JKQS Sr   c                   U R                   [        R                  :w  a#  [        R                  " SU R                    35        [        U [        R                  U5      n [        XUSS5      u  pU R                  R                  5       (       a  [        XU5      u  p[        XU5      u  pO[        XU5      u  pUR                  n[        R                  " UR                  U R                  UR                  UR                  5      U5      $ )Nzgtl.where with a non-boolean condition is deprecated and will error out in a future triton release. Got T)r5   r'   rd   warningswarnr   r   rn   r   r   r(   create_selectr   )	conditionro   r   r-   r  r   s         r   r_  r_    s    "''!uv  wF  wF  vG  H	
 Y1I'gtTBDA~~  +I'B	#A'21+I'B	VVF99W**9+;+;QXXqxxPRXYYr   c                n    U(       a  [         R                  " X5      nOUn[         R                  " X5      $ ru   )r'   r   r(   )ro   rR   r  res_tys       r   wrap_tensorr    s)    y4 99Qr   c                (  ^ ^^^	^
 Uc  [        U4S jT  5       5      m SnT S   R                  R                  m
[        T
5      nX:  d   SU S35       e[	        T
5       VVs/ s H  u  pVXQ:w  d  M  UPM     snnm	[        U
4S jT  5       5      (       d   S5       eTR                  T  Vs/ s H  owR                  PM     snU5      mU" T5        TR                  5         [        U UU	4S j[        [        T 5      5       5       5      $ s  snnf s  snf )Nc              3  d   >#    U  H%  n[        XR                  R                  /S TS9v   M'     g7f)TrY  N)rM  r@  rD   )rh  tr-   s     r   rj  reduction.<locals>.<genexpr>  s'     f_eZ[wq77==/tWU_es   -0r   z&reduction axis must be < inputs rank (r  c              3  T   >#    U  H  oR                   R                  T:H  v   M     g 7fru   )rn   r  )rh  r  r  s     r   rj  r    s     5fvv||u$fs   %(z-all reduction inputs must have the same shapec              3     >#    U  H8  n[        TR                  U5      TU   R                  R                  T5      v   M:     g 7fru   r  
get_resultrn   r}   )rh  rv  inputs	reduce_opr  s     r   rj  r    s9     tas\]Y11!4fQinn6K6KYWWas   A A)
tuplern   r  rF  rs  allcreate_reducer   verifyr<  )r  r,   region_builder_fnr-   rankrv  rL  r  r  r  r  s   `  `    @@@r   	reductionr    s    |f_eff1INN  Eu:D;H@aHH;(/=/tq19/=I5f555f7ff5%%&@Axx&@$GIi tafgjkqgrasttt > 'As   "D	1D	+Dc                  ^ ^^ T S   R                   R                  m[        T5      nU* Us=::  a  U:  d  O   SU SU S35       eUS:  a  X-  nT  H$  nUR                   R                  T:X  a  M   S5       e   UR                  T  Vs/ s H  ofR                  PM     snX5      mU" T5        TR                  5         [        U UU4S j[        [        T 5      5       5       5      $ s  snf )Nr   z
scan axis z must be < inputs rank (r  z(all scan inputs must have the same shapec              3     >#    U  H8  n[        TR                  U5      TU   R                  R                  T5      v   M:     g 7fru   r  )rh  rv  r  scan_opr  s     r   rj  #associative_scan.<locals>.<genexpr>  s9     n[mVWW//2F1INN4I4I5QQ[mr  )rn   r  rF  create_scanr   r  r  r<  )	r  r,   r  reverser-   r  r  r  r  s	   `      @@r   associative_scanr    s    1INN  Eu:D5D4S:dV3KD6QR!SSaxvv||u$P&PP$  !!V"<V88V"<dLGgNNn[`adekal[mnnn	 #=s   	C'c                   UR                   R                  5       (       d   S5       e[        U R                  R                  5      n[        UR                  R                  5      U:X  d   S5       eU* Us=::  a  U:  d  O   SU SU S35       eUS:  a  X$-  n[        U5       HI  nXR:X  a  M
  UR                  R                  U   U R                  R                  U   :X  a  M@   SU S35       e   UR                  U R                  UR                  U5      n[        X`R                  R                  UR                  R                  5      $ )	Nzindex must be an integer tensorz0source and index tensors must have the same rankzgather axis z must be < source rank (r  r   z
index dim z( must match the corresponding source dim)
r5   rN   rF  rn   r  r<  create_gatherr   r  r}   )srcindexr,   r-   r  ri  gathers          r   r  r    s   ;;B!BBsxx~~Duzz D(\*\\(5D4U<v5MdVST!UUax4[9zz"chhnnQ&77t:dVKs9tt7 
 ""3::u||TBFvxx

0@0@AAr   c                8   [        U R                  5      S:X  d   S5       eU R                  R                  5       (       d   S5       e[        R
                  " UR                  U R                  U5      [        R                  " [        R                  U/5      5      $ )Nr   z histogram only supports 1D inputz%histogram only supports integer input)
rF  r  r5   rN   r'   r(   create_histogramr   r   r*   )r   num_binsr-   s      r   	histogramr    sv    u{{q D"DD ;;H!HH99W--ellHEr}}UWU]U]`h_iGjkkr   c                   [        S[        U R                  5      5      [        U5      :w  a  [        S5      eU R                  R                  S[        R                  " XR                  R                  5       5      5        U $ )Nr   zAShape of input to multiple_of does not match the length of valuesztt.divisibility)	r   rF  r  r&   r   set_attrr
   	make_attrget_contextro   rz  s     r   multiple_ofr    sY    
1c!''ls6{*\]]HH'fhh>R>R>T)UVHr   c                    [        U R                  5      [        U5      :w  a  [        S5      eU R                  R	                  S[
        R                  " XR                  R                  5       5      5        U $ )NzDShape of input to max_contiguous does not match the length of valuesztt.contiguityrF  r  r&   r   r  r
   r  r  r  s     r   max_contiguousr  	  sQ    
177|s6{"_``HHor||FHH<P<P<R'STHr   c                    [        U R                  5      [        U5      :w  a  [        S5      eU R                  R	                  S[
        R                  " XR                  R                  5       5      5        U $ )NzCShape of input to max_constancy does not match the length of valuesztt.constancyr  r  s     r   max_constancyr     sQ    
177|s6{"^__HHnbll688;O;O;Q&RSHr   c                h    [         R                  " U R                  5       [         R                  5      $ ru   )r'   r(   create_barrierr"  )r-   s    r   debug_barrierr    s     99W++-rww77r   c           	     j   U R                  S5      (       d  U(       a  U S-  n U R                  S5      (       d  U(       a  U S S S-   n [        U 5      S:  a  U R                  S5      (       d  SU -   n U Vs/ s H  oDR                  PM     nnU Vs/ s H\  oDR                  [
        R                  [
        R                  [
        R                  [
        R                  [
        R                  4;   PM^     nn[
        R                  " UR                  XXV5      [
        R                  5      $ s  snf s  snf )N rr  rX  r	   )endswithrF  
startswithr   r5   r'   rd   r  rT  r*   rg   r(   create_printr"  )prefixargshexr-   argnew_args	is_signeds          r   device_printr    s     ??3D#??4  Tt#
6{Qv0055v&*+ds

dH+Z^_Z^SVrww288RXXNNZ^I_99W))&xKRWWUU ,_s   6D+A#D0c                    UR                   R                  (       d  g [        R                  " UR	                  U R
                  U5      [        R                  5      $ ru   )r   debugr'   r(   create_assertr   r"  )r   r   r-   s      r   r   r   *  s8    ??  99W**4;;<bggFFr   c                ~    [         R                  " UR                  U R                  5      [         R                  5      $ ru   )r'   r(   create_assumer   r"  )r   r-   s     r   assumer  0  s&    99W**4;;7AAr   c                   [        U[        5      (       a  [        R                  " U5      n[        U[        R                  5      (       a  U(       aI  SUR                  s=::  a  S:  d  O   SUR                   S35       eU R                  UR                  5      $ SUR                  s=::  a  S:  d  O   SUR                   S35       eU R                  UR                  5      $ [        U[        R                  5      (       a  UR                  R                  S:X  d   S	5       eUR                  R                  5       (       d   S
5       eUR                  [        R                  :w  aJ  U(       aC  U R                  UR                  U R                  5       UR                  R                  5       5      $ UR                  [        R                   :w  a  U(       d   S5       eUR                  $  S[#        U5       35       e)NrX   rY   z@Block pointers only support 64 bit `shape/strides`, got a value z which is out of the rangerV   rW   zFBlock pointers only support 32 bit `offsets/block_shape`, got a value r   z*Expected a scalar in shape/strides/offsetsz8Expected an integer scalar type in shape/strides/offsetszzBlock pointers only support 32 bit `offsets/block_shape`, add a `.to(tl.int32)` or use regular indexing for 64 bit supportz3Unsupported element type in shape/strides/offsets: )ra   re   r'   rl   rD   r   r  r(   r@  r5   rN   rg   r   r   get_int64_tyr   r*   rn   )r-   r  r  s      r   _convert_elem_to_ir_valuer  4  s   $||D!$%%TZZ/%/ F 4#zzl*D2F F/$$TZZ00TZZ/%/ F 4#zzl*D2F F/$$TZZ00	D"))	$	$zz1$R&RR$zz  ""^$^^"::!k**4;;8L8L8NPTPZPZPhPhPjkkZZ288#KS S S5{{TGT
|TT5r   c                    [        US5      (       a  U Vs/ s H  n[        XU5      PM     sn$ [        XU5      /$ s  snf )Nr  )r  r  )r-   	list_liker  r  s       r   r  r  L  sD    y*%%R[\R[$)'ER[\\%g+FGG ]s   <c           	       ^ [        Xa5      n[        Xb5      n[        XcSS9nU R                  R                  5       (       a)  U R                  R                  R	                  5       (       a  [        S5      eU R                  R                  [        R                  :X  aD  [        U [        R                  " [        R                  U R                  R                  5      U5      n [        TS5      (       d  T/mT Vs/ s H0  n[        U[        R                  5      (       a  UR                  OUPM2     snm[!        S T 5       5      (       d   S5       e[        US5      (       d  U/nU Vs/ s H0  n[        U[        R                  5      (       a  UR                  OUPM2     nn[#        U5      [%        ['        [)        U5      5      5      :X  d   S5       e[!        U4S jXX54 5       5      (       d   S	5       eUR+                  U R,                  XUTU5      n[        R.                  " U[        R                  " [        R0                  " U R                  R                  T5      5      5      $ s  snf s  snf )
NFr  zMExpected `base` to be a pointer type (but not a block pointer type or others)r  c              3  z   #    U  H1  n[        U[        5      =(       a    S Us=:*  =(       a    S:  Os  v   M3     g7f)rV   rW   N)ra   re   )rh  r  s     r   rj  !make_block_ptr.<locals>.<genexpr>e  s,     XKDz$$?4)?)?%)??Ks   9;zGExpected a list of constant integers (`int32_t` range) in `block_shape`z<Expected a permutation of (0, 1, ..., len(order)-1) in orderc              3  R   >#    U  H  n[        T5      [        U5      :H  v   M     g 7fru   )rF  )rh  r  r  s     r   rj  r  o  s!     dCcis;3y>1Ccs   $'zBExpected shape/strides/offsets/block_shape to have the same length)r  rn   rv   r  r   r&   r'   rd   r   r  r  r  r  ra   rl   rD   r  rl  rm  r<  rF  create_make_block_ptrr   r(   r   )	baser  stridesr  r  orderr-   r  r   s	       `    r   make_block_ptrr#  R  s    "'1E#G5G#G%HG 99!5!5!>!>!@!@hii yyrww&D"//"''4993J3JKWU ;
++"mVabVadD",,!?!?4::TIVabKXKXXX RQRX 5*%%PUVPU:dBLL99TZZtCPUEV%=Ds5z!233s5ss3 dET[Ccddd MLMd **4;;Q\^cdF99VR__R]]499;O;OQ\-]^__% c Ws   97I))7I.c                    [        X!SS9n[        R                  " UR                  U R                  U5      U R
                  5      $ )NFr  )r  r'   r(   create_advancer   rn   )r   r  r-   s      r   advancer&  y  s6    #G%HG 99W++DKKA499MMr   c                   [        U5      nSUs=::  a  S::  d  O  [        SU S35      e[        U5      U:w  a  [        SU S[        U5       35      e[        U5      U:w  a  [        SU S[        U5       35      e[        R                  " US	   5      US	'   US	   S
:w  a  [        SUS	    35      eU Vs/ s H  n[	        Xd5      PM     nnU Vs/ s H*  n[	        Xd5      R                  [        R                  US9PM,     nn[        R                  " U5      n[        U R                  [        R                  5      (       d   e[        R                  " U R                  R                  U5      nUR                  U R                  U Vs/ s H  oR                  PM     snU Vs/ s H  oR                  PM     snU5      n	[        R                  " XX'5      $ s  snf s  snf s  snf s  snf )Nr	      z Expected 2 <= ndim <= 5 but got z dimensionsz	Expected z strides but got zExpected block_shape to have z dimensions but got rX  r   z-Tensor descriptor last dim must be 1 but got r  )rF  r&   r'   rO  rm   torg   _unwrap_shapera   rn   r  r   r  create_make_tensor_descriptorr   _experimental_tensor_descriptor)
r   r  r!  r  r-   r  ro   rn   rL  r   s
             r   make_tensor_descriptorr-    s    u:DNN;D6MNN
7|t9TF*;CL>JKK
;48>RSVW^S_R`abb((5GBKr{aHQSVWW,12EqYq"EE2MTUWy$''7'CWGU "";/Kdii1111==--{;D224;;SX@YSXaSX@Ynu[vnuij\d\dnu[v3>@F--fWKK 3U AZ[vs   6G1G
G#
%G()r,   re   r-   
ir.builderreturn	tl.tensor)r9   tl.dtyper:   r1  r/  r1  )r9   r1  rO   rb   r:   r1  rP   rb   rQ   rb   r/  r1  )T)rp   rb   )r   r1  r   r1  rx   rb   r/  None)FFTF)r   tl.tensor | numbers.Numberr   r3  r-   r.  r/  Tuple[tl.tensor, tl.tensor])r   r0  r   r0  r-   r.  r   callable)
r   r3  r   r3  r   rb   r-   r.  r/  r0  )r   r3  r   r3  r-   r.  r/  r0  )
r   r3  r   r3  r   rb   r-   r.  r/  r0  )ro   r0  r   r0  r   tl.PropagateNanr-   r.  )
ro   r0  r   r0  r   r0  r   r6  r-   r.  )r   r0  r   r0  r-   r.  r/  r4  )r   r0  r   r0  r-   r.  r/  r0  )r   r0  r-   r.  )r   r0  r/  r0  )r   r0  r-   r.  r/  r0  )r   r0  r-   r0  r/  r0  )r  r0  r/  tl.block_type)r8  re   r9  re   r-   r.  r/  r0  )r  	List[int]r5   r1  r-   r.  r/  r0  )rD   r0  r  r8  r-   r.  r/  r0  )
r   r0  rJ  r8  rK  rb   r-   r.  r/  r0  )r   r0  r,   re   r-   r.  r/  r0  )
r   r0  r   r0  rK  rb   r-   r.  r/  r0  )r[  r0  r\  r0  r-   r.  r/  r0  )r[  r0  r-   r.  r/  r4  )r   r0  ro  z
Tuple[int]r-   r.  r/  r0  )r   r0  r  r8  r-   r.  r/  r0  )r   r0  r   r0  r-   r.  r/  r0  )r  Optional[str])r   r0  r  r1  r-   r.  r/  r0  ru   )
r   r0  r  r1  r-   r.  r  r9  r/  r0  )r  r0  r  Optional[tl.tensor]r   r:  r  r   r  r|  r  r|  r  r|  r  rb   r-   r.  r/  r0  )r  r0  r  r7  r-   r.  )
r  z&tl._experimental_tensor_desciptor_baser  r|  r  r|  r-   r.  r/  r0  )r  z'tl._experimental_tensor_descriptor_baserD   r0  r-   r.  r/  r0  )r  r|  r  r|  r-   r.  r/  r0  )rD   r0  r-   r.  r/  r0  )r  r0  r4  r0  r5  List[tl.tensor]r6  r;  r7  r;  r8  r;  r9  re   r:  re   r;  re   r<  re   r-   r.  r/  r0  )r  r0  r-   r.  r/  r0  )r  r0  rC  r0  r  r:  r  r|  r  r|  r-   r.  r/  r0  )r  r0  rP  r0  rC  r0  r  r|  r  r|  r-   r.  r/  r0  )r  r0  rC  r0  r  r0  rU  r|  r-   r.  r/  z&Tuple[tl.tensor, tl.tensor, tl.tensor])r  r0  rC  r0  r  r0  r  r|  r  r|  r-   r.  r/  r0  )r   r0  r   r0  r  r0  r  r9  r  re   r  r1  r-   r.  r/  r0  )r  r|  )rC  r0  r  r|  r-   r.  )r   r0  r  r0  r  r|  r   r0  r  r:  r  r|  r  ztl.tensor | Noner  rb   r  r1  r-   r.  r/  r0  )
r  r0  ro   r0  r   r0  r-   r.  r/  r0  )r  Sequence[tl.tensor]r,   re   r-   r.  r/  Tuple[tl.tensor, ...])
r  r<  r,   re   r  rb   r-   r.  r/  r=  )
r  r0  r  r0  r,   re   r-   r.  r/  r0  )r   r0  r  re   r-   r.  r/  r0  )ro   r0  rz  r8  r/  r0  )r-   r.  r/  r0  )
r	  r|  r
  r;  r  rb   r-   r.  r/  r0  )r   r0  r   r|  r-   r.  r/  r0  )r   r0  r-   r.  r/  r0  )r   r0  r  r;  r!  r;  r  zList[tl.constexpr]r-   r.  r/  z"tl._experimental_tensor_descriptor){
__future__r   r  typingr   r   r   r   r   r{   _C.libtritonr
   r%  r   r'   r   	Exceptionr   r.   r1   r?   rT   rm   ry   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r
  r  r   r   r  r  r   r'  r   r/  r4  r=  ri   rC  rM  rR  rV  r`  re  rp  rx  r   r  r   r   r  r  r  r  r  r  r  r  r  r
  r  r  r  r#  r-  r1  r=  r@  rD  rH  rL  rQ  rX  rl  rp  rt  ry  r~  r  r  r  r  r  r  r  r_  r  r  r  r  r  r  r  r   r  r  r   r  r  r  r#  r&  r-  r[   r   r   <module>rB     s   "  ; ;   CLF	 FDF@ 0,&*0,/70,f#V	< ]a,1#6Q#L&& : : ) :F44 )4$44 )4R4:&&!*&425$5$	]"+5:UQ
P
Q
'&"R
R
Q)$)4444	4	4"D"(.H[MK2
IL$2tpZ( 04h5,h58Ah5` 		 
w,;|nn.1nDGnVZnn!*n E
{
)'
),5
)	c(	c-6	c).74$,   	
 # $      >\:)qXWW)3W8AW,j(27]6$)N$)No#(18FF&0F5>FR0$(S(S%5(SBF(SS[(S"(S'0(S`Z, u.o(o-Bo2B0l8VGBU0H$`NNL
LL L $	L
 L (Lr   