
    (Th-                     8    S r SSKr " S S5      r " S S5      rg)aN  
Helper classes for working with low precision floating point types that
align with the opencompute (OCP) microscaling (MX) specification.
  * MXFP4Tensor: 4-bit E2M1 floating point data
  * MXScaleTensor: 8-bit E8M0 floating point data
Reference: https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf
    Nc                   <    \ rS rSrS
S jrS rS rS rS rS r	S	r
g)MXFP4Tensor   Nc                    X0l         UbN  [        U[        R                  5      (       d   S5       eUR                   U l         U R	                  U5      U l        gUb   [        U[        5      (       a  UOU4U l        g[        S5      e)a<  
Tensor class for working with four bit E2M1 floating point data as defined by the
opencompute microscaling specification.


Parameters:
- data: A torch tensor of float32 numbers to convert to fp4e2m1 microscaling format.
- size: The size of the tensor to create.
- device: The device on which to create the tensor.
N%Parameter data must be a torch tensor.Either parameter data or size must be provided	device
isinstancetorchTensor_from_floatdatatuplesize
ValueErrorselfr   r   r
   s       I/var/www/auris/envauris/lib/python3.13/site-packages/triton/tools/mxfp.py__init__MXFP4Tensor.__init__   sr     dELL11Z3ZZ1++DK((.DI *4 7 7dXDIMNN    c                    [         R                  " SSU R                  [         R                  U R                  S9n[         R                  " SSU R                  [         R                  U R                  S9n[         R                  " SSU R                  [         R                  U R                  S9nUS-  US-  -  U-  R                  [         R                  5      U l        U $ )Nr      r   dtyper
            )r   randintr   uint8r
   typer   )r   SEMs       r   randomMXFP4Tensor.random#   s    MM!QTYYekk$++VMM!QTYYekk$++VMM!QTYYekk$++V1fa(1,225;;?	r   c                    U[         R                  :X  d   S5       eU R                  nUS-	  S-  R                  U5      nUS-	  S-  R                  U5      nUS-  R                  U5      n[         R                  " U5      nUS:H  US:H  -  nU) nUR                  5       (       a  X8   n	XH   n
XX   n[         R                  " SU	5      n[         R                  " U
S:H  XS-
  5      n[         R                  " U
S:H  US-  SUS-  -   5      nU[         R                  " SU5      -  U-  nXU'   XgUS:H  -  ==   S-  ss'   UR                  [         R                  5      $ )	zj
Convert fp4e2m1 data to float32.

Returns:
- A torch tensor of type dtype representing the fp4e2m1 data.
zCCurrently only float32 is supported for fp4e2m1 to float conversionr   r   r         ?      ?r   )r   float32r   r"   
zeros_likeanypowwhere)r   r   r   r#   r$   r%   valueis_zeronon_zero_maskS_nzE_nzM_nzsignexponentmantissavalue_nzs                   r   toMXFP4Tensor.to+   sV    %l'll%yyai3$$U+ai3$$U+CZe$   #6a1f% #D#D#D99R&D{{419d1H=H{{419dSj#s
:JKHeii844x?H#+-  	a !R'!zz%--((r   c                 n   [         R                  " U5      R                  [         R                  5      n[         R                  " U5      nUS:H  n[         R
                  " U5      [         R                  " U5      -  n[         R                  " / SQ[         R                  U R                  S9n[         R                  " SS/[         R                  U R                  S9n/ n/ n	/ n
U H  nUS:X  aM  SnU HC  nUS-  nUSU-  -  nUR                  U5        U	R                  U5        U
R                  U5        ME     MV  UR                  5       S-
  nU HF  nSUS-  -   nUSU-  -  nUR                  U5        U	R                  U5        U
R                  U5        MH     M     [         R                  " U[         R                  U R                  S9n[         R                  " U	[         R                  U R                  S9n	[         R                  " U
[         R                  U R                  S9n
UR                  S5      nUR                  S   nUR                  S5      nUR                  5       R                  5       nUUUR                  S5      '   [         R                  " UUR                  S5      -
  5      n[         R                   " USS	S
9u  nnUU:H  nUR#                  5       S:  aK  U
R                  S5      R%                  US5      nUS:H  R                  [         R&                  5      nUUS-  -
  n[         R(                  " USS9nU	U   nU
U   nUR                  UR                  5      nUR                  UR                  5      nSX'   SX'   US-  US-  -  U-  R                  [         R                  5      $ )a  
Convert float32 numbers to mxf4 e2m1 format.
* No encodings are reserved for Inf or NaN in mxf4.
* Conversion from float supports roundTiesToEven rounding mode.
* If a value exceeds the mxf4 representable range after rounding,
  clamps to the maximum mxf4 magnitude, preserving the sign.
* If a value has magnitude less than the minimum subnormal magnitude
  in mxf4 after rounding, converts to zero.

Parameters:
- values: A torch tensor of float32 numbers to convert to fp4 format.
r   )r   r   r   r   r   r
   r   r*   r   r+   r)   T)dimkeepdimgư>r?   r   )r   signbitr"   r!   absisnanisinftensorr
   appenditemr,   viewshape	unsqueezemaxminsumexpandint32argmin)r   valuesr#   
abs_valuesr2   
is_invalidE_bitsM_bitscandidate_valuescandidate_Ecandidate_Mr$   r8   r%   significandr1   
candidatesabs_values_flatNabs_values_expandedmax_candidate_valueerrors
min_errors_is_tieM_bits_expandedtie_breakerbest_indices
E_selected
M_selecteds                                 r   r   MXFP4Tensor._from_floatN   s$    MM&!&&u{{3YYv&
?[[(5;;v+>>

 l%++dkkRq!fEKKLAAvA"#c'K'1h;7E$++E2&&q)&&q)   668a<A"%C-K'1h;7E$++E2&&q)&&q)   ( \\"2%--PTP[P[\
ll;ekk$++Vll;ekk$++V$//"-!!!$-77: )nn.335/B
+, .1E1Ea1HHI
 		&a>
AJ&::<!)33A6==aDO*a/55ekkBK{T12F||F2 .
 .
OOJ,,-OOJ,,-

aAF#a'--ekk::r   c                 <   U R                   nSUs=::  a  UR                  :  d   S5       e   S5       eUR                  U5      nUS-   S-  nUS-  S:w  aX  S/SUR                  -  -  nUR                  U-
  S-
  S-  S-   nSXV'   [        R                  R
                  R                  X%SSS9n[        UR                  5      nXGU'   UR                  US-   S5        UR                  " U6 nUR                  US-   S5      nUR                  US-   S5      n	U	S-  U-  n
U
$ )z
Packs two e2m1 elements into a single uint8 along the specified dimension.

Parameters:
- dim: The dimension along which to pack the elements.

Returns:
- A torch tensor of dtype uint8 with two e2m1 elements packed into one uint8.
r   zHThe dimension to pack along is not within the range of tensor dimensionsr   r   constant)moder1   r   )r   ndimr   r   nn
functionalpadlistrJ   insertreshapeselect)r   r?   r   size_along_dimnew_size_along_dim	pad_sizes	pad_index	new_shapelowhighpackeds              r   to_packed_tensorMXFP4Tensor.to_packed_tensor   s<    yyC#$))# 	WV	W# 	WV	W# 3,q0Q6 A"q499}-IS1,1A5I#$I 88&&**4ST*UD$	+#q!$||Y'kk#'1%{{37A&!)s"r   c                    US-	  S-  nUS-  n[         R                  " XT4US-   S9n[        UR                  5      nUSU Xr   S-  /-   XrS-   S -   nUR                  " U6 n	X2   S-  S:w  a7  [        S5      /U	R                  -  n
[        SX2   5      X'   U	[        U
5         n	U	R                  [         R                  5      $ )a}  
Unpacks a tensor where two fp4 elements are packed into a single uint8.

Parameters:
- packed_tensor: The packed tensor
- dim: The dimension along which the tensor was packed.
- original_shape: The shape of the original tensor before packing.

Returns:
- A tensor with the original data unpacked into uint8 elements containing one
  fp4e2m1 element in the least significant bits.
r      r   rA   Nr   r   )
r   stackrq   rJ   rs   slicerm   r   r"   r!   )r   packed_tensorr?   original_shaper{   rz   stackedrJ   ry   r   indicess              r   unpack_packed_tensor MXFP4Tensor.unpack_packed_tensor   s     "c)c!++sksQw7 W]]#$3K5:>"22U78_D		* "a'T{mdii/G N$78GLg'Dyy%%r   r   r
   r   NNN)__name__
__module____qualname____firstlineno__r   r&   r;   r   r}   r   __static_attributes__ r   r   r   r      s%    O*!)FV;p!F&r   r   c                   4    \ rS rSrSS jrS	S jrS rS rSrg)
MXScaleTensor   Nc                    X0l         UbN  [        U[        R                  5      (       d   S5       eUR                   U l         U R	                  U5      U l        gUb   [        U[        5      (       a  UOU4U l        g[        S5      e)a  
Tensor class for working with microscaling E8M0 block scale factors.

Parameters:
- data: A torch tensor of float32 numbers to convert to fp8e8m0 microscaling format.
- size: The size of the tensor to create.
- device: The device on which to create the tensor.
Nr   r   r	   r   s       r   r   MXScaleTensor.__init__   sr     dELL11Z3ZZ1++DK((.DI *4 7 7dXDIMNNr   c                    SnUc  SO?[        S[        [        R                  " [        R                  " U5      5      5      U-   5      nUc  SOI[        S[        S[        [        R                  " [        R                  " U5      5      5      U-   5      5      nXE::  d   S5       e[        R                  " XES-   U R                  [        R                  U R                  S9nX`l
        U $ )zX
Generate random E8M0 data within a specified range.
* Excludes the NaN encoding (255).
   r      z&Low must be less than or equal to highr   r   )rL   intr   log2rF   rM   r    r   r!   r
   r   )r   rz   r{   biasmin_exponentmax_exponentr$   s          r   r&   MXScaleTensor.random   s    
 KqSC

5<<PSCT8U4VY]4]-^"lsCQEJJu||\`OaDb@cfj@j9k0l+U-UU+MM,q(8tyyPUP[P[dhdodop	r   c                    U[         R                  :X  d   S5       eU R                  R                  U5      nUS:H  nUR	                  5       nSXC'   US-
  n[         R
                  " SU5      n[         R                  Xc'   UR                  U5      $ )NzBCurrently only float32 is supported for f8e8m0 to float conversion   r   r   g       @)r   r,   r   r"   cloner/   nan)r   r   r   is_nane_biaseder1   s          r   r;   MXScaleTensor.to  s{    %k'kk%yy~~e$#+::<sN		#q!		zz%  r   c                    [         R                  " U[         R                  U R                  S9n[         R                  " U5      [         R
                  " U5      -  US:*  -  nSX#'   X)    n[         R                  " [         R                  " U5      5      nUS-   nUR                  [         R                  5      n[         R                  " USS5      nUR                  [         R                  5      X#) '   U$ )a  
Convert float32 numbers to E8M0 format.
* Values <= 0, NaNs, and Infs are converted to the NaN encoding (255).
* Positive values are converted by computing the floor of log2(value) to get the exponent.

Parameters:
- values: A torch tensor of float32 numbers to convert to E8M0 format.
r>   r   r   r   r   )r   
empty_liker!   r
   rD   rE   floorr   r"   rP   clamp)	r   rR   resultrT   valid_valuesr   r   e_biased_inte_biased_clampeds	            r   r   MXScaleTensor._from_float  s     !!&DKKP[[(5;;v+>>&A+N
 k*KK

<01s7}}U[[1 ;;|Q<.33EKK@{r   r   r   )NN)	r   r   r   r   r   r&   r;   r   r   r   r   r   r   r      s    O&	!r   r   )__doc__r   r   r   r   r   r   <module>r      s(    Z& Z&zD Dr   