o
    cZh-                     @   s,   d Z ddlZG dd dZG dd dZdS )aN  
Helper classes for working with low precision floating point types that
align with the opencompute (OCP) microscaling (MX) specification.
  * MXFP4Tensor: 4-bit E2M1 floating point data
  * MXScaleTensor: 8-bit E8M0 floating point data
Reference: https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf
    Nc                   @   s>   e Zd ZdddZdd Zdd Zdd	 Zd
d Zdd ZdS )MXFP4TensorNc                 C   j   || _ |durt|tjsJ d|j | _ | || _dS |dur1t|tr+|| _dS |f| _dS td)at  
        Tensor class for working with four bit E2M1 floating point data as defined by the
        opencompute microscaling specification.


        Parameters:
        - data: A torch tensor of float32 numbers to convert to fp4e2m1 microscaling format.
        - size: The size of the tensor to create.
        - device: The device on which to create the tensor.
        N%Parameter data must be a torch tensor.Either parameter data or size must be provided	device
isinstancetorchZTensor_from_floatdatatuplesize
ValueErrorselfr   r   r    r   @/var/www/auris/lib/python3.10/site-packages/triton/tools/mxfp.py__init__   s    zMXFP4Tensor.__init__c                 C   sp   t jdd| jt j| jd}t jdd| jt j| jd}t jdd| jt j| jd}|d> |d> B |B t j| _| S )Nr      r   dtyper            )r	   randintr   uint8r   typer   )r   SEMr   r   r   random#   s
   zMXFP4Tensor.randomc                 C   s
  |t jks	J d| j}|d? d@ |}|d? d@ |}|d@ |}t |}|dk|dk@ }| }| rs|| }	|| }
|| }t d|	}t |
dk|
|
d }t |
dk|d d|d  }|t d| | }|||< |||dk@   d9  < |t jS )	z
        Convert fp4e2m1 data to float32.

        Returns:
        - A torch tensor of type dtype representing the fp4e2m1 data.
        zCCurrently only float32 is supported for fp4e2m1 to float conversionr   r   r         ?      ?r   )r	   float32r   r   Z
zeros_likeanypowwhere)r   r   r   r   r   r   valueis_zeroZnon_zero_maskZS_nzZE_nzZM_nzsignexponentZmantissaZvalue_nzr   r   r   to+   s&   
zMXFP4Tensor.toc                 C   sT  t |t j}t |}|dk}t |t |B }t jg dt j| jd}t jddgt j| jd}g }g }	g }
|D ]M}|dkrcd}|D ]}|d }|d|  }|	| |		| |
	| qFq<|
 d }|D ]}d|d  }|d|  }|	| |		| |
	| qkq<t j|t j| jd}t j|	t j| jd}	t j|
t j| jd}
|d}|jd }|d}| 
 }|||d< t ||d }t j|dd	d
\}}||k}| dkr|
d|d}|dkt j}||d  }t j|dd}|	| }|
| }||j}||j}d||< d||< |d> |d> B |B t jS )a5  
        Convert float32 numbers to mxf4 e2m1 format.
        * No encodings are reserved for Inf or NaN in mxf4.
        * Conversion from float supports roundTiesToEven rounding mode.
        * If a value exceeds the mxf4 representable range after rounding,
          clamps to the maximum mxf4 magnitude, preserving the sign.
        * If a value has magnitude less than the minimum subnormal magnitude
          in mxf4 after rounding, converts to zero.

        Parameters:
        - values: A torch tensor of float32 numbers to convert to fp4 format.
        r   )r   r   r   r   r   r   r   r"   r   r#   r!   T)dimZkeepdimgư>r.   r   )r	   Zsignbitr   r   absisnanisinftensorr   appenditemr$   viewshapeZ	unsqueezemaxminsumexpandint32Zargmin)r   valuesr   Z
abs_valuesr)   
is_invalidZE_bitsZM_bitsZcandidate_valuesZcandidate_EZcandidate_Mr   r+   r   Zsignificandr(   
candidatesZabs_values_flatNZabs_values_expandedZmax_candidate_valueerrorsZ
min_errors_Zis_tieZM_bits_expandedZtie_breakerZbest_indicesZ
E_selectedZ
M_selectedr   r   r   r
   N   sd   







zMXFP4Tensor._from_floatc                 C   s   | j }d|  kr|jk sJ d J d||}|d d }|d dkrIdgd|j  }|j| d d d }d||< tjjj||ddd}t|j}|||< |	|d d |j
| }||d d}||d d}	|	d> |B }
|
S )a  
        Packs two e2m1 elements into a single uint8 along the specified dimension.

        Parameters:
        - dim: The dimension along which to pack the elements.

        Returns:
        - A torch tensor of dtype uint8 with two e2m1 elements packed into one uint8.
        r   zHThe dimension to pack along is not within the range of tensor dimensionsr   r   Zconstant)moder(   r   )r   ndimr   r	   nnZ
functionalpadlistr7   insertreshapeselect)r   r.   r   Zsize_along_dimZnew_size_along_dimZ	pad_sizesZ	pad_index	new_shapelowhighpackedr   r   r   to_packed_tensor   s*   



zMXFP4Tensor.to_packed_tensorc                 C   s   |d? d@ }|d@ }t j||f|d d}t|j}|d| || d g ||d d  }|j| }	|| d dkrStdg|	j }
td|| |
|< |	t|
 }	|	t j	S )a  
        Unpacks a tensor where two fp4 elements are packed into a single uint8.

        Parameters:
        - packed_tensor: The packed tensor
        - dim: The dimension along which the tensor was packed.
        - original_shape: The shape of the original tensor before packing.

        Returns:
        - A tensor with the original data unpacked into uint8 elements containing one
          fp4e2m1 element in the least significant bits.
        r      r   r/   Nr   r   )
r	   stackrG   r7   rI   slicerD   r   r   r   )r   Zpacked_tensorr.   Zoriginal_shaperM   rL   Zstackedr7   rK   r   indicesr   r   r   unpack_packed_tensor   s   
*
z MXFP4Tensor.unpack_packed_tensorNNN)	__name__
__module____qualname__r   r    r,   r
   rO   rT   r   r   r   r   r      s    
#X#r   c                   @   s0   e Zd Zd
ddZdddZdd Zdd	 ZdS )MXScaleTensorNc                 C   r   )a6  
        Tensor class for working with microscaling E8M0 block scale factors.

        Parameters:
        - data: A torch tensor of float32 numbers to convert to fp8e8m0 microscaling format.
        - size: The size of the tensor to create.
        - device: The device on which to create the tensor.
        Nr   r   r   r   r   r   r   r      s   	 zMXScaleTensor.__init__c              
   C   s   d}|du rdnt dttt|| }|du rdntdt dttt|| }||ks7J dtj||d | jtj| j	d}|| _
| S )zp
        Generate random E8M0 data within a specified range.
        * Excludes the NaN encoding (255).
           Nr      z&Low must be less than or equal to highr   r   )r8   intr	   log2r3   r9   r   r   r   r   r   )r   rL   rM   ZbiasZmin_exponentZmax_exponentr   r   r   r   r       s   *0zMXScaleTensor.randomc                 C   s^   |t jks	J d| j|}|dk}| }d||< |d }t d|}t j||< ||S )NzBCurrently only float32 is supported for f8e8m0 to float conversion   r   rZ   g       @)r	   r$   r   r   cloner&   nan)r   r   r   is_nane_biaseder(   r   r   r   r,     s   

zMXScaleTensor.toc           	      C   s   t j|t j| jd}t |t |B |dkB }d||< ||  }t t |}|d }|t j	}t 
|dd}|t j|| < |S )aO  
        Convert float32 numbers to E8M0 format.
        * Values <= 0, NaNs, and Infs are converted to the NaN encoding (255).
        * Positive values are converted by computing the floor of log2(value) to get the exponent.

        Parameters:
        - values: A torch tensor of float32 numbers to convert to E8M0 format.
        r-   r   r^   rZ   r[   )r	   Z
empty_liker   r   r1   r2   floorr]   r   r<   clamp)	r   r=   resultr>   Zvalid_valuesrc   rb   Ze_biased_intZe_biased_clampedr   r   r   r
     s   	
zMXScaleTensor._from_floatrU   )NN)rV   rW   rX   r   r    r,   r
   r   r   r   r   rY      s
    

rY   )__doc__r	   r   rY   r   r   r   r   <module>   s
     ^