a
    kº”hÙ-  ã                   @   s,   d Z ddlZG dd„ dƒZG dd„ dƒZdS )aN  
Helper classes for working with low precision floating point types that
align with the opencompute (OCP) microscaling (MX) specification.
  * MXFP4Tensor: 4-bit E2M1 floating point data
  * MXScaleTensor: 8-bit E8M0 floating point data
Reference: https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf
é    Nc                   @   s>   e Zd Zddd„Zdd„ Zdd„ Zdd	„ Zd
d„ Zdd„ ZdS )ÚMXFP4TensorNc                 C   sd   || _ |dur8t|tjƒs"J dƒ‚|j | _ |  |¡| _n(|durXt|tƒrN|n|f| _ntdƒ‚dS )at  
        Tensor class for working with four bit E2M1 floating point data as defined by the
        opencompute microscaling specification.


        Parameters:
        - data: A torch tensor of float32 numbers to convert to fp4e2m1 microscaling format.
        - size: The size of the tensor to create.
        - device: The device on which to create the tensor.
        Nú%Parameter data must be a torch tensorú.Either parameter data or size must be provided©	ÚdeviceÚ
isinstanceÚtorchZTensorÚ_from_floatÚdataÚtupleÚsizeÚ
ValueError©Úselfr
   r   r   © r   ú?/var/www/auris/lib/python3.9/site-packages/triton/tools/mxfp.pyÚ__init__   s    zMXFP4Tensor.__init__c                 C   sp   t jdd| jt j| jd}t jdd| jt j| jd}t jdd| jt j| jd}|d> |d> B |B  t j¡| _| S )Nr   é   ©r   Údtyper   é   é   é   )r   Úrandintr   Úuint8r   Útyper
   )r   ÚSÚEÚMr   r   r   Úrandom#   s
    zMXFP4Tensor.randomc                 C   s
  |t jksJ dƒ‚| j}|d? d@  |¡}|d? d@  |¡}|d@  |¡}t  |¡}|dk|dk@ }| }| ¡ ræ|| }	|| }
|| }t  d|	¡}t  |
dk|
|
d ¡}t  |
dk|d d|d  ¡}|t  d|¡ | }|||< |||dk@   d9  < | t j¡S )	zŠ
        Convert fp4e2m1 data to float32.

        Returns:
        - A torch tensor of type dtype representing the fp4e2m1 data.
        zCCurrently only float32 is supported for fp4e2m1 to float conversionr   r   r   éÿÿÿÿç      à?ç      ð?r   )r   Úfloat32r
   r   Z
zeros_likeÚanyÚpowÚwhere)r   r   r
   r   r   r   ÚvalueÚis_zeroZnon_zero_maskZS_nzZE_nzZM_nzÚsignÚexponentZmantissaZvalue_nzr   r   r   Úto+   s&    
zMXFP4Tensor.toc                 C   sV  t  |¡ t j¡}t  |¡}|dk}t  |¡t  |¡B }t jg d¢t j| jd}t jddgt j| jd}g }g }	g }
|D ]š}|dkrÆd}|D ]6}|d }|d|  }| 	|¡ |	 	|¡ |
 	|¡ qŒqx| 
¡ d }|D ]:}d|d  }|d|  }| 	|¡ |	 	|¡ |
 	|¡ qÖqxt j|t j| jd}t j|	t j| jd}	t j|
t j| jd}
| d¡}|jd }| d¡}| ¡  
¡ }||| d¡< t  || d¡ ¡}t j|dd	d
\}}||k}| ¡ dkrô|
 d¡ |d¡}|dk t j¡}||d  }t j|dd}|	| }|
| }| |j¡}| |j¡}d||< d||< |d> |d> B |B  t j¡S )a5  
        Convert float32 numbers to mxf4 e2m1 format.
        * No encodings are reserved for Inf or NaN in mxf4.
        * Conversion from float supports roundTiesToEven rounding mode.
        * If a value exceeds the mxf4 representable range after rounding,
          clamps to the maximum mxf4 magnitude, preserving the sign.
        * If a value has magnitude less than the minimum subnormal magnitude
          in mxf4 after rounding, converts to zero.

        Parameters:
        - values: A torch tensor of float32 numbers to convert to fp4 format.
        r   )r   r   r   r   ©r   r   r   r!   r   r"   r    T)ÚdimZkeepdimgíµ ÷Æ°>©r-   r   )r   Zsignbitr   r   ÚabsÚisnanÚisinfÚtensorr   ÚappendÚitemr#   ÚviewÚshapeZ	unsqueezeÚmaxÚminÚsumÚexpandÚint32Zargmin)r   Úvaluesr   Z
abs_valuesr(   Ú
is_invalidZE_bitsZM_bitsZcandidate_valuesZcandidate_EZcandidate_Mr   r*   r   Zsignificandr'   Ú
candidatesZabs_values_flatÚNZabs_values_expandedZmax_candidate_valueÚerrorsZ
min_errorsÚ_Zis_tieZM_bits_expandedZtie_breakerZbest_indicesZ
E_selectedZ
M_selectedr   r   r   r	   N   s`    







zMXFP4Tensor._from_floatc                 C   sè   | j }d|  kr|jk s&n J dƒ‚| |¡}|d d }|d dkrŒdgd|j  }|j| d d d }d||< tjjj||ddd}t|jƒ}|||< | 	|d d¡ |j
|Ž }| |d d¡}| |d d¡}	|	d> |B }
|
S )a  
        Packs two e2m1 elements into a single uint8 along the specified dimension.

        Parameters:
        - dim: The dimension along which to pack the elements.

        Returns:
        - A torch tensor of dtype uint8 with two e2m1 elements packed into one uint8.
        r   zHThe dimension to pack along is not within the range of tensor dimensionsr   r   Zconstant)Úmoder'   r   )r
   Úndimr   r   ÚnnZ
functionalÚpadÚlistr6   ÚinsertÚreshapeÚselect)r   r-   r
   Zsize_along_dimZnew_size_along_dimZ	pad_sizesZ	pad_indexÚ	new_shapeÚlowÚhighÚpackedr   r   r   Úto_packed_tensor¦   s&    
ÿ


zMXFP4Tensor.to_packed_tensorc                 C   s²   |d? d@ }|d@ }t j||f|d d}t|jƒ}|d|… || d g ||d d…  }|j|Ž }	|| d dkr¦tdƒg|	j }
td|| ƒ|
|< |	t|
ƒ }	|	 t j	¡S )aÅ  
        Unpacks a tensor where two fp4 elements are packed into a single uint8.

        Parameters:
        - packed_tensor: The packed tensor
        - dim: The dimension along which the tensor was packed.
        - original_shape: The shape of the original tensor before packing.

        Returns:
        - A tensor with the original data unpacked into uint8 elements containing one
          fp4e2m1 element in the least significant bits.
        r   é   r   r.   Nr   r   )
r   ÚstackrF   r6   rH   ÚslicerC   r   r   r   )r   Zpacked_tensorr-   Zoriginal_shaperL   rK   Zstackedr6   rJ   r
   Úindicesr   r   r   Úunpack_packed_tensorÉ   s    
*
z MXFP4Tensor.unpack_packed_tensor)NNN)	Ú__name__Ú
__module__Ú__qualname__r   r   r+   r	   rN   rS   r   r   r   r   r      s   
#X#r   c                   @   s0   e Zd Zd
dd„Zddd„Zdd„ Zdd	„ ZdS )ÚMXScaleTensorNc                 C   sd   || _ |dur8t|tjƒs"J dƒ‚|j | _ |  |¡| _n(|durXt|tƒrN|n|f| _ntdƒ‚dS )a6  
        Tensor class for working with microscaling E8M0 block scale factors.

        Parameters:
        - data: A torch tensor of float32 numbers to convert to fp8e8m0 microscaling format.
        - size: The size of the tensor to create.
        - device: The device on which to create the tensor.
        Nr   r   r   r   r   r   r   r   ë   s    	zMXScaleTensor.__init__c              
   C   s–   d}|du rdnt dtt t |¡¡ƒ| ƒ}|du r:dn"tdt dtt t |¡¡ƒ| ƒƒ}||ksnJ dƒ‚tj||d | jtj| j	d}|| _
| S )zp
        Generate random E8M0 data within a specified range.
        * Excludes the NaN encoding (255).
        é   Nr   éþ   z&Low must be less than or equal to highr   r   )r7   Úintr   Úlog2r2   r8   r   r   r   r   r
   )r   rK   rL   ZbiasZmin_exponentZmax_exponentr   r   r   r   r   þ   s    *0zMXScaleTensor.randomc                 C   s^   |t jksJ dƒ‚| j |¡}|dk}| ¡ }d||< |d }t  d|¡}t j||< | |¡S )NzBCurrently only float32 is supported for f8e8m0 to float conversionéÿ   r   rX   g       @)r   r#   r
   r   Úcloner%   Únan)r   r   r
   Úis_nanÚe_biasedÚer'   r   r   r   r+     s    
zMXScaleTensor.toc           	      C   sŠ   t j|t j| jd}t  |¡t  |¡B |dkB }d||< ||  }t  t  |¡¡}|d }| t j	¡}t  
|dd¡}| t j¡|| < |S )aO  
        Convert float32 numbers to E8M0 format.
        * Values <= 0, NaNs, and Infs are converted to the NaN encoding (255).
        * Positive values are converted by computing the floor of log2(value) to get the exponent.

        Parameters:
        - values: A torch tensor of float32 numbers to convert to E8M0 format.
        r,   r   r\   rX   rY   )r   Z
empty_liker   r   r0   r1   Úfloorr[   r   r;   Úclamp)	r   r<   Úresultr=   Zvalid_valuesra   r`   Ze_biased_intZe_biased_clampedr   r   r   r	     s    	
zMXScaleTensor._from_float)NNN)NN)rT   rU   rV   r   r   r+   r	   r   r   r   r   rW   é   s   

rW   )Ú__doc__r   r   rW   r   r   r   r   Ú<module>   s    ^