import importlib
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union

from packaging import version

from .base import HfQuantizer
from .quantizers_utils import get_module_from_name


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

from ..utils import is_accelerate_available, is_optimum_quanto_available, is_torch_available, logging
from ..utils.quantization_config import QuantoConfig


if is_torch_available():
    import torch

logger = logging.get_logger(__name__)


class QuantoHfQuantizer(HfQuantizer):
    """
    Quantizer for the quanto library
    """

    required_packages = ["quanto", "accelerate"]
    requires_parameters_quantization = True
    requires_calibration = False

    def __init__(self, quantization_config: "QuantoConfig", **kwargs):
        super().__init__(quantization_config, **kwargs)
        self.post_init()

    def post_init(self):
        r"""
        Safety checker
        """
        if self.quantization_config.activations is not None and not self.pre_quantized:
            raise ValueError(
                "We don't support quantizing the activations with transformers library. "
                "Use quanto library for more complex use cases such as activations quantization, "
                "calibration and quantization aware training."
            )

    def validate_environment(self, *args, **kwargs):
        if not is_optimum_quanto_available():
            raise ImportError(
                "Loading an optimum-quanto quantized model requires optimum-quanto library (`pip install optimum-quanto`)"
            )
        if not is_accelerate_available():
            raise ImportError(
                "Loading an optimum-quanto quantized model requires accelerate library (`pip install accelerate`)"
            )

    def update_device_map(self, device_map):
        if device_map is None:
            device_map = {"": "cpu"}
            logger.info(
                "The device_map was not initialized. Setting device_map to {'':'cpu'}. "
                "If you want to use the model for inference, please set device_map='auto'"
            )
        return device_map

    def update_torch_dtype(self, torch_dtype: "torch.dtype") -> "torch.dtype":
        if torch_dtype is None:
            logger.info("You did not specify `torch_dtype` in `from_pretrained`. Setting it to `torch.float32`.")
            torch_dtype = torch.float32
        return torch_dtype

    def update_missing_keys(self, model, missing_keys: List[str], prefix: str) -> List[str]:
        if is_optimum_quanto_available():
            from optimum.quanto import QModuleMixin

        not_missing_keys = []
        for name, module in model.named_modules():
            if isinstance(module, QModuleMixin):
                for missing in missing_keys:
                    if (
                        (name in missing or name in f"{prefix}.{missing}")
                        and not missing.endswith(".weight")
                        and not missing.endswith(".bias")
                    ):
                        not_missing_keys.append(missing)
        return [k for k in missing_keys if k not in not_missing_keys]

    def check_quantized_param(
        self,
        model: "PreTrainedModel",
        param_value: "torch.Tensor",
        param_name: str,
        state_dict: Dict[str, Any],
        **kwargs,
    ) -> bool:
        """
        Check if a parameter needs to be quantized.
        """
        if is_optimum_quanto_available():
            from optimum.quanto import QModuleMixin

        device_map = kwargs.get("device_map", None)
        param_device = kwargs.get("param_device", None)
        # skip quantization for parameters that are going to be offloaded to the CPU or disk
        if device_map is not None and param_device is not None:
            device_map_values = set(device_map.values())
            if param_device == "cpu" and len(device_map_values) > 1:
                if not (device_map_values == {"cpu"} or device_map_values == {"cpu", "disk"}):
                    return False

        module, tensor_name = get_module_from_name(model, param_name)
        # only weights are quantized; weights that are already frozen do not need to be re-created
        if isinstance(module, QModuleMixin) and "weight" in tensor_name:
            return not module.frozen
        return False

    def adjust_max_memory(self, max_memory: Dict[str, Union[int, str]]) -> Dict[str, Union[int, str]]:
        max_memory = {key: val * 0.90 for key, val in max_memory.items()}
        return max_memory

    def create_quantized_param(
        self,
        model: "PreTrainedModel",
        param_value: "torch.Tensor",
        param_name: str,
        target_device: "torch.device",
        *args,
        **kwargs,
    ):
        """
        Create the quantized parameter by calling .freeze() after setting it to the module.
        """
        from accelerate.utils import set_module_tensor_to_device

        set_module_tensor_to_device(model, param_name, target_device, param_value)
        module, _ = get_module_from_name(model, param_name)
        module.freeze()
        module.weight.requires_grad = False

    def adjust_target_dtype(self, target_dtype: "torch.dtype") -> "torch.dtype":
        if version.parse(importlib.metadata.version("accelerate")) > version.parse("0.27.0"):
            from accelerate.utils import CustomDtype

            mapping = {
                "int8": torch.int8,
                "float8": CustomDtype.FP8,
                "int4": CustomDtype.INT4,
                "int2": CustomDtype.INT2,
            }
            target_dtype = mapping[self.quantization_config.weights]
            return target_dtype
        raise ValueError(
            "You are using `device_map='auto'` on an optimum-quanto quantized model. To automatically compute "
            "the appropriate device map, you should upgrade your `accelerate` library with "
            "`pip install --upgrade accelerate` or install it from source."
        )

    def _process_model_before_weight_loading(
        self, model: "PreTrainedModel", keep_in_fp32_modules: Optional[List[str]] = None, **kwargs
    ):
        from ..integrations import replace_with_quanto_layers

        self.modules_to_not_convert = self.get_modules_to_not_convert(
            model, self.quantization_config.modules_to_not_convert, keep_in_fp32_modules
        )
        model, _ = replace_with_quanto_layers(
            model, modules_to_not_convert=self.modules_to_not_convert, quantization_config=self.quantization_config
        )
        model.config.quantization_config = self.quantization_config

    def _process_model_after_weight_loading(self, model, **kwargs):
        return model

    @property
    def is_trainable(self, model: Optional["PreTrainedModel"] = None):
        return True

    def is_serializable(self, safe_serialization=None):
        return False
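# ---------------------------------------------------------------------------
# Usage sketch (not part of the original module, shown here for illustration):
# this quantizer is picked up automatically when a `QuantoConfig` is passed to
# `from_pretrained`. The checkpoint name below is only an example.
#
#   from transformers import AutoModelForCausalLM, QuantoConfig
#
#   quantization_config = QuantoConfig(weights="int8")
#   model = AutoModelForCausalLM.from_pretrained(
#       "facebook/opt-125m", quantization_config=quantization_config
#   )
# ---------------------------------------------------------------------------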