o
    Zh                     @   s   d dl mZmZmZ ddlmZ erddlmZ ddlm	Z	 ddl
mZmZmZmZ ddlmZ e r7d d	lZeeZG d
d deZd	S )    )TYPE_CHECKINGListOptional   )HfQuantizer   )PreTrainedModel)replace_with_spqr_linear)is_accelerate_availableis_spqr_availableis_torch_availablelogging)QuantizationConfigMixinNc                       s   e Zd ZdZdZdef fddZdd ZdddZ	dddde	e
e  fddZdddZedde	d fddZdddZ  ZS )SpQRHfQuantizerzS
    Quantizer of the SpQR method. Enables the loading of prequantized models.
    Tquantization_configc                    s   t  j|fi | || _d S N)super__init__r   )selfr   kwargs	__class__ U/var/www/auris/lib/python3.10/site-packages/transformers/quantizers/quantizer_spqr.pyr   (   s   
zSpQRHfQuantizer.__init__c                 O   s2   t j s	tdt stdt stdd S )Nz,GPU is required to run SpQR quantized model.zGUsing `spqr` quantization requires Accelerate: `pip install accelerate`zFUsing `spqr` quantization requires SpQR: `pip install spqr_quant[gpu]`)torchcudaZis_availableRuntimeErrorr
   ImportErrorr   )r   argsr   r   r   r   validate_environment,   s   
z$SpQRHfQuantizer.validate_environmenttorch_dtypetorch.dtypereturnc                 C   s2   |d u rt j}td |S |t jkrtd|S )NzHAssuming SpQR inference on GPU and loading the model in `torch.float16`.z|You cannot use any type other than torch.float16 for SpQR. Please either leave it None or set it totorch.float16 explicitly.)r   Zfloat16loggerinfo
ValueError)r   r    r   r   r   update_torch_dtype6   s   

z"SpQRHfQuantizer.update_torch_dtypeNmodelr   keep_in_fp32_modulesc                 K   s4   |  || jj|| _t|| j| jd | j|j_d S )N)r   modules_to_not_convert)Zget_modules_to_not_convertr   r)   r	   config)r   r'   r(   r   r   r   r   $_process_model_before_weight_loadingA   s   
z4SpQRHfQuantizer._process_model_before_weight_loadingc                 K   s   |S r   r   )r   r'   r   r   r   r   #_process_model_after_weight_loadingR      z3SpQRHfQuantizer._process_model_after_weight_loadingc                 C      dS )NFr   )r   r'   r   r   r   is_trainableU   s   zSpQRHfQuantizer.is_trainablec                 C   r.   )NTr   )r   Zsafe_serializationr   r   r   is_serializableY   r-   zSpQRHfQuantizer.is_serializable)r    r!   r"   r!   r   )r'   r   )__name__
__module____qualname____doc__Zrequires_calibrationr   r   r   r&   r   r   strr+   r,   propertyr/   r0   __classcell__r   r   r   r   r   !   s     




r   )typingr   r   r   baser   Zmodeling_utilsr   Zintegrationsr	   utilsr
   r   r   r   Zutils.quantization_configr   r   Z
get_loggerr1   r#   r   r   r   r   r   <module>   s   
