import importlib
from typing import TYPE_CHECKING, Optional

from packaging import version

from .base import HfQuantizer


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

from ..utils import (
    is_auto_gptq_available,
    is_gptqmodel_available,
    is_optimum_available,
    is_torch_available,
    logging,
)
from ..utils.quantization_config import GPTQConfig, QuantizationConfigMixin


if is_torch_available():
    import torch

logger = logging.get_logger(__name__)


class GptqHfQuantizer(HfQuantizer):
    """
    Quantizer of the GPTQ method - for GPTQ the quantizer supports calibration of the model through the
    `auto_gptq` or `gptqmodel` package. Quantization is done under the hood for users if they load a
    non-prequantized model.
    """

    requires_calibration = False
    required_packages = ["optimum", "auto_gptq", "gptqmodel"]
    optimum_quantizer = None

    def __init__(self, quantization_config: QuantizationConfigMixin, **kwargs):
        super().__init__(quantization_config, **kwargs)
        if not is_optimum_available():
            raise ImportError("Loading a GPTQ quantized model requires optimum (`pip install optimum`)")
        from optimum.gptq import GPTQQuantizer

        self.optimum_quantizer = GPTQQuantizer.from_dict(self.quantization_config.to_dict_optimum())

    def validate_environment(self, *args, **kwargs):
        if not is_optimum_available():
            raise ImportError("Loading a GPTQ quantized model requires optimum (`pip install optimum`)")
        if is_auto_gptq_available() and is_gptqmodel_available():
            logger.warning("Detected gptqmodel and auto-gptq, will use gptqmodel")

        # CPU execution is supported by gptqmodel, or by auto-gptq newer than 0.4.2.
        gptq_supports_cpu = (
            is_auto_gptq_available()
            and version.parse(importlib.metadata.version("auto-gptq")) > version.parse("0.4.2")
        ) or is_gptqmodel_available()
        if not gptq_supports_cpu and not torch.cuda.is_available():
            raise RuntimeError("GPU is required to quantize or run quantize model.")
        elif not (is_auto_gptq_available() or is_gptqmodel_available()):
            raise ImportError(
                "Loading a GPTQ quantized model requires gptqmodel (`pip install gptqmodel`) or auto-gptq"
                " (`pip install auto-gptq`) library. "
            )
        elif is_auto_gptq_available() and version.parse(importlib.metadata.version("auto_gptq")) < version.parse(
            "0.4.2"
        ):
            raise ImportError(
                "You need a version of auto_gptq >= 0.4.2 to use GPTQ: `pip install --upgrade auto-gptq`"
                " or use gptqmodel by `pip install gptqmodel>=1.4.3`."
            )
        elif is_gptqmodel_available() and (
            version.parse(importlib.metadata.version("gptqmodel")) < version.parse("1.4.3")
            or version.parse(importlib.metadata.version("optimum")) < version.parse("1.23.99")
        ):
            raise ImportError("The gptqmodel version should be >= 1.4.3, optimum version should >= 1.24.0")

    def update_torch_dtype(self, torch_dtype: "torch.dtype" = None) -> "torch.dtype":
        if torch_dtype is None:
            torch_dtype = torch.float16
            logger.info("Loading the model in `torch.float16`. To overwrite it, set `torch_dtype` manually.")
        elif torch_dtype != torch.float16:
            logger.info("We suggest you to set `torch_dtype=torch.float16` for better efficiency with GPTQ.")
        return torch_dtype

    def update_device_map(self, device_map):
        if device_map is None:
            device_map = {"": torch.device("cpu")}
        # Only auto-gptq does not support CPU, so we special-case it.
        if not is_gptqmodel_available() and device_map in ("cpu", {"": torch.device("cpu")}):
            device_map = {"": 0}
        return device_map

    def _process_model_before_weight_loading(self, model: "PreTrainedModel", **kwargs):
        if model.__class__.main_input_name != "input_ids":
            raise RuntimeError("We can only quantize pure text model.")

        if self.pre_quantized:
            # compat: latest optimum has a gptqmodel refactor; older optimum's convert_model
            # only accepts the model argument.
            if version.parse(importlib.metadata.version("optimum")) <= version.parse("1.23.99"):
                model = self.optimum_quantizer.convert_model(model)
            else:
                model = self.optimum_quantizer.convert_model(model, **kwargs)

    def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
        if self.pre_quantized:
            model = self.optimum_quantizer.post_init_model(model)
        else:
            if self.quantization_config.tokenizer is None:
                self.quantization_config.tokenizer = model.name_or_path

            self.optimum_quantizer.quantize_model(model, self.quantization_config.tokenizer)
            model.config.quantization_config = GPTQConfig.from_dict(self.optimum_quantizer.to_dict())

    @property
    def is_trainable(self, model: Optional["PreTrainedModel"] = None):
        return True

    def is_serializable(self, safe_serialization=None):
        return True
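# ---------------------------------------------------------------------------
# Usage sketch (added for illustration; not part of the original module).
# It shows how `GptqHfQuantizer` is normally exercised indirectly through
# `from_pretrained`: passing a `GPTQConfig` triggers `validate_environment`,
# the `_process_model_*` hooks above, and calibration via optimum. The model
# id "facebook/opt-125m" and the "c4" calibration dataset are illustrative
# assumptions, not requirements of this quantizer.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig

    tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")

    # Fresh quantization: a non-prequantized checkpoint plus a calibration
    # dataset; `_process_model_after_weight_loading` then calls optimum's
    # `quantize_model` and stores the resulting GPTQConfig on `model.config`.
    gptq_config = GPTQConfig(bits=4, dataset="c4", tokenizer=tokenizer)
    model = AutoModelForCausalLM.from_pretrained(
        "facebook/opt-125m", device_map="auto", quantization_config=gptq_config
    )

    # Loading an already-quantized checkpoint instead takes the `pre_quantized`
    # path: linear layers are converted before the weights are loaded, e.g.
    # model = AutoModelForCausalLM.from_pretrained("<some-gptq-checkpoint>", device_map="auto")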