
import importlib.metadata  # needed for the importlib.metadata.version lookups below
from typing import TYPE_CHECKING, Optional

from packaging import version

from .base import HfQuantizer


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

from ..utils import (
    is_auto_gptq_available,
    is_gptqmodel_available,
    is_optimum_available,
    is_torch_available,
    logging,
)
from ..utils.quantization_config import GPTQConfig, QuantizationConfigMixin


if is_torch_available():
    import torch

logger = logging.get_logger(__name__)


class GptqHfQuantizer(HfQuantizer):
    """
    Quantizer of the GPTQ method - for GPTQ the quantizer supports calibration of the model through the
    `auto_gptq` or `gptqmodel` package. Quantization is done under the hood for users if they load a
    non-prequantized model.
    """

    requires_calibration = False
    required_packages = ["optimum", "auto_gptq", "gptqmodel"]
    optimum_quantizer = None

    def __init__(self, quantization_config: QuantizationConfigMixin, **kwargs):
        super().__init__(quantization_config, **kwargs)
        if not is_optimum_available():
            raise ImportError("Loading a GPTQ quantized model requires optimum (`pip install optimum`)")
        from optimum.gptq import GPTQQuantizer

        self.optimum_quantizer = GPTQQuantizer.from_dict(self.quantization_config.to_dict_optimum())

    def validate_environment(self, *args, **kwargs):
        if not is_optimum_available():
            raise ImportError("Loading a GPTQ quantized model requires optimum (`pip install optimum`)")
        if is_auto_gptq_available() and is_gptqmodel_available():
            logger.warning("Detected gptqmodel and auto-gptq, will use gptqmodel")

        # CPU execution requires either gptqmodel or a recent enough auto-gptq.
        gptq_supports_cpu = (
            is_auto_gptq_available()
            and version.parse(importlib.metadata.version("auto-gptq")) > version.parse("0.4.2")
        ) or is_gptqmodel_available()
        if not gptq_supports_cpu and not torch.cuda.is_available():
            raise RuntimeError("GPU is required to quantize or run a quantized model.")
        elif not (is_auto_gptq_available() or is_gptqmodel_available()):
            raise ImportError(
                "Loading a GPTQ quantized model requires gptqmodel (`pip install gptqmodel`) or "
                "auto-gptq (`pip install auto-gptq`) library."
            )
        elif is_auto_gptq_available() and version.parse(importlib.metadata.version("auto_gptq")) < version.parse(
            "0.4.2"
        ):
            raise ImportError(
                "You need a version of auto_gptq >= 0.4.2 to use GPTQ: `pip install --upgrade auto-gptq` "
                "or use gptqmodel by `pip install gptqmodel>=1.4.3`."
            )
        elif is_gptqmodel_available() and (
            version.parse(importlib.metadata.version("gptqmodel")) < version.parse("1.4.3")
            or version.parse(importlib.metadata.version("optimum")) < version.parse("1.23.99")
        ):
            raise ImportError("The gptqmodel version should be >= 1.4.3, the optimum version should be >= 1.24.0")

    def update_torch_dtype(self, torch_dtype: "torch.dtype") -> "torch.dtype":
        if torch_dtype is None:
            torch_dtype = torch.float16
            logger.info("Loading the model in `torch.float16`. To overwrite it, set `torch_dtype` manually.")
        elif torch_dtype != torch.float16:
            logger.info("We suggest you to set `torch_dtype=torch.float16` for better efficiency with GPTQ.")
        return torch_dtype

    def update_device_map(self, device_map):
        if device_map is None:
            device_map = {"": torch.device("cpu")}
        # Only auto-gptq lacks CPU support: in that case move the model to CUDA device 0.
        if not is_gptqmodel_available() and device_map in ("cpu", {"": torch.device("cpu")}):
            device_map = {"": 0}
        return device_map

    def _process_model_before_weight_loading(self, model: "PreTrainedModel", **kwargs):
        if model.__class__.main_input_name != "input_ids":
            raise RuntimeError("We can only quantize pure text models.")

        if self.pre_quantized:
            # Compatibility: optimum versions after the gptqmodel refactor accept extra kwargs.
            if version.parse(importlib.metadata.version("optimum")) <= version.parse("1.23.99"):
                model = self.optimum_quantizer.convert_model(model)
            else:
                model = self.optimum_quantizer.convert_model(model, **kwargs)

    def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
        if self.pre_quantized:
            model = self.optimum_quantizer.post_init_model(model)
        else:
            if self.quantization_config.tokenizer is None:
                self.quantization_config.tokenizer = model.name_or_path

            self.optimum_quantizer.quantize_model(model, self.quantization_config.tokenizer)
            model.config.quantization_config = GPTQConfig.from_dict(self.optimum_quantizer.to_dict())

    @property
    def is_trainable(self, model: Optional["PreTrainedModel"] = None):
        return True

    def is_serializable(self, safe_serialization=None):
        return True
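
# Usage sketch: a minimal example of how this quantizer is reached through the public
# `transformers` API. The checkpoint names below are illustrative placeholders; any causal
# text model works for on-the-fly quantization, and any GPTQ checkpoint for loading.
#
#     from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig
#
#     # Path 1: quantize a full-precision model on the fly. `dataset` names the
#     # calibration corpus that optimum's GPTQQuantizer consumes under the hood.
#     tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")
#     gptq_config = GPTQConfig(bits=4, dataset="c4", tokenizer=tokenizer)
#     model = AutoModelForCausalLM.from_pretrained(
#         "facebook/opt-125m", device_map="auto", quantization_config=gptq_config
#     )
#
#     # Path 2: load an already-quantized checkpoint. `pre_quantized` is inferred from the
#     # checkpoint's config, so only `convert_model`/`post_init_model` run, no calibration.
#     model = AutoModelForCausalLM.from_pretrained(
#         "TheBloke/Llama-2-7B-GPTQ", device_map="auto"
#     )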