
from typing import TYPE_CHECKING, List, Optional

from .base import HfQuantizer


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

from ..utils import is_accelerate_available, is_torch_available, is_vptq_available, logging
from ..utils.quantization_config import QuantizationConfigMixin


if is_torch_available():
    import torch

logger = logging.get_logger(__name__)


class VptqHfQuantizer(HfQuantizer):
    """
    Quantizer of the VPTQ method. Enables the loading of prequantized models.
    """

    requires_calibration = True
    required_packages = ["vptq"]

    def __init__(self, quantization_config: QuantizationConfigMixin, **kwargs):
        super().__init__(quantization_config, **kwargs)
        self.quantization_config = quantization_config

    def validate_environment(self, *args, **kwargs):
        if not is_accelerate_available():
            raise ImportError("Using `vptq` quantization requires Accelerate: `pip install accelerate`")

        if not is_vptq_available():
            raise ImportError("Using `vptq` quantization requires VPTQ>=0.0.4: `pip install -U vptq`")

    def update_torch_dtype(self, torch_dtype: "torch.dtype") -> "torch.dtype":
        # Pick a sensible default dtype when the user did not pass one:
        # fp16 on CUDA, fp32 on CPU (CPU inference is not supported yet).
        if torch_dtype is None:
            if torch.cuda.is_available():
                torch_dtype = torch.float16
                logger.info(
                    "CUDA available. Assuming VPTQ inference on GPU and loading the model in `torch.float16`. "
                    "To overwrite it, set `torch_dtype` manually."
                )
            else:
                import vptq

                device_availability = getattr(vptq, "device_availability", lambda device: False)
                if device_availability("cpu") is True:
                    raise RuntimeError("No GPU found. Please wait for the next release of VPTQ to use CPU inference")
                torch_dtype = torch.float32
                logger.info("No GPU found. Assuming VPTQ inference on CPU and loading the model in `torch.float32`.")
        return torch_dtype

    def _process_model_before_weight_loading(
        self,
        model: "PreTrainedModel",
        keep_in_fp32_modules: Optional[List[str]] = None,
        **kwargs,
    ):
        """
        We don't have a param like `modules_to_not_convert` to indicate which layers should not be quantized,
        because `quantization_config` includes the layers that should be quantized.
        """
        from ..integrations import replace_with_vptq_linear

        self.modules_to_not_convert = self.get_modules_to_not_convert(
            model, self.quantization_config.modules_to_not_convert, keep_in_fp32_modules
        )

        # Swap eligible nn.Linear layers for VPTQ quantized linear layers
        # before the (quantized) checkpoint weights are loaded.
        replace_with_vptq_linear(
            model,
            quantization_config=self.quantization_config,
            modules_to_not_convert=self.modules_to_not_convert,
        )
        model.config.quantization_config = self.quantization_config

    def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
        return model

    @property
    def is_trainable(self, model: Optional["PreTrainedModel"] = None):
        return False

    def is_serializable(self, safe_serialization=None):
        return True