from typing import TYPE_CHECKING, Any, Dict, List, Optional

from .base import HfQuantizer


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

from ..utils import is_accelerate_available, is_eetq_available, is_torch_available, logging
from .quantizers_utils import get_module_from_name


if is_torch_available():
    import torch

logger = logging.get_logger(__name__)


class EetqHfQuantizer(HfQuantizer):
    """
    8-bit quantization from the EETQ quantization method:
        before loading: converts transformer layers into W8A16Linear
        during loading: loads the 16-bit weights and passes them to the layer object
        after loading: quantizes individual weights in Linear8bitLt into 8-bit at the first .cuda() call
    """
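
    # A minimal usage sketch (illustrative, not part of this module's API: the model
    # name is arbitrary, and a CUDA device plus the `eetq` kernels are assumed).
    # This quantizer is selected automatically when `from_pretrained` receives an
    # `EetqConfig`:
    #
    #   from transformers import AutoModelForCausalLM, EetqConfig
    #
    #   quantization_config = EetqConfig("int8")
    #   model = AutoModelForCausalLM.from_pretrained(
    #       "facebook/opt-125m", quantization_config=quantization_config
    #   )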
TFeetq
acceleratec                 4   > [         TU ]  " U40 UD6  Xl        g N)super__init__quantization_config)selfr   kwargs	__class__s      ^/var/www/auris/envauris/lib/python3.13/site-packages/transformers/quantizers/quantizer_eetq.pyr   EetqHfQuantizer.__init__-   s    ,77#6     c                    [        5       (       d  [        S5      e SS Kn[	        5       (       d  [        S5      eUR                  SS5      (       d  UR                  SS5      (       a  [        S	5      e[        R                  R                  5       (       d  [        S
5      eUR                  SS 5      nUc  [        R                  S5        g UbJ  [        U[        5      (       a4  SUR                  5       ;   d  SUR                  5       ;   a  [        S5      eg g g ! [         a!  nS[        U5      ;   a  [        S5      Uee S nAff = f)NzUsing `eetq` 8-bit quantization requires eetq.Please install the latest version of eetq from : https://github.com/NetEase-FuXi/EETQr   shard_checkpointzYou are using a version of EETQ that is incompatible with the current transformers version. Either downgrade transformers to <= v4.46.3 or, if available, upgrade EETQ to > v1.0.0.zNLoading an EETQ quantized model requires accelerate (`pip install accelerate`)from_tfF	from_flaxzConverting into 8-bit weights from tf/flax weights is currently not supported, please make sure the weights are in PyTorch format.z/No GPU found. A GPU is needed for quantization.
device_mapzYou have loaded an EETQ model on CPU and have a CUDA device available, make sure to set your model on a GPU device in order to run your model.cpudiskzYou are attempting to load an EETQ model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.)r   ImportErrorr   strr   get
ValueErrortorchcudais_availableRuntimeErrorloggerwarning_once
isinstancedictvalues)r   argsr   r   excr%   s         r   validate_environment$EetqHfQuantizer.validate_environment1   sQ    ""h 
	 '((noo::i''6::k5+I+I; 
 zz&&((PQQZZd3
I #*d++*:K:K:M1MQW[e[l[l[nQn h  Ro+ $=  
	!SX- "n 
 
	s   D 
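
    # A hedged sketch of device maps against the checks above (module names are
    # hypothetical): device_map="cuda:0" or device_map={"model": 0, "lm_head": 0}
    # pass, while device_map={"model": 0, "lm_head": "cpu"} raises the ValueError,
    # since EETQ layers only run on GPU.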
E$E  Ereturnc                     Uc(  [         R                  n[        R                  SU5        U$ U[         R                  :w  a  [        R                  S5        U$ )Na  Overriding torch_dtype=%s with `torch_dtype=torch.float16` due to requirements of `eetq` to enable model loading in 8-bit. Pass your own torch_dtype to specify the dtype of the remaining non-linear layers or pass torch_dtype=torch.float16 to remove this warning.zRWe suggest you to set `torch_dtype=torch.float16` for better efficiency with EETQ.)r,   float16r0   info)r   torch_dtypes     r   update_torch_dtype"EetqHfQuantizer.update_torch_dtype_   sQ    --KKKE   EMM)KKlmr    modelr   param_valueztorch.Tensor
param_name
state_dictc                     SSK Jn  [        X5      u  px[        Xv5      (       aY  U R                  (       d  US:X  a0  US:X  a)  UR
                  [        R                  :w  a  [        S5      egUS:X  a  [        S5      eg	g)
Nr   )
EetqLinearbiasweightz6Expect quantized weights but got an unquantized weightFweight_scalez;Expect unquantized weights but got a quantized weight_scaleT)	r   rE   r   r2   pre_quantizeddtyper,   int8r+   )	r   r@   rA   rB   rC   r   rE   moduletensor_names	            r   check_quantized_param%EetqHfQuantizer.check_quantized_paramm   sq     	$25Ef))!![F%:(*{/@/@EJJ/N$%]^^.0$%bccr    target_deviceztorch.deviceunexpected_keysc                     SSK Jn  [        X5      u  pU" U5      u  pU
R                  U5      UR                  U	'   UR                  SUR                  U5      5        g)z2
        """
        Quantizes weights into qweight and weight_scales.
        """
        from eetq import quantize_and_preprocess_weights

        module, tensor_name = get_module_from_name(model, param_name)
        new_value, weight_scale = quantize_and_preprocess_weights(param_value)

        module._buffers[tensor_name] = new_value.to(target_device)
        module.register("weight_scales", weight_scale.to(target_device))

    def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
        return model

    def _process_model_before_weight_loading(
        self,
        model: "PreTrainedModel",
        keep_in_fp32_modules: Optional[List[str]] = None,
        **kwargs,
    ):
        from ..integrations import replace_with_eetq_linear

        self.modules_to_not_convert = self.get_modules_to_not_convert(
            model, self.quantization_config.modules_to_not_convert, keep_in_fp32_modules
        )

        model = replace_with_eetq_linear(
            model,
            modules_to_not_convert=self.modules_to_not_convert,
            quantization_config=self.quantization_config,
            pre_quantized=self.pre_quantized,
        )

        model.config.quantization_config = self.quantization_config

    def is_serializable(self, safe_serialization=None):
        return True

    @property
    def is_trainable(self) -> bool:
        return True
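

# A hedged, plain-PyTorch sketch of the per-channel symmetric int8 scheme that
# `create_quantized_param` delegates to `eetq.quantize_and_preprocess_weights`.
# This is an approximation for illustration only: the real kernel additionally
# preprocesses the int8 layout for EETQ's W8A16 CUDA GEMM, and the function
# below (`_fake_eetq_quantize`) is hypothetical, not part of transformers or EETQ.
#
#   def _fake_eetq_quantize(weight: "torch.Tensor"):
#       # one scale per output channel, symmetric around zero
#       weight_scale = weight.abs().amax(dim=1) / 127.0
#       qweight = torch.clamp(
#           torch.round(weight / weight_scale[:, None]), -128, 127
#       ).to(torch.int8)
#       # at inference, fp16 activations multiply qweight * weight_scale (W8A16)
#       return qweight, weight_scale.half()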