o
    Zh0                     @   s   d dl mZmZmZmZ ddlmZ ddlmZm	Z	m
Z
mZ ddlmZ ddlmZ er2ddlmZ e r;d d	lmZ e
 rBd d
lZeeZdd ZG dd deZd
S )    )TYPE_CHECKINGAnyDictList   )prepare_for_hqq_linear)is_accelerate_availableis_hqq_availableis_torch_availablelogging   )HfQuantizer)get_module_from_name)PreTrainedModel)remove_hook_from_moduleNc                 C   s.   | dd d }| }|D ]}|j| }q|S )N.)splitZ_modules)modelnameZmodule_treeparentm r   T/var/www/auris/lib/python3.10/site-packages/transformers/quantizers/quantizer_hqq.pyfind_parent%   s
   r   c                       s  e Zd ZdZdZdZdZdgZ fddZdd Z	d	d
de
e dede
e fddZd	d
de
e de
e de
e fddZd	d
dddedeeef def
ddZd	d
dddedddeeef de
e fddZdd  Z			
d*d!d"Zd*d#d$Zd+d&d'Zedefd(d)Z  ZS ),HqqHfQuantizerz
    HQQ quantizer base HF class.
    nn.Linear modules are first tagged with quant_config in _process_model_before_weight_loading().
    The actual quantization and offloading to the GPU is done in check_quantized_param().
    FTZhqqc                    s$   t  j|fi | d | _d| _d S )NF)super__init__torch_dtypeusing_multi_gpu)selfquantization_configkwargs	__class__r   r   r   9   s   
zHqqHfQuantizer.__init__c                 O   s   t  std|dds|ddrtdtj s td| jd u r8d|v r/|d | _n	tj	| _t
d |d	d }t|tr`d
| v sOd| v rStdtt| dk| _d S d S )NzA valid HQQ version (>=0.2.1) is not available. Please follow the instructions to install it: `https://github.com/mobiusml/hqq/`.Zfrom_tfFZ	from_flaxzwConverting weights from tf/flax weights is currently not supported, please make sure the weights are in PyTorch format.z/No GPU found. A GPU is needed for quantization.r   zUSetting torch_dtype to torch.float32 as the default value since it was not specified.
device_mapcpuZdiskzYou are attempting to use an HQQ model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.r   )r	   ImportErrorget
ValueErrortorchcudaZis_availableRuntimeErrorr   Zfloat32loggerinfo
isinstancedictvalueslensetr   )r    argsr"   r%   r   r   r   validate_environment>   s.   



z#HqqHfQuantizer.validate_environmentr   r   missing_keysprefixreturnc                 K   s   | j r
dd |D S |S )Nc                 S   s   g | ]}d |vr|qS )weightr   ).0keyr   r   r   
<listcomp>b       z6HqqHfQuantizer.update_missing_keys.<locals>.<listcomp>)pre_quantized)r    r   r6   r7   r"   r   r   r   update_missing_keys^   s   z"HqqHfQuantizer.update_missing_keysexpected_keysloaded_keysc                    sH  | j s|S  fdd t|}t rddlm} | D ]\}}||_qt } || t }	|D ]|jjd D ]}
|
v rD|		 q9q1||	8 }|d d t
jdd dh }t }|D ]tfd	d
|D ro|	 q]||8 }|D ])d |v r|	d  n|fdd|D  d |v r|	d  qvt|S )Nc                    s:   |   D ]\}}t|tjjr||j  || qd S N)Znamed_childrenr/   r*   nnLinearaddr   )r   Zlayersr   module)_find_hqq_quantizable_layersr   r   rG   n   s
   zIHqqHfQuantizer.update_expected_keys.<locals>._find_hqq_quantizable_layersr   	HQQLinearskip_modulesr&   Zlinear_layerquant_configcompute_dtypedevicebiasc                 3   s    | ]}| v V  qd S rB   r   )r:   _module)r;   r   r   	<genexpr>   s    z6HqqHfQuantizer.update_expected_keys.<locals>.<genexpr>z.weightc                    s   h | ]} d  | qS )r   r   )r:   Z_ref_key)rP   r   r   	<setcomp>   r=   z6HqqHfQuantizer.update_expected_keys.<locals>.<setcomp>z.bias)r>   r3   r	   hqq.core.quantizerI   Znamed_modulesr   configr!   rE   r*   Zfloat16Zstate_dict_keysanyupdatelist)r    r   r@   rA   Znew_keysrI   r   rF   Z_valid_modulesZ_skipped_modulesZ_skip_moduleZ	_ref_keysZ_rm_keysr   )rG   rP   r;   r   update_expected_keysg   sJ   



z#HqqHfQuantizer.update_expected_keysparam_valueztorch.Tensor
param_name
state_dictc           	      K   sn   t  r	ddlm} t||\}}| jr#t|tjjst||o"|dkS t|tjjr.|dkp6t||o6|dkS )Nr   rH   r9   rO   )	r	   rS   rI   r   r>   r/   r*   rC   rD   )	r    r   rY   rZ   r[   r"   rI   rF   tensor_namer   r   r   check_quantized_param   s    z$HqqHfQuantizer.check_quantized_paramtarget_deviceztorch.deviceunexpected_keysc                 C   sX  t  r	ddlm} t||\}}	d|ddd }
t||
}|
dd }|	dkr.dS i }| D ] \}}|
d |v rT|||dd < |durT||v rT|| q4| j	rt
||r_dS |dd| j|d}|| |jdurt
|jtjrtj|j|_| jr| |}t||| |`~tj  dS |D ]}t||tj||  q|jjd }|jjd	 }d|jdd
d }d}d|v r|}n||v r|| }|D ]}||jv rd} nq|dur|||| j|dd}|jdurt
|jtjrtj|j|_| jr| |}t||| n|j| j|d}t||| tj  dS )a  
        Each nn.Linear layer is processed here.
        We first check if the corresponding module state_dict contains already HQQ quantized parameters.
        If not, we create a temp linear layer with the module state_dict params and use it for quantization
        r   rH   r   Nr   rO   rK   rL   rJ   Zweight_quant_paramsT)rL   rM   rN   Zdel_orig)ZdtyperN   )r	   rS   rI   r   joinr   r   itemsremover>   r/   r   Zload_state_dictrO   r*   ZTensorrC   	Parameterr   _patch_layer_for_multigpusetattr__dict__r+   Zempty_cacherT   r!   r   to)r    r   rY   rZ   r^   r[   r_   rI   rF   r\   Z
layer_nameparent_modulenodeZmodule_state_dictkv	hqq_layerr;   rL   rJ   Z
module_tagZmodule_quant_configZskip_moduler   r   r   create_quantized_param   s   








z%HqqHfQuantizer.create_quantized_paramc                    s$   t dd   fdd_S )Nc                 S   s4   t || j|   }| jd ur|| j7 }|S rB   )r*   matmulrh   rN   Z
dequantizetrO   )r    xoutr   r   r   forward_with_device#  s   

zEHqqHfQuantizer._patch_layer_for_multigpu.<locals>.forward_with_devicec                    s
    | S rB   r   )rq   rs   rm   r   r   <lambda>)  s   
 z:HqqHfQuantizer._patch_layer_for_multigpu.<locals>.<lambda>)r   forward)r    rm   r   rt   r   re      s   z(HqqHfQuantizer._patch_layer_for_multigpuc                 K   s   t || jd}d S )N)r!   )r   r!   r    r   r"   r   r   r   $_process_model_before_weight_loading,  s   z3HqqHfQuantizer._process_model_before_weight_loadingc                 K   s   d|_ |  |_|S NT)Zis_hqq_quantizedis_serializableZis_hqq_serializablerw   r   r   r   #_process_model_after_weight_loading5  s   
z2HqqHfQuantizer._process_model_after_weight_loadingNc                 C      dS ry   r   )r    Zsafe_serializationr   r   r   rz   :  s   zHqqHfQuantizer.is_serializablec                 C   r|   ry   r   )r    r   r   r   is_trainable=  s   zHqqHfQuantizer.is_trainable)r   r   rB   )__name__
__module____qualname____doc__Zuse_keep_in_fp32_modulesZ requires_parameters_quantizationZrequires_calibrationZrequired_packagesr   r5   r   strr?   rX   r   r   boolr]   rn   re   rx   r{   rz   propertyr}   __classcell__r   r   r#   r   r   -   st     
	
9



i

	
r   )typingr   r   r   r   Zintegrationsr   utilsr   r	   r
   r   baser   Zquantizers_utilsr   Zmodeling_utilsr   Zaccelerate.hooksr   r*   Z
get_loggerr~   r-   r   r   r   r   r   r   <module>   s   
