import os
import re
from typing import List

from ..utils import is_compressed_tensors_available, is_torch_available, logging
from ..utils.quantization_config import CompressedTensorsConfig
from .base import HfQuantizer


if is_torch_available():
    import torch

logger = logging.get_logger(__name__)


class CompressedTensorsHfQuantizer(HfQuantizer):
    """
    Quantizer for the compressed_tensors package. Loads and restores models to
    quantized state with compressed_tensors.
    """

    requires_calibration = True
    required_packages = ["compressed_tensors"]

    def __init__(self, quantization_config: CompressedTensorsConfig, **kwargs):
        super().__init__(quantization_config, **kwargs)

        if not is_compressed_tensors_available():
            raise ImportError(
                "Using `compressed_tensors` quantized models requires the compressed-tensors library: "
                "`pip install compressed-tensors`"
            )
        quantization_config.post_init()

        from compressed_tensors.compressors import ModelCompressor

        self.compressor = ModelCompressor.from_compression_config(quantization_config)
        self.run_compressed = quantization_config.run_compressed
        self.quantization_config = quantization_config

    def update_missing_keys_after_loading(self, model, missing_keys: List[str], prefix: str) -> List[str]:
        """
        Update missing keys after loading the model. This is necessary for compressed tensors
        to load the model correctly. We expect weights to be present in missing keys.
        The weights are re-constructed by ModelCompressor in _process_model_after_weight_loading.

        This function cleans up expected missing keys and returns the remaining missing keys.
        """
        if self.run_compressed:
            return missing_keys

        # Keys that ModelCompressor reconstructs after loading are expected to be missing
        # from the compressed checkpoint, so they are filtered out here.
        expected_missing_keys = self.compressor.get_missing_module_keys(model)
        return [
            key for key in missing_keys if not any(re.match(f".*{pattern}", key) for pattern in expected_missing_keys)
        ]

    def update_unexpected_keys(self, model, unexpected_keys: List[str], prefix: str) -> List[str]:
        """
        Override this method if you want to adjust the `unexpected_keys`.

        Args:
            unexpected_keys (`List[str]`, *optional*):
                The list of unexpected keys in the checkpoint compared to the state dict of the model.
        """
        if self.run_compressed:
            return unexpected_keys

        keys_to_ignore = self.compressor.get_unexpected_file_keys(model)
        return [key for key in unexpected_keys if not any(re.match(f".*{pattern}", key) for pattern in keys_to_ignore)]

    def validate_environment(self, *args, **kwargs):
        if not is_compressed_tensors_available():
            raise ImportError(
                "Using `compressed_tensors` quantized models requires the compressed-tensors library: "
                "`pip install compressed-tensors`"
            )
        if not is_torch_available():
            raise ImportError("torch is required for using compressed-tensors quantization")

    def update_torch_dtype(self, torch_dtype: "torch.dtype") -> "torch.dtype":
        if torch_dtype is None:
            logger.info("Loading model using torch.float16 for compressed-tensors quantization")
            torch_dtype = torch.float16
        elif torch_dtype != torch.float16:
            logger.info(
                "We suggest you to set `torch_dtype=torch.float16` for better efficiency with compressed_tensors."
            )
        return torch_dtype

    def _process_model_before_weight_loading(self, model, **kwargs):
        from compressed_tensors.quantization import apply_quantization_config

        ct_quantization_config = self.compressor.quantization_config

        if self.run_compressed:
            apply_quantization_config(model, ct_quantization_config, run_compressed=True)
        elif not self.quantization_config.is_quantization_compressed:
            apply_quantization_config(model, ct_quantization_config)

    def _process_model_after_weight_loading(self, model, **kwargs):
        """Decompress the loaded model if necessary - needed for qat."""
        if (
            self.quantization_config.is_quantization_compressed and not self.run_compressed
        ) or self.quantization_config.is_sparsification_compressed:
            config = kwargs.get("config", None)
            cache_path = config._name_or_path

            if not os.path.exists(cache_path):
                from transformers.utils import cached_file

                config_file_path = cached_file(cache_path, "config.json")
                cache_path = os.path.sep.join(config_file_path.split(os.path.sep)[:-1])

            if self.quantization_config.is_quantization_compressed and not self.run_compressed:
                from compressed_tensors.quantization import QuantizationStatus

                self.compressor.quantization_config.quantization_status = QuantizationStatus.FROZEN

            self.compressor.decompress(model_path=cache_path, model=model)

    def update_tp_plan(self, config):
        additional_plan = {
            "layers.*.feed_forward.experts.*.gate_proj.weight": "local_colwise",
            "layers.*.feed_forward.experts.*.gate_proj.weight_scale": "local_colwise",
            "layers.*.feed_forward.experts.*.up_proj.weight": "local_colwise",
            "layers.*.feed_forward.experts.*.up_proj.weight_scale": "local_colwise",
            "layers.*.feed_forward.experts.*.down_proj.weight": "local_rowwise",
        }
        if config.get_text_config() is not None and config.get_text_config().base_model_tp_plan is not None:
            config.get_text_config().base_model_tp_plan.update(additional_plan)

        return config

    @property
    def is_trainable(self):
        return True

    @property
    def is_qat_trainable(self) -> bool:
        """Loaded models can carry out quantization aware training."""
        return not self.run_compressed or not self.quantization_config.is_quantization_compressed

    def is_serializable(self, safe_serialization=None) -> bool:
        """Models quantized using compressed tensors can be saved to disk."""
        return True
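# ---------------------------------------------------------------------------
# Usage sketch (not part of the original module): a minimal, hedged example of
# how this quantizer is typically exercised. The checkpoint name below is
# hypothetical; any checkpoint whose config.json carries a compressed-tensors
# `quantization_config` should route through CompressedTensorsHfQuantizer
# automatically when loaded with `from_pretrained`.
#
#   import torch
#   from transformers import AutoModelForCausalLM, AutoTokenizer
#
#   model_id = "org/llama-example-w8a8-compressed"  # hypothetical checkpoint name
#   model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16)
#   tokenizer = AutoTokenizer.from_pretrained(model_id)
#
#   inputs = tokenizer("Hello, compressed world!", return_tensors="pt")
#   outputs = model.generate(**inputs, max_new_tokens=20)
#   print(tokenizer.decode(outputs[0], skip_special_tokens=True))
# ---------------------------------------------------------------------------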