o
    Zh+                     @   s  d dl Z d dlmZmZmZ ddlmZ ddlmZ ddl	m
Z
mZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ ddlmZ dd	lm Z  dd
l!m"Z" ddl#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z* ddl+m,Z, ddl-m.Z. ddl/m0Z0 ddl1m2Z2 ddl3m4Z4 ddl5m6Z6 ddl7m8Z8 ddl9m:Z: ddl;m<Z< ddl=m>Z> ddl?m@Z@ ddlAmBZB i de$de(de*de4de d e:d!e<d"e.d#e6d$e8d%e,d&e0d'e@d(e&d)eBd*e>d+e2d,e"iZCi dededed"edede
d ed!ed$ed%ed&ed#ed'ed(ed)ed*ed+ed,eiZDeEeFZGG d-d. d.ZHG d/d0 d0ZId1eJfd2d3ZKd4eJfd5d6ZLdS )7    N)DictOptionalUnion   )
AutoConfig)logging)
AqlmConfigAutoRoundConfig	AwqConfigBitNetQuantConfigBitsAndBytesConfigCompressedTensorsConfig
EetqConfigFbgemmFp8ConfigFineGrainedFP8Config
GPTQConfigHiggsConfig	HqqConfigQuantizationConfigMixinQuantizationMethodQuantoConfigQuarkConfig
SpQRConfigTorchAoConfig
VptqConfig   )HfQuantizer)AqlmHfQuantizer)AutoRoundQuantizer)AwqQuantizer)BitNetHfQuantizer)Bnb4BitHfQuantizer)Bnb8BitHfQuantizer)CompressedTensorsHfQuantizer)EetqHfQuantizer)FbgemmFp8HfQuantizer)FineGrainedFP8HfQuantizer)GptqHfQuantizer)HiggsHfQuantizer)HqqHfQuantizer)QuantoHfQuantizer)QuarkHfQuantizer)SpQRHfQuantizer)TorchAoHfQuantizer)VptqHfQuantizerZawqZbitsandbytes_4bitZbitsandbytes_8bitZgptqZaqlmZquantoZquarkZeetqZhiggsZhqqzcompressed-tensorsZ
fbgemm_fp8ZtorchaoZbitnetZvptqZspqrZfp8z
auto-roundc                   @   s.   e Zd ZdZedefddZedd ZdS )AutoQuantizationConfigz
    The Auto-HF quantization config class that takes care of automatically dispatching to the correct
    quantization config given a quantization config stored in a dictionary.
    quantization_config_dictc                 C   s   | dd }| dds| ddr"| ddrdnd}tj| }n|d u r*td|t vr>td| d	tt  t| }||S )
Nquant_methodload_in_8bitFload_in_4bit_4bit_8bitThe model's quantization config from the arguments has no `quant_method` attribute. Make sure that the model has been correctly quantizedUnknown quantization type, got  - supported types are: )	getr   BITS_AND_BYTES
ValueError AUTO_QUANTIZATION_CONFIG_MAPPINGkeyslistAUTO_QUANTIZER_MAPPING	from_dict)clsr0   r1   suffix
target_cls rD   K/var/www/auris/lib/python3.10/site-packages/transformers/quantizers/auto.pyr@   q   s    

z AutoQuantizationConfig.from_dictc                 K   sV   t j|fi |}t|dd d u rtd| d|j}| |}|jdi | |S )Nquantization_configz)Did not found a `quantization_config` in z2. Make sure that the model is correctly quantized.rD   )r   from_pretrainedgetattrr;   rF   r@   update)rA   pretrained_model_name_or_pathkwargsZmodel_configr0   rF   rD   rD   rE   rG      s   

z&AutoQuantizationConfig.from_pretrainedN)__name__
__module____qualname____doc__classmethodr   r@   rG   rD   rD   rD   rE   r/   k   s    r/   c                   @   sd   e Zd ZdZedeeef fddZedd Z	edee
ef dee fdd	Zed
d ZdS )AutoHfQuantizerz
     The Auto-HF quantizer class that takes care of automatically instantiating to the correct
    `HfQuantizer` given the `QuantizationConfig`.
    rF   c                 K   s|   t |tr
t|}|j}|tjkr|jr|d7 }n|d7 }|t	 vr2t
d| dtt	  t| }||fi |S )Nr5   r4   r7   r8   )
isinstancedictr/   r@   r1   r   r:   r2   r?   r=   r;   r>   )rA   rF   rK   r1   rC   rD   rD   rE   from_config   s   




zAutoHfQuantizer.from_configc                 K   s   t j|fi |}| |S )N)r/   rG   rT   )rA   rJ   rK   rF   rD   rD   rE   rG      s   
zAutoHfQuantizer.from_pretrainedquantization_config_from_argsc                 C   s   |durd}nd}t |trt |trt|}nt|}t |tttttfrK|durK|	 }|
 D ]
\}}t||| q4|dt|  d7 }|dkrTt| |S )z
        handles situations where both quantization_config from args and quantization_config from model config are present.
        NzYou passed `quantization_config` or equivalent parameters to `from_pretrained` but the model you're loading already has a `quantization_config` attribute. The `quantization_config` from the model will be used. z"However, loading attributes (e.g. z]) will be overwritten with the one you passed to `from_pretrained`. The rest will be ignored.)rR   rS   r	   r@   r/   r   r
   r   r   Zget_loading_attributesitemssetattrr>   r=   warningswarn)rA   rF   rU   Zwarning_msgZloading_attr_dictattrvalrD   rD   rE   merge_quantization_configs   s(   	



z*AutoHfQuantizer.merge_quantization_configsc                 C   s   |  dd }|  dds|  ddr"|  ddrdnd}tj| }n|d u r*td|t vrBtd| d	tt	  d
 dS dS )Nr1   r2   Fr3   r4   r5   r6   r7   r8   z~. Hence, we will skip the quantization. To remove the warning, you can delete the quantization_config attribute in config.jsonT)
r9   r   r:   r;   r<   r=   loggerwarningr>   r?   )r0   r1   rB   rD   rD   rE   supports_quant_method   s    
z%AutoHfQuantizer.supports_quant_methodN)rL   rM   rN   rO   rP   r   r   r   rT   rG   rS   r   r]   staticmethodr`   rD   rD   rD   rE   rQ      s    

)rQ   methodc                        fdd}|S )z-Register a custom quantization configuration.c                    6    t v rtd  dt| tstd| t  < | S )NzConfig '' already registeredz*Config must extend QuantizationConfigMixin)r<   r;   
issubclassr   rA   rb   rD   rE   register_config_fn      
z8register_quantization_config.<locals>.register_config_fnrD   )rb   ri   rD   rh   rE   register_quantization_config      
rk   namec                    rc   )zRegister a custom quantizer.c                    rd   )NzQuantizer 're   z!Quantizer must extend HfQuantizer)r?   r;   rf   r   rg   rm   rD   rE   register_quantizer_fn
  rj   z1register_quantizer.<locals>.register_quantizer_fnrD   )rm   ro   rD   rn   rE   register_quantizer  rl   rp   )MrY   typingr   r   r   Zmodels.auto.configuration_autor   utilsr   Zutils.quantization_configr   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   baser   Zquantizer_aqlmr   Zquantizer_auto_roundr   Zquantizer_awqr   Zquantizer_bitnetr    Zquantizer_bnb_4bitr!   Zquantizer_bnb_8bitr"   Zquantizer_compressed_tensorsr#   Zquantizer_eetqr$   Zquantizer_fbgemm_fp8r%   Zquantizer_finegrained_fp8r&   Zquantizer_gptqr'   Zquantizer_higgsr(   Zquantizer_hqqr)   Zquantizer_quantor*   Zquantizer_quarkr+   Zquantizer_spqrr,   Zquantizer_torchaor-   Zquantizer_vptqr.   r?   r<   Z
get_loggerrL   r^   r/   rQ   strrk   rp   rD   rD   rD   rE   <module>   s   T	
	

)c