
import importlib
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union

from packaging import version

from .base import HfQuantizer
from .quantizers_utils import get_module_from_name


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

from ..utils import is_accelerate_available, is_optimum_quanto_available, is_torch_available, logging
from ..utils.quantization_config import QuantoConfig


if is_torch_available():
    import torch

logger = logging.get_logger(__name__)


class QuantoHfQuantizer(HfQuantizer):
    """
    Quantizer for the quanto library
    """

    required_packages = ["quanto", "accelerate"]
    requires_parameters_quantization = True
    requires_calibration = False

    def __init__(self, quantization_config: QuantoConfig, **kwargs):
        super().__init__(quantization_config, **kwargs)
        self.post_init()

    def post_init(self):
        r"""
        Safety checker
        """
        if self.quantization_config.activations is not None and not self.pre_quantized:
            raise ValueError(
                "We don't support quantizing the activations with transformers library. "
                "Use quanto library for more complex use cases such as activations quantization, "
                "calibration and quantization aware training."
            )

    def validate_environment(self, *args, **kwargs):
        if not is_optimum_quanto_available():
            raise ImportError(
                "Loading an optimum-quanto quantized model requires optimum-quanto library (`pip install optimum-quanto`)"
            )
        if not is_accelerate_available():
            raise ImportError(
                "Loading an optimum-quanto quantized model requires accelerate library (`pip install accelerate`)"
            )

    def update_device_map(self, device_map):
        if device_map is None:
            device_map = {"": "cpu"}
            logger.info(
                "The device_map was not initialized. Setting device_map to {'':'cpu'}. "
                "If you want to use the model for inference, please set device_map ='auto'"
            )
        return device_map

    def update_torch_dtype(self, torch_dtype: "torch.dtype") -> "torch.dtype":
        if torch_dtype is None:
            logger.info("You did not specify `torch_dtype` in `from_pretrained`. Setting it to `torch.float32`.")
            torch_dtype = torch.float32
        return torch_dtype

    def update_missing_keys(self, model, missing_keys: List[str], prefix: str) -> List[str]:
        if is_optimum_quanto_available():
            from optimum.quanto import QModuleMixin

        not_missing_keys = []
        for name, module in model.named_modules():
            if isinstance(module, QModuleMixin):
                for missing in missing_keys:
                    if (
                        (name in missing or name in f"{prefix}.{missing}")
                        and not missing.endswith(".weight")
                        and not missing.endswith(".bias")
                    ):
                        not_missing_keys.append(missing)
        return [k for k in missing_keys if k not in not_missing_keys]

    def check_quantized_param(
        self,
        model: "PreTrainedModel",
        param_value: "torch.Tensor",
        param_name: str,
        state_dict: Dict[str, Any],
        **kwargs,
    ) -> bool:
        """
        Check if a parameter needs to be quantized.
        """
        if is_optimum_quanto_available():
            from optimum.quanto import QModuleMixin

        device_map = kwargs.get("device_map", None)
        param_device = kwargs.get("param_device", None)
        # do not quantize a parameter that is going to be offloaded to the cpu or the disk
        if device_map is not None and param_device is not None:
            device_map_values = set(device_map.values())
            if param_device == "cpu" and len(device_map_values) > 1:
                if not (device_map_values == {"cpu"} or device_map_values == {"cpu", "disk"}):
                    return False

        module, tensor_name = get_module_from_name(model, param_name)
        # only the weights of a not-yet-frozen QModuleMixin need to be quantized
        if isinstance(module, QModuleMixin) and "weight" in tensor_name:
            return not module.frozen
        return False

    def adjust_max_memory(self, max_memory: Dict[str, Union[int, str]]) -> Dict[str, Union[int, str]]:
        max_memory = {key: val * 0.90 for key, val in max_memory.items()}
        return max_memory

    def create_quantized_param(
        self,
        model: "PreTrainedModel",
        param_value: "torch.Tensor",
        param_name: str,
        target_device: "torch.device",
        *args,
        **kwargs,
    ):
        """
        Create the quantized parameter by calling .freeze() after setting it to the module.
        """
        from accelerate.utils import set_module_tensor_to_device

        set_module_tensor_to_device(model, param_name, target_device, param_value)
        module, _ = get_module_from_name(model, param_name)
        module.freeze()
        module.weight.requires_grad = False

    def adjust_target_dtype(self, target_dtype: "torch.dtype") -> "torch.dtype":
        if version.parse(importlib.metadata.version("accelerate")) > version.parse("0.27.0"):
            from accelerate.utils import CustomDtype

            mapping = {
                "int8": torch.int8,
                "float8": CustomDtype.FP8,
                "int4": CustomDtype.INT4,
                "int2": CustomDtype.INT2,
            }
            target_dtype = mapping[self.quantization_config.weights]
            return target_dtype
        else:
            raise ValueError(
                "You are using `device_map='auto'` on an optimum-quanto quantized model. To automatically compute"
                " the appropriate device map, you should upgrade your `accelerate` library, "
                "`pip install --upgrade accelerate` or install it from source."
            )

    def _process_model_before_weight_loading(
        self, model: "PreTrainedModel", keep_in_fp32_modules: Optional[List[str]] = None, **kwargs
    ):
        from ..integrations import replace_with_quanto_layers

        self.modules_to_not_convert = self.get_modules_to_not_convert(
            model, self.quantization_config.modules_to_not_convert, keep_in_fp32_modules
        )

        model, _ = replace_with_quanto_layers(
            model, modules_to_not_convert=self.modules_to_not_convert, quantization_config=self.quantization_config
        )
        model.config.quantization_config = self.quantization_config

    def _process_model_after_weight_loading(self, model, **kwargs):
        return model

    @property
    def is_trainable(self, model: Optional["PreTrainedModel"] = None):
        return True

    def is_serializable(self, safe_serialization=None):
        return False
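

# Illustrative usage sketch (not part of the original module): this quantizer is not
# instantiated directly; it is driven by `from_pretrained` when a `QuantoConfig` is
# passed. The checkpoint name below is a placeholder assumption, any causal LM works.
if __name__ == "__main__":
    from transformers import AutoModelForCausalLM, QuantoConfig

    quanto_config = QuantoConfig(weights="int8")  # weight-only quantization; activations stay unquantized
    model = AutoModelForCausalLM.from_pretrained(
        "facebook/opt-125m",  # placeholder checkpoint, substitute your own
        quantization_config=quanto_config,
        device_map="auto",  # requires accelerate > 0.27.0, see adjust_target_dtype above
    )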