
from typing import TYPE_CHECKING, Any, Dict, List

from ..integrations import prepare_for_hqq_linear
from ..utils import is_accelerate_available, is_hqq_available, is_torch_available, logging
from .base import HfQuantizer
from .quantizers_utils import get_module_from_name


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

if is_accelerate_available():
    from accelerate.hooks import remove_hook_from_module

if is_torch_available():
    import torch

logger = logging.get_logger(__name__)


# Finds the parent of a node module named "name"
def find_parent(model, name):
    module_tree = name.split(".")[:-1]
    parent = model
    for m in module_tree:
        parent = parent._modules[m]
    return parent


class HqqHfQuantizer(HfQuantizer):
    """
    HQQ quantizer base HF class.
    nn.Linear modules are first tagged with quant_config in _process_model_before_weight_loading().
    The actual quantization and offloading to the GPU is done in check_quantized_param().
    """

    use_keep_in_fp32_modules = False
    requires_parameters_quantization = True
    requires_calibration = False
    required_packages = ["hqq"]

    def __init__(self, quantization_config, **kwargs):
        super().__init__(quantization_config, **kwargs)
        self.torch_dtype = None
        self.using_multi_gpu = False

    def validate_environment(self, *args, **kwargs):
        if not is_hqq_available():
            raise ImportError(
                "A valid HQQ version (>=0.2.1) is not available. Please follow the instructions to install it: "
                "`https://github.com/mobiusml/hqq/`."
            )

        if kwargs.get("from_tf", False) or kwargs.get("from_flax", False):
            raise ValueError(
                "Converting weights from tf/flax weights is currently not supported, please make "
                "sure the weights are in PyTorch format."
            )

        if not torch.cuda.is_available():
            raise RuntimeError("No GPU found. A GPU is needed for quantization.")

        if self.torch_dtype is None:
            if "torch_dtype" in kwargs:
                self.torch_dtype = kwargs["torch_dtype"]
            else:
                self.torch_dtype = torch.float32
                logger.info("Setting torch_dtype to torch.float32 as the default value since it was not specified.")

        device_map = kwargs.get("device_map", None)
        if isinstance(device_map, dict):
            if "cpu" in device_map.values() or "disk" in device_map.values():
                raise ValueError(
                    "You are attempting to use an HQQ model with a device_map that contains a CPU or disk device. "
                    "This is not supported. Please remove the CPU or disk device from the device_map."
                )
            else:
                self.using_multi_gpu = len(set(device_map.values())) > 1

    def update_missing_keys(
        self, model: "PreTrainedModel", missing_keys: List[str], prefix: str, **kwargs
    ) -> List[str]:
        if self.pre_quantized:
            return [key for key in missing_keys if ("weight" not in key)]
        else:
            return missing_keys

    # Adds the expected keys for HQQLinear modules when the model was saved quantized but is initialized with torch.nn.Linear
    def update_expected_keys(
        self, model: "PreTrainedModel", expected_keys: List[str], loaded_keys: List[str]
    ) -> List[str]:
        if not self.pre_quantized:
            return expected_keys

        # Collects all quantizable (linear) layers
        def _find_hqq_quantizable_layers(model, layers):
            for name, module in model.named_children():
                if isinstance(module, (torch.nn.Linear)):
                    layers.add(module.name)
                _find_hqq_quantizable_layers(module, layers)

        new_keys = set(expected_keys)
        if is_hqq_available():
            from hqq.core.quantize import HQQLinear

            # Name modules
            for name, module in model.named_modules():
                module.name = name

            # Valid modules are Linear layers that should carry an HQQLinear state_dict
            _valid_modules = set()
            _find_hqq_quantizable_layers(model, _valid_modules)

            # Remove skipped modules
            _skipped_modules = set()
            for _module in _valid_modules:
                for _skip_module in model.config.quantization_config["skip_modules"]:
                    if _skip_module in _module:
                        _skipped_modules.add(_module)
            _valid_modules -= _skipped_modules

            # Reference keys of an (empty) HQQLinear state_dict
            _ref_keys = HQQLinear(
                linear_layer=None, quant_config=None, compute_dtype=torch.float16, device="cpu"
            ).state_dict_keys() - {"bias"}

            # Clean-up: drop the keys belonging to the quantizable layers
            _rm_keys = set()
            for key in new_keys:
                if any(_module in key for _module in _valid_modules):
                    _rm_keys.add(key)
            new_keys -= _rm_keys
            # At this point, new_keys contains all the keys of the layers that are NOT HQQLinear or torch.nn.Linear

            # Re-populate Linear/HQQLinear keys
            for _module in _valid_modules:
                if _module + ".weight" in loaded_keys:
                    new_keys.add(_module + ".weight")
                else:
                    new_keys.update({_module + "." + _ref_key for _ref_key in _ref_keys})
                if _module + ".bias" in loaded_keys:
                    new_keys.add(_module + ".bias")

        return list(new_keys)

    def check_quantized_param(
        self,
        model: "PreTrainedModel",
        param_value: "torch.Tensor",
        param_name: str,
        state_dict: Dict[str, Any],
        **kwargs,
    ) -> bool:
        if is_hqq_available():
            from hqq.core.quantize import HQQLinear
        module, tensor_name = get_module_from_name(model, param_name)

        if self.pre_quantized:
            return (isinstance(module, torch.nn.Linear) or isinstance(module, HQQLinear)) and tensor_name != "weight"
        else:
            return isinstance(module, torch.nn.Linear) and tensor_name == "weight"

    def create_quantized_param(
        self,
        model: "PreTrainedModel",
        param_value: "torch.Tensor",
        param_name: str,
        target_device: "torch.device",
        state_dict: Dict[str, Any],
        unexpected_keys: List[str],
    ):
        """
        Each nn.Linear layer is processed here.
        We first check if the corresponding module state_dict contains already HQQ quantized parameters.
        If not, we create a temp linear layer with the module state_dict params and use it for quantization
        """

        if is_hqq_available():
            from hqq.core.quantize import HQQLinear

        module, tensor_name = get_module_from_name(model, param_name)
        layer_name = ".".join(param_name.split(".")[:-1])
        parent_module = find_parent(model, layer_name)
        node = layer_name.split(".")[-1]

        if tensor_name == "bias":
            # this should already be set
            return

        # Collect the state_dict entries that belong to this module
        module_state_dict = {}
        for k, v in state_dict.items():
            if layer_name + "." in k:
                module_state_dict[k.split(".")[-1]] = v
                if unexpected_keys is not None and k in unexpected_keys:
                    unexpected_keys.remove(k)

        if self.pre_quantized:
            if isinstance(module, HQQLinear):
                return
            else:
                # Build an empty HQQLinear and load the already-quantized params into it
                hqq_layer = HQQLinear(
                    linear_layer=None,
                    quant_config=None,
                    compute_dtype=self.torch_dtype,
                    device=target_device,
                )

                hqq_layer.load_state_dict(module_state_dict)

                if hqq_layer.bias is not None and isinstance(hqq_layer.bias, torch.Tensor):
                    hqq_layer.bias = torch.nn.Parameter(hqq_layer.bias)

                if self.using_multi_gpu:
                    hqq_layer = self._patch_layer_for_multigpu(hqq_layer)

                setattr(parent_module, node, hqq_layer)

                # cleanup
                del module.__dict__, module
                torch.cuda.empty_cache()
                return

        # Step 1: populate module with weight/bias from module state dict
        for key in module_state_dict:
            setattr(module, key, torch.nn.Parameter(module_state_dict[key]))

        # Step 2: Replace module with either HQQLinear or move it to device. We do this via setattr on the parent,
        # as doing it on the module directly doesn't work.
        quant_config = model.config.quantization_config["quant_config"]
        skip_modules = model.config.quantization_config["skip_modules"]

        module_tag = ".".join(module.name.split(".")[-2:])
        module_quant_config = None
        if "weight_quant_params" in quant_config:
            module_quant_config = quant_config
        elif module_tag in quant_config:
            module_quant_config = quant_config[module_tag]

        for skip_module in skip_modules:
            if skip_module in module.name:
                module_quant_config = None
                break

        if module_quant_config is not None:
            hqq_layer = HQQLinear(
                module,
                module_quant_config,
                compute_dtype=self.torch_dtype,
                device=target_device,
                del_orig=True,
            )

            if hqq_layer.bias is not None and isinstance(hqq_layer.bias, torch.Tensor):
                hqq_layer.bias = torch.nn.Parameter(hqq_layer.bias)

            if self.using_multi_gpu:
                hqq_layer = self._patch_layer_for_multigpu(hqq_layer)

            setattr(parent_module, node, hqq_layer)

        else:
            module = module.to(dtype=self.torch_dtype, device=target_device)
            setattr(parent_module, node, module)

        torch.cuda.empty_cache()

    # Removes the accelerate hook and uses a simpler forward pass. Otherwise, this breaks with multi-gpu
    def _patch_layer_for_multigpu(self, hqq_layer):
        hqq_layer = remove_hook_from_module(hqq_layer)

        def forward_with_device(self, x):
            out = torch.matmul(x.to(self.device), self.dequantize().t())
            if self.bias is not None:
                out += self.bias
            return out

        hqq_layer.forward = lambda x: forward_with_device(hqq_layer, x)
        return hqq_layer

    def _process_model_before_weight_loading(
        self,
        model: "PreTrainedModel",
        **kwargs,
    ):
        # Tags each valid nn.Linear with its quant_config so that create_quantized_param() can do the
        # actual nn.Linear -> HQQLinear conversion later.
        model = prepare_for_hqq_linear(model, quantization_config=self.quantization_config)

    def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
        model.is_hqq_quantized = True
        model.is_hqq_serializable = self.is_serializable()
        return model

    def is_serializable(self, safe_serialization=None):
        return True

    @property
    def is_trainable(self) -> bool:
        return True
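

# ---------------------------------------------------------------------------
# Usage sketch (not part of the library module above): this quantizer is never
# instantiated directly; it is selected by `from_pretrained` when an
# `HqqConfig` is passed as `quantization_config`. A minimal sketch, assuming a
# CUDA GPU, an installed `hqq` package, and a placeholder model id
# ("<model-id>") that you would replace with a real checkpoint:
#
#     from transformers import AutoModelForCausalLM, HqqConfig
#
#     quant_config = HqqConfig(nbits=4, group_size=64)   # 4-bit weights, groups of 64
#     model = AutoModelForCausalLM.from_pretrained(
#         "<model-id>",
#         torch_dtype=torch.float16,           # becomes self.torch_dtype above
#         device_map="cuda",
#         quantization_config=quant_config,    # routes loading through HqqHfQuantizer
#     )
#
# During loading, _process_model_before_weight_loading() tags the nn.Linear
# modules with their quant_config, and create_quantized_param() swaps each
# tagged layer for an HQQLinear on the target device.
# ---------------------------------------------------------------------------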
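
# ---------------------------------------------------------------------------
# Per-layer sketch of what create_quantized_param() does for a single module
# (illustrative only; it calls the `hqq` package directly with the same
# HQQLinear arguments used above — `BaseQuantizeConfig` is hqq's quant-config
# helper, and a CUDA device is assumed):
#
#     import torch
#     from hqq.core.quantize import BaseQuantizeConfig, HQQLinear
#
#     linear = torch.nn.Linear(4096, 4096, bias=False)            # stand-in fp layer
#     quant_config = BaseQuantizeConfig(nbits=4, group_size=64)   # same knobs as HqqConfig
#     hqq_layer = HQQLinear(
#         linear,                       # original layer, freed via del_orig
#         quant_config,
#         compute_dtype=torch.float16,
#         device="cuda",
#         del_orig=True,
#     )
#     y = hqq_layer(torch.randn(1, 4096, dtype=torch.float16, device="cuda"))
# ---------------------------------------------------------------------------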