
    fThT!                         S SK JrJrJrJrJr  SSKJr  SSKJ	r	  SSK
Jr  \(       a  SSKJr  SSKJrJrJrJrJr  SS	KJr  \" 5       (       a  S S
Kr\R.                  " \5      r " S S\	5      rg
)    )TYPE_CHECKINGAnyDictListOptional   )tqdm   )HfQuantizer)get_module_from_name)PreTrainedModel)is_accelerate_availableis_flute_availableis_hadamard_availableis_torch_availablelogging)QuantizationConfigMixinNc                   &  ^  \ rS rSrSrSrSrSS/rS\4U 4S jjr	S	 r
S S jr S!SSSSS\SSS\\\4   S\\\      4S jjr  S"S jrS"S jrS\\   S\S
\\   4S jr\S!S\S   4S jj5       rS!S jrSSSSS\S\\\4   S
\4
S jrS rSrU =r$ )#HiggsHfQuantizer"   z
Quantizer of the HIGGS method. Enables the loading of prequantized models and in-flight quantization of full-precision models.
FTzflute-kernelfast_hadamard_transformquantization_configc                 4   > [         TU ]  " U40 UD6  Xl        g N)super__init__r   )selfr   kwargs	__class__s      _/var/www/auris/envauris/lib/python3.13/site-packages/transformers/quantizers/quantizer_higgs.pyr   HiggsHfQuantizer.__init__+   s    ,77#6     c                    [         R                  R                  5       (       d  [        S5      e[	        5       (       d  [        S5      e[        5       (       d  [        S5      e[        5       (       d  [        S5      eUc  [        S5      e[        U[        5      (       a4  SUR                  5       ;   d  SUR                  5       ;   a  [        S5      eg g )	NzNHIGGS quantization is only supported on GPU. Please use a different quantizer.zHUsing `higgs` quantization requires Accelerate: `pip install accelerate`zLUsing `higgs` quantization requires FLUTE: `pip install flute-kernel>=0.3.0`zbUsing `higgs` quantization requires fast_hadamard_transform: `pip install fast_hadamard_transform`zwYou are attempting to load a HIGGS model without setting device_map. Please set device_map comprised of 'cuda' devices.cpudiskzYou are attempting to load a HIGGS model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.)torchcudais_availableNotImplementedErrorr   ImportErrorr   r   
ValueError
isinstancedictvalues)r   
device_mapr   s      r    validate_environment%HiggsHfQuantizer.validate_environment/   s    zz&&((%&vww&((hii!##lmm$&&t  F  
D))u
8I8I8K/KvYcYjYjYlOld  Pm)r"   returnc                     Uc'  [         R                  S5        [        R                  nU$ U[        R                  :w  a#  U[        R                  :w  a  [        SU S35      eU$ )NzS`torch_dtype` is None. Setting `torch_dtype=torch.float16` for FLUTE compatibility.zInvalid `torch_dtype` z_. HIGGS quantization only supports `torch_dtype=torch.float16` or `torch_dtype=torch.bfloat16`.)loggerinfor&   float16bfloat16r+   )r   torch_dtypes     r    update_torch_dtype#HiggsHfQuantizer.update_torch_dtypeI   sg    KKmn--K  EMM)kU^^.K(  6U  V  r"   modelr   param_valueztorch.Tensor
param_nametarget_deviceztorch.device
state_dictunexpected_keysc                 ,   SSK Jn   U" UR                  U5      U R                  R                  U R                  R
                  U R                  R                  U R                  R                  5      nA[        X5      u  pSR                  UR                  S5      S S 5      nUR                  5        H  u  pXR                  ;   a-  [        R                  R                  USS9U	R                  U'   MA  XR                   ;   a.  [        R                  R#                  U5      U	R                   U'   M~  US:X  a/  Xl        UR'                  5       U R                  R$                  U'   M  [)        SU S	U	 35      e   Ub  X6;   a  UR+                  U5        g g g )
Nr   )quantize_with_higgs.F)requires_gradtune_metadatazUnexpected key z in module )integrationsrB   tor   bitsp
group_sizehadamard_sizer   joinsplititems_parametersr&   nn	Parameter_buffersBufferrF   to_dictr+   remove)r   r;   r<   r=   r>   r?   r@   rB   
flute_dictmodule_module_namekeyvalues                 r    create_quantized_param'HiggsHfQuantizer.create_quantized_paramT   sb    	7	 )NN=)$$))$$&&$$//$$22

 (;	hhz//4Sb9:$**,JC(((*/((*<*<URW*<*X""3''',xxu'=$'',$FKmmo((66{C ?3%{6(!KLL - &:+H"":. ,I&r"   c                 h    SSK Jn  U" UU R                  S9  U R                  UR                  l        g )Nr   )replace_with_higgs_linearr   )rG   r`   r   config)r   r;   r   r`   s       r    $_process_model_before_weight_loading5HiggsHfQuantizer._process_model_before_weight_loading{   s/    
 	=! $ 8 8	
 ,0+C+C(r"   c                    SSK JnJn  SSKJn  SSKJn  0 nUR                  5        VV	s0 s H  u  p[        X5      (       d  M  X_M     n
nn	[        U
R                  5       SSS9 GH  u  pU	R                  R                  U;  a0  U" U	R                  R                  S	9XyR                  R                  '   XyR                  R                     U	l        UR                  U R                  R                   U   5      U	l        U" U	R                  R"                  U	R$                  R"                  U	R                   S
9u  U	R                  l        U	l        U	R                   R'                  5       U R                  R                   U'   GM     g s  sn	nf )Nr   )TuneMetaDatamaybe_tune_and_repack)make_workspace_streamkr   HiggsLinearzRepacking HIGGS modulesF)descleave)device)weightscalesmetadata)
flute.tunerf   rg   flute.utilsrh   rG   rj   named_modulesr,   r	   rO   rn   rm   	workspace	from_dictr   rF   dataro   rU   )r   r;   r   rf   rg   rh   rj   flute_workspacesnamerX   flute_moduless              r    #_process_model_after_weight_loading4HiggsHfQuantizer._process_model_after_weight_loading   s@   B6.:?:M:M:Os:O,$S]^dSr:Os !4!4!6=V^cdLD }}##+;;9OW]WdWdWkWk9l !5!56/0D0DEF $0#9#9$:R:R:`:`ae:f#gF 7L}}))}}))--84FMM 4
 <B;O;O;W;W;YD$$2248 e ts   FFmissing_keysprefixc                   ^^	 SSK Jn  UR                  5        VVs1 s H  u  pV[        Xd5      (       d  M  UiM     snnm	S[        S[
        4U	U4S jjnU Vs/ s H  o" U5      (       a  M  UPM     sn$ s  snnf s  snf )Nr   ri   r[   r2   c                    >^ ^ T R                  S5      (       d  T R                  S5      (       a  gT ST  3m[        UU 4S jT 5       5      $ )Nz.weightz.biasFrC   c              3   D   >#    U  H  oT;   =(       d    UT;   v   M     g 7fr    ).0rx   full_keyr[   s     r    	<genexpr>NHiggsHfQuantizer.update_missing_keys.<locals>.should_update.<locals>.<genexpr>   s      O;4s{6dh&66;s    )endswithany)r[   r   higgs_namesr}   s   `@r    should_update;HiggsHfQuantizer.update_missing_keys.<locals>.should_update   sD    ||I&&#,,w*?*? 3%(HO;OOOr"   )rG   rj   rs   r,   strbool)
r   r;   r|   r}   rj   rx   rX   r   r[   r   s
      `     @r    update_missing_keys$HiggsHfQuantizer.update_missing_keys   su    .050C0C0Ei0ETZIht0Ei	Ps 	Pt 	P 	P  ,F|=3E|FF j Gs   A6A6A<-A<c                     g)NFr   )r   r;   s     r    is_trainableHiggsHfQuantizer.is_trainable   s    r"   c                     g)NTr   )r   safe_serializations     r    is_serializable HiggsHfQuantizer.is_serializable   s    r"   c                     SSK Jn  [        X5      u  px[        Xv5      (       a%  US:X  a  UR                  [
        R                  :w  a  gg)Nr   ri   rn   TF)rG   rj   r   r,   dtyper&   int16)	r   r;   r<   r=   r?   r   rj   rX   tensor_names	            r    check_quantized_param&HiggsHfQuantizer.check_quantized_param   s@     	/25Ef**{h/F;K\K\`e`k`kKkr"   c                 "    SSK Jn  U" U5      nU$ )Nr   )dequantize_higgs)rG   r   )r   r;   r   s      r    _dequantizeHiggsHfQuantizer._dequantize   s    3 'r"   ra   )r8   torch.dtyper2   r   r   )r;   r   )__name__
__module____qualname____firstlineno____doc__requires_calibration requires_parameters_quantizationrequired_packagesr   r   r0   r9   r   r   r   r   r   r]   rc   rz   r   propertyr   r   r   r   r   __static_attributes____classcell__)r   s   @r    r   r   "   s:    !'+$')BC7,C 74	$ 04%/ %/ $%/ 	%/
 &%/ cN%/ "$s),%/ND DZ2GtCy G# GRVWZR[ G (+<"=    $ 	
 cN 
" r"   r   )typingr   r   r   r   r   utils.loggingr	   baser   quantizers_utilsr   modeling_utilsr   utilsr   r   r   r   r   utils.quantization_configr   r&   
get_loggerr   r4   r   r   r"   r    <module>r      sS    < ;    2 0 s s ? 			H	%h{ hr"   