from typing import TYPE_CHECKING, Dict, List, Optional, Union

from .base import HfQuantizer


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

from ..utils import is_accelerate_available, is_torch_available, logging


if is_torch_available():
    import torch

logger = logging.get_logger(__name__)


class BitNetHfQuantizer(HfQuantizer):
    """
    1.58-bit quantization from the BitNet quantization method:
    Before loading: it converts the linear layers into BitLinear layers during loading.

    Check out the paper introducing this method: https://arxiv.org/pdf/2402.17764
    """

    requires_parameters_quantization = False
    requires_calibration = True

    required_packages = ["accelerate"]

    def __init__(self, quantization_config, **kwargs):
        super().__init__(quantization_config, **kwargs)
        self.quantization_config = quantization_config

    def validate_environment(self, *args, **kwargs):
        if not is_accelerate_available():
            raise ImportError("Loading a BitNet quantized model requires accelerate (`pip install accelerate`)")

        if kwargs.get("from_tf", False) or kwargs.get("from_flax", False):
            raise ValueError(
                "Loading ternary weights from tf/flax is currently not supported, please make"
                " sure the weights are in PyTorch format."
            )

        if not torch.cuda.is_available():
            logger.warning_once(
                "You don't have a GPU available to load the model, the inference will be slow because of weight"
                " unpacking"
            )
            return

        device_map = kwargs.get("device_map", None)
        if device_map is None:
            logger.warning_once(
                "You have loaded a BitNet model on CPU and have a CUDA device available, make sure to set your model"
                " on a GPU device in order to run your model."
            )
        elif isinstance(device_map, dict) and ("cpu" in device_map.values() or "disk" in device_map.values()):
            raise ValueError(
                "You are attempting to load a BitNet model with a device_map that contains a CPU or disk device."
                " This is not supported. Please remove the CPU or disk device from the device_map."
            )

    def _process_model_after_weight_loading(self, model, **kwargs):
        return model

    def _process_model_before_weight_loading(
        self,
        model: "PreTrainedModel",
        keep_in_fp32_modules: Optional[List[str]] = None,
        **kwargs,
    ):
        from ..integrations import replace_with_bitnet_linear

        self.modules_to_not_convert = self.get_modules_to_not_convert(
            model, self.quantization_config.modules_to_not_convert, keep_in_fp32_modules
        )

        model = replace_with_bitnet_linear(
            model,
            modules_to_not_convert=self.modules_to_not_convert,
            quantization_config=self.quantization_config,
            pre_quantized=self.pre_quantized,
        )

    def adjust_max_memory(self, max_memory: Dict[str, Union[int, str]]) -> Dict[str, Union[int, str]]:
        # Keep 10% of each device's memory as headroom, e.g. for buffers created when unpacking weights.
        max_memory = {key: val * 0.90 for key, val in max_memory.items()}
        return max_memory

    def adjust_target_dtype(self, target_dtype: "torch.dtype") -> "torch.dtype":
        target_dtype = torch.int8
        return target_dtype

    def is_serializable(self, safe_serialization=None):
        return True

    @property
    def is_trainable(self) -> bool:
        return (
            self.quantization_config.linear_class == "autobitlinear"
            and self.quantization_config.quantization_mode == "online"
        )

    @property
    def is_qat_trainable(self) -> bool:
        """Flag indicating whether the quantized model can carry out quantization aware training"""
        return (
            self.quantization_config.linear_class == "autobitlinear"
            and self.quantization_config.quantization_mode == "online"
        )
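

# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original module): this quantizer
# is not instantiated directly. A BitNet checkpoint is loaded through the
# regular `from_pretrained` path, which reads the checkpoint's quantization
# config and dispatches to `BitNetHfQuantizer`, swapping linear layers for
# BitLinear before the weights are loaded. The repo id below is hypothetical.
#
#     from transformers import AutoModelForCausalLM
#
#     model = AutoModelForCausalLM.from_pretrained(
#         "my-org/my-bitnet-model",  # hypothetical BitNet checkpoint
#         device_map="cuda",  # CPU/disk entries are rejected by validate_environment
#     )
# ---------------------------------------------------------------------------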