
    eTh                     >    S r SSKJr  SSKJr  SSKJr      SS jrg)z9VPTQ (Vector Post-Training Quantization) integration file    N)init_empty_weights)VQuantLinearc                 l   U(       d  S/OUnU R                  5        GH  u  pVUc  / nUR                  U5        SR                  U5      nUR                  nUR                  n	[        U[        R                  5      (       a  Xr;  a  Xy;   d	  US   U;   a  U	R                  US5      =(       d    UR                  US   S5      n
[        5          UR                  nUR                  n[        UUU
S   U
S   U
S   U
S   U
S	   U
S
   U
S   U
S   U
S   SSUR                  SLS9U R                  U'   SnU R                  U   R                  S5        SSS5        [!        [#        UR%                  5       5      5      S:  a  ['        UUUUUS9u  pUR)                  S5        GM     X4$ ! , (       d  f       N\= f)a3  
Public method that recursively replaces the Linear layers of the given model with VPTQ quantized layers.
`accelerate` is needed to use this method. Returns the converted model and a boolean that indicates if the
conversion has been successful or not.

Args:
    model (`torch.nn.Module`):
        The model to convert, can be any `torch.nn.Module` instance.
    quantization_config (`VptqConfig`):
        The quantization config object that contains the quantization parameters.
    modules_to_not_convert (`List[`str`]`, *optional*, defaults to `["lm_head"]`):
        Names of the modules to not convert in `VQuantLinear`. In practice we keep the `lm_head` in full precision
        for numerical stability reasons.
    current_key_name (`list`, *optional*):
        A list that contains the current key name. This is used for recursion and should not be passed by the user.
    has_been_replaced (`bool`, *optional*):
        A boolean that indicates if the conversion has been successful or not. This is used for recursion and
        should not be passed by the user.
lm_headN.vector_lensnum_centroidsnum_res_centroids	group_num
group_sizeoutlier_sizeindices_as_floatenable_normenable_permTF)r	   r
   r   r   r   r   r   r   r   is_indice_packedenable_proxy_errorbiasr   )quantization_configmodules_to_not_convertcurrent_key_namehas_been_replaced)named_childrenappendjoinshared_layer_configconfig_for_layers
isinstancennLineargetr   in_featuresout_featuresr   r   _modulesrequires_grad_lenlistchildrenreplace_with_vptq_linearpop)modelr   r   r   r   namemodule
layer_namer   r   layer_paramsr"   r#   _s                 V/var/www/auris/envauris/lib/python3.13/site-packages/transformers/integrations/vptq.pyr)   r)      s   6 1Gi[Lb,,.#!%XX./
1EE/AA vryy))817G7KOb7b,00TB FYF]F] $dGL $%$00%22'3  ,] ;"."?&23F&G*;7+L9!-n!=%12D%E ,] ; ,] ;%)',D0(t$  %)! t$33E:/ &0 tFOO%&'!+#;$7'=!1"3$ A 	R e /f ##E &%s   BF%%
F3	)NNNF)__doc__torch.nnr   
accelerater   vptqr   r)        r1   <module>r8      s'    <  ) 
 P$r7   