# Utilities for converting GGUF checkpoints into PyTorch state dicts and
# Transformers config/tokenizer parameters.
import re
from typing import NamedTuple, Optional

import numpy as np
from tqdm.auto import tqdm

from .integrations import GGUF_CONFIG_MAPPING, GGUF_TOKENIZER_MAPPING, _gguf_parse_value
from .utils import is_torch_available
from .utils.import_utils import is_gguf_available
from .utils.logging import get_logger


if is_torch_available():
    import torch

logger = get_logger(__name__)


GGUF_TO_TRANSFORMERS_MAPPING = {
    "ignore": {
        "GGUF": {
            "version": "version",
            "tensor_count": "tensor_count",
            "kv_count": "kv_count",
        },
        "general": {"file_type": "file_type", "quantization_version": "quantization_version"},
    },
    "config": GGUF_CONFIG_MAPPING,
    "tokenizer": {"tokenizer": GGUF_TOKENIZER_MAPPING["tokenizer"]},
    "tokenizer_config": {"tokenizer": GGUF_TOKENIZER_MAPPING["tokenizer_config"]},
}

GGUF_SUPPORTED_ARCHITECTURES = list(GGUF_TO_TRANSFORMERS_MAPPING["config"].keys())


class GGUFTensor(NamedTuple):
    weights: np.ndarray
    name: str
    metadata: dict


class TensorProcessor:
    def __init__(self, config=None):
        self.config = config or {}

    def process(self, weights, name, **kwargs):
        return GGUFTensor(weights, name, {})


class LlamaTensorProcessor(TensorProcessor):
    def __init__(self, config=None):
        super().__init__(config=config)

    def process(self, weights, name, **kwargs):
        if ".attn_k." in name or ".attn_q." in name:
            num_heads = self.config.get("num_attention_heads")
            num_kv_heads = self.config.get("num_key_value_heads")

            if None in (num_heads, num_kv_heads):
                return GGUFTensor(weights, name, {})

            if ".attn_q." in name:
                weights = self._reverse_permute_weights(weights, num_heads, num_heads)
            elif ".attn_k." in name:
                weights = self._reverse_permute_weights(weights, num_heads, num_kv_heads)
        return GGUFTensor(weights, name, {})

    def _reverse_permute_weights(
        self, weights: np.ndarray, n_head: int, num_kv_heads: Optional[int] = None
    ) -> np.ndarray:
        # Undo the head permutation applied by llama.cpp when converting HF attention weights to GGUF.
        if num_kv_heads is not None and n_head != num_kv_heads:
            n_head = num_kv_heads

        dim = weights.shape[0] // n_head // 2
        w = weights.reshape(n_head, dim, 2, *weights.shape[1:])
        return w.swapaxes(2, 1).reshape(weights.shape)


class Qwen2MoeTensorProcessor(TensorProcessor):
    def __init__(self, config=None):
        super().__init__(config=config)

    def process(self, weights, name, **kwargs):
        if "_exp" in name:
            tensor_key_mapping = kwargs.get("tensor_key_mapping")
            parsed_parameters = kwargs.get("parsed_parameters")
            if tensor_key_mapping:
                self._split_moe_expert_tensor(weights, parsed_parameters, name, tensor_key_mapping)
                return GGUFTensor(weights, None, {})
        if "ffn_gate_inp_shexp" in name:
            # The shared-expert gate is stored as a 1D tensor in GGUF; the HF checkpoint
            # expects an extra leading dimension.
            weights = np.expand_dims(weights, axis=0)
        return GGUFTensor(weights, name, {})

    def _split_moe_expert_tensor(
        self, weights: np.ndarray, parsed_parameters: dict, name: str, tensor_key_mapping: dict
    ):
        # GGUF packs all experts into a single tensor; split it back into per-expert HF weights.
        name = tensor_key_mapping[name]
        w_counter = self.config.get("num_experts", 60)
        for i in range(0, w_counter):
            temp_name = name.replace("mlp.experts.", f"mlp.experts.{i}.")
            exp_weight = weights[i]
            parsed_parameters["tensors"][temp_name] = torch.from_numpy(np.copy(exp_weight))


class BloomTensorProcessor(TensorProcessor):
    def __init__(self, config=None):
        super().__init__(config=config)

    def process(self, weights, name, **kwargs):
        if "attn_qkv" in name:
            num_heads = self.config["n_head"]
            n_embed = self.config["hidden_size"]
            if "weight" in name:
                weights = self._reverse_reshape_weights(weights, num_heads, n_embed)
            else:
                weights = self._reverse_reshape_bias(weights, num_heads, n_embed)
        return GGUFTensor(weights, name, {})

    def _reverse_reshape_weights(self, weights: np.ndarray, n_head: int, n_embed: int):
        # Undo llama.cpp's fused QKV layout and restore Bloom's per-head interleaved layout.
        q, k, v = np.array_split(weights, 3, axis=0)

        q = q.reshape(n_head, n_embed // n_head, n_embed)
        k = k.reshape(n_head, n_embed // n_head, n_embed)
        v = v.reshape(n_head, n_embed // n_head, n_embed)

        qkv_weights = np.stack([q, k, v], axis=1)

        return qkv_weights.reshape(n_head * 3 * n_embed // n_head, n_embed)

    def _reverse_reshape_bias(self, weights: np.ndarray, n_head: int, n_embed: int):
        q_bias, k_bias, v_bias = np.array_split(weights, 3)

        q_bias = q_bias.reshape(n_head, n_embed // n_head)
        k_bias = k_bias.reshape(n_head, n_embed // n_head)
        v_bias = v_bias.reshape(n_head, n_embed // n_head)

        qkv_bias = np.stack([q_bias, k_bias, v_bias], axis=1).flatten()
        return qkv_bias


class T5TensorProcessor(TensorProcessor):
    def __init__(self, config=None):
        super().__init__(config=config)

    def process(self, weights, name, **kwargs):
        bid = None
        for chunk in name.split("."):
            if chunk.isdigit():
                bid = int(chunk)
                break
        return GGUFTensor(weights, name, {"bid": bid})


class GPT2TensorProcessor(TensorProcessor):
    def __init__(self, config=None):
        super().__init__(config=config)

    def process(self, weights, name, **kwargs):
        # GGUF stores these projections transposed relative to the HF GPT-2 checkpoint.
        if (
            "attn_qkv.weight" in name
            or "ffn_down.weight" in name
            or "ffn_up.weight" in name
            or "attn_output.weight" in name
        ):
            weights = weights.T

        # "output.weight" maps to the LM head and must be matched exactly, since the
        # substring check would also hit "attn_output.weight".
        if name == "output.weight":
            name = "lm_head.weight"
            parsed_parameters = kwargs.get("parsed_parameters", {})
            parsed_parameters["tensors"][name] = torch.from_numpy(np.copy(weights))
            name = None  # the tensor was already assigned above
        return GGUFTensor(weights, name, {})


class MambaTensorProcessor(TensorProcessor):
    def __init__(self, config=None):
        super().__init__(config=config)

    def process(self, weights, name, **kwargs):
        if "ssm_conv1d.weight" in name:
            # GGUF stores the conv kernel as (d_inner, d_conv); HF expects (d_inner, 1, d_conv).
            weights = np.expand_dims(weights, axis=1)
        if "ssm_a" in name:
            # GGUF stores -exp(A_log); recover A_log for the HF checkpoint.
            weights = np.log(-weights)
        return GGUFTensor(weights, name, {})


class NemotronTensorProcessor(TensorProcessor):
    def __init__(self, config=None):
        super().__init__(config=config)

    def process(self, weights, name, **kwargs):
        if "norm.weight" in name:
            # GGUF stores norm weights with a +1 offset relative to the HF checkpoint.
            weights = weights - 1
        return GGUFTensor(weights, name, {})


class Gemma2TensorProcessor(TensorProcessor):
    def __init__(self, config=None):
        super().__init__(config=config)

    def process(self, weights, name, **kwargs):
        if "norm.weight" in name:
            # Same +1 RMSNorm offset convention as Nemotron.
            weights = weights - 1
        return GGUFTensor(weights, name, {})


TENSOR_PROCESSORS = {
    "llama": LlamaTensorProcessor,
    "qwen2moe": Qwen2MoeTensorProcessor,
    "bloom": BloomTensorProcessor,
    "t5": T5TensorProcessor,
    "t5encoder": T5TensorProcessor,
    "gpt2": GPT2TensorProcessor,
    "mamba": MambaTensorProcessor,
    "nemotron": NemotronTensorProcessor,
    "gemma2": Gemma2TensorProcessor,
    "gemma3": Gemma2TensorProcessor,
}


def read_field(reader, field):
    if field not in reader.fields:
        return []
    value = reader.fields[field]
    return [_gguf_parse_value(value.parts[_data_index], value.types) for _data_index in value.data]


def get_gguf_hf_weights_map(
    hf_model,
    model_type: Optional[str] = None,
    num_layers: Optional[int] = None,
    qual_name: str = "",
):
    """
    GGUF uses this naming convention for their tensors from HF checkpoint:
    `blk.N.BB.weight` and `blk.N.BB.bias`
    where N signifies the block number of a layer, and BB signifies the
    attention/mlp layer components.
    See "Standardized tensor names" in
    https://github.com/ggerganov/ggml/blob/master/docs/gguf.md for details.
    """
    if is_torch_available() and is_gguf_available():
        from gguf import MODEL_ARCH_NAMES, get_tensor_name_map
    else:
        logger.error(
            "Loading a GGUF checkpoint in PyTorch, requires both PyTorch and GGUF>=0.10.0 to be installed. Please see "
            "https://pytorch.org/ and https://github.com/ggerganov/llama.cpp/tree/master/gguf-py for installation instructions."
        )
        raise ImportError("Please install torch and gguf>=0.10.0 to load a GGUF checkpoint in PyTorch.")

    model_type = hf_model.config.model_type if model_type is None else model_type
    num_layers = hf_model.config.num_hidden_layers if num_layers is None else num_layers

    # Some model types are registered under a different name in gguf-py.
    if model_type == "cohere":
        model_type = "command-r"
    elif model_type == "qwen2_moe":
        model_type = "qwen2moe"
    elif model_type == "gemma3_text":
        model_type = "gemma3"
    arch = None
    for key, value in MODEL_ARCH_NAMES.items():
        if value == model_type:
            arch = key
            break
    if arch is None:
        raise NotImplementedError(
            f"Unknown gguf model_type: {model_type} in gguf-py. "
            "This might because you're using an outdated version of gguf-py package, "
            "you can install `gguf` package from source refer to "
            "https://github.com/ggerganov/llama.cpp/tree/master/gguf-py#development"
        )
    name_map = get_tensor_name_map(arch, num_layers)

    # gguf-py maps HF names to GGUF names, so build the reverse (GGUF -> HF) mapping by
    # walking the state dict of the instantiated model.
    gguf_to_hf_name_map = {}
    state_dict = hf_model.state_dict()
    for hf_name in state_dict.keys():
        # qwen2moe expert weights are packed into a single GGUF tensor, so collapse the expert index.
        if model_type == "qwen2moe" and "mlp.experts." in hf_name:
            hf_name = re.sub(r"mlp.experts.\d+.", "mlp.experts.", hf_name)

        name, suffix = hf_name, ""
        if hf_name.endswith(".weight") or hf_name.endswith(".bias"):
            name, suffix = hf_name.rsplit(".", 1)
            suffix = "." + suffix

        gguf_name = name_map.get_name(name)
        if gguf_name is None:
            continue

        gguf_to_hf_name_map[gguf_name + suffix] = qual_name + hf_name

    # Some checkpoints (e.g. Bloom) were converted from a bare base model, so also collect
    # mappings from submodules without overwriting entries already in the main map.
    if named_children := hf_model.named_children():
        for name, child in named_children:
            sub_map = get_gguf_hf_weights_map(child, model_type, num_layers, qual_name=f"{qual_name}{name}.")
            sub_map = {k: v for k, v in sub_map.items() if k not in gguf_to_hf_name_map}
            gguf_to_hf_name_map.update(sub_map)

    return gguf_to_hf_name_map


def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False, model_to_load=None):
    """
    Load a GGUF file and return a dictionary of parsed parameters containing tensors, the parsed
    tokenizer and config attributes.

    Args:
        gguf_checkpoint_path (`str`):
            The path to the GGUF file to load
        return_tensors (`bool`, defaults to `False`):
            Whether to read the tensors from the file and return them. Not doing so is faster
            and only loads the metadata in memory.
    """
    if is_gguf_available() and is_torch_available():
        from gguf import GGUFReader, dequantize
    else:
        logger.error(
            "Loading a GGUF checkpoint in PyTorch, requires both PyTorch and GGUF>=0.10.0 to be installed. Please see "
            "https://pytorch.org/ and https://github.com/ggerganov/llama.cpp/tree/master/gguf-py for installation instructions."
        )
        raise ImportError("Please install torch and gguf>=0.10.0 to load a GGUF checkpoint in PyTorch.")
    reader = GGUFReader(gguf_checkpoint_path)
    fields = reader.fields
    reader_keys = list(fields.keys())

    parsed_parameters = {k: {} for k in GGUF_TO_TRANSFORMERS_MAPPING}

    architecture = read_field(reader, "general.architecture")[0]
    model_name = read_field(reader, "general.name")

    updated_architecture = None
    # llama.cpp stores Mistral models under the llama architecture.
    if "llama" in architecture and "mistral" in model_name:
        updated_architecture = "mistral"
    elif "t5" in architecture or "t5encoder" in architecture:
        parsed_parameters["config"]["is_gated_act"] = True
        if "t5encoder" in architecture:
            parsed_parameters["config"]["architectures"] = ["T5EncoderModel"]
        updated_architecture = "t5"
    else:
        updated_architecture = architecture

    if "qwen2moe" in architecture:
        updated_architecture = "qwen2_moe"

    # StableLM variants differ in whether the qkv projections carry biases and whether the
    # parallel-residual layout is used; infer both from the tensors present in the file.
    if "stablelm" in architecture:
        attn_bias_name = {"attn_q.bias", "attn_k.bias", "attn_v.bias"}
        ffn_norm_name = "ffn_norm"
        qkv_bias = any(bias_name in tensor.name for tensor in reader.tensors for bias_name in attn_bias_name)
        use_parallel_residual = any(ffn_norm_name in tensor.name for tensor in reader.tensors)
        parsed_parameters["config"]["use_qkv_bias"] = qkv_bias
        parsed_parameters["config"]["use_parallel_residual"] = not use_parallel_residual

    if architecture not in GGUF_SUPPORTED_ARCHITECTURES and updated_architecture not in GGUF_SUPPORTED_ARCHITECTURES:
        raise ValueError(f"GGUF model with architecture {architecture} is not supported yet.")

    # If there is no dedicated output (lm_head) tensor, the embeddings are tied.
    exceptions = ["falcon", "bloom"]
    parsed_parameters["config"]["tie_word_embeddings"] = (
        all("output.weight" != tensor.name for tensor in reader.tensors) or architecture in exceptions
    )

    # Map every GGUF metadata key onto the matching config/tokenizer attribute.
    for gguf_key, field in reader.fields.items():
        gguf_key = gguf_key.replace(architecture, updated_architecture)
        split = gguf_key.split(".")
        prefix = split[0]
        config_key = ".".join(split[1:])

        value = [_gguf_parse_value(field.parts[_data_index], field.types) for _data_index in field.data]

        if len(value) == 1:
            value = value[0]

        if isinstance(value, str) and architecture in value:
            value = value.replace(architecture, updated_architecture)

        for parameter in GGUF_TO_TRANSFORMERS_MAPPING:
            parameter_renames = GGUF_TO_TRANSFORMERS_MAPPING[parameter]
            if prefix in parameter_renames and config_key in parameter_renames[prefix]:
                renamed_config_key = parameter_renames[prefix].get(config_key)
                if renamed_config_key == -1:
                    continue

                if renamed_config_key is not None:
                    parsed_parameters[parameter][renamed_config_key] = value

                if gguf_key in reader_keys:
                    reader_keys.remove(gguf_key)

        if gguf_key in reader_keys:
            logger.info(f"Some keys were not parsed and added into account {gguf_key} | {value}")

    # Gemma3 GGUF checkpoints only contain the weights of the text backbone.
    if parsed_parameters["config"]["model_type"] == "gemma3":
        parsed_parameters["config"]["model_type"] = "gemma3_text"

    # Retrieve the missing config vocab_size from the tokenizer metadata when possible.
    if "vocab_size" not in parsed_parameters["config"]:
        tokenizer_parameters = parsed_parameters["tokenizer"]
        if "tokens" in tokenizer_parameters:
            parsed_parameters["config"]["vocab_size"] = len(tokenizer_parameters["tokens"])
        else:
            logger.warning(
                "Can't find a way to retrieve missing config vocab_size from tokenizer parameters. "
                "This will use default value from model config class and cause unexpected behavior."
            )

    if return_tensors:
        parsed_parameters["tensors"] = {}

        tensor_key_mapping = get_gguf_hf_weights_map(model_to_load)
        config = parsed_parameters.get("config", {})

        ProcessorClass = TENSOR_PROCESSORS.get(architecture, TensorProcessor)
        processor = ProcessorClass(config=config)

        for tensor in tqdm(reader.tensors, desc="Converting and de-quantizing GGUF tensors..."):
            name = tensor.name
            weights = dequantize(tensor.data, tensor.tensor_type)

            result = processor.process(
                weights=weights,
                name=name,
                tensor_key_mapping=tensor_key_mapping,
                parsed_parameters=parsed_parameters,
            )

            weights = result.weights
            name = result.name

            if name not in tensor_key_mapping:
                continue

            name = tensor_key_mapping[name]

            parsed_parameters["tensors"][name] = torch.from_numpy(np.copy(weights))

    if len(reader_keys) > 0:
        logger.info(f"Some keys of the GGUF file were not considered: {reader_keys}")

    return parsed_parameters
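

# Illustrative usage (a minimal sketch, not part of the library API): the checkpoint path
# and the model construction below are placeholders, shown only to indicate how the two
# entry points above fit together.
#
#   from transformers import AutoModelForCausalLM
#   from transformers.modeling_gguf_pytorch_utils import load_gguf_checkpoint
#
#   # Metadata only (fast): parsed config and tokenizer attributes, no tensors.
#   parsed = load_gguf_checkpoint("path/to/model.Q4_K_M.gguf", return_tensors=False)
#   print(parsed["config"], list(parsed["tokenizer"]))
#
#   # De-quantized tensors keyed by HF parameter names require an instantiated model,
#   # which get_gguf_hf_weights_map() uses to build the GGUF -> HF name map.
#   model = AutoModelForCausalLM.from_config(...)  # hypothetical: a config matching the GGUF file
#   parsed = load_gguf_checkpoint("path/to/model.Q4_K_M.gguf", return_tensors=True, model_to_load=model)
#   state_dict = parsed["tensors"]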