
    eTh'k                     ^   S r SSKJr  SSKrSSKJrJrJrJrJ	r	  SSK
JrJr  SSKJr  SSKJrJrJrJrJr  SS	KJr  SS
KJr  \R2                  " \5      r0 SSSS._SSSSSSSSSSSS.
_SSSSSSSSSSSS.
_SSSSSSSSSSSS.
_SSSSSSSSSSSSSS ._S!SSSSSSSSSSS.
_S"S#S$S%S&S'._S(SSSSSSSSSSS.
_S)S*SS+SS,S-._S.S/S0S1S2S3S4SS,S5S6SS7._S8SSSSSSSS9SS:.	_S;S*S<S=S>S+S,S?._S@SSSSSSSASB._SCSSSS,SSDSESFSSG.	_SHSSSSSSSSSISS.
_SJSSSSSSSSSSSKSSL._SMSSSSSSSSSSSKSSL._rSNSOSPSQSRS#S$S%S&SSST.
SUSS#S$S%S&SV.SW.rSX r " SY SZ5      r " S[ S\\5      r  " S] S^\5      r! " S_ S`\5      r" " Sa Sb\5      r# " Sc Sd\5      r$ " Se Sf\5      r%\ \!\!\"\#\#\#\#\#\$\#\#\%\%Sg.r&Sh\4Si jr'g)jz
Integration with GGML / The file is copied and adapted from https://github.com/99991/pygguf
with extra methods beings exposed
    )arrayN)	Tokenizerdecodersnormalizerspre_tokenizers
processors)BPEUnigram   )
AddedToken)GemmaConverterGPT2ConverterLlamaConverterQwen2ConverterT5Converter)logging)tqdmgeneral
model_type_model_name_or_path)architecturenamellamamax_position_embeddingsnum_hidden_layersintermediate_sizehidden_sizehead_dim
rope_thetanum_attention_headsnum_key_value_headsrms_norm_eps
vocab_size)
context_lengthblock_countfeed_forward_lengthembedding_lengthrope.dimension_countrope.freq_baseattention.head_countattention.head_count_kv attention.layer_norm_rms_epsilonr#   mistralqwen2qwen2moenum_expertsnum_experts_per_tok)r$   r%   r&   r'   r(   r)   r*   r+   r,   r#   expert_countexpert_used_countfalcon	tokenizerbos_token_ideos_token_idunk_token_idpad_token_id)ggml.bos_token_idggml.eos_token_idggml.unknown_token_idggml.padding_token_idphi3bloomn_layern_headlayer_norm_epsilon)r%   r'   r*   r#   attention.layer_norm_epsilont5n_positions
num_layersd_ffd_modeld_kv	num_headsrelative_attention_num_bucketsdecoder_start_token_id)r$   r%   r&   r'   attention.key_lengthr*   r+   rC   z attention.relative_buckets_countrL   r#   stablelmlayer_norm_eps)	r$   r%   r&   r'   r(   r*   r+   rC   r#   gpt2n_ctxn_embdr&   )r%   r$   r'   r&   r*   rC   
starcoder2norm_epsilon)r%   r$   r'   r&   r*   r+   rC   mambaconv_kernel
state_sizetime_step_rank)	r#   r$   r'   r,   r%   zssm.conv_kernelzssm.state_sizezssm.time_step_rankzssm.inner_sizenemotronnorm_epsgemma2sliding_window)r$   r%   r&   r'   r(   r)   rM   r*   r+   r,   zattention.sliding_windowr#   gemma3tokenizer_typetokensscores
token_typemergesadd_prefix_space)

ggml.modelzggml.tokenszggml.scoreszggml.token_typezggml.mergesr:   r;   r<   r=   zggml.add_space_prefixchat_template)re   rd   r:   r;   r<   r=   )r5   tokenizer_configc                    [        U[        5      (       d  U/n[        U5      S:X  a  US   nS nOUS   S:w  a  [        S5      eUu  pUS;   a  [	        U S   5      n U $ US;   a  [        U S   5      n U $ US;   a  [        U S   5      n U $ US;   a3  [        S	[        U 5      5      R                  5       R                  5       n U $ US
;   a  [        X5      n U $ )N   r   	   zPReceived multiple types, therefore expected the first type to indicate an array.)r   rh   r            
      )      )   )   B)ri   )
isinstancelistlen
ValueErrorintfloatboolr   tobytesdecode_gguf_parse_value)_value	data_typearray_data_types      V/var/www/auris/envauris/lib/python3.13/site-packages/transformers/integrations/ggml.pyr}   r}      s    i&&K	
9~aL	Q<1opp%."	..VAY M 
g	vay! M 
c	fQi
 M	 
c	sDL)113::< M 
c	"6;M    c                       \ rS rSrS rSrg)GGUFTokenizerSkeletoni  c                   ^ UR                  5        H  u  p#[        XU5        M     [        U S5      (       Gd4  [        U S5      (       a  [        U S5      (       d  [        S5      eU R                  nU R
                  n[        U5       VVs0 s H
  u  pgXuU   _M     snnm[        R                  S5        / n[        TR                  5       5       Hl  u  p/ n[        S[        U	5      5       H,  nU	S U XS  pX;   d  M  X;   d  M  UR                  XU
45        M.     [        UU4S jSS	9nUR                  U5        Mn     [        US
 SS	9nU Vs/ s H  oS   US   4PM     nnXl        OU R                   V	s/ s H  n	[!        U	R#                  S5      5      PM     sn	U l        [        U S5      (       d4  [        [        U R                  5      5       Vs/ s H  nS PM     snU l        [        U S5      (       d  / U l        [        U S5      (       d  S U l        [        U S5      (       a   U R&                  c  U R(                  U l        g g g s  snnf s  snf s  sn	f s  snf )Nrb   r_   r`   z\tokens and scores need to be passed for a LLaMa tokenizer without merges to be instantiated.z:Merges were not in checkpoint, building merges on the fly.rh   c                 $   > TU S      TU S      4$ )Nr   rh    )xvocabs    r   <lambda>0GGUFTokenizerSkeleton.__init__.<locals>.<lambda>*  s    U1Q4[%!+4Nr   T)keyreversec                     U S   $ )Nr   r   )vals    r   r   r   ,  s    CFr   r    added_tokensr8   unknown_token_id)itemssetattrhasattrrw   r_   r`   	enumerateloggerwarningr   rangerv   appendsortedextendrb   tuplesplitr   r8   r   )selfdict_kvr_   r`   itrb   mergepiece_scorelocalindexpiece_lpiece_rr   _r   s                    @r   __init__GGUFTokenizerSkeleton.__init__  s   KKMDADQ " tX&&4**'$2I2I r  [[F[[F.7.?@.?daQq	\.?@ENNWXF&*5;;=&9""1c%j1E',Ve}eFmW(W->g%DE 2 u*NX\]e$ ': F(:DIF289&31vs1v&&F9 K@DLu5S!12LDK4**-23t{{3C-DE-Dt-DEt^,, "Dt^,, $D 4+,,1B1B1J $ 5 5D 2K,7 A : MEs   II;$II)r   rb   r`   r8   N)__name__
__module____qualname____firstlineno__r   __static_attributes__r   r   r   r   r     s    '6r   r   c                   8    \ rS rSrS rS rS rS rS rS r	Sr
g	)
GGUFLlamaConverteri?  c                     [        U5      U l        U R                  U l        0 U l        [	        U R                  SS5      S:g  U l        g )Nr^   r   )r   protooriginal_tokenizeradditional_kwargsgetattris_llama_3_tokenizerr   tokenizer_dicts     r   r   GGUFLlamaConverter.__init__@  s>    *>:
"&**!#$+DJJ8H'$RV]$]!r   c                 T    [        [        UR                  UR                  5      5      $ Nru   zipr_   r`   r   r   s     r   r   GGUFLlamaConverter.vocabF      Cell344r   c                     UR                   $ r   rb   r   s     r   rb   GGUFLlamaConverter.mergesI      ||r   c                    U R                  U R                  5      nU R                  U R                  5      n[        U5       VVVs0 s H
  u  nu  pVXT_M     nnnnUR                  b  UR
                  UR                     OS n[        USS 5      b  UR
                  UR                     OS n	[        USS 5      b  UR
                  UR                     OS n
[        [        UUUSSS95      n/ n[        U R                  S5      (       dX  Ub  UR                  [        USSS95        U	b  UR                  [        U	SSS95        U
b  UR                  [        U
SSS95        O~[        R                  " [        R                  " U R                  R                   5      S:H  5      S	   nU H4  nUR                  [        U R                  R
                  U   SSS95        M6     [#        U5      S	:w  a  UR%                  U5        [#        U R                  R&                  5      S	:w  a?  UR)                  U R                  R&                   Vs/ s H  n[        USSS9PM     sn5        XR*                  S
'   XR*                  S'   XR*                  S'   U R,                  (       a>  S U R*                  S'   SU R*                  S'   SU R*                  S'   SU R.                  l        U$ s  snnnf s  snf )Nr6   r7   T)	unk_tokenfuse_unkbyte_fallbackra   F
normalizedspecialrj   r   r   	eos_token	bos_tokenrc   clean_up_tokenization_spaceslegacy)r   r   rb   r   r8   r_   r   r6   r   r	   r   r   r   npwherer   ra   rv   add_special_tokensr   
add_tokensr   r   r   r   )r   r   vocab_scoresrb   r   word_score	bpe_vocabr   r   r   r5   special_tokensspecial_tokens_idxidxadded_tokens                   r   r5   GGUFLlamaConverter.tokenizerL  s   zz$**-TZZ(6?6MN6M!2NTTW6M	N8=8J8J8VELL!3!34\`	8?~W[8\8hELL!3!34nr	8?~W[8\8hELL!3!34nr	#"
	 tzz<00$%%juVZ&[\$%%juVZ&[\$%%juVZ&[\ "$"((4::3H3H*IQ*N!OPQ!R)%%j1B1B31GTYcg&hi * ~!#((8tzz&&'1,  ]a]g]g]t]tu]tkKE5I]tu /8{+.7{+.7{+$$9=D""#56EID""#AB/4D""8,-2D##*k ON vs   KK"c                 D   [         R                  " 5       [         R                  " 5       [         R                  " SS5      /nU R                  (       a  U[         R
                  " SSSS9/-  nU(       a  U[         R                  " SSS9/-  n[         R                  " U5      $ )N   ▁r   FTrc   trim_offsets	use_regexrh   contentleft)r   ByteFallbackFuseReplacer   	ByteLevelStripSequencer   replacementrc   sequences       r   decoderGGUFLlamaConverter.decoder  s    !!#MMOUC(
 $$++UQVbfghhH!<==H  **r   c                    U R                  U R                  5      nU R                  U R                  5      nUb  X!l        SnSn[        U R                  S5      (       a  U R                  R
                  nU R                  X45      nUb  XQl        U R                  X45      Ul        U R                  5       nU(       a  Xal        U R                  (       a6  [        R                  " SSSS9Ul        [        R                  " / 5      Ul        U$ )Nr   Trc   Fr   )r5   r   
normalizerr   r   rc   pre_tokenizerr   post_processorr   r   r   r   r   )r   r5   r   r   rc   r   r   s          r   	convertedGGUFLlamaConverter.converted  s    NN4::.	 __TZZ0
!#- 4**,>??#66GG**;I$&3# LLG	,,.'5$ $$&4&>&>!&Ud'I#
 $/#7#7#;I r   )r   r   r   r   N)r   r   r   r   r   r   rb   r5   r   r   r   r   r   r   r   r   ?  s"    ^58t+!r   r   c                   6   ^  \ rS rSrS rS\4U 4S jjrSrU =r$ )GGUFQwen2Converteri  c                 2    [        U5      U l        0 U l        g r   r   r   r   r   s     r   r   GGUFQwen2Converter.__init__      "7"G!#r   returnc           
      "  > [        U R                  R                  5       VVs0 s H  u  pX!_M	     nnnU R                  R                  n[        TU ]  X45      nUR                  [        SSSS9[        SSSS9[        SSSS9/5        U$ s  snnf )N<|endoftext|>FTr   z<|im_start|>z
<|im_end|>)r   r   r_   rb   superr   r   r   r   r   r   r   rb   r5   	__class__s         r   r   GGUFQwen2Converter.converted  s    (1$2I2I2P2P(QR(QWQ(QR((//G%e4	$$?udK>eTJ<E4H	
  Ss   Br   r   	r   r   r   r   r   r   r   r   __classcell__r  s   @r   r   r     s    $9  r   r   c                   @    \ rS rSrS rS rS rS rS rS\	4S jr
S	rg
)GGUFPhi3Converteri  c                 T    [        U5      U l        U R                  U l        0 U l        g r   r   r   r   r   r   s     r   r   GGUFPhi3Converter.__init__  s"    *>:
"&**!#r   c                 T    [        [        UR                  UR                  5      5      $ r   r   r   s     r   r   GGUFPhi3Converter.vocab  r   r   c                     UR                   $ r   r   r   s     r   rb   GGUFPhi3Converter.merges  r   r   c                    U R                  U R                  5      nU R                  U R                  5      n[        U5       VVVs0 s H
  u  nu  pVXT_M     nnnn[	        [        Xs5      5      nUR                  [        SSSSSS9[        SSSS9[        SSSSS9[        S	SSSS9[        S
SSSS9[        SSSSS9[        SSSSS9[        SSSSS9[        SSSSS9[        SSSSS9[        SSSSS9[        SSSSS9/5        UR                  b  UR                  UR                     OS U R                  S'   UR                  b  UR                  UR                     OS U R                  S'   UR                  b  UR                  UR                     OS U R                  S'   UR                  b  UR                  UR                     OS U R                  S'   U$ s  snnnf )N</s>TF)rstriplstripr   r   r  r   z<|assistant|>)r  r   r   z<|placeholder1|>z<|placeholder2|>z<|placeholder3|>z<|placeholder4|>z
<|system|>z<|end|>z<|placeholder5|>z<|placeholder6|>z<|user|>r   r   r   	pad_token)r   r   rb   r   r   r	   r   r   r8   r_   r   r7   r6   r9   )	r   r   r   rb   r   r   r   r   r5   s	            r   r5   GGUFPhi3Converter.tokenizer  s   zz$**-TZZ(6?6MN6M!2NTTW6M	Nc)45	$$6$uX\]?udK?4ESWX-duVZ[-duVZ[-duVZ[-duVZ[<PTU9TeTR-duVZ[-duVZ[:dudS	
$ 160B0B0NELL++,TX 	{+ 160B0B0NELL++,TX 	{+ 160B0B0NELL++,TX 	{+ 160B0B0NELL++,TX 	{+ E Os   Gc                     [         R                  " 5       [         R                  " 5       [         R                  " US5      /nU(       a  U[         R                  " SSS9/-  n[         R
                  " U5      $ )Nr   rh   r   )r   r   r   r   r   r   r   s       r   r   GGUFPhi3Converter.decoder  s\    !!#MMO[#.
 !<==H  **r   r   c                     U R                  U R                  5      nSnSn[        U R                  S5      (       a  U R                  R                  nU R                  X#5      Ul        U$ )Nr   Trc   )r5   r   r   r   rc   r   )r   r5   r   rc   s       r   r   GGUFPhi3Converter.converted
  s\    NN4::.	4**,>??#66GG LLG	r   r   r   r   N)r   r   r   r   r   r   rb   r5   r   r   r   r   r   r   r   r  r    s(    $
5%N	+
9 
r   r  c                   6   ^  \ rS rSrS rS\4U 4S jjrSrU =r$ )GGUFGPTConverteri  c                 2    [        U5      U l        0 U l        g r   r   r   s     r   r   GGUFGPTConverter.__init__  r   r   r   c                    > [        U R                  R                  5       VVs0 s H  u  pX!_M	     nnnU R                  R                  n[        TU ]  X45      nU$ s  snnf r   )r   r   r_   rb   r  r   r  s         r   r   GGUFGPTConverter.converted  sZ    (1$2I2I2P2P(QR(QWQ(QR((//G%e4	 Ss   Ar  r  r
  s   @r   r   r     s    $9  r   r   c                   :    \ rS rSrS rS rS rS rS\4S jr	Sr
g	)
GGUFT5Converteri#  c                     S/US'   [        U5      U l        [        U R                  R                  5       VVs0 s H  u  p#X2_M	     snnU l        U R                  U l        0 U l        g s  snnf Nz
dummy textrb   )r   r   r   r_   token2idr   r   )r   r   r   r   s       r   r   GGUFT5Converter.__init__$  s_    $0>x *>:
*3DJJ4E4E*FG*F$!*FG"&**!# Hs   A)c                 T    [        [        UR                  UR                  5      5      $ r   r   r   s     r   r   GGUFT5Converter.vocab-  r   r   c                    [        U R                  SS5      (       ae  / n[        U R                  SS5      (       a  U[        R                  " SS9/-  nU[        R                  " SSS9/-  n[        R
                  " U5      $ g )Nr   Trc   r   )prependr   )patternr   )r   r   r   Prependr   r   )r   r   r   s      r   r   GGUFT5Converter.normalizer0  sx    4**Hd;;Ht..0BDII[00?@@,,S%HIIH''11r   c                 V    [         R                  " SS// SQSU R                  S   4/S9$ )N$Ar  )r3  r  z$Br  )singlepairr   )r   TemplateProcessingr)  )r   s    r   r   GGUFT5Converter.post_processor9  s5    ,,&>-v./
 	
r   r   c                    U R                  U R                  5      n[        [        UU R                  R                  SS95      nU R                  U R                  5      nUb  X2l        SnSn[        U R                  S5      (       a  U R                  R                  nU R                  XE5      nUb  Xbl	        U R                  XE5      Ul
        U R                  5       nU(       a  Xrl        U$ )NFunk_idr   r   Trc   )r   r   r   r
   r8   r   r   r   rc   r   r   r   )r   r   r5   r   r   rc   r   r   s           r   r   GGUFT5Converter.convertedB  s    zz$**-zz..#
	 __TZZ0
!#- 4**,>??#66GG**;I$&3# LLG	,,.'5$r   )r   r   r   r)  N)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r&  r&  #  s"    $5
9 r   r&  c                   :    \ rS rSrS rS rS rS rS\4S jr	Sr
g	)
GGUFGemmaConverterib  c                 `    S/US'   [        U5      U l        U R                  U l        0 U l        g r(  r  r   s     r   r   GGUFGemmaConverter.__init__c  s.    $0>x *>:
"&**!#r   c                 V   [        [        UR                  UR                  5      5      n/ nU Hw  u  pEUS:X  a  UR	                  SU45        M   SU;   a?  [        UR                  5       5      S:X  a"  S[        U5      -  nUR	                  Xe45        Me  UR	                  XE45        My     U$ )Nz<0x09>	r   r   r   )ru   r   r_   r`   r   rv   strip)r   r   original_vocabupdated_vocabtokenscoreunderscoress          r   r   GGUFGemmaConverter.vocabk  s    c%,,=>*LE $$dE]3#ekkm"4"9#c%j0$$k%9:$$e^4 + r   c                 0    [         R                  " SS5      $ )Nr   r   )r   r   r   s     r   r   GGUFGemmaConverter.normalizerz  s    ""3..r   c                     [         R                  " SS5      [         R                  " 5       [         R                  " 5       /nU(       a  U[         R                  " SSS9/-  n[         R
                  " U5      $ )Nr   r   rh   r   )r   r   r   r   r   r   r   s       r   r   GGUFGemmaConverter.decoder}  s\    UC(!!#MMO
 !<==H  **r   r   c                    U R                  U R                  5      n[        [        UU R                  R                  U R
                  S95      nU R                  U R                  5      nUb  X2l        SnSn[        U R                  S5      (       a  U R                  R                  nU R                  XE5      Ul
        U R                  XE5      nUb  Xbl        U$ )Nr9  r   Trc   )r   r   r   r
   r8   handle_byte_fallbackr   r   r   rc   r   r   )r   r   r5   r   r   rc   r   s          r   r   GGUFGemmaConverter.converted  s    zz$**-zz.."77
	 __TZZ0
!#- 4**,>??#66GG LLG	**;I$&3#r   r  N)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r=  r=  b  s"    $/	+9 r   r=  )r   r.   	qwen2_moer>   r?   r4   rN   rP   rS   rD   rU   rY   r[   gemma3_textr   c                 ^    U n[         U   " U5      nUR                  5       nXCR                  4$ )a  
Utilities to convert a slow tokenizer instance in a fast tokenizer instance.

Args:
    architecture (`str`): The model architecture derived from gguf file.
    transformer_tokenizer ([`~tokenization_utils_base.PreTrainedTokenizer`]):
        Instance of a slow tokenizer to convert in the backend tokenizer for
        [`~tokenization_utils_base.PreTrainedTokenizerFast`].

Return:
    A instance of [`~tokenizers.Tokenizer`] to be used as the backend tokenizer of a
    [`~tokenization_utils_base.PreTrainedTokenizerFast`]
)GGUF_TO_FAST_CONVERTERSr   r   )r   r   tokenizer_class_name	converterfast_tokenizers        r   convert_gguf_tokenizerrW    s7     ('(<=nMI((*N6666r   )(__doc__r   numpyr   
tokenizersr   r   r   r   r   tokenizers.modelsr	   r
    r   convert_slow_tokenizerr   r   r   r   r   utilsr   utils.loggingr   
get_loggerr   r   GGUF_CONFIG_MAPPINGGGUF_TOKENIZER_MAPPINGr}   r   r   r   r  r   r&  r=  rS  rW  r   r   r   <module>rc     s   
   S S *  o o    
		H	%~$%~
 3*2) *& 5#8,:"~$ 3*2) *& 5#8,:"%~> 3*2) $& 5#8,:"?~V 3*2) $& 5#8,:"%2W~r 3*2) $& 5#8,:"s~J ++!/!/	K~V 3*2) $& 5#8,:"W~n  ) ("(<o~| 	'#%% & +#8(<,L":"}~V 3*2) $ 5#8(8"
W~l  !$4 ((<m~| *3)2 5#8(6}~N "3),@*(&.-
O~d 3*2) $& 5#8,6"e~| 3*2) $& !+ 5#8,:$4"}~\ 3*2) $& !+ 5#8,:$4"]~ D ''++!/!/!3 )"++!/!/ 00(6 (6Vv vr (H HV	} 	<k <~> >D  # "
  % $7I 7r   