o
    ZhT                     @   s&  d Z ddlZddlmZ ddlmZmZ ddlZddlm	Z
 ddl	ZddlmZ ddlmZmZ ddlZddlmZmZ dd	lmZmZ e rKddlZe rZdd
lmZ ddlmZ eeZ 	dddZ!dee" dej#dee"e
j#f de"dee" ej#ff
ddZ$dd Z%dd Z&dd Z'dd Z(dS )z!PyTorch - Flax general utilities.    N)UnpicklingError)DictTuple)
from_bytes)flatten_dictunflatten_dict   )is_safetensors_availableis_torch_available)check_torch_load_is_safelogging)	safe_open)	load_fileFc           
   	   C   s   |swt j|}td|  |dr<i }t|dd}| D ]	}||||< q"W d   n1 s6w   Y  n4zddl	}W n t
tfyQ   td  w t  |j|dd	d
}tdtdd | D dd t|| }	|	S t|| }	|	S )z(Load pytorch checkpoints in a flax modelzLoading PyTorch weights from .safetensorsZflax)Z	frameworkNr   zLoading a PyTorch model in Flax, requires both PyTorch and Flax to be installed. Please see https://pytorch.org/ and https://flax.readthedocs.io/en/latest/index.html#installation for installation instructions.cpuT)Zmap_locationweights_onlyzPyTorch checkpoint contains c                 s   s    | ]}|  V  qd S N)Znumel).0t r   W/var/www/auris/lib/python3.10/site-packages/transformers/modeling_flax_pytorch_utils.py	<genexpr>L   s    z=load_pytorch_checkpoint_in_flax_state_dict.<locals>.<genexpr>,z parameters.)ospathabspathloggerinfoendswithr   keysZ
get_tensortorchImportErrorModuleNotFoundErrorerrorr   loadsumvalues"convert_pytorch_state_dict_to_flax*convert_pytorch_sharded_state_dict_to_flax)

flax_modelZpytorch_checkpoint_pathZ
is_shardedZallow_missing_keysZpt_pathpt_state_dictfkr    flax_state_dictr   r   r   *load_pytorch_checkpoint_in_flax_state_dict1   s4   
&

r.   pt_tuple_key	pt_tensorrandom_flax_state_dictmodel_prefixreturnc                    s  dt t dtf fdd}| dd d }| d dv r$||r$||fS | dd d	 }| d d
kr:|| s:||fS | dd d }| d dkrP|| sP||fS | dd d }| d dkrf||rf||fS | dd d }| d dkr|jdkr|| s|dddd}||fS | dd d }| d dkr|| s|j}||fS | dd d }| d dkr||fS | dd d }| d dkr||fS d}| ddd dkr| d d }n| ddd dkr| d d }|dur| dd |f }||fS | |fS )zYRename PT weight names to corresponding Flax weight names and reshape tensor if necessarykeyr3   c                    s   t t|  f|  h@ dkS )zAChecks if `key` of `(prefix,) + key` is in random_flax_state_dictr   )lenset)r4   r2   r1   r   r   is_key_or_prefix_key_in_dict]   s   zCrename_key_and_reshape_tensor.<locals>.is_key_or_prefix_key_in_dictN)scale)weightgamma)meanrunning_mean)varrunning_var)	embeddingr;   )kernel         r   r   r;   r<   )Zbiasbeta)parametrizations	original0_g)rI   	original1_v)r   strboolndim	transposeT)r/   r0   r1   r2   r8   Zrenamed_pt_tuple_keynamer   r7   r   rename_key_and_reshape_tensorU   sH   rU   c              	   C   sf  t  ottt|  tj}|rtjnd}dd |  D }|r;|  D ]\}}|j	|kr2|
 }|  | |< q%|j}d|jv rI|jd }n|j}t|}	d|jv rat|jd }
|	|
 i }||voq|dd |  D v }||v o|dd |  D v}|  D ]\}}t|d	}|| |k}|d
 |k}|r|r|dd  }t|||	|\}}|f| |	v }|r|r|f| }||	v r|j|	| jkrtd| d|	| j d|j d	d|jv rd|d v sd|d v rt||d| < qd|d v r||d  q|st|ntj|tjd|d| < q|s$t|ntj|tjd||< qt|S )Nbfloat16c                 S      i | ]\}}||j qS r   dtyper   r,   vr   r   r   
<dictcomp>       z6convert_pytorch_state_dict_to_flax.<locals>.<dictcomp>paramsbatch_statsc                 S      h | ]	}| d d qS .r   splitr   r,   r   r   r   	<setcomp>       z5convert_pytorch_state_dict_to_flax.<locals>.<setcomp>c                 S   r`   ra   rc   re   r   r   r   rf      rg   rb   r   r   1PyTorch checkpoint seems to be incorrect. Weight  was expected to be of shape 	, but is r=   r9   r?   r_   num_batches_trackedrX   r^   )r
   
isinstancenextiterr&   r    ZTensorrV   itemsrY   floatr   numpybase_model_prefixr^   r   updater   tuplerd   rU   shape
ValueErrorjnpasarraypopr   )r*   r)   Zfrom_binrV   weight_dtypesr,   r[   r2   flax_model_paramsr1   Zflax_batch_statsr-   $load_model_with_head_into_base_model$load_base_model_into_model_with_headpt_keyr0   r/   is_bfloat_16has_base_model_prefixflax_keyflax_tensorrequire_base_model_prefixr   r   r   r'      sp   




r'   c              
      sN  dd l  i }| D ]}t   j|dd}dd | D } fdd| D }|j}d|jv rE|jd }t|}|t|jd  n|j}t|}||voZ|d	d
 | D v }	||v oi|dd
 | D v}
| D ]\}}t	|
d}||  jk}|d |k}|	r|r|dd  }t||||\}}|f| |v }|
r|r|f| }||v r|j|| jkrtd| d|| j d|j dd|jv rd|d v rt||d| < qnd|d v rt||d| < qnd|d v r||d  qn|st|ntj|tjd|d| < qn|st|ntj|tjd||< qnqt|S )Nr   T)r   c                 S   rW   r   rX   rZ   r   r   r   r\      r]   z>convert_pytorch_sharded_state_dict_to_flax.<locals>.<dictcomp>c                    s2   i | ]\}}||j  jkr| n|  qS r   )rY   rV   rs   rr   rZ   r    r   r   r\      s    &r_   r^   c                 S   r`   ra   rc   re   r   r   r   rf     rg   z=convert_pytorch_sharded_state_dict_to_flax.<locals>.<setcomp>c                 S   r`   ra   rc   re   r   r   r   rf     rg   rb   r   rh   ri   rj   r=   r9   rk   r?   rl   rX   rm   )r    r   r$   rq   rt   r^   r   ru   r   rv   rd   rV   rU   rw   rx   ry   rz   r{   r   )Zshard_filenamesr)   r-   Z
shard_filer*   r|   r2   r}   r1   r~   r   r   r0   r/   r   r   r   r   r   r   r   r   r(      sv   




0r(   c              	   C   s   t j|}td|  ttd| jj }|	dr't
|}t|dd}n.t|d!}z	t|| }W n tyE   td| dw W d	   n1 sPw   Y  t| |S )
(Load flax checkpoints in a PyTorch modelzLoading Flax weights from ZFlaxr   rb   )seprbzUnable to convert z  to Flax deserializable object. N)r   r   r   r   r   getattrtransformers	__class____name__r   safe_load_filer   openr   readr   EnvironmentError"load_flax_weights_in_pytorch_model)modelZflax_checkpoint_pathZflax_clsr-   Zstate_fr   r   r   %load_flax_checkpoint_in_pytorch_modelM  s   

r   c                 C   s  zddl }W n ttfy   td  w ttjdd |	 }t
|r5td tjdd |}t|}|  }| j|v oM| jdd	 | D v}| j|vo^| jd
d	 | D v }g }t| }	| D ]9\}
}|
d | jk}d| jf|
 |v }|r|r|
dd }
n
|r|r| jf|
 }
|
d dkr|jdkrd|
|vr|
dd d }
t|d}nE|
d dkrd|
|vr|
dd d }
|j}n,|
d dv r|
dd d }
nd|
d v r|
dd d }
nd|
d v r|
dd d }
d|v rd|
dd }nd|
}i }|D ]F}|d}d}|ddd ddgkr1|d d }n|ddd ddgkrC|d d }|durZ|dd |g }d|}|||< q||v re|| }||v r|j|| jkrtd |
 d!|| j d"|j dt|tjst|n|}||||< |	| qk|| qk|  | t!|	}	t"|dkrtd#| j#j$ d$| d%| j#j$ d&| j#j$ d'	 ntd(| j#j$ d) t"|	dkrtd*| j#j$ d+|	 d, | S td-| j#j$ d.| j#j$ d/ | S )0r   r   NzLoading a Flax weights in PyTorch, requires both PyTorch and Flax to be installed. Please see https://pytorch.org/ and https://flax.readthedocs.io/en/latest/index.html#installation for installation instructions.c                 S   s   | j tjkS r   )rY   ry   rV   )xr   r   r   <lambda>q  s    z4load_flax_weights_in_pytorch_model.<locals>.<lambda>zFound ``bfloat16`` weights in Flax model. Casting all ``bfloat16`` weights to ``float32`` before loading those in PyTorch model.c                 S   s   | j tjkr| tjS | S r   )rY   ry   rV   ZastypenpZfloat32rm   r   r   r   r   z  s    c                 S   r`   ra   rc   re   r   r   r   rf     rg   z5load_flax_weights_in_pytorch_model.<locals>.<setcomp>c                 S   r`   ra   rc   re   r   r   r   rf     rg   rb   r   r9   rB   rC   rF   )rE   rD   r   r   )r:   rA   r=   )r>   r?   )r@   r_   rH   rD   rI   rJ   rK   rL   rM   rN   z.Flax checkpoint seems to be incorrect. Weight ri   rj   zQSome weights of the Flax model were not used when initializing the PyTorch model z: z,
- This IS expected if you are initializing z from a Flax model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a FlaxBertForPreTraining model).
- This IS NOT expected if you are initializing z from a Flax model that you expect to be exactly identical (e.g. initializing a BertForSequenceClassification model from a FlaxBertForSequenceClassification model).z3All Flax model weights were used when initializing z.
zSome weights of zE were not initialized from the Flax model and are newly initialized: zo
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.zAll the weights of z were initialized from the Flax model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use z* for predictions without further training.)%r    r!   r"   r   r#   r   jaxZ	tree_utilZtree_mapr&   anywarningZ
state_dictrt   r   r6   rq   joinrQ   ry   rR   rS   rd   rw   rx   rn   r   ndarrayrz   Z
from_numpyremoveappendZload_state_dictlistr5   r   r   )Zpt_modelZ
flax_stater    Zis_type_bf16r-   Zpt_model_dictr~   r   Zunexpected_keysZmissing_keysZflax_key_tupler   r   r   r   Zspecial_pt_namesr4   Zkey_componentsrT   Zkey_to_checkr   r   r   r   c  s   	

$








r   )F))__doc__r   pickler   typingr   r   r   Z	jax.numpyrs   ry   r   Zflax.serializationr   Zflax.traverse_utilr   r   r    r	   r
   utilsr   r   r    Zsafetensorsr   Zsafetensors.flaxr   r   Z
get_loggerr   r   r.   rO   r   rU   r'   r(   r   r   r   r   r   r   <module>   sF   
	
$
C\Y