o
    vZhm`                     @   s*  U d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	m	Z	 d dl
mZ d dlmZ d dlmZmZmZmZmZ d dlmZ ddlmZ dd	lmZmZmZ dd
lmZmZmZmZ ddlm Z m!Z! ddl"m#Z#m$Z$m%Z% ddl&m'Z' ddl(m)Z) erddl*m+Z+ e,e-Z.dZ/dZ0dZ1g dZ2ee3 e4d< ddddddddddde5dee5ef de5dee5 dee6 deeee5 e5f  d eeee5 e5f  d!ee3 d"e6d#e3fd$d%Z7G d&d' d'e j8Z9eeef Z:G d(d) d)Z;d*e;ddde5de5de5f
d+d,Z<d*e;d-eee9ee: f  fd.d/Z=d0e:d-dfd1d2Z>d3ee: ddde5de5de5d-dfd4d5Z?d0e:ddde5de5de5d-dfd6d7Z@d3ee: ddde5de5de5d-dfd8d9ZAG d:d; d;eZBd0e:d-eBfd<d=ZCd>d?d-ee: fd@dAZDd>d?dBe3d-ee: fdCdDZEd>d?d-ee: fdEdFZFdGe5d-dfdHdIZGdS )J    N)datetime)Path)Lock)TYPE_CHECKINGListOptionalTupleUnion)quote   )	constants)CommitOperationAdd
UploadInfo_fetch_upload_modes)LocalUploadFileMetadataLocalUploadFilePathsget_local_upload_pathsread_upload_metadata)DEFAULT_REVISION
REPO_TYPES)DEFAULT_IGNORE_PATTERNSfilter_repo_objectstqdm)_format_size)sha_fileobj)HfApi
   K      )
   2   r   d   }         i  iX  i  COMMIT_SIZE_SCALET<   )revisionprivateallow_patternsignore_patternsnum_workersprint_reportprint_report_everyapir   repo_idfolder_path	repo_typer'   r(   r)   r*   r+   r,   r-   c                   s  du rt dtvrt dt du rtt   s-t d d|du r4g }nt|tr<|g}|t	7 }|du rQt
 pId}t|d d} j|dd	}td
|  |jtfdddD ||d}fdd|D }tdt| d fddt|ddD }t| fddt|D }|D ]}|  q|	rtd   t }	 td t | |
kr|	rt  t } rtd nq|D ]}|  qt  td dS )zUpload a large folder to the Hub in the most resilient way possible.

    See [`HfApi.upload_large_folder`] for the full documentation.
    NzFor large uploads, `repo_type` is explicitly required. Please set it to `model`, `dataset` or `space`. If you are using the CLI, pass it as `--repo-type=model`.z"Invalid repo type, must be one of zProvided path: 'z' is not a directoryr      T)r/   r1   r(   exist_okzRepo created: c                 3   s&    | ]}|  r|  V  qd S N)is_filerelative_toas_posix).0pathr0    S/var/www/auris/lib/python3.10/site-packages/huggingface_hub/_upload_large_folder.py	<genexpr>d   s   $ z/upload_large_folder_internal.<locals>.<genexpr>z**/*)r)   r*   c                    s   g | ]}t  |qS r;   )r   )r8   relpathr:   r;   r<   
<listcomp>h   s    z0upload_large_folder_internal.<locals>.<listcomp>zFound z candidate files to uploadc                    s   g | ]
}|t  |jfqS r;   )r   path_in_repo)r8   pathsr:   r;   r<   r?   l   s    zRecovering from metadata files)Zdescc              
      s&   g | ]}t jt d dqS ))statusr.   r/   r1   r'   )targetkwargs)	threadingThread_worker_jobr8   _)r.   r/   r1   r'   rB   r;   r<   r?   s   s    z

zIs done: exiting main loopzUpload is complete!) 
ValueErrorr   r   r   
expanduserresolveis_dir
isinstancestrr   os	cpu_countmaxZcreate_repologgerinfor/   r   globlenr   LargeUploadStatusrangestartprintcurrent_reporttimesleep_print_overwriteis_doneloggingjoin)r.   r/   r0   r1   r'   r(   r)   r*   r+   r,   r-   Znb_coresrepo_urlZfiltered_paths_listZ
paths_listitemsthreadsthreadZlast_report_tsr;   )r.   r0   r/   r1   r'   rB   r<   upload_large_folder_internal2   sn   







rf   c                   @   s4   e Zd Ze Ze Ze Ze Ze Z	dS )	WorkerJobN)
__name__
__module____qualname__enumautoSHA256GET_UPLOAD_MODEPREUPLOAD_LFSCOMMITWAITr;   r;   r;   r<   rg      s    rg   c                   @   sf   e Zd ZdZdee fddZdefddZde	d	ed
e
ddfddZdefddZde	fddZdS )rW   zBContains information, queues and tasks for a large upload process.rc   c                 C   s   || _ t | _t | _t | _t | _t | _d| _	d| _
d| _d| _d| _d | _t | _d| _t | _| j D ]A}|\}}|jd u rN| j| q<|jd u rZ| j| q<|jdkri|jsi| j| q<|jss| j| q<td|j d q<d S )Nr   r   lfszSkipping file z! (already uploaded and committed))rc   queueQueuequeue_sha256queue_get_upload_modequeue_preupload_lfsqueue_commitr   locknb_workers_sha256nb_workers_get_upload_modenb_workers_preupload_lfsnb_workers_commitnb_workers_waitinglast_commit_attemptr   now_started_at
_chunk_idx_chunk_locksha256putupload_modeis_uploadedis_committedrS   debugr@   )selfrc   itemrA   metadatar;   r;   r<   __init__   s6   







zLargeUploadStatus.__init__returnc                 C   s4   | j  t| j W  d    S 1 sw   Y  d S r4   )r   r%   r   r   r;   r;   r<   target_chunk   s   $zLargeUploadStatus.target_chunksuccessnb_itemsdurationNc                 C   s   | j E |std| d |  jd8  _n|t| j kr2|dk r2td| d |  jd7  _tdt| jttd | _W d    d S 1 sKw   Y  d S )NzFailed to commit z9 files at once. Will retry with less files in next batch.r   (   zSuccessfully committed z. at once. Increasing the limit for next batch.r   )	r   rS   warningr   r%   rT   rR   minrV   )r   r   r   r   r;   r;   r<   update_chunk   s   "zLargeUploadStatus.update_chunkc                 C   sd  d}d}d}d}d}d}d}d}d}	d}
d}| j  | jD ]M\}}|jr*|
d7 }
q|	|j7 }	|d7 }|jdurA|d7 }||j7 }|jdkrJ|d7 }|jdu rS|d7 }|jr_|d7 }||j7 }|jrk|d7 }||j7 }qt|	}t	
 }|d}|| j }t|dd }d}|d| d	| d
7 }|d7 }|d7 }|d| d| d	t| d| d	7 }|d| d| d	t| d| d	7 }|dkr|d| d7 }|d| d| d	t| d| d	7 }|d|
 d7 }|d7 }|d| j d7 }|d| j d7 }|d| j d7 }|d| j d7 }|d| j d7 }|d7 }|W  d   S 1 s+w   Y  dS )z<Generate a report of the current status of the large upload.r   r   Nrr   z%Y-%m-%d %H:%M:%S.z
---------- z (z) z----------
z	Files:   zhashed /z) | zpre-uploaded: )z (+z unsure)z | committed: z | ignored: 
z	Workers: z	hashing: z | zget upload mode: zpre-uploading: zcommitting: z	waiting: z3---------------------------------------------------)ry   rc   should_ignoresizer   r   r   r   r   r   r   strftimer   rO   splitrz   r{   r|   r}   r~   )r   Z	nb_hashedZsize_hashedZnb_preuploadednb_lfsZnb_lfs_unsureZsize_preuploadedZnb_committedZsize_committed
total_sizeZignored_filesZtotal_filesrI   r   Ztotal_size_strr   Znow_strelapsedZelapsed_strmessager;   r;   r<   r[      sp   









&&&&z LargeUploadStatus.current_reportc                 C   s>   | j  tdd | jD W  d    S 1 sw   Y  d S )Nc                 s        | ]\}}|j p|jV  qd S r4   r   r   r8   rI   r   r;   r;   r<   r=         z,LargeUploadStatus.is_done.<locals>.<genexpr>)ry   allrc   r   r;   r;   r<   r_     s   $zLargeUploadStatus.is_done)rh   ri   rj   __doc__r   
JOB_ITEM_Tr   intr   boolfloatr   rO   r[   r_   r;   r;   r;   r<   rW      s    !?rW   rB   c              
   C   s  	 d}t | }|du rdS |\}}|tjkrn|d }zt| | j| W n+ ty.     tyQ }	 zt	d|	  t
  | j| W Y d}	~	nd}	~	ww | j |  jd8  _W d   n1 sgw   Y  n|tjkrzt|||||d W n% ty     ty }	 zt	d|	  t
  W Y d}	~	nd}	~	ww |D ](}|\}
}|jrq|jdkr| j| q|jd	kr| j| q| j| q| j |  jd8  _W d   n1 sw   Y  n|tjkrP|d }zt|||||d | j| W n- ty     ty3 }	 zt	d
|	  t
  | j| W Y d}	~	nd}	~	ww | j |  jd8  _W d   n	1 sJw   Y  n|tjkrt }d}zt|||||d W n5 typ     ty }	 z t	d|	  t
  |D ]	}| j| qd}W Y d}	~	nd}	~	ww t | }| |t|| | j t | _|  jd8  _W d   n	1 sw   Y  n&|tj krt!t" | j |  j#d8  _#W d   n	1 sw   Y  q)a  
    Main process for a worker. The worker will perform tasks based on the priority list until all files are uploaded
    and committed. If no tasks are available, the worker will wait for 10 seconds before checking again.

    If a task fails for any reason, the item(s) are put back in the queue for another worker to pick up.

    Read `upload_large_folder` docstring for more information on how tasks are prioritized.
    TNr   zFailed to compute sha256: r   )r.   r/   r1   r'   zFailed to get upload mode: rr   ZregularzFailed to preupload LFS: zFailed to commit: F)$_determine_next_jobrg   rm   _compute_sha256rv   r   KeyboardInterrupt	ExceptionrS   error	traceback
format_excru   ry   rz   rn   _get_upload_moder   r   rw   rx   r{   ro   _preupload_lfsr|   rp   r\   _commitr   rV   r   r}   rq   r]   WAITING_TIME_IF_NO_TASKSr~   )rB   r.   r/   r1   r'   Znext_jobZjobrc   r   erI   r   Zstart_tsr   r   r;   r;   r<   rG      s   

	



rG   r   c                 C   s  | j 1 | jdkr:| j dkr:| jd ur:t | j dkr:|  jd7  _td tj	t
| jfW  d    S | jdkra| j dkra|  jd7  _td tj	t
| jfW  d    S | j dkr|  jd7  _td tjt| j|  fW  d    S | j dkr| jdkr|  jd7  _td	 tjt| jfW  d    S | j dkr| jdkr|  jd7  _td
 tjt| jfW  d    S | j dkr| jdkr|  jd7  _td tjt| j|  fW  d    S | j dkr+| jdkstjs+|  jd7  _td tjt| jfW  d    S | j dkrN|  jd7  _td tjt| jfW  d    S | j dkrt|  jd7  _td tjt| j|  fW  d    S | jdkr| j dkr| jd urt | j dkr|  jd7  _td tj	t
| jfW  d    S | jdkr | j dkr | j dkr | j dkr | j dkr | jdkr | jdkr | jdkr |  jd7  _td tj	t
| jfW  d    S tdd | jD rtd 	 W d    d S |  jd7  _tdt d tjg fW  d    S 1 s9w   Y  d S )Nr   i,  r   z;Job: commit (more than 5 minutes since last commit attempt)r   zJob: commit (>100 files ready)r   z&Job: get upload mode (>10 files ready)z5Job: preupload LFS (no other worker preuploading LFS)z.Job: sha256 (no other worker computing sha256)z:Job: get upload mode (no other worker getting upload mode)zJob: preupload LFSzJob: sha256zJob: get upload moder&   z-Job: commit (1 min since last commit attempt)zJob: commitc                 s   r   r4   r   r   r;   r;   r<   r=     r   z&_determine_next_job.<locals>.<genexpr>z.All files have been processed! Exiting worker.zNo task available, waiting... (zs))ry   r}   rx   qsizer   r\   rS   r   rg   rp   _get_items_to_commitrv   r{   rn   _get_nr   rw   r|   ro   _get_oneru   rz   rm   r   ZHF_HUB_ENABLE_HF_TRANSFERr   rc   rT   r~   r   rq   )rB   r;   r;   r<   r     s   







%
,
4
:
A
M
[
a&r   r   c                 C   sZ   | \}}|j du r&|jd}t| |_ W d   n1 s!w   Y  || dS )z1Compute sha256 of a file and save it in metadata.Nrb)r   	file_pathopenr   hexsave)r   rA   r   fr;   r;   r<   r     s   
r   rc   c           
   	   C   sf   dd | D }t |||| t|ddd t| |D ]\}}|\}}	|j|	_|j|	_|	| qdS )zmGet upload mode for each file and update metadata.

    Also receive info if the file should be ignored.
    c                 S      g | ]}t |qS r;   _build_hacky_operationr8   r   r;   r;   r<   r?         z$_get_upload_mode.<locals>.<listcomp> )safe)	additionsr1   r/   headersr'   N)	r   Z_build_hf_headersr
   zipZ_upload_moder   Z_should_ignorer   r   )
rc   r.   r/   r1   r'   r   r   additionrA   r   r;   r;   r<   r     s   
r   c                 C   s8   | \}}t | }|j||||gd d|_|| dS )z'Preupload LFS file and update metadata.)r/   r1   r'   r   TN)r   Zpreupload_lfs_filesr   r   )r   r.   r/   r1   r'   rA   r   r   r;   r;   r<   r     s   r   c                 C   sD   dd | D }|j ||||dd | D ]\}}d|_|| qdS )zCommit files to the repo.c                 S   r   r;   r   r   r;   r;   r<   r?   &  r   z_commit.<locals>.<listcomp>z(Add files using upload-large-folder tool)r/   r1   r'   
operationsZcommit_messageTN)Zcreate_commitr   r   )rc   r.   r/   r1   r'   r   rA   r   r;   r;   r<   r   $  s   r   c                   @   s   e Zd ZdddZdS )HackyCommitOperationAddr   Nc                 C   s    t | jtrt| j| _d S d S r4   )rN   path_or_fileobjr   rO   r   r;   r;   r<   __post_init__9  s   z%HackyCommitOperationAdd.__post_init__)r   N)rh   ri   rj   r   r;   r;   r;   r<   r   8  s    r   c                 C   s   | \}}t |j|jd}|jd}|dd d }W d    n1 s&w   Y  |jd u r4tdtt	|j|j
|d|_|S )N)r@   r   r   i   z&sha256 must have been computed by now!)r   r   sample)r   r@   r   r   peekr   rJ   r   bytesfromhexr   Zupload_info)r   rA   r   Z	operationfiler   r;   r;   r<   r   >  s   
r   rs   zqueue.Queue[JOB_ITEM_T]c                 C   s
   |   gS r4   getrs   r;   r;   r<   r   N  s   
r   nc                    s     fddt t  |D S )Nc                    s   g | ]}   qS r;   r   rH   r   r;   r<   r?   S  r   z_get_n.<locals>.<listcomp>)rX   r   r   )rs   r   r;   r   r<   r   R  s    r   c                 C   sj   g }d\}}	 |   dkr|S |tks|tkr|S |  }|| |\}}|jdkr0|d7 }n|d7 }q)zXSpecial case for commit job: the number of items to commit depends on the type of files.)r   r   Tr   rr   r   )r   MAX_NB_LFS_FILES_PER_COMMITMAX_NB_REGULAR_FILES_PER_COMMITr   appendr   )rs   rc   r   Z
nb_regularr   rI   r   r;   r;   r<   r   V  s   


r   reportc                    s   | d7 } t  j t fdd|  D }t|D ]}tjd tjd qtj|  tjd t	|  d    tj
  dS )	zPrint a report, overwriting the previous lines.

    Since tqdm in using `sys.stderr` to (re-)write progress bars, we need to use `sys.stdout`
    to print the report.

    Note: works well only if no other process is writing to `sys.stdout`!
    r   c                 3   s     | ]}t |  d  V  qdS )r   N)rV   )r8   lineterminal_widthr;   r<   r=   {  r   z#_print_overwrite.<locals>.<genexpr>z[Kz[Fr   N)shutilget_terminal_sizecolumnssum
splitlinesrX   sysstdoutwriterV   flush)r   Znb_linesrI   r;   r   r<   r^   n  s   
 r^   )Hrk   r`   rP   rs   r   r   rE   r\   r   r   pathlibr   r   typingr   r   r   r   r	   urllib.parser
   r   r   Z_commit_apir   r   r   Z_local_folderr   r   r   r   r   r   utilsr   r   r   Zutils._cache_managerr   Z	utils.shar   Zhf_apir   	getLoggerrh   rS   r   r   r   r%   r   __annotations__rO   r   rf   Enumrg   r   rW   rG   r   r   r   r   r   r   r   r   r   r   r^   r;   r;   r;   r<   <module>   s   

	
	

lw
"ll&	"&