o
    rZh2                     @   s   d dl Z d dlmZ d dlmZ d dlmZ eddgdZe jede 	 d	d
 Z
e
de jddddde jdddddde jddddde jddddde jdddd dd!d" ZdS )#    N)tqdm)word_tokenize)parallelize_preprocessz-hz--help)Zhelp_option_names)Zcontext_settingsc                   C   s   d S )N r   r   r   7/var/www/auris/lib/python3.10/site-packages/nltk/cli.pycli   s   r   tokenizez
--languagez-lenz1The language for the Punkt sentence tokenization.)defaulthelpz--preserve-lineTzIAn option to keep the preserve the sentence and not sentence tokenize it.)r
   Zis_flagr   z--processesz-j   zNo. of processes.z
--encodingz-eutf8zSpecify encoding of file.z--delimiterz-d z%Specify delimiter to join the tokens.c           	   	   C   s   t jd|dY}t jd|d9}|dkr*t| D ]}t|t|d|d qntt| |ddD ]}t||d|d q4W d	   n1 sKw   Y  W d	   d	S W d	   d	S 1 scw   Y  d	S )
z;This command tokenizes text stream using nltk.word_tokenizestdin)encodingstdoutr   
)endfileT)progress_barN)clickZget_text_streamr   	readlinesprintjoinr   r   )	languageZpreserve_lineZ	processesr   	delimiterZfinZfoutlineoutliner   r   r   tokenize_file   s   
"r   )r   r   Znltkr   Z	nltk.utilr   dictZCONTEXT_SETTINGSgroupZversion_optionr   commandoptionr   r   r   r   r   <module>   s8   
