o
    rZh-                     @   s   d dl mZ d dlmZmZ d dlmZ d dlmZ d dl	m
Z
mZ d dlmZmZmZ dd Zd	d
 ZG dd dZG dd deZG dd deZeddZeddZeddZG dd deZG dd deeZdS )    )
namedtuple)partialwraps)CategorizedCorpusReader)PlaintextCorpusReader)concatread_blankline_block)blankline_tokenizesent_tokenizeword_tokenizec                    s   t   fdd}|S )z
    A decorator that allows a function to be called with
    a single string of comma-separated values which become
    individual function arguments.
    c                     s   t  }| D ]'}t|tr|dd |dD  qt|t r'|t| q|| q| D ]\}}t|trFdd |dD ||< q1 |i |S )Nc                 S      h | ]}|  qS  strip.0partr   r   J/var/www/auris/lib/python3.10/site-packages/nltk/corpus/reader/markdown.py	<setcomp>       z?comma_separated_string_args.<locals>.wrapper.<locals>.<setcomp>,c                 S   r   r   r   r   r   r   r   r      r   )list
isinstancestrappendsplitsetitems)argskwargs_argsargnamevaluefuncr   r   wrapper   s   


z,comma_separated_string_args.<locals>.wrapper)r   )r%   r&   r   r$   r   comma_separated_string_args
   s   r'   c                 C   s    t | }|r||d gS |S Nr   )r   render)streamparserblockr   r   r   read_parse_blankline_block#   s   r-   c                   @   sT   e Zd Zdd Zdd Zdd Zedd Zed	d
 Zedd Z	edd Z
dS )MarkdownBlockc                 C   s   || _ d| _d S )N   )contenttruncate_at)selfr0   r   r   r   __init__+   s   
zMarkdownBlock.__init__c                 C   s   | j j dtt|  dS )Nz	(content=))	__class____name__reprr   r2   r   r   r   __repr__/   s   zMarkdownBlock.__repr__c                 C   s0   | j d | j  t| j | jkrd S d S )Nz... )r0   r1   lenr8   r   r   r   __str__2   s   zMarkdownBlock.__str__c                 C   s   | j S Nr0   r8   r   r   r   raw8   s   zMarkdownBlock.rawc                 C   s
   t | jS r=   )r   r0   r8   r   r   r   words<      
zMarkdownBlock.wordsc                 C      dd t | jD S )Nc                 S      g | ]}t |qS r   r   r   sentr   r   r   
<listcomp>B   r   z'MarkdownBlock.sents.<locals>.<listcomp>)r
   r0   r8   r   r   r   sents@      zMarkdownBlock.sentsc                 C   rB   )Nc                 S   s   g | ]}d d t |D qS )c                 S   rC   r   rD   rE   r   r   r   rG   G   r   z2MarkdownBlock.paras.<locals>.<listcomp>.<listcomp>)r
   r   parar   r   r   rG   F       z'MarkdownBlock.paras.<locals>.<listcomp>r	   r0   r8   r   r   r   parasD      zMarkdownBlock.parasN)r6   
__module____qualname__r3   r9   r<   propertyr?   r@   rH   rN   r   r   r   r   r.   *   s    


r.   c                       s@   e Zd Z fddZedd Zedd Zedd Z  ZS )		CodeBlockc                    s   || _ t j|  d S r=   )languagesuperr3   )r2   rT   r   r5   r   r   r3   M   s   zCodeBlock.__init__c                 C   s   dd | j  D S )Nc                 S   rC   r   rD   r   liner   r   r   rG   S   r   z#CodeBlock.sents.<locals>.<listcomp>r0   
splitlinesr8   r   r   r   rH   Q   rI   zCodeBlock.sentsc                 C   s
   | j  S r=   rY   r8   r   r   r   linesU   rA   zCodeBlock.linesc                 C   rB   )Nc                 S   s   g | ]}d d |  D qS )c                 S   rC   r   rD   rW   r   r   r   rG   \   r   z.CodeBlock.paras.<locals>.<listcomp>.<listcomp>)rZ   rJ   r   r   r   rG   [   rL   z#CodeBlock.paras.<locals>.<listcomp>rM   r8   r   r   r   rN   Y   rO   zCodeBlock.paras)	r6   rP   rQ   r3   rR   rH   r[   rN   __classcell__r   r   rV   r   rS   L   s    

rS   c                       s   e Zd Z fddZ  ZS )MarkdownSectionc                    s   || _ || _t j|  d S r=   )headinglevelrU   r3   )r2   r^   r_   r   rV   r   r   r3   b   s   zMarkdownSection.__init__)r6   rP   rQ   r3   r\   r   r   rV   r   r]   a   s    r]   Imagezlabel, src, titleLinkzlabel, href, titleListzis_ordered, itemsc                       s*   e Zd Zdd fdd
Zdd Z  ZS )MarkdownCorpusReaderNr+   c                   sz   ddl m} ddlm} ddlm} || _| jd u r'|d|d| _| j| |dt	t
| jd t j|i | d S )	Nr   )
MarkdownIt)RendererPlain)front_matter_pluginZ
commonmark)Zrenderer_clsZpara_block_readerrd   )Zmarkdown_itre   Zmdit_plain.rendererrf   Zmdit_py_plugins.front_matterrg   r+   Zuse
setdefaultr   r-   rU   r3   )r2   r+   r   r   re   rf   rg   rV   r   r   r3   n   s   
zMarkdownCorpusReader.__init__c                 C   s,   t  }| |D ]}|| j| q|S r=   )r   Z_para_block_readerextendZ_word_tokenizertokenize)r2   r*   r@   rK   r   r   r   _read_word_block~   s   z%MarkdownCorpusReader._read_word_block)r6   rP   rQ   r3   rk   r\   r   r   rV   r   rc   m   s    rc   c                       s0  e Zd ZdZddddZed1 fdd	Zed1 fd	d
	Zed2 fdd	Zed2 fdd	Z	ed2 fdd	Z
ed2 fdd	Zdd Zdd Zed2ddZdd Zed2ddZdd Zed2dd Zd!d" Zed2d#d$Zd%d& Zed2d'd(Zd)d* Zed2d+d,Zd-d. Zed2d/d0Z  ZS )3CategorizedMarkdownCorpusReadera  
    A reader for markdown corpora whose documents are divided into
    categories based on their file identifiers.

    Based on nltk.corpus.reader.plaintext.CategorizedPlaintextCorpusReader:
    https://www.nltk.org/_modules/nltk/corpus/reader/api.html#CategorizedCorpusReader
    tags)	cat_fieldc                   s   g d}t  fdd|D st  d< t|   tj| g|R i   | jdurF| jsH| jD ]}| |}|rE|d |g | j|< q1dS dS dS )a  
        Initialize the corpus reader. Categorization arguments
        (``cat_pattern``, ``cat_map``, and ``cat_file``) are passed to
        the ``CategorizedCorpusReader`` constructor.  The remaining arguments
        are passed to the ``MarkdownCorpusReader`` constructor.
        )Zcat_patterncat_mapZcat_filec                 3   s    | ]}| v V  qd S r=   r   )r   r!   r   r   r   	<genexpr>   s    z;CategorizedMarkdownCorpusReader.__init__.<locals>.<genexpr>ro   Nr   )	anydictr   r3   rc   _map_fileidsmetadataget)r2   rn   r   r   Zcat_argsZfile_idrv   r   rp   r   r3      s   


z(CategorizedMarkdownCorpusReader.__init__Nc                    s   t  |S r=   )rU   
categories)r2   fileidsrV   r   r   rx      s   z*CategorizedMarkdownCorpusReader.categoriesc                    s   |d u r| j S t |S r=   )ru   rU   ry   )r2   rx   rV   r   r   ry      s   z'CategorizedMarkdownCorpusReader.fileidsc                       t  | ||S r=   )rU   r?   _resolver2   ry   rx   rV   r   r   r?      rI   z#CategorizedMarkdownCorpusReader.rawc                    rz   r=   )rU   r@   r{   r|   rV   r   r   r@      rI   z%CategorizedMarkdownCorpusReader.wordsc                    rz   r=   )rU   rH   r{   r|   rV   r   r   rH      rI   z%CategorizedMarkdownCorpusReader.sentsc                    rz   r=   )rU   rN   r{   r|   rV   r   r   rN      rI   z%CategorizedMarkdownCorpusReader.parasc                    s*   t  fddj||ddD S )Nc                    s    g | ]\}}j | |d qS ))encoding)Z
CorpusView)r   pathencreaderr2   r   r   rG      s    zECategorizedMarkdownCorpusReader.concatenated_view.<locals>.<listcomp>T)Zinclude_encoding)r   Zabspathsr{   )r2   r   ry   rx   r   r   r   concatenated_view   s   z1CategorizedMarkdownCorpusReader.concatenated_viewc                    s*   ddl m   fdd| j| D S )Nr   	safe_loadc                    s    g | ]}|j d kr |jqS )Zfront_matter)typer0   r   tr   r   r   rG      s
    
zCCategorizedMarkdownCorpusReader.metadata_reader.<locals>.<listcomp>)yamlr   r+   parsereadr2   r*   r   r   r   metadata_reader   s   
z/CategorizedMarkdownCorpusReader.metadata_readerc                 C      |  | j||S r=   )r   r   r|   r   r   r   rv         z(CategorizedMarkdownCorpusReader.metadatac           
         s    j | }tdd |}tdd |}t }t||D ]\}}||}|||}	||||	d   q fdd|D S )Nc                 S      | j dko	| jdkS )Nr   Zblockquote_openr_   r   r   r   r   r   <lambda>   r   zCCategorizedMarkdownCorpusReader.blockquote_reader.<locals>.<lambda>c                 S   r   )Nr   Zblockquote_closer   r   r   r   r   r      r      c                    s(   g | ]}t  jjj| jjd dqS )Nenv)r.   r+   rendererr)   optionsr   r,   r8   r   r   rG      s    zECategorizedMarkdownCorpusReader.blockquote_reader.<locals>.<listcomp>r+   r   r   filterr   zipindexr   )
r2   r*   tokensopening_tokensclosing_tokensblockquotesocopening_indexclosing_indexr   r8   r   blockquote_reader   s   

z1CategorizedMarkdownCorpusReader.blockquote_readerc                 C   r   r=   )r   r   r|   r   r   r   r      r   z+CategorizedMarkdownCorpusReader.blockquotesc                 C   s   dd | j | D S )Nc                 S   s.   g | ]}|j d kr|jdv rt|j|jqS )r   )ZfenceZ
code_block)r_   r   rS   infor0   r   r   r   r   rG      s    zECategorizedMarkdownCorpusReader.code_block_reader.<locals>.<listcomp>)r+   r   r   r   r   r   r   code_block_reader   s   z1CategorizedMarkdownCorpusReader.code_block_readerc                 C   r   r=   )r   r   r|   r   r   r   code_blocks   r   z+CategorizedMarkdownCorpusReader.code_blocksc                 C   $   dd t dd | j| D S )Nc              	   S   s<   g | ]}|j D ]}|jd krt|j|d|dqqS )imagesrctitle)childrenr   r`   r0   attrGet)r   inline_tokenchild_tokenr   r   r   rG     s    	
	z@CategorizedMarkdownCorpusReader.image_reader.<locals>.<listcomp>c                 S   
   | j dkS Ninliner   r   r   r   r   r   	     
 z>CategorizedMarkdownCorpusReader.image_reader.<locals>.<lambda>r   r+   r   r   r   r   r   r   image_reader  
   z,CategorizedMarkdownCorpusReader.image_readerc                 C   r   r=   )r   r   r|   r   r   r   images  r   z&CategorizedMarkdownCorpusReader.imagesc                 C   r   )Nc              	   S   sN   g | ]#}t |jD ]\}}|jd kr	t|j|d  j|d|dq	qS )Z	link_openr   hrefr   )	enumerater   r   ra   r0   r   )r   r   ir   r   r   r   rG     s    	
	z?CategorizedMarkdownCorpusReader.link_reader.<locals>.<listcomp>c                 S   r   r   r   r   r   r   r   r     r   z=CategorizedMarkdownCorpusReader.link_reader.<locals>.<lambda>r   r   r   r   r   link_reader  r   z+CategorizedMarkdownCorpusReader.link_readerc                 C   r   r=   )r   r   r|   r   r   r   links!  r   z%CategorizedMarkdownCorpusReader.linksc           
         s   | j | }dtfdd|}d t fdd|}t }t||D ]\}}||}|||}	||||	d   q&dd |D S )	N)Zbullet_list_openordered_list_openc                       | j dko	| j v S r(   r   r   )opening_typesr   r   r   )  r   z=CategorizedMarkdownCorpusReader.list_reader.<locals>.<lambda>)Zbullet_list_closeZordered_list_closec                    r   r(   r   r   )closing_typesr   r   r   -  r   r   c                 S   s*   g | ]}t |d  jdkdd |D qS )r   r   c                 S   s   g | ]}|j r|j qS r   r>   r   r   r   r   rG   7  s    zJCategorizedMarkdownCorpusReader.list_reader.<locals>.<listcomp>.<listcomp>)rb   r   )r   r   r   r   r   rG   4  s    z?CategorizedMarkdownCorpusReader.list_reader.<locals>.<listcomp>r   )
r2   r*   r   r   r   Zlist_blocksr   r   r   r   r   )r   r   r   list_reader%  s"   
z+CategorizedMarkdownCorpusReader.list_readerc                 C   r   r=   )r   r   r|   r   r   r   lists<  r   z%CategorizedMarkdownCorpusReader.listsc                    s   t  t  }} j| D ]$}|jdkr,|jdkr,|s#|| q|| |g}q|r3|| q|r;||  fdd|D S )Nr   Zheading_openc              
      s>   g | ]}t |d  j|d jd jjj| jjddqS )r   r   #Nr   )r]   r0   markupcountr+   r   r)   r   r   r8   r   r   rG   M  s    zBCategorizedMarkdownCorpusReader.section_reader.<locals>.<listcomp>)r   r+   r   r   r_   r   r   )r2   r*   Zsection_blocksr,   r   r   r8   r   section_reader@  s   



z.CategorizedMarkdownCorpusReader.section_readerc                 C   r   r=   )r   r   r|   r   r   r   sectionsV  r   z(CategorizedMarkdownCorpusReader.sectionsr=   )NN)r6   rP   rQ   __doc__r3   r'   rx   ry   r?   r@   rH   rN   r   r   rv   r   r   r   r   r   r   r   r   r   r   r   r   r\   r   r   rV   r   rl      sJ    
	
rl   N)collectionsr   	functoolsr   r   Znltk.corpus.reader.apir   Znltk.corpus.reader.plaintextr   Znltk.corpus.reader.utilr   r   Znltk.tokenizer	   r
   r   r'   r-   r.   rS   r]   r`   ra   rb   rc   rl   r   r   r   r   <module>   s     "


