o
    rZhF                     @   sL   d dl Z d dlT d dlT d dlmZmZ G dd deZG dd deZ	dS )    N)*)map_tag	str2tuplec                   @   s    e Zd ZdZdd Zdd ZdS )SwitchboardTurnaE  
    A specialized list object used to encode switchboard utterances.
    The elements of the list are the words in the utterance; and two
    attributes, ``speaker`` and ``id``, are provided to retrieve the
    spearker identifier and utterance id.  Note that utterance ids
    are only unique within a given discourse.
    c                 C   s    t | | || _t|| _d S N)list__init__speakerintid)selfwordsr	   r    r   M/var/www/auris/lib/python3.10/site-packages/nltk/corpus/reader/switchboard.pyr      s   zSwitchboardTurn.__init__c                 C   s\   t | dkr	d}nt| d trddd | D }nd| }d| j d| j d|d	S )
Nr     c                 s   s    | ]}d | V  qdS )z%s/%sNr   ).0wr   r   r   	<genexpr>    s    z+SwitchboardTurn.__repr__.<locals>.<genexpr><.z: >)len
isinstancetuplejoinr	   r   )r   textr   r   r   __repr__   s   
zSwitchboardTurn.__repr__N)__name__
__module____qualname____doc__r   r   r   r   r   r   r      s    r   c                   @   s   e Zd ZdgZd"ddZdd Zd"ddZd	d
 Zd"ddZdd Z	d#ddZ
dd Zd"ddZdd Zd"ddZdd Zd"ddZedZdZd"d d!ZdS )$SwitchboardCorpusReadertaggedNc                 C   s   t | || j || _d S r   )CorpusReaderr   _FILES_tagset)r   roottagsetr   r   r   r   +   s   
z SwitchboardCorpusReader.__init__c                 C      t | d| jS Nr#   )StreamBackedCorpusViewabspath_words_block_readerr   r   r   r   r   /      zSwitchboardCorpusReader.wordsc                        fdd}t  d|S )Nc                         | S r   )_tagged_words_block_readerstreamr   r(   r   r   tagged_words_block_reader3      zGSwitchboardCorpusReader.tagged_words.<locals>.tagged_words_block_readerr#   r+   r,   )r   r(   r6   r   r5   r   tagged_words2      z$SwitchboardCorpusReader.tagged_wordsc                 C   r)   r*   )r+   r,   _turns_block_readerr.   r   r   r   turns8   r/   zSwitchboardCorpusReader.turnsc                    r0   )Nc                    r1   r   )_tagged_turns_block_readerr3   r5   r   r   tagged_turns_block_reader<   r7   zGSwitchboardCorpusReader.tagged_turns.<locals>.tagged_turns_block_readerr#   r8   )r   r(   r>   r   r5   r   tagged_turns;   r:   z$SwitchboardCorpusReader.tagged_turnsc                 C   r)   r*   )r+   r,   _discourses_block_readerr.   r   r   r   
discoursesA   s   z"SwitchboardCorpusReader.discoursesFc                    r0   )Nc                    r1   r   _tagged_discourses_block_readerr3   r5   r   r   tagged_discourses_block_readerG   r7   zQSwitchboardCorpusReader.tagged_discourses.<locals>.tagged_discourses_block_readerr#   r8   )r   r(   rD   r   r5   r   tagged_discoursesF   s   
z)SwitchboardCorpusReader.tagged_discoursesc                    s    fddt |D gS )Nc                    s2   g | ]}| d D ]}| r	 j|ddq	qS )
F)include_tagsplitstrip_parse_utterancer   bur.   r   r   
<listcomp>Q   s    zDSwitchboardCorpusReader._discourses_block_reader.<locals>.<listcomp>Zread_blankline_blockr   r4   r   r.   r   r@   N   s   
z0SwitchboardCorpusReader._discourses_block_readerc                    s    fddt |D gS )Nc                    s4   g | ]}| d D ]}| r	 j|ddq	qS )rF   T)rG   r(   rH   rL   r5   r   r   rO   \   s    zKSwitchboardCorpusReader._tagged_discourses_block_reader.<locals>.<listcomp>rP   r   r4   r(   r   r5   r   rC   Y   s   z7SwitchboardCorpusReader._tagged_discourses_block_readerc                 C   s   |  |d S Nr   )r@   rQ   r   r   r   r;   d   s   z+SwitchboardCorpusReader._turns_block_readerc                 C   s   |  ||d S rS   rB   rR   r   r   r   r=   g   s   z2SwitchboardCorpusReader._tagged_turns_block_readerc                 C   s   t | |d g S rS   )sumr@   rQ   r   r   r   r-   j   s   z+SwitchboardCorpusReader._words_block_readerc                 C   s   t | ||d g S rS   )rT   rC   rR   r   r   r   r2   m   s   z2SwitchboardCorpusReader._tagged_words_block_readerz(\w+)\.(\d+)\:\s*(.*)/c           	         s    j |}|d u rtd| | \}}} fdd| D }|s,dd |D }nr= jkr= fdd|D }t|||S )NzBad utterance %rc                    s   g | ]}t | jqS r   )r   _SEP)r   sr.   r   r   rO   x   s    z<SwitchboardCorpusReader._parse_utterance.<locals>.<listcomp>c                 S   s   g | ]\}}|qS r   r   r   r   tr   r   r   rO   z   s    c                    s"   g | ]\}}|t  j|fqS r   )r   r&   rX   r5   r   r   rO   |   s   " )_UTTERANCE_REmatch
ValueErrorgroupsrI   r&   r   )	r   Z	utterancerG   r(   mr	   r   r   r   r   r5   r   rK   s   s   z(SwitchboardCorpusReader._parse_utterancer   )F)r   r   r    r%   r   r   r9   r<   r?   rA   rE   r@   rC   r;   r=   r-   r2   recompilerZ   rV   rK   r   r   r   r   r"   &   s$    







r"   )
r_   Znltk.corpus.reader.apiZnltk.corpus.reader.utilZnltk.tagr   r   r   r   r$   r"   r   r   r   r   <module>   s   