o
    rZhi                     @   sH   d Z ddlT ddlT ddlmZmZ G dd deZG dd deZ	dS )	z
Indian Language POS-Tagged Corpus
Collected by A Kumaran, Microsoft Research, India
Distributed with permission

Contents:
  - Bangla: IIT Kharagpur
  - Hindi: Microsoft Research India
  - Marathi: IIT Bombay
  - Telugu: IIIT Hyderabad
    )*)map_tag	str2tuplec                   @   s8   e Zd ZdZdddZdddZdddZdd	d
ZdS )IndianCorpusReaderz@
    List of words, one per line.  Blank lines are ignored.
    Nc                 C      t dd | |dD S )Nc                 S   s   g | ]\}}t ||d d qS )FIndianCorpusView.0Zfileidenc r   H/var/www/auris/lib/python3.10/site-packages/nltk/corpus/reader/indian.py
<listcomp>!       z,IndianCorpusReader.words.<locals>.<listcomp>Tconcatabspathsselffileidsr   r   r   words   
   
zIndianCorpusReader.wordsc                    @   r j kr fddnd tfdd |dD S )Nc                       t  j| S Nr   _tagsettr   tagsetr   r   <lambda>)       z1IndianCorpusReader.tagged_words.<locals>.<lambda>c                    s    g | ]\}}t ||d d qS )TFr   r	   tag_mapping_functionr   r   r   -       z3IndianCorpusReader.tagged_words.<locals>.<listcomp>Tr   r   r   r   r   r    r   r   r$   r    r   tagged_words'      

zIndianCorpusReader.tagged_wordsc                 C   r   )Nc                 S   s   g | ]\}}t ||d dqS )FTr   r	   r   r   r   r   5   r   z,IndianCorpusReader.sents.<locals>.<listcomp>Tr   r   r   r   r   sents3   r   zIndianCorpusReader.sentsc                    r   )Nc                    r   r   r   r   r   r   r   r!   =   r"   z1IndianCorpusReader.tagged_sents.<locals>.<lambda>c                    s    g | ]\}}t ||d d  qS )Tr   r	   r#   r   r   r   A   r%   z3IndianCorpusReader.tagged_sents.<locals>.<listcomp>Tr&   r'   r   r(   r   tagged_sents;   r*   zIndianCorpusReader.tagged_sentsr   )NN)__name__
__module____qualname____doc__r   r)   r+   r,   r   r   r   r   r      s    


r   c                   @   s    e Zd Z	dddZdd ZdS )r   Nc                 C   s&   || _ || _|| _tj| ||d d S )N)encoding)_tagged_group_by_sent_tag_mapping_functionStreamBackedCorpusView__init__)r   Zcorpus_filer1   ZtaggedZgroup_by_sentr$   r   r   r   r6   I   s   zIndianCorpusView.__init__c                    sd   |  }|drg S dd | D } jr  fdd|D } js*dd |D } jr0|gS |S )N<c                 S   s   g | ]}t |d dqS )_)sep)r   )r
   wordr   r   r   r   U   s    z/IndianCorpusView.read_block.<locals>.<listcomp>c                    s   g | ]\}}|  |fqS r   )r4   r
   wr   r   r   r   r   W   s    c                 S   s   g | ]\}}|qS r   r   r;   r   r   r   r   Y   s    )readline
startswithsplitr4   r2   r3   )r   streamlinesentr   r=   r   
read_blockQ   s   
zIndianCorpusView.read_blockr   )r-   r.   r/   r6   rD   r   r   r   r   r   H   s    
r   N)
r0   Znltk.corpus.reader.apiZnltk.corpus.reader.utilZnltk.tagr   r   ZCorpusReaderr   r5   r   r   r   r   r   <module>   s   .