o
    rZh	                     @   s0   d Z ddlmZ ddlmZ G dd deZdS )z5
UDHR corpus reader. It mostly deals with encodings.
    )PlaintextCorpusReader)find_corpus_fileidsc                       s.   e Zd Zg dZh dZd fdd	Z  ZS )UdhrCorpusReader))z
.*-Latin1$zlatin-1)z
.*-Hebrew$hebrew)z
.*-Arabic$cp1256)zCzech_Cesky-UTF8cp1250)zPolish-Latin2r   )zPolish_Polski-Latin2r   )z.*-Cyrillic$cyrillic)z.*-SJIS$SJIS)z
.*-GB2312$GB2312)z
.*-Latin2$z
ISO-8859-2)z	.*-Greek$greek)z.*-UTF8$zutf-8)Hungarian_Magyar-Unicodez	utf-16-le)ZAmahuacalatin1)zTurkish_Turkce-Turkishlatin5)zLithuanian_Lietuviskai-Balticlatin4)zJapanese_Nihongo-EUCzEUC-JP)Japanese_Nihongo-JIS
iso2022_jp)Chinese_Mandarin-HZhz)zAbkhaz\-Cyrillic\+Abkhcp1251>   zArmenian-DallakHelvzEsperanto-T61zVietnamese-VIQRzCzech-Latin2-errz2Azeri_Azerbaijani_Cyrillic-Az.Times.Cyr.Normal0117z
Tamil-UTF8zMarathi-UTF8zVietnamese-TCVNzAmharic-Afenegus6..60375zBurmese_Myanmar-UTF8r   zBurmese_Myanmar-WinResearcherzVietnamese-VPSzLao-UTF8r   z(Azeri_Azerbaijani_Latin-Az.Times.Lat0117zNavaho_Dine-Navajo-Navaho-fontzMagahi-AgrazGujarati-UTF8r   zChinese_Mandarin-UTF8zMagahi-UTF8zRussian_Russky-UTF8~zTigrinya_Tigrigna-VG2MainzBhojpuri-Agraudhrc                    s0   t |d}t j| fdd|D  jd d S )Nz(?!README|\.).*c                    s   g | ]	}| j vr|qS  )SKIP).0Zfileidselfr   F/var/www/auris/lib/python3.10/site-packages/nltk/corpus/reader/udhr.py
<listcomp>H   s    z-UdhrCorpusReader.__init__.<locals>.<listcomp>)encoding)r   super__init__	ENCODINGS)r   rootZfileids	__class__r   r   r   D   s   

zUdhrCorpusReader.__init__)r   )__name__
__module____qualname__r    r   r   __classcell__r   r   r"   r   r   	   s    #r   N)__doc__Znltk.corpus.reader.plaintextr   Znltk.corpus.reader.utilr   r   r   r   r   r   <module>   s    