o
    rZh(                     @   s  d Z ddlZddlZddlT ddlmZ ddlmZ ddlT ddl	m
Z
 G dd deZG d	d
 d
eZG dd deZi dddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.i d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdNi dPdQdRdSdTdUdVdWdXdYdZd[d\d]d^d_d`dadbdcdddedfdgdhdidjdkdldmdndodpdqi drdsdtdudvdwdxdydzd{d|d}d~dddddddddddddddddddddi ddddddddddddddddddddddddddddddddddddddddddddddddddŜZdS )a  
Corpus reader for the York-Toronto-Helsinki Parsed Corpus of Old
English Prose (YCOE), a 1.5 million word syntactically-annotated
corpus of Old English prose texts. The corpus is distributed by the
Oxford Text Archive: http://www.ota.ahds.ac.uk/ It is not included
with NLTK.

The YCOE corpus is divided into 100 files, each representing
an Old English prose text. Tags used within each text complies
to the YCOE standard: https://www-users.york.ac.uk/~lang22/YCOE/YcoeHome.htm
    N)*)BracketParseCorpusReader)TaggedCorpusReader)RegexpTokenizerc                   @   s|   e Zd ZdZdddZdddZddd	Zd
d ZdddZdddZ	dddZ
dddZdddZdddZdddZdS )YCOECorpusReaderz
    Corpus reader for the York-Toronto-Helsinki Parsed Corpus of Old
    English Prose (YCOE), a 1.5 million word syntactically-annotated
    corpus of Old English prose texts.
    utf8c                 C   s   t | |g | t| jddd|d| _t| jddd| _dd | j D }d	d | j D |kr:t	d
t
dd |D dd |D  }t | ||| t
|| _d S )Npsdz.*.psd)encodingpos.posc                 S      h | ]}|d d qS N .0fr   r   F/var/www/auris/lib/python3.10/site-packages/nltk/corpus/reader/ycoe.py	<setcomp>.       z,YCOECorpusReader.__init__.<locals>.<setcomp>c                 S   r   r   r   r   r   r   r   r   /   r   z5Items in "psd" and "pos" subdirectories do not match.c                 S      g | ]}d | qS z%s.psdr   r   docr   r   r   
<listcomp>3       z-YCOECorpusReader.__init__.<locals>.<listcomp>c                 S   r   z%s.posr   r   r   r   r   r   4   r   )CorpusReader__init__YCOEParseCorpusReaderrootjoin_psd_readerYCOETaggedCorpusReader_pos_readerfileids
ValueErrorsorted
_documents)selfr!   r
   	documentsr&   r   r   r   r   %   s   zYCOECorpusReader.__init__Nc                 C   sP   |du r| j S t|tr|g}|D ]}|| jvrtd| qtdd |D S )z
        Return a list of document identifiers for all documents in
        this corpus, or for the documents with the given file(s) if
        specified.
        NzFile id %s not foundc                 S   r   r   r   r   r   r   r   r   G   r   z-YCOECorpusReader.documents.<locals>.<setcomp>)r)   
isinstancestr_fileidsKeyErrorr(   )r*   r&   r   r   r   r   r+   9   s   

zYCOECorpusReader.documentsc                 C   sB   |du r| j S t|tr|g}ttdd |D dd |D  S )z
        Return a list of file identifiers for the files that make up
        this corpus, or that store the given document(s) if specified.
        Nc                 S   r   r   r   r   r   r   r   r   T   r   z,YCOECorpusReader.fileids.<locals>.<listcomp>c                 S   r   r   r   r   r   r   r   r   U   r   )r.   r,   r-   r(   setr*   r+   r   r   r   r&   I   s   
zYCOECorpusReader.fileidsc                    sj   |du r| j }n$t|tr|g}|D ]}|| j vr+|dd dv r%tdtd| q fdd|D S )z
        Helper that selects the appropriate fileids for a given set of
        documents from a given subcorpus (pos or psd).
        Nr   )r   r	   zvExpected a document identifier, not a file identifier.  (Use corpus.documents() to get a list of document identifiers.z Document identifier %s not foundc                    s   g | ]	}| d   qS ).r   )r   d	subcorpusr   r   r   m   s    z0YCOECorpusReader._getfileids.<locals>.<listcomp>)r)   r,   r-   r'   )r*   r+   r5   Zdocumentr   r4   r   _getfileidsY   s   

	zYCOECorpusReader._getfileidsc                 C      | j | |dS Nr   )r%   wordsr6   r1   r   r   r   r9   p      zYCOECorpusReader.wordsc                 C   r7   r8   )r%   sentsr6   r1   r   r   r   r;   s   r:   zYCOECorpusReader.sentsc                 C   r7   r8   )r%   parasr6   r1   r   r   r   r<   v   r:   zYCOECorpusReader.parasc                 C   r7   r8   )r%   tagged_wordsr6   r1   r   r   r   r=   y   r:   zYCOECorpusReader.tagged_wordsc                 C   r7   r8   )r%   tagged_sentsr6   r1   r   r   r   r>   |   r:   zYCOECorpusReader.tagged_sentsc                 C   r7   r8   )r%   tagged_parasr6   r1   r   r   r   r?      r:   zYCOECorpusReader.tagged_parasc                 C   r7   )Nr   )r#   parsed_sentsr6   r1   r   r   r   r@      r:   zYCOECorpusReader.parsed_sentsr   )N)__name__
__module____qualname____doc__r   r+   r&   r6   r9   r;   r<   r=   r>   r?   r@   r   r   r   r   r      s    








r   c                   @   s   e Zd ZdZdd ZdS )r    zrSpecialized version of the standard bracket parse corpus reader
    that strips out (CODE ...) and (ID ...) nodes.c                 C   s*   t dd|}t d|rd S t| |S )Nz(?u)\((CODE|ID)[^\)]*\) z\s*\(\s*\)\s*$)resubmatchr   _parse)r*   tr   r   r   rJ      s   zYCOEParseCorpusReader._parseN)rB   rC   rD   rE   rJ   r   r   r   r   r       s    r    c                   @   s   e Zd ZdddZdS )r$   r   c                 C   s(   d}t |dd}tj| ||d|d d S )Nz+(?u)(?<=/\.)\s+|\s*\S*_CODE\s*|\s*\S*_ID\s*T)Zgaps_)sepsent_tokenizer)r   r   r   )r*   r!   itemsr
   Zgaps_rerN   r   r   r   r      s
   

zYCOETaggedCorpusReader.__init__NrA   )rB   rC   rD   r   r   r   r   r   r$      s    r$   zcoadrian.o34zAdrian and Ritheuszcoaelhom.o3u   Ælfric, Supplemental Homilieszcoaelive.o3u   Ælfric's Lives of SaintsZcoalcuinzAlcuin De virtutibus et vitiisz
coalex.o23zAlexander's Letter to Aristotlezcoapollo.o3zApollonius of TyreZcoaugustZ	Augustinez	cobede.o2z$Bede's History of the English Churchzcobenrul.o3zBenedictine Rulezcoblick.o23zBlickling Homiliesz
coboeth.o2z#Boethius' Consolation of Philosophyzcobyrhtf.o3zByrhtferth's ManualZ	cocanedgDzCanons of Edgar (D)Z	cocanedgXzCanons of Edgar (X)zcocathom1.o3u   Ælfric's Catholic Homilies Izcocathom2.o3u   Ælfric's Catholic Homilies IIz
cochad.o24z
Saint ChadZcochdrulzChrodegang of Metz, RuleZcochristophzSaint ChristopherzcochronA.o23zAnglo-Saxon Chronicle AZcochronCzAnglo-Saxon Chronicle CZcochronDzAnglo-Saxon Chronicle DzcochronE.o34zAnglo-Saxon Chronicle Ez	cocura.o2zCura PastoralisZcocuraCzCura Pastoralis (Cotton)zcodicts.o34zDicts of Catoz
codocu1.o1zDocuments 1 (O1)zcodocu2.o12zDocuments 2 (O1/O2)z
codocu2.o2zDocuments 2 (O2)zcodocu3.o23zDocuments 3 (O2/O3)z
codocu3.o3zDocuments 3 (O3)zcodocu4.o24zDocuments 4 (O2/O4)Zcoeluc1z Honorius of Autun, Elucidarium 1Zcoeluc2zcoepigen.o3u   Ælfric's Epilogue to GenesisZcoeuphrzSaint EuphrosyneZcoeustz Saint Eustace and his companionsZ	coexodusPz
Exodus (P)Z	cogenesiCzGenesis (C)zcogregdC.o24zGregory's Dialogues (C)zcogregdH.o23zGregory's Dialogues (H)ZcoherbarzPseudo-Apuleius, HerbariumzcoinspolD.o34z"Wulfstan's Institute of Polity (D)Z	coinspolXz"Wulfstan's Institute of Polity (X)ZcojameszSaint Jameszcolacnu.o23ZLacnungaz
colaece.o2Z	Leechdomszcolaw1cn.o3zLaws, Cnut Izcolaw2cn.o3zLaws, Cnut IIzcolaw5atr.o3u   Laws, Æthelred Vzcolaw6atr.o3u   Laws, Æthelred VIz
colawaf.o2zLaws, Alfredzcolawafint.o2zAlfred's Introduction to Lawszcolawger.o34zLaws, Gerefazcolawine.ox2z	Laws, Inezcolawnorthu.o3zNorthumbra Preosta Laguzcolawwllad.o4zLaws, William I, Ladzcoleofri.o4ZLeofriczcolsigef.o3u   Ælfric's Letter to SigefyrthZ	colsigewBu!   Ælfric's Letter to Sigeweard (B)zcolsigewZ.o34u!   Ælfric's Letter to Sigeweard (Z)Zcolwgeatu   Ælfric's Letter to WulfgeatZ	colwsigeTu    Ælfric's Letter to Wulfsige (T)zcolwsigeXa.o34u!   Ælfric's Letter to Wulfsige (Xa)zcolwstan1.o3u   Ælfric's Letter to Wulfstan Izcolwstan2.o3u   Ælfric's Letter to Wulfstan IIzcomargaC.o34zSaint Margaret (C)ZcomargaTzSaint Margaret (T)Zcomart1zMartyrology, IZcomart2zMartyrology, IIzcomart3.o23zMartyrology, IIIzcomarvel.o23zMarvels of the EastZcomaryzMary of EgyptZconeotz
Saint NeotZconicodAzGospel of Nicodemus (A)ZconicodCzGospel of Nicodemus (C)ZconicodDzGospel of Nicodemus (D)ZconicodEzGospel of Nicodemus (E)zcoorosiu.o2ZOrosiusz
cootest.o3Z
Heptateuchzcoprefcath1.o3u(   Ælfric's Preface to Catholic Homilies Izcoprefcath2.o3u)   Ælfric's Preface to Catholic Homilies IIzcoprefcura.o2zPreface to the Cura Pastoraliszcoprefgen.o3u   Ælfric's Preface to Genesiszcopreflives.o3u$   Ælfric's Preface to Lives of Saintsz"Preface to Augustine's Soliloquiesz*Pseudo-Apuleius, Medicina de quadrupedibuszHistory of the Holy Rood-TreezSeven SleeperszSt. Augustine's SoliloquieszSolomon and Saturn IzSolomon and Saturn IIu   Ælfric's De Temporibus AnnizVercelli HomilieszVercelli Homilies (E)zVercelli Homilies (L)zSaint Vincent (Bodley 343)zVindicta SalvatoriszWest-Saxon GospelszWulfstan's Homilies)Zcoprefsolilozcoquadru.o23ZcoroodZ	cosevenslZcosolilozcosolsat1.o4Z	cosolsat2z
cotempo.o3ZcoverhomZ	coverhomEZ	coverhomLZcovinceBZcovinsalzcowsgosp.o3z
cowulf.o34)rE   osrG   Znltk.corpus.reader.apiZ nltk.corpus.reader.bracket_parser   Znltk.corpus.reader.taggedr   Znltk.corpus.reader.utilZnltk.tokenizer   r   r   r    r$   r+   r   r   r   r   <module>   s  h
	
 !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUV