o
    rZhG                     @   sZ   d dl Z d dlmZ d dlmZ d dlmZ G dd deZdd Ze	d	kr+e  dS dS )
    N)defaultdict)reduce)CorpusReaderc                       s^   e Zd ZdZedZedd Zd fdd	Z	dd	d
Z
dddZdddZdd Z  ZS )LinThesaurusCorpusReaderzEWrapper for the LISP-formatted thesauruses distributed by Dekang Lin.z \("?([^"]+)"? \(desc [0-9.]+\).+c                   C   s   t tS )z6Factory for creating defaultdict of defaultdict(dict)s)r   dict r   r   E/var/www/auris/lib/python3.10/site-packages/nltk/corpus/reader/lin.pyZ__defaultdict_factory   s   z.LinThesaurusCorpusReader.__defaultdict_factory        c              	      s   t  |d ttj| _|| _| jdddD ]T\}}}t|C}d}|D ]6}|	 }|r7tj
d|}	d}q%|dkr>d}q%|d}
t|
dkr[|
\}}t|| j| |	 |	d	< q%W d
   n1 sfw   Y  qd
S )a  
        Initialize the thesaurus.

        :param root: root directory containing thesaurus LISP files
        :type root: C{string}
        :param badscore: the score to give to words which do not appear in each other's sets of synonyms
        :type badscore: C{float}
        zsim[A-Z]\.lspT)Zinclude_encodingZinclude_fileidz\1Fz))	   "N)super__init__r   r   ._LinThesaurusCorpusReader__defaultdict_factory
_thesaurus	_badscoreZabspathsopenstrip_key_resubsplitlenfloat)selfrootZbadscorepathencodingfileidZlin_filefirstlinekeyZ
split_linengramZscore	__class__r   r   r      s4   


z!LinThesaurusCorpusReader.__init__Nc                    sf    kr|rdS dd j D S |r'j|   v r$j|    S jS  fddj D S )a  
        Returns the similarity score for two ngrams.

        :param ngram1: first ngram to compare
        :type ngram1: C{string}
        :param ngram2: second ngram to compare
        :type ngram2: C{string}
        :param fileid: thesaurus fileid to search in. If None, search all fileids.
        :type fileid: C{string}
        :return: If fileid is specified, just the score for the two ngrams; otherwise,
                 list of tuples of fileids and scores.
              ?c                 S   s   g | ]}|d fqS )r$   r   .0Zfidr   r   r   
<listcomp>Q   s    z7LinThesaurusCorpusReader.similarity.<locals>.<listcomp>c                    s:   g | ]}|j |   v rj |    njfqS r   )r   r   r%   ngram1ngram2r   r   r   r'   Z   s    	)_fileidsr   r   )r   r)   r*   r   r   r(   r   
similarity?   s   	z#LinThesaurusCorpusReader.similarityc                    ,   |rj |    S  fddjD S )a   
        Returns a list of scored synonyms (tuples of synonyms and scores) for the current ngram

        :param ngram: ngram to lookup
        :type ngram: C{string}
        :param fileid: thesaurus fileid to search in. If None, search all fileids.
        :type fileid: C{string}
        :return: If fileid is specified, list of tuples of scores and synonyms; otherwise,
                 list of tuples of fileids and lists, where inner lists consist of tuples of
                 scores and synonyms.
        c                    "   g | ]}|j |    fqS r   )r   itemsr&   r   r!   r   r   r   r'   u       z<LinThesaurusCorpusReader.scored_synonyms.<locals>.<listcomp>)r   r/   r+   r   r!   r   r   r1   r   scored_synonymsf   s
   z(LinThesaurusCorpusReader.scored_synonymsc                    r-   )a  
        Returns a list of synonyms for the current ngram.

        :param ngram: ngram to lookup
        :type ngram: C{string}
        :param fileid: thesaurus fileid to search in. If None, search all fileids.
        :type fileid: C{string}
        :return: If fileid is specified, list of synonyms; otherwise, list of tuples of fileids and
                 lists, where inner lists contain synonyms.
        c                    r.   r   )r   keysr0   r1   r   r   r'      r2   z5LinThesaurusCorpusReader.synonyms.<locals>.<listcomp>)r   r5   r+   r3   r   r1   r   synonymsz   s
   z!LinThesaurusCorpusReader.synonymsc                    s   t  fddjdS )z
        Determines whether or not the given ngram is in the thesaurus.

        :param ngram: ngram to lookup
        :type ngram: C{string}
        :return: whether the given ngram is in the thesaurus.
        c                    s   | p j | v S N)r   )accumr   r1   r   r   <lambda>   s    z7LinThesaurusCorpusReader.__contains__.<locals>.<lambda>F)r   r+   )r   r!   r   r1   r   __contains__   s
   z%LinThesaurusCorpusReader.__contains__)r	   r7   )__name__
__module____qualname____doc__recompiler   staticmethodr   r   r,   r4   r6   r:   __classcell__r   r   r"   r   r      s    


$
'
r   c                  C   s   ddl m}  d}d}td|  t| | td|  t| | td|  t| j|dd	 td|  t| j|dd	 td
| d| d t| || d S )Nr   )lin_thesaurusZbusinessZ
enterprisezGetting synonyms for zGetting scored synonyms for z5Getting synonyms from simN.lsp (noun subsection) for zsimN.lsp)r   zSimilarity score for z and :)Znltk.corpusrC   printr6   r4   r,   )ZthesZword1Zword2r   r   r   demo   s   rF   __main__)
r?   collectionsr   	functoolsr   Znltk.corpus.readerr   r   rF   r;   r   r   r   r   <module>   s    
