o
    rZŽh5%  ã                   @   s  d Z ddlZddlmZ ddlmZmZmZmZ ddl	m
Z
 ddlmZ e d¡Ze d¡Ze d	¡Ze d
ej¡Ze dej¡Ze dej¡Ze d¡ZG dd„ dƒZG dd„ dƒZdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zd%dd„Zd&dd „Ze
d!ƒd"d#„ ƒZed$ƒZ dS )'z
CCG Lexicons
é    N)Údefaultdict)ÚCCGVarÚ	DirectionÚFunctionalCategoryÚPrimitiveCategory)Ú
deprecated)Ú
Expressionz([A-Za-z]+)(\[[A-Za-z,]+\])?z"([A-Za-z]+(?:\[[A-Za-z,]+\])?)(.*)z([\\/])([.,]?)([.,]?)(.*)z([\S_]+)\s*(::|[-=]+>)\s*(.+)z([^{}]*[^ {}])\s*(\{[^}]+\})?z\{([^}]+)\}z([^#]*)(?:#.*)?c                   @   s:   e Zd ZdZddd„Zdd„ Zdd„ Zd	d
„ Zdd„ ZdS )ÚTokenzÄ
    Class representing a token.

    token => category {semantics}
    e.g. eat => S\var[pl]/var {\x y.eat(x,y)}

    * `token` (string)
    * `categ` (string)
    * `semantics` (Expression)
    Nc                 C   s   || _ || _|| _d S ©N)Ú_tokenÚ_categÚ
_semantics)ÚselfÚtokenÚcategÚ	semantics© r   ú?/var/www/auris/lib/python3.10/site-packages/nltk/ccg/lexicon.pyÚ__init__9   s   
zToken.__init__c                 C   ó   | j S r
   )r   ©r   r   r   r   r   >   ó   zToken.categc                 C   r   r
   )r   r   r   r   r   r   A   r   zToken.semanticsc                 C   s2   d}| j d urdt| j ƒ d }dt| jƒ | S )NÚ z {Ú})r   Ústrr   )r   Úsemantics_strr   r   r   Ú__str__D   s   
zToken.__str__c                 C   s*   t |tƒsdS t| j| jf| ¡ | ¡ ƒS )Néÿÿÿÿ)Ú
isinstancer	   Úcmpr   r   r   r   )r   Úotherr   r   r   Ú__cmp__J   s   
zToken.__cmp__r
   )	Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r   r   r   r!   r   r   r   r   r	   -   s    
r	   c                   @   s0   e Zd ZdZdd„ Zdd„ Zdd„ Zdd	„ Zd
S )Ú
CCGLexiconzâ
    Class representing a lexicon for CCG grammars.

    * `primitives`: The list of primitive categories for the lexicon
    * `families`: Families of categories
    * `entries`: A mapping of words to possible categories
    c                 C   s    t |ƒ| _|| _|| _|| _d S r
   )r   Ú_startZ_primitivesZ	_familiesÚ_entries)r   ÚstartÚ
primitivesÚfamiliesÚentriesr   r   r   r   Y   s   

zCCGLexicon.__init__c                 C   s
   | j | S )z@
        Returns all the possible categories for a word
        )r(   )r   Úwordr   r   r   Ú
categories_   s   
zCCGLexicon.categoriesc                 C   r   )z;
        Return the target category for the parser
        )r'   r   r   r   r   r)   e   s   zCCGLexicon.startc                 C   sf   d}d}t | jƒD ]'}|s|d }|| d }d}| j| D ]}|s'|d }nd}|d|  }qq	|S )zK
        String representation of the lexicon. Used for debugging.
        r   TÚ
z => z | Fz%s)Úsortedr(   )r   ÚstringÚfirstÚidentÚcatr   r   r   r   k   s   
ûzCCGLexicon.__str__N)r"   r#   r$   r%   r   r.   r)   r   r   r   r   r   r&   P   s    r&   c                 C   sš   | dd… }d}|dkr6|  d¡s6|  d¡r!t|ƒ\}}|| }n||d  }|dd… }|dkr6|  d¡r|  d¡rE|d |dd… fS td|  d ƒ‚)	zb
    Separate the contents matching the first set of brackets from the rest of
    the input.
    é   Nú(r   ú)r   zUnmatched bracket in string 'ú')Ú
startswithÚmatchBracketsÚAssertionError)r1   ÚrestZinsideÚpartr   r   r   r:   …   s   

ú
r:   c                 C   s    |   d¡r	t| ƒS t | ¡ ¡ S )zb
    Separate the string for the next portion of the category from the rest
    of the string
    r6   )r9   r:   ÚNEXTPRIM_REÚmatchÚgroups)r1   r   r   r   ÚnextCategory™   s   
rA   c                 C   s   t | d | dd… ƒS )z'
    Parse an application operator
    r   r5   N)r   )Úappr   r   r   ÚparseApplication£   s   rC   c                 C   s   | r| dd…   d¡S g S )z7
    Parse the subscripts for a primitive category
    r5   r   ú,)Úsplit)Zsubscrr   r   r   ÚparseSubscriptsª   s   rF   c                 C   s¨   | d dkr| d du r|du rt ƒ }||fS | d }||v r;|| \}}|du r/|}||fS | ||fg¡}||fS ||v rLt| d ƒ}t||ƒ|fS td| d ƒ‚)zƒ
    Parse a primitive category

    If the primitive is the special category 'var', replace it with the
    correct `CCGVar`.
    r   Úvarr5   NzString 'z-' is neither a family nor primitive category.)r   Ú
substituterF   r   r;   )Úchunksr*   r+   rG   Úcatstrr4   ZcvarZsubscrsr   r   r   ÚparsePrimitiveCategory³   s&   ÿ
ÿrK   c           
      C   sæ   t | ƒ\}}| d¡rt|dd… |||ƒ\}}ntt |¡ ¡ |||ƒ\}}|dkrot |¡ ¡ }t|dd… ƒ}|d }t |ƒ\}}| d¡rWt|dd… |||ƒ\}	}ntt |¡ ¡ |||ƒ\}	}t	||	|ƒ}|dks+||fS )z{
    Parse a string representing a category, and returns a tuple with
    (possibly) the CCG variable for the category
    r6   r5   r   r   r   é   )
rA   r9   ÚaugParseCategoryrK   ÚPRIM_REr?   r@   ÚAPP_RErC   r   )
Úliner*   r+   rG   Z
cat_stringr<   ÚresrB   Ú	directionÚargr   r   r   rM   Ñ   s&   
ÿ
ÿôrM   Fc                 C   s"  t  ¡  g }i }ttƒ}|  ¡ D ]w}t |¡ ¡ d  ¡ }|dkr"q| 	d¡r:|dd„ |dd…  ¡  
d¡D ƒ }qt |¡ ¡ \}}}t |¡ ¡ \}	}
t|	||ƒ\}}|d	kr`||f||< qd}|d
u r||
du rpt|d ƒ‚t t |
¡ ¡ d ¡}||  t|||ƒ¡ qt|d |||ƒS )z@
    Convert string representation into a lexicon for CCGs.
    r   r   z:-c                 S   s   g | ]}|  ¡ ‘qS r   )Ústrip)Ú.0Zprimr   r   r   Ú
<listcomp>  s    ÿzfromstring.<locals>.<listcomp>é   NrD   z::Tz@ must contain semantics because include_semantics is set to True)r   Zreset_idr   ÚlistÚ
splitlinesÚCOMMENTS_REr?   r@   rT   r9   rE   ÚLEX_REÚRHS_RErM   r;   r   Ú
fromstringÚSEMANTICS_REÚappendr	   r&   )Úlex_strZinclude_semanticsr*   r+   r,   rP   r3   ÚsepÚrhsrJ   r   r4   rG   r   r   r   r   r]   ñ   s<   

ÿÿÿÿr]   zUse fromstring() instead.c                 C   s   t | ƒS r
   )r]   )r`   r   r   r   ÚparseLexicon"  s   rc   a¡  
    # Rather minimal lexicon based on the openccg `tinytiny' grammar.
    # Only incorporates a subset of the morphological subcategories, however.
    :- S,NP,N                    # Primitive categories
    Det :: NP/N                  # Determiners
    Pro :: NP
    IntransVsg :: S\NP[sg]    # Tensed intransitive verbs (singular)
    IntransVpl :: S\NP[pl]    # Plural
    TransVsg :: S\NP[sg]/NP   # Tensed transitive verbs (singular)
    TransVpl :: S\NP[pl]/NP   # Plural

    the => NP[sg]/N[sg]
    the => NP[pl]/N[pl]

    I => Pro
    me => Pro
    we => Pro
    us => Pro

    book => N[sg]
    books => N[pl]

    peach => N[sg]
    peaches => N[pl]

    policeman => N[sg]
    policemen => N[pl]

    boy => N[sg]
    boys => N[pl]

    sleep => IntransVsg
    sleep => IntransVpl

    eat => IntransVpl
    eat => TransVpl
    eats => IntransVsg
    eats => TransVsg

    see => TransVpl
    sees => TransVsg
    r
   )F)!r%   ÚreÚcollectionsr   Znltk.ccg.apir   r   r   r   Znltk.internalsr   Znltk.sem.logicr   ÚcompilerN   r>   rO   ÚUNICODEr[   r\   r^   rZ   r	   r&   r:   rA   rC   rF   rK   rM   r]   rc   Zopenccg_tinytinyr   r   r   r   Ú<module>   s6   



#5
	

 1
ÿ