o
    rZh2?                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZm	Z	m
Z
 d dlmZ d dlmZ d dlmZ dd Zd	d
 Zdd ZG dd deZedkrY	 d dlZe  dS dS )    N)ZipFilePathPointer)find_dir	find_filefind_jars_within_path)ParserI)DependencyGraph)taggedsents_to_conllc                  C   s   ddl m}  | g d}|jS )Nr   )RegexpTagger))z\.$.)z\,$,)z\?$?)z\($()z\)$))z\[$[)z\]$])z^-?[0-9]+(\.[0-9]+)?$ZCD)z(The|the|A|a|An|an)$ZDT)z&(He|he|She|she|It|it|I|me|Me|You|you)$ZPRP)z(His|his|Her|her|Its|its)$PRP$)z(my|Your|your|Yours|yours)$r   )z (on|On|in|In|at|At|since|Since)$IN)z (for|For|ago|Ago|before|Before)$r   )z(till|Till|until|Until)$r   )z(by|By|beside|Beside)$r   )z(under|Under|below|Below)$r   )z(over|Over|above|Above)$r   )z (across|Across|through|Through)$r   )z(into|Into|towards|Towards)$r   )z(onto|Onto|from|From)$r   )z.*able$ZJJ)z.*ness$NN)z.*ly$ZRB)z.*s$ZNNS)z.*ing$ZVBG)z.*ed$ZVBD)z.*r   )Znltk.tagr	   tag)r	   Z_tagger r   >/var/www/auris/lib/python3.10/site-packages/nltk/parse/malt.pymalt_regex_tagger   s
    r   c                 C   st   t j| r	| }nt| dd}g d}tt|}dd |D }h d}||s+J ttdd |s6J t	|S )	zE
    A module to find MaltParser .jar file and its dependencies.
    )ZMALT_PARSER)env_vars) r   r   c                 S   s   h | ]
}t j|d  qS )   )ospathsplit).0jarr   r   r   	<setcomp>H   s    z"find_maltparser.<locals>.<setcomp>>   z	log4j.jarzliblinear-1.8.jarz
libsvm.jarc                 S   s   |  do	| dS )Nzmaltparser-z.jar)
startswithendswith)ir   r   r   <lambda>M   s    z!find_maltparser.<locals>.<lambda>)
r   r   existsr   setr   issubsetanyfilterlist)parser_dirnameZ	_malt_dirZmalt_dependenciesZ
_malt_jarsZ_jarsr   r   r   find_maltparser=   s   r,   c                 C   s*   | du rdS t j| r| S t| dddS )z8
    A module to find pre-trained MaltParser model.
    Nmalt_temp.mco)Z
MALT_MODELF)r   verbose)r   r   r%   r   )model_filenamer   r   r   find_malt_modelR   s
   r0   c                   @   sb   e Zd ZdZ				dddZddd	Zdd
dZdddZedddZ	dddZ
dddZdS )
MaltParsera  
    A class for dependency parsing with MaltParser. The input is the paths to:
    - (optionally) a maltparser directory
    - (optionally) the path to a pre-trained MaltParser .mco model file
    - (optionally) the tagger to use for POS tagging before parsing
    - (optionally) additional Java arguments

    Example:
        >>> from nltk.parse import malt
        >>> # With MALT_PARSER and MALT_MODEL environment set.
        >>> mp = malt.MaltParser(model_filename='engmalt.linear-1.7.mco') # doctest: +SKIP
        >>> mp.parse_one('I shot an elephant in my pajamas .'.split()).tree() # doctest: +SKIP
        (shot I (elephant an) (in (pajamas my)) .)
        >>> # Without MALT_PARSER and MALT_MODEL environment.
        >>> mp = malt.MaltParser('/home/user/maltparser-1.9.2/', '/home/user/engmalt.linear-1.7.mco') # doctest: +SKIP
        >>> mp.parse_one('I shot an elephant in my pajamas .'.split()).tree() # doctest: +SKIP
        (shot I (elephant an) (in (pajamas my)) .)
    r   Nc                 C   sZ   t || _|dur|ng | _t|| _| jdk| _t | _|dur'|| _
dS t	 | _
dS )a  
        An interface for parsing with the Malt Parser.

        :param parser_dirname: The path to the maltparser directory that
            contains the maltparser-1.x.jar
        :type parser_dirname: str
        :param model_filename: The name of the pre-trained model with .mco file
            extension. If provided, training will not be required.
            (see http://www.maltparser.org/mco/mco.html and
            see http://www.patful.com/chalk/node/185)
        :type model_filename: str
        :param tagger: The tagger used to POS tag the raw string before
            formatting to CONLL format. It should behave like `nltk.pos_tag`
        :type tagger: function
        :param additional_java_args: This is the additional Java arguments that
            one can use when calling Maltparser, usually this is the heapsize
            limits, e.g. `additional_java_args=['-Xmx1024m']`
            (see https://goo.gl/mpDBvQ)
        :type additional_java_args: list
        Nr-   )r,   	malt_jarsadditional_java_argsr0   model_trainedtempfile
gettempdirworking_dirr   tagger)selfr+   r/   r9   r3   r   r   r   __init__r   s   


zMaltParser.__init__Fnullc              
   c   s   | j stdtjd| jddd}tjd| jddd~}t|D ]	}|t| q"|  | j	|j
|j
dd}t }zttj| jd	  W n   Y | ||}	t| |	d	krktd
d||	f t|j
}
|
 dD ]}tt||dgV  qxW d   n1 sw   Y  W d   n1 sw   Y  W d   n1 sw   Y  t|j
 t|j
 dS )a  
        Use MaltParser to parse multiple POS tagged sentences. Takes multiple
        sentences where each sentence is a list of (word, tag) tuples.
        The sentences must have already been tokenized and tagged.

        :param sentences: Input sentences to parse
        :type sentence: list(list(tuple(str, str)))
        :return: iter(iter(``DependencyGraph``)) the dependency graph
            representation of each sentence
        z0Parser has not been trained. Call train() first.zmalt_input.conll.wFprefixdirmodedeletezmalt_output.conll.parserA   r   z0MaltParser parsing (%s) failed with exit code %d z

top_relation_labelN)r5   	Exceptionr6   NamedTemporaryFiler8   r   writestrclosegenerate_malt_commandnamer   getcwdchdirr   r   r4   _executejoinopenreaditerr   remove)r:   	sentencesr.   rG   
input_fileZoutput_filelinecmdZ_current_pathretinfileZtree_strr   r   r   parse_tagged_sents   sd   


1zMaltParser.parse_tagged_sentsc                    s"    fdd|D } j |||dS )an  
        Use MaltParser to parse multiple sentences.
        Takes a list of sentences, where each sentence is a list of words.
        Each sentence will be automatically tagged with this
        MaltParser instance's tagger.

        :param sentences: Input sentences to parse
        :type sentence: list(list(str))
        :return: iter(DependencyGraph)
        c                 3   s    | ]}  |V  qd S )N)r9   )r   Zsentencer:   r   r   	<genexpr>       z)MaltParser.parse_sents.<locals>.<genexpr>rF   )r]   )r:   rW   r.   rG   Ztagged_sentencesr   r^   r   parse_sents   s   zMaltParser.parse_sentsc                 C   s   dg}|| j 7 }tjdrdnd}|d|| jg7 }|dg7 }tj| j	r6|dtj
| j	d g7 }n|d| j	g7 }|d	|g7 }|d
krM|d|g7 }|d|g7 }|S )a  
        This function generates the maltparser command use at the terminal.

        :param inputfilename: path to the input file
        :type inputfilename: str
        :param outputfilename: path to the output file
        :type outputfilename: str
        javawin;:z-cpzorg.maltparser.Maltz-cz-irC   z-oz-m)r3   sysplatformr!   rR   r2   r   r   r%   r4   r   )r:   ZinputfilenameZoutputfilenamerA   rZ   Zclasspaths_separatorr   r   r   rM      s    



z MaltParser.generate_malt_commandc                 C   s&   |rd nt j}t j| ||d}| S )N)stdoutstderr)
subprocessPIPEPopenwait)rZ   r.   outputpr   r   r   rQ     s   zMaltParser._executec                 C   sv   t jd| jddd}ddd |D }|t| W d   n1 s&w   Y  | j|j|d	 t	|j dS )
z
        Train MaltParser from a list of ``DependencyGraph`` objects

        :param depgraphs: list of ``DependencyGraph`` objects for training input data
        :type depgraphs: DependencyGraph
        malt_train.conll.r=   Fr>   
c                 s   s    | ]}| d V  qdS )
   N)Zto_conll)r   dgr   r   r   r_      r`   z#MaltParser.train.<locals>.<genexpr>Nr.   )
r6   rI   r8   rR   rJ   rK   train_from_filerN   r   rV   )r:   Z	depgraphsr.   rX   Z	input_strr   r   r   train  s   	
zMaltParser.trainc              	   C   s   t |trFtjd| jddd/}| }| }|t| W d   n1 s*w   Y  | j	|j
|dW  d   S 1 sAw   Y  | j|dd}| ||}|d	krbtd
d||f d| _dS )z
        Train MaltParser from a file
        :param conll_file: str for the filename of the training input data
        :type conll_file: str
        rq   r=   Fr>   Nru   ZlearnrD   r   z1MaltParser training (%s) failed with exit code %drE   T)
isinstancer   r6   rI   r8   rS   rT   rJ   rK   rv   rN   rM   rQ   rH   rR   r5   )r:   Z
conll_filer.   rX   Zconll_input_fileZ	conll_strrZ   r[   r   r   r   rv   '  s(   
	

 	
zMaltParser.train_from_file)r   NNN)Fr<   )NN)F)__name__
__module____qualname____doc__r;   r]   ra   rM   staticmethodrQ   rw   rv   r   r   r   r   r1   ^   s    

*
B
 
r1   __main__)inspectr   rk   rg   r6   Z	nltk.datar   Znltk.internalsr   r   r   Znltk.parse.apir   Znltk.parse.dependencygraphr   Znltk.parse.utilr   r   r,   r0   r1   ry   doctesttestmodr   r   r   r   <module>   s(   	& gB