
    /hI                         S SK r S SKrS SKrS SKJr  S SKJrJrJrJ	r	J
r
  S SKJr  S SKJr  S SKJr  Sr " S S	\5      r " S
 S\5      r " S S\5      r " S S\5      rg)    N)PIPE)_java_optionsconfig_javafind_jar_iterfind_jars_within_pathjava)ParserIDependencyGraph)Treez2https://nlp.stanford.edu/software/lex-parser.shtmlc                       \ rS rSrSrSrSrSrSrSr	       SS jr
S	 rSS
 jrSS jrSS jrSS jrSS jrSS jrSrg)GenericStanfordParser   z Interface to the Stanford Parserz+stanford-parser-(\d+)(\.(\d+))+-models\.jarzstanford-parser\.jarz3edu.stanford.nlp.parser.lexparser.LexicalizedParserFNc                 J   [        [        U R                  USS[        USS9S S9n[        [        U R                  USS[        USS9S S9n	[
        R                  R                  U5      S	   n
[        U	/[        U
5      -   5      U l
        X0l        X@l        Xpl        X`l        g )
N)STANFORD_PARSERSTANFORD_CORENLP T)env_vars
searchpathurlverboseis_regexc                 @    [         R                  R                  U 5      $ Nospathdirname
model_paths    K/var/www/auris/envauris/lib/python3.13/site-packages/nltk/parse/stanford.py<lambda>0GenericStanfordParser.__init__.<locals>.<lambda>;       277??:#>    )key)STANFORD_MODELSr   c                 @    [         R                  R                  U 5      $ r   r   r   s    r!   r"   r#   H   r$   r%   r   )maxr   _JAR_stanford_url_MODEL_JAR_PATTERNr   r   splittupler   
_classpathr    	_encodingcorenlp_optionsjava_options)selfpath_to_jarpath_to_models_jarr    encodingr   r2   r1   stanford_jar	model_jarstanford_dirs              r!   __init__GenericStanfordParser.__init__&   s     		@! ?
 ''"@! ?
	  ww}}\215.CL.Q QR$!.(r%   c           
         / n/ n/ nSnUR                  S5       H  nUS:X  a  U(       a   UR                  [        U5      5        / nSnM0  U R                  (       a5  UR                  U R	                  SR                  U5      5      5        / nSnMv  UR                  [        U R	                  SR                  U5      5      /5      5        / nM  UR                  U5        SnM     [        U5      $ )NF 
T)
splitlinesappenditer_DOUBLE_SPACED_OUTPUT
_make_treejoin)r3   output_res	cur_lines	cur_treesblanklines          r!   _parse_trees_output)GenericStanfordParser._parse_trees_outputV   s    		&&u-DrzJJtI/ "I!E//$$T__TYYy5I%JK "I EJJtT__TYYy5I%J$KLM "I  & .  Cyr%   c           
          U R                   SU R                  SSSU R                  SSS/
nU R                  U R	                  USR                  S	 U 5       5      U5      5      $ )
a  
Use StanfordParser to parse multiple sentences. Takes multiple sentences as a
list where each sentence is a list of words.
Each sentence will be automatically tagged with this StanfordParser instance's
tagger.
If whitespaces exists inside a token, then the token will be treated as
separate tokens.

:param sentences: Input sentences to parse
:type sentences: list(list(str))
:rtype: iter(iter(Tree))
-model
-sentencesnewline-outputFormat
-tokenizedz-escaperz-edu.stanford.nlp.process.PTBEscapingProcessorr>   c              3   D   #    U  H  nS R                  U5      v   M     g7f) NrD   ).0sentences     r!   	<genexpr>4GenericStanfordParser.parse_sents.<locals>.<genexpr>   s     L)hsxx11)s    _MAIN_CLASSr    _OUTPUT_FORMATrK   _executerD   r3   	sentencesr   cmds       r!   parse_sents!GenericStanfordParser.parse_sentsm   sp     OO;
 ''MMTYYL)LLg
 	
r%   c                 :    [        U R                  U/U5      5      $ )z
Use StanfordParser to parse a sentence. Takes a sentence as a string;
before parsing, it will be automatically tokenized and tagged by
the Stanford Parser.

:param sentence: Input sentence to parse
:type sentence: str
:rtype: iter(Tree)
)nextraw_parse_sentsr3   rW   r   s      r!   	raw_parseGenericStanfordParser.raw_parse   s     D(((W=>>r%   c                     U R                   SU R                  SSSU R                  /nU R                  U R	                  USR                  U5      U5      5      $ )a  
Use StanfordParser to parse multiple sentences. Takes multiple sentences as a
list of strings.
Each sentence will be automatically tokenized and tagged by the Stanford Parser.

:param sentences: Input sentences to parse
:type sentences: list(str)
:rtype: iter(iter(Tree))
rN   rO   rP   rQ   r>   rZ   r^   s       r!   re   %GenericStanfordParser.raw_parse_sents   s^     OO
 ''MM#tyy3W=
 	
r%   c                 :    [        U R                  U/U5      5      $ )z
Use StanfordParser to parse a sentence. Takes a sentence as a list of
(word, tag) tuples; the sentence must have already been tokenized and
tagged.

:param sentence: Input sentence to parse
:type sentence: list(tuple(str, str))
:rtype: iter(Tree)
)rd   tagged_parse_sentsrf   s      r!   tagged_parse"GenericStanfordParser.tagged_parse   s     D++XJ@AAr%   c                    ^ SmU R                   SU R                  SSSU R                  SSTSS	S
S/nU R                  U R	                  USR                  U4S jU 5       5      U5      5      $ )a,  
Use StanfordParser to parse multiple sentences. Takes multiple sentences
where each sentence is a list of (word, tag) tuples.
The sentences must have already been tokenized and tagged.

:param sentences: Input sentences to parse
:type sentences: list(list(tuple(str, str)))
:rtype: iter(iter(Tree))
/rN   rO   rP   rQ   rR   z-tagSeparatorz-tokenizerFactoryz,edu.stanford.nlp.process.WhitespaceTokenizerz-tokenizerMethodnewCoreLabelTokenizerFactoryr>   c              3   Z   >#    U  H   nS R                  U4S jU 5       5      v   M"     g7f)rT   c              3   F   >#    U  H  nTR                  U5      v   M     g 7fr   rU   )rV   taggedtag_separators     r!   rX   EGenericStanfordParser.tagged_parse_sents.<locals>.<genexpr>.<genexpr>   s     OhF]//77hs   !NrU   )rV   rW   ru   s     r!   rX   ;GenericStanfordParser.tagged_parse_sents.<locals>.<genexpr>   s*      $- HHOhOOO$-s   (+rZ   )r3   r_   r   r`   ru   s       @r!   rl   (GenericStanfordParser.tagged_parse_sents   s     OO:*
" ''MM		 $-  	
 		
r%   c           	         U R                   nUR                  SU/5        U R                  (       a)  UR                  U R                  R                  5       5        SR	                  [
        5      n[        U R                  US9  [        R                  " SSS9 n[        U[        5      (       a  U(       a  UR                  U5      nUR                  U5        UR                  5         U R                  (       a3  UR!                  S5        [#        UU R$                  U[&        [&        S9u  pxO:UR)                  UR*                  5        [#        XR$                  [&        [&        S	9u  pxUR-                  S
S5      nUR-                  SS5      nUR/                  U5      nS S S 5        [0        R2                  " WR*                  5        [        USS9  W$ ! , (       d  f       N:= f)Nz	-encodingrT   )optionsr   wbF)modedeleter   )	classpathstdinstdoutstderr)r~   r   r   s        s    )r0   extendr1   r-   rD   r   r   r2   tempfileNamedTemporaryFile
isinstancestrencodewriteflush
_USE_STDINseekr   r/   r   r@   namereplacedecoder   unlink)	r3   r`   input_r   r6   default_options
input_filer   r   s	            r!   r]   GenericStanfordParser._execute   sn   >>

K*+JJt++1134((=1 	D--w? ((d5AZ&#&&8x0V$ "!%"oo$" 

:??+!%??4" ^^K6F^^K6F]]8,F3 B6 			*//" 	OU;A BAs   DG
G)r/   r0   r1   r2   r    )NNz4edu/stanford/nlp/models/lexparser/englishPCFG.ser.gzutf8Fz-mx4gr=   F)__name__
__module____qualname____firstlineno____doc__r,   r*   r[   r   rB   r:   rK   ra   rg   re   rm   rl   r]   __static_attributes__r   r%   r!   r   r      sb    *G"DGKJ! I.)`.
>
?
.
B%
N,r%   r   c                   6   ^  \ rS rSrSrSrU 4S jrS rSrU =r	$ )StanfordParseri  a
  
>>> parser=StanfordParser(
...     model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"
... ) # doctest: +SKIP

>>> list(parser.raw_parse("the quick brown fox jumps over the lazy dog")) # doctest: +NORMALIZE_WHITESPACE +SKIP
[Tree('ROOT', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['brown']),
Tree('NN', ['fox'])]), Tree('NP', [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']),
Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])])])])]

>>> sum([list(dep_graphs) for dep_graphs in parser.raw_parse_sents((
...     "the quick brown fox jumps over the lazy dog",
...     "the quick grey wolf jumps over the lazy fox"
... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP
[Tree('ROOT', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['brown']),
Tree('NN', ['fox'])]), Tree('NP', [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']),
Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])])])]), Tree('ROOT', [Tree('NP',
[Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['grey']), Tree('NN', ['wolf'])]), Tree('NP',
[Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']), Tree('NP', [Tree('DT', ['the']),
Tree('JJ', ['lazy']), Tree('NN', ['fox'])])])])])])]

>>> sum([list(dep_graphs) for dep_graphs in parser.parse_sents((
...     "I 'm a dog".split(),
...     "This is my friends ' cat ( the tabby )".split(),
... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP
[Tree('ROOT', [Tree('S', [Tree('NP', [Tree('PRP', ['I'])]), Tree('VP', [Tree('VBP', ["'m"]),
Tree('NP', [Tree('DT', ['a']), Tree('NN', ['dog'])])])])]), Tree('ROOT', [Tree('S', [Tree('NP',
[Tree('DT', ['This'])]), Tree('VP', [Tree('VBZ', ['is']), Tree('NP', [Tree('NP', [Tree('NP', [Tree('PRP$', ['my']),
Tree('NNS', ['friends']), Tree('POS', ["'"])]), Tree('NN', ['cat'])]), Tree('PRN', [Tree('-LRB-', [Tree('', []),
Tree('NP', [Tree('DT', ['the']), Tree('NN', ['tabby'])]), Tree('-RRB-', [])])])])])])])]

>>> sum([list(dep_graphs) for dep_graphs in parser.tagged_parse_sents((
...     (
...         ("The", "DT"),
...         ("quick", "JJ"),
...         ("brown", "JJ"),
...         ("fox", "NN"),
...         ("jumped", "VBD"),
...         ("over", "IN"),
...         ("the", "DT"),
...         ("lazy", "JJ"),
...         ("dog", "NN"),
...         (".", "."),
...     ),
... ))],[]) # doctest: +NORMALIZE_WHITESPACE +SKIP
[Tree('ROOT', [Tree('S', [Tree('NP', [Tree('DT', ['The']), Tree('JJ', ['quick']), Tree('JJ', ['brown']),
Tree('NN', ['fox'])]), Tree('VP', [Tree('VBD', ['jumped']), Tree('PP', [Tree('IN', ['over']), Tree('NP',
[Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])]), Tree('.', ['.'])])])]
pennc                 Z   > [         R                  " S[        SS9  [        TU ]  " U0 UD6  g )NzcThe StanfordParser will be deprecated
Please use [91mnltk.parse.corenlp.CoreNLPParser[0m instead.   
stacklevelwarningswarnDeprecationWarningsuperr:   r3   argskwargs	__class__s      r!   r:   StanfordParser.__init__F  s/    R		
 	$)&)r%   c                 .    [         R                  " U5      $ r   )r   
fromstringr3   results     r!   rC   StanfordParser._make_treeP  s    v&&r%   r   
r   r   r   r   r   r\   r:   rC   r   __classcell__r   s   @r!   r   r     s    0d N*' 'r%   r   c                   6   ^  \ rS rSrSrSrU 4S jrS rSrU =r	$ )StanfordDependencyParseriT  a	  
>>> dep_parser=StanfordDependencyParser(
...     model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"
... ) # doctest: +SKIP

>>> [parse.tree() for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE +SKIP
[Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy'])])]

>>> [list(parse.triples()) for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE +SKIP
[[((u'jumps', u'VBZ'), u'nsubj', (u'fox', u'NN')), ((u'fox', u'NN'), u'det', (u'The', u'DT')),
((u'fox', u'NN'), u'amod', (u'quick', u'JJ')), ((u'fox', u'NN'), u'amod', (u'brown', u'JJ')),
((u'jumps', u'VBZ'), u'nmod', (u'dog', u'NN')), ((u'dog', u'NN'), u'case', (u'over', u'IN')),
((u'dog', u'NN'), u'det', (u'the', u'DT')), ((u'dog', u'NN'), u'amod', (u'lazy', u'JJ'))]]

>>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.raw_parse_sents((
...     "The quick brown fox jumps over the lazy dog.",
...     "The quick grey wolf jumps over the lazy fox."
... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP
[Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy'])]),
Tree('jumps', [Tree('wolf', ['The', 'quick', 'grey']), Tree('fox', ['over', 'the', 'lazy'])])]

>>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.parse_sents((
...     "I 'm a dog".split(),
...     "This is my friends ' cat ( the tabby )".split(),
... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP
[Tree('dog', ['I', "'m", 'a']), Tree('cat', ['This', 'is', Tree('friends', ['my', "'"]), Tree('tabby', ['the'])])]

>>> sum([[list(parse.triples()) for parse in dep_graphs] for dep_graphs in dep_parser.tagged_parse_sents((
...     (
...         ("The", "DT"),
...         ("quick", "JJ"),
...         ("brown", "JJ"),
...         ("fox", "NN"),
...         ("jumped", "VBD"),
...         ("over", "IN"),
...         ("the", "DT"),
...         ("lazy", "JJ"),
...         ("dog", "NN"),
...         (".", "."),
...     ),
... ))],[]) # doctest: +NORMALIZE_WHITESPACE +SKIP
[[((u'jumped', u'VBD'), u'nsubj', (u'fox', u'NN')), ((u'fox', u'NN'), u'det', (u'The', u'DT')),
((u'fox', u'NN'), u'amod', (u'quick', u'JJ')), ((u'fox', u'NN'), u'amod', (u'brown', u'JJ')),
((u'jumped', u'VBD'), u'nmod', (u'dog', u'NN')), ((u'dog', u'NN'), u'case', (u'over', u'IN')),
((u'dog', u'NN'), u'det', (u'the', u'DT')), ((u'dog', u'NN'), u'amod', (u'lazy', u'JJ'))]]

	conll2007c                 Z   > [         R                  " S[        SS9  [        TU ]  " U0 UD6  g )NzwThe StanfordDependencyParser will be deprecated
Please use [91mnltk.parse.corenlp.CoreNLPDependencyParser[0m instead.r   r   r   r   s      r!   r:   !StanfordDependencyParser.__init__  s/    \		
 	$)&)r%   c                     [        USS9$ )Nroottop_relation_labelr
   r   s     r!   rC   #StanfordDependencyParser._make_tree      v&AAr%   r   r   r   s   @r!   r   r   T  s!    .` !N*B Br%   r   c                   T   ^  \ rS rSrSrSrSrSrSrSr	Sr
U 4S jrSS	 jrS
 rSrU =r$ )StanfordNeuralDependencyParseri  a  
>>> from nltk.parse.stanford import StanfordNeuralDependencyParser # doctest: +SKIP
>>> dep_parser=StanfordNeuralDependencyParser(java_options='-mx4g')# doctest: +SKIP

>>> [parse.tree() for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE +SKIP
[Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy']), '.'])]

>>> [list(parse.triples()) for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE +SKIP
[[((u'jumps', u'VBZ'), u'nsubj', (u'fox', u'NN')), ((u'fox', u'NN'), u'det',
(u'The', u'DT')), ((u'fox', u'NN'), u'amod', (u'quick', u'JJ')), ((u'fox', u'NN'),
u'amod', (u'brown', u'JJ')), ((u'jumps', u'VBZ'), u'nmod', (u'dog', u'NN')),
((u'dog', u'NN'), u'case', (u'over', u'IN')), ((u'dog', u'NN'), u'det',
(u'the', u'DT')), ((u'dog', u'NN'), u'amod', (u'lazy', u'JJ')), ((u'jumps', u'VBZ'),
u'punct', (u'.', u'.'))]]

>>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.raw_parse_sents((
...     "The quick brown fox jumps over the lazy dog.",
...     "The quick grey wolf jumps over the lazy fox."
... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP
[Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over',
'the', 'lazy']), '.']), Tree('jumps', [Tree('wolf', ['The', 'quick', 'grey']),
Tree('fox', ['over', 'the', 'lazy']), '.'])]

>>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.parse_sents((
...     "I 'm a dog".split(),
...     "This is my friends ' cat ( the tabby )".split(),
... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP
[Tree('dog', ['I', "'m", 'a']), Tree('cat', ['This', 'is', Tree('friends',
['my', "'"]), Tree('tabby', ['-LRB-', 'the', '-RRB-'])])]
conllz)edu.stanford.nlp.pipeline.StanfordCoreNLPz%stanford-corenlp-(\d+)(\.(\d+))+\.jarz,stanford-corenlp-(\d+)(\.(\d+))+-models\.jarTc                    > [         R                  " S[        SS9  [        TU ]  " U0 UD6  U =R
                  S-  sl        g )Nz}The StanfordNeuralDependencyParser will be deprecated
Please use [91mnltk.parse.corenlp.CoreNLPDependencyParser[0m instead.r   r   z(-annotators tokenize,ssplit,pos,depparse)r   r   r   r   r:   r1   r   s      r!   r:   'StanfordNeuralDependencyParser.__init__  sA    \		
 	$)&) JJr%   c                     [        S5      e)z
Currently unimplemented because the neural dependency parser (and
the StanfordCoreNLP pipeline class) doesn't support passing in pre-
tagged tokens.
zxtagged_parse[_sents] is not supported by StanfordNeuralDependencyParser; use parse[_sents] or raw_parse[_sents] instead.)NotImplementedError)r3   r_   r   s      r!   rl   1StanfordNeuralDependencyParser.tagged_parse_sents  s     ":
 	
r%   c                     [        USS9$ )NROOTr   r
   r   s     r!   rC   )StanfordNeuralDependencyParser._make_tree  r   r%   r   r   )r   r   r   r   r   r\   r[   r*   r,   r   rB   r:   rl   rC   r   r   r   s   @r!   r   r     sA    > N=K3DHJ 	K

B Br%   r   )r   r   r   
subprocessr   nltk.internalsr   r   r   r   r   nltk.parse.apir	   nltk.parse.dependencygraphr   	nltk.treer   r+   r   r   r   r   r   r%   r!   <module>r      sk    
     # 6 DrG rj@'* @'F>B4 >BB?B%: ?Br%   