
    /h                          S r SSKJr  SSKJrJrJr  SSKJrJ	r	  SSK
JrJr  SSKJr   SS jrS	 rS
 r " S S5      rSS jrg)z 
Utility functions for parsers.
    )load)CFGPCFGFeatureGrammar)ChartChartParser)FeatureChartFeatureChartParser)InsideChartParserNc                 2   [        U 40 UD6n[        U[        5      (       d  [        S5      e[        U[        5      (       a  Uc  [
        nU" XaUS9$ [        U[        5      (       a  Uc  [        nUc  [        nU" XaUS9$ Uc  [        nUc  [        nU" XaUS9$ )a6  
Load a grammar from a file, and build a parser based on that grammar.
The parser depends on the grammar format, and might also depend
on properties of the grammar itself.

The following grammar formats are currently supported:
  - ``'cfg'``  (CFGs: ``CFG``)
  - ``'pcfg'`` (probabilistic CFGs: ``PCFG``)
  - ``'fcfg'`` (feature-based CFGs: ``FeatureGrammar``)

:type grammar_url: str
:param grammar_url: A URL specifying where the grammar is located.
    The default protocol is ``"nltk:"``, which searches for the file
    in the the NLTK data package.
:type trace: int
:param trace: The level of tracing that should be used when
    parsing a text.  ``0`` will generate no tracing output;
    and higher numbers will produce more verbose tracing output.
:param parser: The class used for parsing; should be ``ChartParser``
    or a subclass.
    If None, the class depends on the grammar format.
:param chart_class: The class used for storing the chart;
    should be ``Chart`` or a subclass.
    Only used for CFGs and feature CFGs.
    If None, the chart class depends on the grammar format.
:type beam_size: int
:param beam_size: The maximum length for the parser's edge queue.
    Only used for probabilistic CFGs.
:param load_args: Keyword parameters used when loading the grammar.
    See ``data.load`` for more information.
z1The grammar must be a CFG, or a subclass thereof.)trace	beam_size)r   chart_class)r   
isinstancer   
ValueErrorr   r   r   r
   r	   r   r   )grammar_urlr   parserr   r   	load_argsgrammars          G/var/www/auris/envauris/lib/python3.13/site-packages/nltk/parse/util.pyload_parserr      s    D ;,),Ggs##OPP'4  >&Fgi@@	G^	,	,>'F&KgDD > FKgDD    c              #      #    [        U SS9 H4  u  nu  p#[        U5      USX3SSSSS/
nSR                  U5      S-   nUv   M6     g7f)	a  
A module to convert a single POS tagged sentence into CONLL format.

>>> from nltk import word_tokenize, pos_tag
>>> text = "This is a foobar sentence."
>>> for line in taggedsent_to_conll(pos_tag(word_tokenize(text))): # doctest: +NORMALIZE_WHITESPACE
...         print(line, end="")
    1       This    _       DT      DT      _       0       a       _       _
    2       is      _       VBZ     VBZ     _       0       a       _       _
    3       a       _       DT      DT      _       0       a       _       _
    4       foobar  _       JJ      JJ      _       0       a       _       _
    5       sentence        _       NN      NN      _       0       a       _       _
    6       .               _       .       .       _       0       a       _       _

:param sentence: A single input sentence to parse
:type sentence: list(tuple(str, str))
:rtype: iter(str)
:return: a generator yielding a single sentence in CONLL format.
   )start_0a	
N)	enumeratestrjoin)sentenceiwordtag	input_strs        r   taggedsent_to_conllr)   O   sR     ( $HA6;DVT3#sCcJ	IIi(4/	 7s   AAc              #   P   #    U  H  n[        U5       Sh  vN   Sv   M     g N7f)a  
A module to convert the a POS tagged document stream
(i.e. list of list of tuples, a list of sentences) and yield lines
in CONLL format. This module yields one line per word and two newlines
for end of sentence.

>>> from nltk import word_tokenize, sent_tokenize, pos_tag
>>> text = "This is a foobar sentence. Is that right?"
>>> sentences = [pos_tag(word_tokenize(sent)) for sent in sent_tokenize(text)]
>>> for line in taggedsents_to_conll(sentences): # doctest: +NORMALIZE_WHITESPACE
...     if line:
...         print(line, end="")
1   This    _       DT      DT      _       0       a       _       _
2   is      _       VBZ     VBZ     _       0       a       _       _
3   a       _       DT      DT      _       0       a       _       _
4   foobar  _       JJ      JJ      _       0       a       _       _
5   sentence        _       NN      NN      _       0       a       _       _
6   .               _       .       .       _       0       a       _       _
<BLANKLINE>
<BLANKLINE>
1   Is      _       VBZ     VBZ     _       0       a       _       _
2   that    _       IN      IN      _       0       a       _       _
3   right   _       NN      NN      _       0       a       _       _
4   ?       _       .       .       _       0       a       _       _
<BLANKLINE>
<BLANKLINE>

:param sentences: Input sentences to parse
:type sentence: list(list(tuple(str, str)))
:rtype: iter(str)
:return: a generator yielding sentences in CONLL format.
Nz

)r)   )	sentencesr$   s     r   taggedsents_to_conllr,   i   s(     B &x000 0s   &$&c                   ,    \ rS rSrSrSS jrSS jrSrg)	TestGrammar   z
Unit tests for  CFG.
Nc                 R    Xl         [        USS9U l        X l        X0l        X@l        g )Nr   )r   )test_grammarr   cpsuite_accept_reject)selfr   r3   acceptrejects        r   __init__TestGrammar.__init__   s%    #gQ/
r   c                    U R                    H  n[        US   S-   SS9  S H  nX#    H  nUR                  5       n[        U R                  R                  U5      5      nU(       a0  U(       a)  [        5         [        U5        U H  n[        U5        M     US:X  a  U/ :X  a  [        SU-  5      eSnM  U(       a  [        S	U-  5      eSn	M     M     W(       d  M  W	(       d  M  [        S
5        M     g)aE  
Sentences in the test suite are divided into two classes:

- grammatical (``accept``) and
- ungrammatical (``reject``).

If a sentence should parse according to the grammar, the value of
``trees`` will be a non-empty list. If a sentence should be rejected
according to the grammar, then the value of ``trees`` will be None.
doc: )end)r7   r8   r7   zSentence '%s' failed to parse'TzSentence '%s' received a parse'zAll tests passed!N)r3   printsplitlistr2   parser   )
r6   
show_treestestkeysenttokenstreestreeacceptedrejecteds
             r   runTestGrammar.run   s     JJD$u+#-+ ID!ZZ\F v!67E!ed$)D!$K %*h B;",-MPT-T"UU'+H ",-NQU-U"VV'+H# & ,& xHH)*- r   )r4   r5   r2   r3   r1   )NN)F)__name__
__module____qualname____firstlineno____doc__r9   rM   __static_attributes__ r   r   r.   r.      s    !+r   r.   c                 \   Ub  U R                  U5      n / nU R                  S5       H  nUS:X  d	  US   U;   a  M  UR                  SS5      nSn[        U5      S:X  a*  US   S;   a  US   S	;   nUS   nO[        US   5      nUS   nUR                  5       nU/ :X  a  My  X7U4/-  nM     U$ )
a\  
Parses a string with one test sentence per line.
Lines can optionally begin with:

- a bool, saying if the sentence is grammatical or not, or
- an int, giving the number of parse trees is should have,

The result information is followed by a colon, and then the sentence.
Empty lines and lines beginning with a comment char are ignored.

:return: a list of tuple of sentences and expected results,
    where a sentence is a list of str,
    and a result is None, or bool, or int

:param comment_chars: ``str`` of possible comment characters.
:param encoding: the encoding of the string, if it is binary
Nr     r   r=   r      )TruetrueFalsefalse)rY   rZ   )decoderA   lenint)stringcomment_charsencodingr+   r$   
split_inforesultrH   s           r   extract_test_sentencesre      s    $ x(ILL&r>Xa[M9^^C+
z?a!} BB#A*::%a=Z]+%a=!R<v&''	 '  r   )r   NNr   )z#%;N)rS   	nltk.datar   nltk.grammarr   r   r   nltk.parse.chartr   r   nltk.parse.featurechartr	   r
   nltk.parse.pchartr   r   r)   r,   r.   re   rU   r   r   <module>rk      sG     2 2 / D / DE6Er4#V.+ .+b%r   