o
    rZhx                     @   s:   d Z ddlZddlmZ ddlmZ G dd dejZdS )z
Unit tests for nltk.tgrep.
    N)tgrep)ParentedTreec                   @   s   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ Zd,d- Zd.d/ Zd0d1 Zd2d3 Zd4d5 Zd6d7 Zd8d9 Zd:d; Z d<S )=TestSequenceFunctionsz5
    Class containing unit tests for nltk.tgrep.
    c                 C   s   t d}| |g d dS )z.
        Simple test of tokenization.
        %A .. (B !< C . D) | ![<< (E , F) $ G])A..(B!<C.D)|r
   [<<r   E,Fr   $G]N)r   tgrep_tokenizeassertEqual)selftokens r   H/var/www/auris/lib/python3.10/site-packages/nltk/test/unit/test_tgrep.pytest_tokenize_simple   s
   
z*TestSequenceFunctions.test_tokenize_simplec                 C   s   |  tdtd dS )zM
        Test that tokenization handles bytes and strs the same way.
        s%   A .. (B !< C . D) | ![<< (E , F) $ G]r   Nr   r   r   r   r   r   r   test_tokenize_encoding;   s   z,TestSequenceFunctions.test_tokenize_encodingc                 C   sX  |  tdg d |  tdg d |  tdg d |  tdg d |  td	g d
 |  tdg d |  tdg d |  tdg d |  tdg d |  tdg d |  tdg d |  tdg d |  tdg d |  tdg d |  tdg d |  tdg d  |  td!g d" |  td#g d$ |  td%g d& |  td'g d( |  td)g d* |  td+g d, |  td-g d. |  td/g d0 |  td1g d2 |  td3g d4 |  td5g d6 |  td7g d8 |  td9g d: |  td;g d< |  td=g d> |  td?g d@ |  tdAg dB |  tdCg dD |  tdEg dF |  tdGg dH |  tdIg dJ |  tdKg dL |  tdMg dN |  tdOg dP |  tdQg dR |  tdSg dT |  tdUg dV |  tdWg dX |  tdYg dZ |  td[g d\ |  td]g d^ |  td_g d` |  tdag db |  tdcg dd |  tdeg df |  tdgg dh |  tdig dj |  tdkg dl |  tdmg dn |  tdog dp |  tdqg dr |  tdsg dt |  tdug dv |  tdwg dx |  tdyg dz |  td{g d| d}S )~z8
        Test tokenization of basic link types.
        zA<B)r   r   r	   zA>B)r   >r	   zA<3B)r   <3r	   zA>3B)r   >3r	   zA<,B)r   <,r	   zA>,B)r   >,r	   zA<-3B)r   <-3r	   zA>-3B)r   >-3r	   zA<-B)r   <-r	   zA>-B)r   >-r	   zA<'B)r   <'r	   zA>'B)r   >'r	   zA<:B)r   <:r	   zA>:B)r   >:r	   zA<<B)r   r   r	   zA>>B)r   >>r	   zA<<,B)r   <<,r	   zA>>,B)r   >>,r	   zA<<'B)r   <<'r	   zA>>'B)r   >>'r	   zA<<:B)r   <<:r	   zA>>:B)r   >>:r	   zA.B)r   r   r	   zA,B)r   r   r	   zA..B)r   r   r	   zA,,B)r   ,,r	   zA$B)r   r   r	   zA$.B)r   $.r	   zA$,B)r   $,r	   zA$..B)r   $..r	   zA$,,B)r   $,,r	   zA!<B)r   r
   r   r	   zA!>B)r   r
   r#   r	   zA!<3B)r   r
   r$   r	   zA!>3B)r   r
   r%   r	   zA!<,B)r   r
   r&   r	   zA!>,B)r   r
   r'   r	   zA!<-3B)r   r
   r(   r	   zA!>-3B)r   r
   r)   r	   zA!<-B)r   r
   r*   r	   zA!>-B)r   r
   r+   r	   zA!<'B)r   r
   r,   r	   zA!>'B)r   r
   r-   r	   zA!<:B)r   r
   r.   r	   zA!>:B)r   r
   r/   r	   zA!<<B)r   r
   r   r	   zA!>>B)r   r
   r0   r	   zA!<<,B)r   r
   r1   r	   zA!>>,B)r   r
   r2   r	   zA!<<'B)r   r
   r3   r	   zA!>>'B)r   r
   r4   r	   zA!<<:B)r   r
   r5   r	   zA!>>:B)r   r
   r6   r	   zA!.B)r   r
   r   r	   zA!,B)r   r
   r   r	   zA!..B)r   r
   r   r	   zA!,,B)r   r
   r7   r	   zA!$B)r   r
   r   r	   zA!$.B)r   r
   r8   r	   zA!$,B)r   r
   r9   r	   zA!$..B)r   r
   r:   r	   zA!$,,B)r   r
   r;   r	   Nr    r!   r   r   r   test_tokenize_link_typesD   s|   z.TestSequenceFunctions.test_tokenize_link_typesc                 C   s   |  tdg d |  tddg |  tdg d |  tdg d |  tdg d	 |  td
g d |  tdg d |  tdg d |  tdg d |  tdg d |  tdg d dS )zJ
        Test tokenization of the TGrep2 manual example patterns.
        NP < PP)NPr   PP/^NP/NP << PP . VP)r>   r   r?   r   VPNP << PP | . VP)r>   r   r?   r   r   rB   NP !<< PP [> NP | >> VP])r>   r
   r   r?   r   r#   r>   r   r0   rB   r   NP << (PP . VP))r>   r   r   r?   r   rB   r   NP <' (PP <, (IN < on)))r>   r,   r   r?   r&   r   INr   onr   r   S < (A < B) < C)	Sr   r   r   r   r	   r   r   r   S < ((A < B) < C))rJ   r   r   r   r   r   r	   r   r   r   r   S < (A < B < C))	rJ   r   r   r   r   r	   r   r   r   zA<B&.C)r   r   r	   &r   r   Nr    r!   r   r   r   test_tokenize_examples   sB   z,TestSequenceFunctions.test_tokenize_examplesc                 C      |  tdg d dS )z/
        Test tokenization of quoting.
        z"A<<:B"<<:"A $.. B"<"A>3B"<C)z"A<<:B"r5   z	"A $.. B"r   z"A>3B"r   r   Nr    r!   r   r   r   test_tokenize_quoting   s   z+TestSequenceFunctions.test_tokenize_quotingc                 C   s   |  tddg |  tddg |  tddg |  tddg |  tdddg |  tdg d	 |  td
g d |  tdg d dS )z2
        Test tokenization of node names.
        ZRobertz	/^[Bb]ob/*__zN()N(r   zN(0,))rS   0r   r   zN(0,0))rS   rT   r   rT   r   zN(0,0,))rS   rT   r   rT   r   r   Nr    r!   r   r   r   test_tokenize_nodenames   s   z-TestSequenceFunctions.test_tokenize_nodenamesc                 C   rO   )z9
        Test tokenization of macro definitions.
        z4@ NP /^NP/;
@ NN /^NN/;
@NP [!< NP | < @NN] !$.. @NN)@r>   r@   ;rV   NNz/^NN/rW   z@NPr   r
   r   r>   r   r   @NNr   r
   r:   rY   Nr    r!   r   r   r   test_tokenize_macros   s   z*TestSequenceFunctions.test_tokenize_macrosc                 C   sv   t d}| ttd|gddgg | ttd|g|d |d gg | ttd|gg dg dS )z`
        Test a simple use of tgrep for finding nodes matching a given
        pattern.
        A(S (NP (DT the) (JJ big) (NN dog)) (VP bit) (NP (DT a) (NN cat)))rX   r      r]      NN|JJ)r   r_   r\   r^   Nr   
fromstringr   listr   tgrep_positionsZtgrep_nodesr   treer   r   r   test_node_simple   s     z&TestSequenceFunctions.test_node_simplec                 C   s^   t d}| ttd|gttd|g | ttd|gttd|g dS )z9Test that the tgrep print operator ' is properly ignored.(S (n x) (N x))Nz'Nz/[Nn]/z'/[Nn]/Nr   rc   r   rd   r   re   rf   r   r   r   test_node_printing   s   
z(TestSequenceFunctions.test_node_printingc                 C   s   t d}| ttd|gttd|g | ttd|gttd|g | ttd|gttd|g dS )z]
        Test that tgrep search strings handles bytes and strs the same
        way.
        r[   s   NNrX   s   NN|JJr`   Nrb   rf   r   r   r   test_node_encoding  s   z(TestSequenceFunctions.test_node_encodingc                 C   L   t d}| ttd|gdgg | ttd|gddgg dS )zI
        Test selecting nodes using case insensitive node names.
        ri   "N"r_   zi@"N"r   Nrk   rf   r   r   r   test_node_nocase  s   
$z&TestSequenceFunctions.test_node_nocasec                 C   s   t d}| ttd|gdgg | ttd|gdgg | ttd|gdgg | ttd|gd	gg d
S )z?
        Test selecting nodes using quoted node names.
        z(N ("N" x) (N" x) ("\" x))ro   r   z"\"N\""rq   z"N\""rp   z"\"\\\""r]   Nrk   rf   r   r   r   test_node_quoted  s
   
"z&TestSequenceFunctions.test_node_quotedc                 C   s.   t d}| ttd|gddgg dS )/
        Test regex matching on nodes.
        $(S (NP-SBJ x) (NP x) (NNP x) (VP x))r@   rq   rp   Nrk   rf   r   r   r   test_node_regex)  s   
$z%TestSequenceFunctions.test_node_regexc                 C   sN   t d}| ttd|gddgg | ttd|gg dg dS )ru   z(S (SBJ x) (SBJ1 x) (NP-SBJ x))z/^SBJ/rq   rp   z/SBJ/)rq   rp   rs   Nrk   rf   r   r   r   test_node_regex_22  s
   
 z'TestSequenceFunctions.test_node_regex_2c                    s   t dfddtt D   fdd D }|D ]$}d| }tt|g}| 	t|d d | 	|d d | q!d	S )
zE
        Test matching on nodes based on NLTK tree position.
        rv   c                    s   h | ]}  |qS r   )Zleaf_treeposition.0x)rg   r   r   	<setcomp>D  s    z@TestSequenceFunctions.test_node_tree_position.<locals>.<setcomp>c                    s   g | ]}| vr|qS r   r   ry   )leaf_positionsr   r   
<listcomp>E  s    zATestSequenceFunctions.test_node_tree_position.<locals>.<listcomp>rj   r   r_   N)
r   rc   rangelenleavesZtreepositionsrd   r   re   r   )r   Ztree_positionspositionZnode_idre   r   )r}   rg   r   test_node_tree_position>  s   

z-TestSequenceFunctions.test_node_tree_positionc                 C   sL   t d}| ttd|gddgg | ttd|gdg g dS )zS
        Test node name matching with the search_leaves flag set to False.
        (S (A (T x)) (B (N x)))r{   r   r   r   r_   r   r   FNrk   rf   r   r   r   test_node_noleavesL  s
   
"z(TestSequenceFunctions.test_node_noleavesc                 C   s  t d}| ttd|gdgg | ttd|gdgg | ttd|gg dg | ttd|gdgg | ttd	|gd
gg | ttd|gdgg | ttd|gg dg | ttd|gg dg | ttd|gg dg | ttd|gdd
gg | ttd|gddgg | ttd|gddgg | ttd|gdgg | ttd|gdgg | ttd|gg dg t d}| ttd|gdgg | ttd|gddgg | ttd|gg dg | ttd|gdgg t d }| ttd!|gdgg | ttd"|gg d#g t d$}| ttd%|gg d&g | ttd'|gg d(g d)S )*zC
        Test matching nodes based on dominance relations.
        r   z* < Trq   z	* < T > Sz* !< T)r   r   r   r   rp   r_   r   r   z
* !< T > Srp   z* > Ar   z* > Br   z* !> B)r   rq   r   r   rp   r   z* !> B >> S)rq   r   rp   z* >> S)rq   r   rp   r   z* >>, Sz* >>' Sz* << Tr   z* <<' Tz* <<1 Nz* !<< T)r   r   rp   r   r   z(S (A (T x)) (B (T x) (N x )))z* <: Tz* !<: T)r   r   r   rp   r   r   )r_   r_   )r_   r_   r   z* !<: T > Sz(S (T (A x) (B x)) (T (C x)))z* >: Tz* !>: T)r   rq   r   r   ra   r   r_   r   rp   r   z=(S (A (B (C (D (E (T x)))))) (A (B (C (D (E (T x))) (N x)))))z* <<: T)rq   r   r   r   r   r   r   r   r   r   r   r   r_   r   r   r   )r_   r   r   r   r   z* >>: A)r   r   r   r   )r   r   r   r   r   r   r   r   Nrk   rf   r   r   r   tests_rel_dominanceV  sz   
 
 
z)TestSequenceFunctions.tests_rel_dominancec                 C   s(   t d}| tjttd|g dS )zC
        Test error handling of undefined tgrep operators.
        r   z* >>> SN)r   rc   assertRaisesr   TgrepExceptionrd   re   rf   r   r   r   test_bad_operator  s   
z'TestSequenceFunctions.test_bad_operatorc                 C   sV   t d}d}| tt||gddgg d}| tt||gddgg dS )z`
        Test that comments are correctly filtered out of tgrep search
        strings.
        z(S (NN x) (NP x) (NN x))z=
        @ NP /^NP/;
        @ NN /^NN/;
        @NN
        rq   rs   zg
        # macros
        @ NP /^NP/;
        @ NN /^NN/;

        # search string
        @NN
        Nrk   )r   rg   Zsearch1Zsearch2r   r   r   test_comments  s
   
 $z#TestSequenceFunctions.test_commentsc                 C   s   t d}| ttd|gdgg | ttd|gdgg | ttd|gdgg | ttd|gdgg | ttd|gddgg d	S )
z7
        Test matching sister nodes in a tree.
        (S (A x) (B x) (C x))z* $. Brq   z* $.. Bz* $, Brs   z* $,, Bz* $ BNrk   rf   r   r   r   test_rel_sister_nodes  s   
$z+TestSequenceFunctions.test_rel_sister_nodesc                 C   s  t d}| ttd|gdgg | ttd|gdgg | ttd|gdgg | ttd|gdgg | ttd	|gdgg | ttd
|gdgg | ttd|gdgg | ttd|gdgg t d}| ttd|gdgg | ttd|gdgg | ttd|gdgg | ttd|gdgg | ttd|gdgg | ttd|gdgg | ttd|gdgg | ttd|gdgg dS )zP
        Test matching nodes based on their index in their parent node.
        r   z* >, Srq   z* >1 Sz* >2 Srp   z* >3 Srs   z* >' Sz* >-1 Sz* >-2 Sz* >-3 SzE(S (D (A x) (B x) (C x)) (E (B x) (C x) (A x)) (F (C x) (A x) (B x)))z* <, Az* <1 Az* <2 Az* <3 Az* <' Az* <-1 Az* <-2 Az* <-3 ANrk   rf   r   r   r   tests_rel_indexed_children  s(   
"z0TestSequenceFunctions.tests_rel_indexed_childrenc                 C   s  t d}| ttd|gg dg | ttd|gddgg | ttd|gg dg | ttd	|gg d
g | ttd|gddgg | ttd|gg dg | ttd|gg dg | ttd|gg dg dS )zD
        Test matching nodes based on precedence relations.
        zV(S (NP (NP (PP x)) (NP (AP x))) (VP (AP (X (PP x)) (Y (AP x)))) (NP (RC (NP (AP x)))))z* . X)rq   ra   r   z* . Yr   r   z* .. X)rq   r   r   ra   r   z* .. Y)rq   r   r   ra   r   r   r   z* , Xr_   r   r_   r_   r   r_   r   z* , Y)rs   r]   r   r]   r   r   r]   r   r   r   z* ,, X)r   r   rs   r   r   r   z* ,, YNrk   rf   r   r   r   test_rel_precedence  s@   z)TestSequenceFunctions.test_rel_precedencec                 C   sf  t d}| ttd|gdgg t d}| ttd|gdgg t d}| ttd|gddgg t d	}| ttd
|gddgg t d}| ttd|gdgg t d}| ttd|gdgg t d}| ttd|gdgg t d}| ttd|gdgg | ttd|gdgg dS )zA
        Test the Basic Examples from the TGrep2 manual.
        z(S (NP (AP x)) (NP (PP x)))r=   rp   z$(S (NP x) (VP x) (NP (PP x)) (VP x))rA   rs   z6(S (NP (AP x)) (NP (PP x)) (NP (DET x) (NN x)) (VP x))rC   zX(S (NP (NP (PP x)) (NP (AP x))) (VP (AP (NP (PP x)) (NP (AP x)))) (NP (RC (NP (AP x)))))rD   ra   r   z:(S (NP (AP (PP x) (VP x))) (NP (AP (PP x) (NP x))) (NP x))rE   rq   ze(S (NP (DET a) (NN cat) (PP (IN on) (NP x))) (NP (DET a) (NN cat) (PP (IN on) (NP x)) (PP x)) (NP x))rF   z;(S (S (C x) (A (B x))) (S (C x) (A x)) (S (D x) (A (B x))))rI   z/(S (S (A (B x) (C x))) (S (S (C x) (A (B x)))))rK   rL   Nrk   rf   r   r   r   test_examples  sX   

z#TestSequenceFunctions.test_examplesc                 C   sH   t d}| ttd|gddgg | tjttd|g dS )z8
        Test defining and using tgrep2 macros.
        zi(VP (VB sold) (NP (DET the) (NN heiress)) (NP (NN deed) (PREP to) (NP (DET the) (NN school) (NN house))))z+@ NP /^NP/;
@ NN /^NN/;
@NP !< @NP !$.. @NNrp   )r]   r]   z,@ NP /^NP/;
@ NN /^NN/;
@CNP !< @NP !$.. @NNN)r   rc   r   rd   r   re   r   r   rf   r   r   r   test_use_macrosf  s$   	z%TestSequenceFunctions.test_use_macrosc                 C   s0   |  tdg d |  tdg d dS )z#Test tokenization of labeled nodes.!S < @SBJ < (@VP < (@VB $.. @OBJ)))rJ   r   @SBJr   r   @VPr   r   @VBr:   @OBJr   r   z%S < @SBJ=s < (@VP=v < (@VB $.. @OBJ)))rJ   r   r   =sr   r   r   r   vr   r   r   r:   r   r   r   Nr    r!   r   r   r   test_tokenize_node_labels  s   z/TestSequenceFunctions.test_tokenize_node_labelsc                 C   rO   )z(Test tokenization of segmented patterns.z0S < @SBJ=s < (@VP=v < (@VB $.. @OBJ)) : =s .. =v)rJ   r   r   r   r   r   r   r   r   r   r   r   r   r:   r   r   r   :z=sr   z=vNr    r!   r   r   r    test_tokenize_segmented_patterns  s   z6TestSequenceFunctions.test_tokenize_segmented_patternsc                 C   s*  d}t d}t d}|dd d }d}| tt||gd  | tt||gd  | tt||gd  | tt||gtt||g | tt||gd  | tt||gd  | tt||gd  | tt||gtt||g dS )	zN
        Test labeled nodes.

        Test case from Emily M. Bender.
        z
            # macros
            @ SBJ /SBJ/;
            @ VP /VP/;
            @ VB /VB/;
            @ VPoB /V[PB]/;
            @ OBJ /OBJ/;

            # 1 svo
            S < @SBJ=s < (@VP=v < (@VB $.. @OBJ)) : =s .. =vz2(S (NP-SBJ I) (VP (VB eat) (NP-OBJ (NNS apples))))z2(S (VP (VB eat) (NP-OBJ (NNS apples))) (NP-SBJ I))z

r   r   z-S < (/.*SBJ/ $.. (/VP/ < (/VB/ $.. /.*OBJ/)))N)	r   rc   split
assertTruerd   r   re   r   ZassertFalse)r   searchZsent1Zsent2Zsearch_firsthalfZsearch_rewriter   r   r   test_labeled_nodes  s.   
z(TestSequenceFunctions.test_labeled_nodesc                 C   rn   )zm
        Test that multiple (3 or more) conjunctions of node relations are
        handled properly.
        z'((A (B b) (C c)) (A (B b) (C c) (D d)))z(A < B < C < D)rp   z(A < B < C)rq   Nrk   )r   sentr   r   r   test_multiple_conjs  s   
z)TestSequenceFunctions.test_multiple_conjsc                 C   sn   t d}| ttd|gddgg | ttd|gddgg | ttd|gddgg dS )zp
        Test that semicolons at the end of a tgrep2 search string won't
        cause a parse failure.
        r[   rX   r\   r^   zNN;zNN;;Nrk   rf   r   r   r   test_trailing_semicolon  s     z-TestSequenceFunctions.test_trailing_semicolonN)!__name__
__module____qualname____doc__r   r"   r<   rN   rP   rU   rZ   rh   rl   rm   rr   rt   rw   rx   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r      s>    !	C&	 
	
W	'O+(r   )r   ZunittestZnltkr   Z	nltk.treer   ZTestCaser   r   r   r   r   <module>   s
   	