
    /h                         S SK Jr  S rg)    )defaultdictc                   ^ ^^	^
^^ UR                  5        Vs/ s H+  n[        [        [        UR                  S5      5      5      PM-     nnUR                  5        Vs/ s H+  n[        [        [        UR                  S5      5      5      PM-     nn/ SQm[	        U5      R                  [	        U5      5      m
[	        U5      R                  [	        U5      5      m[        [        5      m	T
 H-  u  pVT	S   R                  U5        T	S   R                  U5        M/     U	U
UU UU4S jnU	U
U UU4S jnU" 5         U" U5        U" U5        [        T
5      $ s  snf s  snf )ub
  
This module symmetrisatizes the source-to-target and target-to-source
word alignment output and produces, aka. GDFA algorithm (Koehn, 2005).

Step 1: Find the intersection of the bidirectional alignment.

Step 2: Search for additional neighbor alignment points to be added, given
        these criteria: (i) neighbor alignments points are not in the
        intersection and (ii) neighbor alignments are in the union.

Step 3: Add all other alignment points that are not in the intersection, not in
        the neighboring alignments that met the criteria but in the original
        forward/backward alignment outputs.

    >>> forw = ('0-0 2-1 9-2 21-3 10-4 7-5 11-6 9-7 12-8 1-9 3-10 '
    ...         '4-11 17-12 17-13 25-14 13-15 24-16 11-17 28-18')
    >>> back = ('0-0 1-9 2-9 3-10 4-11 5-12 6-6 7-5 8-6 9-7 10-4 '
    ...         '11-6 12-8 13-12 15-12 17-13 18-13 19-12 20-13 '
    ...         '21-3 22-12 23-14 24-17 25-15 26-17 27-18 28-18')
    >>> srctext = ("この よう な ハロー 白色 わい 星 の Ｌ 関数 "
    ...            "は Ｌ と 共 に 不連続 に 増加 する こと が "
    ...            "期待 さ れる こと を 示し た 。")
    >>> trgtext = ("Therefore , we expect that the luminosity function "
    ...            "of such halo white dwarfs increases discontinuously "
    ...            "with the luminosity .")
    >>> srclen = len(srctext.split())
    >>> trglen = len(trgtext.split())
    >>>
    >>> gdfa = grow_diag_final_and(srclen, trglen, forw, back)
    >>> gdfa == sorted(set([(28, 18), (6, 6), (24, 17), (2, 1), (15, 12), (13, 12),
    ...         (2, 9), (3, 10), (26, 17), (25, 15), (8, 6), (9, 7), (20,
    ...         13), (18, 13), (0, 0), (10, 4), (13, 15), (23, 14), (7, 5),
    ...         (25, 14), (1, 9), (17, 13), (4, 11), (11, 17), (9, 2), (22,
    ...         12), (27, 18), (24, 16), (21, 3), (19, 12), (17, 12), (5,
    ...         12), (11, 6), (12, 8)]))
    True

References:
Koehn, P., A. Axelrod, A. Birch, C. Callison, M. Osborne, and D. Talbot.
2005. Edinburgh System Description for the 2005 IWSLT Speech
Translation Evaluation. In MT Eval Workshop.

:type srclen: int
:param srclen: the number of tokens in the source language
:type trglen: int
:param trglen: the number of tokens in the target language
:type e2f: str
:param e2f: the forward word alignment outputs from source-to-target
            language (in pharaoh output format)
:type f2e: str
:param f2e: the backward word alignment outputs from target-to-source
            language (in pharaoh output format)
:rtype: set(tuple(int))
:return: the symmetrized alignment points from the GDFA algorithm
-))r   )r   r   )   r   )r   r   )r   r   )r   r   )r   r   )r   r   efc            
        > [        T5      S-
  n U [        T5      :  a  Sn[        T
5       H  n[        T5       H  nX#4T;   d  M  T	 H|  n[        S [        X#4U5       5       5      nUu  pVUT;  d  M,  UT;  d  M4  UT;   d  M<  TR	                  U5        TS   R	                  U5        TS   R	                  U5        U S-  n SnM~     M     M     U(       a  gU [        T5      :  a  M  gg)zb
Search for the neighbor points and them to the intersected alignment
points if criteria are met.
r   Tc              3   .   #    U  H  u  pX-   v   M     g 7f)N ).0ijs      K/var/www/auris/envauris/lib/python3.13/site-packages/nltk/translate/gdfa.py	<genexpr>9grow_diag_final_and.<locals>.grow_diag.<locals>.<genexpr>c   s     ,U?TtqQU?Ts   r   r	   FN)lenrangetuplezipadd)prev_lenno_new_pointsr   r	   neighbore_newf_newaligned	alignment	neighborssrclentrglenunions          r   	grow_diag&grow_diag_final_and.<locals>.grow_diagR   s    
 y>A%Y' M6]vAv*(1H',,UsA68?T,U'UH+3LE !&W 4g9M"*e"3 )h 7 ' 0 0 7 ' 0 0 7 (A05 )2	 ' #( 1 Y'    c                    > [        T5       Hh  n[        T5       HV  nUT;  d  M  UT;  d  M  X4T;   d  M  TR                  X45        TS   R                  U5        TS   R                  U5        MX     Mj     g)z
Adds remaining points that are not in the intersection, not in the
neighboring alignments but in the original *e2f* and *f2e* alignments
r   r	   N)r   r   )ar   r   r   r   r    r!   r"   s      r   	final_and&grow_diag_final_and.<locals>.final_ands   so     6]Ev (W,%/MM5.1CL$$U+CL$$U+ ' #r%   )
splitr   mapintsetintersectionr"   r   r   sorted)r    r!   e2ff2er'   r   r   r#   r(   r   r   r   r"   s   ``       @@@@r   grow_diag_final_andr2      s   t 36))+
>+Q5S!''#,'(+C
>25))+
>+Q5S!''#,'(+C
>VIC%%c#h/IHNN3s8$E #G  B, ,( KcNcN)K ?
>s   2E2EN)collectionsr   r2   r   r%   r   <module>r4      s    $r%   