
    /hT*                     ~    S SK r S SKJr   " S S5      r " S S\5      rS rS rS	 r\" S
SS/5      r	 " S S5      r
g)    N)
namedtuplec                   ~    \ rS rSrSrSS jr\S 5       r\S 5       rS r	S r
\" \	\
5      rS	 rS
 rS rS rS rSrg)AlignedSent   a  
Return an aligned sentence object, which encapsulates two sentences
along with an ``Alignment`` between them.

Typically used in machine translation to represent a sentence and
its translation.

    >>> from nltk.translate import AlignedSent, Alignment
    >>> algnsent = AlignedSent(['klein', 'ist', 'das', 'Haus'],
    ...     ['the', 'house', 'is', 'small'], Alignment.fromstring('0-3 1-2 2-0 3-1'))
    >>> algnsent.words
    ['klein', 'ist', 'das', 'Haus']
    >>> algnsent.mots
    ['the', 'house', 'is', 'small']
    >>> algnsent.alignment
    Alignment([(0, 3), (1, 2), (2, 0), (3, 1)])
    >>> from nltk.corpus import comtrans
    >>> print(comtrans.aligned_sents()[54])
    <AlignedSent: 'Weshalb also sollten...' -> 'So why should EU arm...'>
    >>> print(comtrans.aligned_sents()[54].alignment)
    0-0 0-1 1-0 2-2 3-4 3-5 4-7 5-8 6-3 7-9 8-9 9-10 9-11 10-12 11-6 12-6 13-13

:param words: Words in the target language sentence
:type words: list(str)
:param mots: Words in the source language sentence
:type mots: list(str)
:param alignment: Word-level alignments between ``words`` and ``mots``.
    Each alignment is represented as a 2-tuple (words_index, mots_index).
:type alignment: Alignment
Nc                 x    Xl         X l        Uc  [        / 5      U l        g [	        U5      [        L d   eX0l        g N)_words_mots	Alignment	alignmenttype)selfwordsmotsr   s       J/var/www/auris/envauris/lib/python3.13/site-packages/nltk/translate/api.py__init__AlignedSent.__init__/   s4    
&r]DN	?i///&N    c                     U R                   $ r   )r	   r   s    r   r   AlignedSent.words8   s    {{r   c                     U R                   $ r   )r
   r   s    r   r   AlignedSent.mots<   s    zzr   c                     U R                   $ r   )
_alignmentr   s    r   _get_alignmentAlignedSent._get_alignment@   s    r   c                 v    [        [        U R                  5      [        U R                  5      U5        Xl        g r   )_check_alignmentlenr   r   r   )r   r   s     r   _set_alignmentAlignedSent._set_alignmentC   s#    TZZ#dii.)D#r   c                     SSR                  S U R                   5       5      -  nSSR                  S U R                   5       5      -  nSU SU SU R                  < S3$ )zG
Return a string representation for this ``AlignedSent``.

:rtype: str
z[%s]z, c              3   ,   #    U  H
  nS U-  v   M     g7fz'%s'N .0ws     r   	<genexpr>'AlignedSent.__repr__.<locals>.<genexpr>O   s     #D1FQJ   c              3   ,   #    U  H
  nS U-  v   M     g7fr%   r&   r'   s     r   r*   r+   P   s     "Bz!6A:zr,   zAlignedSent())joinr	   r
   r   )r   r   r   s      r   __repr__AlignedSent.__repr__I   s^     $))#D#DDE"Btzz"BBCeWBtfBt.ACCr   c                    SnUS-  nUSR                  U R                   Vs/ s H  nSU SU S3PM     sn5      -  nUSR                  U R                   Vs/ s H  nSU SU S3PM     sn5      -  nUSR                  U R                   VVs/ s H(  u  p4SU R                  U    SU R                  U    S	3PM*     snn5      -  n[	        [        U R                  5      S
-
  5       H5  nUSR                  U R                  U   U R                  US
-      5      -  nM7     [	        [        U R                  5      S
-
  5       H5  nUSR                  U R                  U   U R                  US
-      5      -  nM7     USSR                  S U R                   5       5      -  -  nUSSR                  S U R                   5       5      -  -  nUS-  nU$ s  snf s  snf s  snnf )z,
Dot representation of the aligned sentence
zgraph align {
znode[shape=plaintext]
 "z_source" [label="z"] 
z_target" [label="z_source" -- "z
_target" 
   z)"{}_source" -- "{}_source" [style=invis]
z)"{}_target" -- "{}_target" [style=invis]
z{rank = same; %s}
 c              3   ,   #    U  H
  nS U-  v   M     g7f)z"%s_source"Nr&   r'   s     r   r*   &AlignedSent._to_dot.<locals>.<genexpr>v   s     .V+Q}q/@+r,   c              3   ,   #    U  H
  nS U-  v   M     g7f)z"%s_target"Nr&   r'   s     r   r*   r8   w   s     .U*Q}q/@*r,   })r/   r	   r
   r   ranger    format)r   sr)   uvis         r   _to_dotAlignedSent._to_dotT   s    	&& 	
RWWL1!-aS6LMM	RWW

K
1!-aS6
KLL 	
RWW !OO+DA DKKN#=A{K+
 	
 s4;;'!+,A=DDAAE" A - s4::*+A=DD

1

1q5! A , 	
"chh.V$++.V&VWW	"chh.U$**.U&UVV	S? MKs   G
G
/Gc                 V   U R                  5       R                  S5      nSn [        R                  " SSU-  /[        R                  [        R                  [        R                  S9nUR                  U5      u  pVUR                  S5      $ ! [
         a  n[        S5      UeSnAff = f)zB
Ipython magic : show SVG representation of this ``AlignedSent``.
utf8svgdotz-T%s)stdinstdoutstderrz0Cannot find the dot binary from Graphviz packageN)	rA   encode
subprocessPopenPIPEOSError	Exceptioncommunicatedecode)r   
dot_stringoutput_formatprocesseouterrs          r   
_repr_svg_AlignedSent._repr_svg_}   s     \\^**62
	W &&./ oo!!	G &&z2zz&!!	  	WNOUVV	Ws   AB 
B(B##B(c                     SR                  U R                  5      SS S-   nSR                  U R                  5      SS S-   nSU SU S3$ )zV
Return a human-readable string representation for this ``AlignedSent``.

:rtype: str
r6   N   z...z<AlignedSent: 'z' -> 'z'>)r/   r	   r
   )r   sourcetargets      r   __str__AlignedSent.__str__   sU     $++&s+e3$**%cr*U2 vhb99r   c                 t    [        U R                  U R                  U R                  R	                  5       5      $ )zU
Return the aligned sentence pair, reversing the directionality

:rtype: AlignedSent
)r   r
   r	   r   invertr   s    r   ra   AlignedSent.invert   s(     4::t{{DOO4J4J4LMMr   )r   r
   r	   r   r   )__name__
__module____qualname____firstlineno____doc__r   propertyr   r   r   r!   r   r0   rA   rX   r^   ra   __static_attributes__r&   r   r   r   r      sh    >'    $ 8I	D'R"&:Nr   r   c                   V    \ rS rSrSrS r\S 5       rS rS r	SS jr
S	 rS
 rS rSrg)r      a  
A storage class for representing alignment between two sequences, s1, s2.
In general, an alignment is a set of tuples of the form (i, j, ...)
representing an alignment between the i-th element of s1 and the
j-th element of s2.  Tuples are extensible (they might contain
additional data, such as a boolean to indicate sure vs possible alignments).

    >>> from nltk.translate import Alignment
    >>> a = Alignment([(0, 0), (0, 1), (1, 2), (2, 2)])
    >>> a.invert()
    Alignment([(0, 0), (1, 0), (2, 1), (2, 2)])
    >>> print(a.invert())
    0-0 1-0 2-1 2-2
    >>> a[0]
    [(0, 1), (0, 0)]
    >>> a.invert()[2]
    [(2, 1), (2, 2)]
    >>> b = Alignment([(0, 0), (0, 1)])
    >>> b.issubset(a)
    True
    >>> c = Alignment.fromstring('0-0 0-1')
    >>> b == c
    True
c                     [         R                  X5      nU[        / 5      :w  a  [        S U 5       5      OSUl        S Ul        U$ )Nc              3   *   #    U  H	  oS    v   M     g7fr   Nr&   r(   ps     r   r*   $Alignment.__new__.<locals>.<genexpr>   s     +d!d   r   )	frozenset__new__max_len_index)clspairsr   s      r   rt   Alignment.__new__   s=      ,/3y}/DC+d++!	r   c                 r    [        UR                  5        Vs/ s H  n[        U5      PM     sn5      $ s  snf )a]  
Read a giza-formatted string and return an Alignment object.

    >>> Alignment.fromstring('0-0 2-1 9-2 21-3 10-4 7-5')
    Alignment([(0, 0), (2, 1), (7, 5), (9, 2), (10, 4), (21, 3)])

:type s: str
:param s: the positional alignments in giza format
:rtype: Alignment
:return: An Alignment object corresponding to the string representation ``s``.
)r   split
_giza2pair)rx   r=   as      r   
fromstringAlignment.fromstring   s+     ;A*Q-;<<;s   4c                 z    U R                   (       d  U R                  5         U R                   R                  U5      $ )z>
Look up the alignments that map from a given index or slice.
)rw   _build_index__getitem__)r   keys     r   r   Alignment.__getitem__   s,     {{{{&&s++r   c                 &    [        S U  5       5      $ )z9
Return an Alignment object, being the inverted mapping.
c              3   @   #    U  H  oS    US   4USS -   v   M     g7f)r5   r      Nr&   ro   s     r   r*   #Alignment.invert.<locals>.<genexpr>   s%     >AQ41,12.s   )r   r   s    r   ra   Alignment.invert   s     >>>>r   Nc                 &   [        5       nU R                  (       d  U R                  5         U(       d'  [        [	        [        U R                  5      5      5      nU H(  nUR                  S U R                  U    5       5        M*     [        U5      $ )z
Work out the range of the mapping from the given positions.
If no positions are specified, compute the range of the entire mapping.
c              3   *   #    U  H	  u  pUv   M     g 7fr   r&   )r(   _fs      r   r*   "Alignment.range.<locals>.<genexpr>   s     6~tq~rr   )setrw   r   listr;   r    updatesorted)r   	positionsimagerp   s       r   r;   Alignment.range   sg    
 {{U3t{{#345IALL6t{{1~66 e}r   c                     S[        U 5      -  $ )=
Produce a Giza-formatted string representing the alignment.
zAlignment(%r))r   r   s    r   r0   Alignment.__repr__   s     --r   c                 D    SR                  S [        U 5       5       5      $ )r   r6   c              3   2   #    U  H  nS USS -  v   M     g7f)z%d-%dNr   r&   ro   s     r   r*   $Alignment.__str__.<locals>.<genexpr>   s     >A!BQ%s   )r/   r   r   s    r   r^   Alignment.__str__   s     xx>>>>r   c                     [        U R                  S-   5       Vs/ s H  n/ PM     snU l        U  H$  nU R                  US      R                  U5        M&     gs  snf )zh
Build a list self._index such that self._index[i] is a list
of the alignments originating from word i.
r5   r   N)r;   rv   rw   append)r   r   rp   s      r   r   Alignment._build_index   sS    
 $)Q#78#7ar#78AKK!$$Q'  9s   A)rw   rv   r   )rc   rd   re   rf   rg   rt   classmethodr   r   ra   r;   r0   r^   r   ri   r&   r   r   r   r      s>    2 = =,?.?(r   r   c                 T    U R                  S5      u  p[        U5      [        U5      4$ N-r|   int)pair_stringr@   js      r   r}   r}     s&    S!DAq63q6>r   c                 V    U R                  S5      u  pn[        U5      [        U5      4$ r   r   )r   r@   r   rp   s       r   _naacl2pairr   
  s(    $GA!q63q6>r   c                    ^ ^ [        U5      [        L d   e[        U 4S jU 5       5      (       d  [        S5      e[        U4S jU 5       5      (       d  [        S5      eg)a>  
Check whether the alignments are legal.

:param num_words: the number of source language words
:type num_words: int
:param num_mots: the number of target language words
:type num_mots: int
:param alignment: alignment to be checked
:type alignment: Alignment
:raise IndexError: if alignment falls outside the sentence
c              3   T   >#    U  H  nS US    s=:*  =(       a    T:  Os  v   M     g7frn   r&   )r(   pair	num_wordss     r   r*   #_check_alignment.<locals>.<genexpr>  s%     >IDqDG''i''Is   %(z&Alignment is outside boundary of wordsc              3   n   >#    U  H*  oS    SL =(       d    SUS    s=:*  =(       a    T:  Os  v   M,     g7f)r5   Nr   r&   )r(   r   num_motss     r   r*   r      s0     PidAw$9!tAw"9"9"99is   25z%Alignment is outside boundary of motsN)r   r   all
IndexError)r   r   r   s   `` r   r   r     sW     	?i'''>I>>>ABBPiPPP@AA Qr   PhraseTableEntry
trg_phraselog_probc                   0    \ rS rSrSrS rS rS rS rSr	g)	PhraseTablei'  zg
In-memory store of translations for a given phrase, and the log
probability of the those translations
c                 "    [        5       U l        g r   )dictsrc_phrasesr   s    r   r   PhraseTable.__init__-  s    6r   c                      U R                   U   $ )a}  
Get the translations for a source language phrase

:param src_phrase: Source language phrase of interest
:type src_phrase: tuple(str)

:return: A list of target language phrases that are translations
    of ``src_phrase``, ordered in decreasing order of
    likelihood. Each list element is a tuple of the target
    phrase and its log probability.
:rtype: list(PhraseTableEntry)
r   r   
src_phrases     r   translations_forPhraseTable.translations_for0  s     
++r   c                     [        X#S9nXR                  ;  a  / U R                  U'   U R                  U   R                  U5        U R                  U   R                  S SS9  g)z
:type src_phrase: tuple(str)
:type trg_phrase: tuple(str)

:param log_prob: Log probability that given ``src_phrase``,
    ``trg_phrase`` is its translation
:type log_prob: float
)r   r   c                     U R                   $ r   )r   )rU   s    r   <lambda>!PhraseTable.add.<locals>.<lambda>L  s    

r   T)r   reverseN)r   r   r   sort)r   r   r   r   entrys        r   addPhraseTable.add?  sb     !JJ---+-DZ($++E2$)).BD)Qr   c                     XR                   ;   $ r   r   r   s     r   __contains__PhraseTable.__contains__N  s    ----r   r   N)
rc   rd   re   rf   rg   r   r   r   r   ri   r&   r   r   r   r   '  s    
",R.r   r   )rK   collectionsr   r   rs   r   r}   r   r   r   r   r&   r   r   <module>r      sZ     "QN QNh_(	 _(D

B* 0<2LM (. (.r   