o
    rZhT*                     @   sj   d dl Z d dlmZ G dd dZG dd deZdd Zd	d
 Zdd ZedddgZ	G dd dZ
dS )    N)
namedtuplec                   @   st   e Zd ZdZdddZedd Zedd Zd	d
 Zdd Z	eee	Z
dd Zdd Zdd Zdd Zdd ZdS )AlignedSenta#  
    Return an aligned sentence object, which encapsulates two sentences
    along with an ``Alignment`` between them.

    Typically used in machine translation to represent a sentence and
    its translation.

        >>> from nltk.translate import AlignedSent, Alignment
        >>> algnsent = AlignedSent(['klein', 'ist', 'das', 'Haus'],
        ...     ['the', 'house', 'is', 'small'], Alignment.fromstring('0-3 1-2 2-0 3-1'))
        >>> algnsent.words
        ['klein', 'ist', 'das', 'Haus']
        >>> algnsent.mots
        ['the', 'house', 'is', 'small']
        >>> algnsent.alignment
        Alignment([(0, 3), (1, 2), (2, 0), (3, 1)])
        >>> from nltk.corpus import comtrans
        >>> print(comtrans.aligned_sents()[54])
        <AlignedSent: 'Weshalb also sollten...' -> 'So why should EU arm...'>
        >>> print(comtrans.aligned_sents()[54].alignment)
        0-0 0-1 1-0 2-2 3-4 3-5 4-7 5-8 6-3 7-9 8-9 9-10 9-11 10-12 11-6 12-6 13-13

    :param words: Words in the target language sentence
    :type words: list(str)
    :param mots: Words in the source language sentence
    :type mots: list(str)
    :param alignment: Word-level alignments between ``words`` and ``mots``.
        Each alignment is represented as a 2-tuple (words_index, mots_index).
    :type alignment: Alignment
    Nc                 C   s<   || _ || _|d u rtg | _d S t|tu sJ || _d S N)_words_mots	Alignment	alignmenttype)selfwordsmotsr    r   A/var/www/auris/lib/python3.10/site-packages/nltk/translate/api.py__init__/   s   
zAlignedSent.__init__c                 C      | j S r   )r   r
   r   r   r   r   8      zAlignedSent.wordsc                 C   r   r   )r   r   r   r   r   r   <   r   zAlignedSent.motsc                 C   r   r   )
_alignmentr   r   r   r   _get_alignment@   s   zAlignedSent._get_alignmentc                 C   s"   t t| jt| j| || _d S r   )_check_alignmentlenr   r   r   )r
   r   r   r   r   _set_alignmentC   s   
zAlignedSent._set_alignmentc                 C   sN   dd dd | jD  }dd dd | jD  }d| d| d| jdS )z_
        Return a string representation for this ``AlignedSent``.

        :rtype: str
        z[%s]z, c                 s       | ]}d | V  qdS z'%s'Nr   .0wr   r   r   	<genexpr>O       z'AlignedSent.__repr__.<locals>.<genexpr>c                 s   r   r   r   r   r   r   r   r   P   r   zAlignedSent())joinr   r   r   )r
   r   r   r   r   r   __repr__I   s   zAlignedSent.__repr__c                    s  d}|d7 }|d dd  jD 7 }|d dd  jD 7 }|d  fdd jD 7 }tt jd D ]}|d	 j|  j|d  7 }q8tt jd D ]}|d
 j|  j|d  7 }qT|dd dd  jD  7 }|dd dd  jD  7 }|d7 }|S )z<
        Dot representation of the aligned sentence
        zgraph align {
znode[shape=plaintext]
 c                 S      g | ]}d | d| dqS )"z_source" [label=""] 
r   r   r   r   r   
<listcomp>\       z'AlignedSent._to_dot.<locals>.<listcomp>c                 S   r#   )r$   z_target" [label="r%   r   r   r   r   r   r&   ]   r'   c                    s.   g | ]\}}d  j |  d j|  dqS )r$   z_source" -- "z
_target" 
)r   r   )r   uvr   r   r   r&   a   s       z)"{}_source" -- "{}_source" [style=invis]
z)"{}_target" -- "{}_target" [style=invis]
z{rank = same; %s}
 c                 s   r   )z"%s_source"Nr   r   r   r   r   r   v   r   z&AlignedSent._to_dot.<locals>.<genexpr>c                 s   r   )z"%s_target"Nr   r   r   r   r   r   w   r   })r    r   r   r   ranger   format)r
   sir   r   r   _to_dotT   s.   
zAlignedSent._to_dotc              
   C   sr   |   d}d}ztjdd| gtjtjtjd}W n ty, } ztd|d}~ww ||\}}|dS )zR
        Ipython magic : show SVG representation of this ``AlignedSent``.
        utf8svgdotz-T%s)stdinstdoutstderrz0Cannot find the dot binary from Graphviz packageN)	r1   encode
subprocessPopenPIPEOSError	Exceptioncommunicatedecode)r
   Z
dot_stringZoutput_formatprocesseouterrr   r   r   
_repr_svg_}   s   



zAlignedSent._repr_svg_c                 C   sB   d | jdd d }d | jdd d }d| d| dS )zn
        Return a human-readable string representation for this ``AlignedSent``.

        :rtype: str
        r+   N   z...z<AlignedSent: 'z' -> 'z'>)r    r   r   )r
   sourcetargetr   r   r   __str__   s   zAlignedSent.__str__c                 C   s   t | j| j| j S )zm
        Return the aligned sentence pair, reversing the directionality

        :rtype: AlignedSent
        )r   r   r   r   invertr   r   r   r   rI      s   zAlignedSent.invertr   )__name__
__module____qualname____doc__r   propertyr   r   r   r   r   r!   r1   rD   rH   rI   r   r   r   r   r      s    
	


)
r   c                   @   sV   e Zd ZdZdd Zedd Zdd Zdd	 ZdddZ	dd Z
dd Zdd Zd
S )r   ac  
    A storage class for representing alignment between two sequences, s1, s2.
    In general, an alignment is a set of tuples of the form (i, j, ...)
    representing an alignment between the i-th element of s1 and the
    j-th element of s2.  Tuples are extensible (they might contain
    additional data, such as a boolean to indicate sure vs possible alignments).

        >>> from nltk.translate import Alignment
        >>> a = Alignment([(0, 0), (0, 1), (1, 2), (2, 2)])
        >>> a.invert()
        Alignment([(0, 0), (1, 0), (2, 1), (2, 2)])
        >>> print(a.invert())
        0-0 1-0 2-1 2-2
        >>> a[0]
        [(0, 1), (0, 0)]
        >>> a.invert()[2]
        [(2, 1), (2, 2)]
        >>> b = Alignment([(0, 0), (0, 1)])
        >>> b.issubset(a)
        True
        >>> c = Alignment.fromstring('0-0 0-1')
        >>> b == c
        True
    c                 C   s:   t | |}|t g krtdd |D nd|_d |_|S )Nc                 s   s    | ]}|d  V  qdS r   Nr   r   pr   r   r   r      r   z$Alignment.__new__.<locals>.<genexpr>r   )	frozenset__new__max_len_index)clspairsr
   r   r   r   rS      s   $zAlignment.__new__c                 C   s   t dd | D S )a  
        Read a giza-formatted string and return an Alignment object.

            >>> Alignment.fromstring('0-0 2-1 9-2 21-3 10-4 7-5')
            Alignment([(0, 0), (2, 1), (7, 5), (9, 2), (10, 4), (21, 3)])

        :type s: str
        :param s: the positional alignments in giza format
        :rtype: Alignment
        :return: An Alignment object corresponding to the string representation ``s``.
        c                 S   s   g | ]}t |qS r   )
_giza2pair)r   ar   r   r   r&      s    z(Alignment.fromstring.<locals>.<listcomp>)r   split)rW   r/   r   r   r   
fromstring   s   zAlignment.fromstringc                 C   s   | j s|   | j |S )zN
        Look up the alignments that map from a given index or slice.
        )rV   _build_index__getitem__)r
   keyr   r   r   r^      s   zAlignment.__getitem__c                 C   s   t dd | D S )zI
        Return an Alignment object, being the inverted mapping.
        c                 s   s,    | ]}|d  |d f|dd  V  qdS )r*   r      Nr   rP   r   r   r   r         * z#Alignment.invert.<locals>.<genexpr>)r   r   r   r   r   rI      s   zAlignment.invertNc                 C   sV   t  }| js
|   |sttt| j}|D ]}|dd | j| D  qt|S )z
        Work out the range of the mapping from the given positions.
        If no positions are specified, compute the range of the entire mapping.
        c                 s   s    | ]\}}|V  qd S r   r   )r   _fr   r   r   r      r   z"Alignment.range.<locals>.<genexpr>)setrV   r]   listr-   r   updatesorted)r
   Z	positionsimagerQ   r   r   r   r-      s   zAlignment.rangec                 C   s   dt |  S )M
        Produce a Giza-formatted string representing the alignment.
        zAlignment(%r))rg   r   r   r   r   r!      s   zAlignment.__repr__c                 C   s   d dd t| D S )ri   r+   c                 s   s     | ]}d |dd  V  qdS )z%d-%dNr`   r   rP   r   r   r   r      s    z$Alignment.__str__.<locals>.<genexpr>)r    rg   r   r   r   r   rH      s   zAlignment.__str__c                 C   s<   dd t | jd D | _| D ]}| j|d  | qdS )z
        Build a list self._index such that self._index[i] is a list
        of the alignments originating from word i.
        c                 S   s   g | ]}g qS r   r   )r   rb   r   r   r   r&      s    z*Alignment._build_index.<locals>.<listcomp>r*   r   N)r-   rU   rV   append)r
   rQ   r   r   r   r]      s   zAlignment._build_indexr   )rJ   rK   rL   rM   rS   classmethodr\   r^   rI   r-   r!   rH   r]   r   r   r   r   r      s    

r   c                 C   s   |  d\}}t|t|fS N-r[   int)pair_stringr0   jr   r   r   rY     s   rY   c                 C   s    |  d\}}}t|t|fS rl   rn   )rp   r0   rq   rQ   r   r   r   _naacl2pair
  s   rr   c                    sP   t |tu sJ tfdd|D stdt fdd|D s&tddS )ab  
    Check whether the alignments are legal.

    :param num_words: the number of source language words
    :type num_words: int
    :param num_mots: the number of target language words
    :type num_mots: int
    :param alignment: alignment to be checked
    :type alignment: Alignment
    :raise IndexError: if alignment falls outside the sentence
    c                 3   s,    | ]}d |d    ko k n  V  qdS rO   r   r   pair)	num_wordsr   r   r     ra   z#_check_alignment.<locals>.<genexpr>z&Alignment is outside boundary of wordsc                 3   s8    | ]}|d  du pd|d    ko k n  V  qdS )r*   Nr   r   rs   )num_motsr   r   r      s   6 z%Alignment is outside boundary of motsN)r	   r   all
IndexError)ru   rv   r   r   )rv   ru   r   r     s   r   PhraseTableEntry
trg_phraselog_probc                   @   s0   e Zd ZdZdd Zdd Zdd Zdd	 Zd
S )PhraseTablezs
    In-memory store of translations for a given phrase, and the log
    probability of the those translations
    c                 C   s   t  | _d S r   )dictsrc_phrasesr   r   r   r   r   -  s   zPhraseTable.__init__c                 C   s
   | j | S )a  
        Get the translations for a source language phrase

        :param src_phrase: Source language phrase of interest
        :type src_phrase: tuple(str)

        :return: A list of target language phrases that are translations
            of ``src_phrase``, ordered in decreasing order of
            likelihood. Each list element is a tuple of the target
            phrase and its log probability.
        :rtype: list(PhraseTableEntry)
        r~   r
   
src_phraser   r   r   translations_for0  s   
zPhraseTable.translations_forc                 C   sL   t ||d}|| jvrg | j|< | j| | | j| jdd dd dS )z
        :type src_phrase: tuple(str)
        :type trg_phrase: tuple(str)

        :param log_prob: Log probability that given ``src_phrase``,
            ``trg_phrase`` is its translation
        :type log_prob: float
        )rz   r{   c                 S   r   r   )r{   )rA   r   r   r   <lambda>L  s    z!PhraseTable.add.<locals>.<lambda>T)r_   reverseN)ry   r~   rj   sort)r
   r   rz   r{   entryr   r   r   add?  s
   	

zPhraseTable.addc                 C   s
   || j v S r   r   r   r   r   r   __contains__N  s   
zPhraseTable.__contains__N)rJ   rK   rL   rM   r   r   r   r   r   r   r   r   r|   '  s    r|   )r9   collectionsr   r   rR   r   rY   rr   r   ry   r|   r   r   r   r   <module>   s   
 b