
    /h!                         S r SSKr SSKJr  SSKJr  \R                  " S5      r
 " S	 S
5      rS rS r\4S jr\4S jrS rS rg! \ a    S rS rS r NFf = f)z

A port of the Gale-Church Aligner.

Gale & Church (1993), A Program for Aligning Sentences in Bilingual Corpora.
https://aclweb.org/anthology/J93-1004.pdf

    N)logsf)normc                     [        U 5      nSSSU-  -   -  nU[        R                  " U* U-  S-
  USUSUSUSUSUS	US
USUS-  -   -  -   -  -   -  -   -  -   -  -   -  -   -  -   -  -   5      -  nU S:  a  U$ SU-
  $ )zComplementary error function.         ?gś??g5 ?g`yg?gƸ?gꪂIǿg#v?g9)gS?gޅ1Ogv(?g        g       @)absmathexp)xztrs       R/var/www/auris/envauris/lib/python3.13/site-packages/nltk/translate/gale_church.pyerfccr      s    FS1WBF"' *"#$/&''1Aq:~9U4V'V'X%X#"!"	

 
< 8H7N    c                 R    SS[        U [        R                  " S5      -  5      -  -
  $ )u>   Return the area under the normal distribution from M{-∞..x}.r   r      )r   r	   sqrtr   s    r   norm_cdfr   @   s$    3q499Q</0000r   c                 ~     [         R                  " S[        U 5      -
  5      $ ! [         a    [	        S5      s $ f = f)Nr   -inf)r	   logr   
ValueErrorfloatr   s    r   
norm_logsfr   D   s7    	!88AO,, 	!= 	!s   !$ <<r   c                   .    \ rS rSrSSSSSSS.rSrSrS	rg
)LanguageIndependentN   gׁsF?g{Gz?gbX9ȶ?gI+?))r   r   )r   r   )r   r   )r   r   )r   r   )r   r   r   g333333@ N)__name__
__module____qualname____firstlineno__PRIORSAVERAGE_CHARACTERSVARIANCE_CHARACTERS__static_attributes__r    r   r   r   r   N   s+     F r   r   c                    / n[        U5      [        U5      4nUS:w  a  [        S U 5       5      (       a   X   u  pV[        U5       H:  n[        U5       H(  nUR	                  US   U-
  S-
  US   U-
  S-
  45        M*     M<     US   U-
  US   U-
  4nUS:w  a  [        S U 5       5      (       a  M  USSS2   $ ! [         a    US   S-
  US   S-
  4n M  f = f)a  
Traverse the alignment cost from the tracebacks and retrieves
appropriate sentence pairs.

:param backlinks: A dictionary where the key is the alignment points and value is the cost (referencing the LanguageIndependent.PRIORS)
:type backlinks: dict
:param source_sents_lens: A list of target sentences' lengths
:type source_sents_lens: list(int)
:param target_sents_lens: A list of target sentences' lengths
:type target_sents_lens: list(int)
)r   r   c              3   *   #    U  H	  oS :  v   M     g7f)r   Nr    ).0ps     r   	<genexpr>trace.<locals>.<genexpr>n   s     $>X!VXs   r   r   N)lenall	TypeErrorrangeappend)		backlinkssource_sents_lenstarget_sents_lenslinkspositionsr   ijs	            r   tracer=   `   s    E%&,=(>?H
f
$>X$>!>!>	&DA qA1XhqkAo18A;?Q3FGH   QK!OXa[1_5 f
$>X$>!>!> 2;  	 a!q9H	s   B= =CCc                   ^ ^^^ [        U U4S j[        US   5       5       5      n[        UU4S j[        US   5       5       5      n XgUR                  -  -   S-  nXeR                  -  U-
  [        R                  " XR
                  -  5      -  n	[        [        [        U	5      5      -   [        R                  " UR                  U   5      -   * $ ! [         a    [        S5      s $ f = f)a,  Returns the log probability of the two sentences C{source_sents[i]}, C{target_sents[j]}
being aligned with a specific C{alignment}.

@param i: The offset of the source sentence.
@param j: The offset of the target sentence.
@param source_sents: The list of source sentence lengths.
@param target_sents: The list of target sentence lengths.
@param alignment: The alignment type, a tuple of two integers.
@param params: The sentence alignment parameters.

@returns: The log probability of a specific alignment between the two sentences, given the parameters.
c              3   :   >#    U  H  nTTU-
  S -
     v   M     g7fr   Nr    )r+   offsetr;   source_sentss     r   r-   !align_log_prob.<locals>.<genexpr>         M9Lvl1v:>*9L   r   c              3   :   >#    U  H  nTTU-
  S -
     v   M     g7fr@   r    )r+   rA   r<   target_sentss     r   r-   rC      rD   rE   r   r   r   )sumr3   r&   r	   r   r'   ZeroDivisionErrorr   LOG2r   r   r   r%   )
r;   r<   rB   rG   	alignmentparamsl_sl_tmdeltas
   ````      r   align_log_probrQ   |   s     My|9LM
MC
My|9LM
MC 2222a700036$))***;
 
 Js5z**TXXfmmI6N-OOPP  V}s   
AC C*)C*c                 R   [        UR                  R                  5       5      n/ /n0 n[        [	        U 5      S-   5       H  n[        [	        U5      S-   5       H  n[        S5      nSn	U HJ  n
SU
S   -
  nXzS   -
  nU[	        U5      * :  d  US:  a  M*  XK   U   [        XgXX5      -   nX:  d  MF  UnU
n	ML     U[        S5      :X  a  SnXXg4'   US   R                  U5        M     [	        U5      S:  a  UR                  S5        UR                  / 5        M     [        XPU5      $ )am  Return the sentence alignment of two text blocks (usually paragraphs).

    >>> align_blocks([5,5,5], [7,7,7])
    [(0, 0), (1, 1), (2, 2)]
    >>> align_blocks([10,5,5], [12,20])
    [(0, 0), (1, 1), (2, 1)]
    >>> align_blocks([12,20], [10,5,5])
    [(0, 0), (1, 1), (1, 2)]
    >>> align_blocks([10,2,10,10,2,10], [12,3,20,3,12])
    [(0, 0), (1, 1), (2, 2), (3, 2), (4, 3), (5, 4)]

@param source_sents_lens: The list of source sentence lengths.
@param target_sents_lens: The list of target sentence lengths.
@param params: the sentence alignment parameters.
@return: The sentence alignments, a list of index pairs.
r   infNr/   r   r   )
listr%   keysr3   r0   r   rQ   r4   popr=   )r6   r7   rL   alignment_typesDr5   r;   r<   min_dist	min_alignaprev_iprev_jr,   s                 r   align_blocksr^      s5   $ 6==--/0O 
AI3()A-.s,-12AU|HI$adqTSVG#vzIf%+)  < H !I % 5<' )qfbELL"' 3* q6A:EE!H	1 /4 /@AAr   c           	          [        U 5      [        U5      :w  a  [        S5      e[        X5       VVs/ s H  u  p4[        X4U5      PM     snn$ s  snnf )a  Creates the sentence alignment of two texts.

Texts can consist of several blocks. Block boundaries cannot be crossed by sentence
alignment links.

Each block consists of a list that contains the lengths (in characters) of the sentences
in this block.

@param source_blocks: The list of blocks in the source text.
@param target_blocks: The list of blocks in the target text.
@param params: the sentence alignment parameters.

@returns: A list of sentence alignment lists
z>Source and target texts do not have the same number of blocks.)r0   r   zipr^   )source_blockstarget_blocksrL   source_blocktarget_blocks        r   align_textsre      s\     =S//L
 	
 +.m*K*K&L 	\8*K  s   Ac              #   R   ^ ^#    U U4S jn U" T R                  5       5      v   M  7f)zSplits an iterator C{it} at values of C{split_value}.

Each instance of C{split_value} is swallowed. The iterator produces
subiterators which need to be consumed fully before the next subiterator
can be used.
c              3   X   >#    U nUT:w  a  Uv   TR                  5       nUT:w  a  M  g g 7fNnext)firstvitsplit_values     r   _chunk_iterator!split_at.<locals>._chunk_iterator   s-     ;G	A ;s   #**ri   )rm   rn   ro   s   `` r   split_atrq      s&      bggi(( s   #'c                     [        X5       VVs/ s H/  n[        X15       Vs/ s H  n[        S U 5       5      PM     snPM1     snn$ s  snf s  snnf )zParses a stream of tokens and splits it into sentences (using C{soft_delimiter} tokens)
and blocks (using C{hard_delimiter} tokens) for use with the L{align_texts} function.
c              3   8   #    U  H  n[        U5      v   M     g 7frh   )r0   )r+   tokens     r   r-   %parse_token_stream.<locals>.<genexpr>  s     4uE

s   )rq   rH   )streamsoft_delimiterhard_delimiterblock_itsentence_its        r   parse_token_streamr{      s_     !8
 9H  (A	
A 444A	
 9 	
s   AA	A	A)__doc__r	   r   r   r   scipy.statsImportErrorr   r   r   rJ   r   r=   rQ   r^   re   rq   r{   r    r   r   <module>r      s    4!( j xx{ $8Q8 ?R 3Bl 6I :)$
M  1!%N1![1!s   A	 	AA