
    /h>                         S r SSKrSSKJrJr  SSKJr  S r\R                  r
S rSr SSKJr  Sr S
r Sr  " S S\S9r " S S\5      r " S S\5      r " S S\5      r " S S5      rg! \ a    S	 r NIf = f)z
Provides scoring functions for a number of association measures through a
generic, abstract implementation in ``NgramAssocMeasures``, and n-specific
``BigramAssocMeasures`` and ``TrigramAssocMeasures``.
    N)ABCMetaabstractmethodreducec                 .    [         R                  " U 5      $ N)_mathlog2)xs    P/var/www/auris/envauris/lib/python3.13/site-packages/nltk/metrics/association.py<lambda>r      s    %**Q-    c                     [        S U 5      $ )Nc                 
    X-  $ r    )r   ys     r   r   <lambda>.<locals>.<lambda>   s    r   r   )ss    r   r   r      s    V.2r   g#B;)fisher_exactc                      [         er   NotImplementedError)_args_kwargss     r   r   r      s    !!r   c                       \ rS rSrSrSr\\S 5       5       r\\S 5       5       r	\
S 5       r\S 5       r\
S 5       r\
S	 5       r\S
 5       r\
S 5       r\
S 5       r\
S 5       r\
S 5       rSrg)NgramAssocMeasures-   a  
An abstract class defining a collection of generic association measures.
Each public method returns a score, taking the following arguments::

    score_fn(count_of_ngram,
             (count_of_n-1gram_1, ..., count_of_n-1gram_j),
             (count_of_n-2gram_1, ..., count_of_n-2gram_k),
             ...,
             (count_of_1gram_1, ..., count_of_1gram_n),
             count_of_total_words)

See ``BigramAssocMeasures`` and ``TrigramAssocMeasures``

Inheriting classes should define a property _n, and a method _contingency
which calculates contingency values from marginals in order for all
association measures defined here to be usable.
r   c                      [        S5      e)z>Calculates values of a contingency table from marginal values.?The contingency table is not availablein the general ngram caser   	marginalss    r   _contingencyNgramAssocMeasures._contingencyB        "P
 	
r   c                      [        S5      e)ACalculates values of contingency table marginals from its values.r!   r   )contingencys    r   
_marginalsNgramAssocMeasures._marginalsJ   r&   r   c              #     ^ ^^#    [        T5      n[        T R                  5       Vs/ s H  nSU-  PM
     nn[        [        T5      5       H/  m[	        U UU4S jU 5       5      UT R                  S-
  -  -  v   M1     gs  snf 7f)3Calculates expected values for a contingency table.   c              3      >^#    U  H2  m[        UUU4S  j[        STR                  -  5       5       5      v   M4     g7f)c              3   H   >#    U  H  oT-  TT-  :X  d  M  TU   v   M     g 7fr   r   ).0r   contijs     r   	<genexpr>@NgramAssocMeasures._expected_values.<locals>.<genexpr>.<genexpr>]   s&     P)9A!eQ=OQ)9s   ""   N)sumrange_n)r1   r4   clsr2   r3   s    @r   r5   6NgramAssocMeasures._expected_values.<locals>.<genexpr>\   s3      ! Pq#&&y)9PPP!s   :>N)r8   r9   r:   len_product)r;   r2   n_allr3   bitss   `` ` r   _expected_values#NgramAssocMeasures._expected_valuesR   s~      D	 %cff.1Q. s4y!A  !  SVVaZ(	* " /s   #BBABc                  (    U [            U [           -  $ )z Scores ngrams by their frequency)NGRAMTOTALr"   s    r   raw_freqNgramAssocMeasures.raw_freqc   s     )E"222r   c                     U[            [        U[           5      U[           U R                  S-
  -  -  -
  U[            [
        -   S-  -  $ )zqScores ngrams using Student's t test with independence hypothesis
for unigrams, as in Manning and Schutze 5.3.1.
r.   g      ?)rD   r>   UNIGRAMSrE   r:   _SMALLr;   r#   s     r   	student_tNgramAssocMeasures.student_th   sR     ey*+y/?CFFQJ/OPQu&3./ 	/r   c                 x    U R                   " U6 nU R                  U5      n[        S [        X#5       5       5      $ )zJScores ngrams using Pearson's chi-square as in Manning and Schutze
5.3.3.
c              3   H   #    U  H  u  pX-
  S -  U[         -   -  v   M     g7f)r7   N)rJ   r1   obsexps      r   r5   ,NgramAssocMeasures.chi_sq.<locals>.<genexpr>y   s"     U_CI!#sV|4_s    ")r$   rA   r8   zip)r;   r#   r2   expss       r   chi_sqNgramAssocMeasures.chi_sqr   s9    
 +##D)US_UUUr   c                  `    U [            UR                  SS5      -  [        U [           5      -  $ )zScores ngrams using a variant of mutual information. The keyword
argument power sets an exponent (default 3) for the numerator. No
logarithm of the result is calculated.
power   )rD   getr>   rI   )r#   kwargss     r   mi_likeNgramAssocMeasures.mi_like{   s5     6::gq#99Hh=
 
 	
r   c                     [        U[           U[           U R                  S-
  -  -  5      [        [	        U[
           5      5      -
  $ )zNScores ngrams by pointwise mutual information, as in Manning and
Schutze 5.4.
r.   )_log2rD   rE   r:   r>   rI   rK   s     r   pmiNgramAssocMeasures.pmi   sG    
 Yu%	%(8SVVaZ(HHIEYx()M
 
 	
r   c           
      z    U R                   " U6 nS[        S [        X R                  U5      5       5       5      -  $ )zFScores ngrams using likelihood ratios as in Manning and Schutze 5.3.4.r7   c              3   b   #    U  H%  u  pU[        X[        -   -  [        -   5      -  v   M'     g 7fr   )_lnrJ   rP   s      r   r5   6NgramAssocMeasures.likelihood_ratio.<locals>.<genexpr>   s/      
A #c6\*V344As   -/)r$   r8   rT   rA   r;   r#   r2   s      r   likelihood_ratio#NgramAssocMeasures.likelihood_ratio   sE     +3 
&:&:4&@A
 
 
 	
r   c                     [        U[           5      U[           U R                  S-
  -  -  nU[           [        U[           U-  5      S-
  -  $ )z1Scores ngrams using the Poisson-Stirling measure.r.   )r>   rI   rE   r:   rD   r`   )r;   r#   rR   s      r   poisson_stirling#NgramAssocMeasures.poisson_stirling   sN     y*+y/?CFFQJ/OP55)9C)?#@1#DEEr   c                 H    U R                   " U6 nUS   [        USS 5      -  $ )z&Scores ngrams using the Jaccard index.r   Nr   )r$   r8   rg   s      r   jaccardNgramAssocMeasures.jaccard   s-     +AwT#2Y''r   r   N)__name__
__module____qualname____firstlineno____doc__r:   staticmethodr   r$   r*   classmethodrA   rF   rL   rV   r]   ra   rh   rk   rn   __static_attributes__r   r   r   r   r   -   s    $ 
B
  
 
  
    3 3 / / V V 
 
 
 
 
 
 F F
 ( (r   r   )	metaclassc                       \ rS rSrSrSr\S 5       r\S 5       r\S 5       r	\
S 5       r\
S 5       r\
S	 5       r\S
 5       rSrg)BigramAssocMeasures   a8  
A collection of bigram association measures. Each association measure
is provided as a function with three arguments::

    bigram_score_fn(n_ii, (n_ix, n_xi), n_xx)

The arguments constitute the marginals of a contingency table, counting
the occurrences of particular events in a corpus. The letter i in the
suffix refers to the appearance of the word in question, while x indicates
the appearance of any word. Thus, for example:

- n_ii counts ``(w1, w2)``, i.e. the bigram being scored
- n_ix counts ``(w1, *)``
- n_xi counts ``(*, w2)``
- n_xx counts ``(*, *)``, i.e. any bigram

This may be shown with respect to a contingency table::

            w1    ~w1
         ------ ------
     w2 | n_ii | n_oi | = n_xi
         ------ ------
    ~w2 | n_io | n_oo |
         ------ ------
         = n_ix        TOTAL = n_xx
r7   c                 4    Uu  p4X@-
  nX0-
  nXXbU -
  U-
  U-
  4$ )zECalculates values of a bigram contingency table from marginal values.r   )n_iin_ix_xi_tuplen_xxn_ixn_xin_oin_ios          r   r$    BigramAssocMeasures._contingency   s2     %{{D+"4t";<<r   c                 (    XU -   X -   4X1-   U-   U -   4$ )r(   r   )r}   r   r   n_oos       r   r*   BigramAssocMeasures._marginals   s&     d{DK0$+2Dt2KLLr   c              #      #    [        U 5      n[        S5       H!  nX   XS-     -   X   XS-     -   -  U-  v   M#     g7f)r-      r.   r7   N)r8   r9   )r2   r   r3   s      r   rA   $BigramAssocMeasures._expected_values   sH      4yqA7Ta%[(TWtE{-BCdJJ s   =?c                 f    U R                   " U6 u  p#pEX%-  X4-  -
  S-  X#-   X$-   -  X5-   -  XE-   -  -  $ )zTScores bigrams using phi-square, the square of the Pearson correlation
coefficient.
r7   )r$   )r;   r#   r}   r   r   r   s         r   phi_sqBigramAssocMeasures.phi_sq   sN    
 "%!1!19!=Ddk)a/[T[)T[9T[I
 	
r   c                 6    Uu  pEX0R                  XU4U5      -  $ )zsScores bigrams using chi-square, i.e. phi-sq multiplied by the number
of bigrams, as in Manning and Schutze 5.3.3.
)r   )r;   r}   r~   r   r   r   s         r   rV   BigramAssocMeasures.chi_sq   s#    
 %jjd|T:::r   c                 J    U R                   " U6 u  p#pE[        X#/XE//SS9u  pgU$ )zScores bigrams using Fisher's Exact Test (Pedersen 1996).  Less
sensitive to small counts than PMI or Chi Sq, but also more expensive
to compute. Requires scipy.
less)alternative)r$   r   )r;   r#   r}   r   r   r   oddspvalues           r   fisherBigramAssocMeasures.fisher   s6     "%!1!19!=D%|d\&BPVWr   c                     Uu  p4SU -  X4-   -  $ )z(Scores bigrams using Dice's coefficient.r7   r   )r}   r~   r   r   r   s        r   diceBigramAssocMeasures.dice   s     %4x4;''r   r   N)rp   rq   rr   rs   rt   r:   ru   r$   r*   rA   rv   r   rV   r   r   rw   r   r   r   rz   rz      s    6 
B= = M M K K 
 
 ; ; 	 	 ( (r   rz   c                   <    \ rS rSrSrSr\S 5       r\S 5       rSr	g)TrigramAssocMeasures   a  
A collection of trigram association measures. Each association measure
is provided as a function with four arguments::

    trigram_score_fn(n_iii,
                     (n_iix, n_ixi, n_xii),
                     (n_ixx, n_xix, n_xxi),
                     n_xxx)

The arguments constitute the marginals of a contingency table, counting
the occurrences of particular events in a corpus. The letter i in the
suffix refers to the appearance of the word in question, while x indicates
the appearance of any word. Thus, for example:

- n_iii counts ``(w1, w2, w3)``, i.e. the trigram being scored
- n_ixx counts ``(w1, *, *)``
- n_xxx counts ``(*, *, *)``, i.e. any trigram
rZ   c                     Uu  pEnUu  pxn	X`-
  n
XP-
  nX@-
  nX-
  U
-
  U-
  nX-
  U
-
  U-
  nXp-
  U-
  U-
  nX0-
  U
-
  U-
  U-
  U-
  U-
  U-
  nX
XXUU4$ )zCalculates values of a trigram contingency table (or cube) from
marginal values.
>>> TrigramAssocMeasures._contingency(1, (1, 1, 1), (1, 73, 1), 2000)
(1, 0, 0, 0, 0, 72, 0, 1927)
r   )n_iiin_iix_tuplen_ixx_tuplen_xxxn_iixn_ixin_xiin_ixxn_xixn_xxin_oiin_ioin_iion_ooin_oion_ioon_ooos                    r   r$   !TrigramAssocMeasures._contingency  s     !,u +u%-%-%-%-5=EMeE%GGr   c                  v    U u  pp4pVpxUX-   X-   X-   4X-   U-   U-   X-   U-   U-   X-   U-   U-   4[        U 5      4$ )zCalculates values of contingency table marginals from its values.
>>> TrigramAssocMeasures._marginals(1, 0, 0, 0, 0, 72, 0, 1927)
(1, (1, 1, 1), (1, 73, 1), 2000)
r8   )	r)   r   r   r   r   r   r   r   r   s	            r   r*   TrigramAssocMeasures._marginals&  sm     BM>eE%]EM5=9%-%-%-
 	
 		
r   r   N
rp   rq   rr   rs   rt   r:   ru   r$   r*   rw   r   r   r   r   r      s6    & 
BH H$ 
 
r   r   c                   <    \ rS rSrSrSr\S 5       r\S 5       rSr	g)QuadgramAssocMeasuresi9  a
  
A collection of quadgram association measures. Each association measure
is provided as a function with five arguments::

    trigram_score_fn(n_iiii,
                    (n_iiix, n_iixi, n_ixii, n_xiii),
                    (n_iixx, n_ixix, n_ixxi, n_xixi, n_xxii, n_xiix),
                    (n_ixxx, n_xixx, n_xxix, n_xxxi),
                    n_all)

The arguments constitute the marginals of a contingency table, counting
the occurrences of particular events in a corpus. The letter i in the
suffix refers to the appearance of the word in question, while x indicates
the appearance of any word. Thus, for example:

- n_iiii counts ``(w1, w2, w3, w4)``, i.e. the quadgram being scored
- n_ixxi counts ``(w1, *, *, w4)``
- n_xxxx counts ``(*, *, *, *)``, i.e. any quadgram
r   c                    Uu  pVpxUu  pppUu  nnnnX-
  nXp-
  nX`-
  nX-
  U-
  U-
  nX-
  U-
  U-
  nX-
  U-
  U-
  nUU -
  U-
  U-
  U-
  U-
  U-
  U-
  nXP-
  nX-
  U-
  U-
  nX-
  U-
  U-
  nUU -
  U-
  U-
  U-
  U-
  U-
  U-
  nX-
  U-
  U-
  nUU -
  U-
  U-
  U-
  U-
  U-
  U-
  nX-
  U-
  U-
  U-
  U-
  U-
  U-
  n UU -
  U-
  U-
  U-
  U-
  U-
  U-
  U-
  U-
  U-
  U-
  U-
  U-
  U-
  U -
  n!U UUUUUUUUUUUUUU U!4$ )zHCalculates values of a quadgram contingency table from
marginal values.
r   )"n_iiiin_iiix_tuplen_iixx_tuplen_ixxx_tuplen_xxxxn_iiixn_iixin_ixiin_xiiin_iixxn_ixixn_ixxin_xixin_xxiin_xiixn_ixxxn_xixxn_xxixn_xxxin_oiiin_ioiin_iioin_ooiin_oioin_iooin_oooin_iiion_oiion_ioion_ooion_iioon_oioon_iooon_oooos"                                     r   r$   "QuadgramAssocMeasures._contingencyP  s   
 ,8(;G8+7(6)F26)F26)F2&6)F2V;fDvMPVV6)F26)F2&6)F2V;fDvMPVV6)F2&6)F2V;fDvMPVV6)F2V;fDvMPVV  	
     	 
      	( !
 	
r   c                     U u  nnnnnnnnn	n
nnnnnnX-   nX-   nX-   nX-   nX-   U	-   U-   nX-   U	-   U-   nX-   U-   U-   nX-   U-   U-   nX-   U-   U-   nX-   U	-   U
-   nX-   U-   U	-   U-   U-   U-   U-   nX-   U-   U	-   U-   U
-   U-   U-   nX-   U-   U	-   U-   U-   U
-   U-   nX-   U-   U-   U-   U-   U-   U-   n[        U 5      nUUUUU4UUUUUU4UUUU4U4$ )a  Calculates values of contingency table marginals from its values.
QuadgramAssocMeasures._marginals(1, 0, 2, 46, 552, 825, 2577, 34967, 1, 0, 2, 48, 7250, 9031, 28585, 356653)
(1, (2, 553, 3, 1), (7804, 6, 3132, 1378, 49, 2), (38970, 17660, 100, 38970), 440540)
r   ) r)   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r?   s                                    r   r*    QuadgramAssocMeasures._marginals  s   . #	
 6)F26)F26)F26)F26)F26)F26)F2V;fDvMPVV6)F2V;fDvMPVV6)F2V;fDvMPVV6)F2V;fDvMPVVK  VVV,VVVVV<VVV,
 	
r   r   Nr   r   r   r   r   r   9  s5    ( 
B9
 9
v 1
 1
r   r   c                   .    \ rS rSrSrS r\S 5       rSrg)ContingencyMeasuresi  zWraps NgramAssocMeasures classes such that the arguments of association
measures are contingency table values rather than marginals.
c                 $   SUR                   R                  -   U R                   l        [        U5       HY  nUR                  S5      (       a  M  [	        X5      nUR                  S5      (       d  U R                  X5      n[        XU5        M[     g)zAConstructs a ContingencyMeasures given a NgramAssocMeasures classContingency___N)	__class__rp   dir
startswithgetattr_make_contingency_fnsetattr)selfmeasureskvs       r   __init__ContingencyMeasures.__init__  sr    "/(2D2D2M2M"MXA||D!!$A<<$$--h:DQ r   c                 \   ^ ^ U U4S jnTR                   Ul         TR                  Ul        U$ )zwFrom an association measure function, produces a new function which
accepts contingency table values as its arguments.
c                  (   > T" TR                   " U 6 6 $ r   )r*   )r)   r   old_fns    r   res5ContingencyMeasures._make_contingency_fn.<locals>.res  s    8..<==r   )rt   rp   )r   r   r   s   `` r   r   (ContingencyMeasures._make_contingency_fn  s%    	> nn
r   r   N)	rp   rq   rr   rs   rt   r   ru   r   rw   r   r   r   r   r     s     	  
 
r   r   )rt   mathr	   abcr   r   	functoolsr   r`   logre   r>   rJ   scipy.statsr   ImportErrorrD   rI   rE   r   rz   r   r   r   r   r   r   <module>r      s     ' ii2	"( 	
 ) 7
 9t(7 t(nV(, V(r9
- 9
xE
. E
P M  """s   A. .	A:9A: