
    /h&                         S SK r S SKJr  S SKJr  S SKJr   S SKrS SK	J
r
   " S S\
5      rS rS	 r " S
 S5      r " S S5      rg! \ a     N4f = f)    N)abstractmethod)sqrt)stdout)ClusterIc                   j    \ rS rSrSrSS jrSS jr\S 5       rS r	\S 5       r
S	 rS
 rS rS rSrg)VectorSpaceClusterer   z
Abstract clusterer which takes tokens and maps them into a vector space.
Optionally performs singular value decomposition to reduce the
dimensionality.
Nc                 *    SU l         Xl        X l        g)z
:param normalise:       should vectors be normalised to length 1
:type normalise:        boolean
:param svd_dimensions:  number of dimensions to use in reducing vector
                        dimensionsionality with SVD
:type svd_dimensions:   int
N_Tt_should_normalise_svd_dimensions)self	normalisesvd_dimensionss      I/var/www/auris/envauris/lib/python3.13/site-packages/nltk/cluster/util.py__init__VectorSpaceClusterer.__init__   s     !*-    c                 N   [        U5      S:  d   eU R                  (       a  [        [        U R                  U5      5      nU R
                  (       Ga  U R
                  [        US   5      :  a  [        R                  R                  [        R                  " [        R                  " U5      5      5      u  pEnUS U R
                   [        R                  " U R
                  [        R                  5      -  nUS S 2S U R
                  24   nUS U R
                  2S S 24   n	[        R                  " [        R                  " Xy5      5      n[        R                  " U5      U l        U R                  X5        U(       a!  U V
s/ s H  oR!                  U
5      PM     sn
$ g s  sn
f Nr   )lenr   listmap
_normaliser   numpylinalgsvd	transposearrayidentityfloat64dotr   cluster_vectorspaceclassify)r   vectorsassign_clusterstraceudvtSTDtvectors              r   clusterVectorSpaceClusterer.cluster(   sK   7|a !!3t89G D$8$83wqz?$J))%//%++g:N*OPJQ2(D(()ENN$$emm- A !+t++++,A*d***A-.Booeii&67Gq)DH 	  0 8?@fMM&)@@ @s   F"c                     g)z4
Finds the clusters using the given set of vectors.
N )r   r&   r(   s      r   r$   (VectorSpaceClusterer.cluster_vectorspaceA       r   c                     U R                   (       a  U R                  U5      nU R                  b!  [        R                  " U R                  U5      nU R                  U5      nU R                  U5      $ N)r   r   r   r   r#   classify_vectorspacecluster_name)r   r/   r0   s      r   r%   VectorSpaceClusterer.classifyG   sX    !!__V,F88YYtxx0F++F3  ))r   c                     g)z>
Returns the index of the appropriate cluster for the vector.
Nr3   r   r/   s     r   r8   )VectorSpaceClusterer.classify_vectorspaceO   r5   r   c                     U R                   (       a  U R                  U5      nU R                  b!  [        R                  " U R                  U5      nU R                  X5      $ r7   )r   r   r   r   r#   likelihood_vectorspace)r   r/   labels      r   
likelihoodVectorSpaceClusterer.likelihoodU   sH    !!__V,F88YYtxx0F**699r   c                 6    U R                  U5      nX#:X  a  S$ S$ )z@
Returns the likelihood of the vector belonging to the cluster.
g      ?g        )r8   )r   r/   r0   	predicteds       r   r?   +VectorSpaceClusterer.likelihood_vectorspace\   s$     --f5	*s33r   c                     U R                   (       a  U R                  U5      nU R                  b!  [        R                  " U R                  U5      nU$ )zE
Returns the vector after normalisation and dimensionality reduction
)r   r   r   r   r#   r<   s     r   r/   VectorSpaceClusterer.vectorc   s>     !!__V,F88YYtxx0Fr   c                 F    U[        [        R                  " X5      5      -  $ )z'
Normalises the vector to unit length.
r   r   r#   r<   s     r   r   VectorSpaceClusterer._normalisem   s     UYYv6777r   r   )FN)FF)__name__
__module____qualname____firstlineno____doc__r   r0   r   r$   r%   r8   rA   r?   r/   r   __static_attributes__r3   r   r   r   r      sS    
.A2  
*  
:48r   r   c                 H    X-
  n[        [        R                  " X"5      5      $ )zq
Returns the euclidean distance between vectors u and v. This is equivalent
to the length of the vector (u - v).
rI   )r)   vdiffs      r   euclidean_distancerT   t   s    
 5D		$%&&r   c           	          S[         R                  " X5      [        [         R                  " X 5      5      [        [         R                  " X5      5      -  -  -
  $ )zk
Returns 1 minus the cosine of the angle between vectors v and u. This is
equal to ``1 - (u.v / |u||v|)``.
   )r   r#   r   )r)   rR   s     r   cosine_distancerW   }   s;    
 		!4		!#84		!;P#PQRRr   c                   4    \ rS rSrSrS rS	S jrS rS rSr	g)
_DendrogramNode   zTree node of a dendrogram.c                     Xl         X l        g r7   )_value	_children)r   valuechildrens      r   r   _DendrogramNode.__init__   s    !r   c                     U R                   (       a7  / nU R                    H#  nUR                  UR                  U5      5        M%     U$ U(       a  U R                  /$ U /$ r7   )r]   extendleavesr\   )r   valuesrc   childs       r   rc   _DendrogramNode.leaves   sL    >>Fell623 (MKK= 6Mr   c                    U R                   U 4/n[        U5      U:  a  UR                  5       u  p4UR                  (       d  UR	                  X445        OwUR                   HF  nUR                  (       a  UR                  UR                   U45        M3  UR                  SU45        MH     UR                  5         [        U5      U:  a  M  / nU H$  u  p4UR                  UR                  5       5        M&     U$ r   )r\   r   popr]   pushappendsortrc   )r   nqueueprioritynodere   groupss          r   rp   _DendrogramNode.groups   s    ++t$%%j1n"YY[NH>>

H+,??LL%,,!67LL!U,	 ( JJL %j1n #NHMM$++-( $r   c                 H    [        U R                  UR                  5      S:  $ r   )rW   r\   )r   
comparators     r   __lt___DendrogramNode.__lt__   s    t{{J,=,=>BBr   )r]   r\   N)T)
rK   rL   rM   rN   rO   r   rc   rp   rt   rP   r3   r   r   rY   rY      s    $"	*Cr   rY   c                   B    \ rS rSrSr/ 4S jrS rS r/ 4S jrS r	Sr
g	)

Dendrogram   z
Represents a dendrogram, a tree with a specified branching order.  This
must be initialised with the leaf items, then iteratively call merge for
each branch. This class constructs a tree representing the order of calls
to the merge function.
c                     U Vs/ s H  n[        U5      PM     snU l        [        R                  " U R                  5      U l        SU l        gs  snf )z[
:param  items: the items at the leaves of the dendrogram
:type   items: sequence of (any)
rV   N)rY   _itemscopy_original_items_merge)r   itemsitems      r   r   Dendrogram.__init__   sA    
 :??t,?#yy5 @s   Ac                    ^  [        U5      S:  d   e[        T R                  /U 4S jU 5       Q76 nT =R                  S-  sl        UT R                  US   '   USS  H  nT R                  U	 M     g)a  
Merges nodes at given indices in the dendrogram. The nodes will be
combined which then replaces the first node specified. All other nodes
involved in the merge will be removed.

:param  indices: indices of the items to merge (at least two)
:type   indices: seq of int
   c              3   B   >#    U  H  nTR                   U   v   M     g 7fr7   )rz   ).0ir   s     r   	<genexpr>#Dendrogram.merge.<locals>.<genexpr>   s     -Ngdkk!ngs   rV   r   N)r   rY   r}   rz   )r   indicesro   r   s   `   r   mergeDendrogram.merge   sk     7|q   t{{O-Ng-NOq"&GAJAA r   c                     [        U R                  5      S:  a!  [        U R                  /U R                  Q76 nOU R                  S   nUR	                  U5      $ )zr
Finds the n-groups of items (leaves) reachable from a cut at depth n.
:param  n: number of groups
:type   n: int
rV   r   )r   rz   rY   r}   rp   )r   rl   roots      r   rp   Dendrogram.groups   sG     t{{a"4;;==D;;q>D{{1~r   c                 X  ^^^ Su  p#n[        U R                  5      S:  a!  [        U R                  /U R                  Q76 nOU R                  S   nU R                  nU(       a  UnOU Vs/ s H  nSUR
                  -  PM     nn[        [        [         U5      5      S-   mTS-  m[        TT-
  S-
  5      mSUU4S jjn	S n
UR
                  U4/nU Vs/ s H
  o" S5      PM     nnU(       Ga  UR                  5       u  p[        [        S	 UR                  5      5      n[        [        UR                  U5      5      nU(       a  [        U5      n[        U5      n[        [        U5      5       H  nUU   U;   aI  UW:X  a  U
" U	" USU5      5        O%UW:X  a  U
" U	" X#S5      5        OU
" U	" X#U5      5        U	" U5      UU'   MU  WUs=::  a  W::  a  O  OU
" U	" X3U5      5        Mv  U
" UU   5        M     U
" S
5        UR                   H3  nUR                  (       d  M  UR                  UR
                  U45        M5     UR!                  5         U H  nU
" U5        M     U
" S
5        U(       a  GM  U
" SR#                  U4S jU 5       5      5        U
" S
5        gs  snf s  snf )z
Print the dendrogram in ASCII art to standard out.

:param leaf_labels: an optional list of strings to use for labeling the
                    leaves
:type leaf_labels: list
)+-|rV   r   z%sr    c                     > TU-   U  UT-   3$ r7   r3   )centreleftrightlhalfrhalfs      r   formatDendrogram.show.<locals>.format   s    dl^F8EEM?;;r   c                 0    [         R                  " U 5        g r7   )r   write)strs    r   display Dendrogram.show.<locals>.display   s    LLr   c                 *    U R                  S5      S   $ )NFr   )rc   )cs    r   <lambda>!Dendrogram.show.<locals>.<lambda>  s    %1Cr   
 c              3   D   >#    U  H  oR                  T5      v   M     g 7fr7   )center)r   r   widths     r   r   "Dendrogram.show.<locals>.<genexpr>#  s     @xtE**xs    N)r   r   )r   rz   rY   r}   r|   r\   maxr   intrh   r   r]   indexminrangerj   rk   join)r   leaf_labelsJOINHLINKVLINKr   rc   last_rowleafr   r   rm   	verticalsrn   ro   child_left_leafr   min_idxmax_idxr   re   verticalr   r   r   s                         @@@r   showDendrogram.show   sP    +U t{{a"4;;==D;;q>D%%"H7=>vtt{{*vH> CX&'!+
EEMA%&	< 	<	 ++t$%-34VTVC[V	4"YY[NH"3'CT^^#TUO3v||_=>Gg,g,3v;'!9/G|tS% 89gtC 89tE :;#)%=IaL,W,F578IaL) ( DM???LL%,,!67 ( JJL%! &DM9 e> 	@x@@Ac ?  5s   ,J"J'c                     [        U R                  5      S:  a!  [        U R                  /U R                  Q76 nOU R                  S   nUR	                  S5      nS[        U5      -  $ )NrV   r   Fz<Dendrogram with %d leaves>)r   rz   rY   r}   rc   )r   r   rc   s      r   __repr__Dendrogram.__repr__&  sT    t{{a"4;;==D;;q>DU#,s6{::r   )rz   r}   r|   N)rK   rL   rM   rN   rO   r   r   rp   r   r   rP   r3   r   r   rw   rw      s,        
  " GR;r   rw   )r{   abcr   mathr   sysr   r   ImportErrornltk.cluster.apir   r   rT   rW   rY   rw   r3   r   r   <module>r      sh       	 &\88 \8~'S(C (CV|; |;C  		s   A AA