
    /hw                     r     S SK r S SKJrJrJr   " S S\5      rS r\S:X  a  \" 5         gg! \ a     N/f = f)    N)
DendrogramVectorSpaceClusterercosine_distancec                   Z    \ rS rSrSrSS jrSS jrSS jrS rS r	S	 r
S
 rS rS rSrg)GAAClusterer   a)  
The Group Average Agglomerative starts with each of the N vectors as singleton
clusters. It then iteratively merges pairs of clusters which have the
closest centroids.  This continues until there is only one cluster. The
order of merges gives rise to a dendrogram: a tree with the earlier merges
lower than later merges. The membership of a given number of clusters c, 1
<= c <= N, can be found by cutting the dendrogram at depth c.

This clusterer uses the cosine similarity metric only, which allows for
efficient speed-up in the clustering process.
Nc                 Z    [         R                  " XU5        Xl        S U l        S U l        g N)r   __init___num_clusters_dendrogram_groups_values)selfnum_clusters	normalisesvd_dimensionss       I/var/www/auris/envauris/lib/python3.13/site-packages/nltk/cluster/gaac.pyr   GAAClusterer.__init__   s(    %%d~F)"    c           	          [        U Vs/ s H(  n[        R                  " U[        R                  5      PM*     sn5      U l        [
        R                  " XX#5      $ s  snf r
   )r   numpyarrayfloat64r   r   cluster)r   vectorsassign_clusterstracevectors        r   r   GAAClusterer.cluster#   sK    %>EFgFU[[/gF
 $++D?RR Gs   /Ac                 J   [        U5      nS/U-  nUn[        R                  " U5      nX34n[        R                  " U[        S9[        R
                  -  n[        U5       H-  n	[        U	S-   U5       H  n
[        X   X   5      XU
4'   M     M/     U[        U R                  S5      :  a  [        R                  " UR                  5       U5      u  pU(       a  [        SX4-  5        U R                  XX5        [        R
                  US S 2U
4'   [        R
                  XS S 24'   XI   XJ   -   XI'   U R                  R                  Xi   Xj   5        US-  nXjS-   S === S-  sss& X6U
'   U[        U R                  S5      :  a  M  U R!                  U R                  5        g )N   )dtypezmerging %d and %d)lenr   arangeonesfloatinfranger   maxr   unravel_indexargminprint_merge_similaritiesr   mergeupdate_clusters)r   r   r   Ncluster_lencluster_count	index_mapdimsdistijs              r   cluster_vectorspace GAAClusterer.cluster_vectorspace*   sz   LcAgLLO	 vzz$e,uyy8qA1q5!_,WZDT
 %  c$"4"4a88&&t{{}d;DA)QF23 $$T= DAJDAJ )^kn<KN""9<>QM !eg!#aL) c$"4"4a88, 	T//0r   c                 <   X#   nX$   nXV-   nUS U2U4   U-  US U2U4   U-  -   US U2U4'   US U2U4==   U-  ss'   XUS-   U24   U-  XS-   U2U4   U-  -   XUS-   U24'   XUS-   S 24   U-  XUS-   S 24   U-  -   XUS-   S 24'   XUS-   S 24==   U-  ss'   g )Nr!    )r   r5   r1   r6   r7   i_weightj_weight
weight_sums           r   r-    GAAClusterer._merge_similaritiesP   s    >>(
 2A2q5kH,tBQBE{X/EERaRURaRUz! AEAI)DQA,>,II 	A	\  1q57
+h6Qj9IH9TTAZAZJ&r   c                    U R                   R                  U5      n/ U l        U H  n[        U5      S:  d   eU R                  (       a  U R                  US   5      nO[        R                  " US   5      nUSS   H-  nU R                  (       a  X@R                  U5      -  nM)  XE-  nM/     U[        U5      -  nU R                  R                  U5        M     [        U R                  5      U l	        g Nr   r!   )
r   groups
_centroidsr#   _should_normalise
_normaliser   r   appendr   )r   r   clustersr   centroidr   s         r   r/   GAAClusterer.update_clustersc   s    ##**<8Gw<!###%%??71:6 ;;wqz2!!"+)) 77H&H	 &
 G$HOO""8,   !1r   c                     S n[        U R                  5       H1  nU R                  U   n[        X5      nU(       a
  XRS   :  d  M.  XS4nM3     US   $ rA   )r(   r   rC   r   )r   r   bestr6   rH   r5   s         r   classify_vectorspace!GAAClusterer.classify_vectorspaceu   sQ    t))*Aq)H"64D4q'>y	 +
 Awr   c                     U R                   $ )zQ
:return: The dendrogram representing the current clustering
:rtype:  Dendrogram
)r   r   s    r   
dendrogramGAAClusterer.dendrogram~   s    
 r   c                     U R                   $ r
   r   rO   s    r   r   GAAClusterer.num_clusters   s    !!!r   c                      SU R                   -  $ )Nz*<GroupAverageAgglomerative Clusterer n=%d>rS   rO   s    r   __repr__GAAClusterer.__repr__   s    ;d>P>PPPr   )rC   r   r   r   )r!   TN)FF)F)__name__
__module____qualname____firstlineno____doc__r   r   r8   r-   r/   rL   rP   r   rV   __static_attributes__r;   r   r   r   r      s7    
#S$1L'&2$ "Qr   r   c                     SSK Jn   SS/SS/SS/SS/SS/SS/4 Vs/ s H  n[        R                  " U5      PM     nnU " S5      nUR	                  US5      n[        SU5        [        S	U5        [        S
U5        [        5         UR                  5       R                  5         [        R                  " SS/5      n[        SU-  SS9  [        UR                  U5      5        [        5         gs  snf )zG
Non-interactive demonstration of the clusterers with simple 2-D data.
r   )r      r!         Tz
Clusterer:z
Clustered:zAs:zclassify(%s): )endN)	nltk.clusterr   r   r   r   r,   rP   showclassify)r   fr   	clustererrG   r   s         r   demori      s    
 * *+AAAAAQRTUPV'WX'W!u{{1~'WGX QI  $/H	,	"	, 	%	G ! [[!Q F	/F
",	)

V
$%	G% Ys    C/__main__)	r   ImportErrornltk.cluster.utilr   r   r   r   ri   rX   r;   r   r   <module>rm      sV   	 P OyQ' yQx: zF   		s   - 66