o
    rZhw                     @   sb   zd dl Z W n	 ey   Y nw d dlmZmZmZ G dd deZdd Zedkr/e  dS dS )    N)
DendrogramVectorSpaceClusterercosine_distancec                   @   s^   e Zd ZdZdddZddd	Zdd
dZdd Zdd Zdd Z	dd Z
dd Zdd ZdS )GAAClustereraM  
    The Group Average Agglomerative starts with each of the N vectors as singleton
    clusters. It then iteratively merges pairs of clusters which have the
    closest centroids.  This continues until there is only one cluster. The
    order of merges gives rise to a dendrogram: a tree with the earlier merges
    lower than later merges. The membership of a given number of clusters c, 1
    <= c <= N, can be found by cutting the dendrogram at depth c.

    This clusterer uses the cosine similarity metric only, which allows for
    efficient speed-up in the clustering process.
       TNc                 C   s$   t | || || _d | _d | _d S N)r   __init___num_clusters_dendrogramZ_groups_values)selfnum_clustersZ	normaliseZsvd_dimensions r   @/var/www/auris/lib/python3.10/site-packages/nltk/cluster/gaac.pyr      s   
zGAAClusterer.__init__Fc                 C   s$   t dd |D | _t| |||S )Nc                 S   s   g | ]	}t |t jqS r   )numpyarrayZfloat64).0vectorr   r   r   
<listcomp>&   s    z(GAAClusterer.cluster.<locals>.<listcomp>)r   r
   r   cluster)r   vectorsZassign_clusterstracer   r   r   r   #   s   zGAAClusterer.clusterc                 C   sV  t |}dg| }|}t|}||f}tj|tdtj }t|D ]}	t|	d |D ]}
t||	 ||
 ||	|
f< q+q"|t| j	dkrt
| |\}	}
|rXtd|	|
f  | |||	|
 tj|d d |
f< tj||
d d f< ||	 ||
  ||	< | j||	 ||
  |d8 }||
d d   d8  < |||
< |t| j	dksD| | j	 d S )Nr   )Zdtypezmerging %d and %d)lenr   ZarangeZonesfloatinfranger   maxr	   Zunravel_indexZargminprint_merge_similaritiesr
   mergeupdate_clusters)r   r   r   Ncluster_lenZcluster_countZ	index_mapdimsdistijr   r   r   cluster_vectorspace*   s0   

z GAAClusterer.cluster_vectorspacec                 C   s  || }|| }|| }|d ||f | |d ||f |  |d ||f< |d ||f  |  < |||d |f | ||d ||f |  |||d |f< |||d d f | |||d d f |  |||d d f< |||d d f  |  < d S )Nr   r   )r   r#   r!   r$   r%   Zi_weightZj_weightZ
weight_sumr   r   r   r   P   s   4.@ z GAAClusterer._merge_similaritiesc                 C   s   | j |}g | _|D ]@}t|dksJ | jr | |d }nt|d }|dd  D ]}| jr:|| |7 }q-||7 }q-|t| }| j| qt| j| _	d S Nr   r   )
r
   groups
_centroidsr   Z_should_normaliseZ
_normaliser   r   appendr	   )r   r   clustersr   centroidr   r   r   r   r   c   s   
zGAAClusterer.update_clustersc                 C   sH   d }t | jD ]}| j| }t||}|r||d k r||f}q|d S r'   )r   r	   r)   r   )r   r   bestr$   r,   r#   r   r   r   classify_vectorspaceu   s   

z!GAAClusterer.classify_vectorspacec                 C      | j S )zi
        :return: The dendrogram representing the current clustering
        :rtype:  Dendrogram
        )r
   r   r   r   r   
dendrogram~   s   zGAAClusterer.dendrogramc                 C   r/   r   r	   r0   r   r   r   r      s   zGAAClusterer.num_clustersc                 C   s
   d| j  S )Nz*<GroupAverageAgglomerative Clusterer n=%d>r2   r0   r   r   r   __repr__   s   
zGAAClusterer.__repr__)r   TN)FF)F)__name__
__module____qualname____doc__r   r   r&   r   r   r.   r1   r   r3   r   r   r   r   r      s    


&	r   c                  C   s   ddl m}  dd ddgddgddgddgddgddgfD }| d}||d	}td
| td| td| t  |   tddg}td| dd t|| t  dS )zO
    Non-interactive demonstration of the clusterers with simple 2-D data.
    r   )r   c                 S   s   g | ]}t |qS r   )r   r   )r   fr   r   r   r      s    zdemo.<locals>.<listcomp>   r         Tz
Clusterer:z
Clustered:zAs:zclassify(%s): )endN)	Znltk.clusterr   r   r   r1   showr   r   Zclassify)r   r   Z	clustererr+   r   r   r   r   demo   s   2



r?   __main__)	r   ImportErrorZnltk.cluster.utilr   r   r   r   r?   r4   r   r   r   r   <module>   s   |
