o
    rZh&                     @   s   d dl Z d dlmZ d dlmZ d dlmZ zd dlZW n	 ey%   Y nw d dl	m
Z
 G dd de
Zdd	 Zd
d ZG dd dZG dd dZdS )    N)abstractmethod)sqrt)stdout)ClusterIc                   @   sd   e Zd ZdZdddZdddZedd	 Zd
d Zedd Z	dd Z
dd Zdd Zdd ZdS )VectorSpaceClustererz
    Abstract clusterer which takes tokens and maps them into a vector space.
    Optionally performs singular value decomposition to reduce the
    dimensionality.
    FNc                 C   s   d| _ || _|| _dS )a)  
        :param normalise:       should vectors be normalised to length 1
        :type normalise:        boolean
        :param svd_dimensions:  number of dimensions to use in reducing vector
                                dimensionsionality with SVD
        :type svd_dimensions:   int
        N)_Tt_should_normalise_svd_dimensions)selfZ	normaliseZsvd_dimensions r   @/var/www/auris/lib/python3.10/site-packages/nltk/cluster/util.py__init__   s   
zVectorSpaceClusterer.__init__c           
         s   t |dksJ  jrtt j|} jrb jt |d k rbtjt	t
|\}}}|d  j t jtj }|d d d  jf }|d  jd d f }	t	t||	}t	| _ || |rs fdd|D S d S )Nr   c                    s   g | ]}  |qS r   )classify).0vectorr
   r   r   
<listcomp>?       z0VectorSpaceClusterer.cluster.<locals>.<listcomp>)lenr   listmap
_normaliser	   numpyZlinalgZsvdZ	transposearrayidentityZfloat64dotr   cluster_vectorspace)
r
   vectorsZassign_clusterstraceudvtSTZDtr   r   r   cluster(   s    zVectorSpaceClusterer.clusterc                 C      dS )zD
        Finds the clusters using the given set of vectors.
        Nr   )r
   r   r   r   r   r   r   A       z(VectorSpaceClusterer.cluster_vectorspacec                 C   s<   | j r| |}| jd urt| j|}| |}| |S N)r   r   r   r   r   classify_vectorspaceZcluster_name)r
   r   r$   r   r   r   r   G   s   



zVectorSpaceClusterer.classifyc                 C   r%   )zN
        Returns the index of the appropriate cluster for the vector.
        Nr   r
   r   r   r   r   r(   O   r&   z)VectorSpaceClusterer.classify_vectorspacec                 C   s4   | j r| |}| jd urt| j|}| ||S r'   )r   r   r   r   r   likelihood_vectorspace)r
   r   labelr   r   r   
likelihoodU   s
   

zVectorSpaceClusterer.likelihoodc                 C   s   |  |}||krdS dS )zP
        Returns the likelihood of the vector belonging to the cluster.
        g      ?g        )r(   )r
   r   r$   Z	predictedr   r   r   r*   \   s   
z+VectorSpaceClusterer.likelihood_vectorspacec                 C   s,   | j r| |}| jdurt| j|}|S )zU
        Returns the vector after normalisation and dimensionality reduction
        N)r   r   r   r   r   r)   r   r   r   r   c   s
   

zVectorSpaceClusterer.vectorc                 C   s   |t t|| S )z7
        Normalises the vector to unit length.
        r   r   r   r)   r   r   r   r   m   s   zVectorSpaceClusterer._normalise)FN)FF)__name__
__module____qualname____doc__r   r$   r   r   r   r(   r,   r*   r   r   r   r   r   r   r      s    




r   c                 C   s   | | }t t||S )z}
    Returns the euclidean distance between vectors u and v. This is equivalent
    to the length of the vector (u - v).
    r-   )r   vdiffr   r   r   euclidean_distancet   s   r4   c                 C   s0   dt | |tt | | tt ||   S )zw
    Returns 1 minus the cosine of the angle between vectors v and u. This is
    equal to ``1 - (u.v / |u||v|)``.
       )r   r   r   )r   r2   r   r   r   cosine_distance}   s   0r6   c                   @   s2   e Zd ZdZdd ZdddZdd Zd	d
 ZdS )_DendrogramNodezTree node of a dendrogram.c                 G   s   || _ || _d S r'   )_value	_children)r
   valuechildrenr   r   r   r      s   
z_DendrogramNode.__init__Tc                 C   s<   | j rg }| j D ]
}||| q|S |r| jgS | gS r'   )r9   extendleavesr8   )r
   valuesr=   childr   r   r   r=      s   
z_DendrogramNode.leavesc                 C   s   | j | fg}t||k r@| \}}|js|||f n#|jD ]}|jr.||j |f q |d|f q |  t||k sg }|D ]\}}||  qD|S Nr   )r8   r   popr9   pushappendsortr=   )r
   nqueueprioritynoder?   groupsr   r   r   rI      s    
z_DendrogramNode.groupsc                 C   s   t | j|jdk S r@   )r6   r8   )r
   Z
comparatorr   r   r   __lt__   s   z_DendrogramNode.__lt__N)T)r.   r/   r0   r1   r   r=   rI   rJ   r   r   r   r   r7      s    
r7   c                   @   s@   e Zd ZdZg fddZdd Zdd Zg fdd	Zd
d ZdS )
Dendrograma  
    Represents a dendrogram, a tree with a specified branching order.  This
    must be initialised with the leaf items, then iteratively call merge for
    each branch. This class constructs a tree representing the order of calls
    to the merge function.
    c                 C   s(   dd |D | _ t| j | _d| _dS )zs
        :param  items: the items at the leaves of the dendrogram
        :type   items: sequence of (any)
        c                 S   s   g | ]}t |qS r   )r7   r   itemr   r   r   r          z'Dendrogram.__init__.<locals>.<listcomp>r5   N)_itemscopy_original_items_merge)r
   itemsr   r   r   r      s   
zDendrogram.__init__c                    sj   t |dksJ t jg fdd|D R  }  jd7  _| j|d < |dd D ]} j|= q,dS )a=  
        Merges nodes at given indices in the dendrogram. The nodes will be
        combined which then replaces the first node specified. All other nodes
        involved in the merge will be removed.

        :param  indices: indices of the items to merge (at least two)
        :type   indices: seq of int
           c                 3   s    | ]} j | V  qd S r'   )rO   )r   ir   r   r   	<genexpr>       z#Dendrogram.merge.<locals>.<genexpr>r5   r   N)r   r7   rR   rO   )r
   indicesrH   rU   r   r   r   merge   s   	 
zDendrogram.mergec                 C   s8   t | jdkrt| jg| jR  }n| jd }||S )z
        Finds the n-groups of items (leaves) reachable from a cut at depth n.
        :param  n: number of groups
        :type   n: int
        r5   r   )r   rO   r7   rR   rI   )r
   rE   rootr   r   r   rI      s   

zDendrogram.groupsc                    s  d\}}}t | jdkrt| jg| jR  }n| jd }| j}|r$|}ndd |D }ttt |d d t d dfdd		 d
d }|j|fg}	 fdd|D }
|	r|		 \}}t
tdd |j}t
t|j|}|rt|}t|}tt |D ]L}|| |v r||kr| |d| n||kr| ||d n| |||  ||
|< q||  kr|krn n	| ||| q||
|  q|d |jD ]}|jr|	|j|f q|	  |
D ]}|| q|d |	s]|dfdd|D  |d dS )z
        Print the dendrogram in ASCII art to standard out.

        :param leaf_labels: an optional list of strings to use for labeling the
                            leaves
        :type leaf_labels: list
        )+-|r5   r   c                 S   s   g | ]}d |j  qS )z%s)r8   r   leafr   r   r   r      r   z#Dendrogram.show.<locals>.<listcomp>rT    c                    s    |  |  |  S r'   r   )Zcentreleftright)lhalfrhalfr   r   format   s   zDendrogram.show.<locals>.formatc                 S   s   t |  d S r'   )r   write)strr   r   r   display   s   z Dendrogram.show.<locals>.displayc                    s   g | ]} d qS )r`   r   r^   )re   r   r   r     rN   c                 S   s   |  dd S )NFr   )r=   )cr   r   r   <lambda>  s    z!Dendrogram.show.<locals>.<lambda>
 c                 3   s    | ]}|  V  qd S r'   )centerrL   )widthr   r   rV   #  rW   z"Dendrogram.show.<locals>.<genexpr>N)r`   r`   )r   rO   r7   rR   rQ   maxr   intr8   rA   r   r9   indexminrangerC   rD   join)r
   Zleaf_labelsZJOINZHLINKZVLINKrZ   r=   last_rowrh   rF   Z	verticalsrG   rH   Zchild_left_leafrX   min_idxZmax_idxrU   r?   verticalr   )re   rc   rd   rn   r   show   sZ   




zDendrogram.showc                 C   sD   t | jdkrt| jg| jR  }n| jd }|d}dt | S )Nr5   r   Fz<Dendrogram with %d leaves>)r   rO   r7   rR   r=   )r
   rZ   r=   r   r   r   __repr__&  s
   

zDendrogram.__repr__N)	r.   r/   r0   r1   r   rY   rI   rx   ry   r   r   r   r   rK      s    	IrK   )rP   abcr   mathr   sysr   r   ImportErrorZnltk.cluster.apir   r   r4   r6   r7   rK   r   r   r   r   <module>   s   _	+